# Hadia_Project / app.py
# Hugging Face Space by arif670
# Commit 8cead61 (verified): "Update app.py"
import gradio as gr
from transformers import Wav2Vec2ForCTC, Wav2Vec2Processor, SpeechT5Processor, SpeechT5ForTextToSpeech
import torch
import torchaudio
# Checkpoint identifiers for the two pretrained pipelines used by the app.
_STT_CHECKPOINT = "facebook/wav2vec2-base-960h"
_TTS_CHECKPOINT = "microsoft/speecht5_tts"

# Speech-to-text: the processor prepares raw audio, the CTC model emits logits.
stt_processor = Wav2Vec2Processor.from_pretrained(_STT_CHECKPOINT)
stt_model = Wav2Vec2ForCTC.from_pretrained(_STT_CHECKPOINT)

# Text-to-speech: the processor tokenizes text, the model generates speech.
tts_processor = SpeechT5Processor.from_pretrained(_TTS_CHECKPOINT)
tts_model = SpeechT5ForTextToSpeech.from_pretrained(_TTS_CHECKPOINT)
# Function to convert speech to text
def speech_to_text(audio):
    """Transcribe an audio file with the Wav2Vec2 CTC model.

    Args:
        audio: Path to an audio file readable by ``torchaudio.load``.

    Returns:
        str: The transcription obtained by greedy (argmax) CTC decoding.

    Raises:
        ValueError: If ``audio`` is ``None`` or empty (nothing was recorded).
    """
    if not audio:
        raise ValueError("No audio was provided; record or upload a clip first.")

    target_rate = 16000  # rate declared to the processor below

    waveform, sample_rate = torchaudio.load(audio)

    # torchaudio returns (channels, frames). Average the channels so a stereo
    # recording becomes a single mono waveform instead of a 2-row tensor.
    if waveform.dim() == 2 and waveform.size(0) > 1:
        waveform = waveform.mean(dim=0, keepdim=True)

    if sample_rate != target_rate:
        resampler = torchaudio.transforms.Resample(
            orig_freq=sample_rate, new_freq=target_rate
        )
        waveform = resampler(waveform)

    samples = waveform.squeeze(0).numpy()
    input_values = stt_processor(
        samples, return_tensors="pt", sampling_rate=target_rate
    ).input_values

    # Inference only: skip autograd bookkeeping.
    with torch.no_grad():
        logits = stt_model(input_values).logits

    predicted_ids = torch.argmax(logits, dim=-1)
    return stt_processor.decode(predicted_ids[0])
# Lazily created HiFi-GAN vocoder, shared by every text_to_speech call.
_tts_vocoder = None


def _get_tts_vocoder():
    """Return the cached SpeechT5 HiFi-GAN vocoder, loading it on first use."""
    global _tts_vocoder
    if _tts_vocoder is None:
        # Local import keeps this block self-contained; transformers is
        # already a dependency of this file.
        from transformers import SpeechT5HifiGan

        _tts_vocoder = SpeechT5HifiGan.from_pretrained("microsoft/speecht5_hifigan")
    return _tts_vocoder


# Function to convert text to speech
def text_to_speech(text, speaker_embeddings=None):
    """Synthesize speech for ``text`` with SpeechT5.

    Args:
        text: The sentence to speak. Must not be empty or whitespace-only.
        speaker_embeddings: Optional speaker x-vector tensor of shape
            ``(1, speaker_embedding_dim)``. When omitted, a zero vector is
            used as a neutral placeholder.
            NOTE(review): a real x-vector (e.g. from the CMU ARCTIC x-vector
            set) should give a more natural voice; confirm the placeholder
            quality is acceptable.

    Returns:
        tuple: ``(16000, waveform)`` where ``waveform`` is a NumPy array.

    Raises:
        ValueError: If ``text`` is empty or whitespace-only.
    """
    if not text or not text.strip():
        raise ValueError("Cannot synthesize speech from empty text.")

    inputs = tts_processor(text=text, return_tensors="pt")

    if speaker_embeddings is None:
        speaker_embeddings = torch.zeros((1, tts_model.config.speaker_embedding_dim))

    # Without a vocoder generate_speech returns a mel spectrogram rather than
    # an audio waveform, so always pass one.
    with torch.no_grad():
        speech = tts_model.generate_speech(
            inputs["input_ids"],
            speaker_embeddings,
            vocoder=_get_tts_vocoder(),
        )
    return (16000, speech.cpu().numpy())
# Function to handle the entire flow
def learn_english(audio, progress=gr.Progress()):
    """Run one practice round: transcribe the recording and reply with feedback.

    Args:
        audio: File path of the recording from the Gradio audio input, or
            ``None`` when the user submitted without recording anything.
        progress: Gradio progress tracker used to report each stage.

    Returns:
        tuple: ``(feedback_text, audio_feedback, status_message)`` where
        ``audio_feedback`` is whatever ``text_to_speech`` returns.

    Raises:
        gr.Error: If no audio was supplied, so the UI shows a readable message
            instead of a raw traceback.
    """
    if not audio:
        raise gr.Error("Please record or upload some audio before submitting.")

    # Show loading indicator
    progress(0, desc="Processing your audio...")

    # Convert speech to text
    progress(0.5, desc="Transcribing your speech...")
    user_input = speech_to_text(audio)

    # Generate feedback
    progress(0.75, desc="Generating feedback...")
    feedback = f"You said: '{user_input}'. Great job! Let's practice more."
    audio_feedback = text_to_speech(feedback)

    # Return results with a success message
    return feedback, audio_feedback, "✅ Feedback generated successfully!"
# Custom CSS for styling.
# This stylesheet is handed to gr.Blocks(css=...) when the interface is built.
# It pulls the Roboto font and the Font Awesome icon set from external CDNs via
# @import, and defines the `.bounce` and `.success-message` classes that the
# submit button and the status HTML element attach through `elem_classes`.
# NOTE(review): selectors such as .gr-button, .gr-box, .gr-row and .gr-column
# assume Gradio emits those class names - confirm against the installed version.
custom_css = """
/* Import Google Fonts */
@import url('https://fonts.googleapis.com/css2?family=Roboto:wght@400;700&display=swap');
@import url('https://cdnjs.cloudflare.com/ajax/libs/font-awesome/6.0.0-beta3/css/all.min.css');
/* General Styling */
body {
font-family: 'Roboto', sans-serif;
background-color: #f4f4f9;
margin: 0;
padding: 0;
}
/* Header Styling */
h1 {
color: #333;
text-align: center;
margin-bottom: 20px;
font-weight: 700;
animation: fadeIn 1s ease-in-out;
}
/* Button Styling */
.gr-button {
background-color: #ff6f61; /* Vibrant red-orange */
color: white;
border: none;
padding: 12px 24px;
border-radius: 25px; /* Rounded corners */
font-size: 16px;
cursor: pointer;
transition: all 0.3s ease;
box-shadow: 0 4px 6px rgba(0, 0, 0, 0.1);
position: relative;
overflow: hidden;
}
.gr-button:hover {
background-color: #ff3b2f; /* Darker red on hover */
transform: translateY(-3px); /* Slight upward movement */
box-shadow: 0 6px 10px rgba(0, 0, 0, 0.2);
}
.gr-button:active {
transform: translateY(0);
box-shadow: 0 4px 6px rgba(0, 0, 0, 0.1);
}
/* Bouncing Animation */
@keyframes bounce {
0%, 100% { transform: translateY(0); }
50% { transform: translateY(-10px); }
}
.gr-button.bounce {
animation: bounce 0.5s ease infinite;
}
/* Input and Output Boxes */
.gr-box {
border-radius: 15px;
border: 2px solid #ddd;
padding: 20px;
box-shadow: 0 8px 16px rgba(0, 0, 0, 0.1);
background-color: #fff;
animation: slideIn 0.8s ease-in-out;
}
/* Audio Feedback */
.gr-audio {
margin-top: 15px;
}
/* Markdown Styling */
.markdown-body {
font-size: 18px;
line-height: 1.6;
color: #555;
animation: fadeIn 1s ease-in-out;
}
/* Icons */
.icon {
font-size: 24px;
margin-right: 10px;
color: #ff6f61;
}
/* Success Message */
.success-message {
color: #28a745;
font-weight: bold;
text-align: center;
margin-top: 15px;
animation: fadeIn 1s ease-in-out;
}
/* Animations */
@keyframes fadeIn {
from { opacity: 0; }
to { opacity: 1; }
}
@keyframes slideIn {
from { transform: translateY(20px); opacity: 0; }
to { transform: translateY(0); opacity: 1; }
}
/* Responsive Design */
@media (max-width: 768px) {
.gr-row {
flex-direction: column;
}
.gr-column {
width: 100%;
margin-bottom: 20px;
}
h1 {
font-size: 24px;
}
.gr-button {
width: 100%;
padding: 12px;
}
}
"""
# Create Gradio Interface with Custom CSS.
# Layout: a narrow left column with the recorder and submit button, and a wider
# right column with the text feedback, spoken feedback and a status line.
with gr.Blocks(theme=gr.themes.Soft(), css=custom_css) as demo:
    # The Markdown body is kept at column 0 so no line is read as a code block.
    gr.Markdown("""
# 🌟 English Language Learning App 🌟
Welcome to the English Language Learning App!
- Speak into the microphone, and the app will transcribe your speech.
- It will then provide feedback in both text and audio formats.
""")
    with gr.Row():
        with gr.Column(scale=1):  # Left column for input
            # The heading marker must start the line to be parsed as a heading,
            # so the icon goes after "###" rather than before it.
            gr.Markdown("### <i class='fas fa-microphone icon'></i> Step 1: Record Your Voice")
            audio_input = gr.Audio(
                type="filepath",
                label="🎤 Speak into the Microphone",
                interactive=True,
            )
            # Button labels are plain text, so an emoji is used here instead of
            # an HTML icon tag that would be displayed literally.
            submit_button = gr.Button(
                "✅ Submit",
                variant="primary",
                elem_classes=["bounce"],
            )
        with gr.Column(scale=2):  # Right column for output
            gr.Markdown("### <i class='fas fa-file-alt icon'></i> Step 2: View Feedback")
            text_output = gr.Textbox(
                label="📝 Transcription",
                placeholder="Your transcription will appear here...",
                lines=3,
            )
            audio_output = gr.Audio(
                label="🎧 Audio Feedback",
                autoplay=True,
            )
            success_message = gr.HTML("", elem_classes=["success-message"])

    # Bind the button to the function
    submit_button.click(
        learn_english,
        inputs=audio_input,
        outputs=[text_output, audio_output, success_message],
    )

# Launch the app only when run as a script, so importing this module
# (e.g. for tests or tooling) does not start a server.
if __name__ == "__main__":
    demo.launch(server_name="0.0.0.0", server_port=7860)