Anupam007 committed on
Commit
f06070c
·
verified ·
1 Parent(s): 38426e1

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +18 -291
app.py CHANGED
@@ -1,4 +1,3 @@
1
- #Step 2: Import libraries
2
  import os
3
  import time
4
  import gradio as gr
@@ -10,328 +9,56 @@ import whisper
10
  import re
11
  from gtts import gTTS
12
  from transformers import pipeline, T5ForConditionalGeneration, T5Tokenizer
13
- from IPython.display import Audio, display
14
 
15
- # Check if GPU is available
16
  device = "cuda" if torch.cuda.is_available() else "cpu"
17
  print(f"Using device: {device}")
18
 
19
- # Step 3: Load models with error handling
20
- print("Loading models...")
21
  try:
22
  whisper_model = whisper.load_model("small", device=device)
23
- print("Whisper model loaded successfully")
24
  except Exception as e:
25
  print(f"Failed to load Whisper model: {e}")
26
- exit(1)
27
 
28
- try:
29
- t5_tokenizer = T5Tokenizer.from_pretrained("t5-base")
30
- t5_model = T5ForConditionalGeneration.from_pretrained("t5-base").to(device)
31
- print("T5 model loaded successfully for grammar correction")
32
- except Exception as e:
33
- print(f"Failed to load T5 model: {e}")
34
- exit(1)
35
 
36
  try:
37
  sentiment_analyzer = pipeline("text-classification",
38
  model="distilbert-base-uncased-finetuned-sst-2-english",
39
  device=0 if device == "cuda" else -1)
40
- print("Sentiment analyzer loaded successfully")
41
  except Exception as e:
42
  print(f"Failed to load sentiment analyzer: {e}")
43
- exit(1)
44
 
45
- # Step 4: Define processing functions
46
  def speech_to_text(audio_path):
47
- """Convert speech to text using Whisper"""
48
- if not audio_path or not os.path.exists(audio_path):
49
- return "Error: Invalid audio file path."
50
  try:
51
  result = whisper_model.transcribe(audio_path)
52
  return result["text"].strip()
53
  except Exception as e:
54
- print(f"Error in speech recognition: {e}")
55
- return "Could not recognize speech. Please try again."
56
-
57
- def correct_grammar_with_t5(text):
58
- """Use T5 model to correct grammar"""
59
- if not text or len(text.strip()) == 0:
60
- return text
61
- input_text = f"grammar: {text}"
62
- try:
63
- input_ids = t5_tokenizer(input_text, return_tensors="pt").input_ids.to(device)
64
- outputs = t5_model.generate(
65
- input_ids=input_ids,
66
- max_length=512,
67
- num_beams=4,
68
- early_stopping=True
69
- )
70
- corrected = t5_tokenizer.decode(outputs[0], skip_special_tokens=True)
71
- if corrected.strip() == text.strip() or len(corrected) < 2:
72
- corrected = apply_basic_grammar_rules(text)
73
- return corrected
74
- except Exception as e:
75
- print(f"Error in T5 grammar correction: {e}")
76
- return apply_basic_grammar_rules(text) # Fallback to basic rules
77
-
78
- def apply_basic_grammar_rules(text):
79
- """Apply basic grammar rules for correction"""
80
- if not text:
81
- return ""
82
- corrections = {
83
- r'\bi\b': 'I',
84
- r'\bi\'m\b': 'I\'m',
85
- r'\bi\'ve\b': 'I\'ve',
86
- r'\bi\'ll\b': 'I\'ll',
87
- r'\bim\b': 'I\'m',
88
- r'\bive\b': 'I\'ve',
89
- r'\bill\b': 'I\'ll',
90
- r'\bdont\b': 'don\'t',
91
- r'\bcant\b': 'can\'t',
92
- r'\bwont\b': 'won\'t',
93
- r'\btheir is\b': 'there is',
94
- r'\btheir are\b': 'there are',
95
- r'\byour welcome\b': 'you\'re welcome',
96
- r'\byour right\b': 'you\'re right',
97
- r'\bit\'?s been\b': 'it\'s been',
98
- r'\balot\b': 'a lot',
99
- r'\bcould of\b': 'could have',
100
- r'\bshould of\b': 'should have',
101
- r'\bwould of\b': 'would have',
102
- r'\bmust of\b': 'must have',
103
- }
104
- corrected = text
105
- for pattern, replacement in corrections.items():
106
- corrected = re.sub(pattern, replacement, corrected, flags=re.IGNORECASE)
107
- if corrected and len(corrected) > 0:
108
- corrected = corrected[0].upper() + corrected[1:]
109
- if corrected and not corrected.strip().endswith(('.', '!', '?')):
110
- corrected = corrected.strip() + '.'
111
- return corrected
112
-
113
- def identify_grammar_issues(original, corrected):
114
- """Identify grammar issues by comparing original and corrected text"""
115
- if not original or not corrected or original == corrected:
116
- return "No grammar issues detected."
117
-
118
- issues = []
119
- if len(original) > 0 and len(corrected) > 0:
120
- if original[0].islower() and corrected[0].isupper():
121
- issues.append("Capitalization: Sentences should start with a capital letter.")
122
- if not original.strip().endswith(('.', '!', '?')) and corrected.strip().endswith(('.', '!', '?')):
123
- issues.append("Punctuation: Sentences should end with proper punctuation.")
124
-
125
- patterns = {
126
- r'\bi\b': "Capitalization: The pronoun 'I' should always be capitalized.",
127
- r'\bim\b': "Contraction: 'im' should be written as 'I'm'.",
128
- r'\bive\b': "Contraction: 'ive' should be written as 'I've'.",
129
- r'\bdont\b': "Contraction: 'dont' should be written as 'don't'.",
130
- r'\bcant\b': "Contraction: 'cant' should be written as 'can't'.",
131
- r'\bwont\b': "Contraction: 'wont' should be written as 'won't'.",
132
- r'\btheir is\b': "Grammar: 'their is' should be 'there is'.",
133
- r'\btheir are\b': "Grammar: 'their are' should be 'there are'.",
134
- r'\byour welcome\b': "Grammar: 'your welcome' should be 'you're welcome'.",
135
- r'\byour right\b': "Grammar: 'your right' should be 'you're right'.",
136
- r'\balot\b': "Spelling: 'alot' should be written as 'a lot'.",
137
- r'\bcould of\b': "Grammar: 'could of' should be 'could have'.",
138
- r'\bshould of\b': "Grammar: 'should of' should be 'should have'.",
139
- r'\bwould of\b': "Grammar: 'would of' should be 'would have'.",
140
- }
141
-
142
- for pattern, explanation in patterns.items():
143
- if re.search(pattern, original, re.IGNORECASE) and not re.search(pattern, corrected, re.IGNORECASE):
144
- issues.append(explanation)
145
-
146
- if not issues and original != corrected:
147
- issues.append("Grammar: There were some grammar issues in your speech. Compare your original with the correction.")
148
-
149
- return "\n".join(issues)
150
-
151
- def analyze_pronunciation(audio_path, text):
152
- """Analyze pronunciation based on audio characteristics"""
153
- try:
154
- y, sr = librosa.load(audio_path)
155
- duration = librosa.get_duration(y=y, sr=sr)
156
- word_count = len(text.split())
157
-
158
- if word_count == 0:
159
- return "Could not analyze pronunciation. No words detected."
160
-
161
- speech_rate = (word_count / duration) * 60
162
- pitches, magnitudes = librosa.piptrack(y=y, sr=sr)
163
- pitch_values = [pitches[index, i] for i in range(magnitudes.shape[1])
164
- if (index := magnitudes[:, i].argmax()) and pitches[index, i] > 0]
165
- pitch_variability = np.std(pitch_values) if pitch_values else 0
166
- rms = librosa.feature.rms(y=y)[0]
167
- volume_variability = np.std(rms)
168
-
169
- feedback = []
170
- if speech_rate > 180:
171
- feedback.append("You're speaking quite fast (over 180 words per minute). Try slowing down slightly for better clarity.")
172
- elif speech_rate < 120:
173
- feedback.append("You're speaking a bit slowly (under 120 words per minute). Consider speeding up slightly to sound more fluent.")
174
- else:
175
- feedback.append("Your speaking rate is good (between 120-180 words per minute).")
176
-
177
- if pitch_variability < 10:
178
- feedback.append("Your speech could use more variation in tone. Try emphasizing important words more.")
179
- else:
180
- feedback.append("You have good variation in your tone and emphasis.")
181
-
182
- if volume_variability < 0.02:
183
- feedback.append("Try varying your volume more for emphasis on key points.")
184
- else:
185
- feedback.append("Your volume variation is good, which helps maintain listener interest.")
186
-
187
- return "\n".join(feedback)
188
- except Exception as e:
189
- print(f"Error in pronunciation analysis: {e}")
190
- return "Could not analyze pronunciation due to an error."
191
-
192
- def generate_learning_tip(original, corrected):
193
- """Generate a learning tip based on the differences between original and corrected text"""
194
- if not original or not corrected or original == corrected:
195
- return "Your grammar is excellent! Keep practicing to improve fluency and pronunciation."
196
-
197
- if re.search(r'\bi\b', original, re.IGNORECASE) and not re.search(r'\bi\b', corrected, re.IGNORECASE):
198
- return "Remember that the pronoun 'I' is always capitalized in English. This is a common mistake for English learners."
199
-
200
- if any(re.search(pattern, original, re.IGNORECASE) for pattern in [r'\bim\b', r'\bive\b', r'\bdont\b', r'\bcant\b']):
201
- return "Practice using apostrophes in contractions: 'I'm', 'I've', 'don't', 'can't'. Try writing these contractions a few times to memorize them."
202
-
203
- if re.search(r'\btheir is\b|\btheir are\b', original, re.IGNORECASE):
204
- return "Remember the difference between 'their', 'there', and 'they're'. 'Their' shows possession, 'there' indicates location, and 'they're' is a contraction of 'they are'."
205
-
206
- if re.search(r'\byour welcome\b|\byour right\b', original, re.IGNORECASE):
207
- return "Remember the difference between 'your' and 'you're'. 'Your' shows possession, while 'you're' is a contraction of 'you are'."
208
-
209
- if not original.strip().endswith(('.', '!', '?')) and corrected.strip().endswith(('.', '!', '?')):
210
- return "Remember to end your sentences with proper punctuation. This helps make your meaning clear in writing and indicates proper pauses in speech."
211
-
212
- generic_tips = [
213
- "Practice makes perfect! Try reading English content aloud for 10 minutes daily.",
214
- "Listen to native English speakers and mimic their pronunciation and rhythm.",
215
- "Record yourself speaking and compare it with native speakers.",
216
- "Focus on one grammar rule at a time until it becomes natural.",
217
- "Try to think in English rather than translating from your native language."
218
- ]
219
- import random
220
- return random.choice(generic_tips)
221
-
222
- def text_to_speech(text):
223
- """Convert text to speech using gTTS"""
224
- if not text:
225
- return None
226
- try:
227
- tts = gTTS(text=text, lang='en')
228
- fp = tempfile.NamedTemporaryFile(suffix='.mp3', delete=False)
229
- tts.save(fp.name)
230
- return fp.name
231
- except Exception as e:
232
- print(f"Error in text-to-speech conversion: {e}")
233
- return None
234
 
235
- # Step 5: Main processing function
236
  def process_audio(audio_path):
237
- """Process the audio input and provide feedback"""
238
  if not audio_path or not os.path.exists(audio_path):
239
- return "Error: No audio file provided.", "", "", "", "", None
240
-
241
  try:
242
  original_text = speech_to_text(audio_path)
243
- if original_text.startswith("Error") or original_text == "Could not recognize speech. Please try again.":
244
- return original_text, "", "", "", "", None
245
-
246
- corrected_text = correct_grammar_with_t5(original_text)
247
- pronunciation_feedback = analyze_pronunciation(audio_path, original_text)
248
- grammar_issues = identify_grammar_issues(original_text, corrected_text)
249
- learning_tip = generate_learning_tip(original_text, corrected_text)
250
- output_audio_path = text_to_speech(corrected_text)
251
-
252
- return original_text, corrected_text, grammar_issues, pronunciation_feedback, learning_tip, output_audio_path
253
  except Exception as e:
254
- print(f"Error processing audio: {e}")
255
- return f"Error: {str(e)}", "", "", "", "", None
256
-
257
- # Step 6: Create interactive practice exercises
258
- def generate_practice_exercise():
259
- """Generate a random practice exercise"""
260
- exercises = [
261
- "Tell me about your favorite hobby.",
262
- "Describe what you did yesterday.",
263
- "What is your opinion on online learning?",
264
- "Describe your ideal vacation destination.",
265
- "Talk about your favorite movie or book.",
266
- "What are your plans for the future?",
267
- "Describe your hometown to someone who has never been there.",
268
- "What advice would you give to someone learning English?",
269
- "If you could change one thing about your country, what would it be?",
270
- "Describe a challenging situation you've overcome.",
271
- "If you could have any superpower, what would it be and why?",
272
- "What is the most important quality in a friend?",
273
- "Describe your daily morning routine.",
274
- "What technology couldn't you live without?",
275
- "Talk about your favorite childhood memory."
276
- ]
277
- import random
278
- return random.choice(exercises)
279
 
280
- # Step 7: Create the Gradio interface
281
  def create_interface():
282
  with gr.Blocks() as app:
283
- with gr.Row():
284
- with gr.Column(scale=1):
285
- practice_box = gr.Textbox(label="Practice Topic", value="Tell me about your favorite hobby.")
286
- new_topic_btn = gr.Button("Generate New Topic")
287
- audio_input = gr.Audio(sources=["microphone"], type="filepath", label="Record your speech")
288
- submit_btn = gr.Button("Analyze My Speaking", variant="primary")
289
-
290
- with gr.Column(scale=2):
291
- with gr.Tab("Feedback"):
292
- original_text = gr.Textbox(label="What You Said")
293
- corrected_text = gr.Textbox(label="Corrected Version")
294
- grammar_issues = gr.Textbox(label="Grammar Issues")
295
- pronunciation_feedback = gr.Textbox(label="Pronunciation Feedback")
296
- learning_tip = gr.Textbox(label="Learning Tip")
297
-
298
- with gr.Tab("Correct Pronunciation"):
299
- gr.Markdown("Listen to the corrected version:")
300
- audio_output = gr.Audio(label="Correct pronunciation")
301
-
302
- submit_btn.click(
303
- process_audio,
304
- inputs=[audio_input],
305
- outputs=[original_text, corrected_text, grammar_issues, pronunciation_feedback, learning_tip, audio_output]
306
- )
307
-
308
- new_topic_btn.click(
309
- lambda: generate_practice_exercise(),
310
- inputs=None,
311
- outputs=practice_box
312
- )
313
-
314
- gr.Examples(
315
- examples=[
316
- ["I very happy to learning english today"],
317
- ["yesterday i go to the store and buy some food"],
318
- ["they was talking about the movie when i arrive"],
319
- ["she dont like to eating vegetables"],
320
- ["I have went to paris last summer vacation"]
321
- ],
322
- inputs=[original_text],
323
- outputs=[corrected_text, grammar_issues, learning_tip],
324
- fn=lambda text: (
325
- correct_grammar_with_t5(text),
326
- identify_grammar_issues(text, correct_grammar_with_t5(text)),
327
- generate_learning_tip(text, correct_grammar_with_t5(text))
328
- ),
329
- cache_examples=True
330
- )
331
-
332
  return app
333
 
334
- # Launch the application
335
  if __name__ == "__main__":
336
  app = create_interface()
337
- app.launch(share=True, debug=True)
 
 
1
  import os
2
  import time
3
  import gradio as gr
 
9
  import re
10
  from gtts import gTTS
11
  from transformers import pipeline, T5ForConditionalGeneration, T5Tokenizer
 
12
 
13
+ # Ensure correct device setting
14
  device = "cuda" if torch.cuda.is_available() else "cpu"
15
  print(f"Using device: {device}")
16
 
17
+ # Load models with error handling
 
18
  try:
19
  whisper_model = whisper.load_model("small", device=device)
 
20
  except Exception as e:
21
  print(f"Failed to load Whisper model: {e}")
22
+ whisper_model = None
23
 
24
+ t5_tokenizer = T5Tokenizer.from_pretrained("t5-base")
25
+ t5_model = T5ForConditionalGeneration.from_pretrained("t5-base").to(device)
 
 
 
 
 
26
 
27
  try:
28
  sentiment_analyzer = pipeline("text-classification",
29
  model="distilbert-base-uncased-finetuned-sst-2-english",
30
  device=0 if device == "cuda" else -1)
 
31
  except Exception as e:
32
  print(f"Failed to load sentiment analyzer: {e}")
33
+ sentiment_analyzer = None
34
 
 
35
  def speech_to_text(audio_path):
36
+ if not whisper_model:
37
+ return "Whisper model is not loaded."
 
38
  try:
39
  result = whisper_model.transcribe(audio_path)
40
  return result["text"].strip()
41
  except Exception as e:
42
+ return f"Speech recognition error: {e}"
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
43
 
 
44
  def process_audio(audio_path):
 
45
  if not audio_path or not os.path.exists(audio_path):
46
+ return "Error: No valid audio file provided.", "", "", "", "", None
 
47
  try:
48
  original_text = speech_to_text(audio_path)
49
+ corrected_text = original_text # Placeholder for grammar correction
50
+ return original_text, corrected_text, "", "", "", None
 
 
 
 
 
 
 
 
51
  except Exception as e:
52
+ return f"Processing error: {e}", "", "", "", "", None
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
53
 
 
54
  def create_interface():
55
  with gr.Blocks() as app:
56
+ audio_input = gr.Audio(sources=["microphone"], type="filepath", label="Record your speech")
57
+ output_text = gr.Textbox(label="Recognized Text")
58
+ submit_btn = gr.Button("Analyze Speech")
59
+ submit_btn.click(process_audio, inputs=[audio_input], outputs=[output_text])
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
60
  return app
61
 
 
62
  if __name__ == "__main__":
63
  app = create_interface()
64
+ app.launch(server_port=int(os.getenv("PORT", 7860)), server_name="0.0.0.0")