Anupam007 committed on
Commit
5ca1b81
·
verified ·
1 Parent(s): 8393796

Create app.py

Browse files
Files changed (1) hide show
  1. app.py +337 -0
app.py ADDED
@@ -0,0 +1,337 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ #Step 2: Import libraries
2
+ import os
3
+ import time
4
+ import gradio as gr
5
+ import torch
6
+ import numpy as np
7
+ import tempfile
8
+ import librosa
9
+ import whisper
10
+ import re
11
+ from gtts import gTTS
12
+ from transformers import pipeline, T5ForConditionalGeneration, T5Tokenizer
13
+ from IPython.display import Audio, display
14
+
15
# Check if GPU is available
device = "cuda" if torch.cuda.is_available() else "cpu"
print(f"Using device: {device}")

# Step 3: Load models with error handling
# Each model is loaded in its own try block so the log names exactly which one
# failed. Startup is aborted with ``raise SystemExit(1)`` instead of
# ``exit(1)``: ``exit`` is a convenience helper installed by the ``site``
# module for interactive use and is not guaranteed to exist, whereas the
# ``SystemExit`` exception is always available.
print("Loading models...")
try:
    whisper_model = whisper.load_model("small", device=device)
    print("Whisper model loaded successfully")
except Exception as e:
    print(f"Failed to load Whisper model: {e}")
    raise SystemExit(1) from e

try:
    t5_tokenizer = T5Tokenizer.from_pretrained("t5-base")
    t5_model = T5ForConditionalGeneration.from_pretrained("t5-base").to(device)
    print("T5 model loaded successfully for grammar correction")
except Exception as e:
    print(f"Failed to load T5 model: {e}")
    raise SystemExit(1) from e

try:
    # The transformers pipeline takes a device index: 0 for the first GPU,
    # -1 for CPU.
    sentiment_analyzer = pipeline(
        "text-classification",
        model="distilbert-base-uncased-finetuned-sst-2-english",
        device=0 if device == "cuda" else -1,
    )
    print("Sentiment analyzer loaded successfully")
except Exception as e:
    print(f"Failed to load sentiment analyzer: {e}")
    raise SystemExit(1) from e
44
+
45
+ # Step 4: Define processing functions
46
def speech_to_text(audio_path):
    """Transcribe the audio file at *audio_path* with the Whisper model.

    Returns the stripped transcript on success. An empty or non-existent path
    yields "Error: Invalid audio file path."; any exception raised while
    transcribing is printed and a retry message is returned instead.
    """
    path_is_usable = bool(audio_path) and os.path.exists(audio_path)
    if not path_is_usable:
        return "Error: Invalid audio file path."

    try:
        transcription = whisper_model.transcribe(audio_path)
        spoken = transcription["text"]
        return spoken.strip()
    except Exception as e:
        print(f"Error in speech recognition: {e}")
        return "Could not recognize speech. Please try again."
56
+
57
def correct_grammar_with_t5(text):
    """Return a grammar-corrected version of *text* using the T5 model.

    Empty or whitespace-only input is returned unchanged. When the model
    output is unusable (shorter than two characters, or identical to the
    input once stripped) or generation raises, the rule-based
    ``apply_basic_grammar_rules`` is used instead.
    """
    if not text or not text.strip():
        return text

    prompt = f"grammar: {text}"
    try:
        encoded = t5_tokenizer(prompt, return_tensors="pt")
        generated = t5_model.generate(
            input_ids=encoded.input_ids.to(device),
            max_length=512,
            num_beams=4,
            early_stopping=True,
        )
        candidate = t5_tokenizer.decode(generated[0], skip_special_tokens=True)
        model_changed_text = candidate.strip() != text.strip()
        if model_changed_text and len(candidate) >= 2:
            return candidate
        return apply_basic_grammar_rules(text)
    except Exception as e:
        print(f"Error in T5 grammar correction: {e}")
        return apply_basic_grammar_rules(text)  # Fallback to basic rules
77
+
78
# Ordered (compiled pattern, replacement) pairs for common written-English
# slips. Order matters: the bare pronoun rule runs first so that "i'm" has
# already become "I'm" by the time the contraction rules see it.
# NOTE(review): "ill" -> "I'll" also rewrites the adjective "ill"; kept
# because callers may rely on it, but it is a known false positive.
_BASIC_GRAMMAR_RULES = tuple(
    (re.compile(pattern, re.IGNORECASE), replacement)
    for pattern, replacement in (
        (r"\bi\b", "I"),
        (r"\bi'm\b", "I'm"),
        (r"\bi've\b", "I've"),
        (r"\bi'll\b", "I'll"),
        (r"\bim\b", "I'm"),
        (r"\bive\b", "I've"),
        (r"\bill\b", "I'll"),
        (r"\bdont\b", "don't"),
        (r"\bcant\b", "can't"),
        (r"\bwont\b", "won't"),
        (r"\btheir is\b", "there is"),
        (r"\btheir are\b", "there are"),
        (r"\byour welcome\b", "you're welcome"),
        (r"\byour right\b", "you're right"),
        # Only insert the missing apostrophe and keep the original casing; an
        # already-correct "It's been" must not be lowercased mid-text.
        (r"\b(it)s( been)\b", r"\1's\2"),
        (r"\balot\b", "a lot"),
        (r"\bcould of\b", "could have"),
        (r"\bshould of\b", "should have"),
        (r"\bwould of\b", "would have"),
        (r"\bmust of\b", "must have"),
    )
)


def apply_basic_grammar_rules(text):
    """Apply a small set of regex-based grammar fixes to *text*.

    The text is stripped, the substitution rules are applied in order, the
    first character is upper-cased, and a full stop is appended when the text
    does not already end in ``.``, ``!`` or ``?``.

    Returns:
        The corrected string, or ``""`` when *text* is empty, ``None`` or
        contains only whitespace.
    """
    if not text:
        return ""

    # Strip first so leading whitespace cannot defeat the capitalisation step
    # and whitespace-only input does not come back as a lone ".".
    corrected = text.strip()
    if not corrected:
        return ""

    for pattern, replacement in _BASIC_GRAMMAR_RULES:
        corrected = pattern.sub(replacement, corrected)

    corrected = corrected[0].upper() + corrected[1:]
    if not corrected.endswith(('.', '!', '?')):
        corrected += '.'
    return corrected
112
+
113
# (regex, flags, explanation) triples used to explain a correction.
# The bare pronoun is matched case-sensitively: with IGNORECASE the corrected
# text's "I" would still match, so the issue could never be reported.
_GRAMMAR_ISSUE_PATTERNS = (
    (r'\bi\b', 0, "Capitalization: The pronoun 'I' should always be capitalized."),
    (r'\bim\b', re.IGNORECASE, "Contraction: 'im' should be written as 'I'm'."),
    (r'\bive\b', re.IGNORECASE, "Contraction: 'ive' should be written as 'I've'."),
    (r'\bdont\b', re.IGNORECASE, "Contraction: 'dont' should be written as 'don't'."),
    (r'\bcant\b', re.IGNORECASE, "Contraction: 'cant' should be written as 'can't'."),
    (r'\bwont\b', re.IGNORECASE, "Contraction: 'wont' should be written as 'won't'."),
    (r'\btheir is\b', re.IGNORECASE, "Grammar: 'their is' should be 'there is'."),
    (r'\btheir are\b', re.IGNORECASE, "Grammar: 'their are' should be 'there are'."),
    (r'\byour welcome\b', re.IGNORECASE, "Grammar: 'your welcome' should be 'you're welcome'."),
    (r'\byour right\b', re.IGNORECASE, "Grammar: 'your right' should be 'you're right'."),
    (r'\balot\b', re.IGNORECASE, "Spelling: 'alot' should be written as 'a lot'."),
    (r'\bcould of\b', re.IGNORECASE, "Grammar: 'could of' should be 'could have'."),
    (r'\bshould of\b', re.IGNORECASE, "Grammar: 'should of' should be 'should have'."),
    (r'\bwould of\b', re.IGNORECASE, "Grammar: 'would of' should be 'would have'."),
)


def identify_grammar_issues(original, corrected):
    """Explain the differences between *original* and *corrected* text.

    Checks sentence-initial capitalisation, terminal punctuation and a list
    of known slips that are present in *original* but absent from
    *corrected*.

    Returns:
        One explanation per line, "No grammar issues detected." when either
        argument is empty or the two are equal, or a generic note when the
        texts differ but no specific rule matched.
    """
    if not original or not corrected or original == corrected:
        return "No grammar issues detected."

    issues = []
    # Both strings are non-empty here, so indexing [0] is safe.
    if original[0].islower() and corrected[0].isupper():
        issues.append("Capitalization: Sentences should start with a capital letter.")

    terminators = ('.', '!', '?')
    if not original.strip().endswith(terminators) and corrected.strip().endswith(terminators):
        issues.append("Punctuation: Sentences should end with proper punctuation.")

    for pattern, flags, explanation in _GRAMMAR_ISSUE_PATTERNS:
        if re.search(pattern, original, flags) and not re.search(pattern, corrected, flags):
            issues.append(explanation)

    if not issues:
        issues.append("Grammar: There were some grammar issues in your speech. Compare your original with the correction.")

    return "\n".join(issues)
150
+
151
def analyze_pronunciation(audio_path, text):
    """Give delivery feedback (pace, pitch variation, volume variation).

    Args:
        audio_path: Path of the recording, loaded with ``librosa.load``.
        text: Transcript of the recording; its whitespace-separated word
            count is used to compute words per minute.

    Returns:
        Three feedback sentences joined by newlines, or a short message
        explaining why the analysis could not be performed. Exceptions are
        printed and reported as a generic error message, never raised.
    """
    try:
        # Count words before decoding the audio: with nothing to measure
        # there is no reason to pay for loading the file.
        word_count = len(text.split())
        if word_count == 0:
            return "Could not analyze pronunciation. No words detected."

        y, sr = librosa.load(audio_path)
        duration = librosa.get_duration(y=y, sr=sr)
        if duration <= 0:
            # Explicit guard instead of relying on a swallowed
            # ZeroDivisionError in the rate computation below.
            return "Could not analyze pronunciation. The recording is empty."

        speech_rate = (word_count / duration) * 60  # words per minute

        # For every frame take the pitch at the bin with the largest
        # magnitude; frames whose strongest bin is 0 or whose pitch is not
        # positive are ignored, matching the original per-frame filter.
        pitches, magnitudes = librosa.piptrack(y=y, sr=sr)
        strongest_bin = magnitudes.argmax(axis=0)
        frame_pitch = pitches[strongest_bin, np.arange(magnitudes.shape[1])]
        voiced = frame_pitch[(strongest_bin > 0) & (frame_pitch > 0)]
        pitch_variability = np.std(voiced) if voiced.size else 0

        rms = librosa.feature.rms(y=y)[0]
        volume_variability = np.std(rms)

        feedback = []
        if speech_rate > 180:
            feedback.append("You're speaking quite fast (over 180 words per minute). Try slowing down slightly for better clarity.")
        elif speech_rate < 120:
            feedback.append("You're speaking a bit slowly (under 120 words per minute). Consider speeding up slightly to sound more fluent.")
        else:
            feedback.append("Your speaking rate is good (between 120-180 words per minute).")

        if pitch_variability < 10:
            feedback.append("Your speech could use more variation in tone. Try emphasizing important words more.")
        else:
            feedback.append("You have good variation in your tone and emphasis.")

        if volume_variability < 0.02:
            feedback.append("Try varying your volume more for emphasis on key points.")
        else:
            feedback.append("Your volume variation is good, which helps maintain listener interest.")

        return "\n".join(feedback)
    except Exception as e:
        print(f"Error in pronunciation analysis: {e}")
        return "Could not analyze pronunciation due to an error."
191
+
192
def generate_learning_tip(original, corrected):
    """Pick a learning tip based on what changed between the two texts.

    The first matching rule wins: lowercase pronoun "i", missing apostrophes
    in contractions, their/there, your/you're, missing end punctuation. When
    nothing specific matches, a generic tip is chosen at random.

    Returns:
        A single tip string. When either argument is empty or the texts are
        equal, a fixed congratulatory message is returned.
    """
    import random

    if not original or not corrected or original == corrected:
        return "Your grammar is excellent! Keep practicing to improve fluency and pronunciation."

    # Case-sensitive on purpose: with IGNORECASE the corrected "I" would
    # still match and this tip could never be returned.
    if re.search(r'\bi\b', original) and not re.search(r'\bi\b', corrected):
        return "Remember that the pronoun 'I' is always capitalized in English. This is a common mistake for English learners."

    if any(re.search(pattern, original, re.IGNORECASE) for pattern in [r'\bim\b', r'\bive\b', r'\bdont\b', r'\bcant\b']):
        return "Practice using apostrophes in contractions: 'I'm', 'I've', 'don't', 'can't'. Try writing these contractions a few times to memorize them."

    if re.search(r'\btheir is\b|\btheir are\b', original, re.IGNORECASE):
        return "Remember the difference between 'their', 'there', and 'they're'. 'Their' shows possession, 'there' indicates location, and 'they're' is a contraction of 'they are'."

    if re.search(r'\byour welcome\b|\byour right\b', original, re.IGNORECASE):
        return "Remember the difference between 'your' and 'you're'. 'Your' shows possession, while 'you're' is a contraction of 'you are'."

    terminators = ('.', '!', '?')
    if not original.strip().endswith(terminators) and corrected.strip().endswith(terminators):
        return "Remember to end your sentences with proper punctuation. This helps make your meaning clear in writing and indicates proper pauses in speech."

    generic_tips = [
        "Practice makes perfect! Try reading English content aloud for 10 minutes daily.",
        "Listen to native English speakers and mimic their pronunciation and rhythm.",
        "Record yourself speaking and compare it with native speakers.",
        "Focus on one grammar rule at a time until it becomes natural.",
        "Try to think in English rather than translating from your native language.",
    ]
    return random.choice(generic_tips)
221
+
222
def text_to_speech(text):
    """Synthesize *text* to an MP3 file with gTTS.

    Returns:
        The path of a newly created temporary ``.mp3`` file, or ``None`` when
        *text* is empty or synthesis fails. The caller owns the returned file;
        it is not deleted automatically.
    """
    if not text:
        return None

    mp3_path = None
    try:
        tts = gTTS(text=text, lang='en')
        # Reserve a unique file name, then close the handle before gTTS
        # writes to it: the original kept the handle open forever (a
        # descriptor leak) and writing to a still-open temp file fails on
        # Windows.
        with tempfile.NamedTemporaryFile(suffix='.mp3', delete=False) as fp:
            mp3_path = fp.name
        tts.save(mp3_path)
        return mp3_path
    except Exception as e:
        print(f"Error in text-to-speech conversion: {e}")
        # Do not leave an empty/partial file behind when synthesis failed.
        if mp3_path and os.path.exists(mp3_path):
            os.remove(mp3_path)
        return None
234
+
235
+ # Step 5: Main processing function
236
# Exact messages speech_to_text returns instead of a transcript. Matching
# them exactly (rather than ``startswith("Error")``) keeps a genuine
# transcript that happens to begin with the word "Error" from being treated
# as a failure.
_SPEECH_TO_TEXT_FAILURES = (
    "Error: Invalid audio file path.",
    "Could not recognize speech. Please try again.",
)


def process_audio(audio_path):
    """Run the full feedback pipeline on one recording.

    Args:
        audio_path: Path of the recorded audio file.

    Returns:
        A 6-tuple ``(original_text, corrected_text, grammar_issues,
        pronunciation_feedback, learning_tip, output_audio_path)``. On any
        failure the first element carries the message, the next four are
        empty strings and the audio path is ``None``.
    """
    if not audio_path or not os.path.exists(audio_path):
        return "Error: No audio file provided.", "", "", "", "", None

    try:
        original_text = speech_to_text(audio_path)
        if original_text in _SPEECH_TO_TEXT_FAILURES:
            return original_text, "", "", "", "", None
        if not original_text:
            # An empty transcript would otherwise flow through the pipeline
            # and come back as "Your grammar is excellent!".
            return "No speech detected. Please try again.", "", "", "", "", None

        corrected_text = correct_grammar_with_t5(original_text)
        pronunciation_feedback = analyze_pronunciation(audio_path, original_text)
        grammar_issues = identify_grammar_issues(original_text, corrected_text)
        learning_tip = generate_learning_tip(original_text, corrected_text)
        output_audio_path = text_to_speech(corrected_text)

        return original_text, corrected_text, grammar_issues, pronunciation_feedback, learning_tip, output_audio_path
    except Exception as e:
        print(f"Error processing audio: {e}")
        return f"Error: {str(e)}", "", "", "", "", None
256
+
257
+ # Step 6: Create interactive practice exercises
258
def generate_practice_exercise():
    """Return one speaking prompt chosen at random from a fixed list."""
    import random

    prompts = [
        "Tell me about your favorite hobby.",
        "Describe what you did yesterday.",
        "What is your opinion on online learning?",
        "Describe your ideal vacation destination.",
        "Talk about your favorite movie or book.",
        "What are your plans for the future?",
        "Describe your hometown to someone who has never been there.",
        "What advice would you give to someone learning English?",
        "If you could change one thing about your country, what would it be?",
        "Describe a challenging situation you've overcome.",
        "If you could have any superpower, what would it be and why?",
        "What is the most important quality in a friend?",
        "Describe your daily morning routine.",
        "What technology couldn't you live without?",
        "Talk about your favorite childhood memory.",
    ]
    chosen = random.choice(prompts)
    return chosen
279
+
280
+ # Step 7: Create the Gradio interface
281
def create_interface():
    """Build and return the Gradio Blocks UI for the speaking coach.

    Layout: a left column with the practice topic, microphone recorder and
    action buttons; a right column with a "Feedback" tab (text results) and a
    "Correct Pronunciation" tab (synthesized audio of the corrected text).
    """

    def _analyze_example(text):
        """Correct *text* once and derive the issues and tip from that result."""
        # The original lambda called correct_grammar_with_t5 three times per
        # example, i.e. three beam searches for one answer.
        corrected = correct_grammar_with_t5(text)
        return (
            corrected,
            identify_grammar_issues(text, corrected),
            generate_learning_tip(text, corrected),
        )

    with gr.Blocks() as app:
        with gr.Row():
            with gr.Column(scale=1):
                practice_box = gr.Textbox(label="Practice Topic", value="Tell me about your favorite hobby.")
                new_topic_btn = gr.Button("Generate New Topic")
                audio_input = gr.Audio(sources=["microphone"], type="filepath", label="Record your speech")
                submit_btn = gr.Button("Analyze My Speaking", variant="primary")

            with gr.Column(scale=2):
                with gr.Tab("Feedback"):
                    original_text = gr.Textbox(label="What You Said")
                    corrected_text = gr.Textbox(label="Corrected Version")
                    grammar_issues = gr.Textbox(label="Grammar Issues")
                    pronunciation_feedback = gr.Textbox(label="Pronunciation Feedback")
                    learning_tip = gr.Textbox(label="Learning Tip")

                with gr.Tab("Correct Pronunciation"):
                    gr.Markdown("Listen to the corrected version:")
                    audio_output = gr.Audio(label="Correct pronunciation")

        submit_btn.click(
            process_audio,
            inputs=[audio_input],
            outputs=[original_text, corrected_text, grammar_issues, pronunciation_feedback, learning_tip, audio_output],
        )

        # Pass the function itself; wrapping it in a lambda added nothing.
        new_topic_btn.click(
            generate_practice_exercise,
            inputs=None,
            outputs=practice_box,
        )

        gr.Examples(
            examples=[
                ["I very happy to learning english today"],
                ["yesterday i go to the store and buy some food"],
                ["they was talking about the movie when i arrive"],
                ["she dont like to eating vegetables"],
                ["I have went to paris last summer vacation"],
            ],
            inputs=[original_text],
            outputs=[corrected_text, grammar_issues, learning_tip],
            fn=_analyze_example,
            cache_examples=True,
        )

    return app
333
+
334
+ # Launch the application
335
if __name__ == "__main__":
    # Build the UI only when run as a script, then start serving it.
    app = create_interface()
    app.launch(debug=True, share=True)