Afeezee committed on
Commit
296c2c3
·
verified ·
1 Parent(s): b847b8e

Create app.py

Browse files
Files changed (1) hide show
  1. app.py +123 -0
app.py ADDED
@@ -0,0 +1,123 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
import io
import os
from difflib import SequenceMatcher

import gradio as gr
import numpy as np
import soundfile as sf
from groq import Groq
7
+
8
# Initialize the Groq client.
# The API key is read from the GROQ_API_KEY environment variable (e.g. a
# Hugging Face Space secret) and must never be hard-coded in source control.
# NOTE: a key was previously committed on this line; it should be treated as
# compromised and revoked.
client = Groq(api_key=os.environ.get("GROQ_API_KEY"))
10
+
11
# Convert sound data to bytes
def sound_to_bytes(sound_data, sample_rate=44100):
    """Encode raw audio samples as an in-memory WAV file.

    Args:
        sound_data: Audio samples in any form accepted by ``soundfile.write``
            (presumably a NumPy array -- confirm against callers).
        sample_rate: Sample rate in Hz to record in the WAV header. Defaults
            to 44100, the value that used to be hard-coded here.

    Returns:
        bytes: The complete contents of the encoded WAV file.
    """
    buffer = io.BytesIO()
    sf.write(buffer, sound_data, sample_rate, format='WAV')
    # getvalue() returns the whole buffer regardless of the stream position,
    # so no seek(0) is required before reading.
    return buffer.getvalue()
17
+
18
# Module-level tallies for the running test: correct answers and total
# attempts. Both start at zero.
score, attempts = 0, 0
21
+
22
# Function to generate a word using Llama model
def generate_word():
    """Ask the Llama chat model for a word or short phrase to pronounce.

    The completion is requested in streaming mode; the non-empty text
    fragments are concatenated, then surrounding whitespace and double
    quotes are stripped.

    Returns:
        str: The generated word/phrase, or a message starting with
        "Error generating word:" if any exception is raised.
    """
    system_prompt = (
        "You are an experienced English professor with over 20 years experience teaching English and you are also a native speaker. "
        "You are trying to teach proper English pronunciation by generating words or phrases for user to pronounce and you judge them if it is correct or not. "
        "Make sure just a single word or a very concise phrase. "
        "Don't mention any other word apart from the word generated."
    )
    conversation = [
        {"role": "system", "content": system_prompt},
        {"role": "user", "content": "Generate a word for pronunciation."},
    ]
    try:
        stream = client.chat.completions.create(
            model="llama-3.1-70b-versatile",
            messages=conversation,
            temperature=1.4,
            max_tokens=4096,
            top_p=1,
            stream=True,
        )
        # Chunks whose delta carries no text contribute an empty string.
        fragments = [chunk.choices[0].delta.content or "" for chunk in stream]
        return "".join(fragments).strip().strip('"')
    except Exception as e:
        return f"Error generating word: {e}"
46
+
47
def _normalize_for_comparison(text):
    """Lower-case *text*, drop punctuation, and collapse runs of whitespace."""
    kept = "".join(ch for ch in text.lower() if ch.isalnum() or ch.isspace())
    return " ".join(kept.split())


def _feedback_tone(frequency_hz, duration_s=0.3, sample_rate=44100):
    """Return a short mono sine tone as float32 samples at 44.1 kHz by default."""
    t = np.arange(int(duration_s * sample_rate)) / sample_rate
    return (0.3 * np.sin(2 * np.pi * frequency_hz * t)).astype(np.float32)


# Function to check pronunciation
def check_pronunciation(audio, word):
    """Transcribe the user's audio and compare it with the expected word.

    Args:
        audio: Value delivered by the Gradio audio input. With
            ``type="numpy"`` this is a ``(sample_rate, samples)`` tuple, or
            ``None`` when nothing was recorded/uploaded. Any other value is
            treated as already-encoded audio bytes and sent as
            ``user_audio.m4a``, as before.
        word: The word or phrase the user was asked to pronounce.

    Returns:
        tuple: ``(result_text, score, feedback_audio)`` where
        ``feedback_audio`` is WAV bytes of a short tone (high for correct,
        low for incorrect) or ``None`` when no verdict could be reached.

    Notes:
        ``score`` and ``attempts`` are module-level globals, so they are
        shared by everything running in this process. An attempt is only
        counted once a transcription has actually been obtained, so missing
        audio or API failures do not lower the final percentage.
    """
    global score, attempts

    # Nothing recorded or uploaded: report it instead of failing on a write.
    if audio is None:
        return "No audio provided. Please record or upload your pronunciation.", score, None

    try:
        if isinstance(audio, tuple):
            # Encode in memory using the sample rate Gradio reports; forcing
            # 44100 Hz would distort audio captured at any other rate.
            sample_rate, samples = audio
            wav_buffer = io.BytesIO()
            sf.write(wav_buffer, samples, samplerate=sample_rate, format='WAV')
            audio_filename = "user_audio.wav"
            audio_bytes = wav_buffer.getvalue()
        else:
            audio_filename = "user_audio.m4a"
            audio_bytes = audio

        # Transcribe using Groq's Whisper API. The file name only tells the
        # API which container format the bytes are in; nothing is written
        # to disk, so concurrent requests cannot overwrite each other.
        transcription = client.audio.transcriptions.create(
            file=(audio_filename, audio_bytes),
            model="distil-whisper-large-v3-en",
            temperature=0.28,
            response_format="verbose_json",
        )
        transcription_text = transcription.text.strip()
        attempts += 1

        # Compare on normalized text so that casing, a leading space, or a
        # trailing full stop in the transcript does not count against the user.
        similarity = SequenceMatcher(
            None,
            _normalize_for_comparison(transcription_text),
            _normalize_for_comparison(word),
        ).ratio()

        if similarity > 0.8:  # Threshold for correct pronunciation
            score += 1
            feedback_audio = sound_to_bytes(_feedback_tone(880.0))
            result_text = f"Correct! Expected: {word}. You said: {transcription_text}"
        else:
            feedback_audio = sound_to_bytes(_feedback_tone(220.0))
            result_text = f"Incorrect. Expected: {word}. You said: {transcription_text}"

        return result_text, score, feedback_audio
    except Exception as e:
        # UI boundary: surface the failure in the result box rather than crash.
        return f"Error checking pronunciation: {e}", score, None
85
+
86
# Function to reset the test and display percentage
def reset_test():
    """Summarise the finished test and zero the global counters.

    Returns:
        str: A message giving ``score/attempts`` and the percentage of
        correct answers formatted to two decimals (0 when there were no
        attempts).
    """
    global score, attempts
    percentage = (score / attempts) * 100 if attempts > 0 else 0
    summary = f"Your final score is {score}/{attempts}. Percentage: {percentage:.2f}%"
    # Clear both tallies only after the summary text has been built.
    score, attempts = 0, 0
    return summary
98
+
99
+
100
# Gradio Interface
with gr.Blocks() as interface:
    word_output = gr.Textbox(label="Word to Pronounce")
    result_output = gr.Textbox(label="Result")
    score_output = gr.Textbox(label="Score")

    # Fetch the first word when the page loads, so building the UI does not
    # depend on a network call and each page load gets a fresh word.
    interface.load(fn=generate_word, outputs=word_output)

    # Generate new word on button click
    word_button = gr.Button("Get New Word")
    word_button.click(fn=generate_word, outputs=word_output)

    # Audio input for pronunciation checking
    audio_input = gr.Audio(type="numpy")  # Handling both microphone and uploaded files
    submit_button = gr.Button("Submit Pronunciation")

    # check_pronunciation returns (result text, score, feedback audio), so
    # three output components are wired; the third plays the feedback sound.
    feedback_output = gr.Audio(label="Feedback Sound", interactive=False)
    submit_button.click(
        fn=check_pronunciation,
        inputs=[audio_input, word_output],
        outputs=[result_output, score_output, feedback_output],
    )

    # Reset button to stop and show score
    stop_button = gr.Button("Stop")
    stop_button.click(fn=reset_test, outputs=score_output)

# Start the server only when run as a script, not when imported.
if __name__ == "__main__":
    interface.launch()