Spaces:
Sleeping
Sleeping
Create app.py
Browse files
app.py
ADDED
|
@@ -0,0 +1,123 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
import io
|
| 2 |
+
import gradio as gr
|
| 3 |
+
import numpy as np
|
| 4 |
+
from groq import Groq
|
| 5 |
+
from difflib import SequenceMatcher
|
| 6 |
+
import soundfile as sf
|
| 7 |
+
|
| 8 |
+
# Initialize the Groq client.
# The API key is deliberately NOT written in source: with no `api_key`
# argument the Groq SDK reads it from the GROQ_API_KEY environment variable
# (on Hugging Face Spaces, configure it as a Space secret) and raises a clear
# error at startup if it is missing.
# NOTE(review): the key that used to be hard-coded on this line was published
# with the file and must be treated as compromised - revoke it and issue a
# new one.
client = Groq()
|
| 10 |
+
|
| 11 |
+
# Convert sound data to bytes
|
| 12 |
+
def sound_to_bytes(sound_data, sample_rate=44100):
    """Encode audio samples as a complete in-memory WAV file.

    Args:
        sound_data: Audio samples in any form accepted by ``soundfile.write``
            (typically a NumPy array).
        sample_rate: Sampling rate in Hz written to the WAV header. Defaults
            to 44100, the value that was previously hard-coded, so existing
            single-argument callers are unaffected.

    Returns:
        bytes: The full contents of the encoded WAV file.
    """
    with io.BytesIO() as buffer:
        sf.write(buffer, sound_data, sample_rate, format='WAV')
        # getvalue() returns the whole buffer regardless of the current
        # position, so no seek(0) is needed before reading it back.
        return buffer.getvalue()
|
| 17 |
+
|
| 18 |
+
# Module-level tallies for the current test run: `score` is the number of
# correct pronunciations and `attempts` the number of submissions counted.
score, attempts = 0, 0
|
| 21 |
+
|
| 22 |
+
# Function to generate a word using Llama model
|
| 23 |
+
def generate_word():
    """Ask the Llama chat model for a word or short phrase to pronounce.

    The completion is requested in streaming mode; the streamed fragments are
    collected and joined, then surrounding whitespace and double quotes are
    removed.

    Returns:
        str: The generated word/phrase, or ``"Error generating word: ..."``
        describing the exception if anything in the request fails.
    """
    system_prompt = (
        "You are an experienced English professor with over 20 years experience "
        "teaching English and you are also a native speaker. You are trying to "
        "teach proper English pronunciation by generating words or phrases for "
        "user to pronounce and you judge them if it is correct or not. Make sure "
        "just a single word or a very concise phrase. Don't mention any other "
        "word apart from the word generated."
    )
    try:
        stream = client.chat.completions.create(
            model="llama-3.1-70b-versatile",
            messages=[
                {"role": "system", "content": system_prompt},
                {"role": "user", "content": "Generate a word for pronunciation."},
            ],
            temperature=1.4,
            max_tokens=4096,
            top_p=1,
            stream=True,
        )
        # Each streamed chunk carries an optional text fragment; empty or
        # missing fragments are skipped.
        fragments = [chunk.choices[0].delta.content for chunk in stream]
        generated = "".join(fragment for fragment in fragments if fragment)
        return generated.strip().strip('"')
    except Exception as e:
        return f"Error generating word: {e}"
|
| 46 |
+
|
| 47 |
+
# Function to check pronunciation
|
| 48 |
+
def _feedback_tone(frequency_hz, duration_s=0.3, sample_rate=44100):
    """Return a short mono sine tone as a float32 NumPy array (amplitude 0.5)."""
    sample_count = int(duration_s * sample_rate)
    timeline = np.arange(sample_count) / sample_rate
    return (0.5 * np.sin(2 * np.pi * frequency_hz * timeline)).astype(np.float32)


def _audio_to_upload(audio):
    """Turn the submitted audio into a ``(filename, bytes)`` pair for upload."""
    if isinstance(audio, tuple):
        # A tuple is unpacked as (sample_rate, samples); the recording's own
        # rate is used so the encoded WAV plays back at the right speed.
        sample_rate, samples = audio
        with io.BytesIO() as buffer:
            sf.write(buffer, samples, sample_rate, format='WAV')
            return "user_audio.wav", buffer.getvalue()
    if isinstance(audio, (bytes, bytearray, memoryview)):
        # Already-encoded audio is forwarded unchanged under the .m4a name.
        return "user_audio.m4a", bytes(audio)
    raise TypeError(f"unsupported audio input type: {type(audio).__name__}")


def _normalize_for_comparison(text):
    """Lower-case *text* and drop surrounding whitespace and punctuation."""
    return text.strip().strip(".,!?;:\"'").strip().lower()


def check_pronunciation(audio, word):
    """Transcribe the user's audio and compare it with the expected word.

    The audio is encoded in memory (no shared temp file on disk), sent to
    Groq's Whisper transcription endpoint, and the transcript is compared with
    ``word`` using ``difflib.SequenceMatcher``. Both strings are normalized
    (case, surrounding whitespace and punctuation) first, because raw
    transcripts commonly carry a leading space or trailing period that would
    push short, correct answers below the threshold.

    Updates the module-level ``score`` and ``attempts`` counters. An attempt
    is only counted once a transcription has actually been obtained, so
    missing audio or API failures do not lower the user's percentage.

    Args:
        audio: ``(sample_rate, samples)`` tuple, raw encoded audio bytes, or
            ``None`` when nothing was recorded/uploaded.
        word: The word or phrase the user was asked to pronounce.

    Returns:
        tuple: ``(result_text, score, feedback_audio)`` where
        ``feedback_audio`` is WAV bytes of a short high (correct) or low
        (incorrect) tone, or ``None`` on error. Any UI wiring for this
        function therefore needs three output components.
    """
    global score, attempts

    if audio is None:
        return "Error checking pronunciation: no audio was provided.", score, None

    try:
        upload_name, upload_bytes = _audio_to_upload(audio)

        # Transcribe using Groq's Whisper API
        transcription = client.audio.transcriptions.create(
            file=(upload_name, upload_bytes),
            model="distil-whisper-large-v3-en",
            temperature=0.28,
            response_format="verbose_json",
        )
        transcription_text = transcription.text

        # Only a completed transcription counts as an attempt.
        attempts += 1

        similarity = SequenceMatcher(
            None,
            _normalize_for_comparison(transcription_text),
            _normalize_for_comparison(word),
        ).ratio()

        if similarity > 0.8:  # Threshold for correct pronunciation
            score += 1
            tone = _feedback_tone(880.0)
            result_text = f"Correct! Expected: {word}. You said: {transcription_text}"
        else:
            tone = _feedback_tone(220.0)
            result_text = f"Incorrect. Expected: {word}. You said: {transcription_text}"

        # The tone is generated at 44100 Hz, sound_to_bytes' default rate.
        feedback_audio = sound_to_bytes(tone)
        return result_text, score, feedback_audio
    except Exception as e:
        # UI boundary: report the failure in the result box instead of raising.
        return f"Error checking pronunciation: {e}", score, None
|
| 85 |
+
|
| 86 |
+
# Function to reset the test and display percentage
|
| 87 |
+
def reset_test():
    """Summarize the current test run and reset the counters.

    Reads the module-level ``score`` and ``attempts``, builds a summary line
    with the raw score and the success percentage (0 when there were no
    attempts), then sets both counters back to zero.

    Returns:
        str: The final-score summary for the run that just ended.
    """
    global score, attempts

    percentage = (score / attempts) * 100 if attempts > 0 else 0
    summary = f"Your final score is {score}/{attempts}. Percentage: {percentage:.2f}%"

    # Start the next run from a clean slate.
    score, attempts = 0, 0
    return summary
|
| 98 |
+
|
| 99 |
+
|
| 100 |
+
# Gradio Interface
|
| 101 |
+
with gr.Blocks() as interface:
    # Passing the function itself as `value` lets Gradio call it when the app
    # loads, instead of making a network request at import time and assigning
    # `.value` on an already-constructed component.
    word_output = gr.Textbox(label="Word to Pronounce", value=generate_word)
    result_output = gr.Textbox(label="Result")
    score_output = gr.Textbox(label="Score")
    # check_pronunciation returns (text, score, feedback audio); this component
    # receives the third value so the number of outputs matches the handler.
    feedback_output = gr.Audio(label="Feedback Sound", autoplay=True)

    # Generate new word on button click
    word_button = gr.Button("Get New Word")
    word_button.click(fn=generate_word, outputs=word_output)

    # Audio input for pronunciation checking
    audio_input = gr.Audio(type="numpy")  # Handling both microphone and uploaded files
    submit_button = gr.Button("Submit Pronunciation")
    submit_button.click(
        fn=check_pronunciation,
        inputs=[audio_input, word_output],
        outputs=[result_output, score_output, feedback_output],
    )

    # Reset button to stop and show score
    stop_button = gr.Button("Stop")
    stop_button.click(fn=reset_test, outputs=score_output)

# Launch only when run as a script so importing this module has no side effects.
if __name__ == "__main__":
    interface.launch()
|