Spaces:

mramirez2001
/

EvaluadorOpenAI

Sleeping

App Files Files Community

mramirez2001 committed on Oct 2, 2025

Commit

5e0b8e4

verified ·

1 Parent(s): 4ece3bb

Upload app.py

Browse files

Files changed (1) hide show

app.py +37 -52

app.py CHANGED Viewed

@@ -21,7 +21,16 @@ print("Loading Whisper model...")
 whisper_model = whisper.load_model("base", device="cpu")
 print("Whisper model loaded.")
-# --- PROMPT DEL EXAMINADOR EXPERTO ---
 SYSTEM_PROMPT = """
 You are an expert English language examiner specializing in phonetics and accent reduction for ESL learners. Your task is to provide a detailed, diagnostic assessment of a student's spoken English based on a reference sentence and detailed word-level audio analysis.
@@ -57,42 +66,27 @@ Your entire response MUST be in English. You must return a single, valid JSON ob
 }
 """
-# --- 1. EXTRACCIÓN DETALLADA DE CARACTERÍSTICAS (WHISPER + LIBROSA) ---
 def extract_word_level_features(audio_path):
-    """
-    This function uses Whisper to get word timestamps and Librosa to get
-    features for each word's audio segment.
-    """
     try:
         y, sr = librosa.load(audio_path, sr=16000)
         result = whisper_model.transcribe(audio_path, word_timestamps=True, fp16=False)
-        if not result["segments"] or not result["segments"][0]["words"]:
             return []
         word_segments = result["segments"][0]["words"]
         features_list = []
         for segment in word_segments:
             start_sample = int(segment['start'] * sr)
             end_sample = int(segment['end'] * sr)
             word_audio = y[start_sample:end_sample]
-            # Calculate Root Mean Square (RMS) energy for the word
-            rms_energy = np.mean(librosa.feature.rms(y=word_audio))
-            features_list.append({
-                "word": segment['word'].strip(),
-                "start": round(segment['start'], 2),
-                "end": round(segment['end'], 2),
-                "energy": round(float(rms_energy), 4)
-            })
         return features_list
     except Exception as e:
         print(f"Error during feature extraction: {e}")
         return []
-# --- 2. FUNCIÓN PRINCIPAL DE EVALUACIÓN ---
 def run_evaluation(audio_input, reference_transcript):
     if not api_key_found: raise gr.Error("OpenAI API key not found.")
     if audio_input is None or not reference_transcript:
@@ -102,62 +96,47 @@ def run_evaluation(audio_input, reference_transcript):
     temp_audio_path = "temp_audio.wav"
     sf.write(temp_audio_path, y, sr)
-    # Step 1: Extract detailed features using Whisper and Librosa
     word_features = extract_word_level_features(temp_audio_path)
     if not word_features:
         return 0, "N/A", "Could not process the audio. Please try recording again.", None
-    # Step 2: Construct the detailed prompt for the OpenAI API
-    prompt_data = {
-        "reference_transcript": reference_transcript,
-        "spoken_words": word_features
-    }
     print("Sending detailed data to GPT-4o for analysis...")
     response = client.chat.completions.create(
-        model="gpt-4o",
-        response_format={"type": "json_object"},
-        messages=[
-            {"role": "system", "content": SYSTEM_PROMPT},
-            {"role": "user", "content": json.dumps(prompt_data)}
-        ]
     )
-    # Step 3: Process the API response and format it for display
     try:
         result = json.loads(response.choices[0].message.content)
-        # Format the detailed report for Gradio
         holistic_feedback_md = f"### Strengths\n{result['holistic_feedback']['strengths']}\n\n"
         holistic_feedback_md += f"### Areas for Improvement\n{result['holistic_feedback']['areas_for_improvement']}"
-        # Create a pandas DataFrame for better display
         word_analysis_df = pd.DataFrame(result['word_by_word_analysis'])
-        return (
-            result.get("overall_score_100", 0),
-            result.get("cefr_level", "N/A"),
-            holistic_feedback_md,
-            gr.DataFrame(value=word_analysis_df, headers=["Reference Word", "Spoken Word", "Score", "Correct IPA", "Feedback"], interactive=False)
-        )
     except (json.JSONDecodeError, KeyError) as e:
         print(f"Error processing API response: {e}")
         error_msg = "The API response was not in the expected format. Please try again."
         return 0, "Error", error_msg, None
-# --- 3. INTERFAZ DE GRADIO ---
 with gr.Blocks(theme=gr.themes.Soft()) as demo:
     gr.Markdown("# 🇬🇧 Expert Pronunciation Assessment")
-    gr.Markdown("Record yourself saying the reference sentence. Our AI examiner will provide a detailed diagnostic report on your performance.")
-    frase_ejemplo = "The rainbow is a division of white light into many beautiful colors."
     with gr.Row():
         with gr.Column(scale=1):
             audio_in = gr.Audio(sources=["microphone"], type="numpy", label="1. Record Your Voice")
-            text_in = gr.Textbox(lines=3, label="2. Reference Sentence", value=frase_ejemplo)
             submit_btn = gr.Button("Get Assessment", variant="primary")
         with gr.Column(scale=2):
@@ -169,7 +148,13 @@ with gr.Blocks(theme=gr.themes.Soft()) as demo:
             holistic_feedback_out = gr.Markdown(label="Examiner's Feedback")
     gr.Markdown("--- \n ### Detailed Word-by-Word Analysis")
-    word_analysis_out = gr.DataFrame(headers=["Reference Word", "Spoken Word", "Score", "Correct IPA", "Feedback"], label="Phonetic Breakdown")
     submit_btn.click(
         fn=run_evaluation,

 whisper_model = whisper.load_model("base", device="cpu")
 print("Whisper model loaded.")
+# --- NEW: List of tongue twisters ---
+TONGUE_TWISTERS = [
+    "Peter Piper picked a peck of pickled peppers.",
+    "She sells seashells by the seashore.",
+    "How much wood would a woodchuck chuck if a woodchuck could chuck wood?",
+    "Betty Botter bought some butter but she said the butter’s bitter.",
+    "A proper copper coffee pot."
+]
+# --- EXPERT EXAMINER PROMPT (no changes) ---
 SYSTEM_PROMPT = """
 You are an expert English language examiner specializing in phonetics and accent reduction for ESL learners. Your task is to provide a detailed, diagnostic assessment of a student's spoken English based on a reference sentence and detailed word-level audio analysis.
 }
 """
+# --- 1. FEATURE EXTRACTION (no changes) ---
 def extract_word_level_features(audio_path):
     """Return per-word timing and RMS-energy features for a recording.

     The audio file is loaded with librosa (``sr=16000``) and transcribed with
     the module-level Whisper model using ``word_timestamps=True``. For every
     word Whisper reports, the matching slice of the waveform is cut out and
     its mean RMS energy is computed.

     Args:
         audio_path: Path of the audio file to analyse.

     Returns:
         A list of dicts with keys ``"word"``, ``"start"``, ``"end"`` and
         ``"energy"``, in the order Whisper returned the words. The list is
         empty when no words were found or when any step raised an exception
         (the error is printed, not re-raised).
     """
     try:
         y, sr = librosa.load(audio_path, sr=16000)
         result = whisper_model.transcribe(audio_path, word_timestamps=True, fp16=False)
         features_list = []
         # Walk every segment: Whisper may split one recording into several
         # segments, and reading only the first one drops the remaining words.
         for segment in result.get("segments") or []:
             for word in segment.get("words") or []:
                 start_sample = int(word['start'] * sr)
                 end_sample = int(word['end'] * sr)
                 word_audio = y[start_sample:end_sample]
                 # A zero-length slice has no frames to measure; report 0 energy.
                 rms_energy = np.mean(librosa.feature.rms(y=word_audio)) if len(word_audio) > 0 else 0
                 features_list.append({
                     "word": word['word'].strip(),
                     "start": round(word['start'], 2),
                     "end": round(word['end'], 2),
                     "energy": round(float(rms_energy), 4),
                 })
         return features_list
     except Exception as e:
         # Best-effort: the caller treats an empty list as "could not process".
         print(f"Error during feature extraction: {e}")
         return []
+# --- 2. MAIN EVALUATION FUNCTION (no changes) ---
 def run_evaluation(audio_input, reference_transcript):
     if not api_key_found: raise gr.Error("OpenAI API key not found.")
     if audio_input is None or not reference_transcript:
     temp_audio_path = "temp_audio.wav"
     sf.write(temp_audio_path, y, sr)
     word_features = extract_word_level_features(temp_audio_path)
     if not word_features:
         return 0, "N/A", "Could not process the audio. Please try recording again.", None
+    prompt_data = {"reference_transcript": reference_transcript, "spoken_words": word_features}
     print("Sending detailed data to GPT-4o for analysis...")
     response = client.chat.completions.create(
+        model="gpt-4o", response_format={"type": "json_object"},
+        messages=[{"role": "system", "content": SYSTEM_PROMPT}, {"role": "user", "content": json.dumps(prompt_data)}]
     )
     try:
         result = json.loads(response.choices[0].message.content)
         holistic_feedback_md = f"### Strengths\n{result['holistic_feedback']['strengths']}\n\n"
         holistic_feedback_md += f"### Areas for Improvement\n{result['holistic_feedback']['areas_for_improvement']}"
         word_analysis_df = pd.DataFrame(result['word_by_word_analysis'])
+        return (result.get("overall_score_100", 0), result.get("cefr_level", "N/A"), holistic_feedback_md,
+                gr.DataFrame(value=word_analysis_df, headers=["Reference Word", "Spoken Word", "Score", "Correct IPA", "Feedback"], interactive=False))
     except (json.JSONDecodeError, KeyError) as e:
         print(f"Error processing API response: {e}")
         error_msg = "The API response was not in the expected format. Please try again."
         return 0, "Error", error_msg, None
+# --- 3. GRADIO INTERFACE (with the new adjustments) ---
 with gr.Blocks(theme=gr.themes.Soft()) as demo:
     gr.Markdown("# 🇬🇧 Expert Pronunciation Assessment")
+    gr.Markdown("Choose a tongue twister or write your own sentence. Record yourself, and our AI examiner will provide a detailed diagnostic report.")
+    # --- NEW: Tongue twister selector ---
+    tongue_twister_selector = gr.Dropdown(
+        choices=TONGUE_TWISTERS,
+        label="Or Choose a Tongue Twister to Practice",
+        info="Selecting one will automatically fill the reference sentence below."
+    )
     with gr.Row():
         with gr.Column(scale=1):
             audio_in = gr.Audio(sources=["microphone"], type="numpy", label="1. Record Your Voice")
+            text_in = gr.Textbox(lines=3, label="2. Reference Sentence", value=TONGUE_TWISTERS[0])
             submit_btn = gr.Button("Get Assessment", variant="primary")
         with gr.Column(scale=2):
             holistic_feedback_out = gr.Markdown(label="Examiner's Feedback")
     gr.Markdown("--- \n ### Detailed Word-by-Word Analysis")
+    word_analysis_out = gr.DataFrame(headers=["Reference Word", "Spoken Word", "Score", "Correct IPA", "Feedback"], label="Phonetic Breakdown", wrap=True)
+    # --- NEW: Interaction logic ---
+    # When the dropdown changes, update the text field.
+    def update_text(choice):
+        return gr.Textbox(value=choice)
+    tongue_twister_selector.change(fn=update_text, inputs=tongue_twister_selector, outputs=text_in)
     submit_btn.click(
         fn=run_evaluation,