Spaces:

Bisher
/

khateeb_standalone

Sleeping

App Files Files Community

Bisher committed on Apr 17, 2025

Commit

18d0c35

verified ·

1 Parent(s): 1ae10e6

Update app.py

Browse files

Files changed (1) hide show

app.py +78 -234

app.py CHANGED Viewed

@@ -15,26 +15,17 @@ warnings.filterwarnings("ignore", message="Hypothesis is empty.*", category=User
 DIACRITIZATION_API_URL = "Bisher/CATT.diacratization"
 TRANSCRIPTION_API_URL = "gh-kaka22/diacritic_level_arabic_transcription"
-# Define the set of Arabic diacritic characters using pyarabic constants if available
 if araby:
     ARABIC_DIACRITICS = {
-        araby.FATHA,      # U+064E
-        araby.FATHATAN,   # U+064B
-        araby.DAMMA,      # U+064F
-        araby.DAMMATAN,   # U+064C
-        araby.KASRA,      # U+0650
-        araby.KASRATAN,   # U+064D
-        araby.SUKUN,      # U+0652
-        araby.SHADDA,     # U+0651
     }
 else:
-    # Fallback if pyarabic failed to import
     ARABIC_DIACRITICS = {'\u064B', '\u064C', '\u064D', '\u064E', '\u064F', '\u0650', '\u0651', '\u0652'}
-# --- Gradio API Clients ---
 def get_diacritization_client():
-    """Initializes and returns the client for the text diacritization API."""
     try:
         return Client(DIACRITIZATION_API_URL, download_files=True)
     except Exception as e:
@@ -42,7 +33,6 @@ def get_diacritization_client():
         return None
 def get_transcription_client():
-    """Initializes and returns the client for the audio transcription API."""
     try:
         return Client(TRANSCRIPTION_API_URL, download_files=True)
     except Exception as e:
@@ -50,280 +40,134 @@ def get_transcription_client():
         return None
 # --- Helper Functions ---
 def diacritize_text_api(text_to_diacritize):
-    """
-    Calls the Hugging Face space to diacritize the input text.
-    Args:
-        text_to_diacritize (str): The undiacritized Arabic text.
-    Returns:
-        tuple: (str, str) The diacritized text (or error message) returned twice,
-               once for the output component and once for the state.
-    """
     if not text_to_diacritize or not text_to_diacritize.strip():
-        error_msg = "Please enter some text to diacritize."
-        return error_msg, error_msg
     client = get_diacritization_client()
     if not client:
-        error_msg = "Error: Could not connect to the diacritization service."
-        return error_msg, error_msg
     try:
-        print(f"Sending text to diacritization API: {text_to_diacritize}")
         result = client.predict(
             model_type="Encoder-Only",
             input_text=text_to_diacritize,
             api_name="/predict"
         )
-        print(f"Received diacritized text: {result}")
-        result_str = str(result) if result is not None else "Error: Received empty response from diacritization service."
         return result_str, result_str
     except Exception as e:
-        print(f"Error during text diacritization API call: {e}")
-        error_msg = f"Error during diacritization: {e}"
-        return error_msg, error_msg
 def transcribe_audio_api(audio_filepath):
-    """
-    Calls the Hugging Face space to transcribe and diacritize the input audio.
-    Args:
-        audio_filepath (str): The path to the audio file.
-    Returns:
-        str: The diacritized transcript, or an error message.
-    """
     if not audio_filepath:
         return "Error: Please provide an audio recording or file."
     if not os.path.exists(audio_filepath):
-         return f"Error: Audio file not found at {audio_filepath}"
     client = get_transcription_client()
     if not client:
         return "Error: Could not connect to the transcription service."
     try:
-        print(f"Sending audio file to transcription API: {audio_filepath}")
         result = client.predict(
             audio=handle_file(audio_filepath),
             api_name="/predict"
         )
-        print(f"Received transcript: {result}")
         if isinstance(result, dict) and 'text' in result:
-             transcript = result['text']
-        elif isinstance(result, str):
-             transcript = result
         else:
-             print(f"Unexpected transcription result format: {result}")
-             return "Error: Unexpected format received from transcription service."
-        return str(transcript) if transcript is not None else "Error: Received empty response from transcription service."
     except Exception as e:
-        print(f"Error during audio transcription API call: {e}")
         return f"Error during transcription: {e}"
 def get_diacritics_sequence(text):
-    """
-    Extracts only the Arabic diacritic characters from a string.
-    Args:
-        text (str): The input string potentially containing diacritics.
-    Returns:
-        str: A space-separated string of diacritics found in the text.
-             Returns an empty string if no diacritics are found or input is not a string.
-    """
     if not isinstance(text, str):
-        return "" # Return empty string for non-string input
-    if not araby and not ARABIC_DIACRITICS:
-         print("Warning: pyarabic not loaded, cannot reliably extract diacritics.")
-         return ""
-    diacritics_only = [char for char in text if char in ARABIC_DIACRITICS]
     return ' '.join(diacritics_only)
 def calculate_metrics(reference, hypothesis):
-    """
-    Calculates Word Error Rate (WER), Diacritic Error Rate (DER),
-    and Character Error Rate (CER).
-    DER is calculated based *only* on the sequence of diacritic marks.
-    Args:
-        reference (str): The original diacritized text.
-        hypothesis (str): The diacritized transcript from the audio.
-    Returns:
-        tuple: (wer, der, cer) scores, or (None, None, None) if inputs are invalid or calculation fails.
-    """
-    if not isinstance(reference, str):
-        print(f"Error: Reference input is not a string (type: {type(reference)}). Value: {reference}")
-        reference = ""
-    if not isinstance(hypothesis, str):
-        print(f"Error: Hypothesis input is not a string (type: {type(hypothesis)}). Value: {hypothesis}")
-        hypothesis = ""
-    ref_strip = reference.strip()
-    hyp_strip = hypothesis.strip()
-    wer = None
-    der = None
-    cer = None
-    try:
-        # Handle cases where both are empty first
-        if not ref_strip and not hyp_strip:
-            return 0.0, 0.0, 0.0
-        # 1. Calculate Word Error Rate (WER)
-        if not ref_strip:
-            wer = 1.0 # Reference empty, hypothesis not
-        else:
-            wer = jiwer.wer(reference, hypothesis)
-        # 2. Calculate Diacritic Error Rate (DER) based *only* on diacritics
-        ref_diacritics = get_diacritics_sequence(reference)
-        hyp_diacritics = get_diacritics_sequence(hypothesis)
-        ref_diacritics_strip = ref_diacritics.strip()
-        hyp_diacritics_strip = hyp_diacritics.strip()
-        if not ref_diacritics_strip and not hyp_diacritics_strip:
-            der = 0.0
-        elif not ref_diacritics_strip:
-            der = 1.0
-            print("Warning: No diacritics found in reference text for DER calculation.")
-        else:
-            der = jiwer.wer(ref_diacritics, hyp_diacritics)
-        # 3. Calculate Character Error Rate (CER)
-        if not ref_strip:
-             # If reference is empty, CER is 1.0 (all hypothesis chars are insertions)
-             # unless hypothesis is also empty (handled above)
-            cer = 1.0
         else:
-            # jiwer.cer handles empty hypothesis correctly if reference is not empty
-            cer = jiwer.cer(reference, hypothesis)
-        # Round the results
-        wer_rounded = round(wer, 4) if wer is not None else None
-        der_rounded = round(der, 4) if der is not None else None
-        cer_rounded = round(cer, 4) if cer is not None else None
-        return wer_rounded, der_rounded, cer_rounded
-    except Exception as e:
-        print(f"Error calculating metrics: {e}")
-        return None, None, None
-def process_audio_and_compare(audio_input, original_diacritized_text):
-    """
-    Main function triggered after audio input.
-    Transcribes audio, calculates metrics (WER, DER, CER), and returns results.
-    Returns:
-        tuple: (transcript, wer, der, cer)
-               transcript (str): The transcribed text or an error message.
-               wer (float | None): Word Error Rate or None if error.
-               der (float | None): Diacritic Error Rate or None if error.
-               cer (float | None): Character Error Rate or None if error.
-    """
-    print("Processing audio and comparing...")
-    if not original_diacritized_text or not isinstance(original_diacritized_text, str) or original_diacritized_text.startswith("Error:"):
-        error_msg = "Error: Valid reference diacritized text not available. Please diacritize text first."
-        print(error_msg)
-        # Return default/error values for all outputs
-        return error_msg, None, None, None
-    transcript = transcribe_audio_api(audio_input)
-    if not isinstance(transcript, str) or transcript.startswith("Error:"):
-        error_msg = transcript if isinstance(transcript, str) else "Error: Transcription failed with non-string output."
-        print(error_msg)
-        # Return transcript error and None for metrics
-        return error_msg, None, None, None
-    # Calculate all three metrics
-    wer, der, cer = calculate_metrics(original_diacritized_text, transcript)
-    if wer is None or der is None or cer is None:
-        print("Metrics calculation failed.")
-        # Return transcript but None for metrics
-        return transcript, None, None, None
-    print(f"Comparison complete. WER: {wer}, DER: {der}, CER: {cer}")
-    # Return transcript and all three metrics
-    return transcript, wer, der, cer
 # --- Gradio Interface ---
 with gr.Blocks(theme=gr.themes.Soft()) as app:
     gr.Markdown(
         """
         # Arabic Diacritization and Reading Assessment Tool
-        1.  Enter undiacritized Arabic text and click **Diacritize Text**.
-        2.  Read the generated **Diacritized Text** aloud and record it using the microphone or upload an audio file.
-        3.  Click **Transcribe and Compare** to get the transcript and see the WER/DER/CER scores compared to the original diacritized text.
         """
     )
-    original_diacritized_state = gr.State("")
     with gr.Row():
         with gr.Column(scale=1):
-            text_input = gr.Textbox(
-                label="1. Enter Undiacritized Arabic Text",
-                placeholder="مثال: السلام عليكم",
-                lines=3,
-                text_align="right",
-            )
-            diacritize_button = gr.Button("Diacritize Text")
-            diacritized_text_output = gr.Textbox(
-                label="2. Diacritized Text (Reference)",
-                lines=3,
-                interactive=False,
-                text_align="right",
-            )
         with gr.Column(scale=1):
-            audio_input = gr.Audio(
-                sources=["microphone", "upload"],
-                type="filepath",
-                label="3. Record or Upload Audio of Reading Diacritized Text",
-            )
-            transcribe_button = gr.Button("Transcribe and Compare")
-            transcript_output = gr.Textbox(
-                label="4. Diacritized Transcript (Hypothesis)",
-                lines=3,
-                interactive=False,
-                text_align="right",
-            )
             with gr.Row():
-                 # Add CER output component
-                 wer_output = gr.Number(label="WER", interactive=False, precision=4) # Shortened label
-                 der_output = gr.Number(label="DER", interactive=False, precision=4) # Shortened label
-                 cer_output = gr.Number(label="CER", interactive=False, precision=4) # Added CER
-    # --- Connect Components ---
-    diacritize_button.click(
         fn=diacritize_text_api,
         inputs=[text_input],
-        outputs=[diacritized_text_output, original_diacritized_state]
     )
-    transcribe_button.click(
-        fn=process_audio_and_compare,
-        inputs=[audio_input, original_diacritized_state],
-        # Update outputs to include the new CER component
-        outputs=[transcript_output, wer_output, der_output, cer_output]
     )
-app.launch(debug=True, share=True)

 # Identifiers of the remote Spaces used for text diacritization and for
 # audio transcription.
 DIACRITIZATION_API_URL = "Bisher/CATT.diacratization"
 TRANSCRIPTION_API_URL = "gh-kaka22/diacritic_level_arabic_transcription"

 # Characters counted as diacritics when computing the diacritic error rate.
 # The pyarabic constants are used when `araby` is truthy; otherwise the set
 # is spelled out as literal code points.
 ARABIC_DIACRITICS = (
     {
         araby.FATHATAN, araby.DAMMATAN, araby.KASRATAN,
         araby.FATHA, araby.DAMMA, araby.KASRA,
         araby.SHADDA, araby.SUKUN,
     }
     if araby
     else {'\u064B', '\u064C', '\u064D', '\u064E', '\u064F', '\u0650', '\u0651', '\u0652'}
 )
+# --- API Clients ---
 def get_diacritization_client():
     """Create the client for the text diacritization service.

     Returns:
         A ``Client`` built from ``DIACRITIZATION_API_URL`` with
         ``download_files=True``, or ``None`` when construction raises.
     """
     try:
         return Client(DIACRITIZATION_API_URL, download_files=True)
     except Exception as e:
         # NOTE(review): the diff hunk boundary falls between the `except`
         # line and this `return`, so a statement (possibly one that uses
         # `e`) may be hidden here -- confirm against the full app.py.
         return None
 def get_transcription_client():
     """Create the client for the audio transcription service.

     Returns:
         A ``Client`` built from ``TRANSCRIPTION_API_URL`` with
         ``download_files=True``, or ``None`` when construction raises.
     """
     try:
         return Client(TRANSCRIPTION_API_URL, download_files=True)
     except Exception as e:
         # NOTE(review): the diff hunk boundary falls between the `except`
         # line and this `return`, so a statement (possibly one that uses
         # `e`) may be hidden here -- confirm against the full app.py.
         return None
 # --- Helper Functions ---
 def diacritize_text_api(text_to_diacritize):
     """Diacritize Arabic text through the remote diacritization service.

     Args:
         text_to_diacritize: The undiacritized text entered by the user.

     Returns:
         A ``(display_text, reference_text)`` pair. On success both elements
         are the diacritized string. On every failure ``display_text`` is a
         message for the user and ``reference_text`` is ``""``, so an error
         message is never stored as the reference for later comparison.
     """
     no_reference = ""
     if not text_to_diacritize or not text_to_diacritize.strip():
         return "Please enter some text to diacritize.", no_reference
     client = get_diacritization_client()
     if not client:
         return "Error: Could not connect to the diacritization service.", no_reference
     try:
         result = client.predict(
             model_type="Encoder-Only",
             input_text=text_to_diacritize,
             api_name="/predict",
         )
     except Exception as e:
         return f"Error during diacritization: {e}", no_reference
     if result is None:
         # An empty response is a failure like any other: report it, but do
         # not let the message become the reference text.
         return "Error: Empty response from diacritization service.", no_reference
     result_str = str(result)
     return result_str, result_str
 def transcribe_audio_api(audio_filepath):
     """Transcribe an audio file through the remote transcription service.

     Args:
         audio_filepath: Path of the recorded or uploaded audio file.

     Returns:
         The transcript as a ``str``. Every failure is reported as a string
         starting with ``"Error"``, which callers use to detect failure.
     """
     if not audio_filepath:
         return "Error: Please provide an audio recording or file."
     if not os.path.exists(audio_filepath):
         return f"Error: Audio file not found at {audio_filepath}"
     client = get_transcription_client()
     if not client:
         return "Error: Could not connect to the transcription service."
     try:
         result = client.predict(
             audio=handle_file(audio_filepath),
             api_name="/predict",
         )
     except Exception as e:
         return f"Error during transcription: {e}"
     # The service may answer with a dict carrying a 'text' entry or with a
     # bare value; anything else is stringified.
     if isinstance(result, dict) and 'text' in result:
         transcript = result['text']
     else:
         transcript = result
     if transcript is None:
         # Stringifying None would yield the transcript "None", which would
         # then be scored against the reference as if it had been spoken.
         return "Error: Empty response from transcription service."
     return str(transcript)
 def get_diacritics_sequence(text):
     """Return the diacritic marks found in *text*, in order, space-separated.

     Only characters contained in ``ARABIC_DIACRITICS`` are kept. The marks
     are joined with single spaces so that a word-level metric can treat each
     mark as one token. Non-string input yields ``""``.
     """
     if isinstance(text, str):
         return ' '.join(mark for mark in text if mark in ARABIC_DIACRITICS)
     return ""
 def calculate_metrics(reference, hypothesis):
     """Score *hypothesis* against *reference* and return ``(wer, der, cer)``.

     WER and CER come from ``jiwer.wer`` / ``jiwer.cer`` on the full texts.
     DER is ``jiwer.wer`` applied to the space-separated diacritic sequences
     of both texts. Whenever the reference side of a metric is blank, jiwer
     is not called: the score is 1.0 if the hypothesis side has content and
     0.0 otherwise. All three scores are rounded to four decimals.
     """
     ref_text = reference if reference else ""
     hyp_text = hypothesis if hypothesis else ""

     # Word error rate.
     if ref_text.strip():
         word_rate = jiwer.wer(ref_text, hyp_text)
     else:
         word_rate = 1.0 if hyp_text.strip() else 0.0

     # Diacritic error rate, computed on the diacritic marks alone.
     ref_marks = get_diacritics_sequence(ref_text)
     hyp_marks = get_diacritics_sequence(hyp_text)
     if ref_marks.strip():
         mark_rate = jiwer.wer(ref_marks, hyp_marks)
     else:
         mark_rate = 1.0 if hyp_marks.strip() else 0.0

     # Character error rate.
     if ref_text.strip():
         char_rate = jiwer.cer(ref_text, hyp_text)
     else:
         char_rate = 1.0 if hyp_text.strip() else 0.0

     return tuple(round(rate, 4) for rate in (word_rate, mark_rate, char_rate))
+import difflib
+def highlight_errors(reference, hypothesis):
+    ref_words = reference.split()
+    hyp_words = hypothesis.split()
+    matcher = difflib.SequenceMatcher(a=ref_words, b=hyp_words)
+    highlighted = []
+    errors = []
+    # Iterate over matched blocks and insert highlights for mismatches
+    i = j = 0
+    for tag, a0, a1, b0, b1 in matcher.get_opcodes():
+        if tag == 'equal':
+            for w in ref_words[a0:a1]:
+                highlighted.append(w)
         else:
+            # highlight reference words as errors
+            for w in ref_words[a0:a1]:
+                highlighted.append(f"<mark>{w}</mark>")
+                errors.append(w)
+        i = a1
+        j = b1
+    html = ' '.join(highlighted)
+    return html, ', '.join(errors)
 # --- Gradio Interface ---
 with gr.Blocks(theme=gr.themes.Soft()) as app:
     gr.Markdown(
         """
         # Arabic Diacritization and Reading Assessment Tool
         1. Enter undiacritized Arabic text and click **Diacritize Text**.
         2. Read the generated **Diacritized Text** aloud and record or upload audio.
         3. Click **Transcribe and Compare** to see the transcript, WER/DER/CER, and mispronounced words highlighted.
         """
     )
     # Keeps the diacritized reference text between the two button clicks.
     original_state = gr.State("")
     with gr.Row():
         with gr.Column(scale=1):
             text_input = gr.Textbox(label="Undiacritized Arabic Text", lines=3, text_align="right")
             diacritize_btn = gr.Button("Diacritize Text")
             diacritized_output = gr.Textbox(label="Diacritized Text (Reference)", lines=3, interactive=False, text_align="right")
         with gr.Column(scale=1):
             audio_input = gr.Audio(label="Record or Upload Audio", type="filepath")
             transcribe_btn = gr.Button("Transcribe and Compare")
             transcript_output = gr.Textbox(label="Transcript (Hypothesis)", lines=3, interactive=False, text_align="right")
             with gr.Row():
                 wer_out = gr.Number(label="WER", interactive=False, precision=4)
                 der_out = gr.Number(label="DER", interactive=False, precision=4)
                 cer_out = gr.Number(label="CER", interactive=False, precision=4)
             error_html = gr.HTML(label="Highlighted Errors")
             error_list = gr.Textbox(label="Mispronounced Words", interactive=False)
     diacritize_btn.click(
         fn=diacritize_text_api,
         inputs=[text_input],
         outputs=[diacritized_output, original_state]
     )

     def process(audio, ref_text):
         """Transcribe *audio* and score it against the stored reference.

         Returns six values in the order of the ``outputs`` list wired to
         ``transcribe_btn``: transcript (or an error message), WER, DER,
         CER, highlighted HTML and the comma-separated error words. When
         there is nothing valid to compare, the metrics are ``None`` and
         the last two values are ``""``.
         """
         nothing_to_show = (None, None, None, "", "")
         # With no reference yet, WER and CER would be reported as 1.0 with
         # no explanation; ask the user to diacritize first. Checking this
         # before transcribing also avoids a pointless remote call.
         if not isinstance(ref_text, str) or not ref_text.strip():
             return (
                 "Error: Valid reference diacritized text not available. Please diacritize text first.",
                 *nothing_to_show,
             )
         transcript = transcribe_audio_api(audio)
         if not isinstance(transcript, str):
             return ("Error: Transcription failed with non-string output.", *nothing_to_show)
         if transcript.startswith("Error"):
             return (transcript, *nothing_to_show)
         wer, der, cer = calculate_metrics(ref_text, transcript)
         html, errs = highlight_errors(ref_text, transcript)
         return transcript, wer, der, cer, html, errs

     transcribe_btn.click(
         fn=process,
         inputs=[audio_input, original_state],
         outputs=[transcript_output, wer_out, der_out, cer_out, error_html, error_list]
     )
 app.launch(debug=True, share=True)