Spaces:

Bisher
/

khateeb_standalone

Sleeping

App Files Files Community

Bisher commited on Apr 17

Commit

647d7b9

verified ·

1 Parent(s): b11a63d

Create app.py

Browse files

Files changed (1) hide show

app.py +251 -0

app.py ADDED Viewed

	@@ -0,0 +1,251 @@

+import gradio as gr
+from gradio_client import Client, handle_file
+import jiwer
+import os
+import time
+import warnings
+# Suppress specific UserWarnings from jiwer related to empty strings
+warnings.filterwarnings("ignore", message="Reference is empty.*", category=UserWarning)
+warnings.filterwarnings("ignore", message="Hypothesis is empty.*", category=UserWarning)
+# --- Constants ---
+DIACRITIZATION_API_URL = "Bisher/CATT.diacratization"
+TRANSCRIPTION_API_URL = "gh-kaka22/diacritic_level_arabic_transcription"
+# --- Gradio API Clients ---
+# It's good practice to initialize clients outside the functions
+# if the app runs continuously, but be mindful of potential state issues
+# or connection timeouts in long-running deployments. For simplicity here,
+# we might re-initialize, though a single initialization is often preferred.
+def get_diacritization_client():
+    """Initializes and returns the client for the text diacritization API."""
+    try:
+        return Client(DIACRITIZATION_API_URL, download_files=True) # download_files might be needed depending on space setup
+    except Exception as e:
+        print(f"Error initializing diacritization client: {e}")
+        return None
+def get_transcription_client():
+    """Initializes and returns the client for the audio transcription API."""
+    try:
+        return Client(TRANSCRIPTION_API_URL, download_files=True) # download_files might be needed
+    except Exception as e:
+        print(f"Error initializing transcription client: {e}")
+        return None
+# --- Helper Functions ---
+def diacritize_text_api(text_to_diacritize):
+    """
+    Calls the Hugging Face space to diacritize the input text.
+    Args:
+        text_to_diacritize (str): The undiacritized Arabic text.
+    Returns:
+        str: The diacritized text, or an error message.
+    """
+    if not text_to_diacritize:
+        return "Please enter some text to diacritize."
+    client = get_diacritization_client()
+    if not client:
+        return "Error: Could not connect to the diacritization service."
+    try:
+        print(f"Sending text to diacritization API: {text_to_diacritize}")
+        result = client.predict(
+            model_type="Encoder-Only",  # Or 'Encoder-Decoder' if preferred
+            input_text=text_to_diacritize,
+            api_name="/predict"
+        )
+        print(f"Received diacritized text: {result}")
+        return result
+    except Exception as e:
+        print(f"Error during text diacritization API call: {e}")
+        # Provide more specific error feedback if possible
+        return f"Error during diacritization: {e}"
+def transcribe_audio_api(audio_filepath):
+    """
+    Calls the Hugging Face space to transcribe and diacritize the input audio.
+    Args:
+        audio_filepath (str): The path to the audio file.
+    Returns:
+        str: The diacritized transcript, or an error message.
+    """
+    if not audio_filepath:
+        return "Please provide an audio recording or file."
+    # Check if file exists and is accessible
+    if not os.path.exists(audio_filepath):
+         return f"Error: Audio file not found at {audio_filepath}"
+    client = get_transcription_client()
+    if not client:
+        return "Error: Could not connect to the transcription service."
+    try:
+        print(f"Sending audio file to transcription API: {audio_filepath}")
+        # Use handle_file to manage the audio file for the API call
+        result = client.predict(
+            audio=handle_file(audio_filepath),
+            api_name="/predict"
+        )
+        print(f"Received transcript: {result}")
+        # The API might return more structure, adapt if needed. Assuming it returns the text directly.
+        # Example: if result is {'text': '...'}, use result['text']
+        if isinstance(result, dict) and 'text' in result:
+             transcript = result['text']
+        elif isinstance(result, str):
+             transcript = result
+        else:
+             print(f"Unexpected transcription result format: {result}")
+             return "Error: Unexpected format received from transcription service."
+        return transcript
+    except Exception as e:
+        print(f"Error during audio transcription API call: {e}")
+        # Provide more specific error feedback if possible
+        return f"Error during transcription: {e}"
+def calculate_metrics(reference, hypothesis):
+    """
+    Calculates Word Error Rate (WER) and Diacritic Error Rate (DER).
+    Args:
+        reference (str): The original diacritized text.
+        hypothesis (str): The diacritized transcript from the audio.
+    Returns:
+        tuple: (wer, der) scores, or (None, None) if inputs are invalid.
+    """
+    if not isinstance(reference, str) or not isinstance(hypothesis, str):
+        print("Error: Invalid input types for metric calculation.")
+        return None, None
+    # Handle empty strings to avoid jiwer warnings/errors if not suppressed
+    if not reference.strip() and not hypothesis.strip():
+        return 0.0, 0.0 # Both empty, 0% error
+    if not reference.strip():
+        print("Warning: Reference text is empty.")
+        # WER/DER are typically 1.0 (or inf) if reference is empty and hypothesis is not.
+        # Jiwer might handle this, but let's return 1.0 for clarity.
+        return 1.0, 1.0
+    if not hypothesis.strip():
+        print("Warning: Hypothesis text is empty.")
+        # If hypothesis is empty but reference is not, WER/DER is 1.0
+        return 1.0, 1.0
+    try:
+        # 1. Calculate Word Error Rate (WER)
+        wer = jiwer.wer(reference, hypothesis)
+        # 2. Calculate Diacritic Error Rate (DER)
+        #    - Treat each character (including diacritics) as a token.
+        #    - Join characters with spaces to make jiwer treat them as "words".
+        ref_chars = ' '.join(list(reference))
+        hyp_chars = ' '.join(list(hypothesis))
+        der = jiwer.wer(ref_chars, hyp_chars)
+        return round(wer, 4), round(der, 4)
+    except Exception as e:
+        print(f"Error calculating metrics: {e}")
+        return None, None
+def process_audio_and_compare(audio_input, original_diacritized_text):
+    """
+    Main function triggered after audio input.
+    Transcribes audio, calculates metrics, and returns results.
+    """
+    print("Processing audio and comparing...")
+    if not original_diacritized_text:
+        return "Error: No original diacritized text found. Please diacritize text first.", None, None
+    # --- 1. Transcribe Audio ---
+    # Gradio provides the audio data (e.g., filepath for upload/mic)
+    transcript = transcribe_audio_api(audio_input)
+    if transcript.startswith("Error:"):
+        # If transcription failed, return the error and None for metrics
+        return transcript, None, None
+    # --- 2. Calculate Metrics ---
+    wer, der = calculate_metrics(original_diacritized_text, transcript)
+    print(f"Comparison complete. WER: {wer}, DER: {der}")
+    return transcript, wer, der
+# --- Gradio Interface ---
+with gr.Blocks(theme=gr.themes.Soft()) as app:
+    gr.Markdown(
+        """
+        # Arabic Diacritization and Reading Assessment Tool
+        1.  Enter undiacritized Arabic text and click **Diacritize Text**.
+        2.  Read the generated **Diacritized Text** aloud and record it using the microphone or upload an audio file.
+        3.  Click **Transcribe and Compare** to get the transcript and see the WER/DER scores compared to the original diacritized text.
+        """
+    )
+    # Store the original diacritized text for comparison later
+    original_diacritized_state = gr.State("")
+    with gr.Row():
+        with gr.Column(scale=1):
+            text_input = gr.Textbox(
+                label="1. Enter Undiacritized Arabic Text",
+                placeholder="مثال: السلام عليكم",
+                lines=3,
+                text_align="right", # Align text right for Arabic
+            )
+            diacritize_button = gr.Button("Diacritize Text")
+            diacritized_text_output = gr.Textbox(
+                label="2. Diacritized Text (Reference)",
+                lines=3,
+                interactive=False, # User shouldn't edit this directly
+                text_align="right",
+            )
+        with gr.Column(scale=1):
+            audio_input = gr.Audio(
+                sources=["microphone", "upload"],
+                type="filepath", # Get the path to the saved audio file
+                label="3. Record or Upload Audio of Reading Diacritized Text",
+            )
+            transcribe_button = gr.Button("Transcribe and Compare")
+            transcript_output = gr.Textbox(
+                label="4. Diacritized Transcript (Hypothesis)",
+                lines=3,
+                interactive=False,
+                text_align="right",
+            )
+            with gr.Row():
+                 wer_output = gr.Number(label="Word Error Rate (WER)", interactive=False)
+                 der_output = gr.Number(label="Diacritic Error Rate (DER)", interactive=False)
+    # --- Connect Components ---
+    # Action for Diacritize Button
+    diacritize_button.click(
+        fn=diacritize_text_api,
+        inputs=[text_input],
+        outputs=[diacritized_text_output, original_diacritized_state] # Update output and state
+    )
+    # Action for Transcribe Button
+    transcribe_button.click(
+        fn=process_audio_and_compare,
+        inputs=[audio_input, original_diacritized_state], # Pass audio and stored text
+        outputs=[transcript_output, wer_output, der_output] # Update transcript and metrics
+    )
+app.launch(debug=True)