Spaces:
Running
Running
Update app.py
Browse files
app.py
CHANGED
|
@@ -23,12 +23,15 @@ whisper_asr = pipeline(
|
|
| 23 |
}
|
| 24 |
)
|
| 25 |
|
|
|
|
| 26 |
HF_TOKEN = os.getenv("HF_TOKEN")
|
| 27 |
PRIVATE_BACKEND_URL = "st192011/Torgo-DSR-Private"
|
| 28 |
|
| 29 |
def normalize_text(text):
|
| 30 |
if not text: return ""
|
| 31 |
-
|
|
|
|
|
|
|
| 32 |
|
| 33 |
def format_audio(audio_path):
|
| 34 |
"""Ensures audio is 16kHz mono to match ASR training conditions."""
|
|
@@ -75,9 +78,9 @@ def get_sample_logic(speaker_id):
|
|
| 75 |
# --- Logic: Model Processing ---
|
| 76 |
def process_audio_step_1(audio_path):
|
| 77 |
"""Runs Whisper Baseline and returns normalized text."""
|
| 78 |
-
if not audio_path: return "No audio", ""
|
| 79 |
|
| 80 |
-
# Pre-process audio format
|
| 81 |
formatted_path = format_audio(audio_path)
|
| 82 |
|
| 83 |
# Run Whisper
|
|
@@ -87,13 +90,21 @@ def process_audio_step_1(audio_path):
|
|
| 87 |
return raw_w, norm_w
|
| 88 |
|
| 89 |
def process_audio_step_2(audio_path, norm_whisper):
|
| 90 |
-
"""Sends audio + normalized whisper to the Private Model."""
|
| 91 |
-
if not audio_path or not norm_whisper:
|
|
|
|
| 92 |
|
| 93 |
try:
|
| 94 |
-
|
| 95 |
client = Client(PRIVATE_BACKEND_URL, hf_token=HF_TOKEN)
|
| 96 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 97 |
return prediction
|
| 98 |
except Exception as e:
|
| 99 |
return f"Backend Connection Required. Details: {e}"
|
|
@@ -101,7 +112,7 @@ def process_audio_step_2(audio_path, norm_whisper):
|
|
| 101 |
# --- UI Construction ---
|
| 102 |
with gr.Blocks(theme=gr.themes.Soft(), title="Torgo DSR Lab") as demo:
|
| 103 |
gr.Markdown("# ⚗️ Torgo DSR Lab")
|
| 104 |
-
gr.Markdown("Neural Reconstruction Layer for Torgo
|
| 105 |
|
| 106 |
# Hidden state to store the path of the currently active audio
|
| 107 |
active_audio_path = gr.State("")
|
|
@@ -132,7 +143,7 @@ with gr.Blocks(theme=gr.themes.Soft(), title="Torgo DSR Lab") as demo:
|
|
| 132 |
gr.Markdown("#### Step 1: ASR Baseline")
|
| 133 |
whisper_btn = gr.Button("Run Whisper Tiny")
|
| 134 |
w_raw = gr.Textbox(label="Whisper Raw Transcript")
|
| 135 |
-
w_norm = gr.Textbox(label="Whisper Normalized")
|
| 136 |
|
| 137 |
gr.Markdown("---")
|
| 138 |
|
|
@@ -179,9 +190,9 @@ with gr.Blocks(theme=gr.themes.Soft(), title="Torgo DSR Lab") as demo:
|
|
| 179 |
outputs=[active_audio_path, gt_box, meta_display]
|
| 180 |
)
|
| 181 |
|
| 182 |
-
# Personal Channel: Use Audio -> Update State -> Clear
|
| 183 |
user_load_btn.click(
|
| 184 |
-
lambda x: (x, "User
|
| 185 |
inputs=user_audio,
|
| 186 |
outputs=[active_audio_path, gt_box, meta_display]
|
| 187 |
)
|
|
|
|
| 23 |
}
|
| 24 |
)
|
| 25 |
|
| 26 |
+
# --- Configuration from environment variables ---
# Token read from the Space's env; passed to gradio_client.Client below to
# authenticate against the private backend Space. May be None if unset.
HF_TOKEN = os.getenv("HF_TOKEN")
# Repo id of the private Space that hosts the DSR prediction endpoint
# (consumed by Client(...) in process_audio_step_2).
PRIVATE_BACKEND_URL = "st192011/Torgo-DSR-Private"
|
| 29 |
|
| 30 |
def normalize_text(text):
    """Normalize a transcript: strip punctuation, lowercase, collapse whitespace.

    Empty or falsy input (None, "") yields "".
    """
    if not text:
        return ""
    # Drop everything except word characters and whitespace, then lowercase.
    cleaned = re.sub(r'[^\w\s]', '', text).lower().strip()
    # Collapse any run of whitespace to a single space.
    return " ".join(cleaned.split())
|
| 35 |
|
| 36 |
def format_audio(audio_path):
|
| 37 |
"""Ensures audio is 16kHz mono to match ASR training conditions."""
|
|
|
|
| 78 |
# --- Logic: Model Processing ---
|
| 79 |
def process_audio_step_1(audio_path):
|
| 80 |
"""Runs Whisper Baseline and returns normalized text."""
|
| 81 |
+
if not audio_path: return "No audio loaded", ""
|
| 82 |
|
| 83 |
+
# Pre-process audio format to 16k
|
| 84 |
formatted_path = format_audio(audio_path)
|
| 85 |
|
| 86 |
# Run Whisper
|
|
|
|
| 90 |
return raw_w, norm_w
|
| 91 |
|
| 92 |
def process_audio_step_2(audio_path, norm_whisper):
    """Sends audio + normalized whisper to the Private Model API."""
    # Both the audio file and the Step-1 transcript are required inputs.
    if not audio_path or not norm_whisper:
        return "Please load data and run Whisper (Step 1) first."

    try:
        # Open an authenticated client against the private backend Space.
        client = Client(PRIVATE_BACKEND_URL, hf_token=HF_TOKEN)

        # Invoke the 'predict_dsr' endpoint exposed by the private Space,
        # sending the audio file plus the normalized Whisper transcript.
        return client.predict(
            audio_path,
            norm_whisper,
            api_name="/predict_dsr",
        )
    except Exception as e:
        # Surface any connection/auth failure to the UI as a message.
        return f"Backend Connection Required. Details: {e}"
|
|
|
|
| 112 |
# --- UI Construction ---
|
| 113 |
with gr.Blocks(theme=gr.themes.Soft(), title="Torgo DSR Lab") as demo:
|
| 114 |
gr.Markdown("# ⚗️ Torgo DSR Lab")
|
| 115 |
+
gr.Markdown("Neural Reconstruction Layer for Torgo and UA-Speech Zero-Shot.")
|
| 116 |
|
| 117 |
# Hidden state to store the path of the currently active audio
|
| 118 |
active_audio_path = gr.State("")
|
|
|
|
| 143 |
gr.Markdown("#### Step 1: ASR Baseline")
|
| 144 |
whisper_btn = gr.Button("Run Whisper Tiny")
|
| 145 |
w_raw = gr.Textbox(label="Whisper Raw Transcript")
|
| 146 |
+
w_norm = gr.Textbox(label="Whisper Normalized (Input for Model)")
|
| 147 |
|
| 148 |
gr.Markdown("---")
|
| 149 |
|
|
|
|
| 190 |
outputs=[active_audio_path, gt_box, meta_display]
|
| 191 |
)
|
| 192 |
|
| 193 |
+
# Personal Channel: Use Audio -> Update State -> Clear Reference
|
| 194 |
user_load_btn.click(
|
| 195 |
+
lambda x: (x, "User Recorded (No Ground Truth)", {"Dataset": "Custom", "Severity": "N/A"}),
|
| 196 |
inputs=user_audio,
|
| 197 |
outputs=[active_audio_path, gt_box, meta_display]
|
| 198 |
)
|