st192011 committed on
Commit
1fab43b
·
verified ·
1 Parent(s): 987ad34

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +22 -11
app.py CHANGED
@@ -23,12 +23,15 @@ whisper_asr = pipeline(
23
  }
24
  )
25
 
 
26
  HF_TOKEN = os.getenv("HF_TOKEN")
27
  PRIVATE_BACKEND_URL = "st192011/Torgo-DSR-Private"
28
 
29
  def normalize_text(text):
30
  if not text: return ""
31
- return re.sub(r'[^\w\s]', '', text).lower().strip()
 
 
32
 
33
  def format_audio(audio_path):
34
  """Ensures audio is 16kHz mono to match ASR training conditions."""
@@ -75,9 +78,9 @@ def get_sample_logic(speaker_id):
75
  # --- Logic: Model Processing ---
76
  def process_audio_step_1(audio_path):
77
  """Runs Whisper Baseline and returns normalized text."""
78
- if not audio_path: return "No audio", ""
79
 
80
- # Pre-process audio format
81
  formatted_path = format_audio(audio_path)
82
 
83
  # Run Whisper
@@ -87,13 +90,21 @@ def process_audio_step_1(audio_path):
87
  return raw_w, norm_w
88
 
89
  def process_audio_step_2(audio_path, norm_whisper):
90
- """Sends audio + normalized whisper to the Private Model."""
91
- if not audio_path or not norm_whisper: return "Incomplete input from previous steps."
 
92
 
93
  try:
94
- formatted_path = format_audio(audio_path)
95
  client = Client(PRIVATE_BACKEND_URL, hf_token=HF_TOKEN)
96
- prediction = client.predict(formatted_path, norm_whisper, api_name="/predict_dsr")
 
 
 
 
 
 
 
97
  return prediction
98
  except Exception as e:
99
  return f"Backend Connection Required. Details: {e}"
@@ -101,7 +112,7 @@ def process_audio_step_2(audio_path, norm_whisper):
101
  # --- UI Construction ---
102
  with gr.Blocks(theme=gr.themes.Soft(), title="Torgo DSR Lab") as demo:
103
  gr.Markdown("# ⚗️ Torgo DSR Lab")
104
- gr.Markdown("Neural Reconstruction Layer for Torgo (In-domain/LOSO) and UA-Speech (Zero-Shot).")
105
 
106
  # Hidden state to store the path of the currently active audio
107
  active_audio_path = gr.State("")
@@ -132,7 +143,7 @@ with gr.Blocks(theme=gr.themes.Soft(), title="Torgo DSR Lab") as demo:
132
  gr.Markdown("#### Step 1: ASR Baseline")
133
  whisper_btn = gr.Button("Run Whisper Tiny")
134
  w_raw = gr.Textbox(label="Whisper Raw Transcript")
135
- w_norm = gr.Textbox(label="Whisper Normalized")
136
 
137
  gr.Markdown("---")
138
 
@@ -179,9 +190,9 @@ with gr.Blocks(theme=gr.themes.Soft(), title="Torgo DSR Lab") as demo:
179
  outputs=[active_audio_path, gt_box, meta_display]
180
  )
181
 
182
- # Personal Channel: Use Audio -> Update State -> Clear GT
183
  user_load_btn.click(
184
- lambda x: (x, "User Provided Audio", {"Dataset": "Custom", "Severity": "Unknown"}),
185
  inputs=user_audio,
186
  outputs=[active_audio_path, gt_box, meta_display]
187
  )
 
23
  }
24
  )
25
 
26
+ # Configuration from Environment Variables
27
  HF_TOKEN = os.getenv("HF_TOKEN")
28
  PRIVATE_BACKEND_URL = "st192011/Torgo-DSR-Private"
29
 
30
def normalize_text(text):
    """Normalize a transcript for comparison.

    Strips punctuation, lowercases, and collapses runs of whitespace
    into single spaces. Falsy input (None/empty) yields "".
    """
    if not text:
        return ""
    # Drop everything except word characters and whitespace, then lowercase.
    cleaned = re.sub(r'[^\w\s]', '', text).lower().strip()
    # Collapse internal whitespace runs to single spaces.
    return " ".join(cleaned.split())
35
 
36
  def format_audio(audio_path):
37
  """Ensures audio is 16kHz mono to match ASR training conditions."""
 
78
  # --- Logic: Model Processing ---
79
  def process_audio_step_1(audio_path):
80
  """Runs Whisper Baseline and returns normalized text."""
81
+ if not audio_path: return "No audio loaded", ""
82
 
83
+ # Pre-process audio format to 16k
84
  formatted_path = format_audio(audio_path)
85
 
86
  # Run Whisper
 
90
  return raw_w, norm_w
91
 
92
def process_audio_step_2(audio_path, norm_whisper):
    """Forward the audio and normalized Whisper transcript to the private model API.

    Returns the backend prediction, a prompt string when inputs are
    missing, or an error string when the backend call fails.
    """
    # Guard: both the audio and the Step-1 transcript must be present.
    if not audio_path or not norm_whisper:
        return "Please load data and run Whisper (Step 1) first."

    try:
        # Open a connection to the private backend Space.
        backend = Client(PRIVATE_BACKEND_URL, hf_token=HF_TOKEN)

        # Call the 'predict_dsr' endpoint defined in the private Space,
        # sending the audio file and the normalized Whisper transcript.
        return backend.predict(
            audio_path,
            norm_whisper,
            api_name="/predict_dsr",
        )
    except Exception as e:
        return f"Backend Connection Required. Details: {e}"
 
112
  # --- UI Construction ---
113
  with gr.Blocks(theme=gr.themes.Soft(), title="Torgo DSR Lab") as demo:
114
  gr.Markdown("# ⚗️ Torgo DSR Lab")
115
+ gr.Markdown("Neural Reconstruction Layer for Torgo and UA-Speech Zero-Shot.")
116
 
117
  # Hidden state to store the path of the currently active audio
118
  active_audio_path = gr.State("")
 
143
  gr.Markdown("#### Step 1: ASR Baseline")
144
  whisper_btn = gr.Button("Run Whisper Tiny")
145
  w_raw = gr.Textbox(label="Whisper Raw Transcript")
146
+ w_norm = gr.Textbox(label="Whisper Normalized (Input for Model)")
147
 
148
  gr.Markdown("---")
149
 
 
190
  outputs=[active_audio_path, gt_box, meta_display]
191
  )
192
 
193
+ # Personal Channel: Use Audio -> Update State -> Clear Reference
194
  user_load_btn.click(
195
+ lambda x: (x, "User Recorded (No Ground Truth)", {"Dataset": "Custom", "Severity": "N/A"}),
196
  inputs=user_audio,
197
  outputs=[active_audio_path, gt_box, meta_display]
198
  )