Spaces:

st192011
/

Torgo-DSR-Lab

Sleeping

App Files Files Community

st192011 committed 13 days ago

Commit

6cf37ae

verified ·

1 Parent(s): d5b3a6f

Update app.py

Browse files

Files changed (1) hide show

app.py +18 -13

app.py CHANGED Viewed

@@ -34,19 +34,25 @@ def normalize_text(text):
     # Remove special chars and lowercase
     return re.sub(r'[^\w\s]', '', text).lower().strip()
 def get_sample_logic(speaker_id):
-    """Bypasses internal decoders for stability and handles dataset differences."""
     try:
-        if "UA" in speaker_id:
-            # UA-Speech loading (Speaker F02)
             dataset = load_dataset("resproj007/uaspeech_female", split="train", streaming=True)
-            dataset = dataset.cast_column("audio", Audio(decode=False))
-            speaker_ds = dataset.filter(lambda x: x["speaker_id"] == "F02")
         else:
-            # Torgo loading (Using path-parsing for IDs)
             dataset = load_dataset("abnerh/TORGO-database", split="train", streaming=True)
             dataset = dataset.cast_column("audio", Audio(decode=False))
             def filter_spk(x):
                 sid = str(x.get('speaker_id', '')).upper()
                 if not sid or sid == "NONE":
@@ -54,18 +60,17 @@ def get_sample_logic(speaker_id):
                 return sid == speaker_id
             speaker_ds = dataset.filter(filter_spk)
-        # Get sample and decode manually
-        sample = next(iter(speaker_ds.shuffle(buffer_size=50)))
-        gt_text = sample.get('text') or sample.get('transcription') or sample.get('sentence')
-        audio_bytes = sample['audio']['bytes']
-        audio_data, sample_rate = librosa.load(io.BytesIO(audio_bytes), sr=16000)
         temp_path = "current_sample.wav"
         sf.write(temp_path, audio_data, sample_rate)
         return temp_path, gt_text.lower().strip(), SPEAKER_META[speaker_id]
     except Exception as e:
         return None, f"Dataset Error: {e}", {}

     # Remove special chars and lowercase
     return re.sub(r'[^\w\s]', '', text).lower().strip()
+# --- Data Loading Logic ---
 def get_sample_logic(speaker_id):
     try:
+        if speaker_id == "F02 (UA)":
+            # 1. UA-Speech access (Using the running code you provided)
             dataset = load_dataset("resproj007/uaspeech_female", split="train", streaming=True)
+            # Since this repo is specifically for UA female, we pull the sample directly
+            sample = next(iter(dataset.shuffle(buffer_size=50)))
+            gt_text = sample.get('text') or sample.get('transcription') or sample.get('sentence')
+            audio_data = sample['audio']['array']
+            sample_rate = sample['audio']['sampling_rate']
         else:
+            # 2. Torgo access (Using your training logic)
             dataset = load_dataset("abnerh/TORGO-database", split="train", streaming=True)
             dataset = dataset.cast_column("audio", Audio(decode=False))
+            # Use path-parsing to find specific speaker IDs in Torgo
             def filter_spk(x):
                 sid = str(x.get('speaker_id', '')).upper()
                 if not sid or sid == "NONE":
                 return sid == speaker_id
             speaker_ds = dataset.filter(filter_spk)
+            sample = next(iter(speaker_ds.shuffle(buffer_size=20)))
+            gt_text = sample.get('transcription') or sample.get('text')
+            audio_bytes = sample['audio']['bytes']
+            audio_data, sample_rate = librosa.load(io.BytesIO(audio_bytes), sr=16000)
         temp_path = "current_sample.wav"
         sf.write(temp_path, audio_data, sample_rate)
         return temp_path, gt_text.lower().strip(), SPEAKER_META[speaker_id]
     except Exception as e:
         return None, f"Dataset Error: {e}", {}