Spaces:
Sleeping
Sleeping
Update app.py
Browse files
app.py
CHANGED
|
@@ -34,19 +34,25 @@ def normalize_text(text):
|
|
| 34 |
# Remove special chars and lowercase
|
| 35 |
return re.sub(r'[^\w\s]', '', text).lower().strip()
|
| 36 |
|
|
|
|
| 37 |
def get_sample_logic(speaker_id):
|
| 38 |
-
"""Bypasses internal decoders for stability and handles dataset differences."""
|
| 39 |
try:
|
| 40 |
-
if "UA"
|
| 41 |
-
# UA-Speech
|
| 42 |
dataset = load_dataset("resproj007/uaspeech_female", split="train", streaming=True)
|
| 43 |
-
|
| 44 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 45 |
else:
|
| 46 |
-
# Torgo
|
| 47 |
dataset = load_dataset("abnerh/TORGO-database", split="train", streaming=True)
|
| 48 |
dataset = dataset.cast_column("audio", Audio(decode=False))
|
| 49 |
|
|
|
|
| 50 |
def filter_spk(x):
|
| 51 |
sid = str(x.get('speaker_id', '')).upper()
|
| 52 |
if not sid or sid == "NONE":
|
|
@@ -54,18 +60,17 @@ def get_sample_logic(speaker_id):
|
|
| 54 |
return sid == speaker_id
|
| 55 |
|
| 56 |
speaker_ds = dataset.filter(filter_spk)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 57 |
|
| 58 |
-
# Get sample and decode manually
|
| 59 |
-
sample = next(iter(speaker_ds.shuffle(buffer_size=50)))
|
| 60 |
-
gt_text = sample.get('text') or sample.get('transcription') or sample.get('sentence')
|
| 61 |
-
|
| 62 |
-
audio_bytes = sample['audio']['bytes']
|
| 63 |
-
audio_data, sample_rate = librosa.load(io.BytesIO(audio_bytes), sr=16000)
|
| 64 |
-
|
| 65 |
temp_path = "current_sample.wav"
|
| 66 |
sf.write(temp_path, audio_data, sample_rate)
|
| 67 |
|
| 68 |
return temp_path, gt_text.lower().strip(), SPEAKER_META[speaker_id]
|
|
|
|
| 69 |
except Exception as e:
|
| 70 |
return None, f"Dataset Error: {e}", {}
|
| 71 |
|
|
|
|
| 34 |
# Remove special chars and lowercase
|
| 35 |
return re.sub(r'[^\w\s]', '', text).lower().strip()
|
| 36 |
|
| 37 |
+
# --- Data Loading Logic ---
|
| 38 |
def get_sample_logic(speaker_id):
|
|
|
|
| 39 |
try:
|
| 40 |
+
if speaker_id == "F02 (UA)":
|
| 41 |
+
# 1. UA-Speech: stream the female-speaker dataset (resproj007/uaspeech_female)
|
| 42 |
dataset = load_dataset("resproj007/uaspeech_female", split="train", streaming=True)
|
| 43 |
+
# Since this repo is specifically for UA female, we pull the sample directly
|
| 44 |
+
sample = next(iter(dataset.shuffle(buffer_size=50)))
|
| 45 |
+
|
| 46 |
+
gt_text = sample.get('text') or sample.get('transcription') or sample.get('sentence')
|
| 47 |
+
audio_data = sample['audio']['array']
|
| 48 |
+
sample_rate = sample['audio']['sampling_rate']
|
| 49 |
+
|
| 50 |
else:
|
| 51 |
+
# 2. TORGO: stream the dataset with automatic audio decoding disabled (raw bytes are decoded with librosa below)
|
| 52 |
dataset = load_dataset("abnerh/TORGO-database", split="train", streaming=True)
|
| 53 |
dataset = dataset.cast_column("audio", Audio(decode=False))
|
| 54 |
|
| 55 |
+
# Use path-parsing to find specific speaker IDs in Torgo
|
| 56 |
def filter_spk(x):
|
| 57 |
sid = str(x.get('speaker_id', '')).upper()
|
| 58 |
if not sid or sid == "NONE":
|
|
|
|
| 60 |
return sid == speaker_id
|
| 61 |
|
| 62 |
speaker_ds = dataset.filter(filter_spk)
|
| 63 |
+
sample = next(iter(speaker_ds.shuffle(buffer_size=20)))
|
| 64 |
+
|
| 65 |
+
gt_text = sample.get('transcription') or sample.get('text')
|
| 66 |
+
audio_bytes = sample['audio']['bytes']
|
| 67 |
+
audio_data, sample_rate = librosa.load(io.BytesIO(audio_bytes), sr=16000)
|
| 68 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 69 |
temp_path = "current_sample.wav"
|
| 70 |
sf.write(temp_path, audio_data, sample_rate)
|
| 71 |
|
| 72 |
return temp_path, gt_text.lower().strip(), SPEAKER_META[speaker_id]
|
| 73 |
+
|
| 74 |
except Exception as e:
|
| 75 |
return None, f"Dataset Error: {e}", {}
|
| 76 |
|