st192011 committed on
Commit
6cf37ae
·
verified ·
1 Parent(s): d5b3a6f

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +18 -13
app.py CHANGED
@@ -34,19 +34,25 @@ def normalize_text(text):
34
  # Remove special chars and lowercase
35
  return re.sub(r'[^\w\s]', '', text).lower().strip()
36
 
 
37
  def get_sample_logic(speaker_id):
38
- """Bypasses internal decoders for stability and handles dataset differences."""
39
  try:
40
- if "UA" in speaker_id:
41
- # UA-Speech loading (Speaker F02)
42
  dataset = load_dataset("resproj007/uaspeech_female", split="train", streaming=True)
43
- dataset = dataset.cast_column("audio", Audio(decode=False))
44
- speaker_ds = dataset.filter(lambda x: x["speaker_id"] == "F02")
 
 
 
 
 
45
  else:
46
- # Torgo loading (Using path-parsing for IDs)
47
  dataset = load_dataset("abnerh/TORGO-database", split="train", streaming=True)
48
  dataset = dataset.cast_column("audio", Audio(decode=False))
49
 
 
50
  def filter_spk(x):
51
  sid = str(x.get('speaker_id', '')).upper()
52
  if not sid or sid == "NONE":
@@ -54,18 +60,17 @@ def get_sample_logic(speaker_id):
54
  return sid == speaker_id
55
 
56
  speaker_ds = dataset.filter(filter_spk)
 
 
 
 
 
57
 
58
- # Get sample and decode manually
59
- sample = next(iter(speaker_ds.shuffle(buffer_size=50)))
60
- gt_text = sample.get('text') or sample.get('transcription') or sample.get('sentence')
61
-
62
- audio_bytes = sample['audio']['bytes']
63
- audio_data, sample_rate = librosa.load(io.BytesIO(audio_bytes), sr=16000)
64
-
65
  temp_path = "current_sample.wav"
66
  sf.write(temp_path, audio_data, sample_rate)
67
 
68
  return temp_path, gt_text.lower().strip(), SPEAKER_META[speaker_id]
 
69
  except Exception as e:
70
  return None, f"Dataset Error: {e}", {}
71
 
 
34
  # Remove special chars and lowercase
35
  return re.sub(r'[^\w\s]', '', text).lower().strip()
36
 
37
+ # --- Data Loading Logic ---
38
  def get_sample_logic(speaker_id):
 
39
  try:
40
+ if speaker_id == "F02 (UA)":
41
+ # 1. UA-Speech access (Using the running code you provided)
42
  dataset = load_dataset("resproj007/uaspeech_female", split="train", streaming=True)
43
+ # Since this repo is specifically for UA female, we pull the sample directly
44
+ sample = next(iter(dataset.shuffle(buffer_size=50)))
45
+
46
+ gt_text = sample.get('text') or sample.get('transcription') or sample.get('sentence')
47
+ audio_data = sample['audio']['array']
48
+ sample_rate = sample['audio']['sampling_rate']
49
+
50
  else:
51
+ # 2. Torgo access (Using your training logic)
52
  dataset = load_dataset("abnerh/TORGO-database", split="train", streaming=True)
53
  dataset = dataset.cast_column("audio", Audio(decode=False))
54
 
55
+ # Use path-parsing to find specific speaker IDs in Torgo
56
  def filter_spk(x):
57
  sid = str(x.get('speaker_id', '')).upper()
58
  if not sid or sid == "NONE":
 
60
  return sid == speaker_id
61
 
62
  speaker_ds = dataset.filter(filter_spk)
63
+ sample = next(iter(speaker_ds.shuffle(buffer_size=20)))
64
+
65
+ gt_text = sample.get('transcription') or sample.get('text')
66
+ audio_bytes = sample['audio']['bytes']
67
+ audio_data, sample_rate = librosa.load(io.BytesIO(audio_bytes), sr=16000)
68
 
 
 
 
 
 
 
 
69
  temp_path = "current_sample.wav"
70
  sf.write(temp_path, audio_data, sample_rate)
71
 
72
  return temp_path, gt_text.lower().strip(), SPEAKER_META[speaker_id]
73
+
74
  except Exception as e:
75
  return None, f"Dataset Error: {e}", {}
76