HusseinBashir committed on
Commit
04a7e38
·
verified ·
1 Parent(s): 52d260b

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +20 -15
app.py CHANGED
@@ -68,15 +68,11 @@ def number_to_words(number):
68
  return str(number)
69
 
70
  def normalize_text(text):
71
- # Remove commas from numbers like 1,000,000
72
  text = re.sub(r'(\d{1,3})(,\d{3})+', lambda m: m.group(0).replace(",", ""), text)
73
- # Remove decimals (e.g., .00)
74
  text = re.sub(r'\.\d+', '', text)
75
- # Replace numbers with Somali words
76
  def replace_num(match):
77
  return number_to_words(match.group())
78
  text = re.sub(r'\d+', replace_num, text)
79
- # Replace special symbols
80
  symbol_map = {
81
  '$': 'doolar',
82
  '=': 'egwal',
@@ -85,7 +81,6 @@ def normalize_text(text):
85
  }
86
  for sym, word in symbol_map.items():
87
  text = text.replace(sym, ' ' + word + ' ')
88
- # Character normalization
89
  text = text.replace("KH", "qa").replace("Z", "S")
90
  text = text.replace("SH", "SHa'a").replace("DH", "Dha'a")
91
  text = text.replace("ZamZam", "SamSam")
@@ -94,25 +89,35 @@ def normalize_text(text):
94
  def tts(text):
95
  paragraphs = text.strip().split("\n")
96
  audio_list = []
 
 
97
 
98
  for i, para in enumerate(paragraphs):
99
- if not para.strip():
 
100
  continue
101
- norm_para = normalize_text(para)
102
- inputs = tokenizer(norm_para, return_tensors="pt").to(device)
103
- with torch.no_grad():
104
- waveform = model(**inputs).waveform.squeeze().cpu().numpy()
105
 
106
- # Add pause between paragraphs
107
- if i < len(paragraphs) - 1:
 
 
 
 
 
 
 
 
 
 
108
  pause = np.zeros(int(model.config.sampling_rate * 0.8)) # 0.8s pause
109
  audio_list.append(np.concatenate((waveform, pause)))
110
- else:
111
- audio_list.append(waveform)
112
 
113
  final_audio = np.concatenate(audio_list)
114
  filename = "output.wav"
115
  scipy.io.wavfile.write(filename, rate=model.config.sampling_rate, data=(final_audio * 32767).astype(np.int16))
 
 
 
116
  return filename
117
 
118
  # Gradio interface
@@ -121,5 +126,5 @@ gr.Interface(
121
  inputs=gr.Textbox(label="Geli qoraal Soomaali ah", lines=10, placeholder="Ku qor 1 ama in ka badan paragraph..."),
122
  outputs=gr.Audio(label="Codka TTS"),
123
  title="Somali TTS",
124
- description="Ku qor qoraal Soomaaliyeed si aad u maqasho cod dabiici ah."
125
  ).launch()
 
68
  return str(number)
69
 
70
  def normalize_text(text):
 
71
  text = re.sub(r'(\d{1,3})(,\d{3})+', lambda m: m.group(0).replace(",", ""), text)
 
72
  text = re.sub(r'\.\d+', '', text)
 
73
  def replace_num(match):
74
  return number_to_words(match.group())
75
  text = re.sub(r'\d+', replace_num, text)
 
76
  symbol_map = {
77
  '$': 'doolar',
78
  '=': 'egwal',
 
81
  }
82
  for sym, word in symbol_map.items():
83
  text = text.replace(sym, ' ' + word + ' ')
 
84
  text = text.replace("KH", "qa").replace("Z", "S")
85
  text = text.replace("SH", "SHa'a").replace("DH", "Dha'a")
86
  text = text.replace("ZamZam", "SamSam")
 
89
  def tts(text):
90
  paragraphs = text.strip().split("\n")
91
  audio_list = []
92
+ max_chars = 500 # Qiyaasta ugu badan 2 daqiiqo
93
+ warn_msg = ""
94
 
95
  for i, para in enumerate(paragraphs):
96
+ para = para.strip()
97
+ if not para:
98
  continue
 
 
 
 
99
 
100
+ if len(para) > max_chars:
101
+ warn_msg += f"❗ Qaybta {i+1} aad ayaa ka badan 2 daqiiqo. Waan kala jaray.\n"
102
+ sub_parts = [para[j:j+max_chars] for j in range(0, len(para), max_chars)]
103
+ else:
104
+ sub_parts = [para]
105
+
106
+ for part in sub_parts:
107
+ norm_para = normalize_text(part)
108
+ inputs = tokenizer(norm_para, return_tensors="pt").to(device)
109
+ with torch.no_grad():
110
+ waveform = model(**inputs).waveform.squeeze().cpu().numpy()
111
+
112
  pause = np.zeros(int(model.config.sampling_rate * 0.8)) # 0.8s pause
113
  audio_list.append(np.concatenate((waveform, pause)))
 
 
114
 
115
  final_audio = np.concatenate(audio_list)
116
  filename = "output.wav"
117
  scipy.io.wavfile.write(filename, rate=model.config.sampling_rate, data=(final_audio * 32767).astype(np.int16))
118
+
119
+ if warn_msg:
120
+ print(warn_msg)
121
  return filename
122
 
123
  # Gradio interface
 
126
  inputs=gr.Textbox(label="Geli qoraal Soomaali ah", lines=10, placeholder="Ku qor 1 ama in ka badan paragraph..."),
127
  outputs=gr.Audio(label="Codka TTS"),
128
  title="Somali TTS",
129
+ description="Ku qor qoraal Soomaaliyeed si aad u maqasho cod dabiici ah. Qoraalka ha ka badnaan 2 daqiiqo per jumlad."
130
  ).launch()