Spaces:

HusseinBashir
/

somalitts

Sleeping

App Files Community

HusseinBashir committed on May 28

Commit

a47d925

verified ·

1 Parent(s): 8e5335e

Update app.py

Browse files

Files changed (1) hide show

app.py +54 -31

app.py CHANGED Viewed

@@ -1,5 +1,4 @@
 import os
 os.environ["HF_HOME"] = "/tmp"
 os.environ["TRANSFORMERS_CACHE"] = "/tmp"
 os.environ["TORCH_HOME"] = "/tmp"
@@ -36,7 +35,8 @@ number_words = {
     100: "boqol", 1000: "kun"
 }
-def number_to_words(number: int) -> str:
     if number < 20:
         return number_words[number]
     elif number < 100:
@@ -71,10 +71,20 @@ def number_to_words(number: int) -> str:
     else:
         return str(number)
-def normalize_text(text: str) -> str:
-    numbers = re.findall(r'\d+', text)
-    for num in numbers:
-        text = text.replace(num, number_to_words(int(num)))
     text = text.replace("KH", "qa").replace("Z", "S")
     text = text.replace("SH", "SHa'a").replace("DH", "Dha'a")
     text = text.replace("ZamZam", "SamSam")
@@ -98,36 +108,49 @@ class TextIn(BaseModel):
 @app.post("/synthesize")
 async def synthesize_post(data: TextIn):
-    text = normalize_text(data.inputs)
-    inputs = tokenizer(text, return_tensors="pt").to(device)
-    with torch.no_grad():
-        output = model(**inputs)
-    waveform = (
-        output.waveform if hasattr(output, "waveform") else
-        output["waveform"] if isinstance(output, dict) and "waveform" in output else
-        output[0] if isinstance(output, (tuple, list)) else
-        None
-    )
-    if waveform is None:
-        return {"error": "Waveform not found in model output"}
     sample_rate = getattr(model.config, "sampling_rate", 22050)
-    wav_bytes = waveform_to_wav_bytes(waveform, sample_rate=sample_rate)
     return StreamingResponse(io.BytesIO(wav_bytes), media_type="audio/wav")
 @app.get("/synthesize")
 async def synthesize_get(text: str = Query(..., description="Text to synthesize"), test: bool = Query(False)):
-   if test:
-    paragraphs = text.count("\n") + 1  # Tirinta paragraphs-ka qoraalka
-    duration_s = paragraphs * 6        # 6 ilbiriqsi per paragraph
-    sample_rate = 22050
-    t = np.linspace(0, duration_s, int(sample_rate * duration_s), endpoint=False)
-    freq = 440
-    waveform = 0.5 * np.sin(2 * math.pi * freq * t).astype(np.float32)
-    pcm_waveform = (waveform * 32767).astype(np.int16)
-    buf = io.BytesIO()
-    scipy.io.wavfile.write(buf, rate=sample_rate, data=pcm_waveform)
-    buf.seek(0)
-    return StreamingResponse(buf, media_type="audio/wav")
     normalized = normalize_text(text)
     inputs = tokenizer(normalized, return_tensors="pt").to(device)
     with torch.no_grad():

 import os
 os.environ["HF_HOME"] = "/tmp"
 os.environ["TRANSFORMERS_CACHE"] = "/tmp"
 os.environ["TORCH_HOME"] = "/tmp"
     100: "boqol", 1000: "kun"
 }
+def number_to_words(number):
+    number = int(number)
     if number < 20:
         return number_words[number]
     elif number < 100:
     else:
         return str(number)
+def normalize_text(text):
+    text = re.sub(r'(\d{1,3})(,\d{3})+', lambda m: m.group(0).replace(",", ""), text)
+    text = re.sub(r'\.\d+', '', text)
+    def replace_num(match):
+        return number_to_words(match.group())
+    text = re.sub(r'\d+', replace_num, text)
+    symbol_map = {
+        '$': 'doolar',
+        '=': 'egwal',
+        '+': 'balaas',
+        '#': 'haash'
+    }
+    for sym, word in symbol_map.items():
+        text = text.replace(sym, ' ' + word + ' ')
     text = text.replace("KH", "qa").replace("Z", "S")
     text = text.replace("SH", "SHa'a").replace("DH", "Dha'a")
     text = text.replace("ZamZam", "SamSam")
 @app.post("/synthesize")
 async def synthesize_post(data: TextIn):
+    paragraphs = [p.strip() for p in data.inputs.split('\n') if p.strip()]
     sample_rate = getattr(model.config, "sampling_rate", 22050)
+    all_waveforms = []
+    for paragraph in paragraphs:
+        normalized = normalize_text(paragraph)
+        inputs = tokenizer(normalized, return_tensors="pt").to(device)
+        with torch.no_grad():
+            output = model(**inputs)
+        waveform = (
+            output.waveform if hasattr(output, "waveform") else
+            output["waveform"] if isinstance(output, dict) and "waveform" in output else
+            output[0] if isinstance(output, (tuple, list)) else
+            None
+        )
+        if waveform is None:
+            continue
+        all_waveforms.append(waveform)
+        silence = torch.zeros(1, sample_rate).to(waveform.device)
+        all_waveforms.append(silence)
+    if not all_waveforms:
+        return {"error": "No audio generated."}
+    final_waveform = torch.cat(all_waveforms, dim=-1)
+    wav_bytes = waveform_to_wav_bytes(final_waveform, sample_rate=sample_rate)
     return StreamingResponse(io.BytesIO(wav_bytes), media_type="audio/wav")
 @app.get("/synthesize")
 async def synthesize_get(text: str = Query(..., description="Text to synthesize"), test: bool = Query(False)):
+    if test:
+        paragraphs = text.count("\n") + 1
+        duration_s = paragraphs * 6
+        sample_rate = 22050
+        t = np.linspace(0, duration_s, int(sample_rate * duration_s), endpoint=False)
+        freq = 440
+        waveform = 0.5 * np.sin(2 * math.pi * freq * t).astype(np.float32)
+        pcm_waveform = (waveform * 32767).astype(np.int16)
+        buf = io.BytesIO()
+        scipy.io.wavfile.write(buf, rate=sample_rate, data=pcm_waveform)
+        buf.seek(0)
+        return StreamingResponse(buf, media_type="audio/wav")
     normalized = normalize_text(text)
     inputs = tokenizer(normalized, return_tensors="pt").to(device)
     with torch.no_grad():