Spaces:

Sammaali
/

Post_Process_Elevenlabs

Sleeping

App Files Community

Sammaali committed on Mar 11

Commit

b7809d1

verified ·

1 Parent(s): 6d44df0

Update app.py

Browse files

Files changed (1) hide show

app.py +37 -11

app.py CHANGED Viewed

@@ -18,6 +18,10 @@ REPEAT_WORD = re.compile(r'\b(\w+)(?:\s+\1\b)+', re.IGNORECASE)
 CHAR_STRETCH = re.compile(r'(.)\1{2,}')
 REPEAT_SYLLABLE = re.compile(r'\b(\w{1,3})(?:\s+\1\b)+', re.IGNORECASE)
 def is_filler(word):
     w = word.lower()
@@ -30,12 +34,27 @@ def is_filler(word):
     return False
 def clean_transcript(text):
     text = CHAR_STRETCH.sub(r'\1', text)
     text = REPEAT_WORD.sub(r'\1', text)
     text = REPEAT_SYLLABLE.sub(r'\1', text)
     words = text.split()
     filtered = []
@@ -45,7 +64,6 @@ def clean_transcript(text):
     return " ".join(filtered)
 # =========================
 # Speech To Text
 # =========================
@@ -55,10 +73,14 @@ def transcribe_audio(audio_file):
     if audio_file is None:
         return "No audio uploaded", ""
-    headers = {"xi-api-key": ELEVENLABS_API_KEY}
     with open(audio_file, "rb") as f:
         files = {"file": f}
         data = {
             "model_id": "scribe_v2",
             "enable_logging": "false"
@@ -79,8 +101,8 @@ def transcribe_audio(audio_file):
     text = ""
     if "segments" in result:
-        for seg in result["segments"]:
-            text += seg.get("text", "") + " "
     else:
         text = result.get("text", "")
@@ -90,15 +112,21 @@ def transcribe_audio(audio_file):
 # =========================
-# Gradio UI
 # =========================
 with gr.Blocks() as demo:
-    gr.Markdown("# Speech To Text Cleaner")
-    gr.Markdown("Upload audio → convert to text → remove fillers")
-    audio_input = gr.Audio(type="filepath", label="Upload Audio")
     raw_text = gr.Textbox(
         label="Original Transcript",
@@ -118,6 +146,4 @@ with gr.Blocks() as demo:
         outputs=[raw_text, cleaned_text]
     )
-if __name__ == "__main__":
-    demo.launch()

 CHAR_STRETCH = re.compile(r'(.)\1{2,}')
 REPEAT_SYLLABLE = re.compile(r'\b(\w{1,3})(?:\s+\1\b)+', re.IGNORECASE)
+STUTTER = re.compile(r'\b(\w)[\-ـ]+(\1[\-ـ]+)+')
+REPEAT_AFTER_COMMA = re.compile(r'(\b\w+\b)[،,]\s+\1')
+COMMA_SPACES = re.compile(r'\s+([،,])')
 def is_filler(word):
     w = word.lower()
     return False
 def clean_transcript(text):
+    # collapse stretched sounds
     text = CHAR_STRETCH.sub(r'\1', text)
+    # fix stutter like ب-ب-بالشيء
+    text = STUTTER.sub(r'\1', text)
+    # repeated words
     text = REPEAT_WORD.sub(r'\1', text)
+    # repeated short syllables
     text = REPEAT_SYLLABLE.sub(r'\1', text)
+    # repeated after comma
+    text = REPEAT_AFTER_COMMA.sub(r'\1', text)
+    # fix spaces before comma
+    text = COMMA_SPACES.sub(r'\1', text)
     words = text.split()
     filtered = []
     return " ".join(filtered)
 # =========================
 # Speech To Text
 # =========================
     if audio_file is None:
         return "No audio uploaded", ""
+    headers = {
+        "xi-api-key": ELEVENLABS_API_KEY
+    }
     with open(audio_file, "rb") as f:
         files = {"file": f}
         data = {
             "model_id": "scribe_v2",
             "enable_logging": "false"
     text = ""
     if "segments" in result:
+        for segment in result["segments"]:
+            text += segment.get("text", "") + " "
     else:
         text = result.get("text", "")
 # =========================
+# Gradio Interface
 # =========================
 with gr.Blocks() as demo:
+    gr.Markdown("# Arabic Speech Cleaner")
+    gr.Markdown(
+        "Upload audio → convert to text using ElevenLabs → remove fillers and stuttering"
+    )
+    audio_input = gr.Audio(
+        type="filepath",
+        label="Upload Audio"
+    )
     raw_text = gr.Textbox(
         label="Original Transcript",
         outputs=[raw_text, cleaned_text]
     )
+demo.launch()