"""Gradio app: transcribe audio with ElevenLabs, then clean the transcript.

The cleaning step collapses stretched characters, stutters and repeated
words with regular expressions, then drops very short "filler" tokens.
"""

import os
import re
from typing import Optional, Tuple

import gradio as gr
import requests

# =========================
# ElevenLabs Config
# =========================

# SECURITY: the API key is read from the environment instead of being
# hard-coded in the source. Any key that was previously committed in this
# file must be treated as leaked and rotated.
ELEVENLABS_API_KEY = os.environ.get("ELEVENLABS_API_KEY", "")
STT_URL = "https://api.elevenlabs.io/v1/speech-to-text"

# (connect, read) timeouts in seconds. requests has no default timeout, so
# without this a stalled connection would block the UI callback forever.
REQUEST_TIMEOUT = (10, 300)

# =========================
# Regex Cleaning
# =========================

# A whole word followed by one or more repetitions of itself.
REPEAT_WORD = re.compile(r'\b(\w+)(?:\s+\1\b)+', re.IGNORECASE)

# Any non-digit character repeated 3+ times in a row. Digits are excluded so
# that numbers such as "1000" are not rewritten to "10".
CHAR_STRETCH = re.compile(r'(\D)\1{2,}')

# Same as REPEAT_WORD but limited to tokens of 1-3 characters.
REPEAT_SYLLABLE = re.compile(r'\b(\w{1,3})(?:\s+\1\b)+', re.IGNORECASE)

# A letter repeated at least twice with '-' or tatweel separators, e.g.
# "b-b-". The trailing optional \1 also consumes the first letter of the real
# word, so replacing the match with \1 yields "but" rather than "bbut".
STUTTER = re.compile(r'\b(\w)[\-ـ]+(?:\1[\-ـ]+)+\1?')

# "word, word" -> "word". The final \b keeps "the, theory" from matching.
REPEAT_AFTER_COMMA = re.compile(r'(\b\w+\b)[،,]\s+\1\b')

# Whitespace that precedes a Latin or Arabic comma.
COMMA_SPACES = re.compile(r'\s+([،,])')


def is_filler(word: str) -> bool:
    """Return True if *word* should be dropped from the cleaned transcript.

    A token is treated as filler when it is at most two characters long, or
    when it is at most four characters long and made of a single repeated
    character. Tokens containing a digit are never treated as filler.

    NOTE(review): the two-character rule also removes genuine short words
    (for example Arabic prepositions); confirm this is intended.
    """
    w = word.lower()
    # Numbers are content, not hesitation sounds.
    if any(ch.isdigit() for ch in w):
        return False
    if len(w) <= 2:
        return True
    return len(set(w)) == 1 and len(w) <= 4


def clean_transcript(text: str) -> str:
    """Return *text* with stretches, stutters, repeats and fillers removed.

    The substitutions run in a fixed order; the result is re-joined with
    single spaces after filler tokens are filtered out.
    """
    # collapse stretched sounds
    text = CHAR_STRETCH.sub(r'\1', text)
    # fix stutter like ب-ب-بالشيء
    text = STUTTER.sub(r'\1', text)
    # repeated words
    text = REPEAT_WORD.sub(r'\1', text)
    # repeated short syllables
    text = REPEAT_SYLLABLE.sub(r'\1', text)
    # repeated after comma
    text = REPEAT_AFTER_COMMA.sub(r'\1', text)
    # fix spaces before comma
    text = COMMA_SPACES.sub(r'\1', text)

    return " ".join(w for w in text.split() if not is_filler(w))


# =========================
# Speech To Text
# =========================

def _extract_text(result) -> str:
    """Pull the transcript out of the decoded JSON response.

    Uses the space-joined "text" of every entry in "segments" when that key
    is present, otherwise the top-level "text" value.
    """
    if not isinstance(result, dict):
        return ""
    if "segments" in result:
        return " ".join(
            (segment.get("text") or "") for segment in result["segments"]
        )
    return result.get("text") or ""


def transcribe_audio(audio_file: Optional[str]) -> Tuple[str, str]:
    """Transcribe *audio_file* and return ``(raw_text, cleaned_text)``.

    Args:
        audio_file: Path to the audio file, or None when nothing was uploaded.

    Returns:
        A pair of strings. On success the first is the transcript and the
        second is the output of ``clean_transcript``. On any failure the
        first is a human-readable message and the second is empty; the
        function does not raise for missing configuration, file, network or
        response-decoding errors.
    """
    if audio_file is None:
        return "No audio uploaded", ""
    if not ELEVENLABS_API_KEY:
        return "Error: ELEVENLABS_API_KEY environment variable is not set", ""

    headers = {"xi-api-key": ELEVENLABS_API_KEY}
    # NOTE(review): "enable_logging" is sent as a form field, as before;
    # confirm against the ElevenLabs API reference whether it is expected
    # as a query parameter instead.
    data = {
        "model_id": "scribe_v2",
        "enable_logging": "false",
    }

    try:
        with open(audio_file, "rb") as f:
            response = requests.post(
                STT_URL,
                headers=headers,
                files={"file": f},
                data=data,
                timeout=REQUEST_TIMEOUT,
            )
    except (OSError, requests.RequestException) as exc:
        # Unreadable file, connection failure or timeout.
        return f"Error: {exc}", ""

    if response.status_code != 200:
        return f"Error: {response.text}", ""

    try:
        result = response.json()
    except ValueError:
        return "Error: response was not valid JSON", ""

    text = _extract_text(result)
    cleaned = clean_transcript(text)
    return text, cleaned


# =========================
# Gradio Interface
# =========================

with gr.Blocks() as demo:
    gr.Markdown("# Arabic Speech Cleaner")
    gr.Markdown(
        "Upload audio → convert to text using ElevenLabs → remove fillers and stuttering"
    )

    audio_input = gr.Audio(
        type="filepath",
        label="Upload Audio",
    )
    raw_text = gr.Textbox(
        label="Original Transcript",
        lines=8,
    )
    cleaned_text = gr.Textbox(
        label="Cleaned Transcript",
        lines=8,
    )

    btn = gr.Button("Transcribe")
    btn.click(
        fn=transcribe_audio,
        inputs=audio_input,
        outputs=[raw_text, cleaned_text],
    )

# Only start the server when run as a script, so importing this module
# (for example to reuse clean_transcript) has no side effect.
if __name__ == "__main__":
    demo.launch()