Spaces:
Sleeping
Sleeping
| import os | |
| import re | |
| import requests | |
| import gradio as gr | |
# =========================
# ElevenLabs Config
# =========================
# The API key is a secret and must not live in source control. It is read
# from the ELEVENLABS_API_KEY environment variable (for a hosted deployment,
# configure it as a secret of the hosting platform).
# If the variable is unset the key is an empty string; the API request is
# then rejected and the error is shown through the normal "Error: ..." path.
# NOTE: a literal key was previously committed in this file. Treat that key
# as compromised and revoke/rotate it in the ElevenLabs dashboard.
ELEVENLABS_API_KEY = os.environ.get("ELEVENLABS_API_KEY", "")

# Endpoint that receives the uploaded audio for transcription.
STT_URL = "https://api.elevenlabs.io/v1/speech-to-text"
# =========================
# Regex Cleaning
# =========================
# Every pattern below is meant to be substituted with r'\1' (the first
# capture group), i.e. the match is replaced by a single kept copy.

# A word repeated back-to-back ("the the the"), compared case-insensitively.
REPEAT_WORD = re.compile(r'\b(\w+)(?:\s+\1\b)+', re.IGNORECASE)

# One character repeated three or more times in a row ("sooooo").
# Digits are excluded so numbers such as "1000" are not mangled into "10";
# newlines are excluded as well, matching what "." covered before.
CHAR_STRETCH = re.compile(r'([^\d\n])\1{2,}')

# A short token (1-3 word characters) repeated back-to-back.
REPEAT_SYLLABLE = re.compile(r'\b(\w{1,3})(?:\s+\1\b)+', re.IGNORECASE)

# A stuttered first letter: the letter followed by hyphens or Arabic tatweel,
# at least twice ("b-b-ball"). The optional tail also consumes the first
# letter of the word that follows, so substituting r'\1' yields "ball"
# instead of leaving the letter doubled ("bball"). When the next word does
# not start with that letter, only the stutter prefix is matched.
STUTTER = re.compile(r'\b(\w)[\-ـ]+(?:\1[\-ـ]+)+(?:\s*\1)?')

# A word repeated right after a Latin or Arabic comma ("yes, yes").
# The trailing \b keeps a longer word that merely starts with the same
# letters intact ("an, another" must not become "another").
REPEAT_AFTER_COMMA = re.compile(r'(\b\w+\b)[،,]\s+\1\b')

# Whitespace placed before a Latin or Arabic comma.
COMMA_SPACES = re.compile(r'\s+([،,])')
def is_filler(word):
    """Return True when *word* should be dropped as a filler token.

    After lower-casing, a token counts as filler when it is at most two
    characters long, or when it is three or four characters long and made
    of a single repeated character.

    NOTE(review): the length rule also drops genuine short words (for
    example two-letter prepositions) - confirm this is intended.
    """
    token = word.lower()
    size = len(token)
    if size <= 2:
        return True
    # Only short single-character runs remain as candidates.
    return size <= 4 and len(set(token)) == 1
def clean_transcript(text):
    """Return *text* with stretched sounds, stutters and repeats removed.

    The module-level patterns are applied one after another, each match
    being replaced by its first capture group. The result is then split on
    whitespace and every token for which ``is_filler`` is true is dropped;
    the remaining tokens are joined with single spaces.
    """
    # Order matters: each pass works on the output of the previous one.
    passes = (
        CHAR_STRETCH,        # collapse stretched sounds
        STUTTER,             # stuttered first letter
        REPEAT_WORD,         # repeated words
        REPEAT_SYLLABLE,     # repeated short syllables
        REPEAT_AFTER_COMMA,  # word repeated after a comma
        COMMA_SPACES,        # spaces before a comma
    )
    for pattern in passes:
        text = pattern.sub(r'\1', text)

    return " ".join(token for token in text.split() if not is_filler(token))
# =========================
# Speech To Text
# =========================
# (connect, read) timeouts in seconds for the transcription request, so a
# stalled connection cannot block the UI forever. The read value is generous
# on the assumption that long recordings take a while to process - tune it
# to the audio lengths actually expected.
_STT_TIMEOUT = (10, 300)


def transcribe_audio(audio_file):
    """Transcribe an audio file and return the raw and cleaned transcripts.

    Args:
        audio_file: Path of the audio file to upload, or None when nothing
            was provided.

    Returns:
        A ``(raw_text, cleaned_text)`` tuple. On any failure (no file,
        unreadable file, network error, non-200 status, non-JSON body) the
        first element is a human-readable message and the second is "".
    """
    if audio_file is None:
        return "No audio uploaded", ""

    headers = {"xi-api-key": ELEVENLABS_API_KEY}
    # NOTE(review): "enable_logging" is sent as a form field here; confirm
    # against the API reference that the service reads it from the body.
    data = {
        "model_id": "scribe_v2",
        "enable_logging": "false",
    }

    try:
        # The request must run while the file is still open, because the
        # handle is streamed as the multipart "file" field.
        with open(audio_file, "rb") as f:
            response = requests.post(
                STT_URL,
                headers=headers,
                files={"file": f},
                data=data,
                timeout=_STT_TIMEOUT,
            )
    except (OSError, requests.RequestException) as exc:
        # Report file and network problems the same way as API errors
        # instead of letting the exception escape to the UI.
        return f"Error: {exc}", ""

    if response.status_code != 200:
        return f"Error: {response.text}", ""

    try:
        result = response.json()
    except ValueError:
        return f"Error: unexpected non-JSON response: {response.text}", ""

    if "segments" in result:
        # Each segment's text is followed by one space, including the last.
        text = "".join(
            segment.get("text", "") + " " for segment in result["segments"]
        )
    else:
        text = result.get("text", "")

    cleaned = clean_transcript(text)
    return text, cleaned
# =========================
# Gradio Interface
# =========================
# Layout: heading, description, audio upload, two transcript boxes and a
# button that runs the transcription.
with gr.Blocks() as demo:
    gr.Markdown("# Arabic Speech Cleaner")
    gr.Markdown("Upload audio → convert to text using ElevenLabs → remove fillers and stuttering")

    # "filepath" matches transcribe_audio, which opens its argument as a path.
    audio_input = gr.Audio(type="filepath", label="Upload Audio")

    # Output boxes: the transcript as returned, and the cleaned version.
    raw_text = gr.Textbox(label="Original Transcript", lines=8)
    cleaned_text = gr.Textbox(label="Cleaned Transcript", lines=8)

    btn = gr.Button("Transcribe")
    # The two values returned by transcribe_audio fill the two text boxes.
    btn.click(fn=transcribe_audio, inputs=audio_input, outputs=[raw_text, cleaned_text])

demo.launch()