# Sammaali's picture
# Update app.py
# b7809d1 verified
import os
import re
import requests
import gradio as gr
# =========================
# ElevenLabs Config
# =========================
# The API key is a secret, so it is read from the ELEVENLABS_API_KEY
# environment variable instead of being committed to the source file.
# When the variable is unset the key is an empty string.
# NOTE(review): the key that used to be hard-coded here has been exposed in
# the repository history and should be revoked.
ELEVENLABS_API_KEY = os.environ.get("ELEVENLABS_API_KEY", "")
# Speech-to-text endpoint the audio upload is posted to.
STT_URL = "https://api.elevenlabs.io/v1/speech-to-text"
# =========================
# Regex Cleaning
# =========================
# Every pattern below is meant to be applied with the replacement r'\1',
# i.e. the whole match is replaced by its first capture group.

# A word followed by one or more whitespace-separated copies of itself.
REPEAT_WORD = re.compile(r'\b(\w+)(?:\s+\1\b)+', re.IGNORECASE)
# Three or more identical characters in a row. Digits are excluded so that
# numbers such as 1000 are not rewritten to 10; like the '.' it replaces,
# the class does not match a newline.
CHAR_STRETCH = re.compile(r'([^\d\n])\1{2,}')
# A 1-3 character word followed by whitespace-separated copies of itself.
REPEAT_SYLLABLE = re.compile(r'\b(\w{1,3})(?:\s+\1\b)+', re.IGNORECASE)
# A word-initial letter repeated through hyphens or tatweel, e.g. "s-s-sorry"
# or "b-but". The match ends on the last copy of the letter, so replacing it
# with group 1 leaves exactly one copy in front of the rest of the word.
# Only letters qualify, which keeps things like "1-1" intact.
STUTTER = re.compile(r'\b([^\W\d_])(?:[\-ـ]+\1)+')
# A word, a comma (Arabic or Latin), whitespace, then the same whole word.
# The trailing \b stops a prefix match such as "no, nothing".
REPEAT_AFTER_COMMA = re.compile(r'(\b\w+\b)[،,]\s+\1\b')
# Whitespace directly before a comma; group 1 is the comma itself.
COMMA_SPACES = re.compile(r'\s+([،,])')
def is_filler(word):
    """Return True when *word* should be dropped as a filler token.

    After lower-casing, a token counts as filler when it is at most two
    characters long, or when it is at most four characters long and made of
    a single repeated character.

    NOTE: the length rule applies to every token of one or two characters
    (and to the empty string), whatever its meaning.
    """
    lowered = word.lower()
    size = len(lowered)
    if size <= 2:
        return True
    return size <= 4 and len(set(lowered)) == 1
def clean_transcript(text):
    """Remove stretches, stutters, repetitions and filler tokens from *text*.

    The module-level patterns are applied one after another, each replacing
    its match with the first capture group, and each working on the output
    of the previous pass. The result is then split on whitespace, tokens
    flagged by ``is_filler`` are discarded, and the rest is joined with
    single spaces.
    """
    # Order is significant: stretched sounds, stutters, repeated words,
    # repeated short syllables, repeats after a comma, spaces before a comma.
    pipeline = (
        CHAR_STRETCH,
        STUTTER,
        REPEAT_WORD,
        REPEAT_SYLLABLE,
        REPEAT_AFTER_COMMA,
        COMMA_SPACES,
    )
    for pattern in pipeline:
        text = pattern.sub(r'\1', text)

    return " ".join(token for token in text.split() if not is_filler(token))
# =========================
# Speech To Text
# =========================
def transcribe_audio(audio_file):
    """Transcribe an audio file through the speech-to-text endpoint.

    Args:
        audio_file: Path of the audio file to upload, or ``None`` when no
            audio was provided.

    Returns:
        A ``(raw_text, cleaned_text)`` tuple. When nothing was uploaded or
        the request fails, the first element is a message describing the
        problem and the second element is an empty string.
    """
    if audio_file is None:
        return "No audio uploaded", ""

    # (connect, read) timeouts in seconds; without one, requests waits
    # indefinitely on an unresponsive server.
    timeout = (10, 300)
    headers = {"xi-api-key": ELEVENLABS_API_KEY}
    data = {
        "model_id": "scribe_v2",
        "enable_logging": "false",
    }

    try:
        # The file handle must stay open until the upload has completed.
        with open(audio_file, "rb") as f:
            response = requests.post(
                STT_URL,
                headers=headers,
                files={"file": f},
                data=data,
                timeout=timeout,
            )
    except (OSError, requests.RequestException) as exc:
        # Unreadable file or network failure: report it the same way as an
        # HTTP error instead of letting the exception escape.
        return f"Error: {exc}", ""

    if response.status_code != 200:
        return f"Error: {response.text}", ""

    try:
        result = response.json()
    except ValueError as exc:
        # The body was not valid JSON.
        return f"Error: {exc}", ""

    if "segments" in result:
        # Each segment's text is followed by a single space.
        text = "".join(
            segment.get("text", "") + " " for segment in result["segments"]
        )
    else:
        text = result.get("text", "")

    return text, clean_transcript(text)
# =========================
# Gradio Interface
# =========================
with gr.Blocks() as demo:
    # Title and a one-line description of the pipeline.
    gr.Markdown("# Arabic Speech Cleaner")
    gr.Markdown("Upload audio → convert to text using ElevenLabs → remove fillers and stuttering")

    # The uploaded audio is handed to the callback as a file path.
    audio_input = gr.Audio(type="filepath", label="Upload Audio")

    # Output boxes: the transcript as returned, and the cleaned version.
    raw_text = gr.Textbox(label="Original Transcript", lines=8)
    cleaned_text = gr.Textbox(label="Cleaned Transcript", lines=8)

    # Clicking the button runs transcribe_audio and fills both text boxes.
    btn = gr.Button("Transcribe")
    btn.click(fn=transcribe_audio, inputs=audio_input, outputs=[raw_text, cleaned_text])

demo.launch()