import os
import re
import requests
import gradio as gr
# =========================
# ElevenLabs Config
# =========================
# The API key is read from the environment so that no credential is stored in
# source control. A key that was previously committed should be treated as
# exposed and rotated. An empty string is used when the variable is unset.
ELEVENLABS_API_KEY = os.environ.get("ELEVENLABS_API_KEY", "")
# Endpoint that transcribe_audio() posts uploaded audio to.
STT_URL = "https://api.elevenlabs.io/v1/speech-to-text"
# =========================
# Regex Cleaning
# =========================
# Every pattern below is meant to be used with the replacement r'\1',
# i.e. the whole match is replaced by its first capture group.

# A word immediately repeated one or more times ("the the the"), any case.
REPEAT_WORD = re.compile(r'\b(\w+)(?:\s+\1\b)+', re.IGNORECASE)
# A character repeated three or more times in a row ("sooo").
# Digits are excluded so that numbers such as "1000" are not turned into "10";
# newlines stay excluded as they were with the original '.'.
CHAR_STRETCH = re.compile(r'([^\d\n])\1{2,}')
# A short (1-3 character) token repeated back-to-back, any case.
REPEAT_SYLLABLE = re.compile(r'\b(\w{1,3})(?:\s+\1\b)+', re.IGNORECASE)
# Letter-level stutter joined by hyphens or tatweel ("b-b-but", "ب-ب-بالشيء").
# The match ends on the final repeated letter (the word's own first letter),
# so replacing with group 1 leaves exactly one copy of it.
STUTTER = re.compile(r'\b(\w)(?:[\-ـ]+\1)+')
# A word repeated right after a Latin or Arabic comma ("yes, yes").
# The trailing \b stops a prefix match from eating a different word
# ("the, theory" must stay untouched).
REPEAT_AFTER_COMMA = re.compile(r'(\b\w+\b)[،,]\s+\1\b')
# Whitespace sitting directly before a comma; group 1 is the comma itself.
COMMA_SPACES = re.compile(r'\s+([،,])')
def is_filler(word):
    """Return True when *word* should be discarded as a filler token.

    After lowercasing, a token is treated as filler when it is at most two
    characters long, or when it is at most four characters long and made of
    a single repeated character (for example "mmm").

    NOTE: the length rule is blunt: any one- or two-character token is
    reported as filler, whatever it is. The empty string is also filler.
    """
    lowered = word.lower()
    size = len(lowered)
    single_char_run = size <= 4 and len(set(lowered)) == 1
    return size <= 2 or single_char_run
def clean_transcript(text):
    """Return *text* with repetitions collapsed and filler tokens removed.

    Six regex passes run in a fixed order, each replacing a match with its
    first capture group. The result is then split on whitespace, tokens for
    which is_filler() is true are dropped, and the remainder is joined with
    single spaces.
    """
    # Order matters: every pass operates on the output of the previous one.
    cleanup_passes = (
        CHAR_STRETCH,        # collapse stretched sounds
        STUTTER,             # stutter like ب-ب-بالشيء
        REPEAT_WORD,         # repeated words
        REPEAT_SYLLABLE,     # repeated short syllables
        REPEAT_AFTER_COMMA,  # word repeated after a comma
        COMMA_SPACES,        # spaces before a comma
    )
    for pattern in cleanup_passes:
        text = pattern.sub(r'\1', text)

    return " ".join(token for token in text.split() if not is_filler(token))
# =========================
# Speech To Text
# =========================
def transcribe_audio(audio_file):
    """Send an audio file to the speech-to-text endpoint and clean the result.

    Args:
        audio_file: Path of the audio file to upload, or None when no audio
            was provided.

    Returns:
        A ``(raw_text, cleaned_text)`` tuple of strings. When no audio is
        given, the request fails, the server answers with a non-200 status,
        or the body is not JSON, the first element is a human-readable
        message and the second element is an empty string.
    """
    if audio_file is None:
        return "No audio uploaded", ""

    headers = {"xi-api-key": ELEVENLABS_API_KEY}
    data = {
        "model_id": "scribe_v2",
        # NOTE(review): enable_logging may be expected as a query parameter
        # rather than a form field. Confirm against the API reference.
        "enable_logging": "false",
    }

    try:
        # The upload must happen while the file handle is still open.
        with open(audio_file, "rb") as f:
            response = requests.post(
                STT_URL,
                headers=headers,
                files={"file": f},
                data=data,
                # (connect, read) seconds. Without a timeout a stalled
                # connection would block this callback forever.
                timeout=(10, 600),
            )
    except requests.RequestException as exc:
        # Report network failures the same way as HTTP errors below.
        return f"Error: {exc}", ""

    if response.status_code != 200:
        return f"Error: {response.text}", ""

    try:
        result = response.json()
    except ValueError:
        # A 200 response whose body is not valid JSON.
        return f"Error: {response.text}", ""

    if "segments" in result:
        # Each segment's text is followed by a single space.
        text = "".join(
            segment.get("text", "") + " " for segment in result["segments"]
        )
    else:
        text = result.get("text", "")

    cleaned = clean_transcript(text)
    return text, cleaned
# =========================
# Gradio Interface
# =========================
# UI layout: one audio upload, a button, and two text boxes that receive the
# (raw, cleaned) pair returned by transcribe_audio.
with gr.Blocks() as demo:
    gr.Markdown("# Arabic Speech Cleaner")
    gr.Markdown(
        "Upload audio → convert to text using ElevenLabs → remove fillers and stuttering"
    )
    # type="filepath" hands the callback a path on disk rather than raw samples.
    audio_input = gr.Audio(
        type="filepath",
        label="Upload Audio"
    )
    raw_text = gr.Textbox(
        label="Original Transcript",
        lines=8
    )
    cleaned_text = gr.Textbox(
        label="Cleaned Transcript",
        lines=8
    )
    btn = gr.Button("Transcribe")
    # Output order matches the tuple order returned by transcribe_audio.
    btn.click(
        fn=transcribe_audio,
        inputs=audio_input,
        outputs=[raw_text, cleaned_text]
    )

demo.launch()