Spaces:

Sammaali
/

Post_Process_Elevenlabs

Sleeping

File size: 3,083 Bytes

ff44794
b23bcf3
ff44794
778f5dc
3debdab
ff44794
 
 
 
6d44df0
ff44794
 
 
 
 
 
778f5dc
 
 
c147ba9
b7809d1
 
 
 
778f5dc
 
3debdab
778f5dc
 
b23bcf3
778f5dc
 
3debdab
778f5dc
3debdab
 
b7809d1
778f5dc
b23bcf3
b7809d1
778f5dc
b7809d1
 
 
 
 
778f5dc
b7809d1
 
778f5dc
b23bcf3
b7809d1
 
 
 
 
 
778f5dc
3debdab
778f5dc
 
 
 
b23bcf3
778f5dc
b23bcf3
ff44794
 
 
 
 
 
 
 
 
b7809d1
 
 
ff44794
 
b7809d1
ff44794
b7809d1
ff44794
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
b7809d1
 
ff44794
 
 
 
 
 
 
 
 
b7809d1
ff44794
 
 
 
b7809d1
ff44794
b7809d1
 
 
 
 
 
 
 
ff44794
 
 
 
 
 
 
 
 
 
 
 
3debdab
ff44794
 
 
 
 
3debdab
b7809d1

import os
import re
import requests
import gradio as gr

# =========================
# ElevenLabs Config
# =========================

# The API key is a secret and must not live in source control. It is read from
# the ELEVENLABS_API_KEY environment variable (e.g. a deployment secret).
# When the variable is unset the key is empty and the API call is sent with an
# empty key.
# NOTE(review): a literal key was previously committed on this line; treat it
# as leaked and revoke/rotate it.
ELEVENLABS_API_KEY = os.environ.get("ELEVENLABS_API_KEY", "")
# ElevenLabs speech-to-text endpoint that the audio file is POSTed to.
STT_URL = "https://api.elevenlabs.io/v1/speech-to-text"

# =========================
# Regex Cleaning
# =========================

# All patterns below are meant to be substituted with r'\1' (keep group 1).

# The same word repeated back-to-back ("the the the"), ignoring case.
REPEAT_WORD = re.compile(r'\b(\w+)(?:\s+\1\b)+', re.IGNORECASE)
# One character repeated three or more times ("sooo" -> "so"). Digits are
# excluded so that numbers such as "1000" are not rewritten to "10".
CHAR_STRETCH = re.compile(r'([^\d\n])\1{2,}')
# A repeated token of 1-3 characters; a subset of what REPEAT_WORD matches.
REPEAT_SYLLABLE = re.compile(r'\b(\w{1,3})(?:\s+\1\b)+', re.IGNORECASE)

# A letter stuttered at least twice with '-' or tatweel separators, e.g.
# "b-b-but". The optional trailing \1 also consumes the first letter of the
# word that follows, so replacing the match with group 1 yields "but" instead
# of "bbut".
STUTTER = re.compile(r'\b(\w)[\-ـ]+(?:\1[\-ـ]+)+\1?')
# A word repeated right after a comma ("yes, yes"). The trailing \b stops the
# back-reference from matching a mere prefix of the next word
# ("so, something" must stay untouched).
REPEAT_AFTER_COMMA = re.compile(r'(\b\w+\b)[،,]\s+\1\b')
# Whitespace before a Latin or Arabic comma; group 1 is the comma itself.
COMMA_SPACES = re.compile(r'\s+([،,])')

def is_filler(word, *, max_len=2):
    """Return True when *word* should be dropped as a filler token.

    A token counts as filler when, after lower-casing, it either consists of
    a single repeated character and is at most four characters long
    ("mmm", "AaA"), or is at most ``max_len`` characters long.

    Tokens containing a digit are never fillers, so short numbers such as
    "5" or "10" survive the cleanup.

    Args:
        word: A single whitespace-delimited token.
        max_len: Tokens of this length or shorter are treated as fillers.
            Defaults to 2.

    Returns:
        bool: True if the token should be removed.
    """
    w = word.lower()

    # Numbers carry content even when they are only one or two characters.
    if any(ch.isdigit() for ch in w):
        return False

    # Same character repeated, e.g. "aaa" or "mm".
    if len(set(w)) == 1 and len(w) <= 4:
        return True

    # NOTE(review): this length rule also removes real short words
    # ("in", "to", "في", "من"); pass a smaller max_len if that is unwanted.
    return len(w) <= max_len



def clean_transcript(text):
    """Strip disfluencies from a transcript and return the cleaned string.

    Each module-level pattern is applied in a fixed order, replacing every
    match with its first captured group. The result is then split on
    whitespace, tokens for which ``is_filler`` returns True are discarded,
    and the remaining tokens are joined with single spaces.
    """
    # Order is significant: stretched characters are collapsed first, then
    # stutters, repeated words, repeated short syllables, repeats after a
    # comma, and finally whitespace before commas.
    ordered_patterns = (
        CHAR_STRETCH,
        STUTTER,
        REPEAT_WORD,
        REPEAT_SYLLABLE,
        REPEAT_AFTER_COMMA,
        COMMA_SPACES,
    )
    for pattern in ordered_patterns:
        text = pattern.sub(r'\1', text)

    # Keep only the tokens that are not classified as fillers.
    kept = [token for token in text.split() if not is_filler(token)]
    return " ".join(kept)

# =========================
# Speech To Text
# =========================

def transcribe_audio(audio_file):
    """Transcribe an audio file with ElevenLabs and clean the transcript.

    Args:
        audio_file: Path of the audio file to upload, or None when nothing
            was provided.

    Returns:
        tuple[str, str]: ``(raw_text, cleaned_text)`` on success. On any
        failure (no file, unreadable file, network error, non-200 response,
        non-JSON body) the first element is a human-readable message and the
        second is an empty string.
    """
    if audio_file is None:
        return "No audio uploaded", ""

    headers = {"xi-api-key": ELEVENLABS_API_KEY}
    # NOTE(review): ElevenLabs appears to document enable_logging as a query
    # parameter - confirm it is honored when sent as a form field.
    data = {
        "model_id": "scribe_v2",
        "enable_logging": "false",
    }

    try:
        with open(audio_file, "rb") as f:
            response = requests.post(
                STT_URL,
                headers=headers,
                files={"file": f},
                data=data,
                # (connect, read) seconds: without a timeout a stalled
                # connection would block the UI handler indefinitely.
                timeout=(10, 300),
            )
    except (OSError, requests.RequestException) as exc:
        # Unreadable file or network failure: report it in the same shape as
        # the HTTP error path instead of raising.
        return f"Error: {exc}", ""

    if response.status_code != 200:
        return f"Error: {response.text}", ""

    try:
        result = response.json()
    except ValueError:
        # A 200 response whose body is not valid JSON.
        return f"Error: {response.text}", ""

    # Prefer per-segment text when present, otherwise the top-level text.
    if "segments" in result:
        text = " ".join(
            segment.get("text", "") for segment in result["segments"]
        )
    else:
        text = result.get("text", "")

    cleaned = clean_transcript(text)

    return text, cleaned


# =========================
# Gradio Interface
# =========================

with gr.Blocks() as demo:
    # Page heading followed by a one-line description of the pipeline.
    gr.Markdown("# Arabic Speech Cleaner")
    gr.Markdown("Upload audio → convert to text using ElevenLabs → remove fillers and stuttering")

    # The handler opens its argument with open(), so ask for a file path.
    audio_input = gr.Audio(type="filepath", label="Upload Audio")

    # Output boxes: the transcript as returned, then the cleaned version.
    raw_text = gr.Textbox(label="Original Transcript", lines=8)
    cleaned_text = gr.Textbox(label="Cleaned Transcript", lines=8)

    # Clicking the button runs transcribe_audio and fills both text boxes.
    btn = gr.Button("Transcribe")
    btn.click(fn=transcribe_audio, inputs=audio_input, outputs=[raw_text, cleaned_text])

# Start the web server for the interface.
demo.launch()