import os
import re
import requests
import gradio as gr

# =========================
# ElevenLabs Config
# =========================

# The API key is read from the ELEVENLABS_API_KEY environment variable so the
# secret is never committed to source control. It falls back to an empty
# string when the variable is unset, in which case requests are sent without
# a usable key.
# NOTE(security): a key was previously hard-coded on this line; treat it as
# compromised and revoke it.
ELEVENLABS_API_KEY = os.environ.get("ELEVENLABS_API_KEY", "")

# Endpoint that transcribe_audio posts the audio file to.
STT_URL = "https://api.elevenlabs.io/v1/speech-to-text"

# =========================
# Regex Cleaning
# =========================

# Every pattern below is applied with the replacement r'\1', i.e. a match is
# reduced to its first captured group.

# A word followed by one or more whitespace-separated copies of itself
# ("the the the" -> "the"); case-insensitive.
REPEAT_WORD = re.compile(r'\b(\w+)(?:\s+\1\b)+', re.IGNORECASE)

# One character repeated three or more times in a row ("soooo" -> "so").
# Digits are excluded so numbers such as "1000" are not rewritten to "10";
# newlines are excluded as well.
CHAR_STRETCH = re.compile(r'([^\d\n])\1{2,}')

# Same as REPEAT_WORD but limited to tokens of one to three characters.
REPEAT_SYLLABLE = re.compile(r'\b(\w{1,3})(?:\s+\1\b)+', re.IGNORECASE)

# A single letter repeated two or more times with hyphens or tatweel (U+0640)
# after each repeat, e.g. the "ب-ب-" in "ب-ب-بالشيء". The trailing optional
# \1 also consumes the first letter of the word that follows, so replacing
# the match with \1 yields "بالشيء" rather than a doubled "ببالشيء".
STUTTER = re.compile(r'\b(\w)[\-ـ]+(?:\1[\-ـ]+)+\1?')

# A word repeated after a Latin or Arabic comma ("no, no, no way" -> "no way").
# The \b after the back-reference stops a prefix match from eating text
# ("so, something" stays untouched); case-insensitive like REPEAT_WORD.
REPEAT_AFTER_COMMA = re.compile(r'\b(\w+)(?:[،,]\s+\1\b)+', re.IGNORECASE)

# Whitespace directly before a Latin or Arabic comma ("a , b" -> "a, b").
COMMA_SPACES = re.compile(r'\s+([،,])')

def is_filler(word):
    """Return True when *word* should be dropped as a filler token.

    The check is purely length-based and runs on the lower-cased token:

    * any token of two characters or fewer is filler (this includes the
      empty string), and
    * a token of three or four characters is filler when it consists of a
      single repeated character (e.g. "aaa", "mmmm").

    NOTE(review): the first rule also removes real short words; confirm
    that this is intended.
    """
    lowered = word.lower()
    length = len(lowered)

    if length <= 2:
        return True

    # Only 3-4 character tokens reach this point.
    return length <= 4 and len(set(lowered)) == 1


def clean_transcript(text):
    """Strip stretched sounds, stutters, repetitions and filler tokens.

    Each module-level pattern is applied in turn, replacing every match with
    its first captured group, and the result is then split on whitespace and
    filtered through ``is_filler``.

    Args:
        text: Raw transcript string. An empty or ``None`` value yields "".

    Returns:
        The cleaned transcript with the remaining tokens joined by single
        spaces.
    """
    if not text:
        return ""

    # Order matters: every pass works on the output of the previous one.
    passes = (
        CHAR_STRETCH,        # collapse stretched sounds
        STUTTER,             # fix stutter like ب-ب-بالشيء
        REPEAT_WORD,         # repeated words
        REPEAT_SYLLABLE,     # repeated short syllables
        # Spaces before a comma are removed *before* the repeated-after-comma
        # pass, otherwise "word , word" would not be recognised as a repeat.
        COMMA_SPACES,
        REPEAT_AFTER_COMMA,  # repeated after comma
    )
    for pattern in passes:
        text = pattern.sub(r'\1', text)

    return " ".join(token for token in text.split() if not is_filler(token))

# =========================
# Speech To Text
# =========================

# (connect, read) timeout in seconds for the speech-to-text request, so a
# stalled connection cannot block the UI callback forever.
_STT_TIMEOUT = (10, 600)


def transcribe_audio(audio_file):
    """Send an audio file to the speech-to-text endpoint and clean the result.

    Args:
        audio_file: Path of the audio file to upload, or ``None`` when no
            audio was provided.

    Returns:
        A ``(raw_text, cleaned_text)`` tuple. When nothing was uploaded, the
        file cannot be read, the request fails, or the service answers with
        a non-200 status or an unexpected body, ``raw_text`` carries a
        message and ``cleaned_text`` is "".
    """
    if audio_file is None:
        return "No audio uploaded", ""

    headers = {"xi-api-key": ELEVENLABS_API_KEY}

    # NOTE(review): enable_logging appears to be documented by ElevenLabs as
    # a query parameter; confirm that sending it as a form field has the
    # intended effect.
    data = {
        "model_id": "scribe_v2",
        "enable_logging": "false",
    }

    try:
        with open(audio_file, "rb") as f:
            response = requests.post(
                STT_URL,
                headers=headers,
                files={"file": f},
                data=data,
                timeout=_STT_TIMEOUT,
            )
    except (OSError, requests.RequestException) as exc:
        # Unreadable file, connection failure or timeout: report it the same
        # way as an HTTP error instead of raising into the UI.
        return f"Error: {exc}", ""

    if response.status_code != 200:
        return f"Error: {response.text}", ""

    try:
        result = response.json()
    except ValueError:
        # A 200 response whose body is not JSON.
        return f"Error: {response.text}", ""

    if not isinstance(result, dict):
        return f"Error: {response.text}", ""

    if "segments" in result:
        # Join per-segment text with single spaces; a missing or null text
        # field contributes an empty string.
        text = " ".join(
            (segment.get("text") or "") for segment in result["segments"]
        )
    else:
        text = result.get("text") or ""

    return text, clean_transcript(text)


# =========================
# Gradio Interface
# =========================

# Page layout, top to bottom: heading, description, audio upload, the two
# transcript boxes and the button that triggers transcription.
with gr.Blocks() as demo:
    gr.Markdown("# Arabic Speech Cleaner")
    gr.Markdown("Upload audio → convert to text using ElevenLabs → remove fillers and stuttering")

    # The value of this component is what transcribe_audio receives and
    # opens as a file path.
    audio_input = gr.Audio(type="filepath", label="Upload Audio")

    # Output boxes, filled from the (raw, cleaned) tuple that
    # transcribe_audio returns.
    raw_text = gr.Textbox(label="Original Transcript", lines=8)
    cleaned_text = gr.Textbox(label="Cleaned Transcript", lines=8)

    btn = gr.Button("Transcribe")
    btn.click(fn=transcribe_audio, inputs=audio_input, outputs=[raw_text, cleaned_text])

# Start the app as soon as the module is executed.
demo.launch()