Spaces:

Sammaali
/

Post_Process_Elevenlabs

Running

App Files Files Community

Sammaali committed 17 days ago

Commit

3debdab

verified ·

1 Parent(s): 56cae9b

Update app.py

Browse files

Files changed (1) hide show

app.py +113 -15

app.py CHANGED Viewed

@@ -1,34 +1,132 @@
 import gradio as gr
 import re
 def clean_arabic_text(text):
     """Return a normalized copy of an Arabic string.

     Falsy input (``None`` or an empty string) yields ``""``. Otherwise the
     text goes through these steps in order: strip diacritics, unify hamza
     forms of alef, rewrite word-final teh marbuta and alef maqsura, drop
     characters that are neither word characters nor whitespace, and collapse
     runs of whitespace into single spaces.
     """
     if not text:
         return ""

     # Strip diacritics (tashkeel): U+0617-U+061A and U+064B-U+0652.
     without_marks = re.compile(r'[\u0617-\u061A\u064B-\u0652]').sub('', text)
     # Unify the hamza-carrying alef variants into a bare alef.
     unified_alef = re.sub(r'[أإآ]', 'ا', without_marks)
     # Teh marbuta at a word boundary becomes heh.
     heh_endings = re.sub(r'ة\b', 'ه', unified_alef)
     # Alef maqsura at a word boundary becomes yeh.
     yeh_endings = re.sub(r'ى\b', 'ي', heh_endings)
     # Drop everything that is neither a word character nor whitespace.
     symbols_removed = re.sub(r'[^\w\s]', '', yeh_endings)
     # Collapse whitespace runs and trim both ends.
     words = symbols_removed.split()
     return " ".join(words)
 # Text-only UI: an input box and an output box side by side, plus a button
 # that runs clean_arabic_text on the input and shows the result.
 with gr.Blocks(theme=gr.themes.Soft()) as demo:
     gr.Markdown("# Post Process")
     gr.Markdown("قم بلصق النص العربي هنا ليتم تنظيفه.")

     with gr.Row():
         input_text = gr.Textbox(
             label="النص الأصلي",
             lines=10,
             placeholder="أدخل النص هنا...",
         )
         output_text = gr.Textbox(
             label="النص بعد التنظيف",
             lines=10,
         )

     btn = gr.Button("تنظيف النص")
     btn.click(
         fn=clean_arabic_text,
         inputs=input_text,
         outputs=output_text,
     )

 # Start the app only when this file is executed as a script.
 if __name__ == "__main__":
     demo.launch()

 import os
 import re

 import gradio as gr
 import requests
+# =========================
+# ElevenLabs Configuration
+# =========================
# Prefer a key supplied through the ELEVENLABS_API_KEY environment variable so
# the secret does not have to be committed with the source; the original
# placeholder stays as the fallback when the variable is not set.
ELEVENLABS_API_KEY = os.environ.get("ELEVENLABS_API_KEY", "YOUR_API_KEY_HERE")
# Endpoint that transcribe_audio posts the audio file to.
STT_URL = "https://api.elevenlabs.io/v1/speech-to-text"
+# =========================
+# Arabic Post Processing
+# =========================
 def clean_arabic_text(text):
     """Normalize Arabic text for post-processing.

     Returns ``""`` for falsy input. Otherwise applies, in order, the regex
     substitutions listed below and finally collapses all whitespace runs
     into single spaces (which also trims leading/trailing whitespace).
     """
     if not text:
         return ""

     # (pattern, replacement) pairs; order matters because later rules see
     # the output of earlier ones.
     substitutions = (
         (r'[\u0617-\u061A\u064B-\u0652]', ''),  # remove tashkeel
         (r'[أإآ]', 'ا'),                        # normalize hamza on alef
         (r'ة\b', 'ه'),                          # word-final ة -> ه
         (r'ى\b', 'ي'),                          # word-final ى -> ي
         (r'[^\w\s]', ''),                       # remove symbols
     )
     for pattern, replacement in substitutions:
         text = re.sub(pattern, replacement, text)

     # Remove extra spaces.
     return " ".join(text.split())
+# =========================
+# ElevenLabs Speech To Text
+# =========================
def transcribe_audio(audio_file):
    """Send an audio file to the ElevenLabs speech-to-text endpoint.

    Args:
        audio_file: Path of the audio file on disk, or ``None`` when nothing
            was uploaded.

    Returns:
        A ``(raw_text, cleaned_text)`` tuple, where ``cleaned_text`` is
        ``raw_text`` passed through ``clean_arabic_text``. On failure the
        first item is a message ("No audio uploaded" or "Error: ...") and the
        second item is an empty string; no exception is raised for an
        unreadable file, a network error, a non-200 status or a non-JSON body.
    """
    if audio_file is None:
        return "No audio uploaded", ""

    # Open the upload first so an unreadable path is reported like every
    # other failure instead of surfacing as an exception.
    try:
        audio = open(audio_file, "rb")
    except OSError as exc:
        return "Error: " + str(exc), ""

    headers = {"xi-api-key": ELEVENLABS_API_KEY}
    data = {
        "model_id": "scribe_v2",
        # Intended to cover Arabic + English.
        # NOTE(review): confirm the API accepts "auto" as a language code.
        "language_code": "auto",
        "enable_logging": "false",
    }

    # The context manager guarantees the file handle is closed even when the
    # request fails; the timeout keeps a stalled connection from hanging.
    with audio:
        try:
            response = requests.post(
                STT_URL,
                headers=headers,
                files={"file": audio},
                data=data,
                timeout=(10, 600),
            )
        except requests.RequestException as exc:
            return "Error: " + str(exc), ""

    if response.status_code != 200:
        return "Error: " + response.text, ""

    try:
        result = response.json()
    except ValueError as exc:
        # requests raises a ValueError subclass when the body is not JSON.
        return "Error: " + str(exc), ""

    if "segments" in result:
        # Keep only what speaker_0 said; each piece is followed by a space.
        # NOTE(review): verify the response really carries "segments" with a
        # "speaker" field; otherwise the fallback below is always used.
        text = "".join(
            segment.get("text", "") + " "
            for segment in result["segments"]
            if segment.get("speaker") == "speaker_0"
        )
    else:
        text = result.get("text", "")

    return text, clean_arabic_text(text)
+# =========================
+# Gradio UI
+# =========================
 # Speech-to-text UI: an audio upload, two text boxes (raw and cleaned
 # transcript) and a button that runs transcribe_audio on the uploaded file.
 with gr.Blocks(theme=gr.themes.Soft()) as demo:
     gr.Markdown("# ElevenLabs Speech To Text + Post Process")
     gr.Markdown("ارفع ملف صوتي (wav) وسيتم تحويله إلى نص عربي أو إنجليزي مع تنظيف النص.")

     # type="filepath" hands transcribe_audio a path rather than raw samples.
     audio_input = gr.Audio(type="filepath", label="Upload audio.wav")
     raw_text = gr.Textbox(label="Original Text", lines=8)
     clean_text = gr.Textbox(label="Cleaned Text", lines=8)

     btn = gr.Button("Transcribe")
     # The two return values of transcribe_audio fill the two text boxes.
     btn.click(fn=transcribe_audio, inputs=audio_input, outputs=[raw_text, clean_text])

 # Start the app only when this file is executed as a script.
 if __name__ == "__main__":
     demo.launch()