Spaces:

Offex
/

Transcripttiktok

Running

App Files Files Community

Offex committed on Feb 11

Commit

b292d46

verified ·

1 Parent(s): 892bbcb

Update app.py

Browse files

Files changed (1) hide show

app.py +78 -70

app.py CHANGED Viewed

@@ -4,47 +4,59 @@ import os
 import shutil
 import subprocess
 from faster_whisper import WhisperModel
 from indic_transliteration import sanscript
 from indic_transliteration.sanscript import transliterate
 # ===============================
-# Whisper Model
 # ===============================
 model = None
 def load_model():
     global model
     if model is None:
         model = WhisperModel("base", device="cpu", compute_type="int8")
     return model
 # ===============================
-# FFmpeg
 # ===============================
 def get_ffmpeg():
     return shutil.which("ffmpeg") or "/usr/bin/ffmpeg"
 # ===============================
-# Video → Audio
 # ===============================
 def extract_audio(video_path):
-    audio = "uploaded_audio.wav"
-    if os.path.exists(audio):
-        os.remove(audio)
-    subprocess.run(
-        [get_ffmpeg(), "-i", video_path, "-vn", "-ac", "1", "-ar", "16000", audio, "-y"],
-        stdout=subprocess.DEVNULL,
-        stderr=subprocess.DEVNULL
-    )
-    return audio
 # ===============================
-# URL → Audio
 # ===============================
-def download_audio(url):
     ydl_opts = {
         "format": "bestaudio/best",
-        "outtmpl": "url_audio",
         "postprocessors": [{
             "key": "FFmpegExtractAudio",
             "preferredcodec": "wav",
@@ -52,12 +64,14 @@ def download_audio(url):
         "quiet": True,
         "nocheckcertificate": True,
     }
     with yt_dlp.YoutubeDL(ydl_opts) as ydl:
         ydl.download([url])
     return "url_audio.wav"
 # ===============================
-# Script Fix
 # ===============================
 def normalize_script(text, lang):
     if lang == "hi":
@@ -68,95 +82,91 @@ def normalize_script(text, lang):
     return text
 # ===============================
-# Transcribe
 # ===============================
-def transcribe(url, file, lang_choice):
     try:
-        if file:
-            ext = os.path.splitext(file)[1].lower()
-            audio = file if ext in [".mp3", ".wav", ".m4a"] else extract_audio(file)
-        elif url:
-            audio = download_audio(url)
         else:
-            return "⚠️ Please provide a URL or upload a file."
         model = load_model()
-        language = None if lang_choice == "Auto Detect" else lang_choice
         segments, info = model.transcribe(
-            audio,
             beam_size=1,
             vad_filter=True,
             language=language
         )
-        text = " ".join(s.text for s in segments)
-        text = normalize_script(text, info.language)
-        return f"🌍 Language: {info.language}\n\n{text.strip()}"
     except Exception as e:
         return f"❌ Error: {str(e)}"
 # ===============================
-# MODERN UI
 # ===============================
 css = """
-body {
-    background: radial-gradient(circle at top, #0f2027, #203a43, #2c5364);
-}
-.glass {
-    background: rgba(255,255,255,0.08);
-    backdrop-filter: blur(18px);
-    border-radius: 18px;
-    padding: 25px;
-    box-shadow: 0 20px 40px rgba(0,0,0,0.4);
-}
 .gr-button-primary {
-    background: linear-gradient(135deg,#00c6ff,#0072ff);
     border: none;
     color: white;
-    font-weight: 600;
-}
-.gr-input, .gr-textarea {
-    background: rgba(255,255,255,0.12) !important;
-    color: white !important;
-}
-h1, h2, label, .markdown-text {
-    color: #ffffff !important;
 }
 """
-with gr.Blocks(css=css, theme=gr.themes.Base()) as demo:
-    with gr.Column(elem_classes="glass"):
         gr.Markdown("## 🚀 Universal Transcript Tool")
         gr.Markdown(
-            "Modern UI • YouTube • TikTok • Instagram (Upload) • Fast Whisper\n\n"
-            "**Note:** Instagram URLs may be blocked on Hugging Face."
         )
         with gr.Tabs():
             with gr.TabItem("🔗 Paste Link"):
-                url = gr.Textbox(
-                    label="Video URL",
-                    placeholder="YouTube / TikTok link"
-                )
                 btn_url = gr.Button("🎧 Transcribe Link", variant="primary")
             with gr.TabItem("📂 Upload File"):
-                file = gr.File(
                     label="Upload Video / Audio",
                     file_types=[".mp4", ".mkv", ".mov", ".webm", ".avi", ".mp3", ".wav"]
                 )
                 btn_file = gr.Button("📂 Transcribe File", variant="primary")
-        lang = gr.Dropdown(
-            label="🌍 Transcript Language",
             choices=[
                 "Auto Detect",
-                "hi",
-                "ur",
-                "en",
                 "ar",
                 "fr",
                 "de",
@@ -165,15 +175,13 @@ with gr.Blocks(css=css, theme=gr.themes.Base()) as demo:
                 "ja",
                 "zh"
             ],
-            value="Auto Detect"
         )
-        output = gr.Code(
-            label="Transcript Output",
-            lines=14
-        )
-    btn_url.click(transcribe, [url, gr.State(None), lang], output)
-    btn_file.click(transcribe, [gr.State(None), file, lang], output)
 demo.launch()

 import shutil
 import subprocess
 from faster_whisper import WhisperModel
+# 🔤 Hindi Script Fix
 from indic_transliteration import sanscript
 from indic_transliteration.sanscript import transliterate
 # ===============================
+# 1. Whisper Model (Lazy Load)
 # ===============================
 # Cached model instance; stays None until load_model() is first called.
 model = None
 def load_model():
     """Return the shared WhisperModel, constructing it on the first call.

     The instance is stored in the module-level ``model`` variable and
     returned unchanged by every later call.
     """
     global model
     if model is not None:
         return model
     print("📥 Loading Whisper Model...")
     model = WhisperModel("base", device="cpu", compute_type="int8")
     print("✅ Model Loaded")
     return model
 # ===============================
+# 2. FFmpeg Path
 # ===============================
 def get_ffmpeg():
     """Return the ffmpeg executable found on PATH, else "/usr/bin/ffmpeg"."""
     located = shutil.which("ffmpeg")
     if located:
         return located
     return "/usr/bin/ffmpeg"
 # ===============================
+# 3. Video → Audio
 # ===============================
 def extract_audio(video_path):
     """Extract the audio track of ``video_path`` into a mono 16 kHz WAV file.

     Args:
         video_path: Path of the media file passed to ffmpeg as input.

     Returns:
         The fixed output path ``"uploaded_audio.wav"``. The file does not
         exist when ffmpeg failed, so callers should check for it.
     """
     audio_path = "uploaded_audio.wav"
     # Remove the result of a previous run so a failed conversion can never
     # be mistaken for fresh output.
     if os.path.exists(audio_path):
         os.remove(audio_path)
     cmd = [
         get_ffmpeg(),
         # Global options go before the input; placed after the output file
         # they are trailing options that ffmpeg may ignore.
         "-nostdin",
         "-y",
         "-i", video_path,
         "-vn",
         "-ac", "1",
         "-ar", "16000",
         audio_path,
     ]
     result = subprocess.run(
         cmd,
         stdin=subprocess.DEVNULL,
         stdout=subprocess.DEVNULL,
         stderr=subprocess.DEVNULL,
     )
     # A non-zero exit can leave a truncated file behind; drop it so the
     # caller's existence check reports the failure.
     if result.returncode != 0 and os.path.exists(audio_path):
         os.remove(audio_path)
     return audio_path
 # ===============================
+# 4. Download Audio from URL
 # ===============================
+def download_audio_from_url(url):
+    output = "url_audio"
     ydl_opts = {
         "format": "bestaudio/best",
+        "outtmpl": output,
         "postprocessors": [{
             "key": "FFmpegExtractAudio",
             "preferredcodec": "wav",
         "quiet": True,
         "nocheckcertificate": True,
     }
     with yt_dlp.YoutubeDL(ydl_opts) as ydl:
         ydl.download([url])
     return "url_audio.wav"
 # ===============================
+# 5. Hindi Script Normalizer
 # ===============================
 def normalize_script(text, lang):
     if lang == "hi":
     return text
 # ===============================
+# 6. Main Transcribe Logic
 # ===============================
def transcribe_media(url_input, file_input, language_choice):
    """Transcribe an uploaded file or a media URL and return display text.

    An uploaded file takes precedence over the URL. Files ending in .mp3,
    .wav or .m4a are passed to the model directly; any other upload goes
    through ``extract_audio`` first. A URL is fetched with
    ``download_audio_from_url``.

    Args:
        url_input: Media URL, or None/blank when a file is used.
        file_input: Uploaded file, or None/empty when a URL is used.
        language_choice: ``"Auto Detect"`` or a language code that is
            forwarded to the model as ``language``.

    Returns:
        A string: the detected language followed by the transcript, or a
        message starting with "⚠️" / "❌" when input is missing or
        processing fails. Exceptions are reported in the string, not raised.
    """
    try:
        # ---------- FILE ----------
        if file_input:
            # NOTE(review): depending on the Gradio version/configuration the
            # File component may deliver a path string or a temp-file object
            # exposing ``.name`` — accept both. Confirm against the deployed
            # Gradio version.
            file_path = getattr(file_input, "name", file_input)
            ext = os.path.splitext(file_path)[1].lower()
            if ext in (".mp3", ".wav", ".m4a"):
                audio_path = file_path
            else:
                audio_path = extract_audio(file_path)
        # ---------- URL ----------
        elif url_input and url_input.strip():
            # Pass the trimmed URL on, matching what the check above tested.
            audio_path = download_audio_from_url(url_input.strip())
        else:
            return "⚠️ Please paste a link or upload a file."

        if not audio_path or not os.path.exists(audio_path):
            return "❌ Audio processing failed."

        # Use a distinct local name so the module-level ``model`` cache is
        # not shadowed inside this function.
        whisper = load_model()

        # None lets the model detect the language itself.
        language = None if language_choice == "Auto Detect" else language_choice
        segments, info = whisper.transcribe(
            audio_path,
            beam_size=1,
            vad_filter=True,
            language=language,
        )
        detected_lang = info.language
        raw_text = " ".join(seg.text for seg in segments)
        final_text = normalize_script(raw_text, detected_lang)
        return f"🌍 Detected Language: {detected_lang}\n\n{final_text.strip()}"
    except Exception as e:
        # UI boundary: report the failure in the output box instead of
        # raising.
        return f"❌ Error: {e}"
 # ===============================
+# 7. UI
 # ===============================
 css = """
+.container {max-width: 900px; margin: auto;}
 .gr-button-primary {
+    background: linear-gradient(90deg,#667eea,#764ba2);
     border: none;
     color: white;
 }
 """
+with gr.Blocks(theme=gr.themes.Soft(), css=css) as demo:
+    with gr.Column(elem_classes="container"):
         gr.Markdown("## 🚀 Universal Transcript Tool")
         gr.Markdown(
+            "Supports **YouTube, TikTok, Instagram, Facebook, Twitter/X**\n\n"
+            "Hindi output is always **Devanagari** 🇮🇳"
         )
         with gr.Tabs():
             with gr.TabItem("🔗 Paste Link"):
+                url_in = gr.Textbox(label="Video URL")
                 btn_url = gr.Button("🎧 Transcribe Link", variant="primary")
             with gr.TabItem("📂 Upload File"):
+                file_in = gr.File(
                     label="Upload Video / Audio",
                     file_types=[".mp4", ".mkv", ".mov", ".webm", ".avi", ".mp3", ".wav"]
                 )
                 btn_file = gr.Button("📂 Transcribe File", variant="primary")
+        # 🌍 Language Selector
+        language_selector = gr.Dropdown(
             choices=[
                 "Auto Detect",
+                "hi",  # Hindi (Devanagari)
+                "ur",  # Urdu
+                "en",  # English
                 "ar",
                 "fr",
                 "de",
                 "ja",
                 "zh"
             ],
+            value="Auto Detect",
+            label="🌍 Select Transcript Language"
         )
+        output = gr.Code(label="Transcript Output", lines=15)
+    btn_url.click(transcribe_media, [url_in, gr.State(None), language_selector], output)
+    btn_file.click(transcribe_media, [gr.State(None), file_in, language_selector], output)
 demo.launch()