Transcript_tiktok

Runtime error

App Files Files Community

Offex committed on Apr 4

Commit

67748bb

verified ·

1 Parent(s): fbfb3b5

Update app.py

Browse files

Files changed (1) hide show

app.py +240 -123

app.py CHANGED Viewed

@@ -3,201 +3,318 @@ import yt_dlp
 import os
 import shutil
 import subprocess
 from faster_whisper import WhisperModel
 from indic_transliteration import sanscript
 from indic_transliteration.sanscript import transliterate
 # ===============================
-# Whisper Model (lazy load)
 # ===============================
-model = None
-def load_model():
-    global model
-    if model is None:
-        model = WhisperModel("base", device="cpu", compute_type="int8")
-    return model
-# ===============================
-# FFmpeg path
-# ===============================
 def get_ffmpeg():
     return shutil.which("ffmpeg") or "/usr/bin/ffmpeg"
 # ===============================
-# SAFE: Download video only (NO postprocessing)
 # ===============================
-def download_video_only(url):
-    video_path = "downloaded_video.mp4"
     if os.path.exists(video_path):
         os.remove(video_path)
     ydl_opts = {
-        "format": "best",
         "outtmpl": video_path,
         "quiet": True,
         "nocheckcertificate": True,
     }
-    with yt_dlp.YoutubeDL(ydl_opts) as ydl:
-        ydl.download([url])
-    return video_path
 # ===============================
-# SAFE: Extract audio manually (NO ffprobe)
 # ===============================
-def extract_audio_safe(video_path):
-    audio_path = "extracted_audio.wav"
     if os.path.exists(audio_path):
         os.remove(audio_path)
-    subprocess.run(
-        [
-            get_ffmpeg(),
-            "-y",
-            "-i", video_path,
-            "-vn",
-            "-ac", "1",
-            "-ar", "16000",
-            audio_path
-        ],
-        stdout=subprocess.DEVNULL,
-        stderr=subprocess.DEVNULL
-    )
-    return audio_path
 # ===============================
-# Hindi script normalizer
 # ===============================
-def normalize_script(text, lang):
-    if lang == "hi":
-        try:
-            return transliterate(text, sanscript.ARABIC, sanscript.DEVANAGARI)
-        except:
-            return text
     return text
 # ===============================
-# Transcription logic (STABLE)
 # ===============================
-def transcribe(url, file, lang_choice):
     try:
-        # -------- FILE MODE --------
         if file:
             ext = os.path.splitext(file)[1].lower()
-            if ext in [".mp3", ".wav", ".m4a"]:
-                audio = file
             else:
-                audio = extract_audio_safe(file)
-        # -------- URL MODE --------
         elif url:
-            video = download_video_only(url)
-            audio = extract_audio_safe(video)
         else:
             return "⚠️ Please paste a URL or upload a file."
         # Safety check
-        if not os.path.exists(audio) or os.path.getsize(audio) < 10000:
-            return "❌ Audio extraction failed. Please try again."
-        model = load_model()
-        language = None if lang_choice == "Auto Detect" else lang_choice
         segments, info = model.transcribe(
-            audio,
-            beam_size=1,
             vad_filter=True,
-            language=language
         )
-        raw_text = " ".join(s.text for s in segments)
-        final_text = normalize_script(raw_text, info.language)
-        return f"🌍 Detected Language: {info.language}\n\n{final_text.strip()}"
     except Exception as e:
-        if "instagram" in str(e).lower():
-            return "❌ Instagram URL is blocked on Hugging Face. Please upload the video file instead."
-        return f"❌ Error: {str(e)}"
 # ===============================
-# MODERN UI
 # ===============================
-css = """
 body {
-    background: radial-gradient(circle at top, #0f2027, #203a43, #2c5364);
 }
-.glass {
-    background: rgba(255,255,255,0.08);
-    backdrop-filter: blur(18px);
-    border-radius: 18px;
-    padding: 25px;
-    box-shadow: 0 20px 40px rgba(0,0,0,0.4);
 }
 .gr-button-primary {
-    background: linear-gradient(135deg,#00c6ff,#0072ff);
     border: none;
     color: white;
     font-weight: 600;
 }
-.gr-input, .gr-textarea {
-    background: rgba(255,255,255,0.12) !important;
-    color: white !important;
 }
-h1, h2, label, .markdown-text {
-    color: #ffffff !important;
 }
-footer {display:none;}
-"""
-with gr.Blocks(css=css, theme=gr.themes.Base()) as demo:
-    with gr.Column(elem_classes="glass"):
-        gr.Markdown("## 🚀 Universal Transcript Tool (STABLE)")
         gr.Markdown(
-            "✔ YouTube ✔ TikTok ✔ Facebook ✔ Twitter/X\n\n"
-            "⚠️ Instagram URL blocked on Hugging Face → **Upload video instead**\n\n"
-            "**No random ffprobe errors. Ever.**"
         )
         with gr.Tabs():
-            with gr.TabItem("🔗 Paste Link"):
-                url = gr.Textbox(label="Video URL")
-                btn_url = gr.Button("🎧 Transcribe Link", variant="primary")
-            with gr.TabItem("📂 Upload File"):
-                file = gr.File(
-                    label="Upload Video / Audio",
-                    file_types=[".mp4", ".mkv", ".mov", ".webm", ".avi", ".mp3", ".wav"]
                 )
-                btn_file = gr.Button("📂 Transcribe File", variant="primary")
-        lang = gr.Dropdown(
-            label="🌍 Transcript Language",
-            choices=[
-                "Auto Detect",
-                "hi",
-                "ur",
-                "en",
-                "ar",
-                "fr",
-                "de",
-                "es",
-                "ru",
-                "ja",
-                "zh"
-            ],
-            value="Auto Detect"
         )
-        output = gr.Code(label="Transcript Output", lines=14)
-    btn_url.click(transcribe, [url, gr.State(None), lang], output)
-    btn_file.click(transcribe, [gr.State(None), file, lang], output)
-demo.launch()

 import os
 import shutil
 import subprocess
+import tempfile
 from faster_whisper import WhisperModel
 from indic_transliteration import sanscript
 from indic_transliteration.sanscript import transliterate
+import torch
 # ===============================
+# 🔒 GLOBALS & CONFIG
 # ===============================
+MODEL_CACHE_DIR = "/tmp/qwen_whisper_cache"
+os.makedirs(MODEL_CACHE_DIR, exist_ok=True)
+# Lazy-loaded model (shared across calls)
# Process-wide Whisper instance; stays None until the first call below.
_model = None


def load_whisper_model():
    """Return the shared Whisper model, building it on first use.

    The model is the "base" checkpoint, run on CPU with int8 compute, and
    its weights are downloaded into MODEL_CACHE_DIR. Every later call
    returns the instance created by the first one.
    """
    global _model
    if _model is not None:
        return _model
    print("📥 Loading Whisper 'base' model (CPU/int8)...")
    _model = WhisperModel(
        "base",
        download_root=MODEL_CACHE_DIR,
        compute_type="int8",
        device="cpu",
    )
    print("✅ Model loaded.")
    return _model
 def get_ffmpeg():
     """Locate the ffmpeg executable.

     Returns the path found on PATH, or "/usr/bin/ffmpeg" when the lookup
     finds nothing.
     """
     located = shutil.which("ffmpeg")
     if located:
         return located
     return "/usr/bin/ffmpeg"
 # ===============================
+# 📥 SAFE DOWNLOAD (YouTube, TikTok, etc.)
 # ===============================
def download_video(url):
    """Download the media at *url* with yt-dlp and return its path and title.

    The file is written to a fixed location in the system temp directory,
    replacing whatever an earlier call left there.

    Args:
        url: Address of the video page to download.

    Returns:
        A ``(video_path, title)`` tuple; ``title`` is "Untitled" when
        yt-dlp reports none.

    Raises:
        RuntimeError: If yt-dlp raises (the original exception is attached
            as ``__cause__``) or if no file exists at the expected path
            afterwards.
    """
    # NOTE(review): this path is shared by every request, so two downloads
    # running at the same time would overwrite each other's file.
    video_path = os.path.join(tempfile.gettempdir(), "downloaded_video.mp4")
    if os.path.exists(video_path):
        os.remove(video_path)
    ydl_opts = {
        "format": "bestvideo[ext=mp4]+bestaudio[ext=m4a]/best[ext=mp4]/best",
        "outtmpl": video_path,
        "quiet": True,
        "nocheckcertificate": True,
        "noplaylist": True,
        "retries": 10,
        "fragment_retries": 10,
    }
    # Only the yt-dlp call sits inside the try, so the "no file created"
    # error below is not re-wrapped with a second "Download failed:" prefix.
    try:
        with yt_dlp.YoutubeDL(ydl_opts) as ydl:
            info = ydl.extract_info(url, download=True)
    except Exception as e:
        raise RuntimeError(f"Download failed: {e}") from e
    if not os.path.exists(video_path):
        raise RuntimeError("Download failed: no file created")
    # Falls back to "Untitled" when the title is missing or empty.
    title = (info or {}).get("title") or "Untitled"
    return video_path, title
 # ===============================
+# 🎧 EXTRACT AUDIO (robust)
 # ===============================
def extract_audio(video_path):
    """Convert *video_path* to a 16 kHz mono 16-bit PCM WAV using ffmpeg.

    Args:
        video_path: Path of the media file to read.

    Returns:
        Path of the WAV file, at a fixed location in the system temp
        directory (any file already there is removed first).

    Raises:
        RuntimeError: If ffmpeg cannot be started, exits non-zero, runs
            longer than 60 seconds, or leaves a missing or near-empty
            (< 5000 bytes) output file.
    """
    audio_path = os.path.join(tempfile.gettempdir(), "extracted_audio.wav")
    if os.path.exists(audio_path):
        os.remove(audio_path)
    cmd = [
        get_ffmpeg(),
        # Keep stderr to real errors only: otherwise the version banner
        # comes first and crowds the actual failure out of short messages.
        "-hide_banner",
        "-loglevel", "error",
        "-y",
        "-i", video_path,
        "-vn",
        "-ac", "1",
        "-ar", "16000",
        "-c:a", "pcm_s16le",
        audio_path,
    ]
    try:
        result = subprocess.run(
            cmd,
            capture_output=True,
            text=True,
            errors="replace",  # ffmpeg output is not guaranteed to be valid UTF-8
            timeout=60,
        )
    except subprocess.TimeoutExpired as e:
        raise RuntimeError("Audio extraction timed out (>60s)") from e
    except OSError as e:
        # Raised when the ffmpeg binary is missing or not executable.
        raise RuntimeError(f"FFmpeg could not be started: {e}") from e
    if result.returncode != 0:
        raise RuntimeError(f"FFmpeg failed: {result.stderr.strip()}")
    if not os.path.exists(audio_path) or os.path.getsize(audio_path) < 5000:
        raise RuntimeError("Audio extraction produced empty/invalid file")
    return audio_path
 # ===============================
+# 🌐 LANGUAGE-AWARE TRANSLITERATION & NORMALIZATION
 # ===============================
def normalize_to_hindi(text):
    """Transliterate *text* to Devanagari and tidy its punctuation.

    Steps:
      1. Best-effort transliteration from each source scheme in turn.
      2. Drop every character that is not Devanagari (U+0900-U+097F), a
         space, or one of ``. , ? ! ; : - ( )``; collapse whitespace and
         runs of repeated ``.``, ``?`` or ``!``.
      3. Append a danda when the result does not already end in
         ``।``, ``.``, ``!`` or ``?``.

    Args:
        text: Raw transcript; ``None`` and blank strings are accepted.

    Returns:
        The cleaned string, or "" when the input is empty or nothing
        survives the cleanup.
    """
    import re  # function-scope import: the module does not import re at the top

    if not text or not text.strip():
        return ""
    # Step 1: each scheme is attempted on its own, so a failure in one (or a
    # scheme that this sanscript version does not define) does not skip the
    # others.
    # NOTE(review): confirm that sanscript actually exposes an ARABIC scheme.
    for scheme_name in ("ARABIC", "ITRANS"):
        try:
            source_scheme = getattr(sanscript, scheme_name)
            text = transliterate(text, source_scheme, sanscript.DEVANAGARI)
        except Exception:
            # Deliberate best effort: carry on with the text as it stands.
            continue
    # Step 2: clean punctuation & spacing
    text = re.sub(r'[^\u0900-\u097F\u0020\u002E\u002C\u003F\u0021\u003B\u003A\u002D\u0028\u0029]', '', text)
    text = re.sub(r'\s+', ' ', text).strip()
    # Collapse a run of any length ("..." or ". . ."), not only a pair.
    text = re.sub(r'\.(?:\s*\.)+', '.', text)
    text = re.sub(r'\?(?:\s*\?)+', '?', text)
    text = re.sub(r'!(?:\s*!)+', '!', text)
    # Step 3: close the text with a danda if it has no terminal punctuation
    if text and text[-1] not in "।.!?":
        text += "।"
    return text
 # ===============================
+# 🎯 CORE TRANSCRIBE FUNCTION (ALWAYS OUTPUT HINDI)
 # ===============================
def transcribe_to_hindi(url=None, file=None, lang_choice="Auto Detect"):
    """Transcribe an uploaded file or a video URL and return Devanagari text.

    Args:
        url: Video page address; used only when no file is supplied.
        file: Uploaded media, given either as a path string or as an object
            whose ``name`` attribute holds the path. Takes precedence over
            ``url``.
        lang_choice: Accepted for interface compatibility but not used;
            Whisper is always run with language auto-detection.

    Returns:
        A header (title and detected language) followed by the normalized
        transcript, or a user-facing message when the input is missing or
        processing fails. Exceptions raised while processing are caught and
        reported in the returned string.
    """
    try:
        # ======== INPUT HANDLING ========
        # NOTE(review): depending on the Gradio version, gr.File may deliver
        # a file object instead of a path string; reading .name covers both.
        file = getattr(file, "name", file)
        if isinstance(url, str):
            url = url.strip()
        if file:
            ext = os.path.splitext(file)[1].lower()
            if ext in (".mp3", ".wav", ".m4a", ".ogg"):
                # Already audio: hand it to Whisper as-is.
                audio_path = file
            else:
                audio_path = extract_audio(file)
            title = os.path.basename(file)
        elif url:
            video_path, title = download_video(url)
            audio_path = extract_audio(video_path)
        else:
            return "⚠️ Please paste a URL or upload a file."
        # Safety check
        if not os.path.exists(audio_path) or os.path.getsize(audio_path) < 5000:
            return "❌ Audio file too small or missing. Try again."
        # ======== TRANSCRIPTION ========
        model = load_whisper_model()
        segments, info = model.transcribe(
            audio_path,
            beam_size=5,
            best_of=3,
            patience=1.0,
            temperature=(0.0, 0.2, 0.4, 0.6, 0.8, 1.0),
            vad_filter=True,
            word_timestamps=False,
            language=None  # Auto-detect
        )
        raw_text = " ".join(seg.text for seg in segments).strip()
        # ======== FORCE HINDI OUTPUT ========
        # Whatever language was detected, the text is pushed through the
        # Devanagari normalizer.
        final_text = normalize_to_hindi(raw_text)
        # Header: title (clipped to 50 characters) plus detected language
        title = title or "Untitled"
        header = f"🎬 {title[:50]}{'...' if len(title) > 50 else ''}\n"
        header += f"🌍 Detected: {info.language or 'Unknown'} → 🇮🇳 Output: Hindi (Devanagari)\n\n"
        return header + final_text
    except Exception as e:
        message = str(e)
        err_msg = message.lower()
        if "instagram" in err_msg:
            return (
                "❌ Instagram URLs are blocked on Hugging Face.\n\n"
                "✅ Solution: Download the video manually (e.g., via online downloader), then upload it here."
            )
        if "timeout" in err_msg or "network" in err_msg:
            return "⚠️ Network timeout. Try again or upload file directly."
        # Add the ellipsis only when the message really was cut short.
        if len(message) > 200:
            message = message[:200] + "..."
        return f"❌ Error: {message}"
 # ===============================
+# 🎨 MODERN UI (HUGGING FACE OPTIMIZED)
 # ===============================
# Stylesheet handed to gr.Blocks(css=...): dark gradient page background,
# translucent "glass" card, gradient primary button and tinted inputs.
# NOTE(review): the backdrop-filter value was unreadable in this revision;
# blur(18px) is carried over from the previous revision's stylesheet —
# confirm the intended radius.
CSS = """
/* Glassmorphism + Dark Gradient */
body {
    background: radial-gradient(circle at top, #0c1445, #1a2a6c, #2c3e50);
    font-family: 'Inter', system-ui, sans-serif;
}
.glass-card {
    background: rgba(255, 255, 255, 0.07);
    backdrop-filter: blur(18px);
    border-radius: 20px;
    padding: 28px;
    box-shadow: 0 12px 32px rgba(0, 0, 0, 0.4);
    border: 1px solid rgba(255, 255, 255, 0.1);
}
.gr-button-primary {
    background: linear-gradient(135deg, #6a11cb 0%, #2575fc 100%);
    border: none;
    color: white;
    font-weight: 600;
    padding: 12px 24px;
    border-radius: 12px;
    transition: all 0.3s ease;
}
.gr-button-primary:hover {
    transform: translateY(-2px);
    box-shadow: 0 6px 15px rgba(37, 117, 252, 0.4);
}
.gr-input, .gr-textarea, .gr-dropdown {
    background: rgba(255, 255, 255, 0.08) !important;
    color: #e0e0ff !important;
    border: 1px solid rgba(255, 255, 255, 0.15) !important;
    border-radius: 10px;
}
.gr-markdown p, .gr-markdown h2 {
    color: #f0f4ff !important;
}
footer { display: none !important; }
.title {
    font-size: 2.2rem;
    font-weight: 800;
    background: linear-gradient(90deg, #ffd700, #ff8c00);
    -webkit-background-clip: text;
    background-clip: text;
    color: transparent;
    margin-bottom: 12px;
}
.subtitle {
    color: #a0d2eb;
    font-size: 1.1rem;
    margin-bottom: 24px;
}
.feature-badge {
    display: inline-block;
    background: rgba(106, 17, 203, 0.3);
    color: #ffd700;
    padding: 3px 10px;
    border-radius: 20px;
    font-size: 0.85rem;
    margin: 0 4px;
}"""
# Page layout: one glass card holding the header, URL and File tabs, a hidden
# language dropdown, and the transcript box. Both buttons call
# transcribe_to_hindi and write its return value into the transcript box.
# NOTE(review): gr.themes.Color expects the full c50-c950 palette, so the
# partial palettes were replaced with named hues; gr.File has no `info`
# argument, so the format hint now lives in its label.
with gr.Blocks(
    css=CSS,
    theme=gr.themes.Default(
        primary_hue="indigo",
        secondary_hue="orange",
        neutral_hue="slate",
    ),
    title="🗣️ AI Hindi Transcript Studio",
) as demo:
    with gr.Column(elem_classes=["glass-card"]):
        gr.HTML("<div class='title'>AI Hindi Transcript Studio</div>")
        gr.HTML("<div class='subtitle'>Upload or paste any video → Get clean Devanagari Hindi transcript instantly</div>")
        gr.Markdown(
            "✨ Supports: YouTube, TikTok, Facebook, Twitter/X, Instagram (via upload), local files<br>"
            "⚡ Zero ffprobe errors • Auto-script conversion • Real-time cleanup"
        )
        with gr.Tabs():
            with gr.TabItem("🔗 URL"):
                url_input = gr.Textbox(
                    label="🎥 Video URL",
                    placeholder="https://youtu.be/...",
                    info="Instagram? Upload file instead (HF restriction)"
                )
                btn_url = gr.Button("🔊 Transcribe to Hindi", variant="primary", size="lg")
            with gr.TabItem("📂 File"):
                file_input = gr.File(
                    label="📁 Upload Video/Audio (MP4, MOV, MP3, WAV, M4A, etc.)",
                    file_types=["video", "audio"],
                )
                btn_file = gr.Button("📖 Convert to Hindi", variant="primary", size="lg")
        # Hidden placeholder: it only fills the handler's third parameter,
        # because the output is always forced to Hindi.
        lang_dummy = gr.Dropdown(
            choices=["Auto (→ Hindi)"],
            value="Auto (→ Hindi)",
            interactive=False,
            visible=False
        )
        output_box = gr.Textbox(
            label="📝 Hindi Transcript (Devanagari)",
            lines=16,
            max_lines=25,
            show_copy_button=True,
            interactive=False,
            elem_classes=["gr-textarea"]
        )
        gr.Markdown(
            "<div style='text-align:center; margin-top:20px; color:#a0d2eb; font-size:0.9rem;'>"
            "🚀 Powered by Faster-Whisper + Indic Transliteration | Deployed on Hugging Face Spaces"
            "</div>"
        )
    # Event bindings: gr.State(None) fills whichever of url/file the
    # clicked tab does not provide.
    btn_url.click(
        fn=transcribe_to_hindi,
        inputs=[url_input, gr.State(None), lang_dummy],
        outputs=output_box
    )
    btn_file.click(
        fn=transcribe_to_hindi,
        inputs=[gr.State(None), file_input, lang_dummy],
        outputs=output_box
    )
# Enable the request queue: at most 2 jobs at once, at most 10 waiting.
# Gradio 4+ rejects `concurrency_count` and takes `default_concurrency_limit`
# instead; Gradio 3.x only knows `concurrency_count`, so try the newer
# keyword first and fall back when it is not accepted.
try:
    demo.queue(default_concurrency_limit=2, max_size=10)
except TypeError:
    demo.queue(concurrency_count=2, max_size=10)
if __name__ == "__main__":
    # Bind to all interfaces on port 7860.
    demo.launch(server_name="0.0.0.0", server_port=7860)