Spaces:

Offex
/

Transcripttiktok

Running

App Files Community

Offex committed on Feb 12

Commit

fbfb3b5

verified ·

1 Parent(s): eeb950d

Update app.py

Browse files

Files changed (1) hide show

app.py +96 -80

app.py CHANGED Viewed

@@ -4,63 +4,38 @@ import os
 import shutil
 import subprocess
 from faster_whisper import WhisperModel
-# 🔤 Hindi Script Fix
 from indic_transliteration import sanscript
 from indic_transliteration.sanscript import transliterate
 # ===============================
-# 1. Whisper Model (Lazy Load)
 # ===============================
 model = None
 def load_model():
     global model
     if model is None:
-        print("📥 Loading Whisper Model...")
         model = WhisperModel("base", device="cpu", compute_type="int8")
-        print("✅ Model Loaded")
     return model
 # ===============================
-# 2. FFmpeg Path
 # ===============================
 def get_ffmpeg():
     return shutil.which("ffmpeg") or "/usr/bin/ffmpeg"
 # ===============================
-# 3. Video → Audio
 # ===============================
-def extract_audio(video_path):
-    audio_path = "uploaded_audio.wav"
-    if os.path.exists(audio_path):
-        os.remove(audio_path)
-    cmd = [
-        get_ffmpeg(),
-        "-i", video_path,
-        "-vn",
-        "-ac", "1",
-        "-ar", "16000",
-        audio_path,
-        "-y"
-    ]
-    subprocess.run(cmd, stdout=subprocess.DEVNULL, stderr=subprocess.DEVNULL)
-    return audio_path
-# ===============================
-# 4. Download Audio from URL
-# ===============================
-def download_audio_from_url(url):
-    output = "url_audio"
     ydl_opts = {
-        "format": "bestaudio/best",
-        "outtmpl": output,
-        "postprocessors": [{
-            "key": "FFmpegExtractAudio",
-            "preferredcodec": "wav",
-        }],
         "quiet": True,
         "nocheckcertificate": True,
     }
@@ -68,10 +43,35 @@ def download_audio_from_url(url):
     with yt_dlp.YoutubeDL(ydl_opts) as ydl:
         ydl.download([url])
-    return "url_audio.wav"
 # ===============================
-# 5. Hindi Script Normalizer
 # ===============================
 def normalize_script(text, lang):
     if lang == "hi":
@@ -82,91 +82,108 @@ def normalize_script(text, lang):
     return text
 # ===============================
-# 6. Main Transcribe Logic
 # ===============================
-def transcribe_media(url_input, file_input, language_choice):
     try:
-        audio_path = None
-        # ---------- FILE ----------
-        if file_input:
-            ext = os.path.splitext(file_input)[1].lower()
             if ext in [".mp3", ".wav", ".m4a"]:
-                audio_path = file_input
             else:
-                audio_path = extract_audio(file_input)
-        # ---------- URL ----------
-        elif url_input and url_input.strip():
-            audio_path = download_audio_from_url(url_input)
         else:
-            return "⚠️ Please paste a link or upload a file."
-        if not os.path.exists(audio_path):
-            return "❌ Audio processing failed."
         model = load_model()
-        # Language handling
-        language = None if language_choice == "Auto Detect" else language_choice
         segments, info = model.transcribe(
-            audio_path,
             beam_size=1,
             vad_filter=True,
             language=language
         )
-        detected_lang = info.language
-        raw_text = " ".join(seg.text for seg in segments)
-        final_text = normalize_script(raw_text, detected_lang)
-        return f"🌍 Detected Language: {detected_lang}\n\n{final_text.strip()}"
     except Exception as e:
         return f"❌ Error: {str(e)}"
 # ===============================
-# 7. UI
 # ===============================
 css = """
-.container {max-width: 900px; margin: auto;}
 .gr-button-primary {
-    background: linear-gradient(90deg,#667eea,#764ba2);
     border: none;
     color: white;
 }
 """
-with gr.Blocks(theme=gr.themes.Soft(), css=css) as demo:
-    with gr.Column(elem_classes="container"):
-        gr.Markdown("## 🚀 Universal Transcript Tool")
         gr.Markdown(
-            "Supports **YouTube, TikTok, Instagram, Facebook, Twitter/X**\n\n"
-            "Hindi output is always **Devanagari** 🇮🇳"
         )
         with gr.Tabs():
             with gr.TabItem("🔗 Paste Link"):
-                url_in = gr.Textbox(label="Video URL")
                 btn_url = gr.Button("🎧 Transcribe Link", variant="primary")
             with gr.TabItem("📂 Upload File"):
-                file_in = gr.File(
                     label="Upload Video / Audio",
                     file_types=[".mp4", ".mkv", ".mov", ".webm", ".avi", ".mp3", ".wav"]
                 )
                 btn_file = gr.Button("📂 Transcribe File", variant="primary")
-        # 🌍 Language Selector
-        language_selector = gr.Dropdown(
             choices=[
                 "Auto Detect",
-                "hi",  # Hindi (Devanagari)
-                "ur",  # Urdu
-                "en",  # English
                 "ar",
                 "fr",
                 "de",
@@ -175,13 +192,12 @@ with gr.Blocks(theme=gr.themes.Soft(), css=css) as demo:
                 "ja",
                 "zh"
             ],
-            value="Auto Detect",
-            label="🌍 Select Transcript Language"
         )
-        output = gr.Code(label="Transcript Output", lines=15)
-    btn_url.click(transcribe_media, [url_in, gr.State(None), language_selector], output)
-    btn_file.click(transcribe_media, [gr.State(None), file_in, language_selector], output)
 demo.launch()

 import shutil
 import subprocess
 from faster_whisper import WhisperModel
 from indic_transliteration import sanscript
 from indic_transliteration.sanscript import transliterate
 # ===============================
+# Whisper Model (lazy load)
 # ===============================
 model = None
 def load_model():
     global model
     if model is None:
         model = WhisperModel("base", device="cpu", compute_type="int8")
     return model
 # ===============================
+# FFmpeg path
 # ===============================
 def get_ffmpeg():
     return shutil.which("ffmpeg") or "/usr/bin/ffmpeg"
 # ===============================
+# SAFE: Download video only (NO postprocessing)
 # ===============================
+def download_video_only(url):
+    video_path = "downloaded_video.mp4"
+    if os.path.exists(video_path):
+        os.remove(video_path)
     ydl_opts = {
+        "format": "best",
+        "outtmpl": video_path,
         "quiet": True,
         "nocheckcertificate": True,
     }
     with yt_dlp.YoutubeDL(ydl_opts) as ydl:
         ydl.download([url])
+    return video_path
 # ===============================
+# SAFE: Extract audio manually (NO ffprobe)
+# ===============================
+def extract_audio_safe(video_path):
+    audio_path = "extracted_audio.wav"
+    if os.path.exists(audio_path):
+        os.remove(audio_path)
+    subprocess.run(
+        [
+            get_ffmpeg(),
+            "-y",
+            "-i", video_path,
+            "-vn",
+            "-ac", "1",
+            "-ar", "16000",
+            audio_path
+        ],
+        stdout=subprocess.DEVNULL,
+        stderr=subprocess.DEVNULL
+    )
+    return audio_path
+# ===============================
+# Hindi script normalizer
 # ===============================
 def normalize_script(text, lang):
     if lang == "hi":
     return text
 # ===============================
+# Transcription logic (STABLE)
 # ===============================
+def transcribe(url, file, lang_choice):
     try:
+        # -------- FILE MODE --------
+        if file:
+            ext = os.path.splitext(file)[1].lower()
             if ext in [".mp3", ".wav", ".m4a"]:
+                audio = file
             else:
+                audio = extract_audio_safe(file)
+        # -------- URL MODE --------
+        elif url:
+            video = download_video_only(url)
+            audio = extract_audio_safe(video)
         else:
+            return "⚠️ Please paste a URL or upload a file."
+        # Safety check
+        if not os.path.exists(audio) or os.path.getsize(audio) < 10000:
+            return "❌ Audio extraction failed. Please try again."
         model = load_model()
+        language = None if lang_choice == "Auto Detect" else lang_choice
         segments, info = model.transcribe(
+            audio,
             beam_size=1,
             vad_filter=True,
             language=language
         )
+        raw_text = " ".join(s.text for s in segments)
+        final_text = normalize_script(raw_text, info.language)
+        return f"🌍 Detected Language: {info.language}\n\n{final_text.strip()}"
     except Exception as e:
+        if "instagram" in str(e).lower():
+            return "❌ Instagram URL is blocked on Hugging Face. Please upload the video file instead."
         return f"❌ Error: {str(e)}"
 # ===============================
+# MODERN UI
 # ===============================
 css = """
+body {
+    background: radial-gradient(circle at top, #0f2027, #203a43, #2c5364);
+}
+.glass {
+    background: rgba(255,255,255,0.08);
+    backdrop-filter: blur(18px);
+    border-radius: 18px;
+    padding: 25px;
+    box-shadow: 0 20px 40px rgba(0,0,0,0.4);
+}
 .gr-button-primary {
+    background: linear-gradient(135deg,#00c6ff,#0072ff);
     border: none;
     color: white;
+    font-weight: 600;
+}
+.gr-input, .gr-textarea {
+    background: rgba(255,255,255,0.12) !important;
+    color: white !important;
+}
+h1, h2, label, .markdown-text {
+    color: #ffffff !important;
 }
+footer {display:none;}
 """
+with gr.Blocks(css=css, theme=gr.themes.Base()) as demo:
+    with gr.Column(elem_classes="glass"):
+        gr.Markdown("## 🚀 Universal Transcript Tool (STABLE)")
         gr.Markdown(
+            "✔ YouTube ✔ TikTok ✔ Facebook ✔ Twitter/X\n\n"
+            "⚠️ Instagram URL blocked on Hugging Face → **Upload video instead**\n\n"
+            "**No random ffprobe errors. Ever.**"
         )
         with gr.Tabs():
             with gr.TabItem("🔗 Paste Link"):
+                url = gr.Textbox(label="Video URL")
                 btn_url = gr.Button("🎧 Transcribe Link", variant="primary")
             with gr.TabItem("📂 Upload File"):
+                file = gr.File(
                     label="Upload Video / Audio",
                     file_types=[".mp4", ".mkv", ".mov", ".webm", ".avi", ".mp3", ".wav"]
                 )
                 btn_file = gr.Button("📂 Transcribe File", variant="primary")
+        lang = gr.Dropdown(
+            label="🌍 Transcript Language",
             choices=[
                 "Auto Detect",
+                "hi",
+                "ur",
+                "en",
                 "ar",
                 "fr",
                 "de",
                 "ja",
                 "zh"
             ],
+            value="Auto Detect"
         )
+        output = gr.Code(label="Transcript Output", lines=14)
+    btn_url.click(transcribe, [url, gr.State(None), lang], output)
+    btn_file.click(transcribe, [gr.State(None), file, lang], output)
 demo.launch()