Spaces:

Offex
/

Transcripttiktok

Running

App Files Files Community

Offex committed on Feb 9

Commit

ab99879

verified ·

1 Parent(s): 1247156

Update app.py

Browse files

Files changed (1) hide show

app.py +28 -60

app.py CHANGED Viewed

@@ -1,51 +1,38 @@
 import gradio as gr
 import yt_dlp
 import os
-import shutil
-import requests
 from faster_whisper import WhisperModel
-# --- 1. Model Setup (Lazy Loading) ---
 # Module-level cache for the Whisper model; stays None until first use.
 model = None


 def load_model():
     """Return the shared Whisper model, constructing it on the first call.

     The model is only built when it is first requested; every later call
     returns the instance stored in the module-level ``model`` variable.
     """
     global model
     if model is not None:
         return model

     print("📥 Loading Whisper Model...")
     # The "base" model is a balance between speed and accuracy.
     model = WhisperModel("base", device="cpu", compute_type="int8")
     print("✅ Model Loaded!")
     return model
-# --- 2. URL Resolver (Short Link Fix) ---
def get_actual_url(short_url):
    """Resolve a shortened link to the URL it finally redirects to.

    Sends a HEAD request that follows redirects and returns the final URL.
    Resolution is best-effort: if the request fails for any reason the
    input is returned unchanged.

    Args:
        short_url: The URL to resolve (for example a short share link).

    Returns:
        The URL reached after following redirects, or ``short_url`` itself
        when the request could not be completed.
    """
    # Browser-like User-Agent sent with the request.
    headers = {'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/91.0.4472.124 Safari/537.36'}
    try:
        # Without an explicit timeout, requests waits indefinitely on a
        # stalled server, which would block the caller.
        response = requests.head(
            short_url,
            allow_redirects=True,
            headers=headers,
            timeout=10,
        )
    except Exception:
        # Unlike a bare ``except:``, this lets KeyboardInterrupt and
        # SystemExit propagate while keeping the best-effort fallback.
        return short_url
    return response.url
-# --- 3. Audio Download & Transcribe ---
 def process_audio(url):
     if not url:
         return "⚠️ कृपया URL डालें।"
     print(f"Processing: {url}")
-    actual_url = get_actual_url(url)
-    # Filename
-    output_audio = "tiktok_audio.mp3"
-    if os.path.exists(output_audio): os.remove(output_audio)
-    # FFmpeg check
-    ffmpeg_path = shutil.which("ffmpeg") or "/usr/bin/ffmpeg"
-    # Audio Download Settings (No Video)
     ydl_opts = {
-        'format': 'bestaudio/best',  # Sirf Audio download karega (Bahut Fast)
-        'outtmpl': 'tiktok_audio',
-        'ffmpeg_location': ffmpeg_path,
         'postprocessors': [{
             'key': 'FFmpegExtractAudio',
             'preferredcodec': 'mp3',
@@ -53,62 +40,43 @@ def process_audio(url):
         }],
         'quiet': True,
         'no_warnings': True,
         'http_headers': {
-            'User-Agent': 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/120.0.0.0 Safari/537.36',
             'Referer': 'https://www.tiktok.com/'
         }
     }
-    # 1. Download Audio
     try:
         with yt_dlp.YoutubeDL(ydl_opts) as ydl:
-            ydl.download([actual_url])
     except Exception as e:
-        return f"❌ Download Error: {str(e)}"
     # 2. Transcribe
     try:
         current_model = load_model()
         segments, _ = current_model.transcribe(f"{output_audio}.mp3", beam_size=5)
         text = " ".join([s.text for s in segments])
         return text.strip()
     except Exception as e:
         return f"Transcription Error: {str(e)}"
# --- 4. Professional UI ---
# Custom CSS for a clean look: centres the container and restyles the
# primary button with a gradient.
css = (
    "\n"
    ".container {max-width: 800px; margin: auto;}\n"
    ".gr-button-primary {background: linear-gradient(90deg, #4b6cb7 0%, #182848 100%); border: none;}\n"
)

# Heading and usage hint rendered at the top of the page.
header_markdown = (
    "\n"
    "            # 📝 Professional TikTok Transcriber\n"
    "            Paste the link below to extract text from the video.\n"
    "            "
)

with gr.Blocks(theme=gr.themes.Soft(), css=css) as demo:
    with gr.Column(elem_classes="container"):
        gr.Markdown(header_markdown)

        # Input row: URL box next to the action button.
        with gr.Row():
            link_input = gr.Textbox(
                label="TikTok URL",
                placeholder="Paste https://vt.tiktok.com/... link here",
                scale=4,
            )
            btn = gr.Button("Transcribe Text", variant="primary", scale=1)

        # Original author's note: the 'gr.Code' box comes with an automatic
        # copy button in its header.
        transcript_out = gr.Code(
            label="Transcript Result (Click Copy Icon 📋)",
            language="markdown",
            interactive=False,
            lines=15,
        )

    # Button action: send the URL to process_audio and show the result.
    btn.click(fn=process_audio, inputs=link_input, outputs=transcript_out)

demo.launch()

 import gradio as gr
 import yt_dlp
 import os
 from faster_whisper import WhisperModel
+# --- 1. Model Setup ---
 # Holds the Whisper model once it has been created; None until then.
 model = None


 def load_model():
     """Return the cached Whisper model, creating it on first use.

     The first call constructs a ``WhisperModel`` ("base", CPU, int8) and
     stores it in the module-level ``model``; later calls return that same
     object without constructing anything.
     """
     global model
     if model is not None:
         return model

     print("📥 Loading Whisper Model...")
     model = WhisperModel("base", device="cpu", compute_type="int8")
     print("✅ Model Loaded!")
     return model
+# --- 2. Process Audio ---
 def process_audio(url):
     """Download the audio of a TikTok link and return its transcript.

     The audio is fetched with yt-dlp, converted to MP3 by its
     ``FFmpegExtractAudio`` post-processor, and transcribed with the model
     returned by ``load_model()``. Failures are reported as message strings
     instead of being raised, so the result can be shown directly in the UI.

     Args:
         url: Link to the video, as entered by the user.

     Returns:
         The transcript text, or a human-readable warning/error message.
     """
     # Function-scope imports keep this block self-contained.
     import glob
     import uuid

     # Reject None, empty and whitespace-only input before doing any work.
     if not url or not url.strip():
         return "⚠️ कृपया URL डालें।"
     url = url.strip()
     print(f"Processing: {url}")

     # A unique base name per call: a fixed name would let overlapping
     # requests delete or overwrite each other's audio file.
     output_audio = f"tiktok_audio_{uuid.uuid4().hex}"
     mp3_path = f"{output_audio}.mp3"

     # Directory that contains both ffmpeg and ffprobe.
     ffmpeg_dir = "/usr/bin"

     ydl_opts = {
         'format': 'bestaudio/best',
         'outtmpl': output_audio,
         'ffmpeg_location': ffmpeg_dir,
         'postprocessors': [{
             'key': 'FFmpegExtractAudio',
             'preferredcodec': 'mp3',
         }],
         'quiet': True,
         'no_warnings': True,
         # NOTE(review): this disables TLS certificate verification for the
         # download; confirm it is still required before keeping it.
         'nocheckcertificate': True,
         'http_headers': {
             'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/91.0.4472.124 Safari/537.36',
             'Referer': 'https://www.tiktok.com/'
         }
     }

     try:
         # 1. Download
         try:
             with yt_dlp.YoutubeDL(ydl_opts) as ydl:
                 ydl.download([url])
         except Exception as e:
             return f"❌ Download Error: {e}\n(Ensure packages.txt has 'ffmpeg')"

         # 2. Transcribe
         if not os.path.exists(mp3_path):
             return "❌ Error: Audio file download nahi ho payi."
         try:
             current_model = load_model()
             # Transcribe directly from the MP3. faster-whisper yields
             # segments lazily, so the file must still exist while the
             # join below consumes them.
             segments, _ = current_model.transcribe(mp3_path, beam_size=5)
             text = " ".join(s.text for s in segments)
             return text.strip()
         except Exception as e:
             return f"Transcription Error: {e}"
     finally:
         # Best-effort cleanup of everything this call wrote (the MP3 and
         # any partial download) so files do not pile up on disk.
         for leftover in glob.glob(f"{output_audio}*"):
             try:
                 os.remove(leftover)
             except OSError:
                 pass
# --- 3. UI ---
# Single-page Gradio app: a URL box and a button on one row, with the
# transcript shown in a read-only code box underneath.
with gr.Blocks(theme=gr.themes.Soft()) as demo:
    gr.Markdown("# 📝 Professional TikTok Transcriber")

    with gr.Row():
        link_input = gr.Textbox(
            label="TikTok URL",
            placeholder="Paste Link Here...",
        )
        btn = gr.Button("Transcribe", variant="primary")

    transcript_out = gr.Code(
        label="Transcript",
        language="markdown",
        interactive=False,
        lines=15,
    )

    # Clicking the button sends the URL to process_audio and displays
    # whatever string it returns.
    btn.click(fn=process_audio, inputs=link_input, outputs=transcript_out)

demo.launch()