Spaces:

Offex
/

Transcripttiktok

Running

App Files Files Community

Offex committed on Feb 9

Commit

1247156

verified ·

1 Parent(s): 5deb27a

Update app.py

Browse files

Files changed (1) hide show

app.py +61 -31

app.py CHANGED Viewed

@@ -5,18 +5,19 @@ import shutil
 import requests
 from faster_whisper import WhisperModel
-# --- 1. Model Setup ---
 model = None
 def load_model():
     global model
     if model is None:
         print("📥 Loading Whisper Model...")
         model = WhisperModel("base", device="cpu", compute_type="int8")
         print("✅ Model Loaded!")
     return model
-# --- 2. URL Resolver ---
 def get_actual_url(short_url):
     try:
         headers = {'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/91.0.4472.124 Safari/537.36'}
@@ -25,60 +26,89 @@ def get_actual_url(short_url):
     except:
         return short_url
-# --- 3. Process Video & Transcript ---
-def process_video(url):
     if not url:
-        return None, "⚠️ URL missing!"
     actual_url = get_actual_url(url)
-    output_video = "final_video.mp4"
-    if os.path.exists(output_video): os.remove(output_video)
     ffmpeg_path = shutil.which("ffmpeg") or "/usr/bin/ffmpeg"
     ydl_opts = {
-        'format': 'bestvideo[ext=mp4]+bestaudio[ext=m4a]/best[ext=mp4]/best',
-        'outtmpl': 'downloaded_video.%(ext)s',
-        'merge_output_format': 'mp4', # ब्राउज़र संगतता के लिए पक्का MP4
         'ffmpeg_location': ffmpeg_path,
         'http_headers': {
-            'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/120.0.0.0 Safari/537.36',
             'Referer': 'https://www.tiktok.com/'
         }
     }
     try:
         with yt_dlp.YoutubeDL(ydl_opts) as ydl:
             ydl.download([actual_url])
-        # डाउनलोड की गई फ़ाइल का नाम बदलें
-        downloaded_file = next(f for f in os.listdir('.') if f.startswith('downloaded_video'))
-        shutil.move(downloaded_file, output_video)
     except Exception as e:
-        return None, f"❌ Error: {str(e)}"
-    # Transcribe
     try:
         current_model = load_model()
-        segments, _ = current_model.transcribe(output_video, beam_size=1)
         text = " ".join([s.text for s in segments])
-        return output_video, text
     except Exception as e:
-        return output_video, f"Transcription Error: {str(e)}"
-# --- 4. UI ---
-with gr.Blocks(theme="soft") as demo:
-    gr.Markdown("# 🎵 TikTok Downloader & Transcriber")
-    with gr.Row():
-        link_input = gr.Textbox(label="TikTok URL", placeholder="लिंक यहाँ पेस्ट करें...")
-        btn = gr.Button("🚀 Start", variant="primary")
-    with gr.Row():
-        video_out = gr.Video(label="Video")
-        # 'gr.Code' में कॉपी बटन पहले से मौजूद होता है
-        transcript_out = gr.Code(label="Transcript (Copy करने के लिए ऊपर दाईं ओर क्लिक करें)", language="markdown")
-    btn.click(fn=process_video, inputs=link_input, outputs=[video_out, transcript_out])
 demo.launch()

 import requests
 from faster_whisper import WhisperModel
# --- 1. Model Setup (Lazy Loading) ---
# Module-level cache for the Whisper model; stays None until first use.
model = None


def load_model():
    """Return the shared Whisper model, creating it on the first call.

    The model is built once and stored in the module-level ``model``
    variable; later calls return that same object without reloading.

    Returns:
        The cached ``WhisperModel`` instance.
    """
    global model
    if model is None:
        print("📥 Loading Whisper Model...")
        # The 'base' model is a balance between speed and accuracy.
        model = WhisperModel("base", device="cpu", compute_type="int8")
        print("✅ Model Loaded!")
    return model
+# --- 2. URL Resolver (Short Link Fix) ---
 def get_actual_url(short_url):
     try:
         headers = {'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/91.0.4472.124 Safari/537.36'}
     except:
         return short_url
# --- 3. Audio Download & Transcribe ---
def process_audio(url):
    """Download the audio of a TikTok video and return its transcript.

    Args:
        url: The link entered by the user. It is passed through
            ``get_actual_url`` before being handed to yt-dlp.

    Returns:
        The transcript with surrounding whitespace stripped. When the URL
        is empty, the download fails, or transcription fails, a
        human-readable message string is returned instead of raising, so
        the caller can display it as-is.
    """
    if not url:
        return "⚠️ कृपया URL डालें।"

    print(f"Processing: {url}")
    actual_url = get_actual_url(url)

    # yt-dlp writes to `audio_stem`; the FFmpegExtractAudio post-processor
    # then produces `<stem>.mp3`, which is the file that gets transcribed.
    audio_stem = "tiktok_audio"
    output_audio = f"{audio_stem}.mp3"

    # Drop the result of any previous request so a stale file cannot be
    # mistaken for the output of this one.
    if os.path.exists(output_audio):
        os.remove(output_audio)

    # Prefer ffmpeg from PATH, falling back to the usual system location.
    ffmpeg_path = shutil.which("ffmpeg") or "/usr/bin/ffmpeg"

    # Audio-only download settings (no video stream is fetched).
    ydl_opts = {
        'format': 'bestaudio/best',
        'outtmpl': audio_stem,
        'ffmpeg_location': ffmpeg_path,
        'postprocessors': [{
            'key': 'FFmpegExtractAudio',
            'preferredcodec': 'mp3',
            'preferredquality': '192',
        }],
        'quiet': True,
        'no_warnings': True,
        'http_headers': {
            'User-Agent': 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/120.0.0.0 Safari/537.36',
            'Referer': 'https://www.tiktok.com/'
        }
    }

    # 1. Download audio
    try:
        with yt_dlp.YoutubeDL(ydl_opts) as ydl:
            ydl.download([actual_url])
    except Exception as e:
        return f"❌ Download Error: {e}"

    # Report a missing file as a download problem rather than letting it
    # surface later as a confusing transcription failure.
    if not os.path.exists(output_audio):
        return f"❌ Download Error: expected audio file '{output_audio}' was not created"

    # 2. Transcribe
    try:
        current_model = load_model()
        # Pass the real file name; appending ".mp3" again would point at
        # the non-existent "tiktok_audio.mp3.mp3".
        segments, _ = current_model.transcribe(output_audio, beam_size=5)
        text = " ".join(s.text for s in segments)
        return text.strip()
    except Exception as e:
        return f"Transcription Error: {e}"
# --- 4. Professional UI ---
# Custom CSS for a clean look: a centred, width-limited container and a
# gradient background for the primary button.
css = """
.container {max-width: 800px; margin: auto;}
.gr-button-primary {background: linear-gradient(90deg, #4b6cb7 0%, #182848 100%); border: none;}
"""

with gr.Blocks(theme=gr.themes.Soft(), css=css) as demo:
    with gr.Column(elem_classes="container"):
        gr.Markdown(
            """
            # 📝 Professional TikTok Transcriber
            Paste the link below to extract text from the video.
            """
        )
        # URL field and action button share one row (4:1 width ratio).
        with gr.Row():
            link_input = gr.Textbox(
                label="TikTok URL",
                placeholder="Paste https://vt.tiktok.com/... link here",
                scale=4
            )
            btn = gr.Button("Transcribe Text", variant="primary", scale=1)
        # A gr.Code box is used for the transcript because it comes with a
        # copy button in its header.
        transcript_out = gr.Code(
            label="Transcript Result (Click Copy Icon 📋)",
            language="markdown",
            interactive=False,
            lines=15
        )
    # Button action: send the URL to process_audio and show the returned text.
    btn.click(fn=process_audio, inputs=link_input, outputs=transcript_out)

demo.launch()