Spaces:

ayloll
/

VideoToTexttik

Sleeping

App Files Community

ayloll committed on Jun 20, 2025

Commit

525e22b

verified ·

1 Parent(s): efd114f

Update app.py

Browse files

Files changed (1) hide show

app.py +85 -105

app.py CHANGED Viewed

@@ -1,114 +1,111 @@
-# First install required packages (this should be at the very top)
-#!pip install --upgrade gradio yt-dlp openai-whisper transformers ffmpeg-python pydub
 import gradio as gr
 from transformers import pipeline
 import yt_dlp
 import whisper
 import os
-import requests
 import uuid
 import re
-# Delete old files
-def clean_old_files():
-    files = ["video.mp4", "audio.mp3", "transcription.txt"]
-    for file in files:
         if os.path.exists(file):
             os.remove(file)
 # Download TikTok video
 def download_video(video_url):
-    unique_name = f"video_{uuid.uuid4().hex[:8]}.mp4"
-    ydl_opts = {
-        'format': 'bestvideo[ext=mp4]+bestaudio[ext=m4a]/best[ext=mp4]',
-        'outtmpl': unique_name,
-        'quiet': True,
-        'no_warnings': True,
-        'extractor_args': {
-            'tiktok': {
-                'skip_watermark': True
-            }
-        }
-    }
     try:
         with yt_dlp.YoutubeDL(ydl_opts) as ydl:
             ydl.download([video_url])
-        return unique_name
     except Exception as e:
-        print(f"Error downloading video: {e}")
         return None
-# Extract audio from video
 def extract_audio(video_path):
-    audio_path = "audio.mp3"
-    os.system(f"ffmpeg -i \"{video_path}\" -vn -acodec libmp3lame -q:a 3 \"{audio_path}\" -y")
-    if not os.path.exists(audio_path):
-        raise RuntimeError("Error: Failed to extract audio.")
-    return audio_path
-# Convert audio to text
 def transcribe_audio(audio_path):
     try:
         model = whisper.load_model("base")
         result = model.transcribe(audio_path)
         return result['text']
     except Exception as e:
-        return f"Transcription error: {str(e)}"
 # Classify content
 def classify_content(text):
     try:
         if not text or len(text.strip()) == 0:
-            return "No text to classify"
         classifier = pipeline("zero-shot-classification",
                            model="facebook/bart-large-mnli")
-        labels = ["Challenge", "Comedy", "Dance", "Educational", "TikTok Trend",
-                "Music", "Lifestyle", "Beauty", "Cooking", "Fashion"]
-        clean_text = ' '.join(text.split()[:500])
-        result = classifier(clean_text,
                           candidate_labels=labels,
-                          hypothesis_template="This content is about {}.")
-        return f"{result['labels'][0]} (Confidence: {result['scores'][0]:.2f})"
     except Exception as e:
-        return f"Classification error: {str(e)}"
-# Main video processing function
 def process_video(video_url):
-    try:
-        clean_old_files()
-        if not video_url or len(video_url.strip()) == 0:
-            return ["Please enter a valid video URL", "", None, None]
-        if "tiktok.com" not in video_url and "vm.tiktok.com" not in video_url:
-            return ["This app is for TikTok links only", "", None, None]
-        print(f"Downloading video: {video_url}")
-        video_path = download_video(video_url)
-        if not video_path:
-            return ["Failed to download video. Please check the URL.", "", None, None]
-        print("Extracting audio...")
-        audio_path = extract_audio(video_path)
-        print("Transcribing audio...")
-        transcription = transcribe_audio(audio_path)
-        print("Classifying content...")
-        category = classify_content(transcription)
-        return [transcription, category, video_path, audio_path]
-    except Exception as e:
-        return [f"Processing error: {str(e)}", "", None, None]
 # Gradio interface
 with gr.Blocks(title="TikTok Content Analyzer") as demo:
@@ -120,56 +117,39 @@ with gr.Blocks(title="TikTok Content Analyzer") as demo:
     with gr.Row():
         url_input = gr.Textbox(
             label="TikTok URL",
-            placeholder="Enter TikTok video URL here...",
-            scale=4
         )
-        submit_btn = gr.Button("Analyze Video", variant="primary", scale=1)
     with gr.Row():
-        with gr.Column():
-            transcription_output = gr.Textbox(
-                label="Extracted Text",
-                interactive=True,
-                lines=10,
-                max_lines=20
-            )
-            category_output = gr.Textbox(
-                label="Content Category",
-                interactive=False
-            )
-        with gr.Column():
-            video_preview = gr.Video(
-                label="Downloaded Video",
-                interactive=False
-            )
-            audio_preview = gr.Audio(
-                label="Extracted Audio",
-                interactive=False
-            )
-    # TikTok URL examples
     gr.Examples(
         examples=[
             ["https://www.tiktok.com/@example/video/123456789"],
-            ["https://www.tiktok.com/@user2/video/987654321"],
             ["https://vm.tiktok.com/ZMexample/"]
         ],
-        inputs=url_input,
-        label="Try these examples"
     )
-    # Button click event
     submit_btn.click(
         fn=process_video,
         inputs=url_input,
-        outputs=[transcription_output, category_output, video_preview, audio_preview]
     )
-# Launch the app
 if __name__ == "__main__":
-    demo.launch(
-        server_name="0.0.0.0",
-        server_port=7860,
-        share=False
-    )

 import gradio as gr
 from transformers import pipeline
 import yt_dlp
 import whisper
 import os
 import uuid
 import re
+# Delete temporary files
+def clean_temp_files():
+    temp_files = ["temp_video.mp4", "temp_audio.mp3"]
+    for file in temp_files:
         if os.path.exists(file):
             os.remove(file)
 # Download TikTok video
 def download_video(video_url):
     try:
+        ydl_opts = {
+            'format': 'bestvideo[ext=mp4]+bestaudio[ext=m4a]/best[ext=mp4]',
+            'outtmpl': 'temp_video.mp4',
+            'quiet': True,
+            'no_warnings': True,
+            'extractor_args': {'tiktok': {'skip_watermark': True}}
+        }
         with yt_dlp.YoutubeDL(ydl_opts) as ydl:
             ydl.download([video_url])
+        return "temp_video.mp4"
     except Exception as e:
+        print(f"Download error: {e}")
         return None
+# Extract audio (temporary)
 def extract_audio(video_path):
+    os.system(f"ffmpeg -i \"{video_path}\" -vn -acodec libmp3lame -q:a 3 \"temp_audio.mp3\" -y")
+    return "temp_audio.mp3" if os.path.exists("temp_audio.mp3") else None
+# Transcribe audio
 def transcribe_audio(audio_path):
     try:
         model = whisper.load_model("base")
         result = model.transcribe(audio_path)
         return result['text']
     except Exception as e:
+        print(f"Transcription error: {e}")
+        return None
 # Classify content
 def classify_content(text):
     try:
         if not text or len(text.strip()) == 0:
+            return None, None
         classifier = pipeline("zero-shot-classification",
                            model="facebook/bart-large-mnli")
+        labels = ["educational", "entertainment", "news", "political",
+                "religious", "technical", "advertisement", "social"]
+        result = classifier(text,
                           candidate_labels=labels,
+                          hypothesis_template="This text is about {}.")
+        return result['labels'][0], result['scores'][0]
     except Exception as e:
+        print(f"Classification error: {e}")
+        return None, None
+# Main processing function
 def process_video(video_url):
+    clean_temp_files()
+    if not video_url or len(video_url.strip()) == 0:
+        return "Please enter a valid TikTok URL", ""
+    if "tiktok.com" not in video_url and "vm.tiktok.com" not in video_url:
+        return "This app is for TikTok links only", ""
+    # Download video
+    video_path = download_video(video_url)
+    if not video_path:
+        return "Failed to download video", ""
+    # Extract audio
+    audio_path = extract_audio(video_path)
+    if not audio_path:
+        clean_temp_files()
+        return "Failed to extract audio", ""
+    # Transcribe
+    transcription = transcribe_audio(audio_path)
+    if not transcription:
+        clean_temp_files()
+        return "Failed to transcribe audio", ""
+    # Classify
+    category, confidence = classify_content(transcription)
+    if not category:
+        clean_temp_files()
+        return transcription, "Failed to classify content"
+    # Clean up
+    clean_temp_files()
+    # Format classification result
+    classification_result = f"{category} (confidence: {confidence:.2f})"
+    return transcription, classification_result
 # Gradio interface
 with gr.Blocks(title="TikTok Content Analyzer") as demo:
     with gr.Row():
         url_input = gr.Textbox(
             label="TikTok URL",
+            placeholder="Enter TikTok video URL here..."
         )
     with gr.Row():
+        transcription_output = gr.Textbox(
+            label="Transcription",
+            interactive=True,
+            lines=10,
+            max_lines=20
+        )
+    with gr.Row():
+        category_output = gr.Textbox(
+            label="Content Category",
+            interactive=False
+        )
+    submit_btn = gr.Button("Analyze Video", variant="primary")
+    # Examples
     gr.Examples(
         examples=[
             ["https://www.tiktok.com/@example/video/123456789"],
             ["https://vm.tiktok.com/ZMexample/"]
         ],
+        inputs=url_input
     )
     submit_btn.click(
         fn=process_video,
         inputs=url_input,
+        outputs=[transcription_output, category_output]
     )
 if __name__ == "__main__":
+    demo.launch(server_name="0.0.0.0", server_port=7860)