Spaces:

ayloll
/

VideoToTexttik

Sleeping

App Files Files Community

ayloll committed on Jun 20, 2025

Commit

e48ee91

verified ·

1 Parent(s): 383c593

Update app.py

Browse files

Files changed (1) hide show

app.py +65 -132

app.py CHANGED Viewed

@@ -1,7 +1,4 @@
-!apt update && apt install ffmpeg
-!pip install yt-dlp openai-whisper pydub ffmpeg
-!pip install -U openai-whisper
 from transformers import pipeline
 import yt_dlp
 import whisper
@@ -10,140 +7,76 @@ import requests
 import uuid
 import re
-# Delete old files before starting a new process
-def clean_old_files():
-    if os.path.exists("video.mp4"): os.remove("video.mp4")
-    if os.path.exists("audio.mp3"): os.remove("audio.mp3")
-    if os.path.exists("transcription.txt"): os.remove("transcription.txt")
-# Download video from YouTube or TikTok
-def download_video(video_url):
-    unique_name = f"video_{uuid.uuid4().hex[:8]}.mp4"
-    # Check if it's a TikTok URL
-    if "tiktok.com" in video_url:
-        ydl_opts = {
-            'format': 'bestvideo[ext=mp4]+bestaudio[ext=m4a]/best[ext=mp4]',
-            'outtmpl': unique_name,
-            'quiet': True,
-            'no_warnings': True,
-        }
-    else:  # YouTube or other platforms
-        ydl_opts = {
-            'format': 'mp4',
-            'outtmpl': unique_name,
-        }
     try:
-        with yt_dlp.YoutubeDL(ydl_opts) as ydl:
-            ydl.download([video_url])
-        return unique_name
-    except Exception as e:
-        print(f"Error downloading video: {e}")
-        return None
-# Download video from a direct link
-def download_direct_video(video_url):
-    unique_name = f"video_{uuid.uuid4().hex[:8]}.mp4"
-    headers = {
-        'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/91.0.4472.124 Safari/537.36'
-    }
-    try:
-        response = requests.get(video_url, headers=headers, stream=True)
-        response.raise_for_status()
-        with open(unique_name, "wb") as f:
-            for chunk in response.iter_content(chunk_size=1024):
-                if chunk:
-                    f.write(chunk)
-        return unique_name
     except Exception as e:
-        print(f"Error downloading direct video: {e}")
-        return None
-# Extract audio from the video using ffmpeg
-def extract_audio(video_path):
-    audio_path = "audio.mp3"
-    os.system(f"ffmpeg -i \"{video_path}\" -vn -acodec libmp3lame -q:a 3 \"{audio_path}\" -y")
-    if not os.path.exists(audio_path):
-        raise RuntimeError("Error: Failed to extract audio.")
-    return audio_path
-# Convert audio to text using Whisper model
-def transcribe_audio(audio_path):
-    model = whisper.load_model("large")
-    result = model.transcribe(audio_path)
-    return result['text']
-# Classify text content using Zero-shot Classification
-def classify_content(text):
-    classifier = pipeline("zero-shot-classification", model="facebook/bart-large-mnli")
-    labels = ["educational", "entertainment", "news", "political", "religious",
-              "technical", "advertisement", "social", "music", "comedy", "dance",
-              "challenge", "tutorial", "vlog", "prank", "beauty", "fashion"]
-    # Clean text to improve classification
-    clean_text = ' '.join(text.split()[:500])  # Use first 500 words to avoid token limits
-    result = classifier(clean_text, candidate_labels=labels, hypothesis_template="This text is about {}.")
-    top_label = result['labels'][0]
-    confidence = result['scores'][0]
-    print("\nVideo Content Classification:")
-    print(f"Predicted Category: {top_label} with confidence: {confidence:.2f}")
-    return top_label
-# Check if URL is TikTok
-def is_tiktok_url(url):
-    tiktok_pattern = r'(https?://)?(www\.)?tiktok\.com/.+'
-    return re.match(tiktok_pattern, url) is not None
-# Main script
 if __name__ == "__main__":
-    video_url = input("Enter the video URL or path to .mp4 file: ").strip()
-    clean_old_files()
-    if video_url.endswith(".mp4"):
-        video_path = video_url
-    elif is_tiktok_url(video_url):
-        print("Downloading video from TikTok...")
-        video_path = download_video(video_url)
-    elif "youtube.com" in video_url or "youtu.be" in video_url:
-        print("Downloading video from YouTube...")
-        video_path = download_video(video_url)
-    else:
-        print("Downloading video from direct link...")
-        video_path = download_direct_video(video_url)
-    if not video_path or not os.path.exists(video_path):
-        print("Error: Failed to download video.")
-        exit(1)
-    print("Extracting audio...\n")
-    try:
-        audio_path = extract_audio(video_path)
-    except Exception as e:
-        print(f"Error extracting audio: {e}")
-        exit(1)
-    print("Transcribing...\n")
-    try:
-        transcription = transcribe_audio(audio_path)
-    except Exception as e:
-        print(f"Error transcribing audio: {e}")
-        exit(1)
-    print("\nTranscription Result:\n")
-    print(transcription)
-    with open("transcription.txt", "w", encoding="utf-8") as f:
-        f.write(transcription)
-    # Classify the content based on transcribed text
-    print("\nClassifying content...")
-    try:
-        classify_content(transcription)
-    except Exception as e:
-        print(f"Error classifying content: {e}")

+import gradio as gr
 from transformers import pipeline
 import yt_dlp
 import whisper
 import uuid
 import re
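+# TODO: restore the helper functions here (clean_old_files, download_video,
+# download_direct_video, extract_audio, transcribe_audio, classify_content,
+# is_tiktok_url). process_video below calls them, but this commit deletes their
+# definitions, so the calls raise NameError until they are defined again.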
+def process_video(video_url):
     try:
+        clean_old_files()
+        if video_url.endswith(".mp4"):
+            video_path = video_url
+        elif is_tiktok_url(video_url):
+            print("Downloading video from TikTok...")
+            video_path = download_video(video_url)
+        elif "youtube.com" in video_url or "youtu.be" in video_url:
+            print("Downloading video from YouTube...")
+            video_path = download_video(video_url)
+        else:
+            print("Downloading video from direct link...")
+            video_path = download_direct_video(video_url)
+        if not video_path or not os.path.exists(video_path):
+            return "Error: Failed to download video."
+        print("Extracting audio...")
+        audio_path = extract_audio(video_path)
+        print("Transcribing...")
+        transcription = transcribe_audio(audio_path)
+        print("Classifying content...")
+        category = classify_content(transcription)
+        return {
+            "transcription": transcription,
+            "category": category,
+            "video_path": video_path,
+            "audio_path": audio_path
+        }
     except Exception as e:
+        return f"Error processing video: {str(e)}"
+# Gradio Interface
+with gr.Blocks(title="Video Content Analyzer") as demo:
+    gr.Markdown("""
+    # 🎥 Video Content Analyzer
+    Analyze videos from TikTok, YouTube, or direct links. Get transcription and content classification.
+    """)
+    with gr.Row():
+        url_input = gr.Textbox(label="Video URL", placeholder="Enter TikTok, YouTube or direct video URL...")
+        submit_btn = gr.Button("Analyze", variant="primary")
+    with gr.Row():
+        with gr.Column():
+            transcription_output = gr.Textbox(label="Transcription", interactive=False)
+            category_output = gr.Textbox(label="Content Category", interactive=False)
+        with gr.Column():
+            video_preview = gr.Video(label="Downloaded Video")
+            audio_preview = gr.Audio(label="Extracted Audio")
+    submit_btn.click(
+        fn=process_video,
+        inputs=url_input,
+        outputs={
+            "transcription": transcription_output,
+            "category": category_output,
+            "video_path": video_preview,
+            "audio_path": audio_preview
+        }
+    )
 if __name__ == "__main__":
+    demo.launch(server_name="0.0.0.0", server_port=7860)