ayloll committed on
Commit
383c593
·
verified ·
1 Parent(s): 82b980c

Upload 3 files

Browse files
Files changed (3) hide show
  1. app.py +149 -0
  2. dockerfile.dockerfile +26 -0
  3. requirements.txt.txt +8 -0
app.py ADDED
@@ -0,0 +1,149 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ !apt update && apt install ffmpeg
2
+ !pip install yt-dlp openai-whisper pydub ffmpeg
3
+ !pip install -U openai-whisper
4
+
import glob
import os
import re
import subprocess
import uuid

import requests
import whisper
import yt_dlp
from transformers import pipeline
12
+
13
+ # Delete old files before starting a new process
14
+ def clean_old_files():
15
+ if os.path.exists("video.mp4"): os.remove("video.mp4")
16
+ if os.path.exists("audio.mp3"): os.remove("audio.mp3")
17
+ if os.path.exists("transcription.txt"): os.remove("transcription.txt")
18
+
19
+ # Download video from YouTube or TikTok
20
+ def download_video(video_url):
21
+ unique_name = f"video_{uuid.uuid4().hex[:8]}.mp4"
22
+
23
+ # Check if it's a TikTok URL
24
+ if "tiktok.com" in video_url:
25
+ ydl_opts = {
26
+ 'format': 'bestvideo[ext=mp4]+bestaudio[ext=m4a]/best[ext=mp4]',
27
+ 'outtmpl': unique_name,
28
+ 'quiet': True,
29
+ 'no_warnings': True,
30
+ }
31
+ else: # YouTube or other platforms
32
+ ydl_opts = {
33
+ 'format': 'mp4',
34
+ 'outtmpl': unique_name,
35
+ }
36
+
37
+ try:
38
+ with yt_dlp.YoutubeDL(ydl_opts) as ydl:
39
+ ydl.download([video_url])
40
+ return unique_name
41
+ except Exception as e:
42
+ print(f"Error downloading video: {e}")
43
+ return None
44
+
45
+ # Download video from a direct link
46
+ def download_direct_video(video_url):
47
+ unique_name = f"video_{uuid.uuid4().hex[:8]}.mp4"
48
+ headers = {
49
+ 'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/91.0.4472.124 Safari/537.36'
50
+ }
51
+ try:
52
+ response = requests.get(video_url, headers=headers, stream=True)
53
+ response.raise_for_status()
54
+ with open(unique_name, "wb") as f:
55
+ for chunk in response.iter_content(chunk_size=1024):
56
+ if chunk:
57
+ f.write(chunk)
58
+ return unique_name
59
+ except Exception as e:
60
+ print(f"Error downloading direct video: {e}")
61
+ return None
62
+
63
+ # Extract audio from the video using ffmpeg
64
+ def extract_audio(video_path):
65
+ audio_path = "audio.mp3"
66
+ os.system(f"ffmpeg -i \"{video_path}\" -vn -acodec libmp3lame -q:a 3 \"{audio_path}\" -y")
67
+ if not os.path.exists(audio_path):
68
+ raise RuntimeError("Error: Failed to extract audio.")
69
+ return audio_path
70
+
71
+ # Convert audio to text using Whisper model
72
+ def transcribe_audio(audio_path):
73
+ model = whisper.load_model("large")
74
+ result = model.transcribe(audio_path)
75
+ return result['text']
76
+
77
+ # Classify text content using Zero-shot Classification
78
+ def classify_content(text):
79
+ classifier = pipeline("zero-shot-classification", model="facebook/bart-large-mnli")
80
+ labels = ["educational", "entertainment", "news", "political", "religious",
81
+ "technical", "advertisement", "social", "music", "comedy", "dance",
82
+ "challenge", "tutorial", "vlog", "prank", "beauty", "fashion"]
83
+
84
+ # Clean text to improve classification
85
+ clean_text = ' '.join(text.split()[:500]) # Use first 500 words to avoid token limits
86
+
87
+ result = classifier(clean_text, candidate_labels=labels, hypothesis_template="This text is about {}.")
88
+
89
+ top_label = result['labels'][0]
90
+ confidence = result['scores'][0]
91
+
92
+ print("\nVideo Content Classification:")
93
+ print(f"Predicted Category: {top_label} with confidence: {confidence:.2f}")
94
+
95
+ return top_label
96
+
97
+ # Check if URL is TikTok
98
+ def is_tiktok_url(url):
99
+ tiktok_pattern = r'(https?://)?(www\.)?tiktok\.com/.+'
100
+ return re.match(tiktok_pattern, url) is not None
101
+
102
+ # Main script
103
+ if __name__ == "__main__":
104
+ video_url = input("Enter the video URL or path to .mp4 file: ").strip()
105
+
106
+ clean_old_files()
107
+
108
+ if video_url.endswith(".mp4"):
109
+ video_path = video_url
110
+ elif is_tiktok_url(video_url):
111
+ print("Downloading video from TikTok...")
112
+ video_path = download_video(video_url)
113
+ elif "youtube.com" in video_url or "youtu.be" in video_url:
114
+ print("Downloading video from YouTube...")
115
+ video_path = download_video(video_url)
116
+ else:
117
+ print("Downloading video from direct link...")
118
+ video_path = download_direct_video(video_url)
119
+
120
+ if not video_path or not os.path.exists(video_path):
121
+ print("Error: Failed to download video.")
122
+ exit(1)
123
+
124
+ print("Extracting audio...\n")
125
+ try:
126
+ audio_path = extract_audio(video_path)
127
+ except Exception as e:
128
+ print(f"Error extracting audio: {e}")
129
+ exit(1)
130
+
131
+ print("Transcribing...\n")
132
+ try:
133
+ transcription = transcribe_audio(audio_path)
134
+ except Exception as e:
135
+ print(f"Error transcribing audio: {e}")
136
+ exit(1)
137
+
138
+ print("\nTranscription Result:\n")
139
+ print(transcription)
140
+
141
+ with open("transcription.txt", "w", encoding="utf-8") as f:
142
+ f.write(transcription)
143
+
144
+ # Classify the content based on transcribed text
145
+ print("\nClassifying content...")
146
+ try:
147
+ classify_content(transcription)
148
+ except Exception as e:
149
+ print(f"Error classifying content: {e}")
dockerfile.dockerfile ADDED
@@ -0,0 +1,26 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # Use official Python image
2
+ FROM python:3.9-slim
3
+
4
+ # Set working directory
5
+ WORKDIR /app
6
+
7
+ # Install system dependencies
8
+ RUN apt-get update && \
9
+ apt-get install -y --no-install-recommends ffmpeg && \
10
+ rm -rf /var/lib/apt/lists/*
11
+
12
+ # Copy requirements first to leverage Docker cache
13
+ COPY requirements.txt .
14
+ RUN pip install --no-cache-dir -r requirements.txt
15
+
16
+ # Copy the rest of the application
17
+ COPY . .
18
+
19
+ # Download Whisper model during build (optional)
20
+ # RUN python -c "import whisper; whisper.load_model('base')"
21
+
22
+ # Expose port
23
+ EXPOSE 7860
24
+
25
+ # Run the application
26
+ CMD ["python", "app.py"]
requirements.txt.txt ADDED
@@ -0,0 +1,8 @@
 
 
 
 
 
 
 
 
 
1
+ gradio>=3.0
2
+ yt-dlp>=2023.7.6
3
+ openai-whisper>=20230314
4
+ pydub>=0.25.1
5
+ ffmpeg-python>=0.2.0
6
+ transformers>=4.30.0
7
+ requests>=2.28.0
8
+ python-dotenv>=0.21.0