# video-analyzer / app.py
# ayloll's picture
# Update app.py
# e35d070 verified
import json
import os
import re
import tempfile
from urllib.parse import parse_qs, urlparse

import gradio as gr
import whisper
import yt_dlp
from transformers import pipeline
# Process-wide model cache: both slots start empty and are filled in by
# load_models() the first time an analysis is requested.
MODEL = CLASSIFIER = None
def load_models():
    """Return the cached ``(whisper_model, classifier)`` pair, loading on first use.

    The two models are checked and loaded independently. If one load fails
    (the exception propagates to the caller), the next call retries only the
    model that is still missing instead of handing back a ``None`` classifier
    next to an already-loaded Whisper model.

    Returns:
        A tuple of the Whisper "base" model and the
        ``facebook/bart-large-mnli`` zero-shot-classification pipeline.
    """
    global MODEL, CLASSIFIER
    if MODEL is None or CLASSIFIER is None:
        print("Loading models...")
    if MODEL is None:
        MODEL = whisper.load_model("base")
    if CLASSIFIER is None:
        CLASSIFIER = pipeline("zero-shot-classification", model="facebook/bart-large-mnli")
    return MODEL, CLASSIFIER
def convert_cookies_to_single_line():
    """Print and return the contents of ``cookies.txt`` as a single line.

    Every newline in the file is replaced by the two-character sequence
    backslash + ``n`` so the value can be pasted into a one-line secret
    (``YOUTUBE_COOKIES_TXT``).

    Returns:
        The escaped string, or ``None`` (after printing an error) when
        ``cookies.txt`` does not exist in the current working directory.
    """
    try:
        cookie_file = open("cookies.txt")
    except FileNotFoundError:
        print("Error: cookies.txt file not found")
        return None

    with cookie_file:
        raw_text = cookie_file.read()

    # Joining the newline-split pieces with a literal "\n" marker is the
    # same transformation as replacing each newline in place.
    escaped = "\\n".join(raw_text.split("\n"))
    print("Copy this to Hugging Face Secrets (YOUTUBE_COOKIES_TXT):")
    print(escaped)
    return escaped
def setup_cookies():
    """Write ``cookies.txt`` from the ``YOUTUBE_COOKIES_TXT`` environment variable.

    The variable holds the cookie file on one line, with each newline encoded
    as the two characters backslash + ``n``; those markers are turned back
    into real newlines before the file is written to the working directory.

    Returns:
        ``True`` when the file was written, ``False`` when the variable is
        unset or empty (nothing is written in that case).
    """
    encoded = os.environ.get('YOUTUBE_COOKIES_TXT')
    if encoded:
        restored = encoded.replace("\\n", "\n")
        with open('cookies.txt', 'w') as cookie_file:
            cookie_file.write(restored)
        return True
    return False
def normalize_youtube_url(url):
    """Convert a YouTube watch or youtu.be link to the canonical watch URL.

    The URL is parsed rather than substring-matched, so that:

    * ``youtu.be`` is only recognised as a *host* (a watch URL carrying
      ``feature=youtu.be`` is no longer mistaken for a short link);
    * the ``v`` parameter is found wherever it sits in the query string;
    * trailing slashes and extra parameters on short links are tolerated;
    * a URL on another host that merely contains the text
      ``youtube.com/watch`` is rejected.

    Args:
        url: User-supplied URL. A missing scheme is tolerated
            (``youtube.com/watch?v=...``).

    Returns:
        ``https://www.youtube.com/watch?v=<id>``, or ``None`` when the input
        is not a string, is not a YouTube watch / youtu.be link, or carries
        no usable video id.
    """
    if not isinstance(url, str):
        return None
    candidate = url.strip()
    if not candidate:
        return None

    # urlparse only fills in the host when a scheme is present, so add one
    # for bare "youtube.com/..." style input.
    if not candidate.lower().startswith(("http://", "https://")):
        candidate = "https://" + candidate.lstrip("/")

    try:
        parts = urlparse(candidate)
        host = (parts.hostname or "").lower()
    except ValueError:
        # Raised for malformed authorities such as unbalanced IPv6 brackets.
        return None
    if host.startswith("www."):
        host = host[len("www."):]

    if host == "youtu.be":
        # Short link: the id is the first non-empty path segment.
        segments = [segment for segment in parts.path.split("/") if segment]
        video_id = segments[0] if segments else ""
    elif host == "youtube.com" or host.endswith(".youtube.com"):
        if parts.path.rstrip("/").lower() != "/watch":
            return None
        video_id = parse_qs(parts.query).get("v", [""])[0]
    else:
        return None

    # Reject empty ids and anything that could smuggle extra URL syntax.
    if not re.fullmatch(r"[A-Za-z0-9_-]+", video_id):
        return None
    return f"https://www.youtube.com/watch?v={video_id}"
def analyze_video(yt_url):
    """Download a YouTube video's audio, transcribe it and classify the content.

    Steps: validate/normalise the URL, load (or reuse) the models, download
    the audio track as MP3 with yt-dlp, transcribe it with Whisper, then run
    zero-shot classification over the transcription.

    Args:
        yt_url: URL typed by the user (youtube.com watch link or youtu.be link).

    Returns:
        A 3-tuple ``(text, label, score)``. On success ``text`` is the
        transcription, ``label`` the top-scoring category and ``score`` its
        confidence rounded to three decimals. On any failure ``text`` is a
        message starting with ``"Error: "``, ``label`` is ``""`` and ``score``
        is ``0``; exceptions are never propagated to the UI.
    """
    try:
        # Normalize and validate URL
        normalized_url = normalize_youtube_url(yt_url)
        if not normalized_url:
            return "Error: Invalid YouTube URL. Must be from youtube.com or youtu.be", "", 0

        model, classifier = load_models()

        has_cookies = False
        try:
            # Inside the try so the cookies file is cleaned up even if a
            # later step (e.g. creating the temp directory) fails.
            has_cookies = setup_cookies()

            # Download into a private directory and let yt-dlp name the file.
            # Handing yt-dlp the path of an already-created (empty) temp file
            # risks it treating that placeholder as a finished download.
            with tempfile.TemporaryDirectory() as tmp_dir:
                audio_path = os.path.join(tmp_dir, "audio.mp3")
                # '%' is the output-template escape character, so double any
                # literal '%' that happens to be in the directory name.
                out_template = os.path.join(tmp_dir.replace('%', '%%'), "audio.%(ext)s")

                ydl_opts = {
                    'format': 'bestaudio/best',
                    'outtmpl': out_template,
                    'quiet': True,
                    'extract_audio': True,
                    'postprocessors': [{
                        'key': 'FFmpegExtractAudio',
                        'preferredcodec': 'mp3',
                        'preferredquality': '192',
                    }],
                    'http_headers': {
                        'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/126.0.0.0 Safari/537.36',
                        'Accept-Language': 'en-US,en;q=0.9',
                        'Referer': 'https://www.youtube.com/'
                    },
                    'socket_timeout': 30,
                    'noplaylist': True,
                    'verbose': False
                }

                if has_cookies:
                    ydl_opts.update({
                        'cookiefile': 'cookies.txt',
                        'extract_flat': 'in_playlist',
                    })

                with yt_dlp.YoutubeDL(ydl_opts) as ydl:
                    try:
                        # Probe first so an unusable result is reported
                        # before any download is attempted.
                        info = ydl.extract_info(normalized_url, download=False)
                        if not info.get('url') and not info.get('requested_downloads'):
                            return "Error: Failed to extract video info. Cookies may be invalid.", "", 0
                        ydl.download([normalized_url])
                    except yt_dlp.utils.DownloadError as e:
                        if "Sign in to confirm you're not a bot" in str(e):
                            return "Error: YouTube requires authentication. Please ensure cookies are fresh and valid.", "", 0
                        raise

                if not os.path.exists(audio_path):
                    return "Error: Audio download failed; no audio file was produced.", "", 0

                result = model.transcribe(audio_path)
                transcription = result["text"]

                # The classifier rejects empty input, so report the real
                # cause instead of surfacing its validation error.
                if not transcription.strip():
                    return "Error: No speech could be transcribed from this video.", "", 0

                labels = ["educational", "entertainment", "news", "political", "religious", "technical"]
                classification = classifier(
                    transcription,
                    candidate_labels=labels,
                    hypothesis_template="This content is about {}."
                )

                return transcription, classification["labels"][0], round(classification["scores"][0], 3)
        finally:
            # Only remove cookies.txt when this call wrote it from the
            # environment; a developer's own local file is left alone.
            if has_cookies and os.path.exists('cookies.txt'):
                os.remove('cookies.txt')
    except Exception as e:
        # UI boundary: turn every failure into a displayable message.
        return f"Error: {str(e)}", "", 0
# Gradio UI: a URL box and button on the first row; the transcription next to
# a column holding the category label and confidence score on the second row.
with gr.Blocks(theme=gr.themes.Soft()) as demo:
    gr.Markdown("# 🎬 YouTube Content Analyzer")

    # Input row
    with gr.Row():
        url = gr.Textbox(
            label="YouTube URL",
            placeholder="https://www.youtube.com/watch?v=... or https://youtu.be/...",
        )
        btn = gr.Button("Analyze", variant="primary")

    # Output row
    with gr.Row():
        transcription = gr.Textbox(
            label="Transcription",
            interactive=False,
            lines=5,
        )
        with gr.Column():
            label = gr.Label(label="Category")
            confidence = gr.Number(label="Confidence Score", precision=2)

    # analyze_video returns (text, label, score), matched positionally to
    # the three output components.
    btn.click(
        analyze_video,
        inputs=url,
        outputs=[transcription, label, confidence],
    )
if __name__ == "__main__":
    # When a local cookies.txt is present, print its single-line form so it
    # can be copied into the deployment secret, then start the app.
    local_cookie_file_present = os.path.exists("cookies.txt")
    if local_cookie_file_present:
        convert_cookies_to_single_line()
    demo.launch()