# Hugging Face Space: YouTube Content Analyzer
# (download audio with yt-dlp, transcribe with Whisper, classify with zero-shot BART)
| import gradio as gr | |
| import whisper | |
| import yt_dlp | |
| from transformers import pipeline | |
| import tempfile | |
| import os | |
| import json | |
# Cache models globally so repeated requests reuse the loaded weights.
MODEL = None       # whisper speech-to-text model (loaded lazily)
CLASSIFIER = None  # transformers zero-shot classification pipeline (loaded lazily)

def load_models():
    """Lazily load and cache the Whisper and zero-shot classification models.

    Returns:
        tuple: (whisper model, transformers zero-shot classification pipeline).
    """
    global MODEL, CLASSIFIER
    # Check BOTH globals: if a previous call loaded MODEL but failed before
    # CLASSIFIER was assigned, checking only MODEL would return (model, None).
    if MODEL is None or CLASSIFIER is None:
        print("Loading models...")
        MODEL = whisper.load_model("base")
        CLASSIFIER = pipeline("zero-shot-classification", model="facebook/bart-large-mnli")
    return MODEL, CLASSIFIER
def convert_cookies_to_single_line():
    """Read cookies.txt and print/return it with newlines escaped as literal '\\n'.

    Intended for copying a Netscape-format cookies file into a single-line
    Hugging Face secret. Returns the flattened string, or None when
    cookies.txt does not exist.
    """
    try:
        with open("cookies.txt") as handle:
            content = handle.read()
    except FileNotFoundError:
        print("Error: cookies.txt file not found")
        return None
    flattened = content.replace("\n", "\\n")
    print("Copy this to Hugging Face Secrets (YOUTUBE_COOKIES_TXT):")
    print(flattened)
    return flattened
def setup_cookies():
    """Materialize cookies.txt from the YOUTUBE_COOKIES_TXT env var.

    The secret stores the file as a single line with literal '\\n' escapes;
    this restores real newlines and writes cookies.txt. Returns True when
    the file was written, False when the env var is missing/empty.
    """
    raw = os.getenv('YOUTUBE_COOKIES_TXT')
    if not raw:
        return False
    restored = raw.replace("\\n", "\n")
    with open('cookies.txt', 'w') as out:
        out.write(restored)
    return True
def normalize_youtube_url(url):
    """Convert various YouTube URL formats to a standard watch URL.

    Handles youtu.be short links, /shorts/ and /embed/ paths, and watch
    URLs (the 'v' parameter is extracted regardless of its position, so
    extra query parameters are always stripped correctly).

    Args:
        url: The URL string supplied by the user; a missing scheme is tolerated.

    Returns:
        A canonical 'https://www.youtube.com/watch?v=<id>' URL, or None when
        the input is not a recognizable YouTube video URL.
    """
    from urllib.parse import urlparse, parse_qs

    url = url.strip()
    # urlparse only populates netloc when a scheme is present.
    if '://' not in url:
        url = 'https://' + url
    parsed = urlparse(url)
    host = parsed.netloc.lower()

    # youtu.be short links carry the video id as the first path segment.
    if host == 'youtu.be' or host.endswith('.youtu.be'):
        video_id = parsed.path.lstrip('/').split('/')[0]
        return f'https://www.youtube.com/watch?v={video_id}' if video_id else None

    # Match youtube.com and subdomains (www., m., music.) by hostname, not
    # by substring, so 'evil.com/youtube.com/watch' cannot slip through.
    if host != 'youtube.com' and not host.endswith('.youtube.com'):
        return None

    path = parsed.path
    # Shorts and embed URLs carry the id in the path.
    for prefix in ('/shorts/', '/embed/'):
        if path.startswith(prefix):
            video_id = path[len(prefix):].split('/')[0]
            return f'https://www.youtube.com/watch?v={video_id}' if video_id else None

    # Standard watch URL: read 'v' from the query string wherever it appears.
    if path == '/watch':
        video_id = parse_qs(parsed.query).get('v', [''])[0]
        return f'https://www.youtube.com/watch?v={video_id}' if video_id else None

    return None
def analyze_video(yt_url):
    """Download a YouTube video's audio, transcribe it, and classify the content.

    Args:
        yt_url: A YouTube URL in any supported format (watch, youtu.be, ...).

    Returns:
        tuple: (transcription text or "Error: ..." message,
                top category label or "",
                confidence score rounded to 3 decimals, or 0 on error).
    """
    try:
        # Normalize and validate the URL before doing any heavy work.
        normalized_url = normalize_youtube_url(yt_url)
        if not normalized_url:
            return "Error: Invalid YouTube URL. Must be from youtube.com or youtu.be", "", 0
        model, classifier = load_models()
        has_cookies = setup_cookies()
        # delete=False: yt-dlp/ffmpeg must be able to reopen the path by name.
        with tempfile.NamedTemporaryFile(suffix=".mp3", delete=False) as tmp:
            tmp_path = tmp.name
        try:
            ydl_opts = {
                'format': 'bestaudio/best',
                'outtmpl': tmp_path,
                'quiet': True,
                'extract_audio': True,
                'postprocessors': [{
                    'key': 'FFmpegExtractAudio',
                    'preferredcodec': 'mp3',
                    'preferredquality': '192',
                }],
                # Browser-like headers reduce the chance of bot detection.
                'http_headers': {
                    'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/126.0.0.0 Safari/537.36',
                    'Accept-Language': 'en-US,en;q=0.9',
                    'Referer': 'https://www.youtube.com/'
                },
                'socket_timeout': 30,
                'noplaylist': True,
                'verbose': False
            }
            if has_cookies:
                ydl_opts.update({
                    'cookiefile': 'cookies.txt',
                    'extract_flat': 'in_playlist',
                })
            with yt_dlp.YoutubeDL(ydl_opts) as ydl:
                try:
                    # Probe first so auth/extraction failures surface before download.
                    info = ydl.extract_info(normalized_url, download=False)
                    if not info.get('url') and not info.get('requested_downloads'):
                        return "Error: Failed to extract video info. Cookies may be invalid.", "", 0
                    ydl.download([normalized_url])
                except yt_dlp.utils.DownloadError as e:
                    if "Sign in to confirm you're not a bot" in str(e):
                        return "Error: YouTube requires authentication. Please ensure cookies are fresh and valid.", "", 0
                    raise  # bare raise preserves the original traceback
            # NOTE(review): the FFmpegExtractAudio postprocessor may write the
            # converted file to '<outtmpl>.mp3' instead of the template path
            # itself; prefer whichever candidate actually holds the audio.
            audio_path = tmp_path
            alt_path = tmp_path + ".mp3"
            if (not os.path.exists(audio_path) or os.path.getsize(audio_path) == 0) \
                    and os.path.exists(alt_path):
                audio_path = alt_path
            result = model.transcribe(audio_path)
            transcription = result["text"]
            labels = ["educational", "entertainment", "news", "political", "religious", "technical"]
            classification = classifier(
                transcription,
                candidate_labels=labels,
                hypothesis_template="This content is about {}."
            )
            return transcription, classification["labels"][0], round(classification["scores"][0], 3)
        finally:
            # Remove the temp audio under either candidate name. Only remove
            # cookies.txt when setup_cookies() created it from the env var —
            # never delete a user-provided cookies.txt we did not write.
            cleanup = [tmp_path, tmp_path + ".mp3"]
            if has_cookies:
                cleanup.append('cookies.txt')
            for f in cleanup:
                if os.path.exists(f):
                    os.remove(f)
    except Exception as e:
        # Top-level boundary: surface any failure as an error string in the UI.
        return f"Error: {str(e)}", "", 0
# --- Gradio UI: URL in, transcription + category + confidence out ---
with gr.Blocks(theme=gr.themes.Soft()) as demo:
    gr.Markdown("# 🎬 YouTube Content Analyzer")
    with gr.Row():
        url_input = gr.Textbox(
            label="YouTube URL",
            placeholder="https://www.youtube.com/watch?v=... or https://youtu.be/...",
        )
        analyze_btn = gr.Button("Analyze", variant="primary")
    with gr.Row():
        transcript_box = gr.Textbox(label="Transcription", interactive=False, lines=5)
        with gr.Column():
            category_label = gr.Label(label="Category")
            score_box = gr.Number(label="Confidence Score", precision=2)
    # Wire the button to the analysis pipeline.
    analyze_btn.click(
        analyze_video,
        inputs=url_input,
        outputs=[transcript_box, category_label, score_box],
    )
# Script entry point: optionally echo cookies in secret format, then serve the UI.
if __name__ == "__main__":
    # NOTE(review): this prints the entire cookies file (a credential) to
    # stdout/logs at every startup when cookies.txt exists — consider removing
    # or gating this helper outside of local setup.
    if os.path.exists("cookies.txt"):
        convert_cookies_to_single_line()
    demo.launch()