""" YouTube Clip Analyzer - Identifies viral/interesting timestamps in videos using Hugging Face models for AI processing. """ import gradio as gr from pytube import YouTube from moviepy.editor import VideoFileClip import os import logging import time import requests import json import torch import whisper from transformers import pipeline # Set up logging logging.basicConfig(level=logging.INFO, format='%(asctime)s - %(levelname)s - %(message)s') logger = logging.getLogger(__name__) # Initialize models try: logger.info("Initializing models") whisper_model = whisper.load_model("tiny") sentiment_analyzer = pipeline("sentiment-analysis", model="distilbert-base-uncased-finetuned-sst-2-english") summarizer = pipeline("summarization", model="facebook/bart-large-cnn") except Exception as e: logger.error(f"Failed to initialize models: {str(e)}") whisper_model = None sentiment_analyzer = None summarizer = None def download_youtube_audio(youtube_url): """Download audio from YouTube video""" try: yt = YouTube(youtube_url) audio_stream = yt.streams.filter(only_audio=True).first() audio_path = "temp_audio.mp4" audio_stream.download(filename=audio_path) # Convert to WAV for better compatibility with speech recognition video = VideoFileClip(audio_path) wav_path = "temp_audio.wav" video.audio.write_audiofile(wav_path, ffmpeg_params=["-ac", "1", "-ar", "16000"]) video.close() os.remove(audio_path) return wav_path, yt.title except Exception as e: logger.error(f"Error downloading YouTube audio: {str(e)}") raise def analyze_youtube(youtube_url, progress=gr.Progress()): """Main function to analyze YouTube video""" try: progress(0.1, desc="Downloading YouTube audio...") # Download audio wav_path, video_title = download_youtube_audio(youtube_url) progress(0.3, desc="Transcribing audio...") # Transcribe audio result = whisper_model.transcribe(wav_path, fp16=False) segments = result["segments"] progress(0.5, desc="Processing transcript...") # Find clips clips = [] for i in range(len(segments)): start_time = segments[i]["start"] for j in range(i, min(i + 10, len(segments))): end_time = segments[j]["end"] duration = end_time - start_time if 30 <= duration <= 60: text = " ".join([seg["text"] for seg in segments[i:j+1]]) if text.strip(): # Analyze sentiment sentiment_result = sentiment_analyzer(text)[0] score = sentiment_result["score"] # Generate summary if text is long enough summary = text if len(text) > 100: try: summary_result = summarizer(text, max_length=100, min_length=30, do_sample=False) summary = summary_result[0]["summary_text"] except Exception as e: logger.error(f"Summarization error: {str(e)}") clips.append({ "start": start_time, "end": end_time, "score": score, "text": text, "summary": summary }) progress(0.9, desc="Finalizing results...") # Clean up if os.path.exists(wav_path): os.remove(wav_path) # Sort and format results clips.sort(key=lambda x: x["score"], reverse=True) top_clips = clips[:3] output = f"## Analysis Results for: {video_title}\n\n" for i, clip in enumerate(top_clips, 1): start_time_fmt = f"{int(clip['start']//60):02d}:{int(clip['start']%60):02d}" end_time_fmt = f"{int(clip['end']//60):02d}:{int(clip['end']%60):02d}" output += f"### Clip {i}\n" output += f"⏱️ Time: {start_time_fmt} - {end_time_fmt}\n" output += f"📊 Interest Score: {clip['score']:.2f}\n" output += f"💬 Summary: {clip['summary']}\n\n" # Add direct link to timestamp video_id = youtube_url.split("v=")[1].split("&")[0] if "v=" in youtube_url else "" if video_id: timestamp_seconds = int(clip["start"]) output += f"🔗 [Watch this segment](https://youtu.be/{video_id}?t={timestamp_seconds})\n\n" progress(1.0, desc="Done!") return youtube_url, output except Exception as e: logger.error(f"Error: {str(e)}") return None, f"Error processing video: {str(e)}" # Create Gradio interface demo = gr.Interface( fn=analyze_youtube, inputs=gr.Textbox( label="YouTube URL", placeholder="Enter YouTube URL (e.g., https://www.youtube.com/watch?v=dQw4w9WgXcQ)" ), outputs=[ gr.Video(label="Video"), gr.Markdown(label="Analysis Results") ], title="YouTube Viral Clip Analyzer", description="Identify the most interesting timestamps in YouTube videos using AI analysis.", examples=[ ["https://www.youtube.com/watch?v=Yf_1w00qIKc"], ["https://www.youtube.com/watch?v=dQw4w9WgXcQ"] ] ) # Launch the app if __name__ == "__main__": try: demo.launch(server_port=7861) except Exception as e: logger.error(f"Failed to launch on port 7861: {str(e)}") # Try with different settings demo.launch(share=True)