# Hugging Face Space (status when this copy was captured: Sleeping)
"""
YouTube Clip Analyzer - Identifies viral/interesting timestamps in videos
using Hugging Face models for AI processing.
"""
| import gradio as gr | |
| from pytube import YouTube | |
| from moviepy.editor import VideoFileClip | |
| import os | |
| import logging | |
| import time | |
| import requests | |
| import json | |
| import torch | |
| import whisper | |
| from transformers import pipeline | |
# Configure process-wide logging once at import time; every record is emitted
# as "<timestamp> - <level> - <message>".
logging.basicConfig(
    format='%(asctime)s - %(levelname)s - %(message)s',
    level=logging.INFO,
)
# Module-scoped logger shared by the functions below.
logger = logging.getLogger(__name__)
# Load the three models once at import time. If any of them fails to load,
# all three handles are reset to None so that no partially initialised set
# of models is left behind.
try:
    logger.info("Initializing models")
    # Speech-to-text model used for transcription.
    whisper_model = whisper.load_model("tiny")
    # Text classifier whose confidence score is used to rank clips.
    sentiment_analyzer = pipeline(
        "sentiment-analysis",
        model="distilbert-base-uncased-finetuned-sst-2-english",
    )
    # Abstractive summarizer used for clip descriptions.
    summarizer = pipeline(
        "summarization",
        model="facebook/bart-large-cnn",
    )
except Exception as init_error:
    logger.error(f"Failed to initialize models: {str(init_error)}")
    whisper_model = sentiment_analyzer = summarizer = None
def download_youtube_audio(youtube_url):
    """Download the audio track of a YouTube video and convert it to WAV.

    The first audio-only stream is downloaded to ``temp_audio.mp4`` and then
    re-encoded as mono 16 kHz WAV in ``temp_audio.wav``. The intermediate
    MP4 file is always removed, whether or not the conversion succeeds.

    Args:
        youtube_url: URL of the YouTube video to fetch.

    Returns:
        A ``(wav_path, title)`` tuple: the path of the WAV file that was
        written and the video title reported by pytube.

    Raises:
        ValueError: If the video exposes no audio-only stream.
        Exception: Any download or conversion error is logged and re-raised.
    """
    # Imported locally because the module-level import only brings in
    # VideoFileClip. AudioFileClip is moviepy's reader for files without a
    # video track, which is what an audio-only stream download produces.
    from moviepy.editor import AudioFileClip

    audio_path = "temp_audio.mp4"
    wav_path = "temp_audio.wav"
    try:
        yt = YouTube(youtube_url)
        audio_stream = yt.streams.filter(only_audio=True).first()
        if audio_stream is None:
            # .first() yields None when the filter matches nothing; fail with
            # a clear message instead of an AttributeError on None.
            raise ValueError("No audio-only stream is available for this video")
        audio_stream.download(filename=audio_path)

        # Convert to WAV for better compatibility with speech recognition
        clip = AudioFileClip(audio_path)
        try:
            # "-ac 1 -ar 16000" asks ffmpeg for mono output at 16 kHz.
            clip.write_audiofile(wav_path, ffmpeg_params=["-ac", "1", "-ar", "16000"])
        finally:
            # Release the ffmpeg reader even when the conversion fails.
            clip.close()
        return wav_path, yt.title
    except Exception as e:
        logger.error(f"Error downloading YouTube audio: {str(e)}")
        raise
    finally:
        # The intermediate download is never needed by the caller.
        if os.path.exists(audio_path):
            try:
                os.remove(audio_path)
            except OSError as cleanup_error:
                logger.warning(f"Could not remove {audio_path}: {cleanup_error}")
def _extract_video_id(youtube_url):
    """Return the video id contained in *youtube_url*, or "" if none is found."""
    from urllib.parse import parse_qs, urlparse

    parsed = urlparse(youtube_url.strip())
    # Standard watch URLs carry the id in the "v" query parameter.
    query_ids = parse_qs(parsed.query).get("v")
    if query_ids:
        return query_ids[0]
    host = (parsed.hostname or "").lower()
    path_parts = [part for part in parsed.path.split("/") if part]
    # Short links: https://youtu.be/<id>
    if host in ("youtu.be", "www.youtu.be") and path_parts:
        return path_parts[0]
    # Path-style links: /shorts/<id>, /embed/<id>, /live/<id>
    if len(path_parts) >= 2 and path_parts[0] in ("shorts", "embed", "live"):
        return path_parts[1]
    # Last resort: the plain substring split, for inputs urlparse cannot
    # decompose into a query string.
    if "v=" in youtube_url:
        return youtube_url.split("v=")[1].split("&")[0]
    return ""


def _format_timestamp(seconds):
    """Format a number of seconds as zero-padded ``MM:SS``."""
    return f"{int(seconds // 60):02d}:{int(seconds % 60):02d}"


def _find_candidate_clips(segments, min_duration=30, max_duration=60, max_segments=10):
    """Score every run of consecutive segments whose duration is in range.

    Each candidate is a dict with "start", "end", "score" and "text" keys;
    "score" is the confidence value returned by ``sentiment_analyzer``.
    """
    candidates = []
    for first, opening in enumerate(segments):
        start_time = opening["start"]
        for last in range(first, min(first + max_segments, len(segments))):
            end_time = segments[last]["end"]
            if not min_duration <= end_time - start_time <= max_duration:
                continue
            text = " ".join(seg["text"] for seg in segments[first:last + 1])
            if not text.strip():
                continue
            candidates.append({
                "start": start_time,
                "end": end_time,
                "score": sentiment_analyzer(text)[0]["score"],
                "text": text,
            })
    return candidates


def _summarize_text(text):
    """Return a summary of *text*, or *text* itself when it is short or summarization fails."""
    if len(text) <= 100:
        return text
    try:
        summary_result = summarizer(text, max_length=100, min_length=30, do_sample=False)
        return summary_result[0]["summary_text"]
    except Exception as e:
        # Best effort: a failed summary must not abort the whole analysis.
        logger.error(f"Summarization error: {str(e)}")
        return text


def analyze_youtube(youtube_url, progress=gr.Progress()):
    """Find the three highest-scoring 30-60 second clips of a YouTube video.

    The audio is downloaded and transcribed, every window of up to ten
    consecutive transcript segments lasting 30-60 seconds is scored with the
    sentiment model, and the three best windows are rendered as Markdown.
    Only those three windows are summarized, since no other summary is shown.

    Args:
        youtube_url: URL of the video to analyze.
        progress: Gradio progress tracker used to report each stage.

    Returns:
        ``(youtube_url, markdown)`` on success, or ``(None, error_message)``
        if any stage fails. Failures are logged and never raised.
    """
    # NOTE(review): the first return value feeds a gr.Video output; a YouTube
    # watch-page URL may not be playable there - confirm against the Gradio docs.
    wav_path = None
    try:
        if whisper_model is None or sentiment_analyzer is None:
            # Model loading failed at import time; say so explicitly instead
            # of failing later with "'NoneType' object has no attribute ...".
            raise RuntimeError("AI models failed to initialize; check the application logs")

        progress(0.1, desc="Downloading YouTube audio...")
        wav_path, video_title = download_youtube_audio(youtube_url)

        progress(0.3, desc="Transcribing audio...")
        result = whisper_model.transcribe(wav_path, fp16=False)
        segments = result["segments"]

        progress(0.5, desc="Processing transcript...")
        clips = _find_candidate_clips(segments)

        progress(0.9, desc="Finalizing results...")
        # list.sort is stable, so candidates with equal scores keep discovery order.
        clips.sort(key=lambda clip: clip["score"], reverse=True)
        top_clips = clips[:3]

        # The id does not depend on the clip, so resolve it once.
        video_id = _extract_video_id(youtube_url)

        parts = [f"## Analysis Results for: {video_title}\n\n"]
        if not top_clips:
            parts.append("No clips between 30 and 60 seconds long were found in this video.\n")
        for i, clip in enumerate(top_clips, 1):
            parts.append(f"### Clip {i}\n")
            parts.append(
                f"⏱️ Time: {_format_timestamp(clip['start'])} - {_format_timestamp(clip['end'])}\n"
            )
            parts.append(f"📊 Interest Score: {clip['score']:.2f}\n")
            parts.append(f"💬 Summary: {_summarize_text(clip['text'])}\n\n")
            # Add direct link to timestamp
            if video_id:
                timestamp_seconds = int(clip["start"])
                parts.append(
                    f"🔗 [Watch this segment](https://youtu.be/{video_id}?t={timestamp_seconds})\n\n"
                )

        progress(1.0, desc="Done!")
        return youtube_url, "".join(parts)
    except Exception as e:
        logger.exception(f"Error: {str(e)}")
        return None, f"Error processing video: {str(e)}"
    finally:
        # Remove the temporary WAV file on both the success and failure paths.
        if wav_path and os.path.exists(wav_path):
            try:
                os.remove(wav_path)
            except OSError as cleanup_error:
                logger.warning(f"Could not remove {wav_path}: {cleanup_error}")
# Build the Gradio UI: one URL text box in, a video panel plus a Markdown
# report out. The two outputs line up with the two values that
# analyze_youtube returns.
demo = gr.Interface(
    title="YouTube Viral Clip Analyzer",
    description="Identify the most interesting timestamps in YouTube videos using AI analysis.",
    fn=analyze_youtube,
    inputs=gr.Textbox(
        placeholder="Enter YouTube URL (e.g., https://www.youtube.com/watch?v=dQw4w9WgXcQ)",
        label="YouTube URL",
    ),
    outputs=[
        gr.Video(label="Video"),
        gr.Markdown(label="Analysis Results"),
    ],
    # Sample URLs offered to the user; each inner list is one example input row.
    examples=[
        ["https://www.youtube.com/watch?v=Yf_1w00qIKc"],
        ["https://www.youtube.com/watch?v=dQw4w9WgXcQ"],
    ],
)
def _launch_demo():
    """Start the app on port 7861, retrying with ``share=True`` if that launch fails."""
    try:
        demo.launch(server_port=7861)
    except Exception as launch_error:
        logger.error(f"Failed to launch on port 7861: {str(launch_error)}")
        # Try with different settings
        demo.launch(share=True)


# Launch the app only when this file is executed as a script.
if __name__ == "__main__":
    _launch_demo()