YT-agent-analyzer1

Sleeping

App Files Files Community

YT-agent-analyzer1 / app.py

Badro

Upload 2 files

5c97b56 verified 12 months ago

raw

history blame contribute delete

5.9 kB

	"""
	YouTube Clip Analyzer - Identifies viral/interesting timestamps in videos
	using Hugging Face models for AI processing.
	"""
	import gradio as gr
	from pytube import YouTube
	from moviepy.editor import VideoFileClip
	import os
	import logging
	import time
	import requests
	import json
	import torch
	import whisper
	from transformers import pipeline

	# Configure logging: timestamped INFO-level records, plus a logger
	# named after this module.
	logging.basicConfig(
	    format='%(asctime)s - %(levelname)s - %(message)s',
	    level=logging.INFO,
	)
	logger = logging.getLogger(__name__)

	# Load the speech-to-text, sentiment and summarization models at import
	# time. If any of them fails to load, all three names are set to None so
	# the module still imports.
	try:
	    logger.info("Initializing models")
	    whisper_model = whisper.load_model("tiny")
	    sentiment_analyzer = pipeline(
	        "sentiment-analysis",
	        model="distilbert-base-uncased-finetuned-sst-2-english",
	    )
	    summarizer = pipeline(
	        "summarization",
	        model="facebook/bart-large-cnn",
	    )
	except Exception as e:
	    logger.error(f"Failed to initialize models: {str(e)}")
	    whisper_model = sentiment_analyzer = summarizer = None

	def download_youtube_audio(youtube_url):
	    """Download a YouTube video's audio track and convert it to WAV.

	    The first audio-only stream reported by pytube is saved to
	    ``temp_audio.mp4`` and re-encoded through moviepy/ffmpeg with the
	    ``-ac 1 -ar 16000`` options (one channel, 16000 Hz) into
	    ``temp_audio.wav``. The intermediate download is removed whether or
	    not the conversion succeeds.

	    Args:
	        youtube_url: URL of the YouTube video to fetch.

	    Returns:
	        A ``(wav_path, title)`` tuple: the path of the WAV file that was
	        written and the video title reported by pytube. The caller is
	        responsible for deleting the WAV file.

	    Raises:
	        ValueError: If the video exposes no audio-only stream.
	        Exception: Any other download/conversion error is logged and
	            re-raised unchanged.
	    """
	    # Local import: the file's top-level import only brings in
	    # VideoFileClip, which cannot open a file that has no video stream.
	    from moviepy.editor import AudioFileClip

	    audio_path = "temp_audio.mp4"
	    wav_path = "temp_audio.wav"
	    try:
	        yt = YouTube(youtube_url)
	        audio_stream = yt.streams.filter(only_audio=True).first()
	        if audio_stream is None:
	            raise ValueError("No audio-only stream available for this video")

	        try:
	            audio_stream.download(filename=audio_path)

	            # Convert to WAV for better compatibility with speech recognition
	            clip = AudioFileClip(audio_path)
	            try:
	                clip.write_audiofile(
	                    wav_path,
	                    ffmpeg_params=["-ac", "1", "-ar", "16000"],
	                )
	            finally:
	                # Release the ffmpeg reader even if encoding failed.
	                clip.close()
	        finally:
	            # Never leave the intermediate download behind.
	            if os.path.exists(audio_path):
	                os.remove(audio_path)

	        return wav_path, yt.title
	    except Exception as e:
	        logger.error(f"Error downloading YouTube audio: {str(e)}")
	        raise

	def _extract_video_id(youtube_url):
	    """Return the video id found in a YouTube URL, or "" when none is found."""
	    from urllib.parse import parse_qs, urlparse

	    parsed = urlparse(youtube_url.strip())
	    # Standard watch URLs carry the id in the "v" query parameter.
	    query_ids = parse_qs(parsed.query).get("v")
	    if query_ids:
	        return query_ids[0]
	    path_parts = [part for part in parsed.path.split("/") if part]
	    # Short links: https://youtu.be/<id>
	    if parsed.netloc.lower().endswith("youtu.be") and path_parts:
	        return path_parts[0]
	    # Path-style links: /embed/<id>, /shorts/<id>, /live/<id>, /v/<id>
	    if len(path_parts) >= 2 and path_parts[0] in ("embed", "shorts", "live", "v"):
	        return path_parts[1]
	    return ""


	def _format_timestamp(seconds):
	    """Format a non-negative number of seconds as zero-padded MM:SS."""
	    minutes, secs = divmod(int(seconds), 60)
	    return f"{minutes:02d}:{secs:02d}"


	def analyze_youtube(youtube_url, progress=gr.Progress()):
	    """Analyze a YouTube video and report its three highest-scoring clips.

	    Steps: download the audio, transcribe it with the Whisper model, then
	    slide over the transcript segments building candidate clips that start
	    at one segment and end at most 9 segments later. Candidates lasting
	    between 30 and 60 seconds are scored with the sentiment model's
	    confidence value and summarized when their text exceeds 100
	    characters. The three best-scoring candidates are rendered as
	    Markdown.

	    Args:
	        youtube_url: URL of the video to analyze.
	        progress: Gradio progress tracker, called with a fraction and a
	            description at each stage.

	    Returns:
	        A ``(youtube_url, markdown)`` tuple on success, or
	        ``(None, error_message)`` if any step fails. Errors are logged
	        rather than raised.
	    """
	    min_clip_seconds = 30
	    max_clip_seconds = 60
	    max_window_segments = 10
	    top_n = 3

	    try:
	        if not youtube_url or not youtube_url.strip():
	            raise ValueError("Please provide a YouTube URL")
	        # The summarizer is best-effort below, but transcription and
	        # scoring cannot work without their models.
	        if whisper_model is None or sentiment_analyzer is None:
	            raise RuntimeError("Models failed to initialize; check the startup logs")

	        progress(0.1, desc="Downloading YouTube audio...")

	        # Download audio
	        wav_path, video_title = download_youtube_audio(youtube_url)

	        try:
	            progress(0.3, desc="Transcribing audio...")
	            # Transcribe audio
	            result = whisper_model.transcribe(wav_path, fp16=False)
	        finally:
	            # The WAV file is only needed for transcription; remove it even
	            # when transcription fails so it does not accumulate on disk.
	            if os.path.exists(wav_path):
	                os.remove(wav_path)
	        segments = result["segments"]

	        progress(0.5, desc="Processing transcript...")
	        # Find clips
	        clips = []
	        for i, first_segment in enumerate(segments):
	            start_time = first_segment["start"]
	            last_index = min(i + max_window_segments, len(segments))
	            for j in range(i, last_index):
	                end_time = segments[j]["end"]
	                duration = end_time - start_time
	                if not (min_clip_seconds <= duration <= max_clip_seconds):
	                    continue
	                text = " ".join(seg["text"] for seg in segments[i:j + 1])
	                if not text.strip():
	                    continue

	                # Analyze sentiment
	                sentiment_result = sentiment_analyzer(text)[0]
	                score = sentiment_result["score"]

	                # Generate summary if text is long enough; on failure the
	                # raw transcript text is kept as the summary.
	                summary = text
	                if len(text) > 100:
	                    try:
	                        summary_result = summarizer(
	                            text, max_length=100, min_length=30, do_sample=False
	                        )
	                        summary = summary_result[0]["summary_text"]
	                    except Exception as e:
	                        logger.error(f"Summarization error: {str(e)}")

	                clips.append({
	                    "start": start_time,
	                    "end": end_time,
	                    "score": score,
	                    "text": text,
	                    "summary": summary,
	                })

	        progress(0.9, desc="Finalizing results...")

	        # Sort and format results
	        clips.sort(key=lambda x: x["score"], reverse=True)
	        top_clips = clips[:top_n]

	        # The id is the same for every clip, so parse the URL once.
	        video_id = _extract_video_id(youtube_url)

	        parts = [f"## Analysis Results for: {video_title}\n\n"]
	        if not top_clips:
	            parts.append(
	                "No segments between 30 and 60 seconds were found in this video.\n"
	            )
	        for rank, clip in enumerate(top_clips, 1):
	            start_time_fmt = _format_timestamp(clip["start"])
	            end_time_fmt = _format_timestamp(clip["end"])

	            parts.append(f"### Clip {rank}\n")
	            parts.append(f"⏱️ Time: {start_time_fmt} - {end_time_fmt}\n")
	            parts.append(f"📊 Interest Score: {clip['score']:.2f}\n")
	            parts.append(f"💬 Summary: {clip['summary']}\n\n")

	            # Add direct link to timestamp
	            if video_id:
	                timestamp_seconds = int(clip["start"])
	                parts.append(
	                    f"🔗 [Watch this segment](https://youtu.be/{video_id}?t={timestamp_seconds})\n\n"
	                )
	        output = "".join(parts)

	        progress(1.0, desc="Done!")
	        return youtube_url, output
	    except Exception as e:
	        # Top-level boundary for the UI: log with traceback, report as text.
	        logger.exception(f"Error: {str(e)}")
	        return None, f"Error processing video: {str(e)}"

	# Build the Gradio UI: one URL textbox in, a video player plus a Markdown
	# report out, wired to analyze_youtube.
	_url_input = gr.Textbox(
	    label="YouTube URL",
	    placeholder="Enter YouTube URL (e.g., https://www.youtube.com/watch?v=dQw4w9WgXcQ)",
	)
	_result_outputs = [
	    gr.Video(label="Video"),
	    gr.Markdown(label="Analysis Results"),
	]
	# Sample URLs offered as examples in the interface.
	_example_rows = [
	    ["https://www.youtube.com/watch?v=Yf_1w00qIKc"],
	    ["https://www.youtube.com/watch?v=dQw4w9WgXcQ"],
	]

	demo = gr.Interface(
	    fn=analyze_youtube,
	    inputs=_url_input,
	    outputs=_result_outputs,
	    title="YouTube Viral Clip Analyzer",
	    description="Identify the most interesting timestamps in YouTube videos using AI analysis.",
	    examples=_example_rows,
	)

	# Script entry point: start the Gradio server when run directly.
	if __name__ == "__main__":
	    try:
	        # First attempt: serve on the fixed port 7861.
	        demo.launch(server_port=7861)
	    except Exception as e:
	        logger.error(f"Failed to launch on port 7861: {str(e)}")
	        # Fallback: let Gradio choose the port and request a share link.
	        demo.launch(share=True)