Badro committed on
Commit
efe0e7b
·
verified ·
1 Parent(s): 021e3c4

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +62 -139
app.py CHANGED
@@ -1,155 +1,78 @@
1
- """
2
- YouTube Clip Analyzer - Identifies viral/interesting timestamps in videos
3
- using Hugging Face models for AI processing.
4
- """
5
  import gradio as gr
6
- from pytube import YouTube
7
- from moviepy.editor import VideoFileClip
8
- import os
9
- import logging
10
- import time
11
- import requests
12
- import json
13
- import torch
14
- import whisper
15
  from transformers import pipeline
 
16
 
17
- # Set up logging
18
- logging.basicConfig(level=logging.INFO, format='%(asctime)s - %(levelname)s - %(message)s')
19
- logger = logging.getLogger(__name__)
20
 
21
- # Initialize models
22
- try:
23
- logger.info("Initializing models")
24
- whisper_model = whisper.load_model("tiny")
25
- sentiment_analyzer = pipeline("sentiment-analysis", model="distilbert-base-uncased-finetuned-sst-2-english")
26
- summarizer = pipeline("summarization", model="facebook/bart-large-cnn")
27
- except Exception as e:
28
- logger.error(f"Failed to initialize models: {str(e)}")
29
- whisper_model = None
30
- sentiment_analyzer = None
31
- summarizer = None
32
-
33
- def download_youtube_audio(youtube_url):
34
- """Download audio from YouTube video"""
35
- try:
36
- yt = YouTube(youtube_url)
37
- audio_stream = yt.streams.filter(only_audio=True).first()
38
- audio_path = "temp_audio.mp4"
39
- audio_stream.download(filename=audio_path)
40
-
41
- # Convert to WAV for better compatibility with speech recognition
42
- video = VideoFileClip(audio_path)
43
- wav_path = "temp_audio.wav"
44
- video.audio.write_audiofile(wav_path, ffmpeg_params=["-ac", "1", "-ar", "16000"])
45
- video.close()
46
- os.remove(audio_path)
47
-
48
- return wav_path, yt.title
49
- except Exception as e:
50
- logger.error(f"Error downloading YouTube audio: {str(e)}")
51
- raise
52
-
53
- def analyze_youtube(youtube_url, progress=gr.Progress()):
54
- """Main function to analyze YouTube video"""
55
- try:
56
- progress(0.1, desc="Downloading YouTube audio...")
57
-
58
- # Download audio
59
- wav_path, video_title = download_youtube_audio(youtube_url)
60
-
61
- progress(0.3, desc="Transcribing audio...")
62
- # Transcribe audio
63
- result = whisper_model.transcribe(wav_path, fp16=False)
64
- segments = result["segments"]
65
 
66
- progress(0.5, desc="Processing transcript...")
67
- # Find clips
68
- clips = []
69
- for i in range(len(segments)):
70
- start_time = segments[i]["start"]
71
- for j in range(i, min(i + 10, len(segments))):
72
- end_time = segments[j]["end"]
73
- duration = end_time - start_time
74
- if 30 <= duration <= 60:
75
- text = " ".join([seg["text"] for seg in segments[i:j+1]])
76
- if text.strip():
77
- # Analyze sentiment
78
- sentiment_result = sentiment_analyzer(text)[0]
79
- score = sentiment_result["score"]
80
-
81
- # Generate summary if text is long enough
82
- summary = text
83
- if len(text) > 100:
84
- try:
85
- summary_result = summarizer(text, max_length=100, min_length=30, do_sample=False)
86
- summary = summary_result[0]["summary_text"]
87
- except Exception as e:
88
- logger.error(f"Summarization error: {str(e)}")
89
-
90
- clips.append({
91
- "start": start_time,
92
- "end": end_time,
93
- "score": score,
94
- "text": text,
95
- "summary": summary
96
- })
97
 
98
- progress(0.9, desc="Finalizing results...")
99
- # Clean up
100
- if os.path.exists(wav_path):
101
- os.remove(wav_path)
 
 
 
 
 
 
102
 
103
- # Sort and format results
104
- clips.sort(key=lambda x: x["score"], reverse=True)
105
- top_clips = clips[:3]
106
 
107
- output = f"## Analysis Results for: {video_title}\n\n"
108
- for i, clip in enumerate(top_clips, 1):
109
- start_time_fmt = f"{int(clip['start']//60):02d}:{int(clip['start']%60):02d}"
110
- end_time_fmt = f"{int(clip['end']//60):02d}:{int(clip['end']%60):02d}"
111
-
112
- output += f"### Clip {i}\n"
113
- output += f"⏱️ Time: {start_time_fmt} - {end_time_fmt}\n"
114
- output += f"📊 Interest Score: {clip['score']:.2f}\n"
115
- output += f"💬 Summary: {clip['summary']}\n\n"
116
-
117
- # Add direct link to timestamp
118
- video_id = youtube_url.split("v=")[1].split("&")[0] if "v=" in youtube_url else ""
119
- if video_id:
120
- timestamp_seconds = int(clip["start"])
121
- output += f"🔗 [Watch this segment](https://youtu.be/{video_id}?t={timestamp_seconds})\n\n"
122
 
123
- progress(1.0, desc="Done!")
124
- return youtube_url, output
125
- except Exception as e:
126
- logger.error(f"Error: {str(e)}")
127
- return None, f"Error processing video: {str(e)}"
128
 
129
  # Create Gradio interface
130
  demo = gr.Interface(
131
- fn=analyze_youtube,
132
- inputs=gr.Textbox(
133
- label="YouTube URL",
134
- placeholder="Enter YouTube URL (e.g., https://www.youtube.com/watch?v=dQw4w9WgXcQ)"
135
- ),
136
- outputs=[
137
- gr.Video(label="Video"),
138
- gr.Markdown(label="Analysis Results")
139
  ],
140
- title="YouTube Viral Clip Analyzer",
141
- description="Identify the most interesting timestamps in YouTube videos using AI analysis.",
142
- examples=[
143
- ["https://www.youtube.com/watch?v=Yf_1w00qIKc"],
144
- ["https://www.youtube.com/watch?v=dQw4w9WgXcQ"]
145
- ]
146
  )
147
 
148
- # Launch the app
149
  if __name__ == "__main__":
150
- try:
151
- demo.launch(server_port=7861)
152
- except Exception as e:
153
- logger.error(f"Failed to launch on port 7861: {str(e)}")
154
- # Try with different settings
155
- demo.launch(share=True)
 
 
 
 
 
1
  import gradio as gr
2
+ import pytube
 
 
 
 
 
 
 
 
3
  from transformers import pipeline
4
+ from textblob import TextBlob
5
 
6
+ # Initialize sentiment analysis pipeline
7
+ sentiment_analyzer = pipeline("sentiment-analysis")
 
8
 
9
+ def analyze_youtube_content(youtube_url, transcript_text=""):
10
+ """Main function to analyze YouTube content"""
11
+ results = {}
12
+
13
+ # If URL is provided, get video info
14
+ if youtube_url:
15
+ try:
16
+ # Create a YouTube object
17
+ yt = pytube.YouTube(youtube_url)
18
+ results["video_info"] = {
19
+ "title": yt.title,
20
+ "status": "success"
21
+ }
22
+ except Exception as e:
23
+ results["video_info"] = {
24
+ "status": "error",
25
+ "message": str(e)
26
+ }
27
+
28
+ # If transcript is provided, analyze it
29
+ if transcript_text:
30
+ # Analyze sentiment with TextBlob
31
+ blob = TextBlob(transcript_text)
32
+ textblob_sentiment = blob.sentiment
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
33
 
34
+ # Analyze sentiment with Hugging Face
35
+ hf_result = sentiment_analyzer(transcript_text[:512])[0]
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
36
 
37
+ results["sentiment"] = {
38
+ "textblob": {
39
+ "polarity": round(textblob_sentiment.polarity, 2),
40
+ "assessment": "positive" if textblob_sentiment.polarity > 0 else "negative" if textblob_sentiment.polarity < 0 else "neutral"
41
+ },
42
+ "huggingface": {
43
+ "label": hf_result["label"],
44
+ "score": round(hf_result["score"], 4)
45
+ }
46
+ }
47
 
48
+ # Identify key moments based on sentiment
49
+ sentences = [str(sentence) for sentence in blob.sentences]
50
+ key_moments = []
51
 
52
+ for i, sentence in enumerate(sentences):
53
+ sentiment = TextBlob(sentence).sentiment.polarity
54
+ if abs(sentiment) > 0.5:
55
+ key_moments.append({
56
+ "text": sentence,
57
+ "sentiment": sentiment
58
+ })
 
 
 
 
 
 
 
 
59
 
60
+ results["key_moments"] = key_moments[:5] # Top 5 moments
61
+
62
+ return results
 
 
63
 
64
  # Create Gradio interface
65
  demo = gr.Interface(
66
+ fn=analyze_youtube_content,
67
+ inputs=[
68
+ gr.Textbox(label="YouTube URL"),
69
+ gr.Textbox(label="Transcript Text", lines=10)
 
 
 
 
70
  ],
71
+ outputs=gr.JSON(label="Analysis Results"),
72
+ title="YouTube Viral Moment Analyzer",
73
+ description="Analyze viral moments from YouTube videos using ML models"
 
 
 
74
  )
75
 
76
+ # Launch the app with MCP server enabled
77
  if __name__ == "__main__":
78
+ demo.launch(server_name="0.0.0.0", share=True, mcp_server=True)