Badro committed on
Commit
5c97b56
·
verified ·
1 Parent(s): d9554cb

Upload 2 files

Browse files

app and requirement

Files changed (2) hide show
  1. app.py +155 -0
  2. requirements.txt +7 -0
app.py ADDED
@@ -0,0 +1,155 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ """
2
+ YouTube Clip Analyzer - Identifies viral/interesting timestamps in videos
3
+ using Hugging Face models for AI processing.
4
+ """
5
+ import gradio as gr
6
+ from pytube import YouTube
7
+ from moviepy.editor import VideoFileClip
8
+ import os
9
+ import logging
10
+ import time
11
+ import requests
12
+ import json
13
+ import torch
14
+ import whisper
15
+ from transformers import pipeline
16
+
17
# Configure logging for the process and create this module's logger.
logging.basicConfig(
    level=logging.INFO,
    format='%(asctime)s - %(levelname)s - %(message)s',
)
logger = logging.getLogger(__name__)

# Load the transcription, sentiment and summarization models at import time.
# If loading any of them raises, the error is logged and all three handles
# are reset to None.
try:
    logger.info("Initializing models")
    whisper_model = whisper.load_model("tiny")
    sentiment_analyzer = pipeline(
        "sentiment-analysis",
        model="distilbert-base-uncased-finetuned-sst-2-english",
    )
    summarizer = pipeline(
        "summarization",
        model="facebook/bart-large-cnn",
    )
except Exception as init_error:
    logger.error(f"Failed to initialize models: {init_error!s}")
    whisper_model = sentiment_analyzer = summarizer = None
32
+
33
def download_youtube_audio(youtube_url):
    """Download a YouTube video's audio track and convert it to mono 16 kHz WAV.

    Args:
        youtube_url: URL of the YouTube video to fetch.

    Returns:
        A ``(wav_path, title)`` tuple: the path of the WAV file that was
        written (the caller is responsible for deleting it) and the video
        title reported by pytube.

    Raises:
        ValueError: If the video exposes no audio-only stream.
        Exception: Any other download/conversion error is logged and re-raised.
    """
    # Local imports keep this block self-contained: the module-level imports
    # only provide VideoFileClip, which cannot open a file that has no video
    # stream (the download below is audio-only).
    import shutil
    import tempfile

    from moviepy.editor import AudioFileClip

    # A private scratch directory avoids filename collisions when several
    # requests are processed at the same time.
    work_dir = tempfile.mkdtemp(prefix="yt_audio_")
    wav_path = None
    try:
        yt = YouTube(youtube_url)
        audio_stream = yt.streams.filter(only_audio=True).first()
        if audio_stream is None:
            raise ValueError("No audio-only stream available for this video")
        audio_path = audio_stream.download(
            output_path=work_dir, filename="temp_audio.mp4"
        )

        # The WAV file must outlive this function, so it is created outside
        # the scratch directory that is removed in the ``finally`` below.
        fd, wav_path = tempfile.mkstemp(prefix="yt_audio_", suffix=".wav")
        os.close(fd)

        # Convert to WAV for better compatibility with speech recognition.
        clip = AudioFileClip(audio_path)
        try:
            clip.write_audiofile(
                wav_path, ffmpeg_params=["-ac", "1", "-ar", "16000"]
            )
        finally:
            clip.close()

        return wav_path, yt.title
    except Exception as e:
        logger.error(f"Error downloading YouTube audio: {str(e)}")
        # Do not leave a partial WAV file behind when the caller never
        # receives its path.
        if wav_path is not None and os.path.exists(wav_path):
            os.remove(wav_path)
        raise
    finally:
        shutil.rmtree(work_dir, ignore_errors=True)
52
+
53
def _extract_video_id(youtube_url):
    """Return the video ID found in a YouTube URL, or "" when none is found."""
    from urllib.parse import parse_qs, urlparse

    parsed = urlparse(youtube_url.strip())
    from_query = parse_qs(parsed.query).get("v")
    if from_query:
        return from_query[0]
    path_parts = [part for part in parsed.path.split("/") if part]
    if parsed.netloc.lower().endswith("youtu.be") and path_parts:
        return path_parts[0]
    if len(path_parts) >= 2 and path_parts[0] in ("embed", "shorts", "live", "v"):
        return path_parts[1]
    return ""


def _format_timestamp(seconds):
    """Format a number of seconds as zero-padded ``MM:SS``."""
    return f"{int(seconds // 60):02d}:{int(seconds % 60):02d}"


def _find_candidate_clips(segments, min_duration=30, max_duration=60, max_window=10):
    """Collect runs of consecutive segments whose span fits the duration range."""
    candidates = []
    for first, opening in enumerate(segments):
        start_time = opening["start"]
        window = segments[first:first + max_window]
        for offset, closing in enumerate(window):
            end_time = closing["end"]
            if not min_duration <= end_time - start_time <= max_duration:
                continue
            text = " ".join(seg["text"] for seg in window[:offset + 1])
            if text.strip():
                candidates.append(
                    {"start": start_time, "end": end_time, "text": text}
                )
    return candidates


def _summarize_clip(text):
    """Summarize clip text, falling back to the raw text when that is not possible."""
    if summarizer is None or len(text) <= 100:
        return text
    try:
        summary_result = summarizer(
            text, max_length=100, min_length=30, do_sample=False
        )
        return summary_result[0]["summary_text"]
    except Exception as e:
        logger.error(f"Summarization error: {str(e)}")
        return text


def analyze_youtube(youtube_url, progress=gr.Progress()):
    """Analyze a YouTube video and report its three highest-scoring clips.

    The audio is downloaded and transcribed, every run of up to 10 consecutive
    transcript segments lasting 30-60 seconds is scored with the sentiment
    model, and the three best-scoring runs are summarized and rendered as
    Markdown.

    Args:
        youtube_url: URL of the video to analyze.
        progress: Gradio progress tracker used to report pipeline stages.

    Returns:
        ``(youtube_url, markdown_report)`` on success, or
        ``(None, error_message)`` when any step fails.
    """
    try:
        if not youtube_url or not youtube_url.strip():
            raise ValueError("Please enter a YouTube URL")
        # Model loading failures leave these handles as None; fail with a
        # readable message instead of an AttributeError further down.
        if whisper_model is None or sentiment_analyzer is None:
            raise RuntimeError("AI models failed to initialize; check the logs")

        progress(0.1, desc="Downloading YouTube audio...")
        wav_path, video_title = download_youtube_audio(youtube_url)

        try:
            progress(0.3, desc="Transcribing audio...")
            result = whisper_model.transcribe(wav_path, fp16=False)
        finally:
            # The audio is only needed for transcription; remove it even when
            # transcription fails so temporary files do not pile up.
            if os.path.exists(wav_path):
                os.remove(wav_path)
        segments = result["segments"]

        progress(0.5, desc="Processing transcript...")
        clips = _find_candidate_clips(segments)
        for clip in clips:
            # truncation=True keeps long transcripts within the model's
            # maximum input length.
            sentiment_result = sentiment_analyzer(clip["text"], truncation=True)[0]
            clip["score"] = sentiment_result["score"]

        progress(0.9, desc="Finalizing results...")
        clips.sort(key=lambda x: x["score"], reverse=True)
        top_clips = clips[:3]
        # Summaries are only shown for the selected clips, so the (slow)
        # summarizer is run on those alone rather than on every candidate.
        for clip in top_clips:
            clip["summary"] = _summarize_clip(clip["text"])

        # The video ID does not depend on the clip; resolve it once.
        video_id = _extract_video_id(youtube_url)

        parts = [f"## Analysis Results for: {video_title}\n\n"]
        if not top_clips:
            parts.append("No clips between 30 and 60 seconds long were found.\n")
        for i, clip in enumerate(top_clips, 1):
            start_time_fmt = _format_timestamp(clip["start"])
            end_time_fmt = _format_timestamp(clip["end"])

            parts.append(f"### Clip {i}\n")
            parts.append(f"⏱️ Time: {start_time_fmt} - {end_time_fmt}\n")
            parts.append(f"📊 Interest Score: {clip['score']:.2f}\n")
            parts.append(f"💬 Summary: {clip['summary']}\n\n")

            # Add direct link to timestamp
            if video_id:
                timestamp_seconds = int(clip["start"])
                parts.append(
                    f"🔗 [Watch this segment](https://youtu.be/{video_id}?t={timestamp_seconds})\n\n"
                )
        output = "".join(parts)

        progress(1.0, desc="Done!")
        # NOTE(review): the first value feeds a gr.Video output; confirm that
        # component can actually play a YouTube page URL.
        return youtube_url, output
    except Exception as e:
        logger.error(f"Error: {str(e)}")
        return None, f"Error processing video: {str(e)}"
128
+
129
# Build the Gradio UI: a single URL textbox as input, and a video component
# plus a Markdown panel as outputs.
_url_input = gr.Textbox(
    label="YouTube URL",
    placeholder="Enter YouTube URL (e.g., https://www.youtube.com/watch?v=dQw4w9WgXcQ)",
)
_result_outputs = [
    gr.Video(label="Video"),
    gr.Markdown(label="Analysis Results"),
]
_example_urls = [
    ["https://www.youtube.com/watch?v=Yf_1w00qIKc"],
    ["https://www.youtube.com/watch?v=dQw4w9WgXcQ"],
]

demo = gr.Interface(
    fn=analyze_youtube,
    inputs=_url_input,
    outputs=_result_outputs,
    title="YouTube Viral Clip Analyzer",
    description="Identify the most interesting timestamps in YouTube videos using AI analysis.",
    examples=_example_urls,
)
147
+
148
def _launch_app():
    """Start the app on port 7861; if that raises, log it and retry with share=True."""
    try:
        demo.launch(server_port=7861)
    except Exception as launch_error:
        logger.error(f"Failed to launch on port 7861: {launch_error!s}")
        # Try with different settings
        demo.launch(share=True)


# Launch the app only when this file is executed as a script.
if __name__ == "__main__":
    _launch_app()
requirements.txt ADDED
@@ -0,0 +1,7 @@
 
 
 
 
 
 
 
 
1
+ gradio==3.35.2
2
+ pytube==15.0.0
3
+ moviepy==1.0.3
4
+ openai-whisper==20231117
5
+ transformers==4.35.0
6
+ torch==2.0.1
7
+ requests>=2.28.0