ayloll committed on
Commit
d185723
·
verified ·
1 Parent(s): 9246621

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +32 -147
app.py CHANGED
@@ -3,7 +3,6 @@ from transformers import pipeline
3
  import yt_dlp
4
  import whisper
5
  import os
6
- import re
7
  from urllib.parse import urlparse
8
 
9
  # Delete temporary files
@@ -13,7 +12,7 @@ def clean_temp_files():
13
  if os.path.exists(file):
14
  os.remove(file)
15
 
16
- # Download YouTube video with improved options
17
  def download_video(video_url):
18
  try:
19
  ydl_opts = {
@@ -25,78 +24,36 @@ def download_video(video_url):
25
  'retries': 3,
26
  'socket_timeout': 30,
27
  'extract_flat': False,
 
 
28
  }
29
 
30
  with yt_dlp.YoutubeDL(ydl_opts) as ydl:
31
- info = ydl.extract_info(video_url, download=True)
32
- filename = ydl.prepare_filename(info)
33
- return filename if os.path.exists(filename) else None
 
 
 
 
 
 
 
34
 
 
 
 
 
 
 
 
 
35
  except Exception as e:
36
- print(f"Download error: {str(e)}")
37
- return None
38
 
39
- # Extract audio with better error handling
40
- def extract_audio(video_path):
41
- try:
42
- if not os.path.exists(video_path):
43
- return None
44
-
45
- audio_path = "temp_audio.mp3"
46
- os.system(f"ffmpeg -i \"{video_path}\" -vn -acodec libmp3lame -q:a 2 \"{audio_path}\" -y -loglevel error")
47
- return audio_path if os.path.exists(audio_path) else None
48
- except Exception as e:
49
- print(f"Audio extraction error: {str(e)}")
50
- return None
51
-
52
- # Transcribe audio with model caching
53
- def transcribe_audio(audio_path):
54
- try:
55
- if not os.path.exists(audio_path):
56
- return None
57
-
58
- model = whisper.load_model("base")
59
- result = model.transcribe(audio_path, fp16=False) # fp16=False for better compatibility
60
- return result['text']
61
- except Exception as e:
62
- print(f"Transcription error: {str(e)}")
63
- return None
64
-
65
- # Classify content with fallback
66
- def classify_content(text):
67
- try:
68
- if not text or len(text.strip()) == 0:
69
- return None, None
70
-
71
- classifier = pipeline("zero-shot-classification",
72
- model="facebook/bart-large-mnli")
73
-
74
- labels = ["educational", "entertainment", "news", "political",
75
- "religious", "technical", "advertisement", "social"]
76
-
77
- result = classifier(text,
78
- candidate_labels=labels,
79
- hypothesis_template="This text is about {}.")
80
-
81
- return result['labels'][0], result['scores'][0]
82
- except Exception as e:
83
- print(f"Classification error: {str(e)}")
84
- return None, None
85
-
86
- # Validate YouTube URL
87
- def is_valid_youtube_url(url):
88
- youtube_domains = ['youtube.com', 'www.youtube.com', 'youtu.be', 'www.youtu.be']
89
- try:
90
- parsed = urlparse(url)
91
- if not parsed.scheme in ('http', 'https'):
92
- return False
93
- if not any(domain in parsed.netloc for domain in youtube_domains):
94
- return False
95
- return True
96
- except:
97
- return False
98
 
99
- # Main processing function with better error handling
100
  def process_video(video_url):
101
  clean_temp_files()
102
 
@@ -104,87 +61,15 @@ def process_video(video_url):
104
  return "Please enter a valid YouTube URL", ""
105
 
106
  if not is_valid_youtube_url(video_url):
107
- return "Please enter a valid YouTube URL (should start with https://youtube.com or https://youtu.be)", ""
108
 
109
- try:
110
- # Download video
111
- video_path = download_video(video_url)
112
- if not video_path:
113
- return "Failed to download video (may be private, age-restricted, or unavailable)", ""
114
-
115
- # Extract audio
116
- audio_path = extract_audio(video_path)
117
- if not audio_path:
118
- clean_temp_files()
119
- return "Failed to extract audio from video", ""
120
-
121
- # Transcribe
122
- transcription = transcribe_audio(audio_path)
123
- if not transcription:
124
- clean_temp_files()
125
- return "Failed to transcribe audio (may be no speech detected)", ""
126
-
127
- # Classify
128
- category, confidence = classify_content(transcription)
129
- if not category:
130
- clean_temp_files()
131
- return transcription, "Failed to classify content"
132
-
133
- # Clean up
134
- clean_temp_files()
135
-
136
- # Format classification result
137
- classification_result = f"{category} (confidence: {confidence:.2f})"
138
- return transcription, classification_result
139
-
140
- except Exception as e:
141
  clean_temp_files()
142
- return f"An error occurred: {str(e)}", ""
143
-
144
- # Gradio interface
145
- with gr.Blocks(title="YouTube Content Analyzer") as demo:
146
- gr.Markdown("""
147
- # ▶️ YouTube Content Analyzer
148
- Enter a YouTube video URL to get transcription and content classification
149
- """)
150
-
151
- with gr.Row():
152
- url_input = gr.Textbox(
153
- label="YouTube URL",
154
- placeholder="Enter YouTube video URL here...",
155
- max_lines=1
156
- )
157
-
158
- with gr.Row():
159
- transcription_output = gr.Textbox(
160
- label="Transcription",
161
- interactive=True,
162
- lines=10,
163
- max_lines=20
164
- )
165
-
166
- with gr.Row():
167
- category_output = gr.Textbox(
168
- label="Content Category",
169
- interactive=False
170
- )
171
-
172
- submit_btn = gr.Button("Analyze Video", variant="primary")
173
-
174
- # Examples
175
- gr.Examples(
176
- examples=[
177
- ["https://www.youtube.com/watch?v=dQw4w9WgXcQ"], # Rick Astley - Never Gonna Give You Up
178
- ["https://youtu.be/J---aiyznGQ"] # Keyboard Cat
179
- ],
180
- inputs=url_input
181
- )
182
 
183
- submit_btn.click(
184
- fn=process_video,
185
- inputs=url_input,
186
- outputs=[transcription_output, category_output]
187
- )
188
 
189
- if __name__ == "__main__":
190
- demo.launch(server_name="0.0.0.0", server_port=7860)
 
3
  import yt_dlp
4
  import whisper
5
  import os
 
6
  from urllib.parse import urlparse
7
 
8
  # Delete temporary files
 
12
  if os.path.exists(file):
13
  os.remove(file)
14
 
15
+ # Enhanced YouTube downloader with error handling
16
  def download_video(video_url):
17
  try:
18
  ydl_opts = {
 
24
  'retries': 3,
25
  'socket_timeout': 30,
26
  'extract_flat': False,
27
+ 'ignoreerrors': True,
28
+ 'cookiefile': 'cookies.txt' if os.path.exists('cookies.txt') else None,
29
  }
30
 
31
  with yt_dlp.YoutubeDL(ydl_opts) as ydl:
32
+ # First check if video is available
33
+ try:
34
+ info = ydl.extract_info(video_url, download=False)
35
+ if info.get('availability') == 'unavailable':
36
+ return None, "Video is unavailable (private, deleted, or region-locked)"
37
+
38
+ if info.get('age_limit', 0) > 0:
39
+ return None, "Age-restricted content (try with cookies)"
40
+ except:
41
+ pass
42
 
43
+ # Try to download
44
+ try:
45
+ ydl.download([video_url])
46
+ filename = 'temp_video.mp4' if os.path.exists('temp_video.mp4') else None
47
+ return filename, None
48
+ except yt_dlp.utils.DownloadError as e:
49
+ return None, f"Download failed: {str(e)}"
50
+
51
  except Exception as e:
52
+ return None, f"Error: {str(e)}"
 
53
 
54
+ # [Rest of your functions (extract_audio, transcribe_audio, classify_content) remain the same...]
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
55
 
56
+ # Main processing function with enhanced error handling
57
  def process_video(video_url):
58
  clean_temp_files()
59
 
 
61
  return "Please enter a valid YouTube URL", ""
62
 
63
  if not is_valid_youtube_url(video_url):
64
+ return "Please enter a valid YouTube URL", ""
65
 
66
+ # Download video
67
+ video_path, download_error = download_video(video_url)
68
+ if not video_path:
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
69
  clean_temp_files()
70
+ error_msg = download_error or "Failed to download video"
71
+ return error_msg, ""
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
72
 
73
+ # [Rest of your processing logic remains the same...]
 
 
 
 
74
 
75
+ # [Rest of your Gradio interface code remains the same...]