Spaces:

EdgarDataScientist
/

Accent_Classification

Runtime error

App Files Community

EdgarDataScientist committed on May 29, 2025

Commit

2685e79

verified ·

1 Parent(s): f6f6edc

Update app.py

Browse files

Files changed (1) hide show

app.py +10 -13

app.py CHANGED Viewed

@@ -2,37 +2,36 @@ import gradio as gr
 from moviepy.editor import VideoFileClip
 from speechbrain.pretrained import EncoderClassifier
 import torchaudio
-from pytube import YouTube
 import os
 CLASSIFIER = "Jzuluaga/accent-id-commonaccent_xlsr-en-english"
 def download_video(url):
-    """Handles YouTube and direct video links with error handling"""
     try:
         if "youtube.com" in url or "youtu.be" in url:
-            yt = YouTube(url)
             stream = yt.streams.filter(progressive=True, file_extension='mp4').first()
             if not stream:
                 raise ValueError("No suitable video stream found.")
-            video_path = stream.download()  # Store the download path
             return video_path
         else:
             # For direct MP4 links, download file
-            import requests
             local_filename = "temp_video.mp4"
             with requests.get(url, stream=True) as r:
                 r.raise_for_status()
                 with open(local_filename, 'wb') as f:
                     for chunk in r.iter_content(chunk_size=8192):
                         f.write(chunk)
-            video_path = local_filename # Store the download path
-            return video_path
     except Exception as e:
         raise RuntimeError(f"Failed to download video: {e}")
 def extract_audio(video_path):
     clip = VideoFileClip(video_path)
     audio_path = "temp_audio.wav"
@@ -44,11 +43,10 @@ def classify_accent(audio_path):
     classifier = EncoderClassifier.from_hparams(
         source=CLASSIFIER,
         savedir="pretrained_models/accent_classifier",
-        run_opts={"device":"cpu"}  # or "cuda" if GPU available
     )
     waveform, sample_rate = torchaudio.load(audio_path)
     prediction = classifier.classify_batch(waveform)
-    # prediction format: (scores, probabilities, embeddings, predicted_labels)
     predicted_accent = prediction[3][0]
     confidence = prediction[1].exp().max().item() * 100
     return predicted_accent, f"{confidence:.2f}%"
@@ -68,8 +66,6 @@ def process_video(url):
             if f and os.path.exists(f):
                 os.remove(f)
-# Gradio interface
 iface = gr.Interface(
     fn=process_video,
     inputs=gr.Textbox(label="Enter Public Video URL (YouTube, Loom, direct MP4)"),
@@ -83,3 +79,4 @@ iface = gr.Interface(
 if __name__ == "__main__":
     iface.launch()

 from moviepy.editor import VideoFileClip
 from speechbrain.pretrained import EncoderClassifier
 import torchaudio
+from pytubefix import YouTube
+from pytubefix.cli import on_progress
+import requests
 import os
 CLASSIFIER = "Jzuluaga/accent-id-commonaccent_xlsr-en-english"
 def download_video(url):
+    """Handles YouTube and direct video links with pytubefix and error handling"""
     try:
         if "youtube.com" in url or "youtu.be" in url:
+            yt = YouTube(url, on_progress_callback=on_progress)
+            # Get progressive mp4 streams (video + audio combined)
             stream = yt.streams.filter(progressive=True, file_extension='mp4').first()
             if not stream:
                 raise ValueError("No suitable video stream found.")
+            video_path = stream.download()
             return video_path
         else:
             # For direct MP4 links, download file
             local_filename = "temp_video.mp4"
             with requests.get(url, stream=True) as r:
                 r.raise_for_status()
                 with open(local_filename, 'wb') as f:
                     for chunk in r.iter_content(chunk_size=8192):
                         f.write(chunk)
+            return local_filename
     except Exception as e:
         raise RuntimeError(f"Failed to download video: {e}")
 def extract_audio(video_path):
     clip = VideoFileClip(video_path)
     audio_path = "temp_audio.wav"
     classifier = EncoderClassifier.from_hparams(
         source=CLASSIFIER,
         savedir="pretrained_models/accent_classifier",
+        run_opts={"device":"cpu"}  # Change to "cuda" if GPU available
     )
     waveform, sample_rate = torchaudio.load(audio_path)
     prediction = classifier.classify_batch(waveform)
     predicted_accent = prediction[3][0]
     confidence = prediction[1].exp().max().item() * 100
     return predicted_accent, f"{confidence:.2f}%"
             if f and os.path.exists(f):
                 os.remove(f)
 iface = gr.Interface(
     fn=process_video,
     inputs=gr.Textbox(label="Enter Public Video URL (YouTube, Loom, direct MP4)"),
 if __name__ == "__main__":
     iface.launch()