Spaces:

EdgarDataScientist
/

Accent_Classification

Runtime error

App Files Files Community

EdgarDataScientist committed on May 29, 2025

Commit

5ca9307

verified ·

1 Parent(s): d23d38e

Update app.py

Browse files

Files changed (1) hide show

app.py +14 -6

app.py CHANGED Viewed

@@ -6,22 +6,26 @@ from pytubefix import YouTube
 from pytubefix.cli import on_progress
 import requests
 import os
 CLASSIFIER = "Jzuluaga/accent-id-commonaccent_xlsr-en-english"
 def download_video(url):
-    """Handles YouTube and direct video links with pytubefix and error handling"""
     try:
         if "youtube.com" in url or "youtu.be" in url:
             yt = YouTube(url, on_progress_callback=on_progress)
-            # Get progressive mp4 streams (video + audio combined)
             stream = yt.streams.filter(progressive=True, file_extension='mp4').first()
             if not stream:
                 raise ValueError("No suitable video stream found.")
             video_path = stream.download()
             return video_path
         else:
-            # For direct MP4 links, download file
             local_filename = "temp_video.mp4"
             with requests.get(url, stream=True) as r:
                 r.raise_for_status()
@@ -33,6 +37,7 @@ def download_video(url):
         raise RuntimeError(f"Failed to download video: {e}")
 def extract_audio(video_path):
     clip = VideoFileClip(video_path)
     audio_path = "temp_audio.wav"
     clip.audio.write_audiofile(audio_path, logger=None)
@@ -40,18 +45,21 @@ def extract_audio(video_path):
     return audio_path
 def classify_accent(audio_path):
     classifier = EncoderClassifier.from_hparams(
         source=CLASSIFIER,
         savedir="pretrained_models/accent_classifier",
-        run_opts={"device":"cpu"}  # Change to "cuda" if GPU available
     )
     waveform, sample_rate = torchaudio.load(audio_path)
-    prediction = classifier.classify_batch(waveform)
     predicted_accent = prediction[3][0]
     confidence = prediction[1].exp().max().item() * 100
     return predicted_accent, f"{confidence:.2f}%"
 def process_video(url):
     video_path = None
     audio_path = None
     try:
@@ -62,6 +70,7 @@ def process_video(url):
     except Exception as e:
         return f"Error: {e}", ""
     finally:
         for f in [video_path, audio_path]:
             if f and os.path.exists(f):
                 os.remove(f)
@@ -79,4 +88,3 @@ iface = gr.Interface(
 if __name__ == "__main__":
     iface.launch()

 from pytubefix.cli import on_progress
 import requests
 import os
+import torch
 CLASSIFIER = "Jzuluaga/accent-id-commonaccent_xlsr-en-english"
+def get_default_device():
+    """Return the default device (cuda if available, else cpu)."""
+    return torch.device("cuda" if torch.cuda.is_available() else "cpu")
 def download_video(url):
+    """Download video from YouTube or direct MP4 URL using pytubefix."""
     try:
         if "youtube.com" in url or "youtu.be" in url:
             yt = YouTube(url, on_progress_callback=on_progress)
             stream = yt.streams.filter(progressive=True, file_extension='mp4').first()
             if not stream:
                 raise ValueError("No suitable video stream found.")
             video_path = stream.download()
             return video_path
         else:
+            # Direct MP4 file download
             local_filename = "temp_video.mp4"
             with requests.get(url, stream=True) as r:
                 r.raise_for_status()
         raise RuntimeError(f"Failed to download video: {e}")
 def extract_audio(video_path):
+    """Extract audio from video and save as WAV file."""
     clip = VideoFileClip(video_path)
     audio_path = "temp_audio.wav"
     clip.audio.write_audiofile(audio_path, logger=None)
     return audio_path
 def classify_accent(audio_path):
+    """Classify English accent from audio file using SpeechBrain model."""
+    device = get_default_device()
     classifier = EncoderClassifier.from_hparams(
         source=CLASSIFIER,
         savedir="pretrained_models/accent_classifier",
+        run_opts={"device": str(device)}
     )
     waveform, sample_rate = torchaudio.load(audio_path)
+    prediction = classifier.classify_batch(waveform.to(device))
     predicted_accent = prediction[3][0]
     confidence = prediction[1].exp().max().item() * 100
     return predicted_accent, f"{confidence:.2f}%"
 def process_video(url):
+    """Main processing pipeline: download video, extract audio, classify accent."""
     video_path = None
     audio_path = None
     try:
     except Exception as e:
         return f"Error: {e}", ""
     finally:
+        # Clean up temporary files
         for f in [video_path, audio_path]:
             if f and os.path.exists(f):
                 os.remove(f)
 if __name__ == "__main__":
     iface.launch()