Spaces:
Sleeping
Sleeping
| import os | |
| import subprocess | |
| import sys | |
| # Ensure yt_dlp is available | |
| try: | |
| import yt_dlp as youtube_dl | |
| except ImportError: | |
| subprocess.check_call([sys.executable, "-m", "pip", "install", "yt-dlp"]) | |
| import yt_dlp as youtube_dl | |
| import gradio as gr | |
| from transformers import pipeline | |
def download_video(video_url, filename="downloaded_video.mp4"):
    """Download the audio of a single video with yt-dlp.

    Args:
        video_url: URL of the video page to download from.
        filename: Path handed to yt-dlp as its output template; the
            downloaded media is written there.

    Returns:
        ``filename``, unchanged, so the result can be fed to the next step.

    Raises:
        Any exception raised by yt-dlp while downloading is propagated.
    """
    browser_user_agent = (
        'Mozilla/5.0 (Windows NT 10.0; Win64; x64) '
        'AppleWebKit/537.36 (KHTML, like Gecko) '
        'Chrome/115.0.0.0 Safari/537.36'
    )
    ydl_opts = {
        # Prefer an audio-only stream; fall back to the best combined one.
        'format': 'bestaudio/best',
        'outtmpl': filename,
        # Never expand a playlist URL into multiple downloads.
        'noplaylist': True,
        'quiet': True,
        # YoutubeDL has no 'user_agent' option (it would be silently
        # ignored); custom request headers are supplied via 'http_headers'.
        'http_headers': {'User-Agent': browser_user_agent},
    }
    with youtube_dl.YoutubeDL(ydl_opts) as ydl:
        ydl.download([video_url])
    return filename
def extract_audio(video_filename, audio_filename="extracted_audio.wav"):
    """Convert a media file to a 16 kHz, mono, 16-bit PCM WAV via ffmpeg.

    Args:
        video_filename: Path of the input media file.
        audio_filename: Path of the WAV file to write (overwritten if it
            already exists, because ffmpeg is run with ``-y``).

    Returns:
        ``audio_filename``, unchanged.

    Raises:
        subprocess.CalledProcessError: If ffmpeg exits with a non-zero status.
        FileNotFoundError: If the ``ffmpeg`` executable cannot be found.
    """
    # Output settings: drop video, signed 16-bit PCM, 16 kHz, one channel.
    audio_settings = ["-vn", "-acodec", "pcm_s16le", "-ar", "16000", "-ac", "1"]
    ffmpeg_args = ["ffmpeg", "-y", "-i", video_filename]
    ffmpeg_args.extend(audio_settings)
    ffmpeg_args.append(audio_filename)

    subprocess.run(ffmpeg_args, check=True)
    return audio_filename
# Pipelines already built, keyed by model name, so the model is loaded once
# per process instead of on every request.
_CLASSIFIER_CACHE = {}


def _get_classifier(model_name):
    """Return the audio-classification pipeline for ``model_name``, building it on first use."""
    classifier = _CLASSIFIER_CACHE.get(model_name)
    if classifier is None:
        classifier = pipeline("audio-classification", model=model_name)
        _CLASSIFIER_CACHE[model_name] = classifier
    return classifier


def classify_accent(audio_file, model_name="superb/wav2vec2-base-superb-sid"):
    """Run an audio-classification model on a file and describe its first prediction.

    Args:
        audio_file: Path of the audio file handed to the pipeline.
        model_name: Hugging Face model identifier used to build the pipeline.

    Returns:
        A two-line string with the label and the score (as a percentage) of
        the first prediction, or ``"No result."`` when the pipeline returns
        nothing.
    """
    results = _get_classifier(model_name)(audio_file)
    if not results:
        return "No result."
    # The first entry is reported as the top prediction
    # (presumably the pipeline orders results by score — confirm).
    top = results[0]
    return f"Speaker ID (as accent proxy): {top['label']}\nConfidence: {top['score'] * 100:.2f}%"
def accent_classifier(video_url):
    """Download a video, extract its audio and classify it; never raises.

    Each call works inside its own temporary directory, so simultaneous
    requests cannot overwrite or delete each other's intermediate files, and
    everything written there (including partial downloads) is removed
    afterwards.

    Args:
        video_url: URL of the video to analyse.

    Returns:
        The classification text on success, otherwise a string starting with
        ``"Error occurred: "`` describing what went wrong.
    """
    # Local imports keep this block self-contained with respect to the
    # file's import section.
    import shutil
    import tempfile

    # Reject missing/blank input up front instead of letting the downloader
    # fail with a less helpful message.
    if not isinstance(video_url, str) or not video_url.strip():
        return "Error occurred: no video URL provided."

    workdir = None
    try:
        workdir = tempfile.mkdtemp(prefix="accent_classifier_")
        video_path = os.path.join(workdir, "downloaded_video.mp4")
        audio_path = os.path.join(workdir, "extracted_audio.wav")

        video_file = download_video(video_url.strip(), video_path)
        audio_file = extract_audio(video_file, audio_path)
        result = classify_accent(audio_file)
    except Exception as e:
        # UI boundary: report the failure as text rather than crashing the app.
        result = f"Error occurred: {e}"
    finally:
        # Best-effort cleanup; a failure to delete must not mask the result.
        if workdir is not None:
            shutil.rmtree(workdir, ignore_errors=True)
    return result
# Gradio UI: a single URL text box as input, the classifier's text as output.
iface = gr.Interface(
    title="Accent Classifier",
    description="Download a video, extract the audio, and classify the speaker (as an accent proxy) using a Hugging Face model.",
    fn=accent_classifier,
    inputs=gr.Textbox(
        placeholder="Paste a public YouTube or Vimeo video link here",
        label="Video URL",
    ),
    outputs="text",
)

# Start the web app only when this file is executed as a script.
if __name__ == "__main__":
    iface.launch()