Spaces:
Sleeping
Sleeping
| import gradio as gr | |
| import torch | |
| import torchaudio | |
| from transformers import AutoFeatureExtractor, Wav2Vec2ForSequenceClassification | |
| import yt_dlp | |
| import tempfile | |
| import os | |
| from moviepy.editor import VideoFileClip | |
| from pydub import AudioSegment | |
| import uuid | |
| import shutil | |
# Hugging Face Hub identifier of the accent-classification checkpoint.
MODEL_ID = "dima806/english_accents_classification"

# Both objects are loaded once, at import time, and shared by every request.
feature_extractor = AutoFeatureExtractor.from_pretrained(MODEL_ID)
model = Wav2Vec2ForSequenceClassification.from_pretrained(MODEL_ID)

# Raw class names, looked up by the class index the model predicts.
# NOTE(review): the order presumably mirrors the checkpoint's id2label
# mapping -- confirm against the model config.
LABELS = ["us", "england", "indian", "australia", "canada"]

# Display names shown to the user for each raw class name.
PRETTY = dict(
    us="American",
    england="British",
    indian="Indian",
    australia="Australian",
    canada="Canadian",
)
def download_video(url):
    """Download the video at *url* into a fresh temporary directory.

    Args:
        url: Address handed to yt-dlp unchanged.

    Returns:
        A ``(video_path, temp_dir)`` tuple. ``video_path`` is the downloaded
        file inside ``temp_dir``. The caller owns ``temp_dir`` and is
        responsible for removing it.

    Raises:
        Exception: If yt-dlp raises, or if no file exists at the expected
            path once it returns. In both cases the temporary directory is
            removed before the exception propagates.
    """
    temp_dir = tempfile.mkdtemp()
    video_path = os.path.join(temp_dir, f"{uuid.uuid4()}.mp4")
    ydl_opts = {
        'format': 'best',
        'outtmpl': video_path,
        'quiet': True,
        'no_warnings': True,
        'http_headers': {
            'User-Agent': 'Mozilla/5.0',
        },
    }
    try:
        with yt_dlp.YoutubeDL(ydl_opts) as ydl:
            ydl.download([url])
        if not os.path.exists(video_path):
            raise Exception("Failed to download video")
    except BaseException:
        # The caller only learns about temp_dir through the return value, so
        # on any failure this function is the only place that can remove it.
        shutil.rmtree(temp_dir, ignore_errors=True)
        raise
    return video_path, temp_dir
def extract_audio(video_file):
    """Write the audio track of *video_file* to a 16 kHz mono WAV file.

    Args:
        video_file: Path of the video to read.

    Returns:
        Path of the WAV file, placed next to the input with the extension
        swapped for ``.wav``.

    Raises:
        ValueError: If the video has no audio track.
    """
    # splitext swaps only the real extension, whatever its case; a plain
    # str.replace(".mp4", ...) leaves "clip.MP4" or "clip.mov" unchanged and
    # would make the WAV overwrite the source file.
    root, _ = os.path.splitext(video_file)
    audio_path = root + ".wav"
    if audio_path == video_file:
        # Input is already named *.wav: never write the output over it.
        audio_path = root + "_16k.wav"

    clip = VideoFileClip(video_file)
    try:
        if clip.audio is None:
            raise ValueError("The video has no audio track")
        clip.audio.write_audiofile(audio_path, verbose=False, logger=None)
    finally:
        # Close the clip even when writing the audio fails.
        clip.close()

    # Re-encode in place to the 16 kHz mono format the classifier is fed.
    audio = AudioSegment.from_wav(audio_path)
    audio = audio.set_frame_rate(16000).set_channels(1)
    audio.export(audio_path, format="wav")
    return audio_path
def classify_accent(audio_path):
    """Predict the accent spoken in the audio file at *audio_path*.

    Args:
        audio_path: Path of an audio file readable by ``torchaudio.load``.

    Returns:
        A ``(accent, confidence)`` tuple: the display name of the predicted
        class (the raw label when it has no entry in ``PRETTY``) and its
        softmax probability as a percentage rounded to two decimals.

    Raises:
        ValueError: If the file contains no audio samples.
    """
    waveform, sr = torchaudio.load(audio_path)
    if waveform.numel() == 0:
        raise ValueError("Audio file contains no samples")

    # torchaudio returns (channels, frames). Averaging the channels yields a
    # single 1-D signal; squeeze() alone would keep a stereo file as two rows,
    # which would be treated as a batch of two clips.
    if waveform.dim() > 1:
        waveform = waveform.mean(dim=0)

    if sr != 16000:
        resample = torchaudio.transforms.Resample(orig_freq=sr, new_freq=16000)
        waveform = resample(waveform)

    inputs = feature_extractor(
        waveform.numpy(),
        sampling_rate=16000,
        return_tensors="pt",
        padding=True,
    )
    with torch.no_grad():
        logits = model(**inputs).logits

    # One softmax serves both the winning class and its probability.
    probabilities = torch.softmax(logits, dim=-1)[0]
    predicted = int(torch.argmax(probabilities).item())
    confidence = probabilities[predicted].item()

    label = LABELS[predicted]
    return PRETTY.get(label, label), round(confidence * 100, 2)
def analyze_accent(url, file):
    """Classify the accent in an uploaded file or a video fetched from *url*.

    An uploaded file takes precedence over the URL. Uploaded ``.wav`` files
    are classified directly; any other upload, and every downloaded video,
    first has its audio extracted.

    Args:
        url: Video URL, or an empty/blank/None value when unused.
        file: Uploaded file, or None. Either a path string or an object
            exposing the path as ``.name``.

    Returns:
        ``(accent, confidence)`` on success. On failure, or when neither
        input is given, ``("Error: <message>", 0.0)``; exceptions are
        reported this way rather than raised.
    """
    temp_dir = None
    generated_audio = None
    try:
        if file is not None:
            # NOTE(review): depending on the Gradio version/configuration the
            # upload arrives as a plain path or as a file wrapper with .name.
            file_path = getattr(file, "name", file)
            if file_path.lower().endswith(".wav"):
                audio_path = file_path
            else:
                audio_path = generated_audio = extract_audio(file_path)
        elif url and url.strip():
            video_path, temp_dir = download_video(url.strip())
            audio_path = generated_audio = extract_audio(video_path)
        else:
            return "Error: Please provide a URL or upload a file.", 0.0

        accent, confidence = classify_accent(audio_path)
        return accent, confidence
    except Exception as e:
        return f"Error: {str(e)}", 0.0
    finally:
        # Remove only what this call created, on success and failure alike.
        # The user's uploaded file is left untouched.
        if generated_audio is not None:
            try:
                os.remove(generated_audio)
            except OSError:
                # Best effort: already gone or not removable.
                pass
        if temp_dir:
            shutil.rmtree(temp_dir, ignore_errors=True)
# Web UI: one optional URL box and one optional upload feed analyze_accent,
# whose (accent, confidence) result fills the two output widgets.
iface = gr.Interface(
    title="English Accent Analyzer",
    description=(
        "Enter a YouTube URL or upload a video/audio file to analyze the English accent.\n\n"
        "💡 If your video URL doesn't work, try uploading a video file instead."
    ),
    fn=analyze_accent,
    inputs=[
        gr.Textbox(label="YouTube URL (optional)"),
        gr.File(
            label="Upload MP4 or WAV (optional)",
            file_types=[".mp4", ".wav"],
        ),
    ],
    outputs=[
        gr.Textbox(label="Predicted Accent"),
        gr.Number(label="Confidence (%)"),
    ],
)

# Start the app as soon as the module is executed.
iface.launch()