# accent-analyzer / app.py
# masrialx's picture
# update handler
# 4ed0cc5
import gradio as gr
import torch
import torchaudio
from transformers import AutoFeatureExtractor, Wav2Vec2ForSequenceClassification
import yt_dlp
import tempfile
import os
from moviepy.editor import VideoFileClip
from pydub import AudioSegment
import uuid
import shutil
# Hugging Face Hub identifier of the accent-classification checkpoint.
MODEL_ID = "dima806/english_accents_classification"

# Both objects are created once, at import time, and shared by every request.
feature_extractor = AutoFeatureExtractor.from_pretrained(MODEL_ID)
model = Wav2Vec2ForSequenceClassification.from_pretrained(MODEL_ID)

# Raw label names, indexed by the class id that classify_accent() predicts.
# NOTE(review): the order is assumed to match the checkpoint's output
# classes -- confirm against the model's config.
LABELS = ["us", "england", "indian", "australia", "canada"]

# Human-readable accent name shown in the UI for each raw label.
PRETTY = dict(
    (
        ("us", "American"),
        ("england", "British"),
        ("indian", "Indian"),
        ("australia", "Australian"),
        ("canada", "Canadian"),
    )
)
def download_video(url):
    """Download the video at ``url`` into a fresh temporary directory.

    Args:
        url: Address of the video, handed to yt-dlp unchanged.

    Returns:
        A ``(video_path, temp_dir)`` tuple. ``video_path`` lives inside
        ``temp_dir``; the caller owns ``temp_dir`` and must delete it.

    Raises:
        Exception: Whatever yt-dlp raises, or ``RuntimeError`` when the
            download reports success but the expected file is missing.
            The temporary directory is removed before the error propagates.
    """
    temp_dir = tempfile.mkdtemp()
    try:
        video_path = os.path.join(temp_dir, f"{uuid.uuid4()}.mp4")
        ydl_opts = {
            'format': 'best',
            'outtmpl': video_path,
            'quiet': True,
            'no_warnings': True,
            'http_headers': {
                'User-Agent': 'Mozilla/5.0',
            },
        }
        with yt_dlp.YoutubeDL(ydl_opts) as ydl:
            ydl.download([url])
        if not os.path.exists(video_path):
            raise RuntimeError("Failed to download video")
    except BaseException:
        # The caller never receives temp_dir when we raise, so it cannot
        # clean up for us; remove the directory here to avoid leaking it.
        shutil.rmtree(temp_dir, ignore_errors=True)
        raise
    return video_path, temp_dir
def extract_audio(video_file):
    """Extract the audio track of ``video_file`` as a 16 kHz mono WAV file.

    The WAV is written next to the input, using the input's name with its
    extension replaced by ``.wav``.

    Args:
        video_file: Path to a video file readable by moviepy.

    Returns:
        Path of the WAV file that was written.

    Raises:
        ValueError: If the video has no audio track.
    """
    # splitext only touches the real extension, whatever its case; a plain
    # str.replace(".mp4", ...) misses ".MP4" and rewrites matches elsewhere
    # in the path.
    root, _ext = os.path.splitext(video_file)
    audio_path = root + ".wav"
    if audio_path == video_file:
        # Never write the output over the file we are still reading from.
        audio_path = root + "_16k.wav"

    clip = VideoFileClip(video_file)
    try:
        if clip.audio is None:
            raise ValueError("The video has no audio track")
        clip.audio.write_audiofile(audio_path, verbose=False, logger=None)
    finally:
        # Release the underlying reader even when the write fails.
        clip.close()

    # Re-encode in place to the 16 kHz mono format classify_accent() feeds
    # to the model.
    audio = AudioSegment.from_wav(audio_path)
    audio = audio.set_frame_rate(16000).set_channels(1)
    audio.export(audio_path, format="wav")
    return audio_path
def classify_accent(audio_path):
    """Predict the English accent spoken in an audio file.

    Args:
        audio_path: Path to an audio file that torchaudio can load.

    Returns:
        A ``(accent, confidence)`` tuple: the display name of the predicted
        accent and its softmax probability as a percentage rounded to two
        decimals.

    Raises:
        ValueError: If the file contains no audio samples.
    """
    waveform, sr = torchaudio.load(audio_path)
    if waveform.numel() == 0:
        raise ValueError("Audio file contains no samples")

    # torchaudio returns (channels, frames). Averaging the channels yields a
    # 1-D mono signal; squeeze() alone would leave stereo input 2-D, which
    # the feature extractor does not accept as a single utterance.
    if waveform.dim() > 1:
        waveform = waveform.mean(dim=0)

    if sr != 16000:
        resample = torchaudio.transforms.Resample(orig_freq=sr, new_freq=16000)
        waveform = resample(waveform)

    inputs = feature_extractor(
        waveform.numpy(),
        sampling_rate=16000,
        return_tensors="pt",
        padding=True,
    )
    with torch.no_grad():
        logits = model(**inputs).logits

    # One softmax serves both the winning class and its confidence.
    probabilities = torch.softmax(logits, dim=-1)[0]
    predicted = int(torch.argmax(probabilities).item())
    confidence = probabilities[predicted].item()

    label = LABELS[predicted]
    return PRETTY.get(label, label), round(confidence * 100, 2)
def analyze_accent(url, file):
    """Gradio handler: classify the accent in an uploaded file or a video URL.

    An uploaded file takes precedence over the URL when both are supplied.

    Args:
        url: Video URL, or an empty/blank/None value when unused.
        file: Uploaded file (an object exposing ``.name``, or a plain path
            string), or None when nothing was uploaded.

    Returns:
        ``(accent, confidence)`` on success, or ``("Error: <reason>", 0.0)``
        when no input was provided or any processing step failed.
    """
    temp_dir = None
    extracted_audio = None
    try:
        if file is not None:
            # Accept both a tempfile-like wrapper and a bare path string.
            file_path = getattr(file, "name", file)
            if file_path.lower().endswith(".wav"):
                audio_path = file_path
            else:
                audio_path = extracted_audio = extract_audio(file_path)
        elif url and url.strip():
            video_path, temp_dir = download_video(url.strip())
            audio_path = extracted_audio = extract_audio(video_path)
            # The video is no longer needed once its audio is extracted.
            os.remove(video_path)
        else:
            return "Error: Please provide a URL or upload a file.", 0.0

        return classify_accent(audio_path)
    except Exception as e:
        return f"Error: {e}", 0.0
    finally:
        # Delete only what this function created. The uploaded file belongs
        # to Gradio and must survive so the same upload can be re-submitted.
        if extracted_audio and os.path.exists(extracted_audio):
            os.remove(extracted_audio)
        if temp_dir:
            shutil.rmtree(temp_dir, ignore_errors=True)
# Input widgets, listed in the positional order analyze_accent(url, file)
# receives them.
_url_box = gr.Textbox(label="YouTube URL (optional)")
_upload_box = gr.File(
    label="Upload MP4 or WAV (optional)",
    file_types=[".mp4", ".wav"],
)

# Output widgets, matching the (accent, confidence) pair the handler returns.
_accent_box = gr.Textbox(label="Predicted Accent")
_confidence_box = gr.Number(label="Confidence (%)")

_description = (
    "Enter a YouTube URL or upload a video/audio file to analyze the English accent.\n\n"
    + "💡 If your video URL doesn't work, try uploading a video file instead."
)

iface = gr.Interface(
    fn=analyze_accent,
    inputs=[_url_box, _upload_box],
    outputs=[_accent_box, _confidence_box],
    title="English Accent Analyzer",
    description=_description,
)

# Start the web UI as soon as the module is executed.
iface.launch()