Spaces:

masrialx404
/

accent-analyzer

Sleeping

App Files Files Community

accent-analyzer / app.py

masrialx

update handler

4ed0cc5 10 months ago

raw

history blame contribute delete

3.53 kB

	import gradio as gr
	import torch
	import torchaudio
	from transformers import AutoFeatureExtractor, Wav2Vec2ForSequenceClassification
	import yt_dlp
	import tempfile
	import os
	from moviepy.editor import VideoFileClip
	from pydub import AudioSegment
	import uuid
	import shutil

	# Hugging Face Hub checkpoint used for accent classification.
	MODEL_ID = "dima806/english_accents_classification"

	# Both objects are loaded once, at import time, and shared by every request.
	feature_extractor = AutoFeatureExtractor.from_pretrained(MODEL_ID)
	model = Wav2Vec2ForSequenceClassification.from_pretrained(MODEL_ID)

	# Class names, looked up by the class index the model predicts.
	# NOTE(review): the order is assumed to match the checkpoint's own label
	# mapping -- confirm against the model config.
	LABELS = ["us", "england", "indian", "australia", "canada"]

	# Human-readable accent name for each raw label.
	PRETTY = dict(
	    us="American",
	    england="British",
	    indian="Indian",
	    australia="Australian",
	    canada="Canadian",
	)

	def download_video(url):
	    """Download the video at *url* into a fresh temporary directory.

	    Args:
	        url: Address of the video; handed to yt-dlp unchanged.

	    Returns:
	        A ``(video_path, temp_dir)`` tuple. The caller owns ``temp_dir`` and
	        is responsible for deleting it when the video is no longer needed.

	    Raises:
	        Exception: If yt-dlp raises, or if no file exists at the expected
	            path afterwards. The temporary directory is removed before the
	            exception propagates, so nothing is left behind on failure.
	    """
	    temp_dir = tempfile.mkdtemp()
	    video_path = os.path.join(temp_dir, f"{uuid.uuid4()}.mp4")

	    ydl_opts = {
	        'format': 'best',
	        'outtmpl': video_path,
	        'quiet': True,
	        'no_warnings': True,
	        'http_headers': {
	            'User-Agent': 'Mozilla/5.0',
	        },
	    }
	    try:
	        with yt_dlp.YoutubeDL(ydl_opts) as ydl:
	            ydl.download([url])

	        if not os.path.exists(video_path):
	            raise Exception("Failed to download video")
	    except BaseException:
	        # The caller only learns about temp_dir through the return value, so
	        # on any failure this function must clean it up itself.
	        shutil.rmtree(temp_dir, ignore_errors=True)
	        raise

	    return video_path, temp_dir

	def extract_audio(video_file):
	    """Extract the audio track of *video_file* as a 16 kHz mono WAV file.

	    The WAV is written next to the input, using the input's name with its
	    extension replaced by ``.wav``.

	    Args:
	        video_file: Path of the video file to read.

	    Returns:
	        Path of the WAV file that was written.

	    Raises:
	        ValueError: If the video has no audio track.
	    """
	    # Swap only the real extension. A plain str.replace(".mp4", ".wav") also
	    # rewrites ".mp4" inside directory names and does nothing for ".MP4",
	    # which would make the output path equal to the input path.
	    root, ext = os.path.splitext(video_file)
	    if ext.lower() == ".wav":
	        # Never write the result on top of the file being read.
	        audio_path = f"{root}_16k.wav"
	    else:
	        audio_path = f"{root}.wav"

	    clip = VideoFileClip(video_file)
	    try:
	        if clip.audio is None:
	            raise ValueError("The video has no audio track")
	        clip.audio.write_audiofile(audio_path, verbose=False, logger=None)
	    finally:
	        # Close the clip even when the write fails.
	        clip.close()

	    # Re-encode in place to 16 kHz mono.
	    audio = AudioSegment.from_wav(audio_path)
	    audio = audio.set_frame_rate(16000).set_channels(1)
	    audio.export(audio_path, format="wav")
	    return audio_path

	def classify_accent(audio_path):
	    """Predict the English accent spoken in the audio file at *audio_path*.

	    Args:
	        audio_path: Path of an audio file readable by ``torchaudio.load``.

	    Returns:
	        A ``(accent, confidence)`` tuple: the display name from ``PRETTY``
	        (or the raw label when it has no entry) and the softmax probability
	        of the predicted class as a percentage rounded to two decimals.

	    Raises:
	        ValueError: If the file contains no audio samples.
	    """
	    waveform, sr = torchaudio.load(audio_path)
	    if waveform.numel() == 0:
	        raise ValueError("The audio file contains no samples")

	    # Uploaded WAV files reach this function without the mono conversion done
	    # by extract_audio, so a stereo file arrives as (channels, samples).
	    # Averaging over the channel axis gives a 1-D signal in every case; for a
	    # single channel it is equivalent to the squeeze() used previously.
	    if waveform.dim() > 1:
	        waveform = waveform.mean(dim=0)

	    if sr != 16000:
	        waveform = torchaudio.transforms.Resample(orig_freq=sr, new_freq=16000)(waveform)

	    inputs = feature_extractor(
	        waveform.numpy(),
	        sampling_rate=16000,
	        return_tensors="pt",
	        padding=True,
	    )
	    with torch.no_grad():
	        logits = model(**inputs).logits

	    predicted = torch.argmax(logits, dim=-1).item()
	    confidence = torch.softmax(logits, dim=-1)[0][predicted].item()
	    label = LABELS[predicted]
	    return PRETTY.get(label, label), round(confidence * 100, 2)

	def analyze_accent(url, file):
	    """Gradio handler: classify the accent of an uploaded file or a video URL.

	    An uploaded file takes precedence over the URL. A ``.wav`` upload is
	    classified directly; any other upload, and every downloaded video, first
	    has its audio extracted.

	    Args:
	        url: Video URL, or an empty/blank/``None`` value when not supplied.
	        file: Uploaded file (a path string or an object whose ``name``
	            attribute is the path), or ``None`` when nothing was uploaded.

	    Returns:
	        ``(accent, confidence)`` on success, or ``("Error: <message>", 0.0)``
	        when no input was given or any processing step failed. Exceptions are
	        never propagated to the UI.
	    """
	    temp_dir = None
	    audio_path = None
	    try:
	        if file is not None:
	            # Accept both a plain path and a temp-file wrapper exposing
	            # ``.name``, so the handler works with either upload style.
	            file_path = getattr(file, "name", file)
	            if file_path.lower().endswith(".wav"):
	                audio_path = file_path
	            else:
	                audio_path = extract_audio(file_path)
	        elif url and url.strip():
	            video_path, temp_dir = download_video(url.strip())
	            audio_path = extract_audio(video_path)
	        else:
	            return "Error: Please provide a URL or upload a file.", 0.0

	        accent, confidence = classify_accent(audio_path)
	        return accent, confidence
	    except Exception as e:
	        return f"Error: {str(e)}", 0.0
	    finally:
	        # Cleanup runs on success and on failure, and must never replace the
	        # result or error message being returned.
	        if temp_dir:
	            # Downloaded video and its extracted audio both live in here.
	            shutil.rmtree(temp_dir, ignore_errors=True)
	        elif audio_path:
	            try:
	                os.remove(audio_path)
	            except OSError:
	                pass

	# Input widgets, in the positional order analyze_accent expects: url, file.
	_accent_inputs = [
	    gr.Textbox(label="YouTube URL (optional)"),
	    gr.File(label="Upload MP4 or WAV (optional)", file_types=[".mp4", ".wav"]),
	]

	# Output widgets, matching the (accent, confidence) pair the handler returns.
	_accent_outputs = [
	    gr.Textbox(label="Predicted Accent"),
	    gr.Number(label="Confidence (%)"),
	]

	_accent_description = (
	    "Enter a YouTube URL or upload a video/audio file to analyze the English accent.\n\n"
	    "💡 If your video URL doesn't work, try uploading a video file instead."
	)

	iface = gr.Interface(
	    fn=analyze_accent,
	    inputs=_accent_inputs,
	    outputs=_accent_outputs,
	    title="English Accent Analyzer",
	    description=_accent_description,
	)

	# Start the web UI as soon as the script runs.
	iface.launch()