Spaces:

leeksang
/

Accent_Classifier

Sleeping

App Files Files Community

Accent_Classifier / app.py

leeksang

Upload app.py

9c06d0f verified 11 months ago

raw

history blame

3.61 kB

	import os
	import subprocess
	import sys
	import requests
	import gradio as gr
	from urllib.parse import urlparse
	from moviepy.editor import VideoFileClip
	from transformers import pipeline
	import yt_dlp as youtube_dl

	# Ensure required packages are installed
	def install_package(package, import_name=None):
	    """Make sure a module is importable, installing it with pip if it is not.

	    Args:
	        package: Distribution name to hand to ``pip install`` (for example
	            ``"yt-dlp"``).
	        import_name: Module name to import. When omitted it is derived from
	            ``package`` by replacing ``-`` with ``_``, because hyphenated
	            distribution names are not valid module names.

	    Raises:
	        subprocess.CalledProcessError: If ``pip install`` exits with an error.
	        ImportError: If the module still cannot be imported after installing.
	    """
	    import importlib

	    module_name = import_name or package.replace("-", "_")
	    try:
	        importlib.import_module(module_name)
	    except ImportError:
	        print(f"Installing {package}...")
	        subprocess.check_call([sys.executable, "-m", "pip", "install", package])
	        # Directories created by pip after startup may be missing from the
	        # import system's finder caches; refresh them before retrying.
	        importlib.invalidate_caches()
	        importlib.import_module(module_name)

	# Safety net for third-party dependencies. Each argument is used both as the
	# module name to import and as the pip requirement, so it has to be a valid
	# module name: "yt_dlp" (not "yt-dlp") is importable, and pip treats the two
	# spellings as the same distribution.
	install_package("torch")
	install_package("tensorflow")
	install_package("transformers")
	install_package("gradio")
	install_package("yt_dlp")
	install_package("moviepy")

	# Check if URL is a direct video file
	def is_direct_video_link(url: str) -> bool:
	    """Tell whether the path component of *url* ends in a known video extension.

	    Only the URL path is inspected (query string and fragment are ignored),
	    and the comparison is case-insensitive.
	    """
	    video_suffixes = (".mp4", ".mov", ".avi", ".mkv", ".wmv", ".flv")
	    url_path = urlparse(url).path
	    # str.endswith accepts a tuple and returns True if any suffix matches.
	    return url_path.lower().endswith(video_suffixes)

	# Download functions
	def download_video_direct(video_url: str, filename: str = "video"):
	    """Stream a video file from *video_url* to disk.

	    The extension is taken from the URL path and appended to *filename*.

	    Args:
	        video_url: Direct link to a video file.
	        filename: Output path without extension.

	    Returns:
	        The path of the file that was written (``filename`` + extension).

	    Raises:
	        ValueError: If the URL path has no file extension.
	        requests.RequestException: On connection errors, timeouts, or a
	            non-success HTTP status.
	    """
	    ext = os.path.splitext(urlparse(video_url).path)[1]
	    if not ext:
	        raise ValueError("Cannot determine file extension.")
	    filename_with_ext = filename + ext
	    print(f"Downloading: {video_url}")
	    # The context manager releases the streamed connection even on failure;
	    # the timeout keeps an unresponsive server from hanging the request forever.
	    with requests.get(video_url, stream=True, timeout=30) as resp:
	        resp.raise_for_status()
	        try:
	            with open(filename_with_ext, "wb") as f:
	                for chunk in resp.iter_content(chunk_size=8192):
	                    if chunk:  # skip keep-alive chunks
	                        f.write(chunk)
	        except Exception:
	            # Do not leave a truncated file behind for later steps to pick up.
	            try:
	                os.remove(filename_with_ext)
	            except OSError:
	                pass
	            raise
	    return filename_with_ext

	def download_video_via_yt_dlp(video_url: str, filename: str = "video.mp4"):
	    """Fetch *video_url* with yt-dlp, writing the result to *filename*.

	    Returns the *filename* that was passed in.
	    """
	    print(f"Downloading (yt-dlp): {video_url}")
	    downloader_options = dict(
	        [
	            ("format", "best[ext=mp4]/best"),
	            ("outtmpl", filename),
	            ("noplaylist", True),
	            ("quiet", True),
	        ]
	    )
	    with youtube_dl.YoutubeDL(downloader_options) as downloader:
	        downloader.download([video_url])
	    return filename

	# Extract audio via MoviePy (works for any video format its FFmpeg backend can decode)
	def extract_audio(video_filename: str, audio_filename: str = "audio.wav") -> str:
	    """Write the audio track of *video_filename* to *audio_filename*.

	    Args:
	        video_filename: Path of the video to read.
	        audio_filename: Path of the audio file to create.

	    Returns:
	        ``audio_filename``.

	    Raises:
	        Exception: With the message ``"Audio extraction failed: ..."`` for any
	            failure, including a video that has no audio track. The original
	            error is attached as ``__cause__``.
	    """
	    try:
	        video = VideoFileClip(video_filename)
	        try:
	            audio = video.audio
	            if audio is None:
	                # Without this check the failure surfaces as an opaque
	                # AttributeError on NoneType.
	                raise ValueError("the video has no audio track")
	            audio.write_audiofile(audio_filename)
	        finally:
	            # Release the clip's reader resources whether or not the
	            # export succeeded.
	            video.close()
	        return audio_filename
	    except Exception as e:
	        raise Exception(f"Audio extraction failed: {e}") from e

	# Hugging Face accent classifier
	# Pipelines already constructed, keyed by model name, so the model is not
	# rebuilt on every request.
	_CLASSIFIER_CACHE = {}


	def classify_accent(audio_file: str, model_name: str = "superb/wav2vec2-base-superb-sid") -> str:
	    """Run an audio-classification pipeline on *audio_file* and format the result.

	    Args:
	        audio_file: Path of the audio file to classify.
	        model_name: Model identifier passed to ``transformers.pipeline``.

	    Returns:
	        ``"Accent: <label> (Confidence: <pct>%)"`` built from the first result,
	        or ``"No classification result."`` when the pipeline returns nothing.

	    NOTE(review): the default checkpoint name ends in "-sid"; confirm that its
	    labels are accents rather than speaker identities.
	    """
	    classifier = _CLASSIFIER_CACHE.get(model_name)
	    if classifier is None:
	        classifier = pipeline("audio-classification", model=model_name)
	        _CLASSIFIER_CACHE[model_name] = classifier
	    results = classifier(audio_file)
	    if not results:
	        return "No classification result."
	    top = results[0]
	    return f"Accent: {top['label']} (Confidence: {top['score'] * 100:.2f}%)"

	# Full pipeline for Gradio
	def accent_classifier(video_url: str) -> str:
	    """Download a video, extract its audio, and classify it (Gradio callback).

	    All intermediate files live in a temporary directory created for this
	    call, so simultaneous requests cannot overwrite each other's files and
	    everything (including partial downloads) is removed afterwards.

	    Args:
	        video_url: Direct video link or a page URL that yt-dlp can handle.

	    Returns:
	        The classification text, or ``"Error: <reason>"`` if any step fails.
	    """
	    import tempfile

	    url = (video_url or "").strip()
	    if not url:
	        return "Error: Please enter a video URL."
	    try:
	        with tempfile.TemporaryDirectory() as workdir:
	            video_base = os.path.join(workdir, "video")
	            if is_direct_video_link(url):
	                # The direct downloader appends the extension from the URL.
	                tmp_video = download_video_direct(url, video_base)
	            else:
	                tmp_video = download_video_via_yt_dlp(url, video_base + ".mp4")
	            tmp_audio = extract_audio(tmp_video, os.path.join(workdir, "audio.wav"))
	            return classify_accent(tmp_audio)
	    except Exception as e:
	        # Top-level UI boundary: report the failure as text instead of raising.
	        return f"Error: {e}"

	# Gradio UI setup
	# Single text input for the URL; the callback's return string is shown as text.
	_url_box = gr.Textbox(
	    label="Video URL",
	    placeholder="Enter a direct video or streaming link",
	)

	iface = gr.Interface(
	    accent_classifier,
	    _url_box,
	    "text",
	    title="🎤 Accent Classifier",
	    description="Paste any video URL (MP4/MOV/AVI/MKV/WMV/FLV, YouTube, Vimeo, etc.). This will download, extract audio, and classify the speaker's accent.",
	)

	# Start the web UI only when this file is executed as a script.
	if __name__ == "__main__":
	    iface.launch()