Spaces:

leeksang
/

Accent_classifier_project

Sleeping

App Files Files Community

Accent_classifier_project / app.py

leeksang

Upload 6 files

5ca170c verified 7 months ago

raw

history blame contribute delete

2.27 kB

	import os
	import subprocess
	import sys

	# Ensure yt_dlp is available: import it, and if it is missing install it with
	# pip into the interpreter that is running this script, then import again.
	try:
	    import yt_dlp as youtube_dl
	except ImportError:
	    import importlib

	    subprocess.check_call([sys.executable, "-m", "pip", "install", "yt-dlp"])
	    # The package was created after the interpreter started; clear the import
	    # finders' caches so the retry below is guaranteed to see it.
	    importlib.invalidate_caches()
	    import yt_dlp as youtube_dl

	import gradio as gr
	from transformers import pipeline

	def download_video(video_url, filename="downloaded_video.mp4"):
	    """Download media for ``video_url`` with yt-dlp and return the output path.

	    Args:
	        video_url: URL handed to yt-dlp's ``download``.
	        filename: Value used as yt-dlp's output template (``outtmpl``).

	    Returns:
	        ``filename``, unchanged.

	    Raises:
	        Any exception raised by yt-dlp is propagated to the caller.
	    """
	    ydl_opts = {
	        # Prefer an audio-only stream, otherwise the best available format.
	        'format': 'bestaudio/best',
	        'outtmpl': filename,
	        'noplaylist': True,
	        'quiet': True,
	        # Custom request headers go through ``http_headers``; a top-level
	        # ``user_agent`` key is not a YoutubeDL option and would be ignored.
	        'http_headers': {
	            'User-Agent': (
	                'Mozilla/5.0 (Windows NT 10.0; Win64; x64) '
	                'AppleWebKit/537.36 (KHTML, like Gecko) '
	                'Chrome/115.0.0.0 Safari/537.36'
	            ),
	        },
	    }
	    with youtube_dl.YoutubeDL(ydl_opts) as ydl:
	        ydl.download([video_url])
	    return filename

	def extract_audio(video_filename, audio_filename="extracted_audio.wav"):
	    """Transcode the audio of ``video_filename`` into a WAV file using ffmpeg.

	    ffmpeg is asked to drop the video stream and write signed 16-bit PCM at
	    16 kHz with a single channel, overwriting ``audio_filename`` if present.

	    Args:
	        video_filename: Path of the input media file.
	        audio_filename: Path of the WAV file to write.

	    Returns:
	        ``audio_filename``, unchanged.

	    Raises:
	        subprocess.CalledProcessError: If ffmpeg exits with a non-zero status.
	    """
	    source_args = ["-y", "-i", video_filename]
	    target_args = [
	        "-vn",
	        "-acodec", "pcm_s16le",
	        "-ar", "16000",
	        "-ac", "1",
	        audio_filename,
	    ]
	    subprocess.run(["ffmpeg", *source_args, *target_args], check=True)
	    return audio_filename

	# Already-built audio-classification pipelines, keyed by model name, so a
	# model is loaded once per process instead of once per request.
	_CLASSIFIER_CACHE = {}


	def classify_accent(audio_file, model_name="superb/wav2vec2-base-superb-sid"):
	    """Classify ``audio_file`` and format the first prediction as text.

	    Args:
	        audio_file: Audio input passed straight to the pipeline.
	        model_name: Model identifier given to ``pipeline``; the built
	            pipeline is reused on later calls with the same name.

	    Returns:
	        A two-line string with the label and confidence (as a percentage)
	        of the first entry in the pipeline output, or ``"No result."`` when
	        the output is empty.
	    """
	    classifier = _CLASSIFIER_CACHE.get(model_name)
	    if classifier is None:
	        classifier = pipeline("audio-classification", model=model_name)
	        _CLASSIFIER_CACHE[model_name] = classifier

	    results = classifier(audio_file)
	    if not results:
	        return "No result."
	    top = results[0]
	    return f"Speaker ID (as accent proxy): {top['label']}\nConfidence: {top['score'] * 100:.2f}%"

	def accent_classifier(video_url):
	    """Download a video, extract its audio, classify it, and return text.

	    Every call works inside its own temporary directory, so overlapping
	    requests cannot overwrite or delete each other's intermediate files.
	    The directory and everything in it is removed before returning.

	    Args:
	        video_url: URL of the video to process.

	    Returns:
	        The text produced by ``classify_accent``, or
	        ``"Error occurred: <message>"`` if any stage raises.
	    """
	    # Imported here so this function does not depend on edits elsewhere.
	    import shutil
	    import tempfile

	    workdir = None
	    try:
	        workdir = tempfile.mkdtemp(prefix="accent_classifier_")
	        video_file = download_video(
	            video_url, os.path.join(workdir, "downloaded_video.mp4")
	        )
	        audio_file = extract_audio(
	            video_file, os.path.join(workdir, "extracted_audio.wav")
	        )
	        return classify_accent(audio_file)
	    except Exception as e:
	        # UI boundary: report the failure as text instead of raising.
	        return f"Error occurred: {e}"
	    finally:
	        # Best-effort cleanup; a failed delete must not mask the result.
	        if workdir is not None:
	            shutil.rmtree(workdir, ignore_errors=True)

	# Gradio UI: one text box for the video URL in, plain text out, backed by
	# accent_classifier.
	iface = gr.Interface(
	    fn=accent_classifier,
	    inputs=gr.Textbox(
	        label="Video URL",
	        placeholder="Paste a public YouTube or Vimeo video link here",
	    ),
	    outputs="text",
	    title="Accent Classifier",
	    description=(
	        "Download a video, extract the audio, and classify the speaker "
	        "(as an accent proxy) using a Hugging Face model."
	    ),
	)

	# Start the web server only when executed as a script.
	if __name__ == "__main__":
	    iface.launch()