Spaces:

Sajidahamed
/

Classifciation

Sleeping

App Files Files Community

Classifciation / app.py

Sajidahamed

Update app.py

34200f5 verified 9 months ago

raw

history blame contribute delete

3.96 kB

	import gradio as gr
	import os
	import subprocess
	import torch
	import torchaudio
	from speechbrain.pretrained import EncoderClassifier
	import yt_dlp
	import tempfile

	def download_video(url, out_path):
	    """Download a video from a YouTube page or a direct media URL.

	    Links containing ``youtube.com`` or ``youtu.be`` are fetched with yt-dlp;
	    every other URL is fetched with the ``wget`` command-line tool.

	    Args:
	        url: Address of the video to download (user supplied, untrusted).
	        out_path: Filesystem path the downloaded file is written to.

	    Returns:
	        ``out_path`` on success, otherwise a string starting with
	        ``"ERROR: Video download failed: "`` that describes the failure.
	    """
	    try:
	        if "youtube.com" in url or "youtu.be" in url:
	            ydl_opts = {'outtmpl': out_path}
	            with yt_dlp.YoutubeDL(ydl_opts) as ydl:
	                ydl.download([url])
	        else:
	            # Pass an argument list instead of a shell string so that shell
	            # metacharacters in the URL cannot run arbitrary commands; "--"
	            # keeps a URL that starts with "-" from being parsed as an option.
	            # check=True turns a failed download (non-zero exit status) into
	            # an exception instead of silently reporting success.
	            subprocess.run(["wget", "-O", out_path, "--", url], check=True)
	        return out_path
	    except Exception as e:
	        return f"ERROR: Video download failed: {str(e)}"

	def extract_audio(video_path, audio_path):
	    """Write the audio track of ``video_path`` to ``audio_path`` with ffmpeg.

	    ffmpeg is asked for 16-bit PCM, 16 kHz, single-channel output with the
	    video stream dropped, overwriting ``audio_path`` if it already exists.
	    ffmpeg's own console output is discarded.

	    Returns:
	        ``audio_path`` on success, otherwise a string starting with
	        ``"ERROR: Audio extraction failed: "``.
	    """
	    ffmpeg_args = ["ffmpeg", "-y", "-i", video_path, "-vn"]
	    ffmpeg_args += ["-acodec", "pcm_s16le", "-ar", "16000", "-ac", "1"]
	    ffmpeg_args.append(audio_path)
	    try:
	        subprocess.run(
	            ffmpeg_args,
	            stdout=subprocess.DEVNULL,
	            stderr=subprocess.DEVNULL,
	            check=True,
	        )
	    except Exception as err:
	        return f"ERROR: Audio extraction failed: {err}"
	    return audio_path

	# Lazily created classifier shared by all requests; loading the model is
	# expensive, so it is done at most once per process.
	_LANG_ID_CLASSIFIER = None


	def _load_lang_id_classifier():
	    """Return the shared SpeechBrain language-ID classifier, loading it once."""
	    global _LANG_ID_CLASSIFIER
	    if _LANG_ID_CLASSIFIER is None:
	        _LANG_ID_CLASSIFIER = EncoderClassifier.from_hparams(
	            source="speechbrain/lang-id-voxlingua107-ecapa",
	            savedir="pretrained_models/lang-id-voxlingua107-ecapa",
	        )
	    return _LANG_ID_CLASSIFIER


	def analyze_accent(audio_path):
	    """Identify the spoken language of an audio file with SpeechBrain.

	    Despite the name, the model used here
	    (``speechbrain/lang-id-voxlingua107-ecapa``) is a language-identification
	    model; the returned label is whatever that model's label encoder emits.

	    Args:
	        audio_path: Path of an audio file readable by ``torchaudio.load``.
	            NOTE(review): the model card states 16 kHz input; this function
	            does not resample, it relies on the caller providing 16 kHz.

	    Returns:
	        ``(label, confidence)`` on success, where ``confidence`` is a
	        linear-scale likelihood in ``[0, 1]``. On failure returns
	        ``("ERROR: Accent analysis failed: ...", None)``.
	    """
	    try:
	        classifier = _load_lang_id_classifier()
	        signal, _ = torchaudio.load(audio_path)
	        # torchaudio returns (channels, frames) while classify_batch treats
	        # the first axis as the batch; downmix so one file is one utterance.
	        if signal.shape[0] > 1:
	            signal = signal.mean(dim=0, keepdim=True)
	        prediction = classifier.classify_batch(signal)
	        predicted_lang = prediction[3][0]
	        # prediction[1] holds the best log-likelihood; exponentiate it so the
	        # caller's "confidence * 100" is a real percentage, not a negative.
	        confidence = float(prediction[1].max().exp().item())
	        return predicted_lang, confidence
	    except Exception as e:
	        return f"ERROR: Accent analysis failed: {str(e)}", None

	def _materialize_upload(uploaded_video, temp_dir):
	    """Return a filesystem path holding the uploaded video's bytes."""
	    # Newer Gradio versions hand the callback a plain path.
	    if isinstance(uploaded_video, (str, os.PathLike)):
	        return os.fspath(uploaded_video)
	    source = getattr(uploaded_video, "name", None)
	    # Older Gradio versions pass a temp-file wrapper whose .name is already
	    # the full path on disk. Use it in place: re-opening that same path in
	    # "wb" mode would truncate the upload before it could be read.
	    if source and os.path.isfile(source):
	        return source
	    # Generic file-like object without a backing file: persist its bytes.
	    # basename() keeps the write inside temp_dir.
	    target = os.path.join(temp_dir, os.path.basename(source or "uploaded_video"))
	    with open(target, "wb") as f:
	        f.write(uploaded_video.read())
	    return target


	def process_input(video_link, uploaded_video):
	    """Run the pipeline: obtain a video, extract its audio, classify it.

	    An uploaded file takes priority over a link when both are given.

	    Args:
	        video_link: URL text from the UI; may be empty, blank or ``None``.
	        uploaded_video: Value from the file-upload component: ``None``, a
	            path, or a file-like object exposing ``.name`` / ``.read()``.

	    Returns:
	        A 4-tuple ``(message, video_path, audio_path, accent)``. When a step
	        fails or no input was given, ``message`` carries the error or prompt
	        text and the other three items are ``None``.
	    """
	    link = (video_link or "").strip()
	    if uploaded_video is None and not link:
	        return "Please provide a YouTube/MP4 link or upload a video file.", None, None, None

	    # Intentionally not deleted here: the returned paths are handed to the
	    # UI for playback after this function returns.
	    temp_dir = tempfile.mkdtemp()

	    if uploaded_video is not None:
	        video_path = _materialize_upload(uploaded_video, temp_dir)
	    else:
	        video_path = os.path.join(temp_dir, "input_video.mp4")
	        result = download_video(link, video_path)
	        if isinstance(result, str) and result.startswith("ERROR"):
	            return result, None, None, None

	    # Extract audio
	    audio_path = os.path.join(temp_dir, "audio.wav")
	    result = extract_audio(video_path, audio_path)
	    if isinstance(result, str) and result.startswith("ERROR"):
	        return result, None, None, None

	    # Analyze accent
	    accent, confidence = analyze_accent(audio_path)
	    if isinstance(accent, str) and accent.startswith("ERROR"):
	        return accent, None, None, None

	    # For playback in Gradio
	    return (
	        f"Detected Language/Accent: {accent}\n\nConfidence: {confidence*100:.2f}%",
	        video_path,
	        audio_path,
	        accent,
	    )

	# Gradio front end: two optional inputs (link or upload), one action button,
	# and a text / video / audio result area wired to process_input.
	with gr.Blocks() as demo:
	    gr.Markdown("# 🎙️ Accent/Language Detection from Video")
	    intro_text = (
	        "Upload a video or provide a YouTube/direct MP4 link. "
	        "This app will extract the audio, detect the spoken language/accent, "
	        "and estimate confidence using a SpeechBrain pre-trained model."
	    )
	    gr.Markdown(intro_text)

	    # Inputs sit side by side; either one is enough to run an analysis.
	    with gr.Row():
	        video_link = gr.Textbox(label="YouTube or MP4 Link (optional)")
	        uploaded_video = gr.File(
	            label="Upload Video File (optional)",
	            file_types=[".mp4", ".mov", ".avi", ".mkv"],
	        )

	    btn = gr.Button("Analyze")

	    # Result widgets, in the order process_input returns its values.
	    output_text = gr.Markdown()
	    video_output = gr.Video(label="Video Preview")
	    audio_output = gr.Audio(label="Extracted Audio", type="filepath")
	    # Invisible sink for the fourth return value (the raw label).
	    hidden_accent = gr.Textbox(visible=False)

	    click_inputs = [video_link, uploaded_video]
	    click_outputs = [output_text, video_output, audio_output, hidden_accent]
	    btn.click(fn=process_input, inputs=click_inputs, outputs=click_outputs)

	demo.launch()