# Classifciation / app.py
# Sajidahamed's picture
# Update app.py
# 34200f5 verified
import gradio as gr
import os
import subprocess
import torch
import torchaudio
from speechbrain.pretrained import EncoderClassifier
import yt_dlp
import tempfile
def download_video(url, out_path):
    """Download a video from YouTube or a direct MP4 link.

    URLs containing ``youtube.com`` or ``youtu.be`` are fetched with yt-dlp;
    every other URL is fetched with the ``wget`` command-line tool.

    Args:
        url: Address of the video to download.
        out_path: Destination file path for the downloaded video.

    Returns:
        ``out_path`` on success, or a string starting with ``"ERROR"`` that
        describes the failure.
    """
    try:
        if "youtube.com" in url or "youtu.be" in url:
            ydl_opts = {'outtmpl': out_path}
            with yt_dlp.YoutubeDL(ydl_opts) as ydl:
                ydl.download([url])
        else:
            # Pass an argument list (no shell) so metacharacters in a
            # user-supplied URL cannot run extra commands. "--" keeps a URL
            # that starts with "-" from being parsed as a wget option, and
            # check=True turns a failed download into an exception instead
            # of silently reporting success.
            subprocess.run(["wget", "-O", out_path, "--", url], check=True)
        return out_path
    except Exception as e:
        return f"ERROR: Video download failed: {str(e)}"
def extract_audio(video_path, audio_path):
    """Write the audio track of a video to a WAV file via ffmpeg.

    The ffmpeg invocation drops the video stream (``-vn``), encodes the audio
    as ``pcm_s16le`` at a 16000 Hz sample rate with one channel, and
    overwrites ``audio_path`` if it already exists (``-y``). ffmpeg's own
    console output is discarded.

    Args:
        video_path: Path of the input video file.
        audio_path: Path of the WAV file to create.

    Returns:
        ``audio_path`` on success, or a string starting with ``"ERROR"`` that
        describes the failure.
    """
    ffmpeg_args = ["ffmpeg", "-y", "-i", video_path, "-vn"]
    ffmpeg_args += ["-acodec", "pcm_s16le", "-ar", "16000", "-ac", "1"]
    ffmpeg_args.append(audio_path)
    silent = subprocess.DEVNULL
    try:
        subprocess.run(ffmpeg_args, check=True, stdout=silent, stderr=silent)
    except Exception as err:
        return f"ERROR: Audio extraction failed: {err!s}"
    return audio_path
# Sample rate (Hz) the VoxLingua107 ECAPA model was trained on.
_MODEL_SAMPLE_RATE = 16000

# Lazily created classifier, shared by all requests so the model is only
# built once per process instead of once per call.
_ACCENT_CLASSIFIER = None


def _get_accent_classifier():
    """Return the shared SpeechBrain language-ID model, loading it on first use."""
    global _ACCENT_CLASSIFIER
    if _ACCENT_CLASSIFIER is None:
        _ACCENT_CLASSIFIER = EncoderClassifier.from_hparams(
            source="speechbrain/lang-id-voxlingua107-ecapa",
            savedir="pretrained_models/lang-id-voxlingua107-ecapa"
        )
    return _ACCENT_CLASSIFIER


def analyze_accent(audio_path):
    """Identify the spoken language of an audio file with SpeechBrain.

    Args:
        audio_path: Path of the audio file to classify.

    Returns:
        A ``(label, confidence)`` tuple. On success ``label`` is the text
        label predicted by the model and ``confidence`` is its likelihood on
        a linear 0-1 scale. On failure ``label`` is a string starting with
        ``"ERROR"`` and ``confidence`` is ``None``.
    """
    try:
        classifier = _get_accent_classifier()
        signal, fs = torchaudio.load(audio_path)
        # torchaudio returns (channels, time) while classify_batch treats the
        # first axis as the batch, so collapse multi-channel audio to mono.
        if signal.shape[0] > 1:
            signal = signal.mean(dim=0, keepdim=True)
        # Resample anything that is not already at the model's rate.
        if fs != _MODEL_SAMPLE_RATE:
            signal = torchaudio.functional.resample(signal, fs, _MODEL_SAMPLE_RATE)
        prediction = classifier.classify_batch(signal)
        predicted_lang = prediction[3][0]
        # prediction[1] is the log-likelihood of the winning label (see the
        # model card); exponentiate it to get a probability for display.
        confidence = float(prediction[1][0].exp().item())
        return predicted_lang, confidence
    except Exception as e:
        return f"ERROR: Accent analysis failed: {str(e)}", None
def _uploaded_file_path(uploaded):
    """Return the on-disk path of an upload given as a path or a file-like object."""
    if isinstance(uploaded, (str, os.PathLike)):
        return os.fspath(uploaded)
    # Otherwise expect a file-like object exposing its path as ``.name``.
    return uploaded.name


def process_input(video_link, uploaded_video):
    """Run the full pipeline: obtain a video, extract its audio, classify it.

    An uploaded file takes priority over a link when both are given.

    Args:
        video_link: YouTube or direct video URL; may be empty or ``None``.
        uploaded_video: Uploaded file, either as a filesystem path or as an
            object with a ``.name`` path attribute; ``None`` when absent.

    Returns:
        A 4-tuple ``(message, video_path, audio_path, accent)``. On failure
        or missing input, ``message`` explains the problem and the remaining
        three items are ``None``.
    """
    video_link = (video_link or "").strip()
    if uploaded_video is None and not video_link:
        return "Please provide a YouTube/MP4 link or upload a video file.", None, None, None

    # The directory is intentionally not removed here: the returned video and
    # audio paths are handed back to the caller for playback.
    temp_dir = tempfile.mkdtemp()

    if uploaded_video is not None:
        # Use the upload where it already lives. Re-opening its own path for
        # writing (what joining temp_dir with an absolute name resolves to)
        # would truncate the file before it could be read.
        video_path = _uploaded_file_path(uploaded_video)
    else:
        video_path = os.path.join(temp_dir, "input_video.mp4")
        result = download_video(video_link, video_path)
        if isinstance(result, str) and result.startswith("ERROR"):
            return result, None, None, None

    # Extract audio
    audio_path = os.path.join(temp_dir, "audio.wav")
    result = extract_audio(video_path, audio_path)
    if isinstance(result, str) and result.startswith("ERROR"):
        return result, None, None, None

    # Analyze accent
    accent, confidence = analyze_accent(audio_path)
    if isinstance(accent, str) and accent.startswith("ERROR"):
        return accent, None, None, None

    # For playback in Gradio
    return (
        f"**Detected Language/Accent:** {accent}\n\n**Confidence:** {confidence*100:.2f}%",
        video_path,
        audio_path,
        accent
    )
# Gradio UI: a link box and a file upload side by side, an "Analyze" button,
# and result widgets (markdown summary, video preview, extracted audio).
with gr.Blocks() as demo:
    gr.Markdown("# 🎙️ Accent/Language Detection from Video")
    intro_text = (
        "Upload a video or provide a YouTube/direct MP4 link. This app will extract the audio, "
        "detect the spoken language/accent, and estimate confidence using a SpeechBrain pre-trained model."
    )
    gr.Markdown(intro_text)

    with gr.Row():
        video_link = gr.Textbox(label="YouTube or MP4 Link (optional)")
        uploaded_video = gr.File(
            label="Upload Video File (optional)",
            file_types=[".mp4", ".mov", ".avi", ".mkv"],
        )

    btn = gr.Button("Analyze")
    output_text = gr.Markdown()
    video_output = gr.Video(label="Video Preview")
    audio_output = gr.Audio(label="Extracted Audio", type="filepath")
    # process_input returns four values; the fourth (the raw label) goes to
    # an invisible textbox.
    hidden_label = gr.Textbox(visible=False)

    click_inputs = [video_link, uploaded_video]
    click_outputs = [output_text, video_output, audio_output, hidden_label]
    btn.click(fn=process_input, inputs=click_inputs, outputs=click_outputs)

# Start the web server for the app.
demo.launch()