File size: 2,482 Bytes
c9ddb18
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
import gradio as gr
import os
import subprocess
import librosa
from transformers import pipeline
import requests
import yt_dlp

# Hub identifier of the audio-classification checkpoint used for accent detection.
MODEL_ID = "HamzaSidhu786/speech-accent-detection"

# Scratch files, written relative to the current working directory.
# NOTE(review): the names are fixed, so overlapping requests would share the
# same files -- confirm whether the app is expected to serve concurrent users.
TEMP_VIDEO = "temp_video.mp4"
TEMP_AUDIO = "temp_audio.wav"

# Bare executable name, so ffmpeg has to be discoverable on PATH.
FFMPEG_PATH = "ffmpeg"

# The classifier is built once at import time and reused by every request.
clf = pipeline(
    "audio-classification",
    model=MODEL_ID,
)

def download_from_url(url):
    """Resolve *url* to a video file on the local disk.

    Args:
        url: Either an ``http://`` / ``https://`` URL (YouTube links are
            fetched with yt-dlp, anything else with ``requests``) or the path
            of a file that already exists locally. Surrounding whitespace is
            ignored.

    Returns:
        ``TEMP_VIDEO`` when the media was downloaded, or the (stripped) input
        path when it names an existing local file.

    Raises:
        ValueError: If the input is neither an HTTP(S) URL nor an existing
            path (this includes empty or ``None`` input).
        requests.RequestException: If the direct download fails, times out,
            or the server answers with an HTTP error status.
    """
    # Pasted links often carry stray whitespace or a trailing newline.
    url = (url or "").strip()

    if url.startswith(("http://", "https://")):
        if "youtube.com" in url or "youtu.be" in url:
            ydl_opts = {"format": "mp4", "outtmpl": TEMP_VIDEO, "quiet": True}
            with yt_dlp.YoutubeDL(ydl_opts) as ydl:
                ydl.download([url])
        else:
            # The timeout keeps a stalled server from hanging the request, and
            # the context manager releases the streamed connection when done.
            with requests.get(url, stream=True, timeout=30) as response:
                # Without this check an HTML error page would be saved to disk
                # and handed to ffmpeg as if it were the video.
                response.raise_for_status()
                with open(TEMP_VIDEO, "wb") as out_file:
                    for chunk in response.iter_content(chunk_size=8192):
                        out_file.write(chunk)
        return TEMP_VIDEO

    if os.path.exists(url):
        # Local files are used in place; nothing is copied to TEMP_VIDEO.
        return url

    raise ValueError("Invalid path or URL.")

def extract_audio(video_path):
    """Write the audio track of *video_path* to ``TEMP_AUDIO`` using ffmpeg.

    ffmpeg is asked for at most 40 seconds (``-t 40``) of single-channel
    (``-ac 1``) audio at 16 kHz (``-ar 16000``), overwriting any previous
    output file (``-y``).

    Args:
        video_path: Path of the media file to read.

    Returns:
        ``TEMP_AUDIO``, the path of the file that was written.

    Raises:
        subprocess.CalledProcessError: If ffmpeg exits with a non-zero status.
    """
    input_args = ["-i", video_path]
    output_args = ["-t", "40", "-ac", "1", "-ar", "16000", "-y", TEMP_AUDIO]
    subprocess.run([FFMPEG_PATH, *input_args, *output_args], check=True)
    return TEMP_AUDIO

def predict_accent(audio_path):
    """Classify the accent heard at the start of an audio file.

    The file is loaded with a 16 kHz target sample rate, at most the first
    ten seconds of samples are passed to the module-level classifier ``clf``,
    and the highest-scoring prediction is reported.

    Args:
        audio_path: Path of the audio file to analyse.

    Returns:
        A string of the form ``"Accent: <label> | Confidence: <xx.xx>%"``.
    """
    waveform, sample_rate = librosa.load(audio_path, sr=16000)

    # Keep no more than the first 10 seconds of samples.
    max_samples = sample_rate * 10
    predictions = clf(waveform[:max_samples])

    def by_score(prediction):
        return prediction["score"]

    best = max(predictions, key=by_score)
    label = best["label"]
    confidence_pct = best["score"] * 100
    return f"Accent: {label} | Confidence: {confidence_pct:.2f}%"

def accent_from_upload(file):
    """Detect the accent spoken in an uploaded media file.

    Args:
        file: Filesystem path of the upload, as delivered by the
            ``gr.File(type="filepath")`` input. Empty or ``None`` when no
            file has been chosen.

    Returns:
        The summary string produced by ``predict_accent``.

    Raises:
        ValueError: If no file was provided.
    """
    # Clicking "Detect" with an empty upload box yields no path; fail with a
    # clear message instead of an opaque TypeError raised inside subprocess.
    if not file:
        raise ValueError("No file provided. Please upload an audio or video file.")

    try:
        extract_audio(file)
        return predict_accent(TEMP_AUDIO)
    finally:
        # Remove the scratch files whether or not extraction/inference worked.
        cleanup()

def accent_from_url(url):
    """Detect the accent in media referenced by a URL or a local path.

    The input is resolved with ``download_from_url``, its audio is extracted
    to ``TEMP_AUDIO``, and the classifier summary for that audio is returned.
    ``cleanup`` runs afterwards whether or not those steps succeed.

    Args:
        url: A video URL or the path of a local file.

    Returns:
        The summary string produced by ``predict_accent``.
    """
    try:
        source_path = download_from_url(url)
        extract_audio(source_path)
        summary = predict_accent(TEMP_AUDIO)
    finally:
        cleanup()
    return summary

def cleanup(paths=None):
    """Delete temporary files, ignoring any that are already gone.

    Args:
        paths: Iterable of file paths to remove. When omitted, the module's
            scratch files ``TEMP_VIDEO`` and ``TEMP_AUDIO`` are removed.
    """
    if paths is None:
        paths = (TEMP_VIDEO, TEMP_AUDIO)

    for path in paths:
        # Try the removal directly instead of checking existence first: a
        # file that vanishes between the check and the delete would otherwise
        # raise from a ``finally`` block and mask the real result or error.
        try:
            os.remove(path)
        except FileNotFoundError:
            pass

# Build the two-tab UI. Components are created inside the Blocks/Tab contexts,
# so the statement order below is part of the layout and is kept as-is.
with gr.Blocks() as demo:
    gr.Markdown("## English Accent Detection (Upload, Path or URL)")

    # Tab 1: the uploaded file's path is passed to accent_from_upload.
    with gr.Tab("Upload File"):
        file_input = gr.File(type="filepath")
        output1 = gr.Textbox(label="Detected Accent")
        btn1 = gr.Button("Detect")
        btn1.click(fn=accent_from_upload, inputs=file_input, outputs=output1)

    # Tab 2: free-text input (URL or local path) is passed to accent_from_url.
    with gr.Tab("From URL or Path"):
        url_input = gr.Textbox(label="Enter a video URL or local path")
        output2 = gr.Textbox(label="Detected Accent")
        btn2 = gr.Button("Detect")
        btn2.click(fn=accent_from_url, inputs=url_input, outputs=output2)

# Start the app only when this file is executed as a script, not on import.
if __name__ == "__main__":
    demo.launch()