Upload app_gradio.py
Browse files- app_gradio.py +81 -0
app_gradio.py
ADDED
|
@@ -0,0 +1,81 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
import gradio as gr
|
| 2 |
+
import os
|
| 3 |
+
import subprocess
|
| 4 |
+
import librosa
|
| 5 |
+
from transformers import pipeline
|
| 6 |
+
import requests
|
| 7 |
+
import yt_dlp
|
| 8 |
+
|
| 9 |
+
# Hugging Face Hub id of the audio-classification model used for predictions.
MODEL_ID = "HamzaSidhu786/speech-accent-detection"

# Fixed scratch files in the working directory: downloads are written to
# TEMP_VIDEO and ffmpeg writes the extracted audio track to TEMP_AUDIO.
TEMP_VIDEO = "temp_video.mp4"
TEMP_AUDIO = "temp_audio.wav"

# Bare executable name, resolved through the system PATH
# (this is how ffmpeg is found in Hugging Face Spaces).
FFMPEG_PATH = "ffmpeg"
|
| 13 |
+
|
| 14 |
+
# Build the audio-classification pipeline for MODEL_ID once, at import time;
# predict_accent() calls this shared instance for every request.
clf = pipeline("audio-classification", model=MODEL_ID)
|
| 15 |
+
|
| 16 |
+
def download_from_url(url):
    """Resolve *url* to a local media file, downloading it when it is remote.

    YouTube links are fetched with yt_dlp; any other HTTP(S) URL is streamed
    with requests. Both kinds of download are written to ``TEMP_VIDEO``. A
    string naming an existing local path is returned unchanged.

    Args:
        url: An ``http://`` / ``https://`` URL, or a path on the local
            filesystem.

    Returns:
        The path of the file to process: ``TEMP_VIDEO`` for downloads,
        otherwise *url* itself.

    Raises:
        ValueError: If *url* is neither an HTTP(S) URL nor an existing path.
        requests.HTTPError: If a direct download answers with an error status.
    """
    # NOTE(review): accepting arbitrary local paths lets any UI user point
    # ffmpeg at files on the host - confirm this is intended when deployed
    # publicly.
    # Match the scheme exactly so a local path such as "http_clips/a.mp4"
    # is not mistaken for a URL.
    if url.startswith(("http://", "https://")):
        if "youtube.com" in url or "youtu.be" in url:
            ydl_opts = {"format": "mp4", "outtmpl": TEMP_VIDEO, "quiet": True}
            with yt_dlp.YoutubeDL(ydl_opts) as ydl:
                ydl.download([url])
        else:
            # The timeout keeps a stalled server from hanging the request
            # indefinitely; the context manager releases the connection.
            with requests.get(url, stream=True, timeout=30) as response:
                # Fail loudly instead of saving an error page as the "video".
                response.raise_for_status()
                with open(TEMP_VIDEO, "wb") as out:
                    for chunk in response.iter_content(chunk_size=8192):
                        out.write(chunk)
        return TEMP_VIDEO

    if os.path.exists(url):
        return url

    raise ValueError("Invalid path or URL.")
|
| 32 |
+
|
| 33 |
+
def extract_audio(video_path):
    """Run ffmpeg to write the audio of *video_path* to ``TEMP_AUDIO``.

    Args:
        video_path: Path of the input media file handed to ffmpeg via ``-i``.

    Returns:
        ``TEMP_AUDIO``, the path of the WAV file that was written.

    Raises:
        subprocess.CalledProcessError: If ffmpeg exits with a non-zero status.
    """
    ffmpeg_args = [
        FFMPEG_PATH,
        "-i", video_path,
        "-t", "40",      # limit the output to 40 seconds
        "-ac", "1",      # one audio channel (mono)
        "-ar", "16000",  # 16 kHz sample rate
        "-y",            # overwrite an existing output file without asking
        TEMP_AUDIO,
    ]
    subprocess.run(ffmpeg_args, check=True)
    return TEMP_AUDIO
|
| 37 |
+
|
| 38 |
+
def predict_accent(audio_path):
    """Classify the accent heard in the audio file at *audio_path*.

    The file is loaded at 16 kHz, cut to its first 10 seconds, and passed to
    the module-level ``clf`` pipeline; the highest-scoring result is reported.

    Args:
        audio_path: Path of the audio file to load with librosa.

    Returns:
        A display string of the form
        ``"Accent: <label> | Confidence: <percent>%"``.
    """
    waveform, rate = librosa.load(audio_path, sr=16000)

    # rate samples per second * 10 seconds = the first ten seconds of audio.
    first_ten_seconds = waveform[: rate * 10]

    predictions = clf(first_ten_seconds)
    best = max(predictions, key=lambda candidate: candidate["score"])

    label = best["label"]
    percent = best["score"] * 100
    return f"Accent: {label} | Confidence: {percent:.2f}%"
|
| 44 |
+
|
| 45 |
+
def accent_from_upload(file):
    """Detect the accent in an uploaded media file.

    Args:
        file: Filesystem path of the upload as delivered by the Gradio file
            input, or ``None`` / empty when nothing has been uploaded.

    Returns:
        The formatted prediction from ``predict_accent``, or a short prompt
        asking for a file when *file* is empty.
    """
    # Clicking "Detect" with no upload delivers None; without this guard the
    # value reaches ffmpeg's argument list and fails with an opaque TypeError.
    if not file:
        return "Please upload an audio or video file first."

    try:
        audio_path = extract_audio(file)
        return predict_accent(audio_path)
    finally:
        # Always remove the scratch files, even when extraction or
        # prediction fails.
        cleanup()
|
| 51 |
+
|
| 52 |
+
def accent_from_url(url):
    """Detect the accent in media referenced by a URL or a local path.

    Args:
        url: Text entered by the user; passed to ``download_from_url`` to
            obtain a local file.

    Returns:
        The formatted prediction string produced by ``predict_accent``.
    """
    try:
        source_path = download_from_url(url)
        extract_audio(source_path)
        verdict = predict_accent(TEMP_AUDIO)
    finally:
        # Runs on success and on failure so scratch files never linger.
        cleanup()
    return verdict
|
| 59 |
+
|
| 60 |
+
def cleanup():
    """Delete the scratch files ``TEMP_VIDEO`` and ``TEMP_AUDIO`` if present.

    A file that is already gone is not an error. This function is called from
    ``finally`` blocks, so raising here would mask the exception that
    actually caused the request to fail.
    """
    for path in (TEMP_VIDEO, TEMP_AUDIO):
        # Remove directly rather than checking existence first: another
        # request may delete the file between the check and the removal.
        try:
            os.remove(path)
        except FileNotFoundError:
            pass  # deliberate: nothing to clean up for this path
|
| 64 |
+
|
| 65 |
+
# Two-tab UI: one tab for an uploaded file, one for a URL or local path.
with gr.Blocks() as demo:
    gr.Markdown("## English Accent Detection (Upload, Path or URL)")

    # Tab 1: the uploaded file is handed to the callback as a filesystem path.
    with gr.Tab("Upload File"):
        file_input = gr.File(type="filepath")
        output1 = gr.Textbox(label="Detected Accent")
        btn1 = gr.Button("Detect")
        btn1.click(accent_from_upload, inputs=file_input, outputs=output1)

    # Tab 2: free-text input that accent_from_url resolves to a media file.
    with gr.Tab("From URL or Path"):
        url_input = gr.Textbox(label="Enter a video URL or local path")
        output2 = gr.Textbox(label="Detected Accent")
        btn2 = gr.Button("Detect")
        btn2.click(accent_from_url, inputs=url_input, outputs=output2)


# Start the web server only when this file is executed as a script.
if __name__ == "__main__":
    demo.launch()
|