Update app.py
Browse files
app.py
CHANGED
|
@@ -5,12 +5,9 @@ import torch
|
|
| 5 |
import librosa
|
| 6 |
import time
|
| 7 |
import os
|
| 8 |
-
import uuid
|
| 9 |
import tempfile
|
| 10 |
from pydub import AudioSegment
|
| 11 |
from transformers import pipeline, AutoFeatureExtractor, AutoModelForAudioClassification
|
| 12 |
-
from scipy.io.wavfile import write
|
| 13 |
-
import sounddevice as sd
|
| 14 |
|
| 15 |
# إعداد نموذج Whisper للتفريغ
|
| 16 |
whisper_pipeline = pipeline("automatic-speech-recognition", model="openai/whisper-base")
|
|
@@ -25,14 +22,6 @@ label_map = {
|
|
| 25 |
14: "US"
|
| 26 |
}
|
| 27 |
|
| 28 |
-
# تسجيل الصوت من المايكروفون
|
| 29 |
-
def record_audio(duration=5, fs=16000):
|
| 30 |
-
recording = sd.rec(int(duration * fs), samplerate=fs, channels=1, dtype='int16')
|
| 31 |
-
sd.wait()
|
| 32 |
-
temp_path = tempfile.NamedTemporaryFile(delete=False, suffix=".wav").name
|
| 33 |
-
write(temp_path, fs, recording)
|
| 34 |
-
return temp_path
|
| 35 |
-
|
| 36 |
# تحويل الصوت إلى النص باستخدام Whisper
|
| 37 |
def transcribe_audio(audio_path):
|
| 38 |
result = whisper_pipeline(audio_path)
|
|
@@ -47,10 +36,9 @@ def predict_accent(audio_path):
|
|
| 47 |
predicted_id = torch.argmax(logits, dim=-1).item()
|
| 48 |
return label_map.get(predicted_id, f"Unknown (ID: {predicted_id})")
|
| 49 |
|
| 50 |
-
# معالجة كاملة
|
| 51 |
-
def agent_run(
|
| 52 |
start_time = time.time()
|
| 53 |
-
audio_path = record_audio(duration)
|
| 54 |
accent = predict_accent(audio_path)
|
| 55 |
transcription = transcribe_audio(audio_path)
|
| 56 |
elapsed = round(time.time() - start_time, 2)
|
|
@@ -59,19 +47,19 @@ def agent_run(duration):
|
|
| 59 |
# واجهة Gradio
|
| 60 |
with gr.Blocks() as demo:
|
| 61 |
gr.Markdown("## 🧠 Accent Detection Agent")
|
| 62 |
-
gr.Markdown("Record
|
| 63 |
|
| 64 |
-
|
| 65 |
-
run_button = gr.Button("
|
| 66 |
|
| 67 |
-
audio_output = gr.Audio(label="
|
| 68 |
accent_output = gr.Textbox(label="Detected Accent")
|
| 69 |
transcription_output = gr.Textbox(label="Transcription")
|
| 70 |
time_output = gr.Textbox(label="Processing Time")
|
| 71 |
|
| 72 |
run_button.click(
|
| 73 |
fn=agent_run,
|
| 74 |
-
inputs=[
|
| 75 |
outputs=[audio_output, accent_output, transcription_output, time_output]
|
| 76 |
)
|
| 77 |
|
|
|
|
| 5 |
import librosa
|
| 6 |
import time
|
| 7 |
import os
|
|
|
|
| 8 |
import tempfile
|
| 9 |
from pydub import AudioSegment
|
| 10 |
from transformers import pipeline, AutoFeatureExtractor, AutoModelForAudioClassification
|
|
|
|
|
|
|
| 11 |
|
| 12 |
# إعداد نموذج Whisper للتفريغ
|
| 13 |
whisper_pipeline = pipeline("automatic-speech-recognition", model="openai/whisper-base")
|
|
|
|
| 22 |
14: "US"
|
| 23 |
}
|
| 24 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 25 |
# تحويل الصوت إلى النص باستخدام Whisper
|
| 26 |
def transcribe_audio(audio_path):
|
| 27 |
result = whisper_pipeline(audio_path)
|
|
|
|
| 36 |
predicted_id = torch.argmax(logits, dim=-1).item()
|
| 37 |
return label_map.get(predicted_id, f"Unknown (ID: {predicted_id})")
|
| 38 |
|
| 39 |
+
# المعالجة الكاملة عند إدخال ملف صوتي
|
| 40 |
+
def agent_run(audio_path):
|
| 41 |
start_time = time.time()
|
|
|
|
| 42 |
accent = predict_accent(audio_path)
|
| 43 |
transcription = transcribe_audio(audio_path)
|
| 44 |
elapsed = round(time.time() - start_time, 2)
|
|
|
|
| 47 |
# واجهة Gradio
|
| 48 |
with gr.Blocks() as demo:
|
| 49 |
gr.Markdown("## 🧠 Accent Detection Agent")
|
| 50 |
+
gr.Markdown("Record or upload audio and let the AI detect the accent and transcribe your speech.")
|
| 51 |
|
| 52 |
+
audio_input = gr.Audio(type="filepath", label="🎙️ Upload or Record Audio", source="microphone")
|
| 53 |
+
run_button = gr.Button("Analyze")
|
| 54 |
|
| 55 |
+
audio_output = gr.Audio(label="Audio")
|
| 56 |
accent_output = gr.Textbox(label="Detected Accent")
|
| 57 |
transcription_output = gr.Textbox(label="Transcription")
|
| 58 |
time_output = gr.Textbox(label="Processing Time")
|
| 59 |
|
| 60 |
run_button.click(
|
| 61 |
fn=agent_run,
|
| 62 |
+
inputs=[audio_input],
|
| 63 |
outputs=[audio_output, accent_output, transcription_output, time_output]
|
| 64 |
)
|
| 65 |
|