wesam0099 committed
Commit 333b80c · verified · 1 Parent(s): 54a1938

Update app.py

Files changed (1)
  1. app.py +7 -19
app.py CHANGED
@@ -5,12 +5,9 @@ import torch
 import librosa
 import time
 import os
-import uuid
 import tempfile
 from pydub import AudioSegment
 from transformers import pipeline, AutoFeatureExtractor, AutoModelForAudioClassification
-from scipy.io.wavfile import write
-import sounddevice as sd
 
 # Set up the Whisper model for transcription
 whisper_pipeline = pipeline("automatic-speech-recognition", model="openai/whisper-base")
@@ -25,14 +22,6 @@ label_map = {
     14: "US"
 }
 
-# Record audio from the microphone
-def record_audio(duration=5, fs=16000):
-    recording = sd.rec(int(duration * fs), samplerate=fs, channels=1, dtype='int16')
-    sd.wait()
-    temp_path = tempfile.NamedTemporaryFile(delete=False, suffix=".wav").name
-    write(temp_path, fs, recording)
-    return temp_path
-
 # Convert speech to text using Whisper
 def transcribe_audio(audio_path):
     result = whisper_pipeline(audio_path)
@@ -47,10 +36,9 @@ def predict_accent(audio_path):
     predicted_id = torch.argmax(logits, dim=-1).item()
     return label_map.get(predicted_id, f"Unknown (ID: {predicted_id})")
 
-# Full pipeline: record -> predict accent -> transcribe
-def agent_run(duration):
+# Full pipeline for an uploaded or recorded audio file
+def agent_run(audio_path):
     start_time = time.time()
-    audio_path = record_audio(duration)
     accent = predict_accent(audio_path)
     transcription = transcribe_audio(audio_path)
     elapsed = round(time.time() - start_time, 2)
@@ -59,19 +47,19 @@ def agent_run(duration):
 # Gradio interface
 with gr.Blocks() as demo:
     gr.Markdown("## 🧠 Accent Detection Agent")
-    gr.Markdown("Record your voice and let the AI detect the accent and transcribe the speech.")
+    gr.Markdown("Record or upload audio and let the AI detect the accent and transcribe your speech.")
 
-    duration = gr.Slider(3, 10, value=5, label="Recording Duration (seconds)")
-    run_button = gr.Button("🎙️ Start Agent")
+    audio_input = gr.Audio(type="filepath", label="🎙️ Upload or Record Audio", source="microphone")
+    run_button = gr.Button("Analyze")
 
-    audio_output = gr.Audio(label="Recorded Audio")
+    audio_output = gr.Audio(label="Audio")
     accent_output = gr.Textbox(label="Detected Accent")
     transcription_output = gr.Textbox(label="Transcription")
     time_output = gr.Textbox(label="Processing Time")
 
     run_button.click(
         fn=agent_run,
-        inputs=[duration],
+        inputs=[audio_input],
         outputs=[audio_output, accent_output, transcription_output, time_output]
     )
 
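Note on the new input component: gr.Audio(type="filepath", ..., source="microphone") follows the Gradio 3.x API. On Gradio 4.x the source keyword was replaced by a plural sources list, so an equivalent component would look roughly like the sketch below (the variable name and the extra "upload" entry are illustrative assumptions, not part of this commit):

    import gradio as gr

    # Gradio 4.x equivalent of the input added in this commit:
    # the 3.x keyword source="microphone" became sources=[...].
    audio_input = gr.Audio(
        type="filepath",
        label="🎙️ Upload or Record Audio",
        sources=["microphone", "upload"],  # "upload" is optional, shown for illustration
    )

With either API, type="filepath" hands agent_run a path to a temporary audio file, which is what predict_accent and transcribe_audio already expect.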
65