Mtkhang90 committed on
Commit
d08ab17
·
verified ·
1 Parent(s): 986ad2f

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +25 -15
app.py CHANGED
@@ -1,26 +1,36 @@
1
  import gradio as gr
2
- import whisper
3
  import requests
 
 
4
  import numpy as np
5
  import soundfile as sf
6
- from TTS.api import TTS
7
 
8
- # Load models
9
- whisper_model = whisper.load_model("base")
10
  tts = TTS(model_name="tts_models/en/ljspeech/tacotron2-DDC", progress_bar=False, gpu=False)
11
 
12
- # Groq API key from secrets
13
- import os
14
  GROQ_API_KEY = os.getenv("GROQ_API_KEY")
15
 
16
  def voice_chat(audio):
17
- # Step 1: Speech to text
18
- audio_array, sr = audio
19
- sf.write("temp.wav", audio_array, sr)
20
- result = whisper_model.transcribe("temp.wav")
21
- text = result["text"]
22
-
23
- # Step 2: Groq LLM call
 
 
 
 
 
 
 
 
 
 
 
24
  response = requests.post(
25
  "https://api.groq.com/openai/v1/chat/completions",
26
  headers={
@@ -34,7 +44,7 @@ def voice_chat(audio):
34
  )
35
  llm_text = response.json()["choices"][0]["message"]["content"]
36
 
37
- # Step 3: Text to speech
38
  tts.tts_to_file(text=llm_text, file_path="response.wav")
39
 
40
  return llm_text, "response.wav"
@@ -43,7 +53,7 @@ demo = gr.Interface(
43
  fn=voice_chat,
44
  inputs=gr.Audio(sources=["microphone", "upload"], type="numpy", label="🎤 Speak or upload"),
45
  outputs=[gr.Textbox(label="Groq Response"), gr.Audio(label="AI Voice")],
46
- title="📚 Speech Text Speech AI"
47
  )
48
 
49
  if __name__ == "__main__":
 
1
  import gradio as gr
2
+ import speech_recognition as sr
3
  import requests
4
+ from TTS.api import TTS
5
+ import os
6
  import numpy as np
7
  import soundfile as sf
 
8
 
9
# Load the Coqui TTS voice model once at import time (CPU-only, no progress bar).
# NOTE(review): this is a module-level side effect — the model is downloaded/loaded
# the first time the app starts.
tts = TTS(
    model_name="tts_models/en/ljspeech/tacotron2-DDC",
    progress_bar=False,
    gpu=False,
)

# Groq API key pulled from the environment; may be None when the secret is
# unset — TODO confirm the request path handles a missing key gracefully.
GROQ_API_KEY = os.getenv("GROQ_API_KEY")
15
  def voice_chat(audio):
16
+ if audio is None:
17
+ return "No audio input detected.", None
18
+
19
+ audio_array, sr_rate = audio
20
+ sf.write("temp.wav", audio_array, sr_rate)
21
+
22
+ # SpeechRecognition setup
23
+ recognizer = sr.Recognizer()
24
+ with sr.AudioFile("temp.wav") as source:
25
+ audio_data = recognizer.record(source)
26
+ try:
27
+ text = recognizer.recognize_google(audio_data)
28
+ except sr.UnknownValueError:
29
+ return "Could not understand audio.", None
30
+ except sr.RequestError as e:
31
+ return f"Speech Recognition error: {e}", None
32
+
33
+ # Call Groq LLM
34
  response = requests.post(
35
  "https://api.groq.com/openai/v1/chat/completions",
36
  headers={
 
44
  )
45
  llm_text = response.json()["choices"][0]["message"]["content"]
46
 
47
+ # Generate TTS audio file
48
  tts.tts_to_file(text=llm_text, file_path="response.wav")
49
 
50
  return llm_text, "response.wav"
 
53
  fn=voice_chat,
54
  inputs=gr.Audio(sources=["microphone", "upload"], type="numpy", label="🎤 Speak or upload"),
55
  outputs=[gr.Textbox(label="Groq Response"), gr.Audio(label="AI Voice")],
56
+ title="📚 Speech-to-Text-to-Speech with Groq LLM and TTS"
57
  )
58
 
59
  if __name__ == "__main__":