sunnynazir committed on
Commit
89f29a0
·
verified ·
1 Parent(s): a82f58f

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +5 -5
app.py CHANGED
@@ -4,13 +4,12 @@ from gtts import gTTS
4
  import whisper
5
  from groq import Groq
6
  import tempfile
7
- import threading
8
 
9
  # Initialize Groq client
10
  GROQ_API_KEY = "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"
11
  client = Groq(api_key=GROQ_API_KEY)
12
 
13
- # Load Whisper model
14
  model = whisper.load_model("base")
15
 
16
  # Function for generating audio response
@@ -20,8 +19,9 @@ def generate_audio_response(response_text):
20
  tts.save(output_audio.name)
21
  return output_audio.name
22
 
 
23
  def process_audio_stream(audio_data):
24
- # Step 1: Transcribe audio in real-time (simplified transcription)
25
  transcription = model.transcribe(audio_data)["text"]
26
 
27
  # Step 2: Process transcription with Groq's LLM for real-time response
@@ -42,7 +42,7 @@ def process_audio_stream(audio_data):
42
  # Gradio interface for real-time voice interaction
43
  interface = gr.Interface(
44
  fn=process_audio_stream,
45
- inputs=gr.Audio(source="microphone", type="filepath", streaming=True), # Stream from microphone
46
  outputs=[
47
  "text", # Display transcription
48
  "text", # Display LLM response
@@ -53,4 +53,4 @@ interface = gr.Interface(
53
  )
54
 
55
  # Launch interface
56
- interface.launch()
 
4
  import whisper
5
  from groq import Groq
6
  import tempfile
 
7
 
8
  # Initialize Groq client
9
  GROQ_API_KEY = "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"
10
  client = Groq(api_key=GROQ_API_KEY)
11
 
12
+ # Load Whisper model (using base model for faster processing)
13
  model = whisper.load_model("base")
14
 
15
  # Function for generating audio response
 
19
  tts.save(output_audio.name)
20
  return output_audio.name
21
 
22
+ # Real-time processing function
23
  def process_audio_stream(audio_data):
24
+ # Step 1: Transcribe audio in real-time
25
  transcription = model.transcribe(audio_data)["text"]
26
 
27
  # Step 2: Process transcription with Groq's LLM for real-time response
 
42
  # Gradio interface for real-time voice interaction
43
  interface = gr.Interface(
44
  fn=process_audio_stream,
45
+ inputs=gr.Audio(type="filepath", recording=True), # Corrected usage for microphone input
46
  outputs=[
47
  "text", # Display transcription
48
  "text", # Display LLM response
 
53
  )
54
 
55
  # Launch interface
56
+ interface.launch(share=True) # Use share=True to generate a public link