Udyan committed on
Commit
f2fca2e
·
verified ·
1 Parent(s): 7e4b494

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +8 -10
app.py CHANGED
@@ -1,21 +1,19 @@
1
  import gradio as gr
2
  from transformers import pipeline
3
- import soundfile as sf
4
- import numpy as np
5
 
6
- # Speech-to-text model
7
  stt = pipeline(
8
  "automatic-speech-recognition",
9
  model="openai/whisper-base"
10
  )
11
 
12
- # Language model
13
  llm = pipeline(
14
- "text2text-generation",
15
- model="google/flan-t5-base"
16
  )
17
 
18
- # Text-to-speech model
19
  tts = pipeline(
20
  "text-to-speech",
21
  model="facebook/fastspeech2-en-ljspeech"
@@ -24,16 +22,16 @@ tts = pipeline(
24
 
25
  def voice_assistant(audio):
26
 
27
- # Convert speech text
28
  speech_text = stt(audio)["text"]
29
 
30
- # Generate AI response
31
  response = llm(
32
  speech_text,
33
  max_new_tokens=80
34
  )[0]["generated_text"]
35
 
36
- # Convert text speech
37
  speech_output = tts(response)
38
 
39
  audio_array = speech_output["audio"]
 
1
  import gradio as gr
2
  from transformers import pipeline
 
 
3
 
4
+ # Speech → Text
5
  stt = pipeline(
6
  "automatic-speech-recognition",
7
  model="openai/whisper-base"
8
  )
9
 
10
+ # LLM
11
  llm = pipeline(
12
+ "text-generation",
13
+ model="distilgpt2"
14
  )
15
 
16
+ # Text → Speech
17
  tts = pipeline(
18
  "text-to-speech",
19
  model="facebook/fastspeech2-en-ljspeech"
 
22
 
23
  def voice_assistant(audio):
24
 
25
+ # speech to text
26
  speech_text = stt(audio)["text"]
27
 
28
+ # AI response
29
  response = llm(
30
  speech_text,
31
  max_new_tokens=80
32
  )[0]["generated_text"]
33
 
34
+ # text to speech
35
  speech_output = tts(response)
36
 
37
  audio_array = speech_output["audio"]