Dua Rajper committed on
Commit
6d1f45a
·
verified ·
1 Parent(s): f3e3e8f

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +30 -24
app.py CHANGED
@@ -1,42 +1,40 @@
1
  import os
2
  import streamlit as st
3
- from transformers import pipeline, AutoProcessor, AutoModelForSpeechSeq2Seq, AutoTokenizer, AutoModelForCausalLM
 
4
  from espnet2.bin.tts_inference import Text2Speech
5
  import soundfile as sf
6
  from pydub import AudioSegment
7
  import io
8
 
9
- # Load Hugging Face token from environment variable
10
- HUGGINGFACE_TOKEN = os.getenv("HUGGINGFACE_TOKEN")
11
- if not HUGGINGFACE_TOKEN:
12
- st.error("Hugging Face token not found. Please set the HUGGINGFACE_TOKEN environment variable.")
13
  st.stop()
14
 
 
 
 
15
  # Load models
16
  @st.cache(allow_output_mutation=True)
17
  def load_models():
18
  # Speech-to-Text
19
- processor = AutoProcessor.from_pretrained("openai/whisper-small", use_auth_token=HUGGINGFACE_TOKEN)
20
- stt_model = AutoModelForSpeechSeq2Seq.from_pretrained("openai/whisper-small", use_auth_token=HUGGINGFACE_TOKEN)
21
  stt_pipe = pipeline(
22
  "automatic-speech-recognition",
23
  model=stt_model,
24
  tokenizer=processor.tokenizer,
25
- feature_extractor=processor.feature_extractor,
26
- use_auth_token=HUGGINGFACE_TOKEN
27
  )
28
 
29
- # Text Generation
30
- tokenizer = AutoTokenizer.from_pretrained("mistralai/Mistral-7B-Instruct-v0.1", use_auth_token=HUGGINGFACE_TOKEN)
31
- text_model = AutoModelForCausalLM.from_pretrained("mistralai/Mistral-7B-Instruct-v0.1", use_auth_token=HUGGINGFACE_TOKEN)
32
- text_pipe = pipeline("text-generation", model=text_model, tokenizer=tokenizer, use_auth_token=HUGGINGFACE_TOKEN)
33
-
34
  # Text-to-Speech
35
  tts_model = Text2Speech.from_pretrained("espnet/espnet_tts_vctk_espnet_spk_voxceleb12_rawnet")
36
 
37
- return stt_pipe, text_pipe, tts_model
38
 
39
- stt_pipe, text_pipe, tts_model = load_models()
40
 
41
  # Streamlit app
42
  st.title("Voice-Enabled Chatbot")
@@ -51,12 +49,20 @@ if audio_file is not None:
51
  text = stt_pipe(speech)['text']
52
  st.write("Transcribed Text:", text)
53
 
54
- # Generate response
55
- messages = [{"role": "user", "content": text}]
56
- response = text_pipe(messages)[0]['generated_text']
57
- st.write("Generated Response:", response)
 
 
 
 
 
 
58
 
59
- # Convert response to speech
60
- speech, *_ = tts_model(response)
61
- sf.write("response.wav", speech, 22050)
62
- st.audio("response.wav")
 
 
 
import io
import os

import soundfile as sf
import streamlit as st
from espnet2.bin.tts_inference import Text2Speech
from groq import Groq
from pydub import AudioSegment
from transformers import AutoModelForSpeechSeq2Seq, AutoProcessor, pipeline
9
 
10
+ # Load Groq API key from environment variable
11
+ GROQ_API_KEY = os.getenv("GROQ_API_KEY")
12
+ if not GROQ_API_KEY:
13
+ st.error("Groq API key not found. Please set the GROQ_API_KEY environment variable.")
14
  st.stop()
15
 
16
+ # Initialize Groq client
17
+ groq_client = Groq(api_key=GROQ_API_KEY)
18
+
19
  # Load models
20
  @st.cache(allow_output_mutation=True)
21
  def load_models():
22
  # Speech-to-Text
23
+ processor = AutoProcessor.from_pretrained("openai/whisper-small")
24
+ stt_model = AutoModelForSpeechSeq2Seq.from_pretrained("openai/whisper-small")
25
  stt_pipe = pipeline(
26
  "automatic-speech-recognition",
27
  model=stt_model,
28
  tokenizer=processor.tokenizer,
29
+ feature_extractor=processor.feature_extractor
 
30
  )
31
 
 
 
 
 
 
32
  # Text-to-Speech
33
  tts_model = Text2Speech.from_pretrained("espnet/espnet_tts_vctk_espnet_spk_voxceleb12_rawnet")
34
 
35
+ return stt_pipe, tts_model
36
 
37
+ stt_pipe, tts_model = load_models()
38
 
39
  # Streamlit app
40
  st.title("Voice-Enabled Chatbot")
 
49
  text = stt_pipe(speech)['text']
50
  st.write("Transcribed Text:", text)
51
 
52
+ # Generate response using Groq API
53
+ try:
54
+ chat_completion = groq_client.chat.completions.create(
55
+ messages=[{"role": "user", "content": text}],
56
+ model="mixtral-8x7b-32768",
57
+ temperature=0.5,
58
+ max_tokens=1024
59
+ )
60
+ response = chat_completion.choices[0].message.content
61
+ st.write("Generated Response:", response)
62
 
63
+ # Convert response to speech
64
+ speech, *_ = tts_model(response)
65
+ sf.write("response.wav", speech, 22050)
66
+ st.audio("response.wav")
67
+ except Exception as e:
68
+ st.error(f"Error generating response: {e}")