varun324242 commited on
Commit
2ba89e1
·
verified ·
1 Parent(s): 9785795
Files changed (1) hide show
  1. s +0 -218
s DELETED
@@ -1,218 +0,0 @@
1
- import asyncio
2
- import shutil
3
- import subprocess
4
- import requests
5
- import time
6
- import os
7
- import logging
8
- import gradio as gr # Import Gradio
9
-
10
- from langchain_core.prompts import ChatPromptTemplate
11
- from langchain_groq import ChatGroq
12
- from langchain.memory import ConversationBufferMemory
13
- from langchain.prompts import (
14
- MessagesPlaceholder,
15
- SystemMessagePromptTemplate,
16
- HumanMessagePromptTemplate,
17
- )
18
- from langchain.chains import LLMChain
19
-
20
- from deepgram import (
21
- DeepgramClient,
22
- DeepgramClientOptions,
23
- LiveTranscriptionEvents,
24
- LiveOptions,
25
- Microphone,
26
- )
27
-
28
- # Configure logging
29
- logging.basicConfig(level=logging.DEBUG, format='%(asctime)s - %(levelname)s - %(message)s')
30
-
31
- class LanguageModelProcessor:
32
- def __init__(self):
33
- self.llm = ChatGroq(temperature=0, model_name="mixtral-8x7b-32768", groq_api_key="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")
34
- self.memory = ConversationBufferMemory(memory_key="chat_history", return_messages=True)
35
-
36
- system_prompt = "You are a conversational assistant named Eliza. Use short, conversational responses as if you're having a live conversation. Your response should be under 20 words. Do not respond with any code, only conversation string"
37
-
38
- self.prompt = ChatPromptTemplate.from_messages([
39
- SystemMessagePromptTemplate.from_template(system_prompt),
40
- MessagesPlaceholder(variable_name="chat_history"),
41
- HumanMessagePromptTemplate.from_template("{text}")
42
- ])
43
-
44
- self.conversation = LLMChain(
45
- llm=self.llm,
46
- prompt=self.prompt,
47
- memory=self.memory
48
- )
49
-
50
- def process(self, text):
51
- self.memory.chat_memory.add_user_message(text) # Add user message to memory
52
-
53
- start_time = time.time()
54
-
55
- # Go get the response from the LLM
56
- response = self.conversation.invoke({"text": text})
57
- end_time = time.time()
58
-
59
- self.memory.chat_memory.add_ai_message(response['text']) # Add AI response to memory
60
-
61
- elapsed_time = int((end_time - start_time) * 1000)
62
- logging.debug(f"LLM ({elapsed_time}ms): {response['text']}")
63
- return response['text']
64
-
65
- class TextToSpeech:
66
- DG_API_KEY = "101eb2de35d298cdf2a15c76b1726d8ce82394d6"
67
- MODEL_NAME = "aura-helios-en" # Example model name, change as needed
68
-
69
- @staticmethod
70
- def is_installed(lib_name: str) -> bool:
71
- lib = shutil.which(lib_name)
72
- return lib is not None
73
-
74
- def speak(self, text):
75
- if not self.is_installed("ffplay"):
76
- raise ValueError("ffplay not found. Please install FFmpeg to stream audio.")
77
-
78
- DEEPGRAM_URL = f"https://api.deepgram.com/v1/speak?model={self.MODEL_NAME}&encoding=linear16&sample_rate=24000"
79
- headers = {
80
- "Authorization": f"Token {self.DG_API_KEY}",
81
- "Content-Type": "application/json"
82
- }
83
- payload = {
84
- "text": text
85
- }
86
-
87
- logging.debug(f"Sending request to Deepgram API with text: {text}")
88
-
89
- player_command = ["ffplay", "-autoexit", "-", "-nodisp"]
90
- player_process = subprocess.Popen(
91
- player_command,
92
- stdin=subprocess.PIPE,
93
- stdout=subprocess.DEVNULL,
94
- stderr=subprocess.PIPE, # Capture stderr to check for errors
95
- )
96
-
97
- start_time = time.time() # Record the time before sending the request
98
- first_byte_time = None # Initialize a variable to store the time when the first byte is received
99
-
100
- with requests.post(DEEPGRAM_URL, stream=True, headers=headers, json=payload) as r:
101
- logging.debug(f"Response Status Code: {r.status_code}") # Log the status code
102
- if r.status_code != 200:
103
- logging.error(f"Error: {r.text}") # Log the error message if the response is not OK
104
- return # Exit if there's an error
105
-
106
- for chunk in r.iter_content(chunk_size=1024):
107
- if chunk:
108
- if first_byte_time is None: # Check if this is the first chunk received
109
- first_byte_time = time.time() # Record the time when the first byte is received
110
- ttfb = int((first_byte_time - start_time)*1000) # Calculate the time to first byte
111
- logging.debug(f"TTS Time to First Byte (TTFB): {ttfb}ms")
112
- player_process.stdin.write(chunk)
113
- player_process.stdin.flush()
114
-
115
- if player_process.stdin:
116
- player_process.stdin.close()
117
-
118
- # Check for any errors in stderr
119
- stderr_output = player_process.stderr.read()
120
- if stderr_output:
121
- logging.error(f"Error during audio playback: {stderr_output.decode()}")
122
-
123
- player_process.wait()
124
-
125
- async def get_transcript(callback):
126
- transcription_complete = asyncio.Event() # Event to signal transcription completion
127
-
128
- try:
129
- DEEPGRAM_API_KEY = "101eb2de35d298cdf2a15c76b1726d8ce82394d6" # Replace with your actual API key
130
- config = DeepgramClientOptions(options={"keepalive": "true"})
131
- deepgram: DeepgramClient = DeepgramClient(DEEPGRAM_API_KEY, config) # Pass the API key here
132
-
133
- dg_connection = deepgram.listen.asynclive.v("1")
134
- logging.info("Listening...")
135
-
136
- async def on_message(self, result, **kwargs):
137
- sentence = result.channel.alternatives[0].transcript
138
-
139
- if not result.speech_final:
140
- transcript_collector.add_part(sentence)
141
- else:
142
- transcript_collector.add_part(sentence)
143
- full_sentence = transcript_collector.get_full_transcript()
144
- if len(full_sentence.strip()) > 0:
145
- full_sentence = full_sentence.strip()
146
- logging.info(f"Human: {full_sentence}")
147
- callback(full_sentence) # Call the callback with the full_sentence
148
- transcript_collector.reset()
149
- transcription_complete.set() # Signal to stop transcription and exit
150
-
151
- dg_connection.on(LiveTranscriptionEvents.Transcript, on_message)
152
-
153
- options = LiveOptions(
154
- model="nova-2",
155
- punctuate=True,
156
- language="en-US",
157
- encoding="linear16",
158
- channels=1,
159
- sample_rate=16000,
160
- endpointing=300,
161
- smart_format=True,
162
- )
163
-
164
- await dg_connection.start(options)
165
-
166
- microphone = Microphone(dg_connection.send)
167
- microphone.start()
168
-
169
- await transcription_complete.wait() # Wait for the transcription to complete instead of looping indefinitely
170
-
171
- microphone.finish()
172
- await dg_connection.finish()
173
-
174
- except Exception as e:
175
- logging.error(f"Could not open socket: {e}")
176
- return
177
-
178
- class ConversationManager:
179
- def __init__(self):
180
- self.transcription_response = ""
181
- self.llm = LanguageModelProcessor()
182
-
183
- async def main(self):
184
- def handle_full_sentence(full_sentence):
185
- self.transcription_response = full_sentence
186
-
187
- while True:
188
- await get_transcript(handle_full_sentence)
189
-
190
- if "goodbye" in self.transcription_response.lower():
191
- break
192
-
193
- llm_response = self.llm.process(self.transcription_response)
194
-
195
- tts = TextToSpeech()
196
- tts.speak(llm_response)
197
-
198
- self.transcription_response = ""
199
-
200
# Gradio Interface
def gradio_interface(user_input):
    """Handle one Gradio request: run the LLM on `user_input`, speak the reply, return it.

    FIX: the original also constructed an unused ConversationManager here,
    which built (and discarded) a second LLM client on every request.

    NOTE(review): a fresh LanguageModelProcessor is created per call, so
    conversation memory does not persist across requests — confirm this is
    intended for the web UI.
    """
    llm_processor = LanguageModelProcessor()
    response = llm_processor.process(user_input)  # process() is synchronous
    tts = TextToSpeech()
    tts.speak(response)
    return response
208
-
209
if __name__ == "__main__":
    # Build the web UI and expose it via a public share link.
    voice_assistant_ui = gr.Interface(
        fn=gradio_interface,
        inputs=gr.Textbox(label="Enter your message"),
        outputs=gr.Textbox(label="Response"),
        title="Voice Assistant",
        description="Interact with the voice assistant. Type your message and listen to the response."
    )
    # queue() lets Gradio serialize concurrent requests; it returns the app.
    demo = voice_assistant_ui.queue()
    demo.launch(share=True)