Spaces:
Sleeping
Sleeping
import asyncio
import base64
import contextlib
import os
import tempfile

import numpy as np
import pandas as pd

from project.config import settings
class ChatBot:
    """Voice-driven ordering assistant.

    One call to :meth:`ask` takes a base64-encoded audio clip, transcribes it,
    classifies the request, optionally retrieves product context from the
    FAISS index, asks the chat model for an answer and returns that answer as
    both text and base64-encoded speech.

    The conversation is kept in ``self.chat_history`` as a list of
    ``{"role": ..., "content": ...}`` dicts and is owned by the instance, so
    separate ``ChatBot`` objects never share a conversation.
    """

    def __init__(self, memory=None):
        """Start a conversation seeded with the assistant greeting.

        Args:
            memory: Accepted for interface compatibility; currently unused.
        """
        # Built per instance: a class-level list would be shared by every
        # ChatBot and would gain one extra greeting per construction.
        self.chat_history = [
            {
                "role": 'assistant',
                'content': "Hi! What would you like to order from the food?",
            }
        ]

    @staticmethod
    def _transform_bytes_to_file(data_bytes) -> str:
        """Decode base64 audio and write it to a temporary ``.mp3`` file.

        Args:
            data_bytes: Base64-encoded audio payload.

        Returns:
            Path of the created file. The file is created with
            ``delete=False``, so the caller is responsible for removing it.
        """
        # Decode first so an invalid payload never leaves an empty file behind.
        audio_bytes = base64.b64decode(data_bytes)
        with tempfile.NamedTemporaryFile(delete=False, suffix='.mp3') as temp_file:
            temp_file.write(audio_bytes)
            return temp_file.name

    @staticmethod
    async def _transcript_audio(temp_filepath: str) -> str:
        """Transcribe the audio file at *temp_filepath* with ``whisper-1``.

        The request is sent with ``language="nl"``.

        Returns:
            The transcribed text.
        """
        with open(temp_filepath, 'rb') as file:
            transcript = await settings.OPENAI_CLIENT.audio.transcriptions.create(
                model='whisper-1',
                file=file,
                language="nl",
            )
        return transcript.text

    @staticmethod
    async def _convert_to_embeddings(query: str):
        """Return the ``text-embedding-3-large`` embedding of *query*."""
        response = await settings.OPENAI_CLIENT.embeddings.create(
            input=query,
            model='text-embedding-3-large',
        )
        return response.data[0].embedding

    @staticmethod
    async def _convert_response_to_voice(ai_response: str) -> str:
        """Synthesize *ai_response* with ``tts-1`` (voice ``alloy``).

        Returns:
            The audio content, base64-encoded as a UTF-8 string.
        """
        audio = await settings.OPENAI_CLIENT.audio.speech.create(
            model="tts-1",
            voice="alloy",
            input=ai_response,
        )
        return base64.b64encode(audio.content).decode('utf-8')

    @staticmethod
    async def _get_context_data(query: list[float]) -> str:
        """Look up the closest product for an embedded query.

        Runs a range search on ``settings.FAISS_INDEX`` with
        ``settings.SEARCH_RADIUS`` and keeps the hit with the smallest
        distance.

        Args:
            query: Embedding vector of the user query.

        Returns:
            The ``"Search"`` field of the closest product followed by a blank
            line, or an empty string when the search returns no hits.
        """
        query_vector = np.array([query]).astype('float32')
        _, distances, indices = settings.FAISS_INDEX.range_search(
            query_vector, settings.SEARCH_RADIUS
        )
        # ``assign`` returns a new frame, so the shared products dataset is
        # never written to (and pandas has no chained-assignment to warn on).
        hits: pd.DataFrame = settings.products_dataset.iloc[indices].assign(
            distance=distances
        )
        closest = hits.sort_values(by='distance').head(1)
        return ''.join(
            f'{row["Search"]}\n\n' for row in closest.to_dict(orient='records')
        )

    async def _rag(self, query: str, query_type: str, context: str = None):
        """Answer *query* with the chat model and record the exchange.

        When *context* is non-empty it is appended to the history as an
        assistant message and the product prompt is used; otherwise the system
        prompt is chosen from *query_type*.

        Args:
            query: Transcribed user request.
            query_type: Classification produced by :meth:`_get_query_type`.
            context: Retrieved product context, if any.

        Returns:
            The assistant's reply text (also appended to the history).
        """
        if context:
            self.chat_history.append({'role': 'assistant', 'content': context})
            prompt = settings.PRODUCT_PROMPT
        else:
            kind = query_type.lower()
            if 'search' in kind:
                # A search that produced no context.
                prompt = settings.EMPTY_PRODUCT_PROMPT
            elif 'purchase' in kind:
                prompt = settings.ADD_TO_CART_PROMPT
            elif 'product_list' in kind:
                prompt = settings.PRODUCT_LIST_PROMPT
            else:
                prompt = settings.EMPTY_PRODUCT_PROMPT

        self.chat_history.append({'role': 'user', 'content': query})
        messages = [
            {'role': 'system', 'content': f"{prompt}"},
            *self.chat_history,
        ]
        completion = await settings.OPENAI_CLIENT.chat.completions.create(
            messages=messages,
            temperature=0,
            n=1,
            model="gpt-3.5-turbo",
        )
        response = completion.choices[0].message.content
        self.chat_history.append({'role': 'assistant', 'content': response})
        return response

    async def _get_query_type(self, query: str) -> str:
        """Classify *query* given the most recent message in the history.

        Sends ``settings.ANALYZER_PROMPT`` together with the last history
        entry and the user's response to the chat model.

        Returns:
            The model's raw reply text.
        """
        assistant_message = self.chat_history[-1]['content']
        messages = [
            {
                "role": 'system',
                'content': settings.ANALYZER_PROMPT,
            },
            {
                "role": 'user',
                "content": f"Assistant message: {assistant_message}\n"
                           f"User response: {query}",
            },
        ]
        completion = await settings.OPENAI_CLIENT.chat.completions.create(
            messages=messages,
            temperature=0,
            n=1,
            model="gpt-3.5-turbo",
        )
        return completion.choices[0].message.content

    async def ask(self, data: dict):
        """Handle one spoken user turn.

        Args:
            data: Mapping with an ``'audio'`` entry holding the base64-encoded
                recording.

        Returns:
            A dict with ``'user_query'`` (transcript), ``'ai_response'``
            (reply text) and ``'voice_response'`` (base64-encoded speech).

        Raises:
            KeyError: If *data* has no ``'audio'`` entry.
        """
        temp_filepath = self._transform_bytes_to_file(data['audio'])
        try:
            transcript = await self._transcript_audio(temp_filepath)
        finally:
            # The file is only needed for transcription; remove it even when
            # the transcription call fails so temp files do not accumulate.
            with contextlib.suppress(FileNotFoundError):
                os.remove(temp_filepath)

        query_type = await self._get_query_type(transcript)

        context = None
        # Same substring test that _rag applies, so a reply such as "Search"
        # triggers retrieval instead of silently skipping it.
        if 'search' in query_type.lower():
            transformed_query = await self._convert_to_embeddings(transcript)
            context = await self._get_context_data(transformed_query)

        ai_response = await self._rag(transcript, query_type, context)
        voice_ai_response = await self._convert_response_to_voice(ai_response)
        return {
            'user_query': transcript,
            'ai_response': ai_response,
            'voice_response': voice_ai_response,
        }