Spaces:
Runtime error
Runtime error
| import re | |
| import tomllib | |
| import chainlit as cl | |
| from docarray.index.abstract import BaseDocIndex | |
| from azure_openai import AzureOpenaiSettings, AzureOpenaiEmbeddings, patch_chainlit | |
| from chain import Chain | |
| from data import embed, restaurant_index, RestaurantDescription | |
# Load the Azure OpenAI embedding configuration once at import time; it is
# reused by every call to search_embeddings() below.
embedding_settings = AzureOpenaiEmbeddings.load_from_env().to_settings_dict()
# Project helper that patches chainlit for the Azure OpenAI client.
# NOTE(review): side effects at import time — confirm this must run before
# any chainlit handler is registered.
patch_chainlit()
def search_embeddings(query: str, doc_index: BaseDocIndex, limit: int = 5):
    """Embed *query* and return the nearest documents from *doc_index*.

    Generalized: the result count was hard-coded to 5; it is now the
    backward-compatible keyword parameter *limit* (default 5, so existing
    callers are unaffected).

    Args:
        query: free-text search query to embed.
        doc_index: docarray index searched on its 'embedding' field.
        limit: maximum number of documents to return.

    Returns:
        The matched documents. The similarity scores produced by the index
        are deliberately discarded.
    """
    vec = embed(query, **embedding_settings)
    # find() returns (docs, scores); only the documents are used downstream.
    docs, _scores = doc_index.find(vec, 'embedding', limit)
    return docs
async def start_chat():
    """Reset the per-session conversation history to an empty list.

    NOTE(review): presumably registered via ``@cl.on_chat_start`` — the
    decorator is not visible in this chunk; confirm at the registration site.
    """
    cl.user_session.set("history", [])
async def on_message(message: str, message_id: str):
    """Handle one user turn.

    Pipeline: append the message to the session history, condense the whole
    conversation into a retrieval query via the LLM, fetch candidate
    restaurants from the vector index, ask the LLM to grade every candidate
    as fit/unfit (as TOML), then render up to three matches as chat messages
    with image, description, and a booking action.

    NOTE(review): presumably registered via ``@cl.on_message`` — the
    decorator is not visible in this chunk; confirm at the registration site.

    Args:
        message: the raw user message text.
        message_id: chainlit message id, used to build the response Chain.
    """
    history = cl.user_session.get("history")
    history.append({"role": "user", "content": message})

    chain = Chain(message_id, llm_settings=AzureOpenaiSettings.load_from_env())

    # Condense the whole conversation into a single retrieval query.
    query_msg = await chain.llm(
        """
        You are a conversation summarizer that condenses a conversation between
        a human and a brilllian AI into a search query that can be used to find relevant
        restaurants. If a conversation is "normal" the AI answers it. If the question is
        "nonsense" the AI says "Please rephrase your question".
        Conversation history
        ============
        {history}
        ============
        From this conversation, create a search query that would fit the human's needs.
        Do not say anything else; just the query. If a conversation is "normal" the AI answers it. If the question is
        "nonsense" the AI says "Please rephrase your question".
        """,
        history=format_history(history),
    )

    # BUG FIX: the original tested exact equality against
    # "Please rephrase your question." (trailing period) while the prompt
    # instructs the model to answer without one, so the guard almost never
    # fired. Match loosely and case-insensitively instead.
    if "please rephrase" in query_msg.content.strip().lower():
        response_text = await chain.text("Please rephrase your query.", final=True)
        await response_text.update()
        return

    results = search_embeddings(query_msg.content, restaurant_index)
    await chain.text(str(list(results)))  # TODO maybe json format would be better?
    restaurants = "\n".join(f"- ID: {r.id} | {r.text}" for r in results)

    # Ask the LLM to grade every candidate. TOML is used instead of JSON
    # because curly braces collide with the prompt templating, and it is
    # easy to parse for both machines and humans.
    final_choices_msg = await chain.llm(
        """
        You are a search engine for restaurants.
        Output the restaurant IDs for the best matches to the following query:
        ----
        {query}
        ============
        List of restaurants
        ----
        {restaurants}
        ============
        Output your final answer as a TOML blob.
        Each restaurant should have a key for its ID, with a
        boolean value, where true means the restaurant is a good fit
        for all parts of the query.
        For example:
        ---
        [answer]
        101 = false
        1350 = true
        02458 = false
        9315 = true
        128974 = true
        ============
        Make include IDs of ALL restaurants, but only mark true for ones that fit the query.
        """,
        query=query_msg.content,
        restaurants=restaurants
    )

    # ROBUSTNESS FIX: models frequently wrap the answer in a ```toml code
    # fence; the original fed the raw content straight to tomllib (the
    # fence-stripping regex was commented out) and crashed on fenced output.
    fence = re.match(
        r'```\s*(?:toml)?\s*(.*?)\s*```',
        final_choices_msg.content.strip(),
        re.DOTALL,
    )
    toml_string = fence.group(1) if fence else final_choices_msg.content
    obj = tomllib.loads(toml_string)
    # The model marks every candidate true/false; keep only the true ones.
    # (All IDs are requested so the model considers each option explicitly.)
    final_ids = [rid for rid, is_fit in obj['answer'].items() if is_fit]

    if not final_ids:
        await chain.text("Sorry, no restaurants found. Please try another query.", final=True)
        # Explicit early exit — the original fell through to a loop over an
        # empty list, which only worked by accident.
        return

    for i, rid in enumerate(final_ids[:3]):
        rid = str(rid)
        restaurant: RestaurantDescription = restaurant_index[rid]  # why no automatic typing?
        # Flatten list fields into display strings.
        dishes_as_string = ', '.join(restaurant.dishes)
        categories_as_string = ', '.join(restaurant.categories)
        # BUG FIX: labels were zero-based ("Option 0"); show 1-based numbers.
        msg = await chain.text(f"Option {i + 1}", final=True)
        msg.elements = [
            # note: image always displays above text
            cl.Image(name=restaurant.name, url=restaurant.image_url, display='inline', size='small'),
            cl.Text(name=restaurant.name, content=restaurant.intro, display='inline'),
            cl.Text(name="Example Dishes:", content=dishes_as_string, display='inline'),
            cl.Text(name="Category:", content=categories_as_string, display='inline'),
            # str() in case the price field is numeric — TODO confirm schema
            cl.Text(name="Estimated Average Price (HKD):", content=str(restaurant.price), display="inline"),
        ]
        msg.actions = [
            cl.Action(name='book', value=rid, label='Book', description='Click to book this restaurant'),
        ]
        await msg.update()
    # TODO what should the history include? ids only? or also descriptions?
# Display names for each chat role. Only 'user' and 'assistant' messages are
# expected here; any other role raises KeyError in format_history.
NAMES = {
    'user': 'Human',
    'assistant': 'AI',
}


def format_history(history: list[dict]) -> str:
    """Render chat messages as newline-joined "Speaker: text" lines.

    Each entry in *history* must be a dict with 'role' and 'content' keys;
    the role is mapped to a display name via NAMES.
    """
    lines = []
    for entry in history:
        speaker = NAMES[entry["role"]]
        lines.append(f"{speaker}: {entry['content']}")
    return "\n".join(lines)