Spaces:
Paused
Paused
from dotenv import load_dotenv
import os
from timeit import default_timer as timer
import time
import requests
import streamlit as st
import tiktoken

# Load API keys / endpoints used by send_openai_request from the env file.
load_dotenv("environments/.env")

# Sentinel string — presumably what the LLM is prompted to emit when it
# cannot provide a next question (NBQ); usage not visible in this file.
LLM_IDK_ANSWER = "CANT_PROVIDE_NBQS"
# Azure OpenAI deployment names doubling as engine identifiers.
ENGINE_GPT_3_5 = "gpt3_5_test"
ENGINE_GPT_4 = "gpt-4-test"
# Extra request/response logging in send_openai_request when True.
DEBUG = True
HUNDRED_CENTS = 100
# When True, send_openai_request* return canned payloads instead of
# calling Azure OpenAI (offline/UI testing).
FAKE_OPENAI_RESPONSE = False
def get_openai_response_msg(response):
    """Extract the assistant message from an OpenAI chat-completion response.

    Args:
        response: Parsed JSON response from the chat/completions endpoint.

    Returns:
        The first choice's ``message`` dict ({"role": ..., "content": ...}),
        or None when the response carries no usable choices.

    Raises:
        Exception: If ``response`` is None (transport failure upstream).
    """
    if response is None:
        raise Exception("Unexpected error querying OpenAI: response is None")
    choices = response.get("choices")
    # Fix: the original indexed choices[0] after only checking the key was
    # present, so an empty "choices" list raised IndexError. Treat missing
    # and empty the same way: report in the UI and return None.
    if not choices:
        st.error("Missing choices from response:")
        st.error(response)
        return None
    return choices[0]["message"]
def build_query_msg_content(selected_guidelines, chat_array):
    """Compose the user-message content asking the LLM for one new question.

    When exchanges exist, the doctor/patient transcript is appended; when
    guidelines are selected, their text (looked up in session state) is
    appended as a grounding constraint.
    """
    prompt = "Give 1 new question for which we don't know the answer"
    if chat_array:
        turns = [
            ("Doctor: " if entry["role"] == "Doctor" else "Patient: ")
            + str(entry["content"].strip())
            + "\n"
            for entry in chat_array
        ]
        transcript = '"' + "".join(turns) + '"\n'
        prompt += "The patient already answered the following questions: \n" + transcript
    guidelines_txt = ""
    if selected_guidelines:
        guidelines_txt = ". Only ask questions strictly based on the following without hallucinating:\n"
        guidelines_txt += "".join(
            st.session_state["guidelines_dict"][name.lower()]
            for name in selected_guidelines
        )
    return prompt + guidelines_txt
def build_general_chat_system_prompt(system_prompt, pre_chat_summary):
    """Build the system message: base prompt + patient summary + task line."""
    task_str = '''Task: Based on the patient input,
propose the most suited question. Don't use the same question twice.'''
    sections = [system_prompt, "Patient input: " + pre_chat_summary, task_str]
    return {"role": "system", "content": "\n".join(sections)}
def get_general_chat_user_msg():
    """Return the user-role message for the next general-chat turn.

    Content is derived from the currently selected guidelines and the chat
    history kept in Streamlit session state.
    """
    content = build_query_msg_content(
        st.session_state["selected_guidelines"],
        st.session_state["chat_history_array"],
    )
    return {"role": "user", "content": content}
def get_chat_history_string(chat_history):
    """Render the chat history as a Markdown string with bold speaker labels."""
    parts = []
    for entry in chat_history:
        text = str(entry["content"].strip())
        if entry["role"] == "Doctor":
            parts.append("**Doctor**: " + text + " \n ")
        else:
            parts.append("**Patient**: " + text + " \n\n ")
    return "".join(parts)
def get_doctor_question(
    engine,
    temperature,
    top_p,
    system_prompt,
    pre_chat_summary,
    patient_reply
):
    """Request the next doctor question from the LLM.

    On the first call (empty past_messages) the system prompt is built and
    stored in session state. ``patient_reply`` is currently unused here.
    """
    print("Requesting Doctor question...")
    past = st.session_state["past_messages"]
    if len(past) == 0:
        print("Initializing system prompt...")
        # Mutates the session-state list in place.
        past.append(build_general_chat_system_prompt(system_prompt, pre_chat_summary))
    user_msg = get_general_chat_user_msg()
    st.session_state["last_request"] = user_msg
    response = send_openai_request(
        engine, None, temperature, top_p, past + [user_msg], "get_doctor_question"
    )
    proposal = get_openai_response_msg(response)
    st.session_state["last_proposal"] = proposal
    return proposal
def summarize_conversation(prompt_msg, content, engine, temperature, top_p):
    """Ask the LLM to summarize a conversation; return the proposal message."""
    print("Summarizing conversation...")
    messages = [
        {"role": "system", "content": prompt_msg},
        {"role": "user", "content": content},
    ]
    st.session_state["last_request"] = messages
    response = send_openai_request(
        engine, None, temperature, top_p, messages, "summarize_session"
    )
    proposal = get_openai_response_msg(response)
    st.session_state["last_proposal"] = proposal
    return proposal
def get_triage_recommendation(prompt_msg, content, engine, temperature, top_p):
    """Request a triage recommendation from the LLM.

    Note: unlike summarize_conversation, this does not record last_request /
    last_proposal in session state.
    """
    print("Requesting triage recommendation...")
    messages = [
        {"role": "system", "content": prompt_msg},
        {"role": "user", "content": content},
    ]
    response = send_openai_request(
        engine, None, temperature, top_p, messages, "get_llm_triage_reco"
    )
    return get_openai_response_msg(response)
def summarize_feed_info(
    engine, temperature, top_p, age, gender, patient_medical_info, contact_reason, health_situation
):
    """Summarize patient demographics and concern via the LLM.

    Optional fields are included only when truthy. Returns the summary text
    (the response message's ``content``).
    """
    print("Summarizing feed info...")
    pieces = [
        "Please summarize the following:",
        "Patient is " + gender + " " + str(age) + " old. ",
    ]
    if patient_medical_info:
        pieces.append(patient_medical_info + ". ")
    if contact_reason:
        pieces.append("Contact reason: " + contact_reason + ". ")
    if health_situation:
        pieces.append("Health situation: " + health_situation + ". ")
    messages = [
        {"role": "system", "content": "You summarize patient information"},
        {"role": "user", "content": "".join(pieces)},
    ]
    response = send_openai_request(
        engine, None, temperature, top_p, messages, "summarize_params_and_concern"
    )
    return get_openai_response_msg(response)["content"]
def get_available_engines():
    """Return the engine identifiers selectable in the UI (GPT-3.5 first)."""
    return [ENGINE_GPT_3_5, ENGINE_GPT_4]
| # See API ref & Swagger: https://learn.microsoft.com/en-us/azure/ai-services/openai/reference | |
| # See https://learn.microsoft.com/en-us/azure/ai-services/openai/use-your-data-quickstart?source=recommendations&tabs=bash&pivots=rest-api#retrieve-required-variables | |
| # for instructions on where to find the different parameters in Azure portal | |
def send_openai_request_old(
    engine, search_query_type, temperature, top_p, messages, event_name
):
    """Deprecated: chat request against hard-coded Azure OpenAI sandboxes,
    optionally grounded in Azure Cognitive Search ("on your data").

    Superseded by send_openai_request, which reads endpoints from the
    environment instead of hard-coding them here.

    Args:
        engine: ENGINE_GPT_3_5 or ENGINE_GPT_4.
        search_query_type: None for a plain chat call; otherwise a Cognitive
            Search query type (see get_search_query_type_options) selecting
            a pre-built guidelines index.
        temperature: Sampling temperature, forwarded verbatim.
        top_p: Nucleus-sampling parameter, forwarded verbatim.
        messages: Chat messages (list of role/content dicts).
        event_name: Label stored with the cost/latency session event.

    Returns:
        The parsed JSON response from the service (or a canned payload when
        FAKE_OPENAI_RESPONSE is set).

    Raises:
        Exception: If ``engine`` is not supported.
    """
    print('send_openai_request: ' + str(event_name) + '\n\n')
    if FAKE_OPENAI_RESPONSE:
        print("Faking OpenAI response...")
        # Record a dummy cost event so session statistics stay populated.
        session_event = {
            "event_name": event_name,
            "prompt_tokens": 10,
            "prompt_cost_chf": 0.1,
            "completion_tokens": 11,
            "completion_cost_chf": 0.11,
            "total_cost_chf": 0,
            "response_time": 0,
        }
        st.session_state["session_events"] += [session_event]
        # Canned payload mirroring the Azure chat/completions schema.
        return {'id': 'chatcmpl-86wTdbCLS1wxeEOKNCtWPu7vMgyoq', 'object': 'chat.completion', 'created': 1696665445,
                'model': 'gpt-4', 'prompt_filter_results': [{'prompt_index': 0, 'content_filter_results': {
                    'hate': {'filtered': False, 'severity': 'safe'}, 'self_harm': {'filtered': False, 'severity': 'safe'},
                    'sexual': {'filtered': False, 'severity': 'safe'},
                    'violence': {'filtered': False, 'severity': 'safe'}}}],
                'choices': [{'index': 0, 'finish_reason': 'stop', 'message': {'role': 'assistant',
                             'content': 'How long have you been experiencing these headaches and how have they developed over time?'},
                             'content_filter_results': {'hate': {'filtered': False, 'severity': 'safe'},
                                                        'self_harm': {'filtered': False, 'severity': 'safe'},
                                                        'sexual': {'filtered': False, 'severity': 'safe'},
                                                        'violence': {'filtered': False, 'severity': 'safe'}}}],
                'usage': {'completion_tokens': 16, 'prompt_tokens': 518, 'total_tokens': 534}}
    request_start = timer()
    print("Sending messages: ")
    print(messages)
    llm_deployment_name = ""
    embedding_deployment_name = ""
    search_index_name = ""  # NOTE(review): assigned but never used below
    api_version = "2023-08-01-preview"
    # Per-engine sandbox endpoint, deployment, key, and embedding deployment.
    if engine == ENGINE_GPT_3_5:
        api_base = "https://cog-gpt-35-sandbox.openai.azure.com/"
        llm_deployment_name = "gpt3_5_test"
        api_key = os.getenv("AZURE_OPENAI_GPT3_5_KEY")
        embedding_deployment_name = "embedding-gpt3_5"
    elif engine == ENGINE_GPT_4:
        api_base = "https://cog-gpt-4-sandbox-uks.openai.azure.com/"
        llm_deployment_name = "gpt-4-test"
        api_key = os.getenv("AZURE_OPENAI_GPT4_KEY")
        embedding_deployment_name = "embedding-gpt4"
    else:
        raise Exception("Engine not yet supported: " + engine)
    url = (
        api_base
        + "openai/deployments/"
        + llm_deployment_name
        + "/chat/completions?api-version="
        + api_version
    )
    headers = {"Content-Type": "application/json", "api-key": api_key}
    payload = {"temperature": temperature, "top_p": top_p, "messages": messages}
    if search_query_type is not None:
        # Ground the chat in an Azure Cognitive Search index.
        search_endpoint = "https://cog-robin-test-euw.search.windows.net"
        embedding_endpoint = (
            api_base
            + "openai/deployments/"
            + embedding_deployment_name
            + "/embeddings?api-version=2023-05-15"
        )
        data_source = {
            "type": "AzureCognitiveSearch",
            "parameters": {
                "endpoint": search_endpoint,
                "key": os.getenv("AZURE_COG_SEARCH_KEY"),
                "inScope": True,  # Limit responses to grounded data
                "queryType": search_query_type,
            },
        }
        # Each query-type/engine pair maps to a dedicated pre-built index.
        # NOTE(review): "keyword" is handled here but absent from
        # get_search_query_type_options — presumably a legacy alias; verify.
        if search_query_type == "simple" or search_query_type == "keyword":
            if engine == ENGINE_GPT_4:
                data_source["parameters"]["indexName"] = "guidelines-simple-gpt4-230907"
            elif engine == ENGINE_GPT_3_5:
                data_source["parameters"]["indexName"] = "guidelines-simple-gpt35-230907"
        if search_query_type == "semantic":
            data_source["parameters"]["semanticConfiguration"] = "default"
            if engine == ENGINE_GPT_4:
                data_source["parameters"]["indexName"] = "guidelines-gpt4-230907"
            elif engine == ENGINE_GPT_3_5:
                data_source["parameters"]["indexName"] = "guidelines-gpt35-230907"
        if (
            search_query_type == "vector"
            or search_query_type == "vectorSimpleHybrid"
            or search_query_type == "vectorSemanticHybrid"
        ):
            # Vector variants additionally need an embedding endpoint/key.
            data_source["parameters"]["embeddingEndpoint"] = embedding_endpoint
            data_source["parameters"]["embeddingKey"] = api_key
        if search_query_type == "vector":
            if engine == ENGINE_GPT_4:
                data_source["parameters"]["indexName"] = "guidelines-vector-gpt4-230907"
            elif engine == ENGINE_GPT_3_5:
                data_source["parameters"]["indexName"] = "guidelines-vector-gpt35-230907"
        if search_query_type == "vectorSimpleHybrid":
            if engine == ENGINE_GPT_4:
                data_source["parameters"]["indexName"] = "guidelines-vector-hybrid-gpt4-230907"
            elif engine == ENGINE_GPT_3_5:
                data_source["parameters"]["indexName"] = "guidelines-vector-hybrid-gpt35-230907"
        if search_query_type == "vectorSemanticHybrid":
            data_source["parameters"]["semanticConfiguration"] = "default"
            if engine == ENGINE_GPT_4:
                data_source["parameters"]["indexName"] = "guidelines-vector-hybrid-sem-gpt4-230907"
            elif engine == ENGINE_GPT_3_5:
                data_source["parameters"]["indexName"] = "guidelines-vector-hybrid-sem-gpt35-230907"
        print("Data source:")
        print(data_source)
        # Here 'extensions' is needed if dataSource arg is provided in the payload
        # See file upload limitations in https://learn.microsoft.com/en-us/azure/ai-services/openai/quotas-limits
        url = (
            api_base
            + "openai/deployments/"
            + llm_deployment_name
            + "/extensions/chat/completions?api-version="
            + api_version
        )
        payload["dataSources"] = [data_source]
    print("Querying " + url + " ...")
    response = requests.post(url, headers=headers, json=payload)
    response_json = response.json()
    print("\n\n\nResponse:")
    print(str(response_json))
    print("\n\n")
    request_end = timer()
    try:
        # Best-effort cost/latency bookkeeping from the usage section.
        prompt_tokens = response_json["usage"]["prompt_tokens"]
        prompt_cost = get_token_costs(prompt_tokens, engine, "prompt")
        completion_tokens = response_json["usage"]["completion_tokens"]
        completion_cost = get_token_costs(completion_tokens, engine, "completion")
        session_event = {
            "event_name": event_name,
            "prompt_tokens": prompt_tokens,
            "prompt_cost_chf": prompt_cost,
            "completion_tokens": completion_tokens,
            "completion_cost_chf": completion_cost,
            "total_cost_chf": prompt_cost + completion_cost,
            "response_time": request_end - request_start,
        }
        st.session_state["session_events"] += [session_event]
    except:  # NOTE(review): bare except silently hides all bookkeeping errors
        print("Unable to update prompt and response tokens")
    return response_json
| # See API ref & Swagger: https://learn.microsoft.com/en-us/azure/ai-services/openai/reference | |
| # See https://learn.microsoft.com/en-us/azure/ai-services/openai/use-your-data-quickstart?source=recommendations&tabs=bash&pivots=rest-api#retrieve-required-variables | |
| # for instructions on where to find the different parameters in Azure portal | |
def send_openai_request(
    engine, search_query_type, temperature, top_p, messages, event_name
):
    """Send a chat-completion request to Azure OpenAI and record cost stats.

    Endpoints and keys are read from the environment (.env). On HTTP 429
    (rate limit) the request is retried after a 2 s pause, up to a bounded
    number of attempts.

    Args:
        engine: ENGINE_GPT_3_5 or ENGINE_GPT_4.
        search_query_type: Unused here; kept for signature compatibility
            with send_openai_request_old.
        temperature: Sampling temperature, forwarded verbatim.
        top_p: Nucleus-sampling parameter, forwarded verbatim.
        messages: Chat messages (list of role/content dicts).
        event_name: Label stored with the cost/latency session event.

    Returns:
        The parsed JSON response (a canned payload when FAKE_OPENAI_RESPONSE
        is set).

    Raises:
        Exception: Unsupported engine, a non-429 API error, or retries
            exhausted.
    """
    request_start = timer()
    if DEBUG:
        print("Sending messages: ")
        print(messages)
    if FAKE_OPENAI_RESPONSE:
        print("Faking OpenAI response...")
        # Record a zero-cost event so session statistics stay populated.
        session_event = {
            "event_name": "mocked_" + event_name,
            "prompt_tokens": 0,
            "prompt_cost_chf": 0,
            "completion_tokens": 0,
            "completion_cost_chf": 0,
            "total_cost_chf": 0,
            "response_time": 0,
        }
        st.session_state["session_events"] += [session_event]
        return {'id': 'chatcmpl-86wTdbCLS1wxeEOKNCtWPu7vMgyoq', 'object': 'chat.completion', 'created': 1696665445,
                'model': 'gpt-4', 'prompt_filter_results': [{'prompt_index': 0, 'content_filter_results': {
                    'hate': {'filtered': False, 'severity': 'safe'}, 'self_harm': {'filtered': False, 'severity': 'safe'},
                    'sexual': {'filtered': False, 'severity': 'safe'},
                    'violence': {'filtered': False, 'severity': 'safe'}}}],
                'choices': [{'index': 0, 'finish_reason': 'stop', 'message': {'role': 'assistant',
                             'content': 'MOCKED LLM RESPONSE: GP: Patient cannot be treated remotely'},
                             'content_filter_results': {'hate': {'filtered': False, 'severity': 'safe'},
                                                        'self_harm': {'filtered': False, 'severity': 'safe'},
                                                        'sexual': {'filtered': False, 'severity': 'safe'},
                                                        'violence': {'filtered': False, 'severity': 'safe'}}}],
                'usage': {'completion_tokens': 16, 'prompt_tokens': 518, 'total_tokens': 534}}
    # Per-engine endpoint and key from the environment.
    if engine == ENGINE_GPT_3_5:
        url = str(os.getenv("AZURE_OPENAI_GPT3_5_ENDPOINT"))
        api_key = os.getenv("AZURE_OPENAI_GPT3_5_KEY")
    elif engine == ENGINE_GPT_4:
        url = str(os.getenv("AZURE_OPENAI_GPT4_ENDPOINT"))
        api_key = os.getenv("AZURE_OPENAI_GPT4_KEY")
    else:
        raise Exception("Engine not yet supported: " + engine)
    headers = {"Content-Type": "application/json", "api-key": api_key}
    payload = {"temperature": temperature, "top_p": top_p, "messages": messages}
    if DEBUG:
        print("Querying " + url + " ...")
    st.session_state["llm_messages"] += messages
    max_attempts = 10  # Fix: the original retried 429s forever.
    response_json = None
    for _attempt in range(max_attempts):
        response = requests.post(url, headers=headers, json=payload)
        response_json = response.json()
        print("Response:")
        print(response_json)
        if "error" not in response_json:
            break
        # Fix: error codes may be non-numeric strings (e.g. content-filter
        # codes); the original int(...) raised ValueError on those. Compare
        # as text instead.
        if str(response_json["error"].get("code")) != "429":
            raise Exception("OpenAI error: " + str(response_json))
        print('OpenAI rate limit reached, waiting 2s before retrying...')
        time.sleep(2)
    else:
        raise Exception("OpenAI rate limit: retries exhausted: " + str(response_json))
    request_end = timer()
    try:
        # Best-effort cost/latency bookkeeping from the usage section.
        prompt_tokens = response_json["usage"]["prompt_tokens"]
        prompt_cost = get_token_costs(prompt_tokens, engine, "prompt")
        completion_tokens = response_json["usage"]["completion_tokens"]
        completion_cost = get_token_costs(completion_tokens, engine, "completion")
        session_event = {
            "event_name": event_name,
            "prompt_tokens": prompt_tokens,
            "prompt_cost_chf": prompt_cost,
            "completion_tokens": completion_tokens,
            "completion_cost_chf": completion_cost,
            "total_cost_chf": prompt_cost + completion_cost,
            "response_time": request_end - request_start,
        }
        st.session_state["session_events"] += [session_event]
        if DEBUG:
            print(session_event)
    except (KeyError, TypeError):
        # Usage info can be absent on some responses; don't fail the call.
        print("Unable to update prompt and response tokens")
    return response_json
def send_patient_reply(
    engine, search_query_type, temperature, selected_guidelines, top_p, chat_array
):
    """Submit the patient's latest reply and return the LLM's next message."""
    print("Submitting patient reply...")
    new_message = {
        "role": "user",
        "content": build_query_msg_content(selected_guidelines, chat_array),
    }
    st.session_state["last_request"] = new_message
    response = send_openai_request(
        engine,
        search_query_type,
        temperature,
        top_p,
        st.session_state["past_messages"] + [new_message],
        "send_dr_patient_msg",
    )
    received_message = get_openai_response_msg(response)
    st.session_state["last_proposal"] = received_message
    return received_message
def get_num_tokens(text, engine):
    """Count tokens in *text* with the tiktoken encoding matching *engine*.

    Raises:
        Exception: If ``engine`` is not a known engine identifier.
    """
    if engine == ENGINE_GPT_3_5:
        model = "gpt-3.5-turbo"
    elif engine == ENGINE_GPT_4:
        model = "gpt-4"
    else:
        raise Exception("Unknown model: " + engine)
    encoding = tiktoken.encoding_for_model(model)
    return len(encoding.encode(text))
| # Source: https://azure.microsoft.com/en-us/pricing/details/cognitive-services/openai-service/ | |
def get_token_costs(num_tokens, engine, query_type):
    """Return the cost in CHF for *num_tokens* tokens.

    Rates are per 1k tokens, converted from the Azure USD list prices at
    roughly CHF/USD ~0.92 (see the commented usd_by_1k_token values).

    Args:
        num_tokens: Token count to price.
        engine: ENGINE_GPT_3_5, ENGINE_GPT_4, or "embedding".
        query_type: "prompt" or "completion" (ignored for "embedding").

    Raises:
        Exception: Unknown engine or unknown query type.
    """
    if engine == ENGINE_GPT_3_5:
        if query_type == "prompt":
            # usd_by_1k_token = 0.003
            chf_by_1k_token = 0.0028
        elif query_type == "completion":
            # usd_by_1k_token = 0.004
            chf_by_1k_token = 0.0037
        else:
            raise Exception("Unknown type: " + query_type)
    elif engine == ENGINE_GPT_4:
        if query_type == "prompt":
            # usd_by_1k_token = 0.03
            # Fix: was 0.0028 — identical to the GPT-3.5 prompt rate and
            # ~10x too low for the USD 0.03/1k list price at the conversion
            # rate implied by every other row of this table.
            chf_by_1k_token = 0.028
        elif query_type == "completion":
            # usd_by_1k_token = 0.06
            chf_by_1k_token = 0.055
        else:
            raise Exception("Unknown type: " + query_type)
    elif engine == "embedding":
        chf_by_1k_token = 0.0001
    else:
        raise Exception("Unknown model: " + engine)
    return chf_by_1k_token * num_tokens / 1000
| # No API ref; allowed values obtained from OpenAI error messages | |
def get_search_query_type_options():
    """Allowed dataSources queryType values; None disables search grounding.

    There is no official API reference for these — the allowed values were
    obtained from OpenAI error messages.
    """
    options = [None, "simple", "semantic"]
    options += ["vector", "vectorSimpleHybrid", "vectorSemanticHybrid"]
    return options
# Evaluation dataset display labels (N = number of cases in each set).
DATASET_AIDA_JIRA_TICKETS = "aida reviewed jira tickets (N=1'407)"
DATASET_GT_CASES = "gt-cases (N=2'434)"
DATASET_APP_CHATS = "app chats (N=300)"


def get_dataset_names():
    """Return the selectable dataset labels in their fixed display order."""
    return [DATASET_APP_CHATS, DATASET_GT_CASES, DATASET_AIDA_JIRA_TICKETS]