| import yaml |
| import re |
| import os |
|
|
| import gradio as gr |
| import random |
| from fastapi import FastAPI |
| from sentence_transformers import SentenceTransformer |
| from starlette.responses import JSONResponse |
| import openai |
| |
|
|
|
|
| from helpers import * |
|
|
# --- Module-level setup (runs at import time, in this order) ---

# Load service configuration; keys are uppercase throughout (MODEL, CSV_FILENAME,
# thresholds, K, ...).
with open("config.yaml", 'r') as stream:
    # NOTE(review): yaml.load with FullLoader is acceptable only because
    # config.yaml is a trusted local file; yaml.safe_load would be the
    # conservative choice — confirm nothing in the config needs FullLoader.
    config = yaml.load(stream, Loader=yaml.FullLoader)


app = FastAPI()
# Sentence-embedding model used to embed user questions for nearest-neighbour
# search in search() below. Loading may download weights on first run.
model = SentenceTransformer(config["MODEL"])


# Prepares/caches the transcript data and embeddings (defined in helpers;
# exact side effects not visible from this file — see helpers.load_data).
load_data(config, model)
|
|
@app.get("/")
def home():
    """Health-check endpoint: reports service status and the configured model name."""
    # BUG FIX: config keys are uppercase throughout this file (config["MODEL"]
    # is used to load the SentenceTransformer); the lowercase config["model"]
    # raised KeyError on every health-check request.
    return {"health_check": "OK", "model": config["MODEL"]}
|
|
|
|
@app.get("/search")
def search(
    question: str,
    history: list,
) -> JSONResponse:
    """
    Finds the appropriate response for the user question from the lora king interview transcript

    - **question**: user question
    - **history**: prior chat turns (supplied by gr.ChatInterface; unused here)
    :return: stringified dict with the selected response and, on the
        threshold-ladder branches, the rounded top-neighbour distance

    NOTE(review): despite the JSONResponse annotation, every branch returns
    str(dict) — a plain string body. gr.ChatInterface does expect a string, so
    confirm before "fixing" the annotation or return type.
    """

    # Off-topic fallback answers: the "Off topic" row holds " ~ "-separated
    # canned replies, split into a list for random.choice below.
    # (pd / np come from `from helpers import *` — TODO confirm.)
    off_topic = pd.read_csv(config["OT_CSV_FILENAME"], index_col=None)
    off_topic = off_topic[off_topic.Questions == "Off topic"]
    off_topic["Answers"] = off_topic.Answers.str.split(" ~ ")
    # Full transcript Q/A table; df_dedupe keeps one row per unique answer so
    # the answer-embedding index below has no duplicates.
    df = pd.read_csv(config["CSV_FILENAME"], index_col=None)
    df_dedupe = df.drop_duplicates("Answers").reset_index(drop=True)

    # Embed the incoming question once; reused for both neighbour searches.
    user_question_embedding = model.encode(question)

    # k nearest neighbours among question-variation embeddings
    # (check_embeddings / find_neighbors are helpers functions; check_embeddings
    # appears to load-or-build a cached embedding file — TODO confirm).
    question_embeddings = check_embeddings(config["QUESTIONS_FILENAME"], model, df["Variations_Q"])
    neighbors_q, distances_q = find_neighbors(user_question_embedding, question_embeddings, k=config["K"])
    responses_q = df.loc[neighbors_q].VideoID.values

    # Same search against answer embeddings; .str[6:] presumably strips a fixed
    # 6-character prefix (e.g. an ID tag) from each answer — TODO confirm.
    answer_embeddings = check_embeddings(config["ANSWERS_FILENAME"], model, df_dedupe["Answers"].str[6:])
    neighbors_a, distances_a = find_neighbors(user_question_embedding, answer_embeddings, k=config["K"])
    responses_a = df_dedupe.loc[neighbors_a].VideoID.values

    # Threshold ladder on the best question-match score. NOTE(review): the first
    # branch treats LOW values as off-topic while the next two treat HIGH values
    # as confident matches, so distances_q looks like a similarity score
    # (higher = closer) despite the name — verify find_neighbors' semantics.
    if distances_q[0] < config["OFF_TOPIC_THRESHOLD"]:
        # Too dissimilar to any known question: random canned off-topic reply.
        text = off_topic.Answers.values[0]
        result = random.choice(text)
        distances = distances_q
        return str({"response": result, "distances": round(float(distances[0]), 4)})
    elif distances_q[0] > config["QUESTION_THRESHOLD"]:
        # Strong direct question match: return its stored answer verbatim.
        text = df.loc[neighbors_q].Answers.values[0]
        result = text
        distances = distances_q
        return str({"response": result, "distances": round(float(distances[0]), 4)})
    elif list(responses_q).count(responses_q[0]) > config["QUESTION_COUNTS"] and distances_q[0] > config["QUESTION_COUNTS_THRESHOLD"]:
        # Moderate match, but the top video dominates the top-k results: trust it.
        text = df.loc[neighbors_q].Answers.values[0]
        result = text
        distances = distances_q
        return str({"response": result, "distances": round(float(distances[0]), 4)})

    # Ambiguous case: pool question- and answer-derived candidates and let GPT
    # pick. Keyword flags detect parent-specific questions for filtering.
    dad = int(bool(re.search("father|dad|rodney", question.lower())))
    mom = int(bool(re.search("mother|mom|danetta", question.lower())))
    responses = np.concatenate((responses_q,responses_a))

    # If exactly one parent is mentioned, keep only candidates whose
    # question+answer text mentions that parent.
    if dad + mom == 1:
        id_ans = dict(zip(df_dedupe["VideoID"], df_dedupe["Questions"] + df_dedupe["Answers"].str[6:]))
        if dad:
            responses = [vid for vid in responses if any(x in id_ans.get(vid, "") for x in ["dad", "father", "Rodney"])]
        if mom:
            responses = [vid for vid in responses if any(x in id_ans.get(vid, "") for x in ["mom", "mother", "Danetta"])]

    # Map surviving candidate VideoIDs back to their full answer text for GPT.
    id_ans = dict(zip(df_dedupe["VideoID"], df_dedupe["Answers"]))
    gpt_options = [id_ans.get(vid, "") for vid in responses]

    # NOTE(review): a fresh OpenAI client is constructed on every request;
    # hoisting it to module level would avoid the repeated setup cost.
    api_key = os.getenv("OPENAI_API_KEY")
    client = openai.OpenAI(api_key=api_key)

    # Ask GPT to select the single best candidate, or declare "NO ANSWER".
    response = client.chat.completions.create(
        model="gpt-4o",
        messages=[
            {"role": "system",
             "content": """The user will provide a question and potential answers. Identify which of the ~ separated answers (prepended by an ID) is an appropriate response to the user question. \n For some context, these responses are taken from an interview with Lora King, Rodney King's daughter. When she refers to her father, she is referring to Rodney King. If she refers to her mother, she is referring to Danetta King. \n If there is no best answer to the question or the question does not make sense, say there is "NO ANSWER" otherwise return only the best answer without any further explanation or formatting"""},
            {"role": "user", "content": f"Question:{question}, Potential Answers:{gpt_options}"},
        ]
    )

    result = response.choices[0].message.content
    if result == "NO ANSWER":
        # GPT rejected every candidate: fall back to a random off-topic reply.
        result = off_topic.Answers.values[0]
        result = random.choice(result)
    # NOTE(review): unlike the earlier branches, this return omits the
    # "distances" key — confirm whether consumers expect it to always be present.
    return str({"response": result})
|
|
|
|
# Gradio chat UI wrapping the search handler; ChatInterface supplies the
# (question, history) arguments and displays the returned string verbatim.
demo = gr.ChatInterface(search)


if __name__ == "__main__":
    # BUG FIX: demo.launch() previously ran unconditionally at import time, so
    # importing this module (e.g. `uvicorn module:app`) blocked in the Gradio
    # server and the FastAPI routes were never served. Guarding with
    # __main__ keeps script behavior identical while making the module safely
    # importable.
    demo.launch()