# loraking / app.py
# Author: Neha Rao
# Commit 58b4df5 — "refine off topic response"
import os
import random
import re

import gradio as gr
import numpy as np
import openai
import pandas as pd
import yaml
from fastapi import FastAPI
from sentence_transformers import SentenceTransformer
from starlette.responses import JSONResponse

# from dotenv import load_dotenv
from helpers import *
with open("config.yaml", 'r') as stream:
config = yaml.load(stream, Loader=yaml.FullLoader)
app = FastAPI()
model = SentenceTransformer(config["MODEL"])
load_data(config, model)
@app.get("/")
def home():
return {"health_check": "OK", "model": config["model"]}
@app.get("/search")
def search(
    question: str,
    history: list,
) -> JSONResponse:
    """
    Finds the appropriate response for the user question from the lora king interview transcript

    Pipeline:
      1. Embed the user question and retrieve the K nearest stored question
         variations and (deduplicated) answers.
      2. Short-circuit on distance heuristics: off-topic deflection, confident
         question match, or repeated-video consensus among the top neighbors.
      3. Otherwise optionally filter candidates by mother/father mentions and
         ask GPT-4o to pick the best answer (or declare "NO ANSWER").

    - **question**: user question
    - **history**: chat history passed by gradio's ChatInterface (unused here)
    :return: stringified dict with the response and, on threshold branches,
      the rounded nearest-neighbor distance
    """

    def _payload(result, distance=None):
        # All threshold branches share this stringified-dict shape; the final
        # GPT branch omits "distances", matching the original behavior.
        if distance is None:
            return str({"response": result})
        return str({"response": result, "distances": round(float(distance), 4)})

    # Canned off-topic deflections: the "Off topic" row's Answers cell is a
    # " ~ "-separated list of alternative replies.
    off_topic = pd.read_csv(config["OT_CSV_FILENAME"], index_col=None)
    off_topic = off_topic[off_topic.Questions == "Off topic"]
    off_topic["Answers"] = off_topic.Answers.str.split(" ~ ")
    off_topic_answers = off_topic.Answers.values[0]

    df = pd.read_csv(config["CSV_FILENAME"], index_col=None)
    # Multiple question variations can map to the same answer; dedupe for the
    # answer-side retrieval index.
    df_dedupe = df.drop_duplicates("Answers").reset_index(drop=True)

    user_question_embedding = model.encode(question)

    # Nearest neighbors among stored question variations.
    question_embeddings = check_embeddings(config["QUESTIONS_FILENAME"], model, df["Variations_Q"])
    neighbors_q, distances_q = find_neighbors(user_question_embedding, question_embeddings, k=config["K"])
    responses_q = df.loc[neighbors_q].VideoID.values

    # Nearest neighbors among stored answers. str[6:] drops a fixed-width
    # prefix from each answer — presumably the video-ID tag; confirm in data.
    answer_embeddings = check_embeddings(config["ANSWERS_FILENAME"], model, df_dedupe["Answers"].str[6:])
    neighbors_a, distances_a = find_neighbors(user_question_embedding, answer_embeddings, k=config["K"])
    responses_a = df_dedupe.loc[neighbors_a].VideoID.values

    # --- threshold heuristics on the best question-side match ---
    best_q = distances_q[0]
    if best_q < config["OFF_TOPIC_THRESHOLD"]:
        # Too far from any known question: deflect with a random canned reply.
        return _payload(random.choice(off_topic_answers), best_q)
    if best_q > config["QUESTION_THRESHOLD"]:
        # Confident question match: answer of the single nearest question.
        return _payload(df.loc[neighbors_q].Answers.values[0], best_q)
    if (list(responses_q).count(responses_q[0]) > config["QUESTION_COUNTS"]
            and best_q > config["QUESTION_COUNTS_THRESHOLD"]):
        # Several top-K neighbors agree on the same video: trust the consensus.
        return _payload(df.loc[neighbors_q].Answers.values[0], best_q)

    # --- entity filtering: if exactly one parent is asked about, prefer
    # candidates that actually mention that parent ---
    dad = int(bool(re.search("father|dad|rodney", question.lower())))
    mom = int(bool(re.search("mother|mom|danetta", question.lower())))
    responses = np.concatenate((responses_q, responses_a))
    if dad + mom == 1:
        id_text = dict(zip(df_dedupe["VideoID"], df_dedupe["Questions"] + df_dedupe["Answers"].str[6:]))
        keywords = ["dad", "father", "Rodney"] if dad else ["mom", "mother", "Danetta"]
        filtered = [vid for vid in responses if any(k in id_text.get(vid, "") for k in keywords)]
        # Bug fix: if filtering removes every candidate, keep the unfiltered
        # pool instead of sending GPT an empty option list.
        if filtered:
            responses = filtered

    # --- GPT adjudication over the surviving candidates ---
    id_ans = dict(zip(df_dedupe["VideoID"], df_dedupe["Answers"]))
    gpt_options = [id_ans.get(vid, "") for vid in responses]

    # load_dotenv()
    api_key = os.getenv("OPENAI_API_KEY")
    client = openai.OpenAI(api_key=api_key)
    response = client.chat.completions.create(
        model="gpt-4o",
        messages=[
            {"role": "system",
             "content": """The user will provide a question and potential answers. Identify which of the ~ separated answers (prepended by an ID) is an appropriate response to the user question. \n For some context, these responses are taken from an interview with Lora King, Rodney King's daughter. When she refers to her father, she is referring to Rodney King. If she refers to her mother, she is referring to Danetta King. \n If there is no best answer to the question or the question does not make sense, say there is "NO ANSWER" otherwise return only the best answer without any further explanation or formatting"""},
            {"role": "user", "content": f"Question:{question}, Potential Answers:{gpt_options}"},
        ]
    )
    # Bug fix: message.content can be None; guard before comparing, and strip
    # whitespace so a padded "NO ANSWER" still triggers the off-topic fallback.
    result = (response.choices[0].message.content or "").strip()
    if not result or result == "NO ANSWER":
        result = random.choice(off_topic_answers)
    return _payload(result)
# Gradio chat UI that feeds (message, history) into search(); the name `demo`
# is what Hugging Face Spaces expects at module level — do not rename.
demo = gr.ChatInterface(search)
# NOTE(review): launching Gradio at import time alongside the FastAPI `app`
# means the line below blocks module import — confirm this is the intended
# serving mode for this Space.
demo.launch()