| import yaml |
| import re |
| import os |
|
|
| import gradio as gr |
| import random |
| from fastapi import FastAPI |
| from sentence_transformers import SentenceTransformer |
| from starlette.responses import JSONResponse |
| import openai |
| |
|
|
|
|
| from helpers import * |
|
|
# --- Module-level setup (runs at import time, in this order) ---

# Load service configuration; keys are uppercase throughout (MODEL, CSV_FILENAME,
# thresholds, K, ...).
with open("config.yaml", 'r') as stream:
    # NOTE(review): yaml.load with FullLoader is acceptable only because
    # config.yaml is a trusted local file; yaml.safe_load would be the
    # conservative choice — confirm nothing in the config needs FullLoader.
    config = yaml.load(stream, Loader=yaml.FullLoader)


app = FastAPI()
# Sentence-embedding model used to embed user questions for nearest-neighbour
# search in search() below. Loading may download weights on first run.
model = SentenceTransformer(config["MODEL"])


# Prepares/caches the transcript data and embeddings (defined in helpers;
# exact side effects not visible from this file — see helpers.load_data).
load_data(config, model)
|
|
@app.get("/")
def home():
    """Health-check endpoint: reports service status and the configured model name."""
    # BUG FIX: config keys are uppercase throughout this file (config["MODEL"]
    # is used to load the SentenceTransformer); the lowercase config["model"]
    # raised KeyError on every health-check request.
    return {"health_check": "OK", "model": config["MODEL"]}
|
|
|
|
@app.get("/search")
def search(
    question: str,
    history: list,
) -> JSONResponse:
    """
    Finds the appropriate response for the user question from the lora king interview transcript

    - **question**: user question
    - **history**: prior chat turns (supplied by gr.ChatInterface; unused here)
    :return: stringified dict with the selected response and, on the
        threshold-ladder branches, the rounded top-neighbour distance

    NOTE(review): despite the JSONResponse annotation, every branch returns
    str(dict) — a plain string body. gr.ChatInterface does expect a string, so
    confirm before "fixing" the annotation or return type.
    """

    # Off-topic fallback answers: the "Off topic" row holds " ~ "-separated
    # canned replies, split into a list for random.choice below.
    # (pd / np come from `from helpers import *` — TODO confirm.)
    off_topic = pd.read_csv(config["OT_CSV_FILENAME"], index_col=None)
    off_topic = off_topic[off_topic.Questions == "Off topic"]
    off_topic["Answers"] = off_topic.Answers.str.split(" ~ ")
    # Full transcript Q/A table; df_dedupe keeps one row per unique answer so
    # the answer-embedding index below has no duplicates.
    df = pd.read_csv(config["CSV_FILENAME"], index_col=None)
    df_dedupe = df.drop_duplicates("Answers").reset_index(drop=True)

    # Embed the incoming question once; reused for both neighbour searches.
    user_question_embedding = model.encode(question)

    # k nearest neighbours among question-variation embeddings
    # (check_embeddings / find_neighbors are helpers functions; check_embeddings
    # appears to load-or-build a cached embedding file — TODO confirm).
    question_embeddings = check_embeddings(config["QUESTIONS_FILENAME"], model, df["Variations_Q"])
    neighbors_q, distances_q = find_neighbors(user_question_embedding, question_embeddings, k=config["K"])
    responses_q = df.loc[neighbors_q].VideoID.values

    # Same search against answer embeddings; .str[6:] presumably strips a fixed
    # 6-character prefix (e.g. an ID tag) from each answer — TODO confirm.
    answer_embeddings = check_embeddings(config["ANSWERS_FILENAME"], model, df_dedupe["Answers"].str[6:])
    neighbors_a, distances_a = find_neighbors(user_question_embedding, answer_embeddings, k=config["K"])
    responses_a = df_dedupe.loc[neighbors_a].VideoID.values

    # Threshold ladder on the best question-match score. NOTE(review): the first
    # branch treats LOW values as off-topic while the next two treat HIGH values
    # as confident matches, so distances_q looks like a similarity score
    # (higher = closer) despite the name — verify find_neighbors' semantics.
    if distances_q[0] < config["OFF_TOPIC_THRESHOLD"]:
        # Too dissimilar to any known question: random canned off-topic reply.
        text = off_topic.Answers.values[0]
        result = random.choice(text)
        distances = distances_q
        return str({"response": result, "distances": round(float(distances[0]), 4)})
    elif distances_q[0] > config["QUESTION_THRESHOLD"]:
        # Strong direct question match: return its stored answer verbatim.
        text = df.loc[neighbors_q].Answers.values[0]
        result = text
        distances = distances_q
        return str({"response": result, "distances": round(float(distances[0]), 4)})
    elif list(responses_q).count(responses_q[0]) > config["QUESTION_COUNTS"] and distances_q[0] > config["QUESTION_COUNTS_THRESHOLD"]:
        # Moderate match, but the top video dominates the top-k results: trust it.
        text = df.loc[neighbors_q].Answers.values[0]
        result = text
        distances = distances_q
        return str({"response": result, "distances": round(float(distances[0]), 4)})

    # Ambiguous case: pool question- and answer-derived candidates and let GPT
    # pick. Keyword flags detect parent-specific questions for filtering.
    dad = int(bool(re.search("father|dad|rodney", question.lower())))
    mom = int(bool(re.search("mother|mom|danetta", question.lower())))
    responses = np.concatenate((responses_q,responses_a))

    # If exactly one parent is mentioned, keep only candidates whose
    # question+answer text mentions that parent.
    if dad + mom == 1:
        id_ans = dict(zip(df_dedupe["VideoID"], df_dedupe["Questions"] + df_dedupe["Answers"].str[6:]))
        if dad:
            responses = [vid for vid in responses if any(x in id_ans.get(vid, "") for x in ["dad", "father", "Rodney"])]
        if mom:
            responses = [vid for vid in responses if any(x in id_ans.get(vid, "") for x in ["mom", "mother", "Danetta"])]

    # Map surviving candidate VideoIDs back to their full answer text for GPT.
    id_ans = dict(zip(df_dedupe["VideoID"], df_dedupe["Answers"]))
    gpt_options = [id_ans.get(vid, "") for vid in responses]

    # NOTE(review): a fresh OpenAI client is constructed on every request;
    # hoisting it to module level would avoid the repeated setup cost.
    api_key = os.getenv("OPENAI_API_KEY")
    client = openai.OpenAI(api_key=api_key)

    # Ask GPT to select the single best candidate, or declare "NO ANSWER".
    response = client.chat.completions.create(
        model="gpt-4o",
        messages=[
            {"role": "system",
             "content": """The user will provide a question and potential answers. Identify which of the ~ separated answers (prepended by an ID) is an appropriate response to the user question. \n For some context, these responses are taken from an interview with Lora King, Rodney King's daughter. When she refers to her father, she is referring to Rodney King. If she refers to her mother, she is referring to Danetta King. \n If there is no best answer to the question or the question does not make sense, say there is "NO ANSWER" otherwise return only the best answer without any further explanation or formatting"""},
            {"role": "user", "content": f"Question:{question}, Potential Answers:{gpt_options}"},
        ]
    )

    result = response.choices[0].message.content
    if result == "NO ANSWER":
        # GPT rejected every candidate: fall back to a random off-topic reply.
        result = off_topic.Answers.values[0]
        result = random.choice(result)
    # NOTE(review): unlike the earlier branches, this return omits the
    # "distances" key — confirm whether consumers expect it to always be present.
    return str({"response": result})
|
|
|
|
# Gradio chat UI wrapping the search handler; ChatInterface supplies the
# (question, history) arguments and displays the returned string verbatim.
demo = gr.ChatInterface(search)


if __name__ == "__main__":
    # BUG FIX: demo.launch() previously ran unconditionally at import time, so
    # importing this module (e.g. `uvicorn module:app`) blocked in the Gradio
    # server and the FastAPI routes were never served. Guarding with
    # __main__ keeps script behavior identical while making the module safely
    # importable.
    demo.launch()