Spaces:

avysotsky
/

asklethain

Configuration error

App Files Files Community

asklethain / lib /utils.py

avysotsky

remove api keys

4270957 about 3 years ago

raw

history blame contribute delete

2.32 kB

	import os
	from pathlib import Path

	import openai
	import tiktoken
	import pandas as pd
	from openai.embeddings_utils import get_embedding, cosine_similarity

	encoding_name = "p50k_base"

	# Tokenizer used by this module to measure text length in tokens.
	encoding = tiktoken.get_encoding(encoding_name)

	embedding_model = "text-embedding-ada-002"
	# The API key is taken from the environment; it stays None when the
	# OPENAI_API_KEY variable is not set.
	openai.api_key = os.environ.get("OPENAI_API_KEY")

	# Load the pickled data set from the "data" directory that sits next to this
	# file's parent directory (i.e. <this file's dir>/../data/lethain.pkl).
	# The functions in this module access its `embeddings` and `content` columns.
	# NOTE: unpickling can execute arbitrary code, so this file must only ever
	# come from a trusted source.
	df = pd.read_pickle(Path(__file__).resolve().parent.parent / "data" / "lethain.pkl")


	def search_reviews(df, query):
	    """Rank the rows of ``df`` by cosine similarity to ``query``.

	    The query text is embedded with the module-level ``embedding_model`` and
	    compared against the vector stored in each row's ``embeddings`` field.

	    Args:
	        df: DataFrame with an ``embeddings`` column holding one vector per
	            row (presumably produced with the same embedding model --
	            confirm against the code that builds the pickle).
	        query: Text to search for.

	    Returns:
	        A new DataFrame with every row of ``df`` plus a ``similarity``
	        column, sorted from most to least similar. ``df`` itself is not
	        modified.
	    """
	    query_embedding = get_embedding(query, engine=embedding_model)

	    # assign() builds a new frame instead of writing the column into the
	    # caller's DataFrame, so the shared module-level frame is not rewritten
	    # on every query.
	    scored = df.assign(
	        similarity=df.embeddings.apply(
	            lambda x: cosine_similarity(x, query_embedding)
	        )
	    )
	    return scored.sort_values("similarity", ascending=False)


	def construct_prompt(question: str, df: pd.DataFrame, *, max_section_len: int = 500) -> str:
	    """Build the completion prompt for ``question`` from the best-matching rows.

	    Rows are taken in order of decreasing similarity (as returned by
	    ``search_reviews``) and appended to the context until the token budget
	    would be exceeded.

	    Args:
	        question: The user's question; it is also used as the search query.
	        df: DataFrame to search; its rows must expose ``content`` (text) and
	            whatever ``search_reviews`` needs for ranking.
	        max_section_len: Token budget for the context sections, separators
	            included. Defaults to 500.

	    Returns:
	        The prompt text: a fixed header, the chosen context sections as a
	        bulleted list, then the question followed by an answer cue.
	    """
	    SEPARATOR = "\n* "
	    separator_len = len(encoding.encode(SEPARATOR))

	    # Most relevant sections come first.
	    result = search_reviews(df, question)

	    chosen_sections = []
	    chosen_sections_len = 0
	    chosen_sections_indexes = []

	    for section_index, row in result.iterrows():
	        # Add contexts until we run out of space. Every section costs its own
	        # tokens plus the separator that is prepended to it.
	        tokens_num = len(encoding.encode(row.content))
	        chosen_sections_len += tokens_num + separator_len
	        if chosen_sections_len > max_section_len:
	            break

	        # Newlines inside a section are flattened so each section stays on
	        # one bullet line.
	        chosen_sections.append(SEPARATOR + row.content.replace("\n", " "))
	        chosen_sections_indexes.append(str(section_index))

	    # Useful diagnostic information
	    print(f"Selected {len(chosen_sections)} document sections:")
	    print("\n".join(chosen_sections_indexes))

	    # NOTE(review): "You name is" looks like a typo for "Your name is"; the
	    # text is left untouched here because changing it changes the prompt
	    # sent to the model.
	    header = """You name is Will Larson, you are CTO at Calm and a blogger about engineering leadership. Answer the question as truthfully as possible using the provided context, and if the answer is not contained within the text below, say "I don't know."\n\nContext:\n"""

	    return header + "".join(chosen_sections) + "\n\n Q: " + question + "\n A:"


	def ask(question):
	    """Answer ``question`` using the module-level data set as context.

	    A prompt is built with ``construct_prompt`` from the module-level ``df``
	    and submitted through ``openai.Completion.create``.

	    Args:
	        question: The question text to answer.

	    Returns:
	        The ``text`` field of the first choice in the completion response.
	    """
	    completion = openai.Completion.create(
	        model="text-davinci-003",
	        prompt=construct_prompt(question, df),
	        max_tokens=300,
	        temperature=0,
	    )

	    first_choice = completion['choices'][0]
	    return first_choice['text']