"""Retrieval-augmented question answering over a pickled DataFrame.

At import time this module loads ``data/lethain.pkl`` (one directory above the
directory containing this file). ``ask`` ranks the rows of that DataFrame
against a question by embedding similarity, packs the best-matching
``content`` values into a prompt, and sends it to an OpenAI completion model.
"""

import os
from pathlib import Path

import openai
import pandas as pd
import tiktoken
from openai.embeddings_utils import cosine_similarity, get_embedding

encoding_name = "p50k_base"
encoding = tiktoken.get_encoding(encoding_name)
embedding_model = "text-embedding-ada-002"

openai.api_key = os.environ.get("OPENAI_API_KEY", None)

# Resolve the data file relative to this source file rather than the current
# working directory, so the module can be imported from anywhere.
# NOTE(review): unpickling executes arbitrary code; only load a trusted file.
df = pd.read_pickle(Path(__file__).resolve().parent.parent / "data" / "lethain.pkl")


def search_reviews(df, query):
    """Rank the rows of *df* by cosine similarity to *query*.

    Args:
        df: DataFrame with an ``embeddings`` column; each value is compared
            to the query embedding with ``cosine_similarity``.
        query: Text to embed with ``embedding_model``.

    Returns:
        A new DataFrame carrying an added ``similarity`` column, sorted with
        the most similar rows first. The input frame is left unmodified.
    """
    query_embedding = get_embedding(query, engine=embedding_model)
    similarity = df.embeddings.apply(
        lambda x: cosine_similarity(x, query_embedding)
    )
    # assign() works on a copy, so repeated queries do not write a
    # "similarity" column into the caller's (module-global) DataFrame.
    return df.assign(similarity=similarity).sort_values(
        "similarity", ascending=False
    )


def construct_prompt(question: str, df: pd.DataFrame) -> str:
    """Build the completion prompt for *question* from the best rows of *df*.

    Rows are taken in descending similarity order until adding the next one
    would push the running token count (content plus separator) past
    ``MAX_SECTION_LEN``. The indexes of the selected rows are printed as
    diagnostics.

    Args:
        question: The user's question; also used as the similarity query.
        df: DataFrame with ``embeddings`` and ``content`` columns.

    Returns:
        The prompt: a fixed header, the selected sections each prefixed by
        the separator, then the question and an answer cue.
    """
    MAX_SECTION_LEN = 500
    SEPARATOR = "\n* "
    separator_len = len(encoding.encode(SEPARATOR))

    # Fetch the rows ranked by relevance to the question.
    ranked = search_reviews(df, question)

    chosen_sections = []
    chosen_sections_len = 0
    chosen_sections_indexes = []

    for section_index, row in ranked.iterrows():
        # Flatten newlines first so the text we count is the text we send.
        content = row.content.replace("\n", " ")

        # Charge the separator too: it is prepended to every section and
        # therefore consumes part of the context budget.
        chosen_sections_len += len(encoding.encode(content)) + separator_len
        if chosen_sections_len > MAX_SECTION_LEN:
            break

        chosen_sections.append(SEPARATOR + content)
        chosen_sections_indexes.append(str(section_index))

    # Useful diagnostic information
    print(f"Selected {len(chosen_sections)} document sections:")
    print("\n".join(chosen_sections_indexes))

    # The header text is sent to the model verbatim, so it is kept exactly as
    # it was (including the trailing space before the first newline).
    header = (
        "You name is Will Larson, you are CTO at Calm and a blogger about engineering leadership. \n"
        "Answer the question as truthfully as possible using the provided context, "
        'and if the answer is not contained within the text below, say "I don\'t know."\n\nContext:\n'
    )

    return header + "".join(chosen_sections) + "\n\n Q: " + question + "\n A:"


def ask(question):
    """Answer *question* using context retrieved from the module-level ``df``.

    Args:
        question: The question to answer.

    Returns:
        The ``text`` of the first choice returned by the completion call.
    """
    prompt = construct_prompt(question, df)
    result = openai.Completion.create(
        prompt=prompt,
        temperature=0,
        max_tokens=300,
        model="text-davinci-003",
    )
    return result["choices"][0]["text"]