Spaces:
Configuration error
Configuration error
| import os | |
| from pathlib import Path | |
| import openai | |
| import tiktoken | |
| import pandas as pd | |
| from openai.embeddings_utils import get_embedding, cosine_similarity | |
# Tokenizer used to budget prompt size in construct_prompt(); p50k_base is
# the encoding tiktoken associates with the text-davinci-003 family.
encoding_name = "p50k_base"
encoding = tiktoken.get_encoding(encoding_name)
embedding_model = "text-embedding-ada-002"
# Stays None when the variable is unset; API calls will then fail at request
# time rather than at import time.
openai.api_key = os.environ.get("OPENAI_API_KEY")

# The pickled DataFrame lives in the `data` directory that is a sibling of the
# directory containing this file (i.e. <this file's dir>/../data/lethain.pkl).
# NOTE(review): unpickling executes arbitrary code embedded in the file; only
# load pickles that ship with this repository.
df = pd.read_pickle(Path(__file__).resolve().parent.parent / "data" / "lethain.pkl")
def search_reviews(df, query):
    """Rank the rows of *df* by semantic similarity to *query*.

    The query is embedded with the module-level ``embedding_model`` and
    compared, via cosine similarity, against each value in the
    ``embeddings`` column of *df*.

    Args:
        df: DataFrame with an ``embeddings`` column holding one embedding
            per row.
        query: Free-text query to embed and compare against every row.

    Returns:
        A new DataFrame containing every row of *df* plus a ``similarity``
        column, sorted from most to least similar. *df* itself is left
        unmodified.
    """
    query_embedding = get_embedding(query, engine=embedding_model)
    similarity = df.embeddings.apply(
        lambda x: cosine_similarity(x, query_embedding)
    )
    # assign() works on a new frame, so the shared module-level DataFrame is
    # not rewritten with a query-specific column on every call.
    return df.assign(similarity=similarity).sort_values(
        "similarity", ascending=False
    )
def construct_prompt(
    question: str, df: pd.DataFrame, *, max_section_len: int = 500
) -> str:
    """Build the completion prompt for *question* from the best-matching rows.

    Rows of *df* are ranked with ``search_reviews`` and their ``content`` is
    appended as bulleted context, most similar first, until the next section
    would push the context past the token budget.

    Args:
        question: The user's question; also used as the similarity query.
        df: DataFrame with ``content`` and ``embeddings`` columns.
        max_section_len: Token budget for the context sections, measured
            with the module-level ``encoding``.

    Returns:
        The prompt text: persona header, selected context sections, then the
        question followed by an empty answer slot.

    Side effects:
        Prints the number of selected sections and their index labels.
    """
    separator = "\n* "
    ranked = search_reviews(df, question)

    chosen_sections = []
    chosen_sections_indexes = []
    chosen_sections_len = 0
    for section_index, row in ranked.iterrows():
        # Measure exactly what is emitted: the separator plus the flattened
        # text. Counting only the raw content would leave the separator
        # tokens out of the budget.
        section = separator + row.content.replace("\n", " ")
        chosen_sections_len += len(encoding.encode(section))
        # Stop at the first section that does not fit; lower-ranked sections
        # are not tried even if they are shorter.
        if chosen_sections_len > max_section_len:
            break
        chosen_sections.append(section)
        chosen_sections_indexes.append(str(section_index))

    # Useful diagnostic information
    print(f"Selected {len(chosen_sections)} document sections:")
    print("\n".join(chosen_sections_indexes))

    # NOTE(review): "You name is" looks like a typo for "Your name is"; the
    # text is kept verbatim because it is part of the prompt sent to the model.
    header = """You name is Will Larson, you are CTO at Calm and a blogger about engineering leadership. Answer the question as truthfully as possible using the provided context, and if the answer is not contained within the text below, say "I don't know."\n\nContext:\n"""
    return header + "".join(chosen_sections) + "\n\n Q: " + question + "\n A:"
def ask(question):
    """Answer *question* with a completion grounded in the module-level ``df``.

    The prompt is assembled by ``construct_prompt`` and sent to the
    ``text-davinci-003`` completion model at temperature 0 with a 300-token
    response limit.

    Args:
        question: The question to answer.

    Returns:
        The text of the first completion choice.
    """
    grounded_prompt = construct_prompt(question, df)
    request = {
        "prompt": grounded_prompt,
        "temperature": 0,
        "max_tokens": 300,
        "model": "text-davinci-003",
    }
    completion = openai.Completion.create(**request)
    first_choice = completion["choices"][0]
    return first_choice["text"]