AashitaK committed on
Commit
f4f6f11
·
verified ·
1 Parent(s): 7c6fbd3

Create functions.py

Browse files
Files changed (1) hide show
  1. functions.py +103 -0
functions.py ADDED
@@ -0,0 +1,103 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import openai
2
+ import numpy as np
3
+ import pandas as pd
4
+ import os
5
+
6
# The OpenAI API key is read from the environment rather than stored in the source.
openai.api_key = os.environ.get("OPENAI_API_KEY")

# Completion model; "text-davinci-003" used earlier is deprecated.
COMPLETIONS_MODEL = "gpt-3.5-turbo-instruct"
# Embedding model; "text-embedding-3-small" and "text-embedding-ada-002" are the
# alternatives the author noted next to this setting.
EMBEDDING_MODEL = "text-embedding-3-large"

# Keyword arguments forwarded to every completion request.
COMPLETIONS_API_PARAMS = dict(
    # A temperature of 0.0 gives the most predictable, factual answer.
    temperature=0.0,
    max_tokens=300,
    model=COMPLETIONS_MODEL,
)

# Service table loaded from the CSV file and indexed by its "service" column.
df = pd.read_csv('services-links.csv').set_index("service")
22
+
23
def get_embedding(text: str, model: str=EMBEDDING_MODEL) -> list[float]:
    """
    Request an embedding for `text` from the OpenAI Embeddings API.

    `model` selects the embedding model and defaults to `EMBEDDING_MODEL`.
    Returns the `embedding` field of the first item in the response's `data` list.
    """
    response = openai.Embedding.create(input=text, model=model)
    first_item = response["data"][0]
    return first_item["embedding"]
29
+
30
def vector_similarity(x: list[float], y: list[float]) -> float:
    """
    Score the similarity of two vectors as their dot product.

    The dot product equals the cosine similarity only for unit-length vectors;
    this file relies on OpenAI embeddings being normalized to length 1.
    """
    left = np.array(x)
    right = np.array(y)
    return left.dot(right)
37
+
38
def select_document_section_by_query_similarity(query: str, contexts: dict[str, list[float]]) -> tuple[float, str]:
    """
    Embed `query` and return the single best-matching entry of `contexts`.

    Args:
        query: Text to embed with `get_embedding`.
        contexts: Pre-computed document embeddings keyed by document identifier
            (presumably the string labels of the service table's index).

    Returns:
        A `(similarity, identifier)` tuple for the highest-scoring document.
        When several documents share the top similarity, the one with the
        greatest identifier wins, because whole tuples are compared.

    Raises:
        IndexError: If `contexts` is empty.
    """
    if not contexts:
        # Fail before spending an embeddings request. IndexError is kept because
        # that is what indexing the empty result list raised previously.
        raise IndexError("contexts is empty: there are no document embeddings to compare against")

    query_embedding = get_embedding(query)

    # Only the top match is needed, so take the maximum of the (score, identifier)
    # tuples instead of sorting every candidate in descending order.
    return max(
        (vector_similarity(query_embedding, doc_embedding), doc_index)
        for doc_index, doc_embedding in contexts.items()
    )
52
+
53
def compute_doc_embeddings(df: pd.DataFrame) -> dict[tuple[str, str], list[float]]:
    """
    Embed the `description` of every row of `df` with `get_embedding`.

    Returns a dictionary keyed by each row's index label, whose value is the
    embedding computed for that row's description.
    """
    embeddings_by_index = {}
    for row_label, row in df.iterrows():
        embeddings_by_index[row_label] = get_embedding(row.description)
    return embeddings_by_index
62
+
63
# Embeddings for every row of `df`, computed once when this module is imported.
# NOTE: this issues one Embeddings API request per row at import time.
document_embeddings = compute_doc_embeddings(df)
64
+
65
def construct_prompt(question: str, context_embeddings: dict, df: pd.DataFrame) -> tuple[str, str]:
    """
    Build the completion prompt for `question` from the best-matching service.

    Args:
        question: The user's question; it is matched against `context_embeddings`
            and appended to the prompt.
        context_embeddings: Pre-computed embeddings keyed by the index labels of `df`.
        df: Service table providing the `description` and `link` columns.

    Returns:
        A `(prompt, link)` tuple: the prompt text to send to the completion model
        and the `link` value of the selected service.
    """
    _, chosen_service = select_document_section_by_query_similarity(question, context_embeddings)

    # Look the selected row up once and read both columns from it.
    service_row = df.loc[chosen_service]
    # Flatten newlines so the description occupies a single line of the prompt.
    service_description = service_row.description.replace("\n", " ")
    link = service_row.link

    header = "Answer the question as truthfully as possible using the provided context, and if the answer is not contained within the text below, say "
    message = "I could not find an answer to your question, please reach out to Helpdesk."
    prompt = header + message + "\n* " + "\n\nContext:\n" + service_description + "\n\n Q: " + question + "\n A:"
    return prompt, link
76
+
77
def answer_query_with_context(
    query: str,
    df: pd.DataFrame,
    document_embeddings: dict[(str, str), np.array],
    show_prompt: bool = False
) -> str:
    """
    Answer `query` with the completion model, using the best-matching service as context.

    Builds the prompt with `construct_prompt`, prints it when `show_prompt` is
    true, requests a completion with `COMPLETIONS_API_PARAMS`, and returns the
    completion text followed by a fixed footer holding the service link and
    the Helpdesk contact details.
    """
    prompt, link = construct_prompt(query, document_embeddings, df)
    if show_prompt:
        print(prompt)

    completion = openai.Completion.create(prompt=prompt, **COMPLETIONS_API_PARAMS)

    # The footer is appended to every answer, whatever the model replied.
    footer_parts = (
        "\n\nPlease check out the relevant HMC service catalogue for more details: " + link,
        "\n\nIf not satisfied with the answer, please email helpdesk@hmc.edu, call 909.607.7777 or visit the Helpdesk located on the Sprague first floor. ",
        "Helpdesk representatives are also available for a remote chat session during normal hours on Monday - Friday, 8:00 AM - 5:00 PM PST via https://helpdesk.hmc.edu",
    )
    return completion["choices"][0]["text"] + "".join(footer_parts)