AashitaK commited on
Commit
b2d9845
·
verified ·
1 Parent(s): b7b690e

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +117 -12
app.py CHANGED
@@ -1,27 +1,132 @@
1
  import gradio as gr
2
  import openai
 
3
  import os
4
 
5
  # Set your OpenAI API key
6
- openai.api_key = os.getenv("OPENAI_API_KEY")
7
 
8
- def chatbot(prompt):
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
9
  try:
10
- response = openai.ChatCompletion.create(
11
- model="gpt-3.5-turbo",
12
- messages=[{"role": "system", "content": "You are a helpful assistant."},
13
- {"role": "user", "content": prompt}]
14
- )
15
- return response["choices"][0]["message"]["content"]
16
  except Exception as e:
17
  return str(e)
18
 
19
  # Create a Gradio interface
20
- iface = gr.Interface(fn=chatbot,
21
- inputs="text",
22
- outputs="text",
 
 
 
23
  title="AI Chatbot",
24
- description="Ask me anything!")
 
25
 
26
  if __name__ == "__main__":
27
  iface.launch()
 
1
import os

import gradio as gr
import numpy as np
import openai
import pandas as pd
5
 
6
# Set your OpenAI API key
openai.api_key = os.getenv("OPENAI_API_KEY")

# Completions model. "text-davinci-003" used earlier is deprecated.
COMPLETIONS_MODEL = "gpt-3.5-turbo-instruct"
# EMBEDDING_MODEL = "text-embedding-3-small"  # "text-embedding-ada-002"
EMBEDDING_MODEL = "text-embedding-3-large"

# Shared keyword arguments for every completion request.
COMPLETIONS_API_PARAMS = {
    # We use temperature of 0.0 because it gives the most predictable, factual answer.
    "temperature": 0.0,
    "max_tokens": 300,
    "model": COMPLETIONS_MODEL,
}
+
20
# Service catalogue: one row per service, indexed by service name.
# NOTE(review): assumes the CSV has "service", "description" and "link"
# columns — confirm against services-links.csv.
df = pd.read_csv('services-links.csv')
df = df.set_index("service")
def get_embedding(text: str, model: str = EMBEDDING_MODEL) -> list[float]:
    """Return the embedding vector for *text* from the OpenAI Embeddings API."""
    result = openai.Embedding.create(model=model, input=text)
    return result["data"][0]["embedding"]
30
def vector_similarity(x: list[float], y: list[float]) -> float:
    """Return the similarity between two vectors.

    Because OpenAI Embeddings are normalized to length 1, the cosine
    similarity is the same as the dot product.
    """
    return np.asarray(x) @ np.asarray(y)
38
def select_document_section_by_query_similarity(
    query: str, contexts: dict[tuple[str, str], np.ndarray]
) -> tuple[float, tuple[str, str]]:
    """
    Find the query embedding for the supplied query, and compare it against
    all of the pre-calculated document embeddings to find the most relevant
    section.

    Returns the single best (similarity, doc_index) pair.
    NOTE: the original annotation claimed a list was returned, but the body
    has always returned only the top element.
    """
    query_embedding = get_embedding(query)

    # max() over (similarity, index) tuples is equivalent to the original
    # sorted(..., reverse=True)[0] but runs in O(n) instead of O(n log n).
    return max(
        (vector_similarity(query_embedding, doc_embedding), doc_index)
        for doc_index, doc_embedding in contexts.items()
    )
53
def compute_doc_embeddings(df: pd.DataFrame) -> dict[tuple[str, str], list[float]]:
    """
    Create an embedding for each row in the dataframe using the OpenAI
    Embeddings API.

    Returns a dictionary mapping each row's index to its embedding vector.
    """
    embeddings = {}
    for row_index, row in df.iterrows():
        embeddings[row_index] = get_embedding(row.description)
    return embeddings
63
def construct_prompt(question: str, context_embeddings: dict, df: pd.DataFrame) -> tuple[str, str]:
    """
    Build the completion prompt for *question* from the most relevant
    service description.

    Returns a (prompt, link) pair: the prompt text and the chosen service's
    catalogue link.  NOTE: the original annotation said ``-> str`` but the
    function has always returned this 2-tuple.
    """
    _, chosen_service = select_document_section_by_query_similarity(question, context_embeddings)

    service_description = df.loc[chosen_service].description.replace("\n", " ")
    header = "Answer the question as truthfully as possible using the provided context, and if the answer is not contained within the text below, say "
    message = "I could not find an answer to your question, please reach out to Helpdesk."
    # NOTE(review): link is assumed to be a URL string — confirm the CSV schema.
    link = df.loc[chosen_service].link
    return header + message + "\n* " + "\n\nContext:\n" + service_description + "\n\n Q: " + question + "\n A:", link
75
# BUGFIX: `document_embeddings` was used throughout the script but never
# defined, so the module raised NameError on import.  Compute the per-row
# embeddings once here so every later call can reuse them.
document_embeddings = compute_doc_embeddings(df)

# Smoke test: build and print a (prompt, link) pair for a sample question.
prompt = construct_prompt(
    "How many iClickers are there?",
    document_embeddings,
    df
)
print(prompt)
82
def answer_query_with_context(
    query: str,
    df: pd.DataFrame,
    document_embeddings: dict[tuple[str, str], np.ndarray],
    show_prompt: bool = False
) -> str:
    """
    Answer *query* with the completions model, grounded in the most relevant
    service description, and append a standard Helpdesk footer.
    """
    prompt, link = construct_prompt(
        query,
        document_embeddings,
        df
    )

    if show_prompt:
        print(prompt)

    response = openai.Completion.create(
        prompt=prompt,
        **COMPLETIONS_API_PARAMS
    )

    # Standard footer: catalogue link plus Helpdesk contact details.
    end_message = (
        "\n\nPlease check out the relevant HMC service catalogue for more details: " + link
        + "\n\nIf not satisfied with the answer, please email helpdesk@hmc.edu, call 909.607.7777 or visit the Helpdesk located on the Sprague first floor. "
        + "Helpdesk representatives are also available for a remote chat session during normal hours on Monday - Friday, 8:00 AM - 5:00 PM PST via https://helpdesk.hmc.edu"
    )

    return response["choices"][0]["text"] + end_message
+
110
# Smoke test at import time; the result is discarded.
answer_query_with_context("How to install Matlab?", df, document_embeddings)
112
def chatbot(input):
    """Gradio handler: answer *input* using the service-catalogue context.

    Always returns a str: the model's reply, "" for an empty box (the
    original fell through to an implicit None), or the exception text so
    the UI surfaces errors instead of crashing.  (``input`` shadows the
    builtin; the name is kept for interface compatibility.)
    """
    try:
        if not input:
            return ""  # nothing to answer; was an implicit None
        return answer_query_with_context(input, df, document_embeddings)
    except Exception as e:
        # Best-effort UI: show the error text rather than a stack trace.
        return str(e)
119
 
120
# Create a Gradio interface.
# BUGFIX: the gr.inputs / gr.outputs namespaces were removed in Gradio 3.x;
# use the top-level component classes instead.
inputs = gr.Textbox(lines=7, label="Chat with AI")
outputs = gr.Textbox(label="Reply")
header_message = "Ask anything about the following services: " + ", ".join(df.index)
iface = gr.Interface(
    fn=chatbot,
    inputs=inputs,
    outputs=outputs,
    title="AI Chatbot",
    description=header_message,
    # NOTE(review): "compact" is a legacy theme name — confirm it is still
    # accepted by the installed Gradio version.
    theme="compact",
)

if __name__ == "__main__":
    iface.launch()