AashitaK committed on
Commit
7c6fbd3
·
verified ·
1 Parent(s): 55c74f4

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +0 -103
app.py CHANGED
@@ -1,107 +1,4 @@
1
  import gradio as gr
2
- import openai
3
- import numpy as np
4
- import pandas as pd
5
- import os
6
-
7
- # Set your OpenAI API key
8
- openai.api_key = os.getenv("OPENAI_API_KEY")
9
-
10
- COMPLETIONS_MODEL = "gpt-3.5-turbo-instruct" # "text-davinci-003" used earlier is deprecated.
11
- # EMBEDDING_MODEL = "text-embedding-3-small" #"text-embedding-ada-002"
12
- EMBEDDING_MODEL = "text-embedding-3-large"
13
-
14
- COMPLETIONS_API_PARAMS = {
15
- # We use temperature of 0.0 because it gives the most predictable, factual answer.
16
- "temperature": 0.0,
17
- "max_tokens": 300,
18
- "model": COMPLETIONS_MODEL,
19
- }
20
-
21
- df = pd.read_csv('services-links.csv')
22
- df = df.set_index("service")
23
-
24
- def get_embedding(text: str, model: str=EMBEDDING_MODEL) -> list[float]:
25
- result = openai.Embedding.create(
26
- model=model,
27
- input=text
28
- )
29
- return result["data"][0]["embedding"]
30
-
31
- def vector_similarity(x: list[float], y: list[float]) -> float:
32
- """
33
- Returns the similarity between two vectors.
34
-
35
- Because OpenAI Embeddings are normalized to length 1, the cosine similarity is the same as the dot product.
36
- """
37
- return np.dot(np.array(x), np.array(y))
38
-
39
- def select_document_section_by_query_similarity(query: str, contexts: dict[(str, str), np.array]) -> list[(float, (str, str))]:
40
- """
41
- Find the query embedding for the supplied query, and compare it against all of the pre-calculated document embeddings
42
- to find the most relevant sections.
43
-
44
- Return the list of document sections, sorted by relevance in descending order.
45
- """
46
- query_embedding = get_embedding(query)
47
-
48
- document_similarities = sorted([
49
- (vector_similarity(query_embedding, doc_embedding), doc_index) for doc_index, doc_embedding in contexts.items()
50
- ], reverse=True)
51
-
52
- return document_similarities[0]
53
-
54
- def compute_doc_embeddings(df: pd.DataFrame) -> dict[tuple[str, str], list[float]]:
55
- """
56
- Create an embedding for each row in the dataframe using the OpenAI Embeddings API.
57
-
58
- Return a dictionary that maps between each embedding vector and the index of the row that it corresponds to.
59
- """
60
- return {
61
- idx: get_embedding(r.description) for idx, r in df.iterrows()
62
- }
63
-
64
- document_embeddings = compute_doc_embeddings(df)
65
-
66
- def construct_prompt(question: str, context_embeddings: dict, df: pd.DataFrame) -> str:
67
- """
68
- Fetch relevant
69
- """
70
- _ , chosen_service = select_document_section_by_query_similarity(question, context_embeddings)
71
-
72
- service_description = df.loc[chosen_service].description.replace("\n", " ")
73
- header = "Answer the question as truthfully as possible using the provided context, and if the answer is not contained within the text below, say "
74
- message = "I could not find an answer to your question, please reach out to Helpdesk."
75
- link = df.loc[chosen_service].link
76
- return header + message + "\n* " + "\n\nContext:\n" + service_description + "\n\n Q: " + question + "\n A:", link
77
-
78
- def answer_query_with_context(
79
- query: str,
80
- df: pd.DataFrame,
81
- document_embeddings: dict[(str, str), np.array],
82
- show_prompt: bool = False
83
- ) -> str:
84
- prompt, link = construct_prompt(
85
- query,
86
- document_embeddings,
87
- df
88
- )
89
-
90
- if show_prompt:
91
- print(prompt)
92
-
93
- response = openai.Completion.create(
94
- prompt=prompt,
95
- **COMPLETIONS_API_PARAMS
96
- )
97
-
98
- end_message = "\n\nPlease check out the relevant HMC service catalogue for more details: "+ link
99
- end_message += """\n\nIf not satisfied with the answer, please email helpdesk@hmc.edu, call 909.607.7777 or visit the Helpdesk located on the Sprague first floor. """
100
- end_message += """Helpdesk representatives are also available for a remote chat session during normal hours on Monday - Friday, 8:00 AM - 5:00 PM PST via https://helpdesk.hmc.edu"""
101
-
102
- reply = response["choices"][0]["text"] + end_message
103
-
104
- return reply
105
 
106
  def chatbot(input):
107
  try:
 
1
  import gradio as gr
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
2
 
3
  def chatbot(input):
4
  try: