AashitaK commited on
Commit
b2d9845
·
verified ·
1 Parent(s): b7b690e

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +117 -12
app.py CHANGED
@@ -1,27 +1,132 @@
1
  import gradio as gr
2
  import openai
 
3
  import os
4
 
5
  # Set your OpenAI API key
6
- openai.api_key = os.getenv("OPENAI_API_KEY")
7
 
8
- def chatbot(prompt):
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
9
  try:
10
- response = openai.ChatCompletion.create(
11
- model="gpt-3.5-turbo",
12
- messages=[{"role": "system", "content": "You are a helpful assistant."},
13
- {"role": "user", "content": prompt}]
14
- )
15
- return response["choices"][0]["message"]["content"]
16
  except Exception as e:
17
  return str(e)
18
 
19
  # Create a Gradio interface
20
- iface = gr.Interface(fn=chatbot,
21
- inputs="text",
22
- outputs="text",
 
 
 
23
  title="AI Chatbot",
24
- description="Ask me anything!")
 
25
 
26
  if __name__ == "__main__":
27
  iface.launch()
 
1
import os

import gradio as gr
import numpy as np
import openai
import pandas as pd
5
 
6
# Set your OpenAI API key
openai.api_key = os.getenv("OPENAI_API_KEY")

# Completions model. "text-davinci-003" used earlier is deprecated.
COMPLETIONS_MODEL = "gpt-3.5-turbo-instruct"
# EMBEDDING_MODEL = "text-embedding-3-small"  # "text-embedding-ada-002"
EMBEDDING_MODEL = "text-embedding-3-large"

# Shared keyword arguments for every completion request.
COMPLETIONS_API_PARAMS = {
    # We use temperature of 0.0 because it gives the most predictable, factual answer.
    "temperature": 0.0,
    "max_tokens": 300,
    "model": COMPLETIONS_MODEL,
}
+
20
# Service catalogue: one row per service, indexed by service name.
# NOTE(review): assumes the CSV has "service", "description" and "link"
# columns — confirm against services-links.csv.
df = pd.read_csv('services-links.csv')
df = df.set_index("service")
def get_embedding(text: str, model: str = EMBEDDING_MODEL) -> list[float]:
    """Return the embedding vector for *text* from the OpenAI Embeddings API."""
    result = openai.Embedding.create(model=model, input=text)
    return result["data"][0]["embedding"]
30
def vector_similarity(x: list[float], y: list[float]) -> float:
    """Return the similarity between two vectors.

    Because OpenAI Embeddings are normalized to length 1, the cosine
    similarity is the same as the dot product.
    """
    return np.asarray(x) @ np.asarray(y)
38
def select_document_section_by_query_similarity(
    query: str, contexts: dict[tuple[str, str], np.ndarray]
) -> tuple[float, tuple[str, str]]:
    """
    Find the query embedding for the supplied query, and compare it against
    all of the pre-calculated document embeddings to find the most relevant
    section.

    Returns the single best (similarity, doc_index) pair.
    NOTE: the original annotation claimed a list was returned, but the body
    has always returned only the top element.
    """
    query_embedding = get_embedding(query)

    # max() over (similarity, index) tuples is equivalent to the original
    # sorted(..., reverse=True)[0] but runs in O(n) instead of O(n log n).
    return max(
        (vector_similarity(query_embedding, doc_embedding), doc_index)
        for doc_index, doc_embedding in contexts.items()
    )
53
def compute_doc_embeddings(df: pd.DataFrame) -> dict[tuple[str, str], list[float]]:
    """
    Create an embedding for each row in the dataframe using the OpenAI
    Embeddings API.

    Returns a dictionary mapping each row's index to its embedding vector.
    """
    embeddings = {}
    for row_index, row in df.iterrows():
        embeddings[row_index] = get_embedding(row.description)
    return embeddings
63
def construct_prompt(question: str, context_embeddings: dict, df: pd.DataFrame) -> tuple[str, str]:
    """
    Build the completion prompt for *question* from the most relevant
    service description.

    Returns a (prompt, link) pair: the prompt text and the chosen service's
    catalogue link.  NOTE: the original annotation said ``-> str`` but the
    function has always returned this 2-tuple.
    """
    _, chosen_service = select_document_section_by_query_similarity(question, context_embeddings)

    service_description = df.loc[chosen_service].description.replace("\n", " ")
    header = "Answer the question as truthfully as possible using the provided context, and if the answer is not contained within the text below, say "
    message = "I could not find an answer to your question, please reach out to Helpdesk."
    # NOTE(review): link is assumed to be a URL string — confirm the CSV schema.
    link = df.loc[chosen_service].link
    return header + message + "\n* " + "\n\nContext:\n" + service_description + "\n\n Q: " + question + "\n A:", link
75
# BUGFIX: `document_embeddings` was used throughout the script but never
# defined, so the module raised NameError on import.  Compute the per-row
# embeddings once here so every later call can reuse them.
document_embeddings = compute_doc_embeddings(df)

# Smoke test: build and print a (prompt, link) pair for a sample question.
prompt = construct_prompt(
    "How many iClickers are there?",
    document_embeddings,
    df
)
print(prompt)
82
def answer_query_with_context(
    query: str,
    df: pd.DataFrame,
    document_embeddings: dict[tuple[str, str], np.ndarray],
    show_prompt: bool = False
) -> str:
    """
    Answer *query* with the completions model, grounded in the most relevant
    service description, and append a standard Helpdesk footer.
    """
    prompt, link = construct_prompt(
        query,
        document_embeddings,
        df
    )

    if show_prompt:
        print(prompt)

    response = openai.Completion.create(
        prompt=prompt,
        **COMPLETIONS_API_PARAMS
    )

    # Standard footer: catalogue link plus Helpdesk contact details.
    end_message = (
        "\n\nPlease check out the relevant HMC service catalogue for more details: " + link
        + "\n\nIf not satisfied with the answer, please email helpdesk@hmc.edu, call 909.607.7777 or visit the Helpdesk located on the Sprague first floor. "
        + "Helpdesk representatives are also available for a remote chat session during normal hours on Monday - Friday, 8:00 AM - 5:00 PM PST via https://helpdesk.hmc.edu"
    )

    return response["choices"][0]["text"] + end_message
+
110
# Smoke test at import time; the result is discarded.
answer_query_with_context("How to install Matlab?", df, document_embeddings)
112
def chatbot(input):
    """Gradio handler: answer *input* using the service-catalogue context.

    Always returns a str: the model's reply, "" for an empty box (the
    original fell through to an implicit None), or the exception text so
    the UI surfaces errors instead of crashing.  (``input`` shadows the
    builtin; the name is kept for interface compatibility.)
    """
    try:
        if not input:
            return ""  # nothing to answer; was an implicit None
        return answer_query_with_context(input, df, document_embeddings)
    except Exception as e:
        # Best-effort UI: show the error text rather than a stack trace.
        return str(e)
119
 
120
# Create a Gradio interface.
# BUGFIX: the gr.inputs / gr.outputs namespaces were removed in Gradio 3.x;
# use the top-level component classes instead.
inputs = gr.Textbox(lines=7, label="Chat with AI")
outputs = gr.Textbox(label="Reply")
header_message = "Ask anything about the following services: " + ", ".join(df.index)
iface = gr.Interface(
    fn=chatbot,
    inputs=inputs,
    outputs=outputs,
    title="AI Chatbot",
    description=header_message,
    # NOTE(review): "compact" is a legacy theme name — confirm it is still
    # accepted by the installed Gradio version.
    theme="compact",
)

if __name__ == "__main__":
    iface.launch()