Spaces:
Build error
Build error
Create functions.py
Browse files- functions.py +103 -0
functions.py
ADDED
|
@@ -0,0 +1,103 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
# Retrieval-augmented Q&A helpers: embed a CSV of service descriptions once,
# then answer user questions against the best-matching service entry.
import openai
import numpy as np
import pandas as pd
import os

# Set your OpenAI API key
# NOTE(review): this file uses the pre-1.0 `openai` SDK surface (module-level
# api_key, openai.Embedding.create / openai.Completion.create below) — confirm
# the pinned library version matches.
openai.api_key = os.getenv("OPENAI_API_KEY")

COMPLETIONS_MODEL = "gpt-3.5-turbo-instruct" # "text-davinci-003" used earlier is deprecated.
# EMBEDDING_MODEL = "text-embedding-3-small" #"text-embedding-ada-002"
EMBEDDING_MODEL = "text-embedding-3-large"

# Shared keyword arguments for every completion request made in this module.
COMPLETIONS_API_PARAMS = {
    # We use temperature of 0.0 because it gives the most predictable, factual answer.
    "temperature": 0.0,
    "max_tokens": 300,
    "model": COMPLETIONS_MODEL,
}

# Knowledge base, loaded at import time. Downstream code reads the
# "description" and "link" columns and looks rows up by the "service" index.
df = pd.read_csv('services-links.csv')
df = df.set_index("service")
+
def get_embedding(text: str, model: str=EMBEDDING_MODEL) -> list[float]:
    """Return the embedding vector for *text* from the OpenAI Embeddings API."""
    response = openai.Embedding.create(
        model=model,
        input=text
    )
    # A single input string yields a single item in the "data" list.
    first_item = response["data"][0]
    return first_item["embedding"]
+
def vector_similarity(x: list[float], y: list[float]) -> float:
    """
    Return the similarity between two vectors.

    OpenAI embeddings are normalized to unit length, so the dot product is
    equal to the cosine similarity.
    """
    # For 1-D operands, `@` is exactly the dot product.
    return np.asarray(x) @ np.asarray(y)
+
def select_document_section_by_query_similarity(query: str, contexts: dict[tuple[str, str], np.ndarray]) -> tuple[float, tuple[str, str]]:
    """
    Embed the supplied query and compare it against all of the pre-calculated
    document embeddings in ``contexts``.

    Returns the single best-matching ``(similarity, document_index)`` pair —
    i.e. the head of a relevance-descending ordering of the documents.
    (The previous docstring claimed a full sorted list was returned; only the
    top pair ever was, and the annotation now reflects that.)

    Raises ValueError if ``contexts`` is empty.
    """
    query_embedding = get_embedding(query)

    # max() over (similarity, index) tuples equals the first element of a
    # reverse-sorted list, but costs O(n) instead of O(n log n).
    return max(
        (vector_similarity(query_embedding, doc_embedding), doc_index)
        for doc_index, doc_embedding in contexts.items()
    )
+
def compute_doc_embeddings(df: pd.DataFrame) -> dict[tuple[str, str], list[float]]:
    """
    Create an embedding for each row in the dataframe using the OpenAI Embeddings API.

    Return a dictionary mapping each row index to the embedding of that row's
    ``description`` column. Makes one API call per row.
    """
    embeddings = {}
    for row_index, row in df.iterrows():
        embeddings[row_index] = get_embedding(row.description)
    return embeddings
+
# Embed every service description once at import time.
# NOTE(review): this issues one API call per CSV row on module import —
# consider caching the vectors to disk if the catalogue grows.
document_embeddings = compute_doc_embeddings(df)
+
def construct_prompt(question: str, context_embeddings: dict, df: pd.DataFrame) -> tuple[str, str]:
    """
    Fetch the service entry most relevant to *question* and build the
    completion prompt from it.

    Returns a ``(prompt, link)`` pair: the prompt instructs the model to
    answer only from the chosen service's description (falling back to a
    fixed "could not find an answer" message), and ``link`` is that
    service's catalogue URL.
    """
    _ , chosen_service = select_document_section_by_query_similarity(question, context_embeddings)

    # Newlines inside the stored description would break the prompt layout.
    service_description = df.loc[chosen_service].description.replace("\n", " ")
    header = "Answer the question as truthfully as possible using the provided context, and if the answer is not contained within the text below, say "
    message = "I could not find an answer to your question, please reach out to Helpdesk."
    link = df.loc[chosen_service].link
    return header + message + "\n* " + "\n\nContext:\n" + service_description + "\n\n Q: " + question + "\n A:", link
+
def answer_query_with_context(
    query: str,
    df: pd.DataFrame,
    document_embeddings: dict[(str, str), np.array],
    show_prompt: bool = False
) -> str:
    """
    Answer *query* using the most relevant service entry as context.

    Builds the prompt from the best-matching document, sends it to the
    completions endpoint, and appends a fixed Helpdesk footer plus the
    service-catalogue link to the model's reply.
    """
    prompt, link = construct_prompt(
        query,
        document_embeddings,
        df
    )

    if show_prompt:
        print(prompt)

    completion = openai.Completion.create(
        prompt=prompt,
        **COMPLETIONS_API_PARAMS
    )

    # Fixed footer appended to every answer: catalogue link + contact details.
    footer = "".join([
        "\n\nPlease check out the relevant HMC service catalogue for more details: " + link,
        """\n\nIf not satisfied with the answer, please email helpdesk@hmc.edu, call 909.607.7777 or visit the Helpdesk located on the Sprague first floor. """,
        """Helpdesk representatives are also available for a remote chat session during normal hours on Monday - Friday, 8:00 AM - 5:00 PM PST via https://helpdesk.hmc.edu""",
    ])

    return completion["choices"][0]["text"] + footer