Spaces:

HMC-CIS
/

Trial-OpenAI

Build error

AashitaK committed on Feb 28, 2025

Commit

1a6ff22

verified ·

1 Parent(s): 228f462

Create document_retrieval.py

Files changed (1) hide show

document_retrieval.py ADDED Viewed

+import numpy as np
+from openai_api import get_embedding
+def vector_similarity(x: list[float], y: list[float]) -> float:
+    """
+    Returns the similarity between two vectors.
+    Because OpenAI Embeddings are normalized to length 1, the cosine similarity is the same as the dot product.
+    """
+    return np.dot(np.array(x), np.array(y))
+def select_document_section_by_query_similarity(query: str, contexts: dict[(str, str), np.array]) -> list[(float, (str, str))]:
+    """
+    Find the query embedding for the supplied query, and compare it against all of the pre-calculated document embeddings
+    to find the most relevant sections.
+    Return the list of document sections, sorted by relevance in descending order.
+    """
+    query_embedding = get_embedding(query)
+    document_similarities = sorted([
+        (vector_similarity(query_embedding, doc_embedding), doc_index) for doc_index, doc_embedding in contexts.items()
+    ], reverse=True)
+    return document_similarities[0]