import csv

import chromadb
from chromadb.utils import embedding_functions

# Shared embedding function: used both when indexing documents and when
# embedding queries, so stored vectors and query vectors come from the same model.
default_ef = embedding_functions.DefaultEmbeddingFunction()


def createEmbeddings():
    """Index the rule notes into the persistent "ICC_Rules" Chroma collection.

    Reads one document per line from ``notes.txt`` and one metadata row per
    document from ``metaData.csv``; the third CSV column is stored as the
    ``ruleDescription`` metadata of the document on the same line number.
    Documents whose metadata row is missing, or has fewer than three columns,
    are stored with the placeholder description "No metadata available".
    Document ids are the zero-based line numbers as strings.

    Returns:
        None. Progress and errors are reported on stdout; exceptions are
        caught and printed rather than propagated.
    """
    # Documents are embedded and added in modest batches rather than one call
    # per line; Chroma enforces a maximum batch size per add() call, so the
    # whole file is not sent at once.
    batch_size = 100
    missing_description = "No metadata available"

    try:
        client = chromadb.PersistentClient("./chromaDB")
        collection = client.get_or_create_collection("ICC_Rules")

        with open("notes.txt", "r") as f:
            data = [line.strip() for line in f]

        # newline="" lets the csv module handle quoted fields that contain
        # embedded line breaks, as recommended by the csv documentation.
        with open("metaData.csv", "r", newline="") as f:
            metaData = list(csv.reader(f))

        if len(data) != len(metaData):
            print(f"Warning: Mismatch in data lengths. notes.txt has {len(data)} lines but metaData.csv has {len(metaData)} rows.")

        # Resolve every description up front. A blank or short CSV row would
        # otherwise raise IndexError mid-import and leave the collection
        # partially populated.
        descriptions = [
            metaData[i][2]
            if i < len(metaData) and len(metaData[i]) > 2
            else missing_description
            for i in range(len(data))
        ]

        for start in range(0, len(data), batch_size):
            docs = data[start:start + batch_size]
            collection.add(
                ids=[str(i) for i in range(start, start + len(docs))],
                embeddings=default_ef(docs),
                metadatas=[
                    {"ruleDescription": description}
                    for description in descriptions[start:start + batch_size]
                ],
                documents=docs,
            )

        print(f"Successfully added {len(data)} documents to the collection.")
    except Exception as e:
        print(f"Error creating embeddings: {e}")


def retrieveInfo(query, n_results=3):
    """Return the stored documents most similar to ``query``.

    Args:
        query: Text to embed and search for in the "ICC_Rules" collection.
        n_results: Maximum number of matches to request from Chroma.

    Returns:
        On success, a list of dicts with the keys ``"document"``,
        ``"metadata"`` and ``"distance"``, one per match in the order Chroma
        returned them. If nothing matched, the string
        "No relevant information found.". If any exception occurs, the string
        "An error occurred: <message>" (the error is also printed).
    """
    try:
        client = chromadb.PersistentClient("./chromaDB")
        collection = client.get_or_create_collection("ICC_Rules")
        results = collection.query(
            query_embeddings=default_ef([query]),
            n_results=n_results,
        )

        if not results or not results['documents'] or len(results['documents'][0]) == 0:
            return "No relevant information found."

        # A field that was not returned may be present as None rather than
        # absent, so test for None instead of key membership before indexing.
        metadatas = results.get('metadatas')
        distances = results.get('distances')

        # Only one query was issued, so the matches live at index 0.
        formatted_results = [
            {
                "document": document,
                "metadata": metadatas[0][i] if metadatas is not None else {},
                "distance": distances[0][i] if distances is not None else None,
            }
            for i, document in enumerate(results['documents'][0])
        ]

        print("Retrieved information successfully.")
        return formatted_results
    except Exception as e:
        print(f"Error retrieving information: {e}")
        return f"An error occurred: {e}"


if __name__ == "__main__":
    # Manual smoke test: run a sample query against the persisted collection
    # and print whatever retrieveInfo returns.
    print(retrieveInfo("What happens when there are less than 11 players fit?"))