File size: 1,323 Bytes
2115505 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 |
import chromadb
from chromadb.utils import embedding_functions
import csv
# --- Setup ChromaDB (in-memory for Hugging Face Spaces free tier) ---
chroma_client = chromadb.Client()

# SentenceTransformer embedding function
sentence_transformer_ef = embedding_functions.SentenceTransformerEmbeddingFunction(
    model_name="all-mpnet-base-v2"
)

# Fetch the collection if it already exists, otherwise create it.
# create_collection() raises when the name is already taken, which would make
# this module fail if it is executed a second time in the same process
# (e.g. an app hot-reload or a notebook re-run).
collection = chroma_client.get_or_create_collection(
    name="my_collection",
    embedding_function=sentence_transformer_ef,
)
# --- Load CSV data ---
# Column 0 of each row is stored as the "item_id" metadata and column 1 is the
# text that gets embedded (presumably the dish name/description -- confirm
# against menu_items.csv).
documents = []
metadatas = []
ids = []

# newline="" is what the csv module asks for so that line breaks inside quoted
# fields are parsed correctly; the explicit encoding avoids depending on the
# platform's default locale.
with open("menu_items.csv", newline="", encoding="utf-8") as file:
    for i, line in enumerate(csv.reader(file)):
        if i == 0:
            continue  # skip header
        if len(line) < 2:
            continue  # blank or truncated row: no document text to index
        documents.append(line[1])
        metadatas.append({"item_id": line[0]})
        # The record id is the row's position in the CSV (header is row 0),
        # so ids stay stable even when malformed rows are skipped.
        ids.append(str(i))

# Add to ChromaDB
# A header-only (or empty) CSV yields no rows; skip the call rather than
# handing the collection an empty batch.
if documents:
    collection.add(
        documents=documents,
        metadatas=metadatas,
        ids=ids,
    )
def search_dishes(query: str) -> str:
    """Search for top 5 similar dishes.

    Args:
        query: Free-text query passed to the collection as the single
            query text.

    Returns:
        One line per hit, formatted as ``"<item_id>: <document>"`` and joined
        with newlines, in the order the collection returned them. An empty
        string when there are no hits.
    """
    results = collection.query(
        query_texts=[query],
        n_results=5,
        include=["documents", "metadatas"],
    )
    # Exactly one query text was sent, so this query's results sit at index 0
    # of each returned list.
    hits = results["documents"][0]
    hit_metadatas = results["metadatas"][0]
    # Documents and metadatas are parallel lists; pair them directly instead
    # of indexing both by position.
    return "\n".join(
        f"{meta['item_id']}: {doc}" for doc, meta in zip(hits, hit_metadatas)
    )
|