| import chromadb | |
| from chromadb.utils import embedding_functions | |
| import csv | |
# --- Setup ChromaDB (in-memory for Hugging Face Spaces free tier) ---
chroma_client = chromadb.Client()

# SentenceTransformer embedding function ("all-mpnet-base-v2" model); the
# collection uses it to embed both stored documents and query texts.
sentence_transformer_ef = embedding_functions.SentenceTransformerEmbeddingFunction(
    model_name="all-mpnet-base-v2"
)

# Create collection
collection = chroma_client.create_collection(
    name="my_collection",
    embedding_function=sentence_transformer_ef,
)

# --- Load CSV data ---
# Column 0 of each row is kept as the "item_id" metadata value and column 1 is
# the text that gets embedded and searched.
documents = []
metadatas = []
ids = []
# newline="" lets the csv module handle line endings itself (required for
# quoted fields that contain newlines); the explicit encoding keeps decoding
# independent of the platform default.
with open("menu_items.csv", newline="", encoding="utf-8") as file:
    lines = csv.reader(file)
    next(lines, None)  # skip header (no error if the file is empty)
    # Ids are the 1-based position of the row after the header. Skipped rows
    # still consume a number, so ids stay tied to the row position in the file.
    for i, line in enumerate(lines, start=1):
        if len(line) < 2:
            # Blank or truncated row: there is no document text to index.
            continue
        documents.append(line[1])
        metadatas.append({"item_id": line[0]})
        ids.append(str(i))

# Add to ChromaDB
collection.add(
    documents=documents,
    metadatas=metadatas,
    ids=ids,
)
def search_dishes(query: str) -> str:
    """Search the module-level ``collection`` for the top 5 dishes similar to *query*.

    Args:
        query: Free-text search string passed to the collection as the single
            query text.

    Returns:
        One ``"<item_id>: <document>"`` line per hit, joined with newlines, in
        the order the collection returned them.
    """
    results = collection.query(
        query_texts=[query],
        n_results=5,
        include=["documents", "metadatas"],
    )
    # Only one query text was sent, so its hits are at index 0 of each field.
    hits = results["documents"][0]
    ids_meta = results["metadatas"][0]
    # Walk documents and their metadata in lockstep instead of indexing.
    return "\n".join(
        f"{meta['item_id']}: {doc}" for meta, doc in zip(ids_meta, hits)
    )