Spaces:
Sleeping
Sleeping
# Build (or reopen) the persistent Chroma vector store of restaurant reviews
# and expose `retriever` for similarity search over them.

import os

import pandas as pd
from langchain_chroma import Chroma
from langchain_core.documents import Document
from langchain_huggingface import HuggingFaceEmbeddings


def _is_missing(value):
    """Return True when *value* is None or a pandas/NumPy missing marker (NaN/NaT)."""
    return value is None or (pd.api.types.is_scalar(value) and bool(pd.isna(value)))


def _cell_text(value):
    """Return a CSV cell as text, mapping missing cells to "" instead of "nan"."""
    return "" if _is_missing(value) else str(value)


def _page_content(title, review):
    """Join title and review as "<title>. <review>", omitting whichever is blank."""
    parts = [part for part in (title, review) if part.strip()]
    return ". ".join(parts).strip()


def _native(value):
    """Unwrap NumPy scalars to plain Python values; other values pass through."""
    return value.item() if hasattr(value, "item") else value


def _build_documents(frame):
    """Convert review rows into LangChain documents.

    Args:
        frame: DataFrame of reviews. The "Title" and "Review" columns supply
            the text; "Rating" and "Date", when present, become metadata.

    Returns:
        A ``(documents, ids)`` pair of equal length. Each id is the string
        form of the row's index label. Rows with no text at all are skipped.
    """
    # Column presence cannot change between rows, so check it once.
    metadata_columns = [
        (column, column.lower())
        for column in ("Rating", "Date")
        if column in frame.columns
    ]

    documents = []
    ids = []
    for index, row in frame.iterrows():
        page_content = _page_content(
            _cell_text(row.get("Title", "")),
            _cell_text(row.get("Review", "")),
        )
        if not page_content:
            # Nothing to embed or retrieve for a row without any text.
            continue

        metadata = {}
        for column, key in metadata_columns:
            value = row[column]
            # Leave missing values out rather than storing NaN as metadata.
            if not _is_missing(value):
                metadata[key] = _native(value)

        doc_id = str(index)
        documents.append(
            Document(page_content=page_content, metadata=metadata, id=doc_id)
        )
        ids.append(doc_id)
    return documents, ids


# --- Embedding and Vector Store Setup ---

# Ensure writable directory for Chroma DB inside the container
db_location = "/app/Pizza_AI_Agent_DB"
os.makedirs(db_location, exist_ok=True)

# Load the CSV dataset (path is relative to the current working directory)
df = pd.read_csv("realistic_restaurant_reviews.csv")

# Initialize embeddings
# NOTE(review): trust_remote_code lets code shipped in the model repository run
# at load time; it is probably unnecessary for this model - confirm and drop.
embeddings = HuggingFaceEmbeddings(
    model_name="sentence-transformers/all-MiniLM-L6-v2",
    model_kwargs={"trust_remote_code": True},
)

# An empty directory means the store has not been seeded yet. This must be
# evaluated before Chroma is constructed on the same directory below.
add_documents = not os.listdir(db_location)

# Prepare documents only when the store needs seeding
if add_documents:
    documents, ids = _build_documents(df)

# Initialize Chroma vector store
vector_store = Chroma(
    persist_directory=db_location,
    collection_name="restaurant_reviews",
    embedding_function=embeddings,
)

# Seed the store on first run; skip the call when there is nothing to add
if add_documents and documents:
    vector_store.add_documents(documents=documents, ids=ids)

# Create retriever returning the 5 most similar reviews per query
retriever = vector_store.as_retriever(
    search_kwargs={"k": 5}
)