# Pizza_AI / vector.py
# alaselababatunde's picture
# Updated
# f7a940a
import os
import pandas as pd
from langchain_huggingface import HuggingFaceEmbeddings
from langchain_chroma import Chroma
from langchain_core.documents import Document
# --- Embedding and Vector Store Setup ---

# Directory where the Chroma DB is persisted. It is created up front
# (no error if it already exists) so the path is always present.
db_location = "/app/Pizza_AI_Agent_DB"
os.makedirs(db_location, exist_ok=True)

# Review dataset; the relative path is resolved against the current
# working directory of the process.
df = pd.read_csv("realistic_restaurant_reviews.csv")

# Embedding model handed to the vector store below.
# NOTE(review): trust_remote_code=True allows code shipped with the model
# repository to run at load time -- confirm this model really needs it.
embeddings = HuggingFaceEmbeddings(
    model_name="sentence-transformers/all-MiniLM-L6-v2",
    model_kwargs={"trust_remote_code": True},
)

# An empty persist directory is taken to mean that nothing has been indexed
# yet, so the documents must be built and added.
_persisted_entries = os.listdir(db_location)
add_documents = not _persisted_entries
# Prepare documents


def _is_missing(value):
    """Return True when *value* is an empty cell (None, NaN or NaT)."""
    return value is None or (
        pd.api.types.is_scalar(value) and bool(pd.isna(value))
    )


def _cell_text(value):
    """Return *value* as stripped text, or "" when the cell is missing."""
    return "" if _is_missing(value) else str(value).strip()


def _metadata_value(value):
    """Unwrap numpy scalars (anything exposing ``.item()``) to native Python.

    NOTE(review): assumes the vector store only wants plain
    str/int/float/bool metadata values -- confirm against the Chroma docs.
    """
    return value.item() if hasattr(value, "item") else value


# Only build the documents when the store still has to be populated.
if add_documents:
    documents = []
    ids = []
    for i, row in df.iterrows():
        # Missing cells are NaN, and str(NaN) would put the literal text
        # "nan" into the embedded content, so normalise them to "".
        title = _cell_text(row.get("Title", ""))
        review = _cell_text(row.get("Review", ""))

        # Join only the parts that exist, so a missing title does not leave
        # a dangling ". " in front of the review.
        page_content = ". ".join(part for part in (title, review) if part)
        if not page_content:
            # Nothing to embed for this row.
            continue

        # Copy the optional columns into metadata, leaving out missing values.
        metadata = {}
        for column, key in (("Rating", "rating"), ("Date", "date")):
            if column in row and not _is_missing(row[column]):
                metadata[key] = _metadata_value(row[column])

        # The DataFrame index doubles as the document id.
        doc_id = str(i)
        documents.append(
            Document(
                page_content=page_content,
                metadata=metadata,
                id=doc_id,
            )
        )
        ids.append(doc_id)
# Chroma vector store for the "restaurant_reviews" collection, persisted
# under db_location and embedding text with the model configured above.
vector_store = Chroma(
    collection_name="restaurant_reviews",
    embedding_function=embeddings,
    persist_directory=db_location,
)

# Index the prepared documents only when the persist directory was empty at
# start-up.
# NOTE(review): if indexing fails part-way, the directory may no longer be
# empty on the next run and the documents would not be re-added -- confirm.
if add_documents:
    vector_store.add_documents(documents=documents, ids=ids)

# Retriever over the store; search_kwargs requests k=5 results per query.
retriever = vector_store.as_retriever(search_kwargs={"k": 5})