| import os |
| import pandas as pd |
| from langchain_huggingface import HuggingFaceEmbeddings |
| from langchain_chroma import Chroma |
| from langchain_core.documents import Document |
|
|
| |
|
|
| |
# Directory used as the Chroma persistence location; created if it is absent.
db_location = "/app/Pizza_AI_Agent_DB"
os.makedirs(db_location, exist_ok=True)

# Review data, read from a path relative to the current working directory.
df = pd.read_csv(filepath_or_buffer="realistic_restaurant_reviews.csv")

# Embedding model handed to the vector store.
# NOTE(review): trust_remote_code=True lets the model repository run its own
# Python code at load time; presumably this stock sentence-transformers
# checkpoint does not need it -- confirm and drop the flag if so.
embeddings = HuggingFaceEmbeddings(
    model_kwargs=dict(trust_remote_code=True),
    model_name="sentence-transformers/all-MiniLM-L6-v2",
)

# True only while the database directory is still empty; the rest of the
# script uses this flag to decide whether the reviews must be ingested.
add_documents = len(os.listdir(db_location)) == 0
|
|
| |
def _cell_text(value):
    """Return a CSV cell as text, mapping a missing value (None/NaN) to ""."""
    return "" if pd.isna(value) else str(value)


# Build one Document per CSV row, but only when ``add_documents`` is set
# (i.e. the database directory was empty when it was checked).
if add_documents:
    # Metadata key -> CSV column, restricted to the columns that actually
    # exist; checked once here instead of once per row.
    _metadata_columns = {
        key: column
        for key, column in (("rating", "Rating"), ("date", "Date"))
        if column in df.columns
    }

    documents = []
    ids = []
    for i, row in df.iterrows():
        title = _cell_text(row.get("Title"))
        review = _cell_text(row.get("Review"))
        # Join only the parts that are present: a missing title or review
        # must not end up embedded as the text "nan" or as a stray ". ".
        page_content = ". ".join(
            part for part in (title, review) if part
        ).strip()

        # Leave out missing values rather than storing NaN as metadata.
        metadata = {
            key: row[column]
            for key, column in _metadata_columns.items()
            if pd.notna(row[column])
        }

        # The DataFrame index label doubles as the document id.
        doc_id = str(i)
        document = Document(
            page_content=page_content,
            metadata=metadata,
            id=doc_id,
        )
        ids.append(doc_id)
        documents.append(document)
|
|
| |
# Chroma collection persisted under ``db_location``, using ``embeddings`` as
# its embedding function.
vector_store = Chroma(
    collection_name="restaurant_reviews",
    embedding_function=embeddings,
    persist_directory=db_location,
)

# Ingest the prepared documents only when the first-run flag is set;
# ``documents`` and ``ids`` exist only in that case.
if add_documents:
    vector_store.add_documents(ids=ids, documents=documents)

# Retriever over the collection, configured with k=5 (presumably the number
# of matches returned per query -- see the LangChain retriever docs).
retriever = vector_store.as_retriever(search_kwargs=dict(k=5))
|
|