|
|
|
|
|
|
|
|
|
|
|
import os |
|
|
import pandas as pd |
|
|
import kagglehub |
|
|
import gradio as gr |
|
|
from langchain.embeddings import HuggingFaceEmbeddings |
|
|
from langchain.vectorstores import FAISS |
|
|
from transformers import pipeline |
|
|
|
|
|
|
|
|
# --- Download the Instacart dataset from Kaggle ---
# Fix: the original emoji were mojibake (UTF-8 emoji decoded through a
# single-byte codepage); the garbled checkmark even embedded a NEL (U+0085)
# character inside the string literal, splitting it mid-line.
print("📦 Starting dataset download...")

data_path = kagglehub.dataset_download("yasserh/instacart-online-grocery-basket-analysis-dataset")

print(f"✅ Dataset downloaded at: {data_path}")
print("📂 Files:", os.listdir(data_path))
|
|
|
|
|
|
|
|
# --- Load the three lookup tables shipped with the dataset ---
# products.csv holds product_name plus aisle_id/department_id foreign keys;
# aisles.csv and departments.csv map those ids to human-readable names.
products_df = pd.read_csv(os.path.join(data_path, "products.csv"))
aisles_df = pd.read_csv(os.path.join(data_path, "aisles.csv"))
departments_df = pd.read_csv(os.path.join(data_path, "departments.csv"))

# Fix: restored the mojibake emoji in the progress message.
print("🧾 Sample products data:")
print(products_df.head())
|
|
|
|
|
|
|
|
# --- Denormalize: attach aisle and department names to each product ---
# Left joins keep every product even if an id has no match (name columns
# would then be NaN).
products_full = products_df.merge(aisles_df, how="left", on="aisle_id")
products_full = products_full.merge(departments_df, how="left", on="department_id")

# Fix: restored the mojibake emoji in the progress message.
print("🔗 Merged data sample:")
print(products_full.head())
|
|
|
|
|
|
|
|
def make_product_text(row):
    """Build a one-sentence text description for a single product row.

    The row must expose ``product_name``, ``aisle`` and ``department``
    by key lookup (a pandas row or plain dict both work). The resulting
    sentence is used as the embedding input for the vector store.
    """
    template = "Product: {}. Aisle: {}. Department: {}."
    return template.format(row["product_name"], row["aisle"], row["department"])
|
|
|
|
|
# --- Build the per-product text that will be embedded ---
# axis=1 applies make_product_text to each row (row-wise).
products_full["combined_text"] = products_full.apply(make_product_text, axis=1)

# Fix: restored the mojibake emoji in the progress message.
print("📝 Sample combined description:")
print(products_full["combined_text"].iloc[0])
|
|
|
|
|
|
|
|
# --- Build the FAISS index over the product descriptions ---
# Fix: restored mojibake emoji; the garbled checkmark embedded a NEL
# (U+0085) inside the string literal, splitting it across lines.
print("🔄 Loading embedding model...")
# all-MiniLM-L6-v2: small sentence-transformer, runs locally on CPU.
local_embedder = HuggingFaceEmbeddings(model_name="sentence-transformers/all-MiniLM-L6-v2")

print("🔍 Creating FAISS vector store...")
# NOTE(review): embeds the entire product catalog in one call — fine for
# this ~50k-row dataset, but consider batching for larger corpora.
vector_store = FAISS.from_texts(products_full["combined_text"].tolist(), local_embedder)

print("✅ Vector store ready!")
|
|
|
|
|
|
|
|
# --- Load the local text-generation model ---
# Fix: restored mojibake emoji; the garbled checkmark embedded a NEL
# (U+0085) inside the string literal, splitting it across lines.
print("🧠 Loading local language model...")
# flan-t5-base via a text2text-generation pipeline; max_length caps the
# generated answer at 256 tokens.
qa_pipeline = pipeline("text2text-generation", model="google/flan-t5-base", max_length=256)

print("✅ Model loaded!")
|
|
|
|
|
|
|
|
def local_answer_question(query):
    """Answer *query* from the product catalog via retrieval + generation.

    Retrieves the 4 most similar product descriptions from the
    module-level FAISS ``vector_store``, stuffs them into a prompt, and
    returns the text generated by the module-level ``qa_pipeline``.
    """
    retrieved = vector_store.similarity_search(query, k=4)

    context_lines = []
    for doc in retrieved:
        context_lines.append(doc.page_content)
    context = "\n".join(context_lines)

    prompt = (
        "Answer the question using the context below:\n\n"
        f"Context:\n{context}\n\nQuestion: {query}"
    )

    generated = qa_pipeline(prompt)
    return generated[0]["generated_text"]
|
|
|
|
|
|
|
|
# --- Gradio UI wiring ---
# Fix: restored mojibake in the title emoji and the description's em dash
# (both were UTF-8 bytes decoded through a single-byte codepage).
chat_interface = gr.Interface(
    fn=local_answer_question,
    inputs="text",
    outputs="text",
    title="🛒 Grocery AI Assistant (Fully Local)",
    description="Ask about grocery products — answers generated using FAISS + flan-t5-base!",
)
|
|
|
|
|
|
|
|
if __name__ == "__main__":
    # Launch the Gradio web UI only when run as a script;
    # debug=True surfaces full tracebacks in the console.
    chat_interface.launch(debug=True)
|
|
|