# app.py — fully local grocery Q&A demo: Instacart catalog -> FAISS -> flan-t5.

# --- Core imports -----------------------------------------------------------
import os

import gradio as gr
import kagglehub
import pandas as pd
# NOTE(review): these langchain paths are deprecated upstream; the modern home
# is langchain_community — confirm before bumping the dependency.
from langchain.embeddings import HuggingFaceEmbeddings
from langchain.vectorstores import FAISS
from transformers import pipeline

# --- Step 1: fetch the Kaggle dataset and show what arrived -----------------
print("๐ฆ Starting dataset download...")
data_path = kagglehub.dataset_download(
    "yasserh/instacart-online-grocery-basket-analysis-dataset"
)
print(f"โ Dataset downloaded at: {data_path}")
print("๐ Files:", os.listdir(data_path))


# --- Step 2: read the three catalog tables ----------------------------------
def _load_table(filename):
    """Read one CSV from the downloaded dataset directory."""
    return pd.read_csv(os.path.join(data_path, filename))


products_df = _load_table("products.csv")
aisles_df = _load_table("aisles.csv")
departments_df = _load_table("departments.csv")
print("๐งพ Sample products data:")
print(products_df.head())

# --- Step 3: denormalize aisle and department names onto each product -------
products_full = (
    products_df
    .merge(aisles_df, how="left", on="aisle_id")
    .merge(departments_df, how="left", on="department_id")
)
print("๐ Merged data sample:")
print(products_full.head())


# --- Step 4: render each product row as one descriptive sentence ------------
def make_product_text(row):
    """Return a single natural-language sentence describing a product row."""
    return f"Product: {row['product_name']}. Aisle: {row['aisle']}. Department: {row['department']}."
# --- Step 4 (cont.): materialize the combined description column ------------
products_full["combined_text"] = products_full.apply(make_product_text, axis=1)
print("๐ Sample combined description:")
print(products_full["combined_text"].iloc[0])

# --- Step 5: embeddings + FAISS index ---------------------------------------
print("๐ Loading embedding model...")
local_embedder = HuggingFaceEmbeddings(model_name="sentence-transformers/all-MiniLM-L6-v2")

print("๐ Creating FAISS vector store...")
vector_store = FAISS.from_texts(products_full["combined_text"].tolist(), local_embedder)
print("โ Vector store ready!")

# --- Step 6: local LLM -------------------------------------------------------
print("๐ง Loading local language model...")
# FIX: max_new_tokens caps only the generated answer. The old max_length=256
# counted prompt + answer together for generation, so a long retrieved context
# could silently squeeze the answer down to nothing.
qa_pipeline = pipeline(
    "text2text-generation",
    model="google/flan-t5-base",
    max_new_tokens=256,
)
print("โ Model loaded!")


# --- Step 7: retrieval-augmented Q&A ----------------------------------------
def local_answer_question(query):
    """Answer `query` using the top-4 most similar product descriptions.

    Retrieves nearest-neighbor product texts from the FAISS store, stuffs
    them into a prompt, and returns the flan-t5 generation as a string.
    """
    docs = vector_store.similarity_search(query, k=4)
    context = "\n".join(doc.page_content for doc in docs)
    prompt = f"Answer the question using the context below:\n\nContext:\n{context}\n\nQuestion: {query}"
    # FIX: truncation=True keeps the prompt within flan-t5's encoder input
    # limit (512 tokens) instead of erroring/warning on long contexts.
    result = qa_pipeline(prompt, truncation=True)[0]["generated_text"]
    return result


# --- Step 8: Gradio UI -------------------------------------------------------
chat_interface = gr.Interface(
    fn=local_answer_question,
    inputs="text",
    outputs="text",
    title="๐ Grocery AI Assistant (Fully Local)",
    description="Ask about grocery products โ answers generated using FAISS + flan-t5-base!",
)

# --- Step 9: launch ----------------------------------------------------------
if __name__ == "__main__":
    chat_interface.launch(debug=True)