# Source: Hugging Face Space by kmsmohamedansar — "Update app.py" (commit 016d3b5, verified)
# app.py
# βœ… Core Imports
import os
import pandas as pd
import kagglehub
import gradio as gr
from langchain.embeddings import HuggingFaceEmbeddings
from langchain.vectorstores import FAISS
from transformers import pipeline
# --- Step 1: fetch the Instacart dataset from Kaggle and list its files ---
print("πŸ“¦ Starting dataset download...")
data_path = kagglehub.dataset_download("yasserh/instacart-online-grocery-basket-analysis-dataset")
print(f"βœ… Dataset downloaded at: {data_path}")
print("πŸ“ Files:", os.listdir(data_path))


def _read_table(filename):
    """Load one CSV file from the downloaded dataset directory."""
    return pd.read_csv(os.path.join(data_path, filename))


# --- Step 2: read the three lookup tables we need ---
products_df = _read_table("products.csv")
aisles_df = _read_table("aisles.csv")
departments_df = _read_table("departments.csv")
print("🧾 Sample products data:")
print(products_df.head())

# --- Step 3: denormalize products with their aisle and department names ---
products_full = (
    products_df
    .merge(aisles_df, how="left", on="aisle_id")
    .merge(departments_df, how="left", on="department_id")
)
print("πŸ”— Merged data sample:")
print(products_full.head())
# βœ… Step 4: Create Combined Text Column
def make_product_text(row):
    """Return a single descriptive sentence for one product row.

    Args:
        row: Mapping with 'product_name', 'aisle', and 'department' keys
            (e.g. one pandas Series from the merged products table).

    Returns:
        str: "Product: <name>. Aisle: <aisle>. Department: <department>."
    """
    return "Product: {}. Aisle: {}. Department: {}.".format(
        row["product_name"], row["aisle"], row["department"]
    )
# --- Step 4: build the text column that the embedder will index ---
products_full["combined_text"] = [
    make_product_text(row) for _, row in products_full.iterrows()
]
print("πŸ“ Sample combined description:")
print(products_full["combined_text"].iloc[0])
# βœ… Step 5: Generate Embeddings + FAISS Index
print("πŸ” Loading embedding model...")
# Local sentence-transformers model used to embed each product description.
local_embedder = HuggingFaceEmbeddings(model_name="sentence-transformers/all-MiniLM-L6-v2")
print("πŸ“š Creating FAISS vector store...")
# Embed every combined product text and index it for similarity search.
vector_store = FAISS.from_texts(products_full["combined_text"].tolist(), local_embedder)
print("βœ… Vector store ready!")
# βœ… Step 6: Load Local LLM
print("🧠 Loading local language model...")
# flan-t5-base wrapped as a text2text pipeline; max_length caps answer tokens.
qa_pipeline = pipeline("text2text-generation", model="google/flan-t5-base", max_length=256)
print("βœ… Model loaded!")
# βœ… Step 7: Define Q&A Function
def local_answer_question(query, k=4):
    """Answer a grocery-product question via retrieval-augmented generation.

    Retrieves the ``k`` most similar product descriptions from the FAISS
    vector store, then asks the local flan-t5 pipeline to answer the
    question using those descriptions as context.

    Args:
        query: Natural-language question from the user.
        k: Number of product descriptions to retrieve as context
            (default 4, matching the original hard-coded value).

    Returns:
        The model-generated answer string.
    """
    docs = vector_store.similarity_search(query, k=k)
    # Generator expression avoids building an intermediate list for join.
    context = "\n".join(doc.page_content for doc in docs)
    prompt = f"Answer the question using the context below:\n\nContext:\n{context}\n\nQuestion: {query}"
    return qa_pipeline(prompt)[0]["generated_text"]
# βœ… Step 8: Gradio Interface
# Single-textbox UI: a question goes in, the generated answer comes out.
chat_interface = gr.Interface(
    fn=local_answer_question,
    inputs="text",
    outputs="text",
    title="πŸ›’ Grocery AI Assistant (Fully Local)",
    description="Ask about grocery products β€” answers generated using FAISS + flan-t5-base!",
)
# βœ… Step 9: Launch the App
if __name__ == "__main__":
    # debug=True surfaces full tracebacks in the console while developing.
    chat_interface.launch(debug=True)