File size: 2,692 Bytes
016d3b5
a1d64df
016d3b5
a1d64df
 
 
016d3b5
 
 
 
a1d64df
016d3b5
 
a1d64df
016d3b5
 
a1d64df
016d3b5
a1d64df
 
 
 
016d3b5
a1d64df
 
016d3b5
a1d64df
 
 
016d3b5
a1d64df
 
016d3b5
a1d64df
 
 
 
016d3b5
a1d64df
 
016d3b5
 
a1d64df
 
016d3b5
a1d64df
 
016d3b5
a1d64df
016d3b5
 
a1d64df
016d3b5
a1d64df
016d3b5
a1d64df
 
 
 
 
 
 
016d3b5
a1d64df
016d3b5
 
 
a1d64df
016d3b5
a1d64df
 
016d3b5
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
# app.py

# βœ… Core Imports
import os
import pandas as pd
import kagglehub
import gradio as gr
from langchain.embeddings import HuggingFaceEmbeddings
from langchain.vectorstores import FAISS
from transformers import pipeline

# βœ… Step 1: Download and Verify Dataset
# Pull the Instacart basket-analysis dataset via kagglehub (cached locally on
# repeated runs). NOTE(review): requires network access and Kaggle credentials
# to be configured — confirm for the deployment environment.
print("πŸ“¦ Starting dataset download...")
data_path = kagglehub.dataset_download("yasserh/instacart-online-grocery-basket-analysis-dataset")
print(f"βœ… Dataset downloaded at: {data_path}")
# List the extracted files so a missing CSV is visible before Step 2 fails.
print("πŸ“ Files:", os.listdir(data_path))

# βœ… Step 2: Load Required CSVs
# Read the three lookup tables used to build product descriptions; the
# generator is unpacked left-to-right, so files load in the listed order.
products_df, aisles_df, departments_df = (
    pd.read_csv(os.path.join(data_path, csv_name))
    for csv_name in ("products.csv", "aisles.csv", "departments.csv")
)

print("🧾 Sample products data:")
print(products_df.head())

# βœ… Step 3: Merge Aisles & Departments
# Attach human-readable aisle and department names to every product in a
# single chained left-merge (products is the authoritative row set).
products_full = (
    products_df
    .merge(aisles_df, how="left", on="aisle_id")
    .merge(departments_df, how="left", on="department_id")
)

print("πŸ”— Merged data sample:")
print(products_full.head())

# βœ… Step 4: Create Combined Text Column
def make_product_text(row):
    """Build a one-line natural-language description of a product row.

    Parameters
    ----------
    row : mapping with keys 'product_name', 'aisle', 'department'
        Typically a pandas Series produced by the Step-3 merge.

    Returns
    -------
    str
        "Product: X. Aisle: Y. Department: Z." — missing (NaN) values are
        rendered as "unknown" instead of the literal string "nan".
    """
    # The Step-3 left merges can leave NaN aisle/department for ids with no
    # match; an f-string would embed the string "nan" into the text that gets
    # indexed in Step 5, so substitute a readable placeholder instead.
    def _clean(value):
        return "unknown" if pd.isna(value) else value

    return (
        f"Product: {_clean(row['product_name'])}. "
        f"Aisle: {_clean(row['aisle'])}. "
        f"Department: {_clean(row['department'])}."
    )

# Apply row-wise to build the text column that Step 5 embeds and indexes.
products_full["combined_text"] = products_full.apply(make_product_text, axis=1)
print("πŸ“ Sample combined description:")
print(products_full["combined_text"].iloc[0])

# βœ… Step 5: Generate Embeddings + FAISS Index
print("πŸ” Loading embedding model...")
# Small sentence-transformers model; first run downloads weights from the
# Hugging Face hub.
local_embedder = HuggingFaceEmbeddings(model_name="sentence-transformers/all-MiniLM-L6-v2")

print("πŸ“š Creating FAISS vector store...")
# Embeds every combined_text row into an in-memory FAISS index; this is the
# slow step and scales with the number of products.
vector_store = FAISS.from_texts(products_full["combined_text"].tolist(), local_embedder)

print("βœ… Vector store ready!")

# βœ… Step 6: Load Local LLM
# flan-t5-base via the transformers pipeline; max_length caps the size of the
# generated answer.
print("🧠 Loading local language model...")
qa_pipeline = pipeline("text2text-generation", model="google/flan-t5-base", max_length=256)
print("βœ… Model loaded!")

# βœ… Step 7: Define Q&A Function
def local_answer_question(query):
    """Answer *query* by retrieving similar product texts and prompting the LLM.

    Retrieves the four nearest product descriptions from the FAISS index,
    stuffs them into a context block, and returns the generated answer text.
    """
    matches = vector_store.similarity_search(query, k=4)
    retrieved = "\n".join(doc.page_content for doc in matches)
    prompt = (
        "Answer the question using the context below:\n\n"
        f"Context:\n{retrieved}\n\nQuestion: {query}"
    )
    generated = qa_pipeline(prompt)
    return generated[0]["generated_text"]

# βœ… Step 8: Gradio Interface
# Minimal single-turn UI: one text box in, one text box out, wired to the
# retrieval-augmented answer function above.
chat_interface = gr.Interface(
    fn=local_answer_question,
    inputs="text",
    outputs="text",
    title="πŸ›’ Grocery AI Assistant (Fully Local)",
    description="Ask about grocery products β€” answers generated using FAISS + flan-t5-base!",
)

# βœ… Step 9: Launch the App
# Launch only when executed as a script (not when imported);
# debug=True enables Gradio's debug mode.
if __name__ == "__main__":
    chat_interface.launch(debug=True)