ak2704 committed (verified)
Commit ea5488c · 1 parent: 8bfeac6

Initial commit: E-commerce RAG Docker demo

.gitattributes CHANGED
@@ -33,3 +33,4 @@ saved_model/**/* filter=lfs diff=lfs merge=lfs -text
 *.zip filter=lfs diff=lfs merge=lfs -text
 *.zst filter=lfs diff=lfs merge=lfs -text
 *tfevents* filter=lfs diff=lfs merge=lfs -text
+ecom_chroma_db/chroma.sqlite3 filter=lfs diff=lfs merge=lfs -text
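
The ChromaDB SQLite file is now LFS-tracked, so a plain clone without git-lfs leaves a small text pointer stub in its place and the app will fail to open the database. Below is a minimal pre-flight check, assuming it runs from the repo root; `is_lfs_pointer` is a hypothetical helper, not part of this commit:

```python
# Sanity check (assumption: run from the repo root) that the LFS-tracked
# ChromaDB file was actually downloaded, not left as a pointer stub.
# A pointer stub is a tiny text file starting with the LFS spec header.
from pathlib import Path

def is_lfs_pointer(path: str) -> bool:
    p = Path(path)
    if not p.exists() or p.stat().st_size > 1024:
        return False
    return p.read_bytes().startswith(b"version https://git-lfs")

if is_lfs_pointer("ecom_chroma_db/chroma.sqlite3"):
    raise SystemExit("chroma.sqlite3 is an LFS pointer stub; run `git lfs pull` first.")
```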
Dockerfile ADDED
@@ -0,0 +1,8 @@
+FROM python:3.10-slim
+WORKDIR /app
+RUN apt-get update && apt-get install -y git && rm -rf /var/lib/apt/lists/*
+COPY requirements.txt /app/requirements.txt
+RUN pip install --no-cache-dir -r /app/requirements.txt
+COPY . /app
+EXPOSE 7860
+CMD ["streamlit", "run", "app.py", "--server.port=7860", "--server.address=0.0.0.0"]
README.md CHANGED
@@ -1,10 +1,19 @@
 ---
-title: Ecom Rag
-emoji: 🐠
+title: E-commerce RAG Demo
+emoji: 🛍️
 colorFrom: pink
-colorTo: green
+colorTo: purple
 sdk: docker
+app_file: app.py
 pinned: false
 ---
 
-Check out the configuration reference at https://huggingface.co/docs/hub/spaces-config-reference
+# E-commerce RAG Demo (Streamlit inside Docker)
+
+This Space was auto-created from Colab. It ships with:
+- A tiny multi-source dataset (descriptions/specs/reviews)
+- Embeddings via `all-MiniLM-L6-v2` stored in **ChromaDB** (bundled in the repo)
+- A sentiment analysis model (bundled in the repo)
+- A Streamlit app for **recommendations** and **comparisons**
+
+## Run locally
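
No file in this commit builds the bundled index, so here is a minimal ingestion sketch for reference. The collection name, the cosine HNSW space, and the metadata keys `title`/`category`/`price` are taken from how app.py reads the index; the sample product and ID are hypothetical:

```python
# Minimal ingestion sketch (not part of this commit): build the ChromaDB
# index the way app.py expects to read it.
import chromadb
from sentence_transformers import SentenceTransformer

client = chromadb.PersistentClient(path='ecom_chroma_db')
collection = client.get_or_create_collection('products', metadata={"hnsw:space": "cosine"})
embedder = SentenceTransformer('sentence-transformers/all-MiniLM-L6-v2')

# Hypothetical sample document with the metadata shape app.py expects.
docs = ["AcousticPro X1 earbuds: 8h battery, ENC mics, 38g, ideal for calls and gym."]
metas = [{'title': 'AcousticPro X1', 'category': 'Audio', 'price': 2999}]

collection.add(
    ids=['prod-001'],
    documents=docs,
    embeddings=embedder.encode(docs).tolist(),
    metadatas=metas,
)
```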
app.py ADDED
@@ -0,0 +1,108 @@
+import os
+import json
+import chromadb
+from sentence_transformers import SentenceTransformer
+import streamlit as st
+from typing import List
+import google.generativeai as genai
+from transformers import pipeline, AutoModelForSequenceClassification, AutoTokenizer
+
+st.set_page_config(page_title='E-commerce RAG Demo', layout='wide')
+st.title('🛍️ E-commerce RAG Demo (Recommendations & Comparisons)')
+
+# Configure Gemini (optional)
+GEMINI_API_KEY = os.environ.get('GEMINI_API_KEY', '')
+if GEMINI_API_KEY:
+    genai.configure(api_key=GEMINI_API_KEY)
+
+@st.cache_resource(show_spinner=False)
+def get_clients():
+    client = chromadb.PersistentClient(path='ecom_chroma_db')
+    collection = client.get_or_create_collection('products', metadata={"hnsw:space": "cosine"})
+    embedder = SentenceTransformer('sentence-transformers/all-MiniLM-L6-v2')
+    return client, collection, embedder
+
+@st.cache_resource(show_spinner=False)
+def get_sentiment_pipeline():
+    model_dir = 'sentiment_model'  # Load from saved directory
+    tokenizer = AutoTokenizer.from_pretrained(model_dir)
+    model = AutoModelForSequenceClassification.from_pretrained(model_dir)
+    return pipeline('sentiment-analysis', model=model, tokenizer=tokenizer)
+
+_, collection, embedder = get_clients()
+sa_pipeline = get_sentiment_pipeline()  # bundled sentiment model; not yet wired into the UI
+
+
+def retrieve(query: str, k: int = 5):
+    qemb = embedder.encode([query]).tolist()
+    out = collection.query(query_embeddings=qemb, n_results=k, include=['documents', 'metadatas', 'distances'])
+    items = []
+    for doc, meta, dist in zip(out['documents'][0], out['metadatas'][0], out['distances'][0]):
+        items.append({'doc': doc, 'meta': meta, 'score': 1 - dist})  # cosine distance -> similarity-style score
+    return items
+
+def llm_generate(prompt: str) -> str:
+    if GEMINI_API_KEY:
+        model = genai.GenerativeModel('gemini-1.5-flash')
+        resp = model.generate_content(prompt)
+        return resp.text
+    # Fallback if no key: return the prompt tail as a simple echo
+    return 'LLM disabled. Showing retrieved context only.\n\n' + prompt[-1500:]
+
+st.sidebar.header('Preferences')
+prefs_cat = st.sidebar.multiselect('Preferred categories', ['Audio', 'Wearables', 'Computers'])
+price_min, price_max = st.sidebar.slider('Price range', 0, 50000, (0, 50000), step=500)
+
+mode = st.radio('Mode', ['Recommend Products', 'Compare Products'])
+query = st.text_input('Describe what you need (e.g., "lightweight earbuds for calls and gym")')
+topk = st.slider('Top K', 1, 10, 5)
+
+if st.button('Run'):
+    if not query:
+        st.warning('Enter a query first.')
+    else:
+        results = retrieve(query, k=topk)
+        # Simple personalization: filter by category and price range
+        filtered = []
+        for r in results:
+            cat_ok = (not prefs_cat) or (r['meta']['category'] in prefs_cat)
+            price_ok = (price_min <= r['meta']['price'] <= price_max)
+            if cat_ok and price_ok:
+                filtered.append(r)
+        if not filtered:
+            filtered = results
+
+        if mode == 'Recommend Products':
+            ctx = '\n\n'.join([f"[Score={round(x['score'],3)}] {x['doc']}" for x in filtered])
+            prompt = f"""
+You are an assistant that recommends e-commerce products. Based on the retrieved context below, recommend 3 products and explain why each fits the user's query. Summarize pros/cons succinctly. If information is missing, say so.
+USER QUERY: {query}
+CONTEXT:\n{ctx}
+"""
+            answer = llm_generate(prompt)
+            st.markdown(answer)
+            st.subheader('Retrieved Items')
+            for r in filtered:
+                st.write(r['meta'])
+                with st.expander('Context'):
+                    st.write(r['doc'])
+        else:
+            # Compare top 2-4
+            comps = filtered[:4]
+            if not comps:
+                st.info('No items to compare.')
+            else:
+                cols = st.columns(len(comps))
+                for c, r in zip(cols, comps):
+                    with c:
+                        st.metric(r['meta']['title'], f"₹{int(r['meta']['price'])}")
+                        st.caption(f"Category: {r['meta']['category']} | Score: {r['score']:.3f}")
+                        with st.expander('Details'):
+                            st.write(r['doc'])
+                ctx = '\n\n'.join([r['doc'] for r in comps])
+                prompt = f"""
+Create a concise comparison table (Markdown) for the products in the context. Rows: Price, Category, Best for, Not ideal for, Key specs. Then a 3-bullet summary of trade-offs.
+USER QUERY: {query}
+CONTEXT:\n{ctx}
+"""
+                st.markdown(llm_generate(prompt))
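
Note that app.py loads `sa_pipeline` but never calls it. If the bundled sentiment model were wired in, the call would follow the standard `transformers` pipeline shape, roughly as below; the review snippets are hypothetical, and this sketch assumes it runs inside app.py where `sa_pipeline` is defined:

```python
# Hypothetical extension (not in this commit): score retrieved review
# snippets with the bundled pipeline. A transformers sentiment pipeline
# returns one {'label': ..., 'score': ...} dict per input string.
reviews = [
    "Battery easily lasts a full day, very happy with these.",
    "Mic quality on calls is disappointing in windy conditions.",
]
for text, result in zip(reviews, sa_pipeline(reviews)):
    print(f"{result['label']} ({result['score']:.2f}): {text}")
```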
ecom_chroma_db/0adce968-463d-42bc-be27-acfba3a21a21/data_level0.bin ADDED
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:b8146ecc3e4c3a36ea9b3edc3778630c452f483990ec942d38e8006f4661e430
+size 16760000
ecom_chroma_db/0adce968-463d-42bc-be27-acfba3a21a21/header.bin ADDED
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:18f1e924efbb5e1af5201e3fbab86a97f5c195c311abe651eeec525884e5e449
+size 100
ecom_chroma_db/0adce968-463d-42bc-be27-acfba3a21a21/length.bin ADDED
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:e7e2dcff542de95352682dc186432e98f0188084896773f1973276b0577d5305
+size 40000
ecom_chroma_db/0adce968-463d-42bc-be27-acfba3a21a21/link_lists.bin ADDED
File without changes
ecom_chroma_db/chroma.sqlite3 ADDED
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:8da19cc93a21395e2af3990299ac6bf726485aabe0c44cc0fb22b1e2362e5220
+size 245760
requirements.txt ADDED
@@ -0,0 +1,8 @@
+
+sentence-transformers
+chromadb==0.5.3
+transformers
+streamlit
+google-generativeai
+tiktoken
+rapidfuzz
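
After installing the requirements and fetching the LFS artifacts, a quick end-to-end smoke test can confirm that the bundled index opens and answers a query. A sketch assuming it runs from the repo root; the path and collection name come from app.py, and the query string is arbitrary:

```python
# Smoke test (assumptions: run from the repo root after
# `pip install -r requirements.txt` and `git lfs pull`):
# open the bundled index and run one query end to end.
import chromadb
from sentence_transformers import SentenceTransformer

client = chromadb.PersistentClient(path='ecom_chroma_db')
collection = client.get_collection('products')
print('items in collection:', collection.count())

embedder = SentenceTransformer('sentence-transformers/all-MiniLM-L6-v2')
out = collection.query(
    query_embeddings=embedder.encode(['earbuds for gym']).tolist(),
    n_results=3,
    include=['metadatas', 'distances'],
)
for meta, dist in zip(out['metadatas'][0], out['distances'][0]):
    print(f"score={1 - dist:.3f}", meta)  # same 1 - distance scoring as app.py
```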