rairo committed on
Commit
e1d5ce2
·
verified ·
1 Parent(s): 5bcea8b

Create app.py

Browse files
Files changed (1) hide show
  1. app.py +339 -0
app.py ADDED
@@ -0,0 +1,339 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
import os
import re
from difflib import get_close_matches

import google.generativeai as genai
import pandas as pd
import PyPDF2
import streamlit as st
from dotenv import load_dotenv
from langchain.agents import initialize_agent, Tool
from langchain.agents.agent_types import AgentType
from langchain.memory import ConversationBufferMemory
from langchain_google_genai import ChatGoogleGenerativeAI
from PIL import Image
13
+
14
# Load environment variables (e.g. GOOGLE_API_KEY) from a local .env file.
load_dotenv()


def configure_gemini(api_key):
    """Configure the google.generativeai client and return the Gemini model.

    Args:
        api_key: Google API key passed straight to ``genai.configure``.

    Returns:
        A ``genai.GenerativeModel`` for 'gemini-2.0-flash-thinking-exp'.
    """
    genai.configure(api_key=api_key)
    return genai.GenerativeModel('gemini-2.0-flash-thinking-exp')


# The client is configured exactly once, here. Indexing os.environ (rather
# than os.getenv) keeps the original fail-fast KeyError when the key is unset.
model = configure_gemini(os.environ['GOOGLE_API_KEY'])

# LangChain chat model used for the text-only prompts.
# NOTE(review): presumably reads its credentials from the environment as
# well - confirm against the langchain_google_genai documentation.
llm_flash_exp = ChatGoogleGenerativeAI(
    model="gemini-2.0-flash-exp",
    max_retries=2
)
31
+
32
class SmartShoppingAssistant:
    """Turn free-text shopping requests into rows of a product catalogue.

    The catalogue is a DataFrame with at least a 'ProductName' column; the
    formatting helpers additionally read 'RetailPrice'. Entry lines exchanged
    with the language model have the shape::

        ProductName == "SOME NAME" quantity: 3
    """

    # Separator between the product part and the quantity part of an entry.
    _QUANTITY_SEP = re.compile(r'quantity\s*:', re.IGNORECASE)
    # Captures whatever follows the 'ProductName ==' marker (bullets or
    # numbering placed before the marker are ignored).
    _NAME_FIELD = re.compile(r'ProductName\s*==\s*(.*)', re.IGNORECASE | re.DOTALL)
    _FIRST_INT = re.compile(r'\d+')
    _WHITESPACE = re.compile(r'\s+')
    # Entries in an agent query are separated by the whole word 'or' or by
    # line breaks. Splitting on the bare substring 'or' would cut product
    # names such as "OREO" or "FLOUR" in half.
    _ENTRY_SEP = re.compile(r'\s+or\s+|[\r\n]+', re.IGNORECASE)
    _MATCH_ERROR_PREFIX = "Error matching products:"

    def __init__(self, products_df):
        """Index the catalogue for fuzzy matching and build the agent.

        Args:
            products_df: catalogue DataFrame with a 'ProductName' column.
                It is copied, so the caller's frame is left untouched.
        """
        self.df = products_df.copy()
        # Normalized names (upper-case, trimmed, single spaces) make fuzzy
        # comparison insensitive to case and spacing.
        self.df['CleanName'] = (
            self.df['ProductName']
            .str.upper()
            .str.strip()
            .str.replace(r'\s+', ' ', regex=True)
        )
        # Rows without a name cannot be matched; keeping NaN out of the list
        # also keeps difflib from receiving non-string candidates.
        self.product_names = self.df['CleanName'].dropna().tolist()
        self.memory = ConversationBufferMemory(memory_key="chat_history", return_messages=True)
        self.setup_agent()

    def find_closest_product(self, product_name, threshold=0.7):
        """Return up to three catalogue names similar to ``product_name``.

        Args:
            product_name: free-text product name.
            threshold: minimum difflib similarity ratio (0-1) for a match.

        Returns:
            List of normalized catalogue names, best match first; empty when
            nothing is similar enough or the input is blank / not a string.
        """
        if not isinstance(product_name, str):
            return []
        wanted = self._WHITESPACE.sub(' ', product_name.upper().strip())
        if not wanted:
            return []
        return get_close_matches(wanted, self.product_names, n=3, cutoff=threshold)

    def match_products_with_catalogue(self, extracted_items):
        """Match extracted items with catalogue products using Gemini.

        Args:
            extracted_items: text listing the requested items and quantities.

        Returns:
            The model's answer (one entry line per product, stripped), or a
            string starting with 'Error matching products:' on failure.
        """
        # Skip missing names so the join below never sees a NaN float.
        product_list = self.df['ProductName'].dropna().astype(str).tolist()
        product_string = ", ".join(product_list)

        prompt = f"""
        Given these extracted items and quantities:
        {extracted_items}

        And this product catalogue:
        {product_string}

        Match each item with the most appropriate product from the catalogue.
        For each item, provide:
        1. The exact product name from the catalogue
        2. The quantity (if specified, otherwise assume 1)
        3. Any specific requirements (brand, size, etc.)

        Format the response as one entry per line:
        ProductName == "MATCHED_PRODUCT" quantity: NUMBER
        """

        try:
            matches = llm_flash_exp.predict(prompt)
            return matches.strip()
        except Exception as e:
            return f"Error matching products: {str(e)}"

    @classmethod
    def _parse_entry(cls, item):
        """Parse one entry line into ``(normalized_name, quantity)``.

        Returns None when the line has no 'quantity:' part. The quantity is
        the first integer after the separator and defaults to 1 when absent.
        """
        parts = cls._QUANTITY_SEP.split(item, maxsplit=1)
        if len(parts) < 2:
            return None
        field = cls._NAME_FIELD.search(parts[0])
        raw_name = field.group(1) if field else parts[0]
        name = cls._WHITESPACE.sub(' ', raw_name.strip().strip(' "\'').upper())
        number = cls._FIRST_INT.search(parts[1])
        quantity = int(number.group()) if number else 1
        return name, quantity

    def search_products_fuzzy(self, product_names_with_quantities):
        """Perform fuzzy search and return a DataFrame with product details.

        Args:
            product_names_with_quantities: iterable of entry lines such as
                ``ProductName == "Some Name" quantity: 3``. Lines without a
                'quantity:' part are skipped.

        Returns:
            DataFrame holding the catalogue columns plus 'Quantity', one row
            per distinct matched product. When nothing matches, an empty
            frame with those same columns is returned.
        """
        frames = []
        seen_names = set()

        for item in product_names_with_quantities:
            parsed = self._parse_entry(item)
            if parsed is None:
                continue  # skip badly formatted lines
            clean_name, quantity = parsed

            # Avoid duplicates by checking the cleaned requested name.
            if clean_name in seen_names:
                continue

            candidates = self.find_closest_product(clean_name)
            if not candidates:
                continue

            # Candidates are drawn from CleanName, so the best one always
            # selects at least one catalogue row.
            matched = self.df[self.df['CleanName'] == candidates[0]].copy()
            matched['Quantity'] = quantity
            frames.append(matched)
            seen_names.add(clean_name)

        if not frames:
            # drop_duplicates(subset=...) raises KeyError on a frame without
            # that column, so give the empty result the full schema instead.
            columns = list(self.df.columns)
            if 'Quantity' not in columns:
                columns.append('Quantity')
            return pd.DataFrame(columns=columns)

        combined = pd.concat(frames, ignore_index=True)
        return combined.drop_duplicates(subset=['CleanName'])

    def setup_agent(self):
        """Set up the LangChain agent with the product-search tool."""

        def search_products(query):
            """Tool entry point: fuzzy-search the entries found in ``query``."""
            try:
                # Split into individual product entries.
                product_entries = [
                    entry.strip()
                    for entry in self._ENTRY_SEP.split(query)
                    if entry.strip()
                ]
                results = self.search_products_fuzzy(product_entries)
                if not results.empty:
                    # One line per product with its quantity and price.
                    formatted_results = results.apply(
                        lambda x: f"{x['ProductName']} (Quantity: {x['Quantity']}) - Price: ${x['RetailPrice']:.2f}",
                        axis=1
                    )
                    return "\n".join(formatted_results)
                return "No products found matching your criteria."
            except Exception as e:
                return f"Error executing query: {str(e)}"

        tools = [
            Tool(
                name="Product Search",
                func=search_products,
                description="Search for products in the supermarket database using fuzzy matching"
            )
        ]

        self.agent = initialize_agent(
            tools=tools,
            memory=self.memory,
            llm=llm_flash_exp,
            agent=AgentType.ZERO_SHOT_REACT_DESCRIPTION,
            verbose=True,
            max_iterations=5
        )

    def process_natural_language_query(self, query):
        """Resolve a shopping request to catalogue rows.

        Steps:
            1. Ask the model to extract items and quantities.
            2. Ask the model to match them with the catalogue.
            3. Fuzzy-search the matches so quantity and price are available
               as DataFrame columns.

        Args:
            query: the user's shopping request as free text.

        Returns:
            The DataFrame from ``search_products_fuzzy``, or an error message
            string when any step fails.
        """
        try:
            # Step 1: Extract items and quantities from the query.
            extraction_prompt = f"""
            Extract the products and their quantities from this shopping request.
            If a quantity is not specified, assume 1.

            Shopping request: {query}

            Format each item on a separate line as:
            ProductName == "EXTRACTED_PRODUCT" quantity: NUMBER
            """

            extracted_items = llm_flash_exp.predict(extraction_prompt)
            # Step 2: Match the extracted items with the catalogue.
            matched_products_str = self.match_products_with_catalogue(extracted_items)
            # Surface a matching failure instead of turning it into an
            # (apparently successful) empty result.
            if matched_products_str.startswith(self._MATCH_ERROR_PREFIX):
                return matched_products_str
            product_entries = [
                line.strip() for line in matched_products_str.splitlines() if line.strip()
            ]
            # Step 3: Do a fuzzy search and get the DataFrame result.
            return self.search_products_fuzzy(product_entries)
        except Exception as e:
            return f"Error processing query: {str(e)}"

    def extract_text_from_image(self, image):
        """Extract products and quantities from an uploaded image using Gemini.

        Args:
            image: image object passed to the model together with the prompt.

        Returns:
            The model's text answer, or an 'Error processing image: ...'
            string on failure.
        """
        prompt = """
        Analyze this image and extract products and their quantities.
        If quantities aren't specified, assume 1.
        List each item with its quantity.
        """
        try:
            response = model.generate_content([prompt, image])
            return response.text
        except Exception as e:
            return f"Error processing image: {str(e)}"

    def extract_text_from_pdf(self, pdf_file):
        """Extract the text of every page of an uploaded PDF.

        Args:
            pdf_file: file-like object handed to ``PyPDF2.PdfReader``.

        Returns:
            Page texts joined by newlines, or an 'Error processing PDF: ...'
            string on failure.
        """
        try:
            pdf_reader = PyPDF2.PdfReader(pdf_file)
            # A page may yield no text (guarded with `or ""`); the newline
            # keeps the last item of one page from fusing with the first
            # item of the next.
            return "\n".join(page.extract_text() or "" for page in pdf_reader.pages)
        except Exception as e:
            return f"Error processing PDF: {str(e)}"
201
+
202
+
203
+ # --- Cart Management Functions ---
204
+
205
def add_to_cart(product):
    """Put ``product`` into the session cart, merging by product name.

    Args:
        product: dict with at least 'ProductName' and 'Quantity' keys.

    When the cart already holds an entry with the same 'ProductName', its
    'Quantity' is increased; otherwise ``product`` itself is appended. The
    cart list is created in ``st.session_state`` on first use.
    """
    if 'cart' not in st.session_state:
        st.session_state.cart = []
    # Look for an entry with the same name and top up its quantity.
    for entry in st.session_state.cart:
        if entry['ProductName'] == product['ProductName']:
            entry['Quantity'] += product['Quantity']
            return
    st.session_state.cart.append(product)
214
+
215
def remove_from_cart(product_name):
    """Drop every cart entry whose 'ProductName' equals ``product_name``.

    Args:
        product_name: name of the product to remove.

    A cart that was never created is treated as empty rather than raising,
    so the call is safe before the first ``add_to_cart``.
    """
    current = st.session_state.get('cart', [])
    st.session_state.cart = [item for item in current if item['ProductName'] != product_name]
217
+
218
def generate_receipt():
    """Render the session cart as a PDF receipt.

    Returns:
        The PDF document as ``bytes``, suitable for ``st.download_button``.
        A missing or empty cart yields a receipt with a zero total.
    """
    from fpdf import FPDF

    def printable(text):
        # NOTE(review): the built-in "Arial" font is assumed to cover only
        # Latin-1; other characters are replaced with '?' so an exotic
        # product name cannot abort receipt generation - confirm against
        # the installed fpdf version.
        return str(text).encode('latin-1', 'replace').decode('latin-1')

    pdf = FPDF()
    pdf.add_page()
    pdf.set_font("Arial", size=12)

    pdf.cell(200, 10, txt="Bon Marche Receipt", ln=1, align='C')
    pdf.cell(200, 10, txt=f"Date: {pd.Timestamp.now().strftime('%Y-%m-%d %H:%M')}", ln=1)

    total = 0
    for item in st.session_state.get('cart', []):
        price = item['RetailPrice'] * item['Quantity']
        line = f"{item['ProductName']} x{item['Quantity']} - ${price:.2f}"
        pdf.cell(200, 10, txt=printable(line), ln=1)
        total += price

    pdf.cell(200, 10, txt=f"Total: ${total:.2f}", ln=1)

    rendered = pdf.output(dest='S')
    # Legacy PyFPDF returns a latin-1 str here, fpdf2 returns a bytearray;
    # normalise both to bytes instead of calling .encode() unconditionally.
    if isinstance(rendered, str):
        return rendered.encode('latin1')
    return bytes(rendered)
237
+
238
+
239
+ # --- Main App Function ---
240
+
241
def _rerun():
    """Restart the script run, using whichever rerun API Streamlit offers."""
    # st.rerun replaces the deprecated st.experimental_rerun in newer releases.
    rerun = getattr(st, 'rerun', None) or st.experimental_rerun
    rerun()


def _process_upload(assistant, uploaded_file):
    """Extract the shopping list from an upload and store it as the query.

    Streamlit re-executes the script on every interaction, so the upload's
    (name, size) is remembered in session state and extraction is skipped
    once it has been done for that file.
    """
    signature = (uploaded_file.name, uploaded_file.size)
    if st.session_state.get('processed_upload') == signature:
        return
    try:
        if uploaded_file.type.startswith('image'):
            with st.spinner("Extracting items from image..."):
                image = Image.open(uploaded_file)
                st.session_state.query = assistant.extract_text_from_image(image)
        elif uploaded_file.type == 'application/pdf':
            with st.spinner("Extracting items from PDF..."):
                st.session_state.query = assistant.extract_text_from_pdf(uploaded_file)
        st.session_state.processed_upload = signature
    except Exception as e:
        st.error(f"Error processing file: {str(e)}")


def _render_results(results):
    """Show search results, each with an 'Add' button, plus their total."""
    # A string result is an error message from the assistant.
    if isinstance(results, str):
        st.write(results)
        return
    if results.empty:
        st.write("No products found matching your criteria.")
        return

    st.subheader("Results")
    for index, row in results.iterrows():
        cola, colb = st.columns([3, 1])
        with cola:
            st.write(f"**{row['ProductName']}**")
            st.write(f"Price: ${row['RetailPrice']:.2f} | Qty: {row['Quantity']}")
        with colb:
            if st.button("Add", key=f"add_{index}"):
                add_to_cart(row.to_dict())

    # Total cost of everything listed in the search results.
    total_search = (results['RetailPrice'] * results['Quantity']).sum()
    st.markdown(f"**Total for these items: ${total_search:.2f}**")


def _render_cart():
    """Show the cart with per-item remove buttons, total and checkout."""
    cart = st.session_state.get('cart')
    if not cart:
        st.write("Your cart is empty")
        return

    total_cart = 0
    for item in cart:
        cost = item['RetailPrice'] * item['Quantity']
        cols = st.columns([3, 1, 1])
        with cols[0]:
            st.write(f"{item['ProductName']} x{item['Quantity']}")
        with cols[1]:
            st.write(f"${cost:.2f}")
        with cols[2]:
            if st.button("❌", key=f"del_{item['ProductName']}"):
                remove_from_cart(item['ProductName'])
                _rerun()
        total_cart += cost
    st.divider()
    st.write(f"**Total: ${total_cart:.2f}**")

    if st.button("Checkout"):
        receipt = generate_receipt()
        st.download_button(
            label="Download Receipt",
            data=receipt,
            file_name="bon_marche_receipt.pdf",
            mime="application/pdf"
        )


def main():
    """Run the Streamlit page: upload, search, results and shopping cart."""
    st.set_page_config(page_title="Smart Shopping Assistant", layout="wide")
    st.title("🛒 Smart Shopping Assistant")

    @st.cache_data
    def load_product_data():
        return pd.read_csv('supermarket4i.csv')  # Adjust filename/path as needed

    df = load_product_data()
    assistant = SmartShoppingAssistant(df)

    with st.sidebar:
        st.header("Upload Shopping List")
        uploaded_file = st.file_uploader(
            "Upload an image or PDF of your shopping list",
            type=['png', 'jpg', 'jpeg', 'pdf']
        )

        if uploaded_file:
            _process_upload(assistant, uploaded_file)

    col1, col2 = st.columns([2, 1])

    with col1:
        st.header("Search Products")
        query = st.text_area(
            "Describe what you're looking for (include quantities if needed):",
            height=100,
            value=st.session_state.get('query', '')
        )

        if st.button("Search"):
            if query:
                with st.spinner("Searching..."):
                    st.session_state.last_results = assistant.process_natural_language_query(query)

        # Render from session state, not from inside the Search branch: the
        # rerun triggered by an "Add" click reports Search as not pressed,
        # and buttons that are not re-created can never register the click.
        results = st.session_state.get('last_results')
        if results is not None:
            _render_results(results)

    with col2:
        st.header("Shopping Cart")
        _render_cart()


if __name__ == "__main__":
    main()