Seth0330 committed on
Commit
0fc94bc
·
verified ·
1 Parent(s): c005675

Create APP.PY

Browse files
Files changed (1) hide show
  1. APP.PY +274 -0
APP.PY ADDED
@@ -0,0 +1,274 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import os, io, base64, json, time, random
2
+ from typing import Optional, Dict, Any, List, Tuple
3
+ from urllib.parse import quote_plus
4
+
5
+ from fastapi import FastAPI, Request, BackgroundTasks
6
+ from fastapi.responses import PlainTextResponse
7
+ import httpx
8
+ from bs4 import BeautifulSoup
9
+ from PIL import Image
10
+
11
+ from twilio.rest import Client as TwilioClient
12
+
13
+ # OpenAI
14
+ from openai import OpenAI
15
+ oai_client = OpenAI(api_key=os.getenv("OPENAI_API_KEY"))
16
+
17
+ # LangChain minimal (structured output)
18
+ from langchain_openai import ChatOpenAI
19
+ from langchain_core.pydantic_v1 import BaseModel, Field
20
+ from langchain_core.prompts import ChatPromptTemplate
21
+
22
+ # Search tools
23
+ from duckduckgo_search import DDGS
24
+ try:
25
+ from tavily import TavilyClient
26
+ _HAS_TAVILY = True
27
+ except Exception:
28
+ _HAS_TAVILY = False
29
+
30
# FastAPI application object; the /sms routes further down are registered on it.
app = FastAPI(title="SAVE SMS Webhook (Async Reply)")

# ---------- Twilio client ----------
# Credentials and the sender number are read from the environment and default
# to the empty string when unset.
TW_SID = os.getenv("TWILIO_ACCOUNT_SID", "")
TW_TOKEN = os.getenv("TWILIO_AUTH_TOKEN", "")
TW_FROM = os.getenv("TWILIO_FROM", "") # e.g., +12175898085
# True only when all three settings are non-empty.
_twilio_ok = bool(TW_SID and TW_TOKEN and TW_FROM)
# The client is only constructed when fully configured; otherwise it is None.
twilio_client = TwilioClient(TW_SID, TW_TOKEN) if _twilio_ok else None

# ---------- shared helpers ----------
# Chat model with temperature 0, shared by the structured-extraction chain.
lc_llm = ChatOpenAI(model="gpt-4o-mini", temperature=0)

# Store domains used to restrict and filter web-search results.
MERCHANT_DOMAINS = (
    "walmart.ca","realcanadiansuperstore.ca","amazon.ca","metro.ca",
    "nofrills.ca","freshco.com","well.ca","costco.ca","iga.net","londondrugs.com"
)
46
+
47
# Schema for one price offer extracted from a store page. All four fields are
# required (``Field(...)``).
class Offer(BaseModel):
    # Name of the store or seller.
    merchant: str = Field(...)
    # Product title as listed on the page.
    title: str = Field(...)
    # Offer price; the extraction prompt in this file asks for CAD and for 0
    # when no offer is found.
    price: float = Field(...)
    # Link for the offer.
    url: str = Field(...)
52
+
53
# Prompt that asks the model for the single best CAD offer found in scraped
# page text. Template variables: {query}, {url}, {text}.
extract_prompt = ChatPromptTemplate.from_messages([
    ("system","Extract one best CAD offer for the queried item from the page text. "
    "Return JSON: merchant,title,price(float),url. If none, return empty with price 0."),
    ("human","Query: {query}\nURL: {url}\n--- PAGE TEXT ---\n{text}\n--- END ---")
])
# Prompt piped into the shared model, configured for structured output using
# the Offer schema.
chain_extract = extract_prompt | lc_llm.with_structured_output(Offer)
59
+
60
def img_or_pdf_to_image_bytes(data: bytes, filename: str) -> bytes:
    """Re-encode an uploaded receipt as JPEG bytes when it can be decoded.

    The decision is driven by the extension of *filename* (case-insensitive).
    For ``.jpg``/``.jpeg``/``.png``/``.webp``/``.pdf`` the payload is opened
    with ``Image.open``, converted to RGB and saved as JPEG (quality 90).

    Args:
        data: Raw bytes of the uploaded media.
        filename: Name used only for its extension; ``None``/empty is allowed.

    Returns:
        JPEG bytes on success. The original *data* unchanged when the
        extension is not recognised or the payload cannot be decoded.
    """
    name = (filename or "").lower()
    if not name.endswith((".jpg", ".jpeg", ".png", ".webp", ".pdf")):
        return data

    try:
        img = Image.open(io.BytesIO(data)).convert("RGB")
        buf = io.BytesIO()
        img.save(buf, format="JPEG", quality=90)
    except Exception:
        # Best-effort for every recognised extension (previously only the PDF
        # branch fell back): a payload that cannot be decoded is handed on
        # untouched so the caller can still sniff its real type.
        return data
    return buf.getvalue()
72
+
73
def b64_data_uri(data: bytes, mime: str) -> str:
    """Return *data* as a base64 ``data:`` URI labelled with the MIME type *mime*."""
    encoded = base64.b64encode(data).decode("utf-8")
    return f"data:{mime};base64,{encoded}"
75
+
76
def _strip_code_fence(text: str) -> str:
    """Return *text* without a leading Markdown ``` fence and optional ``json`` tag."""
    s = text.strip()
    if not s.startswith("```"):
        return s
    # s starts with the fence, so index 1 is always the fenced body.
    s = s.split("```", 2)[1]
    if s.lower().startswith("json"):
        # Drop only the tag itself; works whether or not a newline follows it.
        s = s[len("json"):]
    return s.strip()


def call_openai_vision_for_receipt(image_bytes: bytes) -> Dict[str, Any]:
    """Ask the OpenAI chat model to parse a receipt and return the decoded JSON.

    The payload is sent as a base64 data URI in an ``image_url`` message part.
    It is labelled ``application/pdf`` when the bytes start with ``%PDF`` and
    ``image/jpeg`` otherwise.
    NOTE(review): confirm the API accepts PDF data URIs in ``image_url`` parts.

    Args:
        image_bytes: Receipt bytes (JPEG, or a raw PDF).

    Returns:
        The parsed JSON object produced by the model.

    Raises:
        ValueError: If the model returns no content, or the content is not
            valid JSON (``json.JSONDecodeError`` is a ``ValueError``).
    """
    is_pdf = image_bytes[0:4] == b"%PDF"
    mime = "application/pdf" if is_pdf else "image/jpeg"
    system = ("You are a strict, no-chitchat receipt parser for Canadian grocery receipts. "
              "Return ONLY JSON; prices in CAD.")
    # The prompt lines are kept flush-left so the literal matches the text as
    # it appears in the source.
    user_prompt = """
{ "store":{"name":"string","address":"string|null","date":"YYYY-MM-DD|null"},
"items":[{"name":"string","size":"string|null","qty":1,"unit_price":0.00,"line_total":0.00}],
"subtotal":0.00,"tax":0.00,"total":0.00 }
Rules: shopper-friendly names; qty>=1; unit_price before tax; line_total=qty*unit_price; use null if missing.
Return ONLY JSON.
"""
    resp = oai_client.chat.completions.create(
        model="gpt-4o-mini", temperature=0,
        messages=[{"role":"system","content":system},
                  {"role":"user","content":[
                      {"type":"text","text":user_prompt},
                      {"type":"image_url","image_url":{"url":b64_data_uri(image_bytes,mime)}}
                  ]}]
    )
    content = resp.choices[0].message.content
    if not content:
        # Previously this surfaced as an AttributeError on None.strip().
        raise ValueError("OpenAI returned no content for the receipt image")
    return json.loads(_strip_code_fence(content))
101
+
102
+ def _fallback_store_search_urls(q: str, k: int = 5) -> List[str]:
103
+ qenc = quote_plus(q)
104
+ urls = [
105
+ f"https://www.walmart.ca/search?q={qenc}",
106
+ f"https://www.realcanadiansuperstore.ca/search?search-bar={qenc}",
107
+ f"https://www.amazon.ca/s?k={qenc}",
108
+ f"https://www.metro.ca/en/online-grocery/search?filter.query={qenc}",
109
+ f"https://www.nofrills.ca/search?search-bar={qenc}",
110
+ f"https://www.freshco.com/en/search?search-bar={qenc}",
111
+ f"https://well.ca/searchresult.html?keyword={qenc}",
112
+ f"https://www.costco.ca/CatalogSearch?dept=All&keyword={qenc}",
113
+ f"https://www.iga.net/en/search?search={qenc}",
114
+ f"https://www.londondrugs.com/search?searchTerm={qenc}",
115
+ ]
116
+ return urls[:k]
117
+
118
def _search_web(query: str, k: int = 5) -> List[str]:
    """Return up to *k* de-duplicated merchant URLs for ``"<query> price"``.

    Sources are tried in order:
      1. Tavily, when the package imported and ``TAVILY_API_KEY`` is set.
      2. DuckDuckGo (region ``ca-en``), up to three attempts with a growing,
         jittered pause between attempts.
      3. Direct store search URLs from ``_fallback_store_search_urls``.

    Results are kept only when their host is one of ``MERCHANT_DOMAINS`` or a
    subdomain of one. Search failures are deliberately swallowed so that the
    next source can be tried.
    """
    from urllib.parse import urlparse  # only this helper needs it

    def _is_merchant(url: str) -> bool:
        # Match on the host, not the whole URL, so look-alike hosts and
        # domains that merely appear in a path or query string are rejected.
        try:
            host = (urlparse(url).hostname or "").lower()
        except ValueError:
            return False
        return any(host == dom or host.endswith("." + dom) for dom in MERCHANT_DOMAINS)

    urls: List[str] = []
    api_key = os.getenv("TAVILY_API_KEY")
    if _HAS_TAVILY and api_key:
        try:
            tv = TavilyClient(api_key)
            res = tv.search(query=f"{query} price", search_depth="basic", max_results=k,
                            include_domains=list(MERCHANT_DOMAINS))
            for r in res.get("results", []):
                if r.get("url"):
                    urls.append(r["url"])
        except Exception:
            # Best-effort: keep whatever was collected and fall through.
            pass

    if not urls:
        attempts = 3
        try:
            with DDGS() as ddgs:
                q = f"{query} price"
                for attempt in range(attempts):
                    for r in ddgs.text(q, region="ca-en", max_results=k):
                        u = r.get("href") or r.get("url")
                        if u:
                            urls.append(u)
                    if urls:
                        break
                    if attempt < attempts - 1:
                        # Back off before retrying; no pause after the last try.
                        time.sleep(0.6 * (2 ** attempt) + random.random() * 0.3)
        except Exception:
            urls = []

    urls = [u for u in urls if _is_merchant(u)]
    if not urls:
        urls = _fallback_store_search_urls(query, k=k)
    # dict.fromkeys drops duplicates while preserving first-seen order.
    return list(dict.fromkeys(urls))[:k]
145
+
146
def _fetch_text(url: str, timeout=15) -> str:
    """Download *url* and return its visible text, whitespace-collapsed.

    Args:
        url: Page to fetch (redirects are followed).
        timeout: Timeout passed to the HTTP client.

    Returns:
        At most the first 12000 characters of the page text with
        ``script``/``style``/``noscript`` content removed, or ``""`` on any
        failure, including a 4xx/5xx response.
    """
    headers = {"User-Agent":"Mozilla/5.0 (compatible; PriceAgent/1.0)"}
    try:
        with httpx.Client(follow_redirects=True, timeout=timeout) as client:
            r = client.get(url, headers=headers)
            # Treat error responses as "no text" so error and bot-block pages
            # are not passed on as if they were product pages.
            r.raise_for_status()
        soup = BeautifulSoup(r.text,"html.parser")
        for t in soup(["script","style","noscript"]):
            t.decompose()
        return " ".join(soup.get_text(separator=" ").split())[:12000]
    except Exception:
        return ""
155
+
156
def langchain_price_lookup(item_name: str) -> Optional[Dict[str, Any]]:
    """Find the lowest-priced offer for *item_name* across searched pages.

    Each URL from ``_search_web`` is fetched and run through
    ``chain_extract``. Pages that yield no text, fail extraction, or report a
    non-positive price are skipped.

    Returns:
        ``None`` when no usable offer was found, otherwise a dict with keys
        ``title``, ``price``, ``source`` and ``link``. When the extracted
        offer has no URL, ``link`` is the page the offer was extracted from.
    """
    urls = _search_web(item_name, k=5)
    best: Optional[Offer] = None
    best_page_url = ""
    for u in urls:
        text = _fetch_text(u)
        if not text:
            continue
        try:
            offer: Offer = chain_extract.invoke({"query": item_name, "url": u, "text": text})
        except Exception:
            continue
        if not offer or not offer.price or offer.price <= 0:
            continue
        if best is None or offer.price < best.price:
            # Remember the page too, so the fallback link matches this offer
            # rather than whichever URL happened to be first in the search.
            best, best_page_url = offer, u
    if best is None:
        return None
    return {"title": best.title or item_name, "price": float(best.price),
            "source": best.merchant or "Other store", "link": best.url or best_page_url}
171
+
172
def normalize_query(item: Dict[str, Any]) -> str:
    """Build a search string from an item's ``name`` and ``size``.

    Missing or falsy fields count as empty. Whitespace-separated tokens of a
    single character are dropped and the rest are joined with single spaces.
    """
    name_part = item.get("name") or ""
    size_part = item.get("size") or ""
    combined = f"{name_part} {size_part}".strip()
    return " ".join(token for token in combined.split() if len(token) > 1)
176
+
177
def research_prices(items: List[Dict[str, Any]], max_items=6) -> List[Dict[str, Any]]:
    """Look up an online offer for each of the first *max_items* receipt items.

    Items whose normalized query is empty, or for which no offer is found, are
    skipped. An offer is flagged ``is_cheaper`` when the receipt unit price is
    an int/float and the found price is more than half a cent below it. The
    function pauses 0.25 s after each recorded offer.

    Returns:
        One dict per matched item with keys ``item_name``,
        ``receipt_unit_price``, ``found_price``, ``found_store``,
        ``found_title``, ``found_link`` and ``is_cheaper``.
    """
    results = []
    for entry in items[:max_items]:
        search_term = normalize_query(entry)
        if not search_term:
            continue
        paid = entry.get("unit_price")
        match = langchain_price_lookup(search_term)
        if not match:
            continue
        is_cheaper = isinstance(paid, (int, float)) and match["price"] < float(paid) - 0.005
        results.append({
            "item_name": entry.get("name"),
            "receipt_unit_price": paid,
            "found_price": match["price"],
            "found_store": match["source"],
            "found_title": match["title"],
            "found_link": match["link"],
            "is_cheaper": is_cheaper,
        })
        time.sleep(0.25)
    return results
192
+
193
def compute_savings(receipt: Dict[str, Any], found: List[Dict[str, Any]]) -> Tuple[float,List[Dict[str, Any]]]:
    """Sum the per-unit price differences of the offers flagged as cheaper.

    Args:
        receipt: Parsed receipt (not read by this function).
        found: Offer dicts as produced by ``research_prices``.

    Returns:
        ``(savings, cheaper)``: the total of ``receipt_unit_price - found_price``
        over entries with a truthy ``is_cheaper``, rounded to two decimals, and
        the list of those entries. Entries whose prices cannot be converted to
        float add nothing to the total but stay in the list.
    """
    flagged = list(filter(lambda row: row.get("is_cheaper"), found))
    total = 0.0
    for row in flagged:
        try:
            total += float(row["receipt_unit_price"]) - float(row["found_price"])
        except Exception:
            continue
    return round(total, 2), flagged
200
+
201
def format_five_lines(receipt: Dict[str, Any], savings: float, cheaper_list: List[Dict[str, Any]]) -> str:
    """Render the SMS summary for a receipt as newline-separated lines.

    Lines, in order: store and total, the savings figure, either up to three
    cheaper picks or a "none found" note, and a closing call to action. The
    total comes from ``total`` (or ``subtotal`` when ``total`` is falsy) and is
    shown as ``N/A`` when it cannot be parsed as a number.
    """
    store_info = receipt.get("store") or {}
    store = store_info.get("name") or "your store"

    raw_total = receipt.get("total") or receipt.get("subtotal")
    try:
        amount = float(str(raw_total).replace('$','').strip())
        total_txt = f"${amount:.2f}"
    except Exception:
        total_txt = "N/A"

    def _pick(f: Dict[str, Any]) -> str:
        item = f.get("item_name") or "Item"
        shop = f.get("found_store") or "other store"
        price = float(f.get("found_price") or 0.0)
        return f"{item} @ {shop} for ${price:.2f}"

    if cheaper_list:
        picks_line = "Cheaper picks: " + "; ".join(_pick(f) for f in cheaper_list[:3]) + "."
    else:
        picks_line = "No clearly cheaper matches found right now for your items."

    return "\n".join([
        f"Receipt read: {store}, total {total_txt}.",
        f"I found potential savings of ${savings:.2f} by checking other stores.",
        picks_line,
        "Reply 'DEALS' anytime to get weekly picks tailored to your receipts.",
    ])
222
+
223
+ # ---------- background job ----------
224
# ---------- background job ----------
def process_and_reply(media_url: str, to_number: str):
    """Download a receipt image, analyse it, and text the result back.

    Pipeline: fetch *media_url*, normalise the bytes, parse the receipt with
    the vision model, research prices, then send one outbound SMS to
    *to_number*. Does nothing when Twilio is not configured.

    Any failure in the pipeline is reported to the user as a
    ``Processing error: ...`` message. A failure to send the SMS is logged
    and otherwise ignored.
    NOTE(review): the error text sent to the user includes the raw exception
    message; confirm this is acceptable.
    """
    if not _twilio_ok:
        return
    try:
        with httpx.Client(follow_redirects=True, timeout=20) as client:
            r = client.get(media_url)
            r.raise_for_status()
            content = r.content
        img_bytes = img_or_pdf_to_image_bytes(content, "mms.jpg")
        receipt = call_openai_vision_for_receipt(img_bytes)
        items = receipt.get("items") or []
        if not items:
            msg = "I couldn't read items. Send a clearer photo."
        else:
            found = research_prices(items)
            savings, cheaper = compute_savings(receipt, found)
            msg = format_five_lines(receipt, savings, cheaper)
    except Exception as e:
        msg = f"Processing error: {e}"

    # Cap every outgoing message, including error text, to 5 lines and
    # 1400 characters. Previously the error path was sent uncapped.
    msg = "\n".join(msg.split("\n")[:5])[:1400]

    # outbound SMS
    try:
        twilio_client.messages.create(to=to_number, from_=TW_FROM, body=msg)
    except Exception:
        # Still best-effort, but leave a trace: nothing else reports a failure
        # from this background job.
        import logging
        logging.getLogger(__name__).exception("Failed to send follow-up SMS")
249
+
250
+ # ---------- HTTP routes ----------
251
@app.get("/sms")
async def sms_health():
    """Answer GET /sms with a plain-text notice that the webhook is running."""
    status_text = "SMS webhook is up (POST only)."
    return PlainTextResponse(status_text, media_type="text/plain")
254
+
255
@app.post("/sms")
async def sms_webhook(request: Request, background_tasks: BackgroundTasks):
    """Handle an inbound SMS/MMS POST and answer immediately with TwiML.

    Reads ``From``, ``NumMedia`` and ``MediaUrl0`` from the form body. With no
    media the reply asks for a receipt photo. With media, the analysis is
    queued as a background task and the reply acknowledges receipt. If Twilio
    sending is not configured (or ``From`` is empty) the reply says so.

    NOTE(review): the request is not verified as coming from Twilio (no
    signature check is visible here); consider validating it.
    """
    form = dict(await request.form())
    from_number = form.get("From", "")
    try:
        num_media = int(form.get("NumMedia","0") or "0")
    except (TypeError, ValueError):
        # A malformed count is treated as "no media" instead of a 500.
        num_media = 0
    media_url = form.get("MediaUrl0") if num_media > 0 else None

    # quick acknowledgement to beat 15s timeout
    if not media_url:
        ack = "<Response><Message>Please MMS a clear photo of your grocery receipt to analyze savings.</Message></Response>"
        return PlainTextResponse(ack, media_type="application/xml")

    # run heavy work in background, then send outbound SMS
    if _twilio_ok and from_number:
        background_tasks.add_task(process_and_reply, media_url, from_number)
        reply = "<Response><Message>Got it—processing your receipt now. You’ll get a follow-up text shortly.</Message></Response>"
    else:
        reply = "<Response><Message>Got the image—backend SMS sending is misconfigured. Set TWILIO_ACCOUNT_SID, TWILIO_AUTH_TOKEN, TWILIO_FROM.</Message></Response>"

    return PlainTextResponse(reply, media_type="application/xml")