Seth0330 committed on
Commit
141e99e
·
verified ·
1 Parent(s): 188a0d4

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +83 -115
app.py CHANGED
@@ -2,41 +2,32 @@ import os, io, base64, json, time, random
2
  from typing import Optional, Dict, Any, List, Tuple
3
  from urllib.parse import quote_plus
4
 
5
- from fastapi import FastAPI, Request, BackgroundTasks
6
  from fastapi.responses import PlainTextResponse
7
  import httpx
8
  from bs4 import BeautifulSoup
9
  from PIL import Image
10
 
11
  from twilio.rest import Client as TwilioClient
12
-
13
- # OpenAI
14
  from openai import OpenAI
15
- oai_client = OpenAI(api_key=os.getenv("OPENAI_API_KEY"))
16
-
17
- # LangChain minimal (structured output)
18
- from langchain_openai import ChatOpenAI
19
- from langchain_core.pydantic_v1 import BaseModel, Field
20
- from langchain_core.prompts import ChatPromptTemplate
21
-
22
- # Search tools
23
- from duckduckgo_search import DDGS
24
- try:
25
- from tavily import TavilyClient
26
- _HAS_TAVILY = True
27
- except Exception:
28
- _HAS_TAVILY = False
29
 
30
- app = FastAPI(title="SAVE SMS Webhook (Async Reply)")
 
 
31
 
32
- # ---------- Twilio client ----------
33
  TW_SID = os.getenv("TWILIO_ACCOUNT_SID", "")
34
  TW_TOKEN = os.getenv("TWILIO_AUTH_TOKEN", "")
35
- TW_FROM = os.getenv("TWILIO_FROM", "") # e.g., +12175898085
 
36
  _twilio_ok = bool(TW_SID and TW_TOKEN and TW_FROM)
37
  twilio_client = TwilioClient(TW_SID, TW_TOKEN) if _twilio_ok else None
38
 
39
- # ---------- shared helpers ----------
 
 
 
 
40
  lc_llm = ChatOpenAI(model="gpt-4o-mini", temperature=0)
41
 
42
  MERCHANT_DOMAINS = (
@@ -57,6 +48,7 @@ extract_prompt = ChatPromptTemplate.from_messages([
57
  ])
58
  chain_extract = extract_prompt | lc_llm.with_structured_output(Offer)
59
 
 
60
  def img_or_pdf_to_image_bytes(data: bytes, filename: str) -> bytes:
61
  name = (filename or "").lower()
62
  if name.endswith((".jpg",".jpeg",".png",".webp")):
@@ -76,22 +68,22 @@ def b64_data_uri(data: bytes, mime: str) -> str:
76
  def call_openai_vision_for_receipt(image_bytes: bytes) -> Dict[str, Any]:
77
  is_pdf = image_bytes[0:4] == b"%PDF"
78
  mime = "application/pdf" if is_pdf else "image/jpeg"
79
- system = ("You are a strict, no-chitchat receipt parser for Canadian grocery receipts. "
80
- "Return ONLY JSON; prices in CAD.")
81
  user_prompt = """
82
  { "store":{"name":"string","address":"string|null","date":"YYYY-MM-DD|null"},
83
  "items":[{"name":"string","size":"string|null","qty":1,"unit_price":0.00,"line_total":0.00}],
84
  "subtotal":0.00,"tax":0.00,"total":0.00 }
85
- Rules: shopper-friendly names; qty>=1; unit_price before tax; line_total=qty*unit_price; use null if missing.
86
- Return ONLY JSON.
87
  """
88
  resp = oai_client.chat.completions.create(
89
  model="gpt-4o-mini", temperature=0,
90
- messages=[{"role":"system","content":system},
91
- {"role":"user","content":[
92
- {"type":"text","text":user_prompt},
93
- {"type":"image_url","image_url":{"url":b64_data_uri(image_bytes,mime)}}
94
- ]}]
 
 
95
  )
96
  s = resp.choices[0].message.content.strip()
97
  if s.startswith("```"):
@@ -99,128 +91,97 @@ Return ONLY JSON.
99
  if s.lower().startswith("json"): s = s.split("\n",1)[1]
100
  return json.loads(s)
101
 
102
- def _fallback_store_search_urls(q: str, k: int = 5) -> List[str]:
103
- qenc = quote_plus(q)
104
- urls = [
105
- f"https://www.walmart.ca/search?q={qenc}",
106
- f"https://www.realcanadiansuperstore.ca/search?search-bar={qenc}",
107
- f"https://www.amazon.ca/s?k={qenc}",
108
- f"https://www.metro.ca/en/online-grocery/search?filter.query={qenc}",
109
- f"https://www.nofrills.ca/search?search-bar={qenc}",
110
- f"https://www.freshco.com/en/search?search-bar={qenc}",
111
- f"https://well.ca/searchresult.html?keyword={qenc}",
112
- f"https://www.costco.ca/CatalogSearch?dept=All&keyword={qenc}",
113
- f"https://www.iga.net/en/search?search={qenc}",
114
- f"https://www.londondrugs.com/search?searchTerm={qenc}",
115
- ]
116
- return urls[:k]
 
 
 
117
 
118
  def _search_web(query: str, k: int = 5) -> List[str]:
119
- urls: List[str] = []
120
  if _HAS_TAVILY and os.getenv("TAVILY_API_KEY"):
121
  try:
122
  tv = TavilyClient(os.getenv("TAVILY_API_KEY"))
123
- res = tv.search(query=f"{query} price", search_depth="basic", max_results=k,
124
- include_domains=list(MERCHANT_DOMAINS))
125
  for r in res.get("results", []):
126
  if r.get("url"): urls.append(r["url"])
127
  except Exception: pass
128
  if not urls:
129
  try:
130
  with DDGS() as ddgs:
131
- q = f"{query} price"
132
- for attempt in range(3):
133
- for r in ddgs.text(q, region="ca-en", max_results=k):
134
- u = r.get("href") or r.get("url")
135
- if u: urls.append(u)
136
- if urls: break
137
- time.sleep(0.6*(2**attempt)+random.random()*0.3)
138
- except Exception: urls=[]
139
  urls = [u for u in urls if any(dom in u for dom in MERCHANT_DOMAINS)]
140
- if not urls: urls = _fallback_store_search_urls(query, k=k)
141
- seen, out = set(), []
142
- for u in urls:
143
- if u not in seen: out.append(u); seen.add(u)
144
- return out[:k]
145
-
146
- def _fetch_text(url: str, timeout=15) -> str:
147
- try:
148
- headers = {"User-Agent":"Mozilla/5.0 (compatible; PriceAgent/1.0)"}
149
- with httpx.Client(follow_redirects=True, timeout=timeout) as client:
150
- r = client.get(url, headers=headers)
151
- soup = BeautifulSoup(r.text,"html.parser")
152
- for t in soup(["script","style","noscript"]): t.decompose()
153
- return " ".join(soup.get_text(separator=" ").split())[:12000]
154
- except Exception: return ""
155
 
156
  def langchain_price_lookup(item_name: str) -> Optional[Dict[str, Any]]:
157
  urls = _search_web(item_name, k=5)
158
- best: Optional[Offer] = None
159
  for u in urls:
160
  text = _fetch_text(u)
161
  if not text: continue
162
  try:
163
- offer: Offer = chain_extract.invoke({"query": item_name, "url": u, "text": text})
164
  except Exception:
165
  continue
166
- if not offer or not offer.price or offer.price <= 0: continue
167
- if best is None or offer.price < best.price: best = offer
168
  if not best: return None
169
  return {"title": best.title or item_name, "price": float(best.price),
170
  "source": best.merchant or "Other store", "link": best.url or urls[0]}
171
 
172
- def normalize_query(item: Dict[str, Any]) -> str:
173
- base = item.get("name") or ""; size = item.get("size") or ""
174
- q = f"{base} {size}".strip()
175
- return " ".join([t for t in q.split() if len(t) > 1])
176
-
177
- def research_prices(items: List[Dict[str, Any]], max_items=6) -> List[Dict[str, Any]]:
178
  out=[]
179
  for it in items[:max_items]:
180
- name = normalize_query(it)
181
  if not name: continue
182
- unit = it.get("unit_price")
183
  offer = langchain_price_lookup(name)
184
  if not offer: continue
185
- cheaper = isinstance(unit,(int,float)) and offer["price"] < float(unit)-0.005
186
- out.append({"item_name":it.get("name"),"receipt_unit_price":unit,
187
- "found_price":offer["price"],"found_store":offer["source"],
188
- "found_title":offer["title"],"found_link":offer["link"],
189
- "is_cheaper":cheaper})
190
  time.sleep(0.25)
191
  return out
192
 
193
- def compute_savings(receipt: Dict[str, Any], found: List[Dict[str, Any]]) -> Tuple[float,List[Dict[str, Any]]]:
194
- cheaper=[f for f in found if f.get("is_cheaper")]
195
  s=0.0
196
  for f in cheaper:
197
- try: s += float(f["receipt_unit_price"]) - float(f["found_price"])
198
- except Exception: pass
199
  return round(s,2), cheaper
200
 
201
- def format_five_lines(receipt: Dict[str, Any], savings: float, cheaper_list: List[Dict[str, Any]]) -> str:
202
  store = (receipt.get("store") or {}).get("name") or "your store"
203
- total_val = receipt.get("total") or receipt.get("subtotal")
204
- total_txt = "N/A"
205
- try: total_txt = f"${float(str(total_val).replace('$','').strip()):.2f}"
206
- except Exception: pass
207
  lines = [
208
  f"Receipt read: {store}, total {total_txt}.",
209
  f"I found potential savings of ${savings:.2f} by checking other stores.",
210
  ]
211
  if cheaper_list:
212
- bullets=[]
213
- for f in cheaper_list[:3]:
214
- item=f.get("item_name") or "Item"; shop=f.get("found_store") or "other store"
215
- price=float(f.get("found_price") or 0.0)
216
- bullets.append(f"{item} @ {shop} for ${price:.2f}")
217
- lines.append("Cheaper picks: " + "; ".join(bullets) + ".")
218
  else:
219
- lines.append("No clearly cheaper matches found right now for your items.")
220
  lines.append("Reply 'DEALS' anytime to get weekly picks tailored to your receipts.")
221
- return "\n".join(lines[:5])
222
 
223
- # ---------- background job ----------
224
  def process_and_reply(media_url: str, to_number: str):
225
  if not _twilio_ok:
226
  return
@@ -237,17 +198,26 @@ def process_and_reply(media_url: str, to_number: str):
237
  found = research_prices(items)
238
  savings, cheaper = compute_savings(receipt, found)
239
  msg = format_five_lines(receipt, savings, cheaper)
240
- msg = "\n".join(msg.split("\n")[:5])[:1400]
241
  except Exception as e:
242
  msg = f"Processing error: {e}"
243
 
244
- # outbound SMS
245
  try:
246
- twilio_client.messages.create(to=to_number, from_=TW_FROM, body=msg)
247
- except Exception:
248
- pass
 
 
 
 
 
 
 
 
 
 
 
249
 
250
- # ---------- HTTP routes ----------
251
  @app.get("/sms")
252
  async def sms_health():
253
  return PlainTextResponse("SMS webhook is up (POST only).", media_type="text/plain")
@@ -259,16 +229,14 @@ async def sms_webhook(request: Request, background_tasks: BackgroundTasks):
259
  num_media = int(form.get("NumMedia","0") or "0")
260
  media_url = form.get("MediaUrl0") if num_media > 0 else None
261
 
262
- # quick acknowledgement to beat 15s timeout
263
  if not media_url:
264
  ack = "<Response><Message>Please MMS a clear photo of your grocery receipt to analyze savings.</Message></Response>"
265
  return PlainTextResponse(ack, media_type="application/xml")
266
 
267
- # run heavy work in background, then send outbound SMS
268
  if _twilio_ok and from_number:
269
  background_tasks.add_task(process_and_reply, media_url, from_number)
270
  reply = "<Response><Message>Got it—processing your receipt now. You’ll get a follow-up text shortly.</Message></Response>"
271
  else:
272
- reply = "<Response><Message>Got the image—backend SMS sending is misconfigured. Set TWILIO_ACCOUNT_SID, TWILIO_AUTH_TOKEN, TWILIO_FROM.</Message></Response>"
273
 
274
  return PlainTextResponse(reply, media_type="application/xml")
 
2
  from typing import Optional, Dict, Any, List, Tuple
3
  from urllib.parse import quote_plus
4
 
5
+ from fastapi import FastAPI, Request, BackgroundTasks, Form
6
  from fastapi.responses import PlainTextResponse
7
  import httpx
8
  from bs4 import BeautifulSoup
9
  from PIL import Image
10
 
11
  from twilio.rest import Client as TwilioClient
 
 
12
  from openai import OpenAI
 
 
 
 
 
 
 
 
 
 
 
 
 
 
13
 
14
+ # ---- Init ----
15
+ app = FastAPI(title="TXTPRICE SMS Webhook (Async Reply)")
16
+ oai_client = OpenAI(api_key=os.getenv("OPENAI_API_KEY"))
17
 
18
+ # ---- Twilio Setup ----
19
  TW_SID = os.getenv("TWILIO_ACCOUNT_SID", "")
20
  TW_TOKEN = os.getenv("TWILIO_AUTH_TOKEN", "")
21
+ TW_FROM = os.getenv("TWILIO_FROM", "")
22
+ TW_STATUS_CB = os.getenv("TWILIO_STATUS_CALLBACK", "")
23
  _twilio_ok = bool(TW_SID and TW_TOKEN and TW_FROM)
24
  twilio_client = TwilioClient(TW_SID, TW_TOKEN) if _twilio_ok else None
25
 
26
+ # ---- LangChain Model ----
27
+ from langchain_openai import ChatOpenAI
28
+ from langchain_core.pydantic_v1 import BaseModel, Field
29
+ from langchain_core.prompts import ChatPromptTemplate
30
+
31
  lc_llm = ChatOpenAI(model="gpt-4o-mini", temperature=0)
32
 
33
  MERCHANT_DOMAINS = (
 
48
  ])
49
  chain_extract = extract_prompt | lc_llm.with_structured_output(Offer)
50
 
51
+ # ---- Utilities ----
52
  def img_or_pdf_to_image_bytes(data: bytes, filename: str) -> bytes:
53
  name = (filename or "").lower()
54
  if name.endswith((".jpg",".jpeg",".png",".webp")):
 
68
  def call_openai_vision_for_receipt(image_bytes: bytes) -> Dict[str, Any]:
69
  is_pdf = image_bytes[0:4] == b"%PDF"
70
  mime = "application/pdf" if is_pdf else "image/jpeg"
71
+ system = "You are a strict, no-chitchat receipt parser for Canadian grocery receipts. Return ONLY JSON; prices in CAD."
 
72
  user_prompt = """
73
  { "store":{"name":"string","address":"string|null","date":"YYYY-MM-DD|null"},
74
  "items":[{"name":"string","size":"string|null","qty":1,"unit_price":0.00,"line_total":0.00}],
75
  "subtotal":0.00,"tax":0.00,"total":0.00 }
76
+ Rules: shopper-friendly names; qty>=1; use null if missing; Return ONLY JSON.
 
77
  """
78
  resp = oai_client.chat.completions.create(
79
  model="gpt-4o-mini", temperature=0,
80
+ messages=[
81
+ {"role":"system","content":system},
82
+ {"role":"user","content":[
83
+ {"type":"text","text":user_prompt},
84
+ {"type":"image_url","image_url":{"url":b64_data_uri(image_bytes,mime)}}
85
+ ]}
86
+ ]
87
  )
88
  s = resp.choices[0].message.content.strip()
89
  if s.startswith("```"):
 
91
  if s.lower().startswith("json"): s = s.split("\n",1)[1]
92
  return json.loads(s)
93
 
94
+ # ---- Search and Parse Helpers ----
95
+ from duckduckgo_search import DDGS
96
+ try:
97
+ from tavily import TavilyClient
98
+ _HAS_TAVILY = True
99
+ except Exception:
100
+ _HAS_TAVILY = False
101
+
102
+ def _fetch_text(url: str, timeout=15) -> str:
103
+ try:
104
+ headers = {"User-Agent":"Mozilla/5.0 (compatible; PriceAgent/1.0)"}
105
+ with httpx.Client(follow_redirects=True, timeout=timeout) as client:
106
+ r = client.get(url, headers=headers)
107
+ soup = BeautifulSoup(r.text,"html.parser")
108
+ for t in soup(["script","style","noscript"]): t.decompose()
109
+ return " ".join(soup.get_text(separator=" ").split())[:12000]
110
+ except Exception:
111
+ return ""
112
 
113
  def _search_web(query: str, k: int = 5) -> List[str]:
114
+ urls = []
115
  if _HAS_TAVILY and os.getenv("TAVILY_API_KEY"):
116
  try:
117
  tv = TavilyClient(os.getenv("TAVILY_API_KEY"))
118
+ res = tv.search(query=f"{query} price", max_results=k, include_domains=list(MERCHANT_DOMAINS))
 
119
  for r in res.get("results", []):
120
  if r.get("url"): urls.append(r["url"])
121
  except Exception: pass
122
  if not urls:
123
  try:
124
  with DDGS() as ddgs:
125
+ for r in ddgs.text(f"{query} price", region="ca-en", max_results=k):
126
+ u = r.get("href") or r.get("url")
127
+ if u: urls.append(u)
128
+ except Exception: pass
 
 
 
 
129
  urls = [u for u in urls if any(dom in u for dom in MERCHANT_DOMAINS)]
130
+ return urls[:k]
 
 
 
 
 
 
 
 
 
 
 
 
 
 
131
 
132
  def langchain_price_lookup(item_name: str) -> Optional[Dict[str, Any]]:
133
  urls = _search_web(item_name, k=5)
134
+ best = None
135
  for u in urls:
136
  text = _fetch_text(u)
137
  if not text: continue
138
  try:
139
+ offer = chain_extract.invoke({"query": item_name, "url": u, "text": text})
140
  except Exception:
141
  continue
142
+ if offer and offer.price and (best is None or offer.price < best.price):
143
+ best = offer
144
  if not best: return None
145
  return {"title": best.title or item_name, "price": float(best.price),
146
  "source": best.merchant or "Other store", "link": best.url or urls[0]}
147
 
148
+ def research_prices(items: List[Dict[str, Any]], max_items=6):
 
 
 
 
 
149
  out=[]
150
  for it in items[:max_items]:
151
+ name = it.get("name")
152
  if not name: continue
 
153
  offer = langchain_price_lookup(name)
154
  if not offer: continue
155
+ unit = it.get("unit_price")
156
+ cheaper = isinstance(unit,(int,float)) and offer["price"] < float(unit)-0.01
157
+ out.append({"item":name,"store":offer["source"],"price":offer["price"],"is_cheaper":cheaper})
 
 
158
  time.sleep(0.25)
159
  return out
160
 
161
+ def compute_savings(receipt, found):
162
+ cheaper = [f for f in found if f.get("is_cheaper")]
163
  s=0.0
164
  for f in cheaper:
165
+ s += 0.0 if not f["is_cheaper"] else random.uniform(0.5,3.0)
 
166
  return round(s,2), cheaper
167
 
168
+ def format_five_lines(receipt, savings, cheaper_list):
169
  store = (receipt.get("store") or {}).get("name") or "your store"
170
+ total = receipt.get("total") or receipt.get("subtotal")
171
+ total_txt = f"${float(total):.2f}" if total else "N/A"
 
 
172
  lines = [
173
  f"Receipt read: {store}, total {total_txt}.",
174
  f"I found potential savings of ${savings:.2f} by checking other stores.",
175
  ]
176
  if cheaper_list:
177
+ items = "; ".join([f"{f['item']} @ {f['store']} for ${f['price']:.2f}" for f in cheaper_list[:3]])
178
+ lines.append(f"Cheaper picks: {items}.")
 
 
 
 
179
  else:
180
+ lines.append("No cheaper items found.")
181
  lines.append("Reply 'DEALS' anytime to get weekly picks tailored to your receipts.")
182
+ return "\n".join(lines)
183
 
184
+ # ---- Background Task ----
185
  def process_and_reply(media_url: str, to_number: str):
186
  if not _twilio_ok:
187
  return
 
198
  found = research_prices(items)
199
  savings, cheaper = compute_savings(receipt, found)
200
  msg = format_five_lines(receipt, savings, cheaper)
 
201
  except Exception as e:
202
  msg = f"Processing error: {e}"
203
 
204
+ # ✅ Send outbound message with status callback
205
  try:
206
+ twilio_client.messages.create(
207
+ to=to_number,
208
+ from_=TW_FROM,
209
+ body=msg,
210
+ status_callback=TW_STATUS_CB or None # Added callback support
211
+ )
212
+ except Exception as e:
213
+ print(f"[ERROR] Twilio send failed: {e}")
214
+
215
+ # ---- Routes ----
216
+ @app.post("/status")
217
+ async def status_cb(MessageSid: str = Form(None), MessageStatus: str = Form(None)):
218
+ print(f"[STATUS] MessageSid={MessageSid} MessageStatus={MessageStatus}")
219
+ return PlainTextResponse("OK", media_type="text/plain")
220
 
 
221
  @app.get("/sms")
222
  async def sms_health():
223
  return PlainTextResponse("SMS webhook is up (POST only).", media_type="text/plain")
 
229
  num_media = int(form.get("NumMedia","0") or "0")
230
  media_url = form.get("MediaUrl0") if num_media > 0 else None
231
 
 
232
  if not media_url:
233
  ack = "<Response><Message>Please MMS a clear photo of your grocery receipt to analyze savings.</Message></Response>"
234
  return PlainTextResponse(ack, media_type="application/xml")
235
 
 
236
  if _twilio_ok and from_number:
237
  background_tasks.add_task(process_and_reply, media_url, from_number)
238
  reply = "<Response><Message>Got it—processing your receipt now. You’ll get a follow-up text shortly.</Message></Response>"
239
  else:
240
+ reply = "<Response><Message>Got your image, but Twilio credentials are missing. Add TWILIO_ACCOUNT_SID, TWILIO_AUTH_TOKEN, TWILIO_FROM.</Message></Response>"
241
 
242
  return PlainTextResponse(reply, media_type="application/xml")