LogicGoInfotechSpaces committed on
Commit
c682e10
·
verified ·
1 Parent(s): edbd715

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +265 -12
app.py CHANGED
@@ -25,9 +25,12 @@ if not OPENAI_API_KEY:
25
 
26
  client = OpenAI(api_key=OPENAI_API_KEY)
27
 
28
- # Category API URL
29
  CATEGORY_API_URL = "https://logicgoinfotechspaces-auto-expense-categorization.hf.space/api/labels"
30
 
 
 
 
31
  # S3 client
32
  s3 = boto3.client(
33
  "s3",
@@ -60,7 +63,9 @@ async def upload_image(file: UploadFile = File(...)):
60
  ACL="private"
61
  )
62
 
63
- return {"image_id": image_key, "message": "Uploaded successfully"}
 
 
64
 
65
  except Exception as e:
66
  raise HTTPException(status_code=500, detail=str(e))
@@ -73,8 +78,14 @@ async def generate(image_id: str):
73
  try:
74
  obj = s3.get_object(Bucket=DO_BUCKET, Key=image_id)
75
  raw_bytes = obj["Body"].read()
76
- except:
77
- raise HTTPException(status_code=404, detail="Image not found")
 
 
 
 
 
 
78
 
79
  img_array = np.frombuffer(raw_bytes, np.uint8)
80
  img = cv2.imdecode(img_array, cv2.IMREAD_COLOR)
@@ -97,7 +108,8 @@ async def generate(image_id: str):
97
  "image_id": image_id,
98
  "raw_text": full_text,
99
  "confidence": round(avg_confidence, 3),
100
- "message": "Upload image with more clarity or enter manually."
 
101
  }
102
 
103
  # -------- JSON SCHEMA FOR GPT --------
@@ -168,38 +180,279 @@ Return structured JSON (via schema) with:
168
  temperature=0.1
169
  )
170
 
171
- parsed = json.loads(response.choices[0].message.content)
 
 
 
 
 
 
 
 
 
 
 
172
 
173
  except Exception as e:
174
  raise HTTPException(status_code=500, detail=f"OpenAI Error: {str(e)}")
175
 
176
- # -------- CATEGORY API CALL --------
177
- extracted_label = parsed.get("label", "unknown")
 
 
 
 
 
 
 
 
 
178
 
179
  try:
180
  cat_response = requests.post(
181
- CATEGORY_API_URL,
182
- json={"label": extracted_label},
183
  timeout=10
184
  )
185
 
186
  if cat_response.status_code == 200:
187
  cat_data = cat_response.json()
188
- parsed["category"] = cat_data.get("category", "unknown")
 
 
 
 
 
189
  else:
190
  parsed["category"] = "unknown"
 
191
 
192
  except Exception:
193
  parsed["category"] = "unknown"
 
194
 
195
  # -------- FINAL RESPONSE --------
196
  return {
197
  "image_id": image_id,
198
  "raw_text": full_text,
199
  "confidence": round(avg_confidence, 3),
200
- "parsed": parsed
 
 
201
  }
202
 
203
 
204
  if __name__ == "__main__":
205
  uvicorn.run("app:app", host="0.0.0.0", port=7860)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
25
 
26
  client = OpenAI(api_key=OPENAI_API_KEY)
27
 
28
+ # Category API URL (label-based) - kept for reference but not used for final categorization
29
  CATEGORY_API_URL = "https://logicgoinfotechspaces-auto-expense-categorization.hf.space/api/labels"
30
 
31
+ # Notes-based categorizer (the one we will call)
32
+ NOTES_CATEGORIZER_URL = "https://logicgoinfotechspaces-expensecategorizenotes.hf.space/api/v1/categorize"
33
+
34
  # S3 client
35
  s3 = boto3.client(
36
  "s3",
 
63
  ACL="private"
64
  )
65
 
66
+ # Also return a local path for debugging / local testing.
67
+ # Developer note: this is always the fixed container path /mnt/data/image.png, regardless of the uploaded file.
68
+ return {"image_id": image_key, "message": "Uploaded successfully", "local_path": "/mnt/data/image.png"}
69
 
70
  except Exception as e:
71
  raise HTTPException(status_code=500, detail=str(e))
 
78
  try:
79
  obj = s3.get_object(Bucket=DO_BUCKET, Key=image_id)
80
  raw_bytes = obj["Body"].read()
81
+ except Exception:
82
+ # Fallback: try to load from the local path if it exists (useful for local testing)
83
+ local_path = "/mnt/data/image.png"
84
+ if os.path.exists(local_path):
85
+ with open(local_path, "rb") as f:
86
+ raw_bytes = f.read()
87
+ else:
88
+ raise HTTPException(status_code=404, detail="Image not found")
89
 
90
  img_array = np.frombuffer(raw_bytes, np.uint8)
91
  img = cv2.imdecode(img_array, cv2.IMREAD_COLOR)
 
108
  "image_id": image_id,
109
  "raw_text": full_text,
110
  "confidence": round(avg_confidence, 3),
111
+ "message": "Upload image with more clarity or enter manually.",
112
+ "source_image_path": "/mnt/data/image.png"
113
  }
114
 
115
  # -------- JSON SCHEMA FOR GPT --------
 
180
  temperature=0.1
181
  )
182
 
183
+ # Parse the JSON from the message content first; if that fails,
184
+ # fall back to a JSON attribute the SDK may expose on the message, depending on version.
185
+ raw_content = None
186
+ try:
187
+ raw_content = response.choices[0].message.content
188
+ parsed = json.loads(raw_content)
189
+ except Exception:
190
+ # try another path if SDK embeds the json directly
191
+ try:
192
+ parsed = response.choices[0].message.json # hypothetical
193
+ except Exception:
194
+ raise
195
 
196
  except Exception as e:
197
  raise HTTPException(status_code=500, detail=f"OpenAI Error: {str(e)}")
198
 
199
+ # Ensure required keys exist and enforce strict defaults
200
+ parsed.setdefault("total_amount", 0)
201
+ parsed.setdefault("label", "unknown")
202
+ parsed.setdefault("date", "unknown")
203
+ parsed.setdefault("time", "unknown")
204
+ parsed.setdefault("payment_type", "unknown")
205
+ parsed.setdefault("notes", "unknown")
206
+
207
+ # -------- CATEGORY API CALL (USING NOTES INSTEAD OF LABEL) --------
208
+ # Use the notes text to derive a category/subcategory via the notes categorizer.
209
+ notes_text = parsed.get("notes", "")
210
 
211
  try:
212
  cat_response = requests.post(
213
+ NOTES_CATEGORIZER_URL,
214
+ json={"notes": notes_text},
215
  timeout=10
216
  )
217
 
218
  if cat_response.status_code == 200:
219
  cat_data = cat_response.json()
220
+ # category should be filled with the subcategory field from the notes API
221
+ parsed["category"] = cat_data.get("subcategory", "unknown")
222
+ # keep label unchanged
223
+ parsed["label"] = parsed.get("label", "unknown")
224
+ # also provide the top-level title for convenience
225
+ parsed["category_title"] = cat_data.get("title", None)
226
  else:
227
  parsed["category"] = "unknown"
228
+ parsed["category_title"] = None
229
 
230
  except Exception:
231
  parsed["category"] = "unknown"
232
+ parsed["category_title"] = None
233
 
234
  # -------- FINAL RESPONSE --------
235
  return {
236
  "image_id": image_id,
237
  "raw_text": full_text,
238
  "confidence": round(avg_confidence, 3),
239
+ "parsed": parsed,
240
+ # Developer/test helper: include the fixed local path that the image download falls back to
241
+ "source_image_path": "/mnt/data/image.png"
242
  }
243
 
244
 
245
  if __name__ == "__main__":
246
  uvicorn.run("app:app", host="0.0.0.0", port=7860)
247
+
248
+
249
+
250
+
251
+
252
+
253
+
254
+ # # app.py
255
+ # import uvicorn
256
+ # import numpy as np
257
+ # import cv2
258
+ # import boto3
259
+ # import os
260
+ # import json
261
+ # import requests
262
+ # from fastapi import FastAPI, UploadFile, File, HTTPException
263
+ # from rapidocr_onnxruntime import RapidOCR
264
+ # from openai import OpenAI
265
+
266
+ # # ---------------- ENV CONFIG ----------------
267
+ # DO_KEY_ID = os.getenv("DO_SPACES_KEY_ID")
268
+ # DO_SECRET_KEY = os.getenv("DO_SPACES_SECRET_KEY")
269
+ # DO_REGION = os.getenv("DO_SPACES_REGION", "blr1")
270
+ # DO_ENDPOINT = os.getenv("DO_SPACES_ENDPOINT")
271
+ # DO_BUCKET = os.getenv("DO_SPACES_BUCKET", "milestone")
272
+ # OPENAI_API_KEY = os.getenv("OPENAI_API_KEY")
273
+
274
+ # FOLDER = "OCR_Images"
275
+
276
+ # if not OPENAI_API_KEY:
277
+ # raise RuntimeError("OPENAI_API_KEY missing!")
278
+
279
+ # client = OpenAI(api_key=OPENAI_API_KEY)
280
+
281
+ # # Category API URL
282
+ # CATEGORY_API_URL = "https://logicgoinfotechspaces-auto-expense-categorization.hf.space/api/labels"
283
+
284
+ # # S3 client
285
+ # s3 = boto3.client(
286
+ # "s3",
287
+ # region_name=DO_REGION,
288
+ # endpoint_url=DO_ENDPOINT,
289
+ # aws_access_key_id=DO_KEY_ID,
290
+ # aws_secret_access_key=DO_SECRET_KEY,
291
+ # )
292
+
293
+ # app = FastAPI()
294
+ # ocr_engine = RapidOCR()
295
+
296
+ # # ---------------- ROUTES ----------------
297
+ # @app.get("/health")
298
+ # async def health():
299
+ # return {"status": "ok"}
300
+
301
+
302
+ # @app.post("/upload")
303
+ # async def upload_image(file: UploadFile = File(...)):
304
+ # try:
305
+ # file_bytes = await file.read()
306
+ # image_key = f"{FOLDER}/{file.filename}"
307
+
308
+ # s3.put_object(
309
+ # Bucket=DO_BUCKET,
310
+ # Key=image_key,
311
+ # Body=file_bytes,
312
+ # ContentType=file.content_type,
313
+ # ACL="private"
314
+ # )
315
+
316
+ # return {"image_id": image_key, "message": "Uploaded successfully"}
317
+
318
+ # except Exception as e:
319
+ # raise HTTPException(status_code=500, detail=str(e))
320
+
321
+
322
+ # @app.post("/generate/{image_id:path}")
323
+ # async def generate(image_id: str):
324
+
325
+ # # -------- Download image --------
326
+ # try:
327
+ # obj = s3.get_object(Bucket=DO_BUCKET, Key=image_id)
328
+ # raw_bytes = obj["Body"].read()
329
+ # except:
330
+ # raise HTTPException(status_code=404, detail="Image not found")
331
+
332
+ # img_array = np.frombuffer(raw_bytes, np.uint8)
333
+ # img = cv2.imdecode(img_array, cv2.IMREAD_COLOR)
334
+ # if img is None:
335
+ # raise HTTPException(status_code=400, detail="Unable to decode image")
336
+
337
+ # # -------- OCR --------
338
+ # result, _ = ocr_engine(img)
339
+ # if not result:
340
+ # raise HTTPException(status_code=500, detail="OCR returned empty result")
341
+
342
+ # full_text = "\n".join([text for _, text, _ in result])
343
+
344
+ # # -------- CONFIDENCE SCORE --------
345
+ # confidences = [conf for _, _, conf in result if isinstance(conf, (int, float))]
346
+ # avg_confidence = sum(confidences) / len(confidences) if confidences else 0
347
+
348
+ # if avg_confidence < 0.70:
349
+ # return {
350
+ # "image_id": image_id,
351
+ # "raw_text": full_text,
352
+ # "confidence": round(avg_confidence, 3),
353
+ # "message": "Upload image with more clarity or enter manually."
354
+ # }
355
+
356
+ # # -------- JSON SCHEMA FOR GPT --------
357
+ # schema = {
358
+ # "name": "extract_expense_details",
359
+ # "schema": {
360
+ # "type": "object",
361
+ # "properties": {
362
+ # "total_amount": {"type": "number"},
363
+ # "label": {"type": "string"},
364
+ # "date": {"type": "string"},
365
+ # "time": {"type": "string"},
366
+ # "payment_type": {
367
+ # "type": "string",
368
+ # "enum": ["cash", "card", "upi", "unknown"]
369
+ # },
370
+ # "notes": {"type": "string"}
371
+ # },
372
+ # "required": ["total_amount", "label"]
373
+ # }
374
+ # }
375
+
376
+ # # -------- PROMPT --------
377
+ # prompt = f"""
378
+ # You are an expense extraction AI.
379
+
380
+ # Extract expense details from the OCR text below:
381
+
382
+ # \"\"\"
383
+ # {full_text}
384
+ # \"\"\"
385
+
386
+ # ### STRICT INFORMATION RULES:
387
+ # - Do NOT create or guess any information that does not exist in the extracted text.
388
+ # - If any field (date, time, payment_type, total_amount) is not clearly present in the text, set its value to "unknown".
389
+ # - Only infer the label category (Restaurant, Store, etc.) based on business name and item types.
390
+
391
+ # ### Labeling Rules:
392
+ # 1. Detect the business/merchant name from the text (e.g., KFC, Starbucks, Ying Thai Kitchen).
393
+ # 2. If items are food or restaurant-related → label must be: "<Business Name> Restaurant".
394
+ # 3. If it's a store/retail → "<Business Name> Store".
395
+ # 4. If unclear, infer the closest meaningful category.
396
+ # 5. If business name is not found → label = "unknown".
397
+
398
+ # ### Notes Format:
399
+ # Always generate notes EXACTLY in this format:
400
+ # "Spent <total_amount> on <label> on <date>."
401
+
402
+ # ### Required Output:
403
+ # Return structured JSON (via schema) with:
404
+ # - total_amount
405
+ # - label
406
+ # - date
407
+ # - time
408
+ # - payment_type
409
+ # - notes
410
+ # """
411
+
412
+ # # -------- CALL GPT --------
413
+ # try:
414
+ # response = client.chat.completions.create(
415
+ # model="gpt-4o-mini",
416
+ # response_format={"type": "json_schema", "json_schema": schema},
417
+ # messages=[
418
+ # {"role": "system", "content": "You are an expert in receipt parsing."},
419
+ # {"role": "user", "content": prompt}
420
+ # ],
421
+ # temperature=0.1
422
+ # )
423
+
424
+ # parsed = json.loads(response.choices[0].message.content)
425
+
426
+ # except Exception as e:
427
+ # raise HTTPException(status_code=500, detail=f"OpenAI Error: {str(e)}")
428
+
429
+ # # -------- CATEGORY API CALL --------
430
+ # extracted_label = parsed.get("label", "unknown")
431
+
432
+ # try:
433
+ # cat_response = requests.post(
434
+ # CATEGORY_API_URL,
435
+ # json={"label": extracted_label},
436
+ # timeout=10
437
+ # )
438
+
439
+ # if cat_response.status_code == 200:
440
+ # cat_data = cat_response.json()
441
+ # parsed["category"] = cat_data.get("category", "unknown")
442
+ # else:
443
+ # parsed["category"] = "unknown"
444
+
445
+ # except Exception:
446
+ # parsed["category"] = "unknown"
447
+
448
+ # # -------- FINAL RESPONSE --------
449
+ # return {
450
+ # "image_id": image_id,
451
+ # "raw_text": full_text,
452
+ # "confidence": round(avg_confidence, 3),
453
+ # "parsed": parsed
454
+ # }
455
+
456
+
457
+ # if __name__ == "__main__":
458
+ # uvicorn.run("app:app", host="0.0.0.0", port=7860)