WalletSyncOCR

Sleeping

App Files Community

LogicGoInfotechSpaces committed on Nov 17, 2025

Commit

8c9caa6

verified ·

1 Parent(s): 96a4e67

Update app.py

Browse files

Files changed (1) hide show

app.py +18 -10

app.py CHANGED Viewed

@@ -4,6 +4,7 @@ import numpy as np
 import cv2
 import boto3
 import os
 from fastapi import FastAPI, UploadFile, File, HTTPException
 from rapidocr_onnxruntime import RapidOCR
 from openai import OpenAI
@@ -72,7 +73,7 @@ async def upload_image(file: UploadFile = File(...)):
 # --------------------------------------------------
 @app.post("/generate/{image_id:path}")
 async def generate(image_id: str):
-    # ➜ Download from Spaces
     try:
         obj = s3.get_object(Bucket=DO_BUCKET, Key=image_id)
         raw_bytes = obj["Body"].read()
@@ -85,7 +86,7 @@ async def generate(image_id: str):
     if img is None:
         raise HTTPException(status_code=400, detail="Unable to decode image")
-    # ➜ OCR Extract
     result, _ = ocr_engine(img)
     if not result:
@@ -95,7 +96,7 @@ async def generate(image_id: str):
     full_text = "\n".join(extracted)
     # ------------------------------------------------
-    # CALL OPENAI TO GENERATE STRUCTURED EXPENSE DATA
     # ------------------------------------------------
     prompt = f"""
 Extract structured expense information from the following OCR text.
@@ -105,15 +106,15 @@ OCR TEXT:
 {full_text}
 \"\"\"
-Return a JSON with exactly these fields:
 - total_amount (number)
 - label (category like Food, Travel, Shopping, Utilities)
 - date
 - time
-- payment_type (cash, card, upi, unknown)
-- notes (1–2 line human readable description)
-If something is missing, infer logically.
 """
     try:
@@ -126,16 +127,23 @@ If something is missing, infer logically.
             temperature=0.2
         )
-        ai_output = response.choices[0].message["content"]
     except Exception as e:
         raise HTTPException(status_code=500, detail=f"OpenAI Error: {str(e)}")
-    # Return OCR + structured AI result
     return {
         "image_id": image_id,
         "raw_text": full_text,
-        "parsed": ai_output
     }
 # --------------------------------------------------

 import cv2
 import boto3
 import os
+import json
 from fastapi import FastAPI, UploadFile, File, HTTPException
 from rapidocr_onnxruntime import RapidOCR
 from openai import OpenAI
 # --------------------------------------------------
 @app.post("/generate/{image_id:path}")
 async def generate(image_id: str):
+    # Download image from Spaces
     try:
         obj = s3.get_object(Bucket=DO_BUCKET, Key=image_id)
         raw_bytes = obj["Body"].read()
     if img is None:
         raise HTTPException(status_code=400, detail="Unable to decode image")
+    # OCR
     result, _ = ocr_engine(img)
     if not result:
     full_text = "\n".join(extracted)
     # ------------------------------------------------
+    # CALL OPENAI GPT-4o-mini FOR PARSING
     # ------------------------------------------------
     prompt = f"""
 Extract structured expense information from the following OCR text.
 {full_text}
 \"\"\"
+Return JSON with fields:
 - total_amount (number)
 - label (category like Food, Travel, Shopping, Utilities)
 - date
 - time
+- payment_type (cash, credit card, debit card, mobile payment, bank transfer, Mobile trasnfer)
+- notes (1–2 line description)
+Return ONLY JSON.
 """
     try:
             temperature=0.2
         )
+        ai_output = response.choices[0].message.content
     except Exception as e:
         raise HTTPException(status_code=500, detail=f"OpenAI Error: {str(e)}")
+    # ------------------------------------------------
+    # Try parsing JSON output
+    # ------------------------------------------------
+    try:
+        parsed_json = json.loads(ai_output)
+    except:
+        parsed_json = {"error": "Failed to parse JSON", "raw_output": ai_output}
     return {
         "image_id": image_id,
         "raw_text": full_text,
+        "parsed": parsed_json
     }
 # --------------------------------------------------