WalletSyncOCR

Sleeping

App Files Community

LogicGoInfotechSpaces committed on Nov 17, 2025

Commit

945ba15

verified ·

1 Parent(s): 8c9caa6

Update app.py

Browse files

Files changed (1) hide show

app.py +51 -35

app.py CHANGED Viewed

@@ -4,7 +4,6 @@ import numpy as np
 import cv2
 import boto3
 import os
-import json
 from fastapi import FastAPI, UploadFile, File, HTTPException
 from rapidocr_onnxruntime import RapidOCR
 from openai import OpenAI
@@ -62,10 +61,7 @@ async def upload_image(file: UploadFile = File(...)):
             ACL="private"
         )
-        return {
-            "image_id": image_key,
-            "message": "Uploaded successfully"
-        }
     except Exception as e:
         raise HTTPException(status_code=500, detail=str(e))
@@ -73,7 +69,9 @@ async def upload_image(file: UploadFile = File(...)):
 # --------------------------------------------------
 @app.post("/generate/{image_id:path}")
 async def generate(image_id: str):
-    # Download image from Spaces
     try:
         obj = s3.get_object(Bucket=DO_BUCKET, Key=image_id)
         raw_bytes = obj["Body"].read()
@@ -82,13 +80,13 @@ async def generate(image_id: str):
     img_array = np.frombuffer(raw_bytes, np.uint8)
     img = cv2.imdecode(img_array, cv2.IMREAD_COLOR)
     if img is None:
         raise HTTPException(status_code=400, detail="Unable to decode image")
-    # OCR
     result, _ = ocr_engine(img)
     if not result:
         raise HTTPException(status_code=500, detail="OCR returned empty result")
@@ -96,56 +94,74 @@ async def generate(image_id: str):
     full_text = "\n".join(extracted)
     # ------------------------------------------------
-    # CALL OPENAI GPT-4o-mini FOR PARSING
     # ------------------------------------------------
     prompt = f"""
-Extract structured expense information from the following OCR text.
-OCR TEXT:
-\"\"\"
-{full_text}
-\"\"\"
-Return JSON with fields:
-- total_amount (number)
-- label (category like Food, Travel, Shopping, Utilities)
-- date
-- time
-- payment_type (cash, credit card, debit card, mobile payment, bank transfer, Mobile trasnfer)
-- notes (1–2 line description)
-Return ONLY JSON.
 """
     try:
         response = client.chat.completions.create(
             model="gpt-4o-mini",
             messages=[
-                {"role": "system", "content": "You are an expert expense parser."},
                 {"role": "user", "content": prompt}
             ],
             temperature=0.2
         )
-        ai_output = response.choices[0].message.content
     except Exception as e:
         raise HTTPException(status_code=500, detail=f"OpenAI Error: {str(e)}")
     # ------------------------------------------------
-    # Try parsing JSON output
     # ------------------------------------------------
-    try:
-        parsed_json = json.loads(ai_output)
-    except:
-        parsed_json = {"error": "Failed to parse JSON", "raw_output": ai_output}
     return {
         "image_id": image_id,
         "raw_text": full_text,
-        "parsed": parsed_json
     }
 # --------------------------------------------------
 if __name__ == "__main__":
     uvicorn.run("app:app", host="0.0.0.0", port=7860)

 import cv2
 import boto3
 import os
 from fastapi import FastAPI, UploadFile, File, HTTPException
 from rapidocr_onnxruntime import RapidOCR
 from openai import OpenAI
             ACL="private"
         )
+        return {"image_id": image_key, "message": "Uploaded successfully"}
     except Exception as e:
         raise HTTPException(status_code=500, detail=str(e))
 # --------------------------------------------------
 @app.post("/generate/{image_id:path}")
 async def generate(image_id: str):
+    # ------------------------------------------------
+    # Download image
+    # ------------------------------------------------
     try:
         obj = s3.get_object(Bucket=DO_BUCKET, Key=image_id)
         raw_bytes = obj["Body"].read()
     img_array = np.frombuffer(raw_bytes, np.uint8)
     img = cv2.imdecode(img_array, cv2.IMREAD_COLOR)
     if img is None:
         raise HTTPException(status_code=400, detail="Unable to decode image")
+    # ------------------------------------------------
+    # OCR Extraction
+    # ------------------------------------------------
     result, _ = ocr_engine(img)
     if not result:
         raise HTTPException(status_code=500, detail="OCR returned empty result")
     full_text = "\n".join(extracted)
     # ------------------------------------------------
+    # OPENAI FUNCTION CALLING SETUP
     # ------------------------------------------------
+    functions = [
+        {
+            "name": "extract_expense_details",
+            "description": "Extract structured expense details from OCR text.",
+            "parameters": {
+                "type": "object",
+                "properties": {
+                    "total_amount": {"type": "number"},
+                    "label": {
+                        "type": "string",
+                        "description": "Category such as Food, Travel, Shopping, Utilities"
+                    },
+                    "date": {"type": "string"},
+                    "time": {"type": "string"},
+                    "payment_type": {
+                        "type": "string",
+                        "enum": ["cash", "card", "upi", "unknown"]
+                    },
+                    "notes": {"type": "string"}
+                },
+                "required": ["total_amount", "label", "date"]
+            }
+        }
+    ]
     prompt = f"""
+Extract expense details from this OCR text:
+Return structured details using the provided function schema.
 """
+    # ------------------------------------------------
+    # OPENAI CALL WITH FUNCTION CALLING
+    # ------------------------------------------------
     try:
         response = client.chat.completions.create(
             model="gpt-4o-mini",
             messages=[
+                {"role": "system", "content": "You are a finance and receipt parsing AI."},
                 {"role": "user", "content": prompt}
             ],
+            functions=functions,
+            function_call={"name": "extract_expense_details"},
             temperature=0.2
         )
+        function_args = response.choices[0].message.function_call.arguments
     except Exception as e:
         raise HTTPException(status_code=500, detail=f"OpenAI Error: {str(e)}")
+    # Convert returned JSON string -> dict
+    import json
+    parsed = json.loads(function_args)
     # ------------------------------------------------
+    # FINAL API RESPONSE
     # ------------------------------------------------
     return {
         "image_id": image_id,
         "raw_text": full_text,
+        "parsed": parsed
     }
 # --------------------------------------------------
 if __name__ == "__main__":
     uvicorn.run("app:app", host="0.0.0.0", port=7860)