WalletSyncOCR

Running

App Files Files Community

LogicGoInfotechSpaces committed on Nov 21

Commit

c682e10

verified ·

1 Parent(s): edbd715

Update app.py

Browse files

Files changed (1) hide show

app.py +265 -12

app.py CHANGED Viewed

@@ -25,9 +25,12 @@ if not OPENAI_API_KEY:
 client = OpenAI(api_key=OPENAI_API_KEY)
-# Category API URL
 CATEGORY_API_URL = "https://logicgoinfotechspaces-auto-expense-categorization.hf.space/api/labels"
 # S3 client
 s3 = boto3.client(
     "s3",
@@ -60,7 +63,9 @@ async def upload_image(file: UploadFile = File(...)):
             ACL="private"
         )
-        return {"image_id": image_key, "message": "Uploaded successfully"}
     except Exception as e:
         raise HTTPException(status_code=500, detail=str(e))
@@ -73,8 +78,14 @@ async def generate(image_id: str):
     try:
         obj = s3.get_object(Bucket=DO_BUCKET, Key=image_id)
         raw_bytes = obj["Body"].read()
-    except:
-        raise HTTPException(status_code=404, detail="Image not found")
     img_array = np.frombuffer(raw_bytes, np.uint8)
     img = cv2.imdecode(img_array, cv2.IMREAD_COLOR)
@@ -97,7 +108,8 @@ async def generate(image_id: str):
             "image_id": image_id,
             "raw_text": full_text,
             "confidence": round(avg_confidence, 3),
-            "message": "Upload image with more clarity or enter manually."
         }
     # -------- JSON SCHEMA FOR GPT --------
@@ -168,38 +180,279 @@ Return structured JSON (via schema) with:
             temperature=0.1
         )
-        parsed = json.loads(response.choices[0].message.content)
     except Exception as e:
         raise HTTPException(status_code=500, detail=f"OpenAI Error: {str(e)}")
-    # -------- CATEGORY API CALL --------
-    extracted_label = parsed.get("label", "unknown")
     try:
         cat_response = requests.post(
-            CATEGORY_API_URL,
-            json={"label": extracted_label},
             timeout=10
         )
         if cat_response.status_code == 200:
             cat_data = cat_response.json()
-            parsed["category"] = cat_data.get("category", "unknown")
         else:
             parsed["category"] = "unknown"
     except Exception:
         parsed["category"] = "unknown"
     # -------- FINAL RESPONSE --------
     return {
         "image_id": image_id,
         "raw_text": full_text,
         "confidence": round(avg_confidence, 3),
-        "parsed": parsed
     }
 if __name__ == "__main__":
     uvicorn.run("app:app", host="0.0.0.0", port=7860)

 client = OpenAI(api_key=OPENAI_API_KEY)
+# Category API URL (label-based) - kept for reference but not used for final categorization
 CATEGORY_API_URL = "https://logicgoinfotechspaces-auto-expense-categorization.hf.space/api/labels"
+# Notes-based categorizer (the one we will call)
+NOTES_CATEGORIZER_URL = "https://logicgoinfotechspaces-expensecategorizenotes.hf.space/api/v1/categorize"
 # S3 client
 s3 = boto3.client(
     "s3",
             ACL="private"
         )
+        # Also return a local path for debugging / local testing.
+        # Developer note: the container path /mnt/data/image.png is hard-coded and always included in the response.
+        return {"image_id": image_key, "message": "Uploaded successfully", "local_path": "/mnt/data/image.png"}
     except Exception as e:
         raise HTTPException(status_code=500, detail=str(e))
     try:
         obj = s3.get_object(Bucket=DO_BUCKET, Key=image_id)
         raw_bytes = obj["Body"].read()
+    except Exception:
+        # Fallback: try to load from local path if exists (useful for local testing)
+        local_path = "/mnt/data/image.png"
+        if os.path.exists(local_path):
+            with open(local_path, "rb") as f:
+                raw_bytes = f.read()
+        else:
+            raise HTTPException(status_code=404, detail="Image not found")
     img_array = np.frombuffer(raw_bytes, np.uint8)
     img = cv2.imdecode(img_array, cv2.IMREAD_COLOR)
             "image_id": image_id,
             "raw_text": full_text,
             "confidence": round(avg_confidence, 3),
+            "message": "Upload image with more clarity or enter manually.",
+            "source_image_path": "/mnt/data/image.png"
         }
     # -------- JSON SCHEMA FOR GPT --------
             temperature=0.1
         )
+        # Parse the structured JSON from the message content first; if that fails,
+        # fall back to a field where the SDK may embed the JSON directly (depending on version).
+        raw_content = None
+        try:
+            raw_content = response.choices[0].message.content
+            parsed = json.loads(raw_content)
+        except Exception:
+            # try another path if SDK embeds the json directly
+            try:
+                parsed = response.choices[0].message.json  # hypothetical
+            except Exception:
+                raise
     except Exception as e:
         raise HTTPException(status_code=500, detail=f"OpenAI Error: {str(e)}")
+    # Ensure required keys exist and enforce strict defaults
+    parsed.setdefault("total_amount", 0)
+    parsed.setdefault("label", "unknown")
+    parsed.setdefault("date", "unknown")
+    parsed.setdefault("time", "unknown")
+    parsed.setdefault("payment_type", "unknown")
+    parsed.setdefault("notes", "unknown")
+    # -------- CATEGORY API CALL (USING NOTES INSTEAD OF LABEL) --------
+    # Use the notes text to derive a category/subcategory via the notes categorizer.
+    notes_text = parsed.get("notes", "")
     try:
         cat_response = requests.post(
+            NOTES_CATEGORIZER_URL,
+            json={"notes": notes_text},
             timeout=10
         )
         if cat_response.status_code == 200:
             cat_data = cat_response.json()
+            # category should be filled with the subcategory field from the notes API
+            parsed["category"] = cat_data.get("subcategory", "unknown")
+            # keep label unchanged
+            parsed["label"] = parsed.get("label", "unknown")
+            # also provide the top-level title for convenience
+            parsed["category_title"] = cat_data.get("title", None)
         else:
             parsed["category"] = "unknown"
+            parsed["category_title"] = None
     except Exception:
         parsed["category"] = "unknown"
+        parsed["category_title"] = None
     # -------- FINAL RESPONSE --------
     return {
         "image_id": image_id,
         "raw_text": full_text,
         "confidence": round(avg_confidence, 3),
+        "parsed": parsed,
+        # Developer/test helper: include local path (will be transformed if necessary)
+        "source_image_path": "/mnt/data/image.png"
     }
 if __name__ == "__main__":
     uvicorn.run("app:app", host="0.0.0.0", port=7860)
+# # app.py
+# import uvicorn
+# import numpy as np
+# import cv2
+# import boto3
+# import os
+# import json
+# import requests
+# from fastapi import FastAPI, UploadFile, File, HTTPException
+# from rapidocr_onnxruntime import RapidOCR
+# from openai import OpenAI
+# # ---------------- ENV CONFIG ----------------
+# DO_KEY_ID = os.getenv("DO_SPACES_KEY_ID")
+# DO_SECRET_KEY = os.getenv("DO_SPACES_SECRET_KEY")
+# DO_REGION = os.getenv("DO_SPACES_REGION", "blr1")
+# DO_ENDPOINT = os.getenv("DO_SPACES_ENDPOINT")
+# DO_BUCKET = os.getenv("DO_SPACES_BUCKET", "milestone")
+# OPENAI_API_KEY = os.getenv("OPENAI_API_KEY")
+# FOLDER = "OCR_Images"
+# if not OPENAI_API_KEY:
+#     raise RuntimeError("OPENAI_API_KEY missing!")
+# client = OpenAI(api_key=OPENAI_API_KEY)
+# # Category API URL
+# CATEGORY_API_URL = "https://logicgoinfotechspaces-auto-expense-categorization.hf.space/api/labels"
+# # S3 client
+# s3 = boto3.client(
+#     "s3",
+#     region_name=DO_REGION,
+#     endpoint_url=DO_ENDPOINT,
+#     aws_access_key_id=DO_KEY_ID,
+#     aws_secret_access_key=DO_SECRET_KEY,
+# )
+# app = FastAPI()
+# ocr_engine = RapidOCR()
+# # ---------------- ROUTES ----------------
+# @app.get("/health")
+# async def health():
+#     return {"status": "ok"}
+# @app.post("/upload")
+# async def upload_image(file: UploadFile = File(...)):
+#     try:
+#         file_bytes = await file.read()
+#         image_key = f"{FOLDER}/{file.filename}"
+#         s3.put_object(
+#             Bucket=DO_BUCKET,
+#             Key=image_key,
+#             Body=file_bytes,
+#             ContentType=file.content_type,
+#             ACL="private"
+#         )
+#         return {"image_id": image_key, "message": "Uploaded successfully"}
+#     except Exception as e:
+#         raise HTTPException(status_code=500, detail=str(e))
+# @app.post("/generate/{image_id:path}")
+# async def generate(image_id: str):
+#     # -------- Download image --------
+#     try:
+#         obj = s3.get_object(Bucket=DO_BUCKET, Key=image_id)
+#         raw_bytes = obj["Body"].read()
+#     except:
+#         raise HTTPException(status_code=404, detail="Image not found")
+#     img_array = np.frombuffer(raw_bytes, np.uint8)
+#     img = cv2.imdecode(img_array, cv2.IMREAD_COLOR)
+#     if img is None:
+#         raise HTTPException(status_code=400, detail="Unable to decode image")
+#     # -------- OCR --------
+#     result, _ = ocr_engine(img)
+#     if not result:
+#         raise HTTPException(status_code=500, detail="OCR returned empty result")
+#     full_text = "\n".join([text for _, text, _ in result])
+#     # -------- CONFIDENCE SCORE --------
+#     confidences = [conf for _, _, conf in result if isinstance(conf, (int, float))]
+#     avg_confidence = sum(confidences) / len(confidences) if confidences else 0
+#     if avg_confidence < 0.70:
+#         return {
+#             "image_id": image_id,
+#             "raw_text": full_text,
+#             "confidence": round(avg_confidence, 3),
+#             "message": "Upload image with more clarity or enter manually."
+#         }
+#     # -------- JSON SCHEMA FOR GPT --------
+#     schema = {
+#         "name": "extract_expense_details",
+#         "schema": {
+#             "type": "object",
+#             "properties": {
+#                 "total_amount": {"type": "number"},
+#                 "label": {"type": "string"},
+#                 "date": {"type": "string"},
+#                 "time": {"type": "string"},
+#                 "payment_type": {
+#                     "type": "string",
+#                     "enum": ["cash", "card", "upi", "unknown"]
+#                 },
+#                 "notes": {"type": "string"}
+#             },
+#             "required": ["total_amount", "label"]
+#         }
+#     }
+#     # -------- PROMPT --------
+#     prompt = f"""
+# You are an expense extraction AI.
+# Extract expense details from the OCR text below:
+# \"\"\"
+# {full_text}
+# \"\"\"
+# ### STRICT INFORMATION RULES:
+# - Do NOT create or guess any information that does not exist in the extracted text.
+# - If any field (date, time, payment_type, total_amount) is not clearly present in the text, set its value to "unknown".
+# - Only infer the label category (Restaurant, Store, etc.) based on business name and item types.
+# ### Labeling Rules:
+# 1. Detect the business/merchant name from the text (e.g., KFC, Starbucks, Ying Thai Kitchen).
+# 2. If items are food or restaurant-related → label must be: "<Business Name> Restaurant".
+# 3. If it's a store/retail → "<Business Name> Store".
+# 4. If unclear, infer the closest meaningful category.
+# 5. If business name is not found → label = "unknown".
+# ### Notes Format:
+# Always generate notes EXACTLY in this format:
+# "Spent <total_amount> on <label> on <date>."
+# ### Required Output:
+# Return structured JSON (via schema) with:
+# - total_amount
+# - label
+# - date
+# - time
+# - payment_type
+# - notes
+# """
+#     # -------- CALL GPT --------
+#     try:
+#         response = client.chat.completions.create(
+#             model="gpt-4o-mini",
+#             response_format={"type": "json_schema", "json_schema": schema},
+#             messages=[
+#                 {"role": "system", "content": "You are an expert in receipt parsing."},
+#                 {"role": "user", "content": prompt}
+#             ],
+#             temperature=0.1
+#         )
+#         parsed = json.loads(response.choices[0].message.content)
+#     except Exception as e:
+#         raise HTTPException(status_code=500, detail=f"OpenAI Error: {str(e)}")
+#     # -------- CATEGORY API CALL --------
+#     extracted_label = parsed.get("label", "unknown")
+#     try:
+#         cat_response = requests.post(
+#             CATEGORY_API_URL,
+#             json={"label": extracted_label},
+#             timeout=10
+#         )
+#         if cat_response.status_code == 200:
+#             cat_data = cat_response.json()
+#             parsed["category"] = cat_data.get("category", "unknown")
+#         else:
+#             parsed["category"] = "unknown"
+#     except Exception:
+#         parsed["category"] = "unknown"
+#     # -------- FINAL RESPONSE --------
+#     return {
+#         "image_id": image_id,
+#         "raw_text": full_text,
+#         "confidence": round(avg_confidence, 3),
+#         "parsed": parsed
+#     }
+# if __name__ == "__main__":
+#     uvicorn.run("app:app", host="0.0.0.0", port=7860)