WalletSyncOCR

Sleeping

App Files Files Community

LogicGoInfotechSpaces committed 16 days ago

Commit

72b7090

verified ·

1 Parent(s): 27d080d

Update app.py

Browse files

Files changed (1) hide show

app.py +229 -168

app.py CHANGED Viewed

@@ -1,3 +1,4 @@
 # app.py
 import uvicorn
 import numpy as np
@@ -5,10 +6,13 @@ import cv2
 import boto3
 import os
 import json
 import requests
-from fastapi import FastAPI, UploadFile, File, HTTPException
 from rapidocr_onnxruntime import RapidOCR
 from openai import OpenAI
 # ---------------- ENV CONFIG ----------------
 DO_KEY_ID = os.getenv("DO_SPACES_KEY_ID")
@@ -20,16 +24,16 @@ OPENAI_API_KEY = os.getenv("OPENAI_API_KEY")
 FOLDER = "OCR_Images"
 if not OPENAI_API_KEY:
     raise RuntimeError("OPENAI_API_KEY missing!")
 client = OpenAI(api_key=OPENAI_API_KEY)
-CATEGORY_API_URL = os.getenv("CATEGORY_API_URL")
-NOTES_CATEGORIZER_URL = os.getenv("NOTES_CATEGORIZER_URL")
-# S3 client
 s3 = boto3.client(
     "s3",
     region_name=DO_REGION,
@@ -38,15 +42,50 @@ s3 = boto3.client(
     aws_secret_access_key=DO_SECRET_KEY,
 )
 app = FastAPI()
 ocr_engine = RapidOCR()
 # ---------------- ROUTES ----------------
 @app.get("/health")
 async def health():
     return {"status": "ok"}
 @app.post("/upload")
 async def upload_image(file: UploadFile = File(...)):
     try:
@@ -61,113 +100,102 @@ async def upload_image(file: UploadFile = File(...)):
             ACL="private"
         )
-        # Also return a local path (if available) for debugging / local testing.
-        # Developer note: we include a local container path at /mnt/data/image.png when applicable.
-        return {"image_id": image_key, "message": "Uploaded successfully", "local_path": "/mnt/data/image.png"}
     except Exception as e:
         raise HTTPException(status_code=500, detail=str(e))
 @app.post("/generate/{image_id:path}")
-async def generate(image_id: str):
-    # -------- Download image --------
     try:
-        obj = s3.get_object(Bucket=DO_BUCKET, Key=image_id)
-        raw_bytes = obj["Body"].read()
-    except Exception:
-        # Fallback: try to load from local path if exists (useful for local testing)
-        local_path = "/mnt/data/image.png"
-        if os.path.exists(local_path):
-            with open(local_path, "rb") as f:
-                raw_bytes = f.read()
-        else:
-            raise HTTPException(status_code=404, detail="Image not found")
-    img_array = np.frombuffer(raw_bytes, np.uint8)
-    img = cv2.imdecode(img_array, cv2.IMREAD_COLOR)
-    if img is None:
-        raise HTTPException(status_code=400, detail="Unable to decode image")
-    # -------- OCR --------
-    result, _ = ocr_engine(img)
-    if not result:
-        raise HTTPException(status_code=500, detail="OCR returned empty result")
-    full_text = "\n".join([text for _, text, _ in result])
-    # -------- CONFIDENCE SCORE --------
-    confidences = [conf for _, _, conf in result if isinstance(conf, (int, float))]
-    avg_confidence = sum(confidences) / len(confidences) if confidences else 0
-    if avg_confidence < 0.70:
-        return {
-            "image_id": image_id,
-            "raw_text": full_text,
-            "confidence": round(avg_confidence, 3),
-            "message": "Upload image with more clarity or enter manually.",
-            "source_image_path": "/mnt/data/image.png"
-        }
-    # -------- JSON SCHEMA FOR GPT --------
-    schema = {
-        "name": "extract_expense_details",
-        "schema": {
-            "type": "object",
-            "properties": {
-                "total_amount": {"type": "number"},
-                "label": {"type": "string"},
-                "date": {"type": "string"},
-                "time": {"type": "string"},
-                "payment_type": {
-                    "type": "string",
-                    "enum": ["cash", "card", "upi", "unknown"]
                 },
-                "notes": {"type": "string"}
-            },
-            "required": ["total_amount", "label"]
         }
-    }
-    # -------- PROMPT --------
-    prompt = f"""
-You are an expense extraction AI.
-Extract expense details from the OCR text below:
 \"\"\"
 {full_text}
 \"\"\"
-### STRICT INFORMATION RULES:
-- Do NOT create or guess any information that does not exist in the extracted text.
-- If any field (date, time, payment_type, total_amount) is not clearly present in the text, set its value to "unknown".
-- Only infer the label category (Restaurant, Store, etc.) based on business name and item types.
-### Labeling Rules:
-1. Detect the business/merchant name from the text (e.g., KFC, Starbucks, Ying Thai Kitchen).
-2. If items are food or restaurant-related → label must be: "<Business Name> Restaurant".
-3. If it's a store/retail → "<Business Name> Store".
-4. If unclear, infer the closest meaningful category.
-5. If business name is not found → label = "unknown".
-### Notes Format:
-Always generate notes EXACTLY in this format:
 "Spent <total_amount> on <label> on <date>."
-### Required Output:
-Return structured JSON (via schema) with:
-- total_amount
-- label
-- date
-- time
-- payment_type
-- notes
 """
-    # -------- CALL GPT --------
-    try:
         response = client.chat.completions.create(
             model="gpt-4o-mini",
             response_format={"type": "json_schema", "json_schema": schema},
@@ -178,81 +206,70 @@ Return structured JSON (via schema) with:
             temperature=0.1
         )
-        # The SDK may return the json directly in a field depending on version;
-        # fall back to extracting message content.
-        raw_content = None
         try:
-            raw_content = response.choices[0].message.content
-            parsed = json.loads(raw_content)
         except Exception:
-            # try another path if SDK embeds the json directly
-            try:
-                parsed = response.choices[0].message.json  # hypothetical
-            except Exception:
-                raise
-    except Exception as e:
-        raise HTTPException(status_code=500, detail=f"OpenAI Error: {str(e)}")
-    # Ensure required keys exist and enforce strict defaults
-    parsed.setdefault("total_amount", 0)
-    parsed.setdefault("label", "unknown")
-    parsed.setdefault("date", "unknown")
-    parsed.setdefault("time", "unknown")
-    parsed.setdefault("payment_type", "unknown")
-    parsed.setdefault("notes", "unknown")
-    # -------- CATEGORY API CALL (USING NOTES INSTEAD OF LABEL) --------
-    # Use the notes text to derive a category/subcategory via the notes categorizer.
-    notes_text = parsed.get("notes", "")
-    try:
-        cat_response = requests.post(
-            NOTES_CATEGORIZER_URL,
-            json={"notes": notes_text},
-            timeout=10
         )
-        if cat_response.status_code == 200:
-            cat_data = cat_response.json()
-            # category should be filled with the subcategory field from the notes API
-            parsed["category"] = cat_data.get("subcategory", "unknown")
-            # keep label unchanged
-            parsed["label"] = parsed.get("label", "unknown")
-            # also provide the top-level title for convenience
-            parsed["category_title"] = cat_data.get("title", None)
-        else:
-            parsed["category"] = "unknown"
-            parsed["category_title"] = None
-    except Exception:
-        parsed["category"] = "unknown"
-        parsed["category_title"] = None
-    # -------- FINAL RESPONSE --------
-    return {
-        "image_id": image_id,
-        "raw_text": full_text,
-        "confidence": round(avg_confidence, 3),
-        "parsed": parsed,
-        # Developer/test helper: include local path (will be transformed if necessary)
-        "source_image_path": "/mnt/data/image.png"
-    }
 @app.get("/ping")
 def ping():
     return {"status": "alive"}
 if __name__ == "__main__":
     uvicorn.run("app:app", host="0.0.0.0", port=7860)
 # # app.py
 # import uvicorn
 # import numpy as np
@@ -280,7 +297,9 @@ if __name__ == "__main__":
 # client = OpenAI(api_key=OPENAI_API_KEY)
-# # Category API URL
 # # S3 client
 # s3 = boto3.client(
@@ -314,7 +333,9 @@ if __name__ == "__main__":
 #             ACL="private"
 #         )
-#         return {"image_id": image_key, "message": "Uploaded successfully"}
 #     except Exception as e:
 #         raise HTTPException(status_code=500, detail=str(e))
@@ -327,8 +348,14 @@ if __name__ == "__main__":
 #     try:
 #         obj = s3.get_object(Bucket=DO_BUCKET, Key=image_id)
 #         raw_bytes = obj["Body"].read()
-#     except:
-#         raise HTTPException(status_code=404, detail="Image not found")
 #     img_array = np.frombuffer(raw_bytes, np.uint8)
 #     img = cv2.imdecode(img_array, cv2.IMREAD_COLOR)
@@ -351,7 +378,8 @@ if __name__ == "__main__":
 #             "image_id": image_id,
 #             "raw_text": full_text,
 #             "confidence": round(avg_confidence, 3),
-#             "message": "Upload image with more clarity or enter manually."
 #         }
 #     # -------- JSON SCHEMA FOR GPT --------
@@ -422,38 +450,71 @@ if __name__ == "__main__":
 #             temperature=0.1
 #         )
-#         parsed = json.loads(response.choices[0].message.content)
 #     except Exception as e:
 #         raise HTTPException(status_code=500, detail=f"OpenAI Error: {str(e)}")
-#     # -------- CATEGORY API CALL --------
-#     extracted_label = parsed.get("label", "unknown")
 #     try:
 #         cat_response = requests.post(
-#             CATEGORY_API_URL,
-#             json={"label": extracted_label},
 #             timeout=10
 #         )
 #         if cat_response.status_code == 200:
 #             cat_data = cat_response.json()
-#             parsed["category"] = cat_data.get("category", "unknown")
 #         else:
 #             parsed["category"] = "unknown"
 #     except Exception:
 #         parsed["category"] = "unknown"
 #     # -------- FINAL RESPONSE --------
 #     return {
 #         "image_id": image_id,
 #         "raw_text": full_text,
 #         "confidence": round(avg_confidence, 3),
-#         "parsed": parsed
 #     }
 # if __name__ == "__main__":
-#     uvicorn.run("app:app", host="0.0.0.0", port=7860)

 # app.py
 import uvicorn
 import numpy as np
 import boto3
 import os
 import json
+import time
 import requests
+from datetime import datetime
+from fastapi import FastAPI, UploadFile, File, HTTPException, Header
 from rapidocr_onnxruntime import RapidOCR
 from openai import OpenAI
+from pymongo import MongoClient
 # ---------------- ENV CONFIG ----------------
 DO_KEY_ID = os.getenv("DO_SPACES_KEY_ID")
 FOLDER = "OCR_Images"
+CATEGORY_API_URL = os.getenv("CATEGORY_API_URL")
+NOTES_CATEGORIZER_URL = os.getenv("NOTES_CATEGORIZER_URL")
 if not OPENAI_API_KEY:
     raise RuntimeError("OPENAI_API_KEY missing!")
+# ---------------- OPENAI ----------------
 client = OpenAI(api_key=OPENAI_API_KEY)
+# ---------------- S3 ----------------
 s3 = boto3.client(
     "s3",
     region_name=DO_REGION,
     aws_secret_access_key=DO_SECRET_KEY,
 )
+# ---------------- MONGODB ----------------
+MONGO_URI = os.getenv("MONGO_URI")
+mongo_client = MongoClient(MONGO_URI)
+mongo_db = mongo_client["expense"]
+api_logs_col = mongo_db["api_logs"]
+# ---------------- APP ----------------
 app = FastAPI()
 ocr_engine = RapidOCR()
+# ---------------- HELPERS ----------------
+def ist_now():
+    """Return the current India Standard Time as ``DD-MM-YYYY HH:MM:SS:IST``.
+
+    The time is computed from an explicit UTC+05:30 offset instead of the
+    host's local clock, so the ``IST`` suffix stays accurate whatever
+    timezone the server is configured with.
+    """
+    # Function-scope import keeps this helper self-contained.
+    from datetime import timedelta, timezone
+
+    # IST has no daylight-saving transitions, so a fixed offset is sufficient.
+    ist = timezone(timedelta(hours=5, minutes=30), name="IST")
+    return datetime.now(ist).strftime("%d-%m-%Y %H:%M:%S:IST")
+def log_api_event(
+    *,
+    status: str,
+    response_time: float,
+    user_id: str | None,
+    error_message: str | None = None
+):
+    """Record one Receipt Scanner API call in the ``api_logs`` collection.
+
+    This is best-effort: a failed insert is reported through ``logging``
+    and never propagates to the caller.
+
+    Args:
+        status: Outcome label stored with the entry.
+        response_time: Elapsed time; stored rounded to 3 decimal places.
+        user_id: Stored under ``user_id`` only when truthy.
+        error_message: Stored under ``error_message`` only when truthy.
+    """
+    # Function-scope import keeps this helper self-contained.
+    import logging
+
+    payload = {
+        "name": "Receipt Scanner",
+        "status": status,
+        "date": ist_now(),
+        "response_time": round(response_time, 3),
+    }
+    if user_id:
+        payload["user_id"] = user_id
+    if error_message:
+        payload["error_message"] = error_message
+    try:
+        api_logs_col.insert_one(payload)
+    except Exception:
+        # Never break the API because of a logging failure, but leave a
+        # trace so a broken log pipeline does not go unnoticed.
+        logging.getLogger(__name__).warning(
+            "Failed to write API log entry", exc_info=True
+        )
 # ---------------- ROUTES ----------------
 @app.get("/health")
 async def health():
     return {"status": "ok"}
 @app.post("/upload")
 async def upload_image(file: UploadFile = File(...)):
     try:
             ACL="private"
         )
+        return {
+            "status": "success",
+            "message": "Uploaded successfully",
+            "image_id": image_key,
+            "local_path": "/mnt/data/image.png"
+        }
     except Exception as e:
         raise HTTPException(status_code=500, detail=str(e))
 @app.post("/generate/{image_id:path}")
+async def generate(
+    image_id: str,
+    user_id: str | None = Header(default=None)
+):
+    start_time = time.time()
     try:
+        # -------- DOWNLOAD IMAGE --------
+        try:
+            obj = s3.get_object(Bucket=DO_BUCKET, Key=image_id)
+            raw_bytes = obj["Body"].read()
+        except Exception:
+            local_path = "/mnt/data/image.png"
+            if os.path.exists(local_path):
+                with open(local_path, "rb") as f:
+                    raw_bytes = f.read()
+            else:
+                raise HTTPException(status_code=404, detail="Image not found")
+        img_array = np.frombuffer(raw_bytes, np.uint8)
+        img = cv2.imdecode(img_array, cv2.IMREAD_COLOR)
+        if img is None:
+            raise HTTPException(status_code=400, detail="Unable to decode image")
+        # -------- OCR --------
+        result, _ = ocr_engine(img)
+        if not result:
+            raise RuntimeError("OCR returned empty result")
+        full_text = "\n".join([text for _, text, _ in result])
+        confidences = [conf for _, _, conf in result if isinstance(conf, (int, float))]
+        avg_confidence = sum(confidences) / len(confidences) if confidences else 0
+        if avg_confidence < 0.70:
+            response_time = time.time() - start_time
+            log_api_event(
+                status="fail",
+                response_time=response_time,
+                user_id=user_id,
+                error_message="Low OCR confidence"
+            )
+            return {
+                "status": "fail",
+                "message": "Upload image with more clarity or enter manually.",
+                "image_id": image_id,
+                "raw_text": full_text,
+                "confidence": round(avg_confidence, 3),
+            }
+        # -------- GPT SCHEMA --------
+        schema = {
+            "name": "extract_expense_details",
+            "schema": {
+                "type": "object",
+                "properties": {
+                    "total_amount": {"type": "number"},
+                    "label": {"type": "string"},
+                    "date": {"type": "string"},
+                    "time": {"type": "string"},
+                    "payment_type": {
+                        "type": "string",
+                        "enum": ["cash", "card", "upi", "unknown"]
+                    },
+                    "notes": {"type": "string"}
                 },
+                "required": ["total_amount", "label"]
+            }
         }
+        prompt = f"""
+Extract expense details from OCR text below:
 \"\"\"
 {full_text}
 \"\"\"
+Rules:
+- Do not guess missing values → use "unknown"
+- Notes format:
 "Spent <total_amount> on <label> on <date>."
 """
         response = client.chat.completions.create(
             model="gpt-4o-mini",
             response_format={"type": "json_schema", "json_schema": schema},
             temperature=0.1
         )
+        parsed = json.loads(response.choices[0].message.content)
+        parsed.setdefault("date", "unknown")
+        parsed.setdefault("time", "unknown")
+        parsed.setdefault("payment_type", "unknown")
+        parsed.setdefault("notes", "unknown")
+        # -------- CATEGORY API --------
         try:
+            cat_response = requests.post(
+                NOTES_CATEGORIZER_URL,
+                json={"notes": parsed["notes"]},
+                timeout=10
+            )
+            if cat_response.status_code == 200:
+                cat_data = cat_response.json()
+                parsed["category"] = cat_data.get("subcategory", "unknown")
+                parsed["category_title"] = cat_data.get("title")
+            else:
+                parsed["category"] = "unknown"
+                parsed["category_title"] = None
         except Exception:
+            parsed["category"] = "unknown"
+            parsed["category_title"] = None
+        response_time = time.time() - start_time
+        log_api_event(
+            status="success",
+            response_time=response_time,
+            user_id=user_id
+        )
+        return {
+            "status": "success",
+            "message": "Receipt processed and logged in DB",
+            "image_id": image_id,
+            "confidence": round(avg_confidence, 3),
+            "raw_text": full_text,
+            "parsed": parsed,
+        }
+    except Exception as e:
+        response_time = time.time() - start_time
+        log_api_event(
+            status="fail",
+            response_time=response_time,
+            user_id=user_id,
+            error_message=str(e)
         )
+        raise HTTPException(status_code=500, detail=str(e))
 @app.get("/ping")
 def ping():
     return {"status": "alive"}
 if __name__ == "__main__":
     uvicorn.run("app:app", host="0.0.0.0", port=7860)
 # # app.py
 # import uvicorn
 # import numpy as np
 # client = OpenAI(api_key=OPENAI_API_KEY)
+# CATEGORY_API_URL = os.getenv("CATEGORY_API_URL")
+# NOTES_CATEGORIZER_URL = os.getenv("NOTES_CATEGORIZER_URL")
 # # S3 client
 # s3 = boto3.client(
 #             ACL="private"
 #         )
+#         # Also return a local path (if available) for debugging / local testing.
+#         # Developer note: we include a local container path at /mnt/data/image.png when applicable.
+#         return {"image_id": image_key, "message": "Uploaded successfully", "local_path": "/mnt/data/image.png"}
 #     except Exception as e:
 #         raise HTTPException(status_code=500, detail=str(e))
 #     try:
 #         obj = s3.get_object(Bucket=DO_BUCKET, Key=image_id)
 #         raw_bytes = obj["Body"].read()
+#     except Exception:
+#         # Fallback: try to load from local path if exists (useful for local testing)
+#         local_path = "/mnt/data/image.png"
+#         if os.path.exists(local_path):
+#             with open(local_path, "rb") as f:
+#                 raw_bytes = f.read()
+#         else:
+#             raise HTTPException(status_code=404, detail="Image not found")
 #     img_array = np.frombuffer(raw_bytes, np.uint8)
 #     img = cv2.imdecode(img_array, cv2.IMREAD_COLOR)
 #             "image_id": image_id,
 #             "raw_text": full_text,
 #             "confidence": round(avg_confidence, 3),
+#             "message": "Upload image with more clarity or enter manually.",
+#             "source_image_path": "/mnt/data/image.png"
 #         }
 #     # -------- JSON SCHEMA FOR GPT --------
 #             temperature=0.1
 #         )
+#         # The SDK may return the json directly in a field depending on version;
+#         # fall back to extracting message content.
+#         raw_content = None
+#         try:
+#             raw_content = response.choices[0].message.content
+#             parsed = json.loads(raw_content)
+#         except Exception:
+#             # try another path if SDK embeds the json directly
+#             try:
+#                 parsed = response.choices[0].message.json  # hypothetical
+#             except Exception:
+#                 raise
 #     except Exception as e:
 #         raise HTTPException(status_code=500, detail=f"OpenAI Error: {str(e)}")
+#     # Ensure required keys exist and enforce strict defaults
+#     parsed.setdefault("total_amount", 0)
+#     parsed.setdefault("label", "unknown")
+#     parsed.setdefault("date", "unknown")
+#     parsed.setdefault("time", "unknown")
+#     parsed.setdefault("payment_type", "unknown")
+#     parsed.setdefault("notes", "unknown")
+#     # -------- CATEGORY API CALL (USING NOTES INSTEAD OF LABEL) --------
+#     # Use the notes text to derive a category/subcategory via the notes categorizer.
+#     notes_text = parsed.get("notes", "")
 #     try:
 #         cat_response = requests.post(
+#             NOTES_CATEGORIZER_URL,
+#             json={"notes": notes_text},
 #             timeout=10
 #         )
 #         if cat_response.status_code == 200:
 #             cat_data = cat_response.json()
+#             # category should be filled with the subcategory field from the notes API
+#             parsed["category"] = cat_data.get("subcategory", "unknown")
+#             # keep label unchanged
+#             parsed["label"] = parsed.get("label", "unknown")
+#             # also provide the top-level title for convenience
+#             parsed["category_title"] = cat_data.get("title", None)
 #         else:
 #             parsed["category"] = "unknown"
+#             parsed["category_title"] = None
 #     except Exception:
 #         parsed["category"] = "unknown"
+#         parsed["category_title"] = None
 #     # -------- FINAL RESPONSE --------
 #     return {
 #         "image_id": image_id,
 #         "raw_text": full_text,
 #         "confidence": round(avg_confidence, 3),
+#         "parsed": parsed,
+#         # Developer/test helper: include local path (will be transformed if necessary)
+#         "source_image_path": "/mnt/data/image.png"
 #     }
+# @app.get("/ping")
+# def ping():
+#     return {"status": "alive"}
 # if __name__ == "__main__":
+#     uvicorn.run("app:app", host="0.0.0.0", port=7860)