LogicGoInfotechSpaces committed on
Commit
8c9caa6
·
verified ·
1 Parent(s): 96a4e67

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +18 -10
app.py CHANGED
@@ -4,6 +4,7 @@ import numpy as np
4
  import cv2
5
  import boto3
6
  import os
 
7
  from fastapi import FastAPI, UploadFile, File, HTTPException
8
  from rapidocr_onnxruntime import RapidOCR
9
  from openai import OpenAI
@@ -72,7 +73,7 @@ async def upload_image(file: UploadFile = File(...)):
72
  # --------------------------------------------------
73
  @app.post("/generate/{image_id:path}")
74
  async def generate(image_id: str):
75
- # Download from Spaces
76
  try:
77
  obj = s3.get_object(Bucket=DO_BUCKET, Key=image_id)
78
  raw_bytes = obj["Body"].read()
@@ -85,7 +86,7 @@ async def generate(image_id: str):
85
  if img is None:
86
  raise HTTPException(status_code=400, detail="Unable to decode image")
87
 
88
- # OCR Extract
89
  result, _ = ocr_engine(img)
90
 
91
  if not result:
@@ -95,7 +96,7 @@ async def generate(image_id: str):
95
  full_text = "\n".join(extracted)
96
 
97
  # ------------------------------------------------
98
- # CALL OPENAI TO GENERATE STRUCTURED EXPENSE DATA
99
  # ------------------------------------------------
100
  prompt = f"""
101
  Extract structured expense information from the following OCR text.
@@ -105,15 +106,15 @@ OCR TEXT:
105
  {full_text}
106
  \"\"\"
107
 
108
- Return a JSON with exactly these fields:
109
  - total_amount (number)
110
  - label (category like Food, Travel, Shopping, Utilities)
111
  - date
112
  - time
113
- - payment_type (cash, card, upi, unknown)
114
- - notes (1–2 line human readable description)
115
 
116
- If something is missing, infer logically.
117
  """
118
 
119
  try:
@@ -126,16 +127,23 @@ If something is missing, infer logically.
126
  temperature=0.2
127
  )
128
 
129
- ai_output = response.choices[0].message["content"]
130
 
131
  except Exception as e:
132
  raise HTTPException(status_code=500, detail=f"OpenAI Error: {str(e)}")
133
 
134
- # Return OCR + structured AI result
 
 
 
 
 
 
 
135
  return {
136
  "image_id": image_id,
137
  "raw_text": full_text,
138
- "parsed": ai_output
139
  }
140
 
141
  # --------------------------------------------------
 
4
  import cv2
5
  import boto3
6
  import os
7
+ import json
8
  from fastapi import FastAPI, UploadFile, File, HTTPException
9
  from rapidocr_onnxruntime import RapidOCR
10
  from openai import OpenAI
 
73
  # --------------------------------------------------
74
  @app.post("/generate/{image_id:path}")
75
  async def generate(image_id: str):
76
+ # Download image from Spaces
77
  try:
78
  obj = s3.get_object(Bucket=DO_BUCKET, Key=image_id)
79
  raw_bytes = obj["Body"].read()
 
86
  if img is None:
87
  raise HTTPException(status_code=400, detail="Unable to decode image")
88
 
89
+ # OCR
90
  result, _ = ocr_engine(img)
91
 
92
  if not result:
 
96
  full_text = "\n".join(extracted)
97
 
98
  # ------------------------------------------------
99
+ # CALL OPENAI GPT-4o-mini FOR PARSING
100
  # ------------------------------------------------
101
  prompt = f"""
102
  Extract structured expense information from the following OCR text.
 
106
  {full_text}
107
  \"\"\"
108
 
109
+ Return JSON with fields:
110
  - total_amount (number)
111
  - label (category like Food, Travel, Shopping, Utilities)
112
  - date
113
  - time
114
+ - payment_type (cash, credit card, debit card, mobile payment, bank transfer, Mobile trasnfer)
115
+ - notes (1–2 line description)
116
 
117
+ Return ONLY JSON.
118
  """
119
 
120
  try:
 
127
  temperature=0.2
128
  )
129
 
130
+ ai_output = response.choices[0].message.content
131
 
132
  except Exception as e:
133
  raise HTTPException(status_code=500, detail=f"OpenAI Error: {str(e)}")
134
 
135
+ # ------------------------------------------------
136
+ # Try parsing JSON output
137
+ # ------------------------------------------------
138
+ try:
139
+ parsed_json = json.loads(ai_output)
140
+ except:
141
+ parsed_json = {"error": "Failed to parse JSON", "raw_output": ai_output}
142
+
143
  return {
144
  "image_id": image_id,
145
  "raw_text": full_text,
146
+ "parsed": parsed_json
147
  }
148
 
149
  # --------------------------------------------------