LogicGoInfotechSpaces committed on
Commit
945ba15
·
verified ·
1 Parent(s): 8c9caa6

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +51 -35
app.py CHANGED
@@ -4,7 +4,6 @@ import numpy as np
4
  import cv2
5
  import boto3
6
  import os
7
- import json
8
  from fastapi import FastAPI, UploadFile, File, HTTPException
9
  from rapidocr_onnxruntime import RapidOCR
10
  from openai import OpenAI
@@ -62,10 +61,7 @@ async def upload_image(file: UploadFile = File(...)):
62
  ACL="private"
63
  )
64
 
65
- return {
66
- "image_id": image_key,
67
- "message": "Uploaded successfully"
68
- }
69
 
70
  except Exception as e:
71
  raise HTTPException(status_code=500, detail=str(e))
@@ -73,7 +69,9 @@ async def upload_image(file: UploadFile = File(...)):
73
  # --------------------------------------------------
74
  @app.post("/generate/{image_id:path}")
75
  async def generate(image_id: str):
76
- # Download image from Spaces
 
 
77
  try:
78
  obj = s3.get_object(Bucket=DO_BUCKET, Key=image_id)
79
  raw_bytes = obj["Body"].read()
@@ -82,13 +80,13 @@ async def generate(image_id: str):
82
 
83
  img_array = np.frombuffer(raw_bytes, np.uint8)
84
  img = cv2.imdecode(img_array, cv2.IMREAD_COLOR)
85
-
86
  if img is None:
87
  raise HTTPException(status_code=400, detail="Unable to decode image")
88
 
89
- # OCR
 
 
90
  result, _ = ocr_engine(img)
91
-
92
  if not result:
93
  raise HTTPException(status_code=500, detail="OCR returned empty result")
94
 
@@ -96,56 +94,74 @@ async def generate(image_id: str):
96
  full_text = "\n".join(extracted)
97
 
98
  # ------------------------------------------------
99
- # CALL OPENAI GPT-4o-mini FOR PARSING
100
  # ------------------------------------------------
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
101
  prompt = f"""
102
- Extract structured expense information from the following OCR text.
103
-
104
- OCR TEXT:
105
- \"\"\"
106
- {full_text}
107
- \"\"\"
108
-
109
- Return JSON with fields:
110
- - total_amount (number)
111
- - label (category like Food, Travel, Shopping, Utilities)
112
- - date
113
- - time
114
- - payment_type (cash, credit card, debit card, mobile payment, bank transfer, Mobile trasnfer)
115
- - notes (1–2 line description)
116
-
117
- Return ONLY JSON.
118
  """
119
 
 
 
 
120
  try:
121
  response = client.chat.completions.create(
122
  model="gpt-4o-mini",
123
  messages=[
124
- {"role": "system", "content": "You are an expert expense parser."},
125
  {"role": "user", "content": prompt}
126
  ],
 
 
127
  temperature=0.2
128
  )
129
 
130
- ai_output = response.choices[0].message.content
131
 
132
  except Exception as e:
133
  raise HTTPException(status_code=500, detail=f"OpenAI Error: {str(e)}")
134
 
 
 
 
 
135
  # ------------------------------------------------
136
- # Try parsing JSON output
137
  # ------------------------------------------------
138
- try:
139
- parsed_json = json.loads(ai_output)
140
- except:
141
- parsed_json = {"error": "Failed to parse JSON", "raw_output": ai_output}
142
-
143
  return {
144
  "image_id": image_id,
145
  "raw_text": full_text,
146
- "parsed": parsed_json
147
  }
148
 
149
  # --------------------------------------------------
150
  if __name__ == "__main__":
151
  uvicorn.run("app:app", host="0.0.0.0", port=7860)
 
 
4
  import cv2
5
  import boto3
6
  import os
 
7
  from fastapi import FastAPI, UploadFile, File, HTTPException
8
  from rapidocr_onnxruntime import RapidOCR
9
  from openai import OpenAI
 
61
  ACL="private"
62
  )
63
 
64
+ return {"image_id": image_key, "message": "Uploaded successfully"}
 
 
 
65
 
66
  except Exception as e:
67
  raise HTTPException(status_code=500, detail=str(e))
 
69
  # --------------------------------------------------
70
  @app.post("/generate/{image_id:path}")
71
  async def generate(image_id: str):
72
+ # ------------------------------------------------
73
+ # Download image
74
+ # ------------------------------------------------
75
  try:
76
  obj = s3.get_object(Bucket=DO_BUCKET, Key=image_id)
77
  raw_bytes = obj["Body"].read()
 
80
 
81
  img_array = np.frombuffer(raw_bytes, np.uint8)
82
  img = cv2.imdecode(img_array, cv2.IMREAD_COLOR)
 
83
  if img is None:
84
  raise HTTPException(status_code=400, detail="Unable to decode image")
85
 
86
+ # ------------------------------------------------
87
+ # OCR Extraction
88
+ # ------------------------------------------------
89
  result, _ = ocr_engine(img)
 
90
  if not result:
91
  raise HTTPException(status_code=500, detail="OCR returned empty result")
92
 
 
94
  full_text = "\n".join(extracted)
95
 
96
  # ------------------------------------------------
97
+ # OPENAI FUNCTION CALLING SETUP
98
  # ------------------------------------------------
99
+
100
+ functions = [
101
+ {
102
+ "name": "extract_expense_details",
103
+ "description": "Extract structured expense details from OCR text.",
104
+ "parameters": {
105
+ "type": "object",
106
+ "properties": {
107
+ "total_amount": {"type": "number"},
108
+ "label": {
109
+ "type": "string",
110
+ "description": "Category such as Food, Travel, Shopping, Utilities"
111
+ },
112
+ "date": {"type": "string"},
113
+ "time": {"type": "string"},
114
+ "payment_type": {
115
+ "type": "string",
116
+ "enum": ["cash", "card", "upi", "unknown"]
117
+ },
118
+ "notes": {"type": "string"}
119
+ },
120
+ "required": ["total_amount", "label", "date"]
121
+ }
122
+ }
123
+ ]
124
+
125
  prompt = f"""
126
+ Extract expense details from this OCR text:
127
+
128
+ Return structured details using the provided function schema.
 
 
 
 
 
 
 
 
 
 
 
 
 
129
  """
130
 
131
+ # ------------------------------------------------
132
+ # OPENAI CALL WITH FUNCTION CALLING
133
+ # ------------------------------------------------
134
  try:
135
  response = client.chat.completions.create(
136
  model="gpt-4o-mini",
137
  messages=[
138
+ {"role": "system", "content": "You are a finance and receipt parsing AI."},
139
  {"role": "user", "content": prompt}
140
  ],
141
+ functions=functions,
142
+ function_call={"name": "extract_expense_details"},
143
  temperature=0.2
144
  )
145
 
146
+ function_args = response.choices[0].message.function_call.arguments
147
 
148
  except Exception as e:
149
  raise HTTPException(status_code=500, detail=f"OpenAI Error: {str(e)}")
150
 
151
+ # Convert returned JSON string -> dict
152
+ import json
153
+ parsed = json.loads(function_args)
154
+
155
  # ------------------------------------------------
156
+ # FINAL API RESPONSE
157
  # ------------------------------------------------
 
 
 
 
 
158
  return {
159
  "image_id": image_id,
160
  "raw_text": full_text,
161
+ "parsed": parsed
162
  }
163
 
164
  # --------------------------------------------------
165
  if __name__ == "__main__":
166
  uvicorn.run("app:app", host="0.0.0.0", port=7860)
167
+