# receipt-ocr / app.py
# Khaw100
# deploy receipt ocr api
# 0588e55
import os
os.environ["PADDLE_DISABLE_ONEDNN"] = "1"
import io
import json
import re
from PIL import Image
from fastapi import FastAPI, File, UploadFile, Form
from openai import OpenAI
from models.paddleocr_ocr import PaddleOCRModel
from models.donut_ocr import DonutModel
# ASGI application object; the route decorators further down register on it.
app = FastAPI()
# OCR / Donut wrappers are constructed once at import time and shared by every
# request handler in this module.
# NOTE(review): presumably these load model weights on construction, which
# would make import slow — confirm in models/.
ocr = PaddleOCRModel(lang="en")
donut = DonutModel()
# The API key is read from the OPENAI_API_KEY environment variable
# (os.environ.get yields None when it is unset).
openai_client = OpenAI(api_key=os.environ.get("OPENAI_API_KEY"))
# System message sent with every OpenAI request.
SYSTEM_PROMPT = "You are a financial parser. Return ONLY raw JSON, no markdown, no explanation."

# Template for free-text messages. It is rendered with str.format(message=...),
# so the literal JSON braces must stay doubled ({{ }}) here.
TRANSACTION_PROMPT = """Extract financial transaction from the message.
Return ONLY raw JSON:
{{"type":"expense or income","category":"food|transport|shopping|investment|other","description":"short description","amount":integer}}
Message: {message}"""

# Per-receipt-type instructions, keyed by the `receipt_type` form field.
# These strings are sent verbatim (they are NOT passed through str.format), so
# the JSON examples use single braces; doubled braces would reach the model
# literally as "{{...}}", which is not valid JSON to imitate.
RECEIPT_PROMPTS = {
    "ridehailing": 'Ride-hailing receipt (Gojek/Grab). Extract FINAL total after discount. Return ONLY raw JSON: {"type":"expense","category":"transport","description":"route","amount":0,"platform":"gojek/grab"}',
    "ewallet": 'E-wallet transaction. Return ONLY raw JSON: {"type":"expense/income","category":"transfer","description":"","amount":0,"platform":""}',
    "minimarket": 'Minimarket receipt. Return ONLY raw JSON: {"type":"expense","category":"shopping","description":"","items":[{"name":"","qty":1,"price":0}],"amount":0}',
    "unknown": 'Extract financial transaction. Return ONLY raw JSON: {"type":"expense/income","category":"food/transport/shopping/transfer/other","description":"","amount":0}',
}
# ── Health check ──────────────────────────────────────────
@app.get("/")
def root():
    """Health check: report that the service is reachable."""
    payload = {
        "status": "ok",
        "service": "ai-financial-planner",
    }
    return payload
# ── Endpoint 1: Parse text transaction ───────────────────
@app.post("/parse/transaction")
async def parse_transaction(
    message: str = Form(...),
):
    """
    Parse a free-text message into a transaction: text → OpenAI → JSON.

    Example message: "Naik gojek 15000"

    Returns ``{"status": "ok", "result": <parsed JSON>}`` on success, or
    ``{"status": "error", "message": <str(exception)>}`` if any step raises.
    """
    # Local import; fastapi is already a dependency of this module.
    from fastapi.concurrency import run_in_threadpool

    try:
        prompt = TRANSACTION_PROMPT.format(message=message)
        # _call_openai uses the synchronous OpenAI client. Calling it directly
        # inside this coroutine would block the event loop (and every other
        # request) for the whole round trip, so run it in a worker thread.
        result = await run_in_threadpool(_call_openai, prompt)
        return {"status": "ok", "result": _extract_json(result)}
    except Exception as e:
        # Failures are reported in the response body instead of being raised,
        # so clients always receive the same envelope shape.
        return {"status": "error", "message": str(e)}
# ── Endpoint 2: Parse receipt image ──────────────────────
@app.post("/parse/receipt")
async def parse_receipt(
    file: UploadFile = File(...),
    receipt_type: str = Form(default="unknown"),
):
    """
    Parse a receipt image into a transaction: image → OCR → OpenAI → JSON.

    ``receipt_type == "restaurant"`` is handled by the Donut model alone; every
    other value goes through PaddleOCR and then the OpenAI prompt selected by
    ``receipt_type`` (falling back to the "unknown" prompt).

    Returns ``{"status": "ok", "source": ..., "result": ...}`` on success, or
    ``{"status": "error", "message": ...}`` if OCR yields too little text or
    any step raises.
    """
    # Local import; fastapi is already a dependency of this module.
    from fastapi.concurrency import run_in_threadpool

    try:
        contents = await file.read()
        image = Image.open(io.BytesIO(contents)).convert("RGB")

        # Restaurant receipts go straight to Donut (no LLM needed).
        # NOTE(review): Donut/PaddleOCR inference below also runs synchronously
        # on the event loop; it is left inline because the thread-safety of the
        # shared model objects is not visible from this file.
        if receipt_type == "restaurant":
            result = donut.run(image)
            return {"status": "ok", "source": "donut", "result": result}

        # Everything else: PaddleOCR → OpenAI.
        raw_text = ocr.extract_clean(image, receipt_type=receipt_type)
        # Fewer than 10 non-blank characters is treated as an OCR failure.
        if not raw_text or len(raw_text.strip()) < 10:
            return {"status": "error", "message": "OCR failed to extract text"}

        prompt = RECEIPT_PROMPTS.get(receipt_type, RECEIPT_PROMPTS["unknown"])
        full_prompt = f"{prompt}\n\nReceipt text:\n{raw_text}"
        # The OpenAI client call is synchronous network I/O; run it in a worker
        # thread so the event loop keeps serving other requests meanwhile.
        result = await run_in_threadpool(_call_openai, full_prompt)
        return {"status": "ok", "source": "paddleocr+openai", "result": _extract_json(result)}
    except Exception as e:
        # Failures are reported in the response body instead of being raised.
        return {"status": "error", "message": str(e)}
# ── Helpers ───────────────────────────────────────────────
def _call_openai(prompt: str) -> str:
    """Send ``prompt`` to the chat model and return the reply text.

    The request uses SYSTEM_PROMPT as the system message and ``prompt`` as the
    single user message. The raw reply is printed for debugging.

    Raises:
        ValueError: if the completion carries no text content (the API allows
            ``message.content`` to be null), so callers get a clear message
            instead of a TypeError further down the pipeline.
    """
    response = openai_client.chat.completions.create(
        model="gpt-4o-mini",
        messages=[
            {"role": "system", "content": SYSTEM_PROMPT},
            {"role": "user", "content": prompt},
        ],
        temperature=0.2,
    )
    result = response.choices[0].message.content
    print(f"[OpenAI] RAW: {result}")
    if result is None:
        raise ValueError("OpenAI returned no text content")
    return result
def _extract_json(raw: str) -> dict:
raw = re.sub(r'```json|```', '', raw).strip()
json_match = re.search(r'\{.*\}', raw, re.DOTALL)
if json_match:
raw = json_match.group()
return json.loads(raw)