# Spaces: Sleeping
import io
import json
import os
import re

# Set before the third-party and model imports below, preserving the original
# ordering (presumably so Paddle reads the flag at import time -- confirm).
os.environ["PADDLE_DISABLE_ONEDNN"] = "1"

from fastapi import FastAPI, File, UploadFile, Form
from fastapi.concurrency import run_in_threadpool
from openai import OpenAI
from PIL import Image

from models.donut_ocr import DonutModel
from models.paddleocr_ocr import PaddleOCRModel
# Application object plus the shared model/client instances. They are built
# once at import time and referenced as module globals by the handlers and
# helpers below.
app = FastAPI()
ocr = PaddleOCRModel(lang="en")
donut = DonutModel()
# The API key is read from the OPENAI_API_KEY environment variable.
openai_client = OpenAI(api_key=os.environ.get("OPENAI_API_KEY"))
# System message sent with every OpenAI request made by _call_openai.
SYSTEM_PROMPT = "You are a financial parser. Return ONLY raw JSON, no markdown, no explanation."
# Template for free-text transaction messages. It is rendered with
# str.format(message=...), so the literal JSON braces are escaped as {{ }}.
TRANSACTION_PROMPT = """Extract financial transaction from the message.
Return ONLY raw JSON:
{{"type":"expense or income","category":"food|transport|shopping|investment|other","description":"short description","amount":integer}}
Message: {message}"""
# Per-receipt-type instructions, keyed by the ``receipt_type`` form field.
# These strings are concatenated into the request as-is (they are never passed
# through str.format), so the JSON examples use single braces; doubled braces
# would reach the model literally and show it a malformed JSON shape.
RECEIPT_PROMPTS = {
    "ridehailing": 'Ride-hailing receipt (Gojek/Grab). Extract FINAL total after discount. Return ONLY raw JSON: {"type":"expense","category":"transport","description":"route","amount":0,"platform":"gojek/grab"}',
    "ewallet": 'E-wallet transaction. Return ONLY raw JSON: {"type":"expense/income","category":"transfer","description":"","amount":0,"platform":""}',
    "minimarket": 'Minimarket receipt. Return ONLY raw JSON: {"type":"expense","category":"shopping","description":"","items":[{"name":"","qty":1,"price":0}],"amount":0}',
    "unknown": 'Extract financial transaction. Return ONLY raw JSON: {"type":"expense/income","category":"food/transport/shopping/transfer/other","description":"","amount":0}',
}
# ---- Health check ----------------------------------------
def root() -> dict:
    """Return the static health-check payload for this service.

    NOTE(review): no route decorator is visible on this function in the
    source; confirm how it is registered on ``app``.
    """
    return {"status": "ok", "service": "ai-financial-planner"}
# ---- Endpoint 1: Parse text transaction -------------------
async def parse_transaction(
    message: str = Form(...),
):
    """
    Text -> OpenAI -> JSON.

    Example: "Naik gojek 15000"

    Renders TRANSACTION_PROMPT with the submitted message, sends it to the
    model and returns ``{"status": "ok", "result": <parsed JSON>}``. Any
    exception is reported as ``{"status": "error", "message": <text>}``
    instead of being raised.

    NOTE(review): no route decorator is visible on this function in the
    source; confirm how it is registered on ``app``.
    """
    try:
        prompt = TRANSACTION_PROMPT.format(message=message)
        # _call_openai is a synchronous network call; running it directly in
        # this coroutine would block the event loop for its whole duration,
        # so it is handed to the worker thread pool instead.
        result = await run_in_threadpool(_call_openai, prompt)
        return {"status": "ok", "result": _extract_json(result)}
    except Exception as e:
        # Best-effort boundary: callers always receive a JSON body.
        return {"status": "error", "message": str(e)}
# ---- Endpoint 2: Parse receipt image ----------------------
async def parse_receipt(
    file: UploadFile = File(...),
    receipt_type: str = Form(default="unknown"),
):
    """
    Image -> OCR -> OpenAI -> JSON.

    The upload is decoded to an RGB image. ``receipt_type == "restaurant"``
    is answered by the Donut model alone; every other type is OCR'd with
    PaddleOCR and the extracted text is sent to OpenAI together with the
    matching entry of RECEIPT_PROMPTS (falling back to ``"unknown"``).

    Returns ``{"status": "ok", "source": ..., "result": ...}`` on success and
    ``{"status": "error", "message": ...}`` when OCR yields fewer than 10
    non-blank characters or any exception occurs.

    NOTE(review): no route decorator is visible on this function in the
    source; confirm how it is registered on ``app``.
    """
    try:
        contents = await file.read()
        image = Image.open(io.BytesIO(contents)).convert("RGB")

        # Restaurant receipts go straight to Donut (no LLM needed).
        # NOTE(review): Donut/PaddleOCR inference is left on the calling
        # thread; whether these model wrappers are safe to call from worker
        # threads is not visible here -- confirm before offloading them.
        if receipt_type == "restaurant":
            result = donut.run(image)
            return {"status": "ok", "source": "donut", "result": result}

        # Everything else: PaddleOCR -> OpenAI.
        raw_text = ocr.extract_clean(image, receipt_type=receipt_type)
        if not raw_text or len(raw_text.strip()) < 10:
            return {"status": "error", "message": "OCR failed to extract text"}

        prompt = RECEIPT_PROMPTS.get(receipt_type, RECEIPT_PROMPTS["unknown"])
        full_prompt = f"{prompt}\n\nReceipt text:\n{raw_text}"
        # _call_openai is a synchronous network call; run it in the worker
        # thread pool so the event loop is not blocked while waiting.
        result = await run_in_threadpool(_call_openai, full_prompt)
        return {"status": "ok", "source": "paddleocr+openai", "result": _extract_json(result)}
    except Exception as e:
        # Best-effort boundary: callers always receive a JSON body.
        return {"status": "error", "message": str(e)}
# ---- Helpers ----------------------------------------------
def _call_openai(prompt: str) -> str:
    """Send ``prompt`` to the chat model and return the reply text.

    The request uses SYSTEM_PROMPT as the system message, the ``gpt-4o-mini``
    model and a temperature of 0.2. The raw reply is printed to stdout before
    being returned. This is a synchronous (blocking) call.

    NOTE(review): the reply content is returned unchanged; if the SDK can
    yield ``None`` here, callers receive ``None`` -- confirm against the
    client documentation.
    """
    response = openai_client.chat.completions.create(
        model="gpt-4o-mini",
        messages=[
            {"role": "system", "content": SYSTEM_PROMPT},
            {"role": "user", "content": prompt},
        ],
        temperature=0.2,
    )
    result = response.choices[0].message.content
    print(f"[OpenAI] RAW: {result}")
    return result
def _extract_json(raw: str) -> dict:
    """Parse the first JSON object contained in a model reply.

    Markdown code-fence markers are removed, then decoding starts at the
    first ``{`` and stops where that object ends. Text after the object is
    ignored, even if it contains braces; a greedy "first ``{`` to last ``}``"
    match would fail on such replies. When the text contains no ``{`` at all
    it is parsed as a whole, as before.

    Args:
        raw: Reply text returned by the model.

    Returns:
        The decoded JSON value (a dict whenever an object is present).

    Raises:
        ValueError: if ``raw`` is None.
        json.JSONDecodeError: if the text cannot be decoded as JSON.
    """
    if raw is None:
        raise ValueError("model response is empty; no JSON to extract")

    text = re.sub(r'```json|```', '', raw).strip()
    start = text.find("{")
    if start == -1:
        # No object in the reply: parse the whole string (and surface its error).
        return json.loads(text)

    # raw_decode parses exactly one JSON value and reports where it ended,
    # so anything following the object is left untouched.
    parsed, _end = json.JSONDecoder().raw_decode(text, start)
    return parsed