Spaces:

credent007
/

easyocr-phi3

Paused

App Files Files Community

easyocr-phi3 / mainapp.py

credent007

Update mainapp.py

c637105 verified about 1 month ago

raw

history blame contribute delete

2.13 kB

	from fastapi import FastAPI, UploadFile, File, HTTPException
	from fastapi.responses import JSONResponse
	from PIL import Image
	import io
	import asyncio
	import fitz # PyMuPDF
	from llm import call_llm

	# FastAPI application object; the route decorators in this module
	# (`@app.get`, `@app.post`) register their handlers on it.
	app = FastAPI()

	@app.get("/")
	def home():
	    """Serve the root route with a fixed placeholder payload."""
	    payload = {"message": "home page"}
	    return payload


	# 🔥 Process single image
	async def process_image(image: Image.Image, page_num: int):
	    """Ask the LLM to extract invoice key-value pairs from one page image.

	    Args:
	        image: The page image, handed to ``call_llm`` unchanged.
	        page_num: Page number echoed back in the result.

	    Returns:
	        A dict with ``"page"`` set to ``page_num`` and ``"llm"`` set to
	        whatever ``call_llm`` returned.
	    """
	    prompt = """
	You are an intelligent document parser.
	<image>\n
	Extract structured key-value pairs from the invoice.

	Rules:
	- Return ONLY valid JSON
	- Each entry must be: {"key": "...", "value": "..."}
	- Do NOT return bounding boxes
	- Ignore layout info
	- Map related fields (e.g., Invoice No → 12345)

	Example:
	[
	{"key": "Invoice Number", "value": "12345"},
	{"key": "Date", "value": "01-01-2024"}
	]
	"""

	    llm_output = await call_llm(image, prompt)

	    # Assemble the per-page record in the same key order as before.
	    page_result = {"page": page_num}
	    page_result["llm"] = llm_output
	    return page_result


	@app.post("/ocr-llm")
	async def ocr_llm_endpoint(file: UploadFile = File(...)):
	    """Run LLM key-value extraction over an uploaded PDF or image.

	    A PDF is rendered page by page and every page is passed to
	    ``process_image``; the calls are awaited together with
	    ``asyncio.gather``. Any other accepted file is treated as a single
	    image and reported as page 1.

	    Args:
	        file: The uploaded file. Its name must end in ``.pdf``, ``.png``,
	            ``.jpg`` or ``.jpeg`` (case-insensitive).

	    Returns:
	        ``{"results": [...]}`` holding one ``process_image`` result per
	        page, in page order.

	    Raises:
	        HTTPException: 400 when the file name is missing or has an
	            unsupported extension; 500 (with the error text as detail)
	            when reading, decoding, rendering or the LLM call fails.
	    """
	    # An upload may arrive without a file name; treat that as an
	    # unsupported file instead of crashing on ``None.lower()``.
	    filename = (file.filename or "").lower()
	    if not filename.endswith((".pdf", ".png", ".jpg", ".jpeg")):
	        raise HTTPException(status_code=400, detail="File must be PDF or image")

	    try:
	        file_bytes = await file.read()

	        if filename.endswith(".pdf"):
	            # Render every page up front and close the document as soon as
	            # rendering is done, so it is released even if a page fails.
	            with fitz.open(stream=file_bytes, filetype="pdf") as doc:
	                images = [
	                    Image.open(io.BytesIO(page.get_pixmap().tobytes("png"))).convert("RGB")
	                    for page in doc
	                ]
	        else:
	            images = [Image.open(io.BytesIO(file_bytes)).convert("RGB")]

	        # Coroutines are created only after all images exist, so a
	        # rendering failure cannot leave un-awaited coroutines behind.
	        results = await asyncio.gather(
	            *(
	                process_image(img, page_num)
	                for page_num, img in enumerate(images, start=1)
	            )
	        )
	        return {"results": results}

	    except Exception as e:
	        # Top-level boundary: report the failure to the client and keep
	        # the original exception chained for server-side tracebacks.
	        raise HTTPException(status_code=500, detail=str(e)) from e