"""HTTP service exposing one endpoint that converts an uploaded PDF to JSON."""

import base64
from io import BytesIO

from fastapi import FastAPI, File, HTTPException
from fastapi.responses import JSONResponse
# NOTE(review): assumes the installed `pdf2json` package exposes a `Pdf2Json`
# class with a `get_json()` method, as used below -- confirm against the
# dependency actually pinned for this service.
from pdf2json import Pdf2Json

app = FastAPI()


def _normalize_color(color):
    """Return a new dict with the ``r``, ``g`` and ``b`` entries divided by 255."""
    return {channel: color[channel] / 255 for channel in ("r", "g", "b")}


def _build_text(text):
    """Map one parsed text entry onto the response's text schema."""
    return {
        "content": text["content"],
        "x": text["x"],
        "y": text["y"],
        # Fall back to defaults when the parser reports an empty/None value.
        "fontFamily": text["font"] or "Arial",
        "fontStyle": text["style"] or "Regular",
        "fontSize": text["size"],
        "color": _normalize_color(text["color"]),
    }


def _build_image(img):
    """Map one parsed image entry onto the response's image schema."""
    return {
        # Raw image bytes are not JSON-serialisable, so ship them as base64 text.
        "data": base64.b64encode(img["data"]).decode("utf-8"),
        "x": img["x"],
        "y": img["y"],
        "width": img["width"],
        "height": img["height"],
    }


def _build_shape(shape):
    """Map one parsed shape entry onto the response's shape schema."""
    return {
        "path": shape["path"],
        "x": shape["x"],
        "y": shape["y"],
        "color": _normalize_color(shape["color"]),
    }


@app.post("/api/convert")
async def convert_pdf(file: bytes = File(...)):
    """Parse an uploaded PDF and return its texts, images and shapes as JSON.

    Args:
        file: Raw bytes of the uploaded PDF (required multipart form field).

    Returns:
        A ``JSONResponse`` whose body has the keys ``width``, ``height``,
        ``texts``, ``images`` and ``shapes``.

    Raises:
        HTTPException: With status 500 and the underlying error message as
            ``detail`` if parsing or any field extraction fails.
    """
    try:
        pdf_data = Pdf2Json(BytesIO(file)).get_json()

        result = {
            "width": pdf_data["width"],  # Page width as reported by the parser
            "height": pdf_data["height"],  # Page height as reported by the parser
            "texts": [_build_text(text) for text in pdf_data["texts"]],
            "images": [_build_image(img) for img in pdf_data["images"]],
            "shapes": [_build_shape(shape) for shape in pdf_data["shapes"]],
        }
        return JSONResponse(content=result)
    except Exception as e:
        # Top-level boundary: surface any failure as a 500 while keeping the
        # original exception attached as the cause for server-side tracebacks.
        raise HTTPException(status_code=500, detail=str(e)) from e