Spaces:
Sleeping
Sleeping
import base64
from io import BytesIO

from fastapi import FastAPI, File, HTTPException
from fastapi.responses import JSONResponse
from pdf2json import Pdf2Json
# FastAPI application object for this module.
app = FastAPI()
def _normalize_color(color):
    """Return the r/g/b channels of *color*, each divided by 255."""
    return {channel: color[channel] / 255 for channel in ("r", "g", "b")}


# NOTE(review): no route decorator is visible on this function in the source
# as received; confirm that it is registered on `app` (path and method).
async def convert_pdf(file: bytes = File(...)):
    """Convert an uploaded PDF into a JSON description of its contents.

    The bytes are wrapped in a ``BytesIO`` and handed to ``Pdf2Json``; the
    parsed data is then reshaped into a flat structure of page size, text
    runs, images and shapes.

    Args:
        file: Raw bytes of the uploaded PDF, declared as a ``File(...)``
            parameter.

    Returns:
        A ``JSONResponse`` whose content has the keys ``width``, ``height``,
        ``texts``, ``images`` and ``shapes``. Colour channels are divided by
        255 and image payloads are base64-encoded text.

    Raises:
        HTTPException: With status 500 and the stringified error as detail
            when parsing or reshaping raises any ``Exception``.
    """
    try:
        # Parse PDF
        pdf_data = Pdf2Json(BytesIO(file)).get_json()

        # Extract text; a falsy font or style falls back to a default.
        texts = [
            {
                "content": text["content"],
                "x": text["x"],
                "y": text["y"],
                "fontFamily": text["font"] or "Arial",
                "fontStyle": text["style"] or "Regular",
                "fontSize": text["size"],
                "color": _normalize_color(text["color"]),
            }
            for text in pdf_data["texts"]
        ]

        # Extract images; the payload is base64-encoded so that it can be
        # carried inside the JSON body.
        images = [
            {
                "data": base64.b64encode(img["data"]).decode('utf-8'),
                "x": img["x"],
                "y": img["y"],
                "width": img["width"],
                "height": img["height"],
            }
            for img in pdf_data["images"]
        ]

        # Extract shapes
        shapes = [
            {
                "path": shape["path"],
                "x": shape["x"],
                "y": shape["y"],
                "color": _normalize_color(shape["color"]),
            }
            for shape in pdf_data["shapes"]
        ]

        result = {
            "width": pdf_data["width"],  # Page width in pixels
            "height": pdf_data["height"],  # Page height in pixels
            "texts": texts,
            "images": images,
            "shapes": shapes,
        }
        return JSONResponse(content=result)
    except Exception as exc:
        # Boundary handler: report any failure as a 500 while keeping the
        # original exception chained for server-side tracebacks.
        raise HTTPException(status_code=500, detail=str(exc)) from exc