Spaces:
Sleeping
Sleeping
File size: 1,916 Bytes
59f237a 8809763 59f237a 8809763 59f237a 8809763 59f237a 8809763 59f237a |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 |
import base64
from io import BytesIO

from fastapi import FastAPI, File, HTTPException
from fastapi.responses import JSONResponse
from pdf2json import Pdf2Json
# FastAPI application instance; the /api/convert route below is registered on it.
app = FastAPI()
def _normalize_color(color):
    """Return the "r", "g" and "b" entries of *color*, each divided by 255."""
    return {channel: color[channel] / 255 for channel in ("r", "g", "b")}


@app.post("/api/convert")
async def convert_pdf(file: bytes = File(...)):
    """Convert an uploaded PDF into a JSON description of its contents.

    Args:
        file: Raw bytes of the uploaded PDF (required form-file field).

    Returns:
        A JSONResponse whose body carries the parser's "width" and "height"
        plus three lists -- "texts", "images" and "shapes" -- rebuilt from
        the parser output. Colour channels are divided by 255 and image
        payloads are base64-encoded so the body is JSON-serialisable.

    Raises:
        HTTPException: Status 500 with the error message as ``detail`` when
            parsing or conversion fails for any reason.
    """
    try:
        # Parse PDF
        pdf_data = Pdf2Json(BytesIO(file)).get_json()

        # Extract text; falsy font/style values fall back to defaults.
        texts = [
            {
                "content": text["content"],
                "x": text["x"],
                "y": text["y"],
                "fontFamily": text["font"] or "Arial",
                "fontStyle": text["style"] or "Regular",
                "fontSize": text["size"],
                "color": _normalize_color(text["color"]),
            }
            for text in pdf_data["texts"]
        ]

        # Extract images; the binary payload is base64-encoded to text.
        images = [
            {
                "data": base64.b64encode(img["data"]).decode('utf-8'),
                "x": img["x"],
                "y": img["y"],
                "width": img["width"],
                "height": img["height"],
            }
            for img in pdf_data["images"]
        ]

        # Extract shapes
        shapes = [
            {
                "path": shape["path"],
                "x": shape["x"],
                "y": shape["y"],
                "color": _normalize_color(shape["color"]),
            }
            for shape in pdf_data["shapes"]
        ]

        result = {
            "width": pdf_data["width"],  # Page width in pixels
            "height": pdf_data["height"],  # Page height in pixels
            "texts": texts,
            "images": images,
            "shapes": shapes,
        }
        return JSONResponse(content=result)
    except Exception as e:
        # Request boundary: report any failure as a 500, but keep the original
        # exception chained so the traceback still shows the root cause.
        raise HTTPException(status_code=500, detail=str(e)) from e