File size: 1,916 Bytes
59f237a
 
 
 
 
8809763
59f237a
8809763
59f237a
 
8809763
59f237a
 
 
 
 
 
 
 
 
 
 
8809763
 
59f237a
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
import base64
from io import BytesIO

from fastapi import FastAPI, File, HTTPException
from fastapi.responses import JSONResponse
from pdf2json import Pdf2Json

# FastAPI application object; the `@app.post` route decorator in this module registers on it.
app = FastAPI()

def _normalize_color(color: dict) -> dict:
    """Return the r/g/b channels of *color*, each divided by 255.

    Presumably the parser reports channels as 0-255 integers, making the
    result 0-1 floats -- confirm against the parser's output.
    """
    return {channel: color[channel] / 255 for channel in ("r", "g", "b")}


def _text_entry(text: dict) -> dict:
    """Build the response entry for one text item of the parsed PDF."""
    return {
        "content": text["content"],
        "x": text["x"],
        "y": text["y"],
        # `.get` so the fallback also applies when the key is absent, not
        # only when it is present but empty/None.
        "fontFamily": text.get("font") or "Arial",
        "fontStyle": text.get("style") or "Regular",
        "fontSize": text["size"],
        "color": _normalize_color(text["color"]),
    }


def _image_entry(img: dict) -> dict:
    """Build the response entry for one image; raw bytes become base64 text."""
    return {
        "data": base64.b64encode(img["data"]).decode('utf-8'),
        "x": img["x"],
        "y": img["y"],
        "width": img["width"],
        "height": img["height"],
    }


def _shape_entry(shape: dict) -> dict:
    """Build the response entry for one shape of the parsed PDF."""
    return {
        "path": shape["path"],
        "x": shape["x"],
        "y": shape["y"],
        "color": _normalize_color(shape["color"]),
    }


@app.post("/api/convert")
async def convert_pdf(file: bytes = File(...)):
    """Convert an uploaded PDF into a JSON description of its contents.

    Args:
        file: Raw bytes of the uploaded PDF (required form file field).

    Returns:
        A ``JSONResponse`` whose body has the keys ``width``, ``height``,
        ``texts``, ``images`` and ``shapes``. Colors are emitted with each
        channel divided by 255; image data is base64-encoded.

    Raises:
        HTTPException: With status 500 and the error message as detail when
            parsing or conversion fails for any reason.
    """
    try:
        # NOTE(review): the Pdf2Json constructor / get_json() API is used as
        # found; confirm it against the installed `pdf2json` package.
        # NOTE(review): parsing is a synchronous call inside an `async def`
        # handler, so it runs on the event loop -- consider a worker thread
        # if large PDFs are expected.
        pdf_data = Pdf2Json(BytesIO(file)).get_json()

        result = {
            "width": pdf_data["width"],  # Page width in pixels
            "height": pdf_data["height"],  # Page height in pixels
            "texts": [_text_entry(text) for text in pdf_data["texts"]],
            "images": [_image_entry(img) for img in pdf_data["images"]],
            "shapes": [_shape_entry(shape) for shape in pdf_data["shapes"]],
        }

        # Kept inside the try: JSONResponse serializes on construction, and a
        # serialization failure should surface as the same 500 response.
        return JSONResponse(content=result)
    except Exception as e:
        # Top-level boundary: report every failure as a 500, keeping the
        # original exception chained for server-side tracebacks.
        raise HTTPException(status_code=500, detail=str(e)) from e