File size: 2,458 Bytes
a479f18
b6dcd96
59f237a
a479f18
59f237a
 
b6dcd96
8809763
59f237a
8809763
b6dcd96
 
 
 
 
 
 
 
 
59f237a
 
8809763
a479f18
 
b6dcd96
a479f18
59f237a
a479f18
 
 
 
 
 
 
 
8809763
a479f18
 
 
 
 
 
 
b6dcd96
a479f18
b6dcd96
a479f18
59f237a
a479f18
 
b6dcd96
a479f18
 
 
 
 
 
 
59f237a
b6dcd96
a479f18
b6dcd96
a479f18
 
 
 
b6dcd96
a479f18
59f237a
 
 
b6dcd96
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
from fastapi import FastAPI, File, HTTPException
from fastapi.middleware.cors import CORSMiddleware
from fastapi.responses import JSONResponse
import pdfplumber
from io import BytesIO
import base64
import uvicorn

# ASGI application object that the route handlers below are registered on.
app = FastAPI()

# Wide-open CORS policy: any origin, any HTTP method and any request header
# is accepted, and credentialed requests are allowed.  Restrict
# ``allow_origins`` to the known front-end origins before going to production.
app.add_middleware(
    CORSMiddleware,
    allow_headers=["*"],
    allow_methods=["*"],
    allow_credentials=True,
    allow_origins=["*"],  # Adjust for production
)

def _point_xy(point):
    """Return the ``(x, y)`` pair for one curve vertex.

    pdfplumber reports vertices as ``(x, y)`` tuples; mapping-style points
    with ``"x"``/``"y"`` keys are also accepted for backward compatibility.
    """
    if isinstance(point, dict):
        return point["x"], point["y"]
    x, y = point
    return x, y


def _svg_path(points):
    """Build an SVG path string that connects *points* in order.

    The first vertex is a move-to (``M``) and every following vertex a
    line-to (``L``); a path made only of move-to commands draws nothing.
    Returns an empty string when there are no points.
    """
    commands = []
    for index, point in enumerate(points):
        x, y = _point_xy(point)
        opcode = "M" if index == 0 else "L"
        commands.append(f"{opcode} {x},{y}")
    return " ".join(commands)


@app.post("/api/convert")
async def convert_pdf(file: bytes = File(...)):
    """Convert the first page of an uploaded PDF into a JSON description.

    Args:
        file: Raw bytes of the uploaded PDF document.

    Returns:
        A ``JSONResponse`` whose body has the keys ``width``, ``height``,
        ``texts`` (one entry per character), ``images`` (base64-encoded
        stream data plus position and size) and ``shapes`` (one SVG-style
        path per curve).  Coordinates are passed through exactly as
        pdfplumber reports them.

    Raises:
        HTTPException: 400 when the upload is empty or the PDF has no pages;
            500 (with the underlying error message) for any other failure
            while parsing or serialising the document.
    """
    if not file:
        raise HTTPException(status_code=400, detail="Uploaded file is empty")

    try:
        # Parse PDF with pdfplumber
        with pdfplumber.open(BytesIO(file)) as pdf:
            if not pdf.pages:
                raise HTTPException(status_code=400, detail="PDF has no pages")

            page = pdf.pages[0]  # First page for simplicity

            # One entry per character; colour extraction is not implemented,
            # so every character is reported as black.
            texts = [
                {
                    "content": char["text"],
                    "x": char["x0"],
                    "y": char["y0"],
                    # Drop the subset prefix ("ABCDEF+FontName") if present.
                    "font_family": char["fontname"].split("+")[-1] or "Arial",
                    "font_style": "Regular",
                    "font_size": char["size"],
                    "color": {"r": 0, "g": 0, "b": 0},
                }
                for char in page.chars
            ]

            # Image payloads are the stream data, base64-encoded so they can
            # travel inside JSON.
            images = [
                {
                    "data": base64.b64encode(img["stream"].get_data()).decode("utf-8"),
                    "x": img["x0"],
                    "y": img["y0"],
                    "width": img["width"],
                    "height": img["height"],
                }
                for img in page.images
            ]

            # Curve vertices live under "pts" (older pdfplumber releases used
            # "points"); accept either so the endpoint works across versions.
            shapes = [
                {
                    "path": _svg_path(curve.get("pts") or curve.get("points") or ()),
                    "x": curve["x0"],
                    "y": curve["y0"],
                    "color": {"r": 0, "g": 0, "b": 0},
                }
                for curve in page.curves
            ]

            result = {
                "width": page.width,
                "height": page.height,
                "texts": texts,
                "images": images,
                "shapes": shapes,
            }

        # Kept inside the try so serialisation failures are reported with
        # their message, as before.
        return JSONResponse(content=result)
    except HTTPException:
        # Deliberate client errors raised above must not be re-wrapped as 500.
        raise
    except Exception as e:
        raise HTTPException(status_code=500, detail=str(e)) from e

# Script entry point: serve the app with uvicorn when this file is executed
# directly (not when it is imported as a module).
if __name__ == "__main__":
    # Bind to all network interfaces on port 7860.
    listen_options = {"host": "0.0.0.0", "port": 7860}
    uvicorn.run(app, **listen_options)