Spaces:
Sleeping
Sleeping
File size: 2,458 Bytes
a479f18 b6dcd96 59f237a a479f18 59f237a b6dcd96 8809763 59f237a 8809763 b6dcd96 59f237a 8809763 a479f18 b6dcd96 a479f18 59f237a a479f18 8809763 a479f18 b6dcd96 a479f18 b6dcd96 a479f18 59f237a a479f18 b6dcd96 a479f18 59f237a b6dcd96 a479f18 b6dcd96 a479f18 b6dcd96 a479f18 59f237a b6dcd96 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 |
from fastapi import FastAPI, File, HTTPException
from fastapi.middleware.cors import CORSMiddleware
from fastapi.responses import JSONResponse
import pdfplumber
from io import BytesIO
import base64
import uvicorn
# ASGI application object; routes below are registered on it.
app = FastAPI()

# Add CORS middleware.
# Credentials are disabled on purpose: a wildcard origin list must not be
# combined with allow_credentials=True (the CORS spec forbids it, and it would
# let any website issue credentialed requests). To support cookies or
# Authorization headers, list the trusted origins explicitly first.
app.add_middleware(
    CORSMiddleware,
    allow_origins=["*"],  # Adjust for production
    allow_credentials=False,
    allow_methods=["*"],
    allow_headers=["*"],
)
def _font_family(fontname):
    """Return the family part of a pdfplumber ``fontname``, or "Arial" if empty."""
    # Names may look like "PREFIX+Family"; only the text after the last "+" is kept.
    return (fontname or "").split("+")[-1] or "Arial"


def _point_xy(point):
    """Return ``(x, y)`` for a curve point given as a pair or an x/y mapping."""
    if isinstance(point, dict):
        return point["x"], point["y"]
    x, y = point
    return x, y


def _curve_path(curve):
    """Build a path string ("M x,y L x,y ...") from a pdfplumber curve object."""
    # pdfplumber exposes curve points as coordinate tuples; the key is "pts" in
    # current releases and was "points" in older ones, so accept either.
    # NOTE(review): confirm the key against the installed pdfplumber version.
    points = curve.get("pts") or curve.get("points") or []
    commands = []
    for index, point in enumerate(points):
        x, y = _point_xy(point)
        # Only the first point moves the pen; later points draw line segments.
        commands.append(f"{'M' if index == 0 else 'L'} {x},{y}")
    return " ".join(commands)


def _extract_page(page):
    """Collect size, characters, images and curves of one pdfplumber page.

    Returns a dict with the keys "width", "height", "texts", "images" and
    "shapes". All values must be JSON-serializable.
    """
    texts = [
        {
            "content": char["text"],
            "x": char["x0"],
            "y": char["y0"],
            "font_family": _font_family(char.get("fontname")),
            "font_style": "Regular",
            "font_size": char["size"],
            "color": {"r": 0, "g": 0, "b": 0},  # Simplified: enhance for color extraction
        }
        for char in page.chars
    ]
    images = [
        {
            # Image stream bytes are base64-encoded so they fit in JSON.
            "data": base64.b64encode(img["stream"].get_data()).decode("utf-8"),
            "x": img["x0"],
            "y": img["y0"],
            "width": img["width"],
            "height": img["height"],
        }
        for img in page.images
    ]
    shapes = [
        {
            "path": _curve_path(curve),
            "x": curve["x0"],
            "y": curve["y0"],
            "color": {"r": 0, "g": 0, "b": 0},  # Placeholder: stroke/fill color is not read
        }
        for curve in page.curves
    ]
    return {
        "width": page.width,
        "height": page.height,
        "texts": texts,
        "images": images,
        "shapes": shapes,
    }


@app.post("/api/convert")
async def convert_pdf(file: bytes = File(...)):
    """Convert the first page of an uploaded PDF into a JSON description.

    Args:
        file: Raw bytes of the uploaded PDF.

    Returns:
        A JSONResponse holding the page "width" and "height" plus the lists
        "texts" (one entry per character), "images" (base64-encoded data with
        position and size) and "shapes" (one path string per curve).

    Raises:
        HTTPException: 400 when the PDF has no pages; 500 with the error
            message as detail for any other failure while parsing or
            extracting.
    """
    try:
        # Parse PDF with pdfplumber; extraction must happen while it is open.
        with pdfplumber.open(BytesIO(file)) as pdf:
            if not pdf.pages:
                raise HTTPException(status_code=400, detail="PDF contains no pages")
            result = _extract_page(pdf.pages[0])  # First page for simplicity
        return JSONResponse(content=result)
    except HTTPException:
        # Deliberate HTTP errors must not be re-wrapped as a generic 500.
        raise
    except Exception as e:
        raise HTTPException(status_code=500, detail=str(e)) from e
# Run uvicorn server when this file is executed directly (not when imported).
if __name__ == "__main__":
    # host="0.0.0.0" listens on all network interfaces.
    uvicorn.run(app, host="0.0.0.0", port=7860)