# editableweb / app.py
# Author: AkashKumarave
# Commit b6dcd96 (verified): "Update app.py" - 2.46 kB
from fastapi import FastAPI, File, HTTPException
from fastapi.middleware.cors import CORSMiddleware
from fastapi.responses import JSONResponse
import pdfplumber
from io import BytesIO
import base64
import uvicorn
app = FastAPI()

# Add CORS middleware.
# The API is open to every origin, so credentialed requests (cookies,
# Authorization headers) are deliberately NOT allowed: the CORS rules forbid
# combining a wildcard origin with credentials, and the endpoint below does
# not use cookies or authentication anyway. If credentials are ever needed,
# replace the wildcard with an explicit list of trusted origins first.
app.add_middleware(
    CORSMiddleware,
    allow_origins=["*"],  # Adjust for production
    allow_credentials=False,
    allow_methods=["*"],
    allow_headers=["*"],
)
def _curve_to_path(points):
    """Return an SVG-style path string ("M x,y L x,y ...") for curve points.

    Each point may be an ``(x, y)`` sequence or a mapping with ``"x"`` and
    ``"y"`` keys. The first point becomes a move-to command and every later
    point a line-to command, so the result describes a connected outline.
    """
    commands = []
    for index, point in enumerate(points):
        if isinstance(point, dict):
            x, y = point["x"], point["y"]
        else:
            x, y = point[0], point[1]
        commands.append(f"{'M' if index == 0 else 'L'} {x},{y}")
    return " ".join(commands)


@app.post("/api/convert")
async def convert_pdf(file: bytes = File(...)):
    """Convert the first page of an uploaded PDF into a JSON description.

    Args:
        file: Raw bytes of the uploaded PDF.

    Returns:
        A JSONResponse with the page ``width`` and ``height`` plus three
        lists: ``texts`` (one entry per character), ``images`` (base64
        encoded stream data with position and size) and ``shapes`` (one path
        string per curve).

    Raises:
        HTTPException: 400 when the upload is empty or the PDF has no pages;
            500 (with the underlying error message) for any other failure
            while parsing or serializing.
    """
    # NOTE(review): pdfplumber parsing is synchronous, so a large PDF will
    # occupy the event loop for the duration of this call.
    try:
        if not file:
            raise HTTPException(status_code=400, detail="Uploaded file is empty")

        # Parse PDF with pdfplumber
        with pdfplumber.open(BytesIO(file)) as pdf:
            if not pdf.pages:
                raise HTTPException(status_code=400, detail="PDF contains no pages")
            page = pdf.pages[0]  # First page for simplicity

            # Extract text (one entry per character)
            texts = [
                {
                    "content": char["text"],
                    "x": char["x0"],
                    "y": char["y0"],
                    # Drop a "PREFIX+" font-subset tag if present; fall back
                    # to Arial when no name remains.
                    "font_family": (char.get("fontname") or "").split("+")[-1] or "Arial",
                    "font_style": "Regular",
                    "font_size": char["size"],
                    "color": {"r": 0, "g": 0, "b": 0},  # Simplified: enhance for color extraction
                }
                for char in page.chars
            ]

            # Extract images
            # NOTE(review): get_data() yields the stream's bytes as stored in
            # the PDF; confirm the consumer can decode them as an image.
            images = [
                {
                    "data": base64.b64encode(img["stream"].get_data()).decode("utf-8"),
                    "x": img["x0"],
                    "y": img["y0"],
                    "width": img["width"],
                    "height": img["height"],
                }
                for img in page.images
            ]

            # Extract shapes
            # pdfplumber stores curve points under "pts" in current releases;
            # "points" is kept as a fallback for older versions.
            shapes = [
                {
                    "path": _curve_to_path(curve.get("pts") or curve.get("points") or []),
                    "x": curve["x0"],
                    "y": curve["y0"],
                    "color": {"r": 0, "g": 0, "b": 0},
                }
                for curve in page.curves
            ]

            result = {
                "width": page.width,
                "height": page.height,
                "texts": texts,
                "images": images,
                "shapes": shapes,
            }
        return JSONResponse(content=result)
    except HTTPException:
        # Preserve deliberate client errors instead of rewrapping them as 500.
        raise
    except Exception as e:
        raise HTTPException(status_code=500, detail=str(e)) from e
# Script entry point: serve the app with uvicorn on every network interface,
# port 7860, when this file is executed directly (not when it is imported).
if __name__ == "__main__":
    uvicorn.run(app, port=7860, host="0.0.0.0")