Spaces:
Sleeping
Sleeping
| from fastapi import FastAPI, File, HTTPException | |
| from fastapi.middleware.cors import CORSMiddleware | |
| from fastapi.responses import JSONResponse | |
| import pdfplumber | |
| from io import BytesIO | |
| import base64 | |
| import uvicorn | |
# ASGI application object; middleware below is attached to it.
app = FastAPI()

# Cross-origin policy: any origin, method and header is accepted, and
# credentials are allowed.
# NOTE(review): wildcard origins together with allow_credentials=True is very
# permissive - confirm this is intended and restrict origins for production.
app.add_middleware(
    CORSMiddleware,
    allow_origins=["*"],
    allow_methods=["*"],
    allow_headers=["*"],
    allow_credentials=True,
)
def _curve_to_path(curve):
    """Return a path string ("M x,y L x,y ...") for one pdfplumber curve.

    pdfplumber stores a curve's vertices as a list of ``(x, y)`` tuples under
    the ``"pts"`` key (``"points"`` in older releases), so both keys are
    accepted. A curve without vertices yields an empty string.
    """
    points = curve.get("pts") or curve.get("points") or []
    commands = []
    for index, (x, y) in enumerate(points):
        # Only the first vertex is a move-to; the rest are line-to segments,
        # otherwise the path would describe no drawable geometry.
        op = "M" if index == 0 else "L"
        commands.append(f"{op} {x},{y}")
    return " ".join(commands)


def _extract_page(page):
    """Collect size, characters, images and curves of one pdfplumber page.

    Returns a dict with the keys ``width``, ``height``, ``texts``, ``images``
    and ``shapes``; image bytes are base64-encoded so the dict can be sent as
    JSON.
    """
    texts = [
        {
            "content": char["text"],
            "x": char["x0"],
            "y": char["y0"],
            # Keep the part after the last "+" (presumably strips a
            # font-subset prefix); fall back to Arial when nothing is left.
            "font_family": char["fontname"].split("+")[-1] or "Arial",
            "font_style": "Regular",
            "font_size": char["size"],
            # Simplified: enhance for color extraction
            "color": {"r": 0, "g": 0, "b": 0},
        }
        for char in page.chars
    ]

    images = [
        {
            "data": base64.b64encode(img["stream"].get_data()).decode("utf-8"),
            "x": img["x0"],
            "y": img["y0"],
            "width": img["width"],
            "height": img["height"],
        }
        for img in page.images
    ]

    shapes = [
        {
            "path": _curve_to_path(curve),
            "x": curve["x0"],
            "y": curve["y0"],
            "color": {"r": 0, "g": 0, "b": 0},
        }
        for curve in page.curves
    ]

    return {
        "width": page.width,
        "height": page.height,
        "texts": texts,
        "images": images,
        "shapes": shapes,
    }


# NOTE(review): no route decorator (e.g. @app.post(...)) is visible for this
# handler in this view of the file - confirm it is registered on `app`.
async def convert_pdf(file: bytes = File(...)):
    """Convert the first page of an uploaded PDF into a JSON description.

    Args:
        file: Raw bytes of the uploaded PDF.

    Returns:
        JSONResponse with the page ``width``/``height`` and the lists
        ``texts`` (one entry per character), ``images`` (base64 data plus
        position and size) and ``shapes`` (one path string per curve).

    Raises:
        HTTPException: 400 when the upload is empty or the PDF has no pages;
            500 with the underlying error message for any other failure.
    """
    if not file:
        raise HTTPException(status_code=400, detail="Uploaded file is empty")

    # NOTE(review): pdfplumber parsing is synchronous and runs inside this
    # coroutine; large documents will hold up other requests meanwhile.
    try:
        with pdfplumber.open(BytesIO(file)) as pdf:
            if not pdf.pages:
                raise HTTPException(
                    status_code=400, detail="PDF contains no pages"
                )
            # First page for simplicity
            result = _extract_page(pdf.pages[0])
            return JSONResponse(content=result)
    except HTTPException:
        # Deliberate client errors must not be rewrapped as a 500 below.
        raise
    except Exception as e:
        raise HTTPException(status_code=500, detail=str(e)) from e
# Launch the uvicorn server on all interfaces, port 7860, when this module is
# executed directly rather than imported.
if __name__ == "__main__":
    uvicorn.run(
        app,
        port=7860,
        host="0.0.0.0",
    )