Spaces:

AkashKumarave
/

editableweb

Sleeping

editableweb / app.py

Update app.py

a479f18 verified 8 months ago

2.44 kB

	from fastapi import FastAPI, File, HTTPException
	from fastapi.responses import JSONResponse
	import pdfplumber
	from io import BytesIO
	import base64

	# Application instance; the route decorator below registers its handler on it.
	app = FastAPI()

	def _point_xy(point):
	    """Return the ``(x, y)`` pair of one curve point.

	    Accepts either a mapping with ``"x"``/``"y"`` keys (the shape the
	    previous implementation indexed) or an ``(x, y)`` sequence (the shape
	    pdfplumber documents for curve points).
	    """
	    if isinstance(point, dict):
	        return point["x"], point["y"]
	    return point[0], point[1]


	def _curve_points(curve):
	    """Return the point list of a curve under whichever key is present."""
	    # Newer pdfplumber releases expose the list as "pts"; older ones used
	    # "points". A curve with neither yields an empty path, not a KeyError.
	    return curve.get("pts") or curve.get("points") or []


	def _svg_path(points):
	    """Build an SVG path string: move to the first point, line to the rest."""
	    commands = []
	    for index, point in enumerate(points):
	        x, y = _point_xy(point)
	        # Only the first command is a move-to; a path made solely of "M"
	        # commands contains no drawable segment.
	        commands.append(f"{'M' if index == 0 else 'L'} {x},{y}")
	    return " ".join(commands)


	def _extract_texts(page):
	    """Return one text entry per character on the page."""
	    return [
	        {
	            "content": char["text"],
	            "x": char["x0"],
	            "y": char["y0"],
	            # Drop the subset prefix before "+"; fall back to Arial when
	            # nothing is left after the split.
	            "font_family": char["fontname"].split("+")[-1] or "Arial",
	            # Style is not read from the PDF; every character is reported
	            # as Regular.
	            "font_style": "Regular",
	            "font_size": char["size"],
	            # Simplified: colour is not extracted, black is assumed.
	            "color": {"r": 0, "g": 0, "b": 0},
	        }
	        for char in page.chars
	    ]


	def _extract_images(page):
	    """Return one entry per image, with its stream data base64-encoded."""
	    return [
	        {
	            "data": base64.b64encode(img["stream"].get_data()).decode("utf-8"),
	            "x": img["x0"],
	            "y": img["y0"],
	            "width": img["width"],
	            "height": img["height"],
	        }
	        for img in page.images
	    ]


	def _extract_shapes(page):
	    """Return one entry per curve, with its points rendered as a path string."""
	    return [
	        {
	            "path": _svg_path(_curve_points(curve)),
	            "x": curve["x0"],
	            "y": curve["y0"],
	            # Simplified: colour is not extracted, black is assumed.
	            "color": {"r": 0, "g": 0, "b": 0},
	        }
	        for curve in page.curves
	    ]


	@app.post("/api/convert")
	async def convert_pdf(file: bytes = File(...)):
	    """Convert the first page of an uploaded PDF into a JSON description.

	    Only the first page is processed.

	    Args:
	        file: Raw bytes of the uploaded PDF.

	    Returns:
	        A ``JSONResponse`` whose body holds the page ``width`` and
	        ``height`` plus three lists: ``texts`` (one entry per character),
	        ``images`` (base64-encoded stream data with position and size) and
	        ``shapes`` (one path string per curve).

	    Raises:
	        HTTPException: 400 when the upload is empty or the PDF has no
	            pages; 500, carrying the error text, for any other failure.
	    """
	    if not file:
	        raise HTTPException(status_code=400, detail="Uploaded file is empty")

	    # NOTE(review): parsing below runs synchronously inside this async
	    # handler; consider offloading it if large PDFs are expected.
	    try:
	        with pdfplumber.open(BytesIO(file)) as pdf:
	            if not pdf.pages:
	                raise HTTPException(
	                    status_code=400, detail="PDF contains no pages"
	                )
	            page = pdf.pages[0]
	            result = {
	                "width": page.width,
	                "height": page.height,
	                "texts": _extract_texts(page),
	                "images": _extract_images(page),
	                "shapes": _extract_shapes(page),
	            }
	        return JSONResponse(content=result)
	    except HTTPException:
	        # Let deliberate HTTP errors through instead of re-wrapping as 500.
	        raise
	    except Exception as e:
	        raise HTTPException(status_code=500, detail=str(e)) from e