Spaces:

AkashKumarave
/

editableweb

Sleeping

App Files Files Community

AkashKumarave committed on May 5

Commit

a479f18

verified ·

1 Parent(s): 2f699bc

Update app.py

Browse files

Files changed (1) hide show

app.py +52 -41

app.py CHANGED Viewed

@@ -1,6 +1,6 @@
-from fastapi import FastAPI, HTTPException
 from fastapi.responses import JSONResponse
-from pdf2json import Pdf2Json
 from io import BytesIO
 import base64
@@ -9,49 +9,60 @@ app = FastAPI()
 @app.post("/api/convert")
 async def convert_pdf(file: bytes = File(...)):
     try:
-        # Parse PDF
-        pdf_parser = Pdf2Json(BytesIO(file))
-        pdf_data = pdf_parser.get_json()
-        # Process PDF data
-        result = {
-            "width": pdf_data["width"],  # Page width in pixels
-            "height": pdf_data["height"],  # Page height in pixels
-            "texts": [],
-            "images": [],
-            "shapes": []
-        }
-        # Extract text
-        for text in pdf_data["texts"]:
-            result["texts"].append({
-                "content": text["content"],
-                "x": text["x"],
-                "y": text["y"],
-                "fontFamily": text["font"] or "Arial",
-                "fontStyle": text["style"] or "Regular",
-                "fontSize": text["size"],
-                "color": {"r": text["color"]["r"]/255, "g": text["color"]["g"]/255, "b": text["color"]["b"]/255}
-            })
-        # Extract images
-        for img in pdf_data["images"]:
-            result["images"].append({
-                "data": base64.b64encode(img["data"]).decode('utf-8'),
-                "x": img["x"],
-                "y": img["y"],
-                "width": img["width"],
-                "height": img["height"]
-            })
-        # Extract shapes
-        for shape in pdf_data["shapes"]:
-            result["shapes"].append({
-                "path": shape["path"],
-                "x": shape["x"],
-                "y": shape["y"],
-                "color": {"r": shape["color"]["r"]/255, "g": shape["color"]["g"]/255, "b": shape["color"]["b"]/255}
-            })
         return JSONResponse(content=result)
     except Exception as e:

+from fastapi import FastAPI, File, HTTPException
 from fastapi.responses import JSONResponse
+import pdfplumber
 from io import BytesIO
 import base64
 @app.post("/api/convert")
 async def convert_pdf(file: bytes = File(...)):
     try:
+        # Parse PDF with pdfplumber
+        with pdfplumber.open(BytesIO(file)) as pdf:
+            page = pdf.pages[0]  # Process first page for simplicity
+            width, height = page.width, page.height
+            # Initialize result
+            result = {
+                "width": width,
+                "height": height,
+                "texts": [],
+                "images": [],
+                "shapes": []
+            }
+            # Extract text
+            for char in page.chars:
+                result["texts"].append({
+                    "content": char["text"],
+                    "x": char["x0"],
+                    "y": char["y0"],
+                    "font_family": char["fontname"].split("+")[-1] or "Arial",
+                    "font_style": "Regular",  # pdfplumber doesn't provide style directly
+                    "font_size": char["size"],
+                    "color": {
+                        "r": 0,  # Simplified: assume black text (enhance with actual color extraction if needed)
+                        "g": 0,
+                        "b": 0
+                    }
+                })
+            # Extract images
+            for img in page.images:
+                img_data = img["stream"].get_data()  # Raw image data
+                result["images"].append({
+                    "data": base64.b64encode(img_data).decode('utf-8'),
+                    "x": img["x0"],
+                    "y": img["y0"],
+                    "width": img["width"],
+                    "height": img["height"]
+                })
+            # Extract shapes (basic lines/curves)
+            for curve in page.curves:
+                path = " ".join([f"M {p['x']},{p['y']}" for p in curve["points"]])  # Simplified SVG path
+                result["shapes"].append({
+                    "path": path,
+                    "x": curve["x0"],
+                    "y": curve["y0"],
+                    "color": {
+                        "r": 0,
+                        "g": 0,
+                        "b": 0
+                    }
+                })
         return JSONResponse(content=result)
     except Exception as e: