AkashKumarave committed on
Commit
f38e71b
·
verified ·
1 Parent(s): ccd05a4

Create app.py

Browse files
Files changed (1) hide show
  1. app.py +93 -0
app.py ADDED
@@ -0,0 +1,93 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import fitz # PyMuPDF
2
+ import base64
3
+ from fastapi import FastAPI, File, UploadFile
4
+ from fastapi.middleware.cors import CORSMiddleware
5
+
6
# ASGI application object; the HTTP routes in this file are registered on it.
app = FastAPI()

# Enable CORS for Figma.
# Every origin, method and header is accepted.
# NOTE(review): the wildcard origin is presumably needed because the Figma
# plugin does not call from a fixed, known origin -- confirm before tightening.
app.add_middleware(
    CORSMiddleware,
    allow_origins=["*"],
    allow_methods=["*"],
    allow_headers=["*"],
)
15
+
16
def rgb_to_figma(color):
    """Convert an RGB sequence into a ``{"r", "g", "b"}`` colour dict.

    Args:
        color: Indexable sequence whose first three items are the red, green
            and blue components, or a falsy value (``None``, empty sequence).

    Returns:
        A dict with the keys ``"r"``, ``"g"`` and ``"b"``. A falsy ``color``
        yields black (all components ``0``).
    """
    if not color:
        return {"r": 0, "g": 0, "b": 0}

    # Components are passed through untouched: no rescaling or clamping is
    # done here, so the caller must supply them in the range the consumer
    # expects.
    red, green, blue = color[0], color[1], color[2]
    return {"r": red, "g": green, "b": blue}
20
+
21
def _srgb_int_to_rgb(srgb):
    """Unpack a 0xRRGGBB integer into an (r, g, b) tuple of floats in 0-1."""
    return (
        ((srgb >> 16) & 0xFF) / 255.0,
        ((srgb >> 8) & 0xFF) / 255.0,
        (srgb & 0xFF) / 255.0,
    )


def _drawing_items_to_path(items):
    """Render the items of one PyMuPDF drawing as an SVG-style path string."""
    segments = []
    for item in items:
        kind = item[0]
        if kind == "l":  # straight line: start point, end point
            start, end = item[1], item[2]
            segments.append(f"M {start.x} {start.y} L {end.x} {end.y}")
        elif kind == "c":  # cubic Bezier: start, two control points, end
            p1, p2, p3, p4 = item[1], item[2], item[3], item[4]
            segments.append(
                f"M {p1.x} {p1.y} "
                f"C {p2.x} {p2.y} {p3.x} {p3.y} {p4.x} {p4.y}"
            )
        elif kind == "re":  # rectangle, emitted as a closed four-corner path
            r = item[1]
            segments.append(
                f"M {r.x0} {r.y0} L {r.x1} {r.y0} "
                f"L {r.x1} {r.y1} L {r.x0} {r.y1} Z"
            )
        # NOTE: any other item type is skipped and contributes nothing.
    return " ".join(segments)


@app.post("/convert")
async def convert_pdf(file: UploadFile = File(...)):
    """Extract styled text, vector paths and images from an uploaded PDF.

    Args:
        file: The uploaded PDF; its full content is read into memory.

    Returns:
        ``{"pages": [...]}`` with one entry per page, each holding:

        * ``"width"`` / ``"height"``: size of the page rectangle.
        * ``"text"``: one dict per text span (``content``, ``x``, ``y``,
          ``size``, ``font``, ``color``).
        * ``"vectors"``: one dict per drawing that produced path data
          (``data``, ``fill``, ``stroke``, ``width``).
        * ``"images"``: one dict per image placement (``bytes`` as base64
          text, ``x``, ``y``, ``width``, ``height``).
    """
    pdf_bytes = await file.read()
    pages_data = []

    # The context manager closes the document even if extraction fails
    # part-way, so the parsed PDF is not leaked.
    with fitz.open(stream=pdf_bytes, filetype="pdf") as doc:
        for page in doc:
            viewport = page.rect
            page_dict = {
                "width": viewport.width,
                "height": viewport.height,
                "text": [],
                "images": [],
                "vectors": [],
            }

            # 1. Extract styled text
            raw_dict = page.get_text("dict")
            for block in raw_dict["blocks"]:
                if block["type"] != 0:  # only type 0 blocks carry text
                    continue
                for line in block["lines"]:
                    for span in line["spans"]:
                        # PyMuPDF documents span["color"] as a packed sRGB
                        # integer, so it is unpacked directly; a colour-name
                        # lookup cannot interpret an integer.
                        rgb = _srgb_int_to_rgb(span["color"])
                        page_dict["text"].append({
                            "content": span["text"],
                            "x": span["bbox"][0],
                            "y": span["bbox"][1],
                            "size": span["size"],
                            "font": span["font"],
                            "color": rgb_to_figma(rgb),
                        })

            # 2. Extract vectors (line art)
            for draw in page.get_drawings():
                path_data = _drawing_items_to_path(draw["items"])
                if not path_data:
                    continue
                page_dict["vectors"].append({
                    "data": path_data,
                    "fill": rgb_to_figma(draw.get("fill")),
                    "stroke": rgb_to_figma(draw.get("color")),
                    "width": draw.get("width", 1),
                })

            # 3. Extract images
            for img in page.get_images(full=True):
                xref = img[0]
                # Every rectangle at which this image is placed on the page.
                img_rects = page.get_image_rects(xref)
                if not img_rects:
                    continue
                base_image = doc.extract_image(xref)
                # Encode once per image rather than once per placement.
                encoded = base64.b64encode(base_image["image"]).decode("utf-8")
                for rect in img_rects:
                    page_dict["images"].append({
                        "bytes": encoded,
                        "x": rect.x0,
                        "y": rect.y0,
                        "width": rect.width,
                        "height": rect.height,
                    })

            pages_data.append(page_dict)

    return {"pages": pages_data}
90
+
91
if __name__ == "__main__":
    # Imported here so uvicorn is only needed when the file is run directly.
    import uvicorn

    # Serve the app on all network interfaces, port 7860.
    uvicorn.run(app, host="0.0.0.0", port=7860)