AkashKumarave committed on
Commit
a8621bc
·
verified ·
1 Parent(s): 69f734d

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +35 -24
app.py CHANGED
@@ -4,7 +4,6 @@ import logging
4
  from fastapi import FastAPI, File, UploadFile
5
  from fastapi.middleware.cors import CORSMiddleware
6
 
7
- # Set up logging to see errors in the Hugging Face Log tab
8
  logging.basicConfig(level=logging.INFO)
9
  logger = logging.getLogger(__name__)
10
 
@@ -19,17 +18,31 @@ app.add_middleware(
19
  )
20
 
21
  def normalize_color(color):
22
- """Ensures color is always a 0-1 RGB dict for Figma"""
23
  try:
24
- if color is None: return {"r": 0, "g": 0, "b": 0}
25
- # If color is an integer (from span['color'])
 
 
 
 
26
  if isinstance(color, int):
27
  color = fitz.utils.getColor(color)
28
 
 
 
 
 
 
29
  if len(color) == 1: # Grayscale
30
- return {"r": float(color[0]), "g": float(color[0]), "b": float(color[0])}
31
- return {"r": float(color[0]), "g": float(color[1]), "b": float(color[2])}
32
- except:
 
 
 
 
 
33
  return {"r": 0, "g": 0, "b": 0}
34
 
35
  @app.get("/")
@@ -39,7 +52,6 @@ async def root():
39
  @app.post("/convert")
40
  async def convert_pdf(file: UploadFile = File(...)):
41
  try:
42
- logger.info(f"Processing file: {file.filename}")
43
  content = await file.read()
44
  doc = fitz.open(stream=content, filetype="pdf")
45
  pages_data = []
@@ -51,40 +63,39 @@ async def convert_pdf(file: UploadFile = File(...)):
51
  "elements": []
52
  }
53
 
54
- # 1. Extract Text and Images
55
- # Using "dict" captures font size, location, and images
56
  raw_dict = page.get_text("dict")
57
  for block in raw_dict["blocks"]:
58
- if block["type"] == 0: # Text Block
59
- for line in block["lines"]:
60
- for span in line["spans"]:
61
  page_dict["elements"].append({
62
  "type": "TEXT",
63
- "content": span["text"],
64
  "x": float(span["bbox"][0]),
65
  "y": float(span["bbox"][1]),
66
- "size": float(span["size"]),
67
- "color": normalize_color(span["color"])
68
  })
69
- elif block["type"] == 1: # Image Block
70
  page_dict["elements"].append({
71
  "type": "IMAGE",
72
  "bytes": base64.b64encode(block["image"]).decode("utf-8"),
73
- "x": float(block["bbox"][0]),
74
  "y": float(block["bbox"][1]),
75
  "width": float(block["bbox"][2] - block["bbox"][0]),
76
  "height": float(block["bbox"][3] - block["bbox"][1])
77
  })
78
 
79
- # 2. Extract Vector Drawings (Paths)
80
  for path in page.get_drawings():
81
  svg_path = ""
82
- for item in path["items"]:
83
- if item[0] == "l": # line
84
  svg_path += f"M {item[1].x} {item[1].y} L {item[2].x} {item[2].y} "
85
- elif item[0] == "c": # curve
86
  svg_path += f"M {item[1].x} {item[1].y} C {item[2].x} {item[2].y} {item[3].x} {item[3].y} {item[4].x} {item[4].y} "
87
- elif item[0] == "re": # rectangle
88
  r = item[1]
89
  svg_path += f"M {r.x0} {r.y0} L {r.x1} {r.y0} L {r.x1} {r.y1} L {r.x0} {r.y1} Z "
90
 
@@ -103,7 +114,7 @@ async def convert_pdf(file: UploadFile = File(...)):
103
  return {"pages": pages_data}
104
 
105
  except Exception as e:
106
- logger.error(f"Conversion Error: {str(e)}")
107
  return {"error": str(e)}
108
 
109
  if __name__ == "__main__":
 
4
  from fastapi import FastAPI, File, UploadFile
5
  from fastapi.middleware.cors import CORSMiddleware
6
 
 
7
  logging.basicConfig(level=logging.INFO)
8
  logger = logging.getLogger(__name__)
9
 
 
18
  )
19
 
20
  def normalize_color(color):
21
+ """Safely converts any PDF color format to Figma RGB format."""
22
  try:
23
+ # If the PDF doesn't define a color for this element, return None
24
+ # This tells Figma not to apply a fill or stroke
25
+ if color is None:
26
+ return None
27
+
28
+ # Handle integer colors (e.g., 16711680 for Red)
29
  if isinstance(color, int):
30
  color = fitz.utils.getColor(color)
31
 
32
+ # Ensure it's a list/tuple of numbers
33
+ if not isinstance(color, (list, tuple)):
34
+ return {"r": 0, "g": 0, "b": 0}
35
+
36
+ # Convert to 0.0 - 1.0 range based on component count
37
  if len(color) == 1: # Grayscale
38
+ val = float(color[0])
39
+ return {"r": val, "g": val, "b": val}
40
+ elif len(color) >= 3: # RGB or CMYK (taking first 3)
41
+ return {"r": float(color[0]), "g": float(color[1]), "b": float(color[2])}
42
+
43
+ return {"r": 0, "g": 0, "b": 0}
44
+ except Exception as e:
45
+ logger.error(f"Color error: {e}")
46
  return {"r": 0, "g": 0, "b": 0}
47
 
48
  @app.get("/")
 
52
  @app.post("/convert")
53
  async def convert_pdf(file: UploadFile = File(...)):
54
  try:
 
55
  content = await file.read()
56
  doc = fitz.open(stream=content, filetype="pdf")
57
  pages_data = []
 
63
  "elements": []
64
  }
65
 
66
+ # 1. Text and Images
 
67
  raw_dict = page.get_text("dict")
68
  for block in raw_dict["blocks"]:
69
+ if block.get("type") == 0: # TEXT
70
+ for line in block.get("lines", []):
71
+ for span in line.get("spans", []):
72
  page_dict["elements"].append({
73
  "type": "TEXT",
74
+ "content": span.get("text", ""),
75
  "x": float(span["bbox"][0]),
76
  "y": float(span["bbox"][1]),
77
+ "size": float(span.get("size", 12)),
78
+ "color": normalize_color(span.get("color"))
79
  })
80
+ elif block.get("type") == 1: # IMAGE
81
  page_dict["elements"].append({
82
  "type": "IMAGE",
83
  "bytes": base64.b64encode(block["image"]).decode("utf-8"),
84
+ "x": float(block["bbox"][0]),
85
  "y": float(block["bbox"][1]),
86
  "width": float(block["bbox"][2] - block["bbox"][0]),
87
  "height": float(block["bbox"][3] - block["bbox"][1])
88
  })
89
 
90
+ # 2. Vector Drawings
91
  for path in page.get_drawings():
92
  svg_path = ""
93
+ for item in path.get("items", []):
94
+ if item[0] == "l":
95
  svg_path += f"M {item[1].x} {item[1].y} L {item[2].x} {item[2].y} "
96
+ elif item[0] == "c":
97
  svg_path += f"M {item[1].x} {item[1].y} C {item[2].x} {item[2].y} {item[3].x} {item[3].y} {item[4].x} {item[4].y} "
98
+ elif item[0] == "re":
99
  r = item[1]
100
  svg_path += f"M {r.x0} {r.y0} L {r.x1} {r.y0} L {r.x1} {r.y1} L {r.x0} {r.y1} Z "
101
 
 
114
  return {"pages": pages_data}
115
 
116
  except Exception as e:
117
+ logger.error(f"Global Error: {str(e)}")
118
  return {"error": str(e)}
119
 
120
  if __name__ == "__main__":