iplotnor committed on
Commit
2d56832
·
verified ·
1 Parent(s): ab4918d

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +57 -13
app.py CHANGED
@@ -133,28 +133,72 @@ class FloorPlanProcessor:
133
  pdf.page_count = len(pdf_document)
134
 
135
  images = []
136
- for page_num in range(len(pdf_document)):
 
 
 
 
 
137
  page = pdf_document[page_num]
 
 
 
138
  image_list = page.get_images(full=True)
 
139
 
140
- if not image_list:
141
- pix = page.get_pixmap(matrix=fitz.Matrix(1, 1), alpha=False)
142
- img = Image.open(BytesIO(pix.tobytes("png")))
143
- images.append(img)
144
- else:
145
  for img_info in image_list:
146
- xref = img_info[0]
147
- base_image = pdf_document.extract_image(xref)
148
- img = Image.open(BytesIO(base_image["image"]))
149
- if img.width > 100 and img.height > 100:
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
150
  images.append(img)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
151
 
152
- pdf.images = images
153
- logger.info(f"Extracted {len(images)} images")
154
  return True
155
 
156
  except Exception as e:
157
- logger.error(f"PDF error: {str(e)}")
158
  pdf.error = str(e)
159
  return False
160
 
 
133
  pdf.page_count = len(pdf_document)
134
 
135
  images = []
136
+ logger.info(f"PDF has {pdf.page_count} pages")
137
+
138
+ # Process only first 5 pages (optimization)
139
+ pages_to_process = min(5, pdf.page_count)
140
+
141
+ for page_num in range(pages_to_process):
142
  page = pdf_document[page_num]
143
+ logger.info(f"Processing page {page_num + 1}/{pages_to_process}")
144
+
145
+ # Try to extract embedded images first (high quality)
146
  image_list = page.get_images(full=True)
147
+ extracted_from_page = False
148
 
149
+ if image_list:
150
+ logger.info(f" Found {len(image_list)} embedded images")
 
 
 
151
  for img_info in image_list:
152
+ try:
153
+ xref = img_info[0]
154
+ base_image = pdf_document.extract_image(xref)
155
+ if base_image and "image" in base_image:
156
+ img = Image.open(BytesIO(base_image["image"]))
157
+ if img.width > 200 and img.height > 200:
158
+ logger.info(f" ✓ Extracted embedded image: {img.size}")
159
+ images.append(img)
160
+ extracted_from_page = True
161
+ except Exception as e:
162
+ logger.warning(f" Could not extract embedded image: {e}")
163
+ continue
164
+
165
+ # If no good embedded images, render page at high quality
166
+ if not extracted_from_page:
167
+ try:
168
+ # Render at 300 DPI for better quality (PyMuPDF ignores the matrix argument when dpi is given)
169
+ pix = page.get_pixmap(matrix=fitz.Matrix(2, 2), alpha=False, dpi=300)
170
+ img = Image.open(BytesIO(pix.tobytes("png")))
171
+
172
+ if img.width > 0 and img.height > 0:
173
+ logger.info(f" ✓ Rendered page as image: {img.size}")
174
  images.append(img)
175
+ except Exception as e:
176
+ logger.warning(f" Could not render page {page_num + 1}: {e}")
177
+ continue
178
+
179
+ # Stop at the first page that yields usable embedded images
180
+ if extracted_from_page and len(images) > 0:
181
+ logger.info("✓ Got good floor plan from embedded images, stopping search")
182
+ break
183
+
184
+ if not images:
185
+ logger.warning("No images extracted, trying fallback rendering")
186
+ # Fallback: render first page at maximum quality
187
+ try:
188
+ page = pdf_document[0]
189
+ pix = page.get_pixmap(matrix=fitz.Matrix(3, 3), alpha=False)
190
+ img = Image.open(BytesIO(pix.tobytes("png")))
191
+ images.append(img)
192
+ logger.info(f"✓ Fallback: Rendered first page at 3x zoom: {img.size}")
193
+ except Exception as e:
194
+ logger.error(f"Fallback rendering failed: {e}")
195
 
196
+ pdf.images = images[:3] # Keep max 3 images
197
+ logger.info(f" Successfully extracted {len(pdf.images)} images from PDF")
198
  return True
199
 
200
  except Exception as e:
201
+ logger.error(f"PDF error: {str(e)}", exc_info=True)
202
  pdf.error = str(e)
203
  return False
204