JKrishnanandhaa committed on
Commit
49ca167
·
verified ·
1 Parent(s): 5c8f686

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +42 -12
app.py CHANGED
@@ -286,17 +286,32 @@ class ForgeryDetector:
286
  # Skip to end - image is ready
287
  pdf_path = None
288
 
289
- # If we got a PDF, convert it to image
290
  if pdf_path and os.path.exists(pdf_path):
291
  import fitz
292
  pdf_document = fitz.open(pdf_path)
293
- page = pdf_document[0]
294
- pix = page.get_pixmap(matrix=fitz.Matrix(2, 2))
295
- image = np.frombuffer(pix.samples, dtype=np.uint8).reshape(pix.height, pix.width, pix.n)
296
- if pix.n == 4:
297
- image = cv2.cvtColor(image, cv2.COLOR_RGBA2RGB)
 
 
 
298
  pdf_document.close()
299
  os.unlink(pdf_path)
 
 
 
 
 
 
 
 
 
 
 
 
300
 
301
  except Exception as e:
302
  raise ValueError(f"Could not process Word document. Please convert to PDF or image first. Error: {str(e)}")
@@ -309,15 +324,30 @@ class ForgeryDetector:
309
  pass
310
 
311
  elif image.lower().endswith('.pdf'):
312
- # Handle PDF files
313
  import fitz # PyMuPDF
314
  pdf_document = fitz.open(image)
315
- page = pdf_document[0]
316
- pix = page.get_pixmap(matrix=fitz.Matrix(2, 2))
317
- image = np.frombuffer(pix.samples, dtype=np.uint8).reshape(pix.height, pix.width, pix.n)
318
- if pix.n == 4:
319
- image = cv2.cvtColor(image, cv2.COLOR_RGBA2RGB)
 
 
 
320
  pdf_document.close()
 
 
 
 
 
 
 
 
 
 
 
 
321
  else:
322
  # Load image file
323
  image = Image.open(image)
 
286
  # Skip to end - image is ready
287
  pdf_path = None
288
 
289
+ # If we got a PDF, convert ALL pages to a single tall image
290
  if pdf_path and os.path.exists(pdf_path):
291
  import fitz
292
  pdf_document = fitz.open(pdf_path)
293
+ page_images = []
294
+ for page_num in range(len(pdf_document)):
295
+ page = pdf_document[page_num]
296
+ pix = page.get_pixmap(matrix=fitz.Matrix(2, 2))
297
+ page_img = np.frombuffer(pix.samples, dtype=np.uint8).reshape(pix.height, pix.width, pix.n)
298
+ if pix.n == 4:
299
+ page_img = cv2.cvtColor(page_img, cv2.COLOR_RGBA2RGB)
300
+ page_images.append(page_img)
301
  pdf_document.close()
302
  os.unlink(pdf_path)
303
+ # Stack all pages vertically into one tall image
304
+ if len(page_images) == 1:
305
+ image = page_images[0]
306
+ else:
307
+ max_width = max(p.shape[1] for p in page_images)
308
+ padded = []
309
+ for p in page_images:
310
+ if p.shape[1] < max_width:
311
+ pad = np.ones((p.shape[0], max_width - p.shape[1], 3), dtype=np.uint8) * 255
312
+ p = np.concatenate([p, pad], axis=1)
313
+ padded.append(p)
314
+ image = np.concatenate(padded, axis=0)
315
 
316
  except Exception as e:
317
  raise ValueError(f"Could not process Word document. Please convert to PDF or image first. Error: {str(e)}")
 
324
  pass
325
 
326
  elif image.lower().endswith('.pdf'):
327
+ # Handle PDF files - process ALL pages
328
  import fitz # PyMuPDF
329
  pdf_document = fitz.open(image)
330
+ page_images = []
331
+ for page_num in range(len(pdf_document)):
332
+ page = pdf_document[page_num]
333
+ pix = page.get_pixmap(matrix=fitz.Matrix(2, 2))
334
+ page_img = np.frombuffer(pix.samples, dtype=np.uint8).reshape(pix.height, pix.width, pix.n)
335
+ if pix.n == 4:
336
+ page_img = cv2.cvtColor(page_img, cv2.COLOR_RGBA2RGB)
337
+ page_images.append(page_img)
338
  pdf_document.close()
339
+ # Stack all pages vertically into one tall image
340
+ if len(page_images) == 1:
341
+ image = page_images[0]
342
+ else:
343
+ max_width = max(p.shape[1] for p in page_images)
344
+ padded = []
345
+ for p in page_images:
346
+ if p.shape[1] < max_width:
347
+ pad = np.ones((p.shape[0], max_width - p.shape[1], 3), dtype=np.uint8) * 255
348
+ p = np.concatenate([p, pad], axis=1)
349
+ padded.append(p)
350
+ image = np.concatenate(padded, axis=0)
351
  else:
352
  # Load image file
353
  image = Image.open(image)