kmuthudurai committed on
Commit
c12c4e7
·
verified ·
1 Parent(s): 4056722

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +12 -5
app.py CHANGED
@@ -35,24 +35,31 @@ def get_ocr(lang, use_gpu=False):
35
  # Function to extract images from PDF
36
  def pdf_to_images(uploaded_file):
37
  try:
38
- doc = fitz.open(stream=uploaded_file.read(),filetype="pdf")
 
 
 
39
  image_parts = []
40
-
41
  for page_number in range(len(doc)):
42
- page = pdf_document.load_page(page_number)
43
  pix = page.get_pixmap()
44
  image_data = pix.tobytes("png")
45
 
 
 
 
46
  image_parts.append({
47
  "mime_type": "image/png",
48
  "data": image_data
49
  })
50
-
 
51
  return image_parts
 
52
  except Exception as e:
53
  logger.error(f"Error processing PDF: {str(e)}")
54
  raise HTTPException(status_code=500, detail="Error processing PDF file")
55
-
56
  @app.post("/ocr")
57
  async def create_upload_file(
58
  file: UploadFile = File(...),
 
35
  # Function to extract images from PDF
36
  def pdf_to_images(uploaded_file):
37
  try:
38
+ # Load PDF from the uploaded file
39
+ doc = fitz.open(stream=uploaded_file.read(), filetype="pdf")
40
+ logger.info(f"PDF loaded successfully with {len(doc)} pages.")
41
+
42
  image_parts = []
 
43
  for page_number in range(len(doc)):
44
+ page = doc.load_page(page_number)
45
  pix = page.get_pixmap()
46
  image_data = pix.tobytes("png")
47
 
48
+ # Log progress for each page
49
+ logger.info(f"Processed page {page_number + 1}/{len(doc)}.")
50
+
51
  image_parts.append({
52
  "mime_type": "image/png",
53
  "data": image_data
54
  })
55
+
56
+ logger.info(f"PDF to image conversion completed with {len(image_parts)} images.")
57
  return image_parts
58
+
59
  except Exception as e:
60
  logger.error(f"Error processing PDF: {str(e)}")
61
  raise HTTPException(status_code=500, detail="Error processing PDF file")
62
+
63
  @app.post("/ocr")
64
  async def create_upload_file(
65
  file: UploadFile = File(...),