kmuthudurai committed on
Commit
cd41155
·
verified ·
1 Parent(s): dca3ec3

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +15 -8
app.py CHANGED
@@ -33,15 +33,22 @@ def get_ocr(lang, use_gpu=False):
33
  return ocr_cache.get(lang)
34
 
35
  # Function to extract images from PDF
36
- def pdf_to_images(file_contents):
37
  try:
38
- doc = fitz.open(io.BytesIO(file_contents))
39
- images = []
40
- for page in doc:
 
 
41
  pix = page.get_pixmap()
42
- img = Image.open(io.BytesIO(pix.tobytes("png")))
43
- images.append(img)
44
- return images
 
 
 
 
 
45
  except Exception as e:
46
  logger.error(f"Error processing PDF: {str(e)}")
47
  raise HTTPException(status_code=500, detail="Error processing PDF file")
@@ -56,7 +63,7 @@ async def create_upload_file(
56
 
57
  # Determine if the uploaded file is a PDF or image
58
  if file.content_type == "application/pdf":
59
- images = pdf_to_images(contents)
60
  else:
61
  # If it's an image file
62
  images = [Image.open(io.BytesIO(contents))]
 
33
  return ocr_cache.get(lang)
34
 
35
  # Function to extract images from PDF
36
+ def pdf_to_images(uploaded_file):
37
  try:
38
+ doc = fitz.open(stream=uploaded_file.read(),filetype="pdf")
39
+ image_parts = []
40
+
41
+ for page_number in range(len(pdf_document)):
42
+ page = pdf_document.load_page(page_number)
43
  pix = page.get_pixmap()
44
+ image_data = pix.tobytes("png")
45
+
46
+ image_parts.append({
47
+ "mime_type": "image/png",
48
+ "data": image_data
49
+ })
50
+
51
+ return image_parts
52
  except Exception as e:
53
  logger.error(f"Error processing PDF: {str(e)}")
54
  raise HTTPException(status_code=500, detail="Error processing PDF file")
 
63
 
64
  # Determine if the uploaded file is a PDF or image
65
  if file.content_type == "application/pdf":
66
+ images = pdf_to_images(file)
67
  else:
68
  # If it's an image file
69
  images = [Image.open(io.BytesIO(contents))]