Spaces:
Sleeping
Sleeping
Update app.py
Browse files
app.py
CHANGED
|
@@ -35,24 +35,31 @@ def get_ocr(lang, use_gpu=False):
|
|
| 35 |
# Function to extract images from PDF
|
| 36 |
def pdf_to_images(uploaded_file):
|
| 37 |
try:
|
| 38 |
-
|
|
|
|
|
|
|
|
|
|
| 39 |
image_parts = []
|
| 40 |
-
|
| 41 |
for page_number in range(len(doc)):
|
| 42 |
-
page =
|
| 43 |
pix = page.get_pixmap()
|
| 44 |
image_data = pix.tobytes("png")
|
| 45 |
|
|
|
|
|
|
|
|
|
|
| 46 |
image_parts.append({
|
| 47 |
"mime_type": "image/png",
|
| 48 |
"data": image_data
|
| 49 |
})
|
| 50 |
-
|
|
|
|
| 51 |
return image_parts
|
|
|
|
| 52 |
except Exception as e:
|
| 53 |
logger.error(f"Error processing PDF: {str(e)}")
|
| 54 |
raise HTTPException(status_code=500, detail="Error processing PDF file")
|
| 55 |
-
|
| 56 |
@app.post("/ocr")
|
| 57 |
async def create_upload_file(
|
| 58 |
file: UploadFile = File(...),
|
|
|
|
| 35 |
# Function to extract images from PDF
def pdf_to_images(uploaded_file):
    """Render each page of an uploaded PDF to PNG bytes.

    Args:
        uploaded_file: Object exposing ``read()``; whatever it returns is
            handed to ``fitz.open`` as the PDF stream (presumably the raw
            PDF bytes -- confirm against the caller).

    Returns:
        A list with one dict per page, in page order, each shaped as
        ``{"mime_type": "image/png", "data": <result of pix.tobytes("png")>}``.

    Raises:
        HTTPException: status 500 when anything fails while reading,
            opening, or rendering the PDF. The original exception is
            chained as the cause.
    """
    try:
        # Load PDF from the uploaded file. The context manager guarantees the
        # document is closed even if rendering a page fails part-way through.
        with fitz.open(stream=uploaded_file.read(), filetype="pdf") as doc:
            page_count = len(doc)
            logger.info(f"PDF loaded successfully with {page_count} pages.")

            image_parts = []
            for page_number in range(page_count):
                page = doc.load_page(page_number)
                pix = page.get_pixmap()
                image_data = pix.tobytes("png")

                # Log progress for each page
                logger.info(f"Processed page {page_number + 1}/{page_count}.")

                image_parts.append({
                    "mime_type": "image/png",
                    "data": image_data,
                })

        logger.info(f"PDF to image conversion completed with {len(image_parts)} images.")
        return image_parts

    except Exception as e:
        # Boundary handler: log the detail server-side, return a generic 500
        # to the client, and keep the original exception as the cause.
        logger.error(f"Error processing PDF: {str(e)}")
        raise HTTPException(status_code=500, detail="Error processing PDF file") from e
|
| 62 |
+
|
| 63 |
@app.post("/ocr")
|
| 64 |
async def create_upload_file(
|
| 65 |
file: UploadFile = File(...),
|