Spaces:
Sleeping
Sleeping
Update app.py
Browse files
app.py
CHANGED
|
@@ -35,8 +35,11 @@ def get_ocr(lang, use_gpu=False):
|
|
| 35 |
# Function to extract images from PDF
|
| 36 |
def pdf_to_images(uploaded_file):
|
| 37 |
try:
|
| 38 |
-
#
|
| 39 |
-
|
|
|
|
|
|
|
|
|
|
| 40 |
logger.info(f"PDF loaded successfully with {len(doc)} pages.")
|
| 41 |
|
| 42 |
image_parts = []
|
|
@@ -59,6 +62,7 @@ def pdf_to_images(uploaded_file):
|
|
| 59 |
except Exception as e:
|
| 60 |
logger.error(f"Error processing PDF: {str(e)}")
|
| 61 |
raise HTTPException(status_code=500, detail="Error processing PDF file")
|
|
|
|
| 62 |
|
| 63 |
@app.post("/ocr")
|
| 64 |
async def create_upload_file(
|
|
|
|
| 35 |
# Function to extract images from PDF
|
| 36 |
def pdf_to_images(uploaded_file):
|
| 37 |
try:
|
| 38 |
+
# Read the uploaded file as bytes
|
| 39 |
+
file_data = uploaded_file.file.read() # This returns the file as bytes
|
| 40 |
+
|
| 41 |
+
# Open the PDF using fitz (PyMuPDF) from the bytes data
|
| 42 |
+
doc = fitz.open(stream=file_data, filetype="pdf")
|
| 43 |
logger.info(f"PDF loaded successfully with {len(doc)} pages.")
|
| 44 |
|
| 45 |
image_parts = []
|
|
|
|
| 62 |
except Exception as e:
|
| 63 |
logger.error(f"Error processing PDF: {str(e)}")
|
| 64 |
raise HTTPException(status_code=500, detail="Error processing PDF file")
|
| 65 |
+
|
| 66 |
|
| 67 |
@app.post("/ocr")
|
| 68 |
async def create_upload_file(
|