Spaces:
Running
Running
Update app.py
Browse files
app.py
CHANGED
|
@@ -23,11 +23,11 @@ def image_to_base64(image):
|
|
| 23 |
return base64.b64encode(buffered.getvalue()).decode("utf-8")
|
| 24 |
|
| 25 |
|
| 26 |
-
def render_pdf_page(page, max_resolution=
|
| 27 |
width, height = page.get_size()
|
| 28 |
pixel_width = width * scale
|
| 29 |
pixel_height = height * scale
|
| 30 |
-
resize_factor = min(max_resolution / pixel_width, max_resolution / pixel_height)
|
| 31 |
target_scale = scale * resize_factor
|
| 32 |
return page.render(scale=target_scale, rev_byteorder=True).to_pil()
|
| 33 |
|
|
@@ -157,6 +157,7 @@ with gr.Blocks(title="📖 Image/PDF OCR", theme=gr.themes.Soft()) as demo:
|
|
| 157 |
2. For PDFs: choose how many pages to process (1-5, default is 1)
|
| 158 |
3. Adjust temperature if needed
|
| 159 |
4. Click "Extract Text"
|
|
|
|
| 160 |
"""
|
| 161 |
)
|
| 162 |
|
|
|
|
| 23 |
return base64.b64encode(buffered.getvalue()).decode("utf-8")
|
| 24 |
|
| 25 |
|
| 26 |
+
def render_pdf_page(page, max_resolution=1540, scale=2.77):
|
| 27 |
width, height = page.get_size()
|
| 28 |
pixel_width = width * scale
|
| 29 |
pixel_height = height * scale
|
| 30 |
+
resize_factor = min(1, max_resolution / pixel_width, max_resolution / pixel_height)
|
| 31 |
target_scale = scale * resize_factor
|
| 32 |
return page.render(scale=target_scale, rev_byteorder=True).to_pil()
|
| 33 |
|
|
|
|
| 157 |
2. For PDFs: choose how many pages to process (1-5, default is 1)
|
| 158 |
3. Adjust temperature if needed
|
| 159 |
4. Click "Extract Text"
|
| 160 |
+
Note: The Markdown rendering for tables is not always correct, check the raw output for complex tables!
|
| 161 |
"""
|
| 162 |
)
|
| 163 |
|