Spaces:
Running
Running
Update app.py
Browse files
app.py
CHANGED
|
@@ -168,8 +168,8 @@ def extract_text_from_image(image, temperature=0.2, stream=False):
|
|
| 168 |
yield cleaned_text
|
| 169 |
|
| 170 |
|
| 171 |
-
def process_input(file_input, temperature, page_num):
|
| 172 |
-
"""Process uploaded file (image or PDF) and extract text with streaming."""
|
| 173 |
if file_input is None:
|
| 174 |
yield "Please upload an image or PDF first.", "", "", None, gr.update()
|
| 175 |
return
|
|
@@ -197,8 +197,8 @@ def process_input(file_input, temperature, page_num):
|
|
| 197 |
return
|
| 198 |
|
| 199 |
try:
|
| 200 |
-
# Extract text using LightOnOCR with streaming
|
| 201 |
-
for extracted_text in extract_text_from_image(image_to_process, temperature, stream=True):
|
| 202 |
yield extracted_text, extracted_text, page_info, image_to_process, gr.update()
|
| 203 |
|
| 204 |
except Exception as e:
|
|
@@ -228,12 +228,18 @@ def update_slider(file_input):
|
|
| 228 |
# Create Gradio interface
|
| 229 |
with gr.Blocks(title="π Image/PDF OCR with LightOnOCR", theme=gr.themes.Soft()) as demo:
|
| 230 |
gr.Markdown(f"""
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 231 |
# π Image/PDF to Text Extraction with LightOnOCR
|
| 232 |
|
| 233 |
**💡 How to use:**
|
| 234 |
1. Upload an image or PDF
|
| 235 |
2. For PDFs: select which page to extract (1-20)
|
| 236 |
-
3. Adjust temperature if needed
|
| 237 |
4. Click "Extract Text"
|
| 238 |
|
| 239 |
**Note:** The Markdown rendering for tables may not always be perfect. Check the raw output for complex tables!
|
|
@@ -277,6 +283,11 @@ with gr.Blocks(title="π Image/PDF OCR with LightOnOCR", theme=gr.themes.Soft(
|
|
| 277 |
label="Temperature",
|
| 278 |
info="0.0 = deterministic, Higher = more varied"
|
| 279 |
)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 280 |
submit_btn = gr.Button("Extract Text", variant="primary")
|
| 281 |
clear_btn = gr.Button("Clear", variant="secondary")
|
| 282 |
|
|
@@ -299,7 +310,7 @@ with gr.Blocks(title="π Image/PDF OCR with LightOnOCR", theme=gr.themes.Soft(
|
|
| 299 |
# Event handlers
|
| 300 |
submit_btn.click(
|
| 301 |
fn=process_input,
|
| 302 |
-
inputs=[file_input, temperature, num_pages],
|
| 303 |
outputs=[output_text, raw_output, page_info, rendered_image, num_pages]
|
| 304 |
)
|
| 305 |
|
|
|
|
| 168 |
yield cleaned_text
|
| 169 |
|
| 170 |
|
| 171 |
+
def process_input(file_input, temperature, page_num, enable_streaming):
|
| 172 |
+
"""Process uploaded file (image or PDF) and extract text with optional streaming."""
|
| 173 |
if file_input is None:
|
| 174 |
yield "Please upload an image or PDF first.", "", "", None, gr.update()
|
| 175 |
return
|
|
|
|
| 197 |
return
|
| 198 |
|
| 199 |
try:
|
| 200 |
+
# Extract text using LightOnOCR with optional streaming
|
| 201 |
+
for extracted_text in extract_text_from_image(image_to_process, temperature, stream=enable_streaming):
|
| 202 |
yield extracted_text, extracted_text, page_info, image_to_process, gr.update()
|
| 203 |
|
| 204 |
except Exception as e:
|
|
|
|
| 228 |
# Create Gradio interface
|
| 229 |
with gr.Blocks(title="π Image/PDF OCR with LightOnOCR", theme=gr.themes.Soft()) as demo:
|
| 230 |
gr.Markdown(f"""
|
| 231 |
+
# ⚠️ **HEADS UP: This space is now on CPU and runs very slowly.**
|
| 232 |
+
|
| 233 |
+
For much faster results, check out the [GPU version here](https://huggingface.co/spaces/lightonai/LightOnOCR-1B-Demo-zero).
|
| 234 |
+
|
| 235 |
+
---
|
| 236 |
+
|
| 237 |
# π Image/PDF to Text Extraction with LightOnOCR
|
| 238 |
|
| 239 |
**💡 How to use:**
|
| 240 |
1. Upload an image or PDF
|
| 241 |
2. For PDFs: select which page to extract (1-20)
|
| 242 |
+
3. Adjust temperature if needed
|
| 243 |
4. Click "Extract Text"
|
| 244 |
|
| 245 |
**Note:** The Markdown rendering for tables may not always be perfect. Check the raw output for complex tables!
|
|
|
|
| 283 |
label="Temperature",
|
| 284 |
info="0.0 = deterministic, Higher = more varied"
|
| 285 |
)
|
| 286 |
+
enable_streaming = gr.Checkbox(
|
| 287 |
+
label="Enable Streaming",
|
| 288 |
+
value=False,
|
| 289 |
+
info="Show text progressively as it's generated"
|
| 290 |
+
)
|
| 291 |
submit_btn = gr.Button("Extract Text", variant="primary")
|
| 292 |
clear_btn = gr.Button("Clear", variant="secondary")
|
| 293 |
|
|
|
|
| 310 |
# Event handlers
|
| 311 |
submit_btn.click(
|
| 312 |
fn=process_input,
|
| 313 |
+
inputs=[file_input, temperature, num_pages, enable_streaming],
|
| 314 |
outputs=[output_text, raw_output, page_info, rendered_image, num_pages]
|
| 315 |
)
|
| 316 |
|