Spaces:

lightonai
/

LightOnOCR-1B-Demo

Running

App Files Community

Bapt120 committed 29 days ago

Commit

bc711a3

verified ·

1 Parent(s): 17e7b7d

Update app.py

Browse files

Files changed (1) hide show

app.py +17 -6

app.py CHANGED Viewed

@@ -168,8 +168,8 @@ def extract_text_from_image(image, temperature=0.2, stream=False):
         yield cleaned_text
-def process_input(file_input, temperature, page_num):
-    """Process uploaded file (image or PDF) and extract text with streaming."""
     if file_input is None:
         yield "Please upload an image or PDF first.", "", "", None, gr.update()
         return
@@ -197,8 +197,8 @@ def process_input(file_input, temperature, page_num):
             return
     try:
-        # Extract text using LightOnOCR with streaming
-        for extracted_text in extract_text_from_image(image_to_process, temperature, stream=True):
             yield extracted_text, extracted_text, page_info, image_to_process, gr.update()
     except Exception as e:
@@ -228,12 +228,18 @@ def update_slider(file_input):
 # Create Gradio interface
 with gr.Blocks(title="📖 Image/PDF OCR with LightOnOCR", theme=gr.themes.Soft()) as demo:
     gr.Markdown(f"""
 # 📖 Image/PDF to Text Extraction with LightOnOCR
 **💡 How to use:**
 1. Upload an image or PDF
 2. For PDFs: select which page to extract (1-20)
-3. Adjust temperature if needed (0.0 for deterministic, higher for more varied output)
 4. Click "Extract Text"
 **Note:** The Markdown rendering for tables may not always be perfect. Check the raw output for complex tables!
@@ -277,6 +283,11 @@ with gr.Blocks(title="📖 Image/PDF OCR with LightOnOCR", theme=gr.themes.Soft(
                 label="Temperature",
                 info="0.0 = deterministic, Higher = more varied"
             )
             submit_btn = gr.Button("Extract Text", variant="primary")
             clear_btn = gr.Button("Clear", variant="secondary")
@@ -299,7 +310,7 @@ with gr.Blocks(title="📖 Image/PDF OCR with LightOnOCR", theme=gr.themes.Soft(
     # Event handlers
     submit_btn.click(
         fn=process_input,
-        inputs=[file_input, temperature, num_pages],
         outputs=[output_text, raw_output, page_info, rendered_image, num_pages]
     )

         yield cleaned_text
+def process_input(file_input, temperature, page_num, enable_streaming):
+    """Process uploaded file (image or PDF) and extract text with optional streaming."""
     if file_input is None:
         yield "Please upload an image or PDF first.", "", "", None, gr.update()
         return
             return
     try:
+        # Extract text using LightOnOCR with optional streaming
+        for extracted_text in extract_text_from_image(image_to_process, temperature, stream=enable_streaming):
             yield extracted_text, extracted_text, page_info, image_to_process, gr.update()
     except Exception as e:
 # Create Gradio interface
 with gr.Blocks(title="📖 Image/PDF OCR with LightOnOCR", theme=gr.themes.Soft()) as demo:
     gr.Markdown(f"""
+# ⚠️ **HEADS UP: This space is now on CPU and runs very slowly.**
+For much faster results, check out the [GPU version here](https://huggingface.co/spaces/lightonai/LightOnOCR-1B-Demo-zero).
+---
 # 📖 Image/PDF to Text Extraction with LightOnOCR
 **💡 How to use:**
 1. Upload an image or PDF
 2. For PDFs: select which page to extract (1-20)
+3. Adjust temperature if needed
 4. Click "Extract Text"
 **Note:** The Markdown rendering for tables may not always be perfect. Check the raw output for complex tables!
                 label="Temperature",
                 info="0.0 = deterministic, Higher = more varied"
             )
+            enable_streaming = gr.Checkbox(
+                label="Enable Streaming",
+                value=False,
+                info="Show text progressively as it's generated"
+            )
             submit_btn = gr.Button("Extract Text", variant="primary")
             clear_btn = gr.Button("Clear", variant="secondary")
     # Event handlers
     submit_btn.click(
         fn=process_input,
+        inputs=[file_input, temperature, num_pages, enable_streaming],
         outputs=[output_text, raw_output, page_info, rendered_image, num_pages]
     )