Spaces:

lightonai
/

LightOnOCR-1B-Demo

Running

App Files Community

staghado committed on Oct 21

Commit

111ff5f

verified ·

1 Parent(s): 2fcfad9

Update app.py

Browse files

Files changed (1) hide show

app.py +59 -67

app.py CHANGED Viewed

@@ -22,56 +22,29 @@ def image_to_base64(image):
     return base64.b64encode(buffered.getvalue()).decode("utf-8")
-def respond(
-    message,
-    history: list[dict[str, str]],
-    system_message,
-    max_tokens,
-    temperature,
-    top_p,
-):
     """
-    Send messages (with optional images) to vLLM endpoint and stream the response.
     """
-    messages = [{"role": "system", "content": system_message}]
-    # Add conversation history
-    for msg in history:
-        messages.append(msg)
-    # Process the current message - check if it contains an image
-    if message and "files" in message and message["files"]:
-        # Message has image(s)
-        content = []
-        # Add text if present
-        if message.get("text", "").strip():
-            content.append({"type": "text", "text": message["text"]})
-        # Add all images
-        for file_info in message["files"]:
-            try:
-                image = Image.open(file_info)
-                b64_image = image_to_base64(image)
-                content.append({
-                    "type": "image_url",
-                    "image_url": {"url": f"data:image/png;base64,{b64_image}"}
-                })
-            except Exception as e:
-                print(f"Error processing image: {e}")
-        messages.append({"role": "user", "content": content})
-    else:
-        # Text-only message
-        text_content = message if isinstance(message, str) else message.get("text", "")
-        messages.append({"role": "user", "content": text_content})
     payload = {
         "model": MODEL,
-        "messages": messages,
-        "max_tokens": max_tokens,
         "temperature": temperature,
-        "top_p": top_p,
         "stream": True
     }
@@ -111,42 +84,61 @@ def respond(
 # Build the Gradio Interface
-with gr.Blocks(title="💬 Vision Chat", theme=gr.themes.Soft()) as demo:
     gr.Markdown(
         """
-        # 💬 Vision-Enabled Chat Interface
         **💡 How to use:**
-        1. Type your message in the chat box
-        2. Optionally upload images by clicking the 📎 icon
-        3. Adjust parameters in the accordion below if needed
-        4. Press Enter or click Send
-        The model can understand both text and images!
         """
     )
-    chatbot = gr.ChatInterface(
-        respond,
-        type="messages",
-        multimodal=True,
-        additional_inputs=[
-            gr.Textbox(
-                value="You are a helpful AI assistant with vision capabilities. You can understand and analyze images.",
-                label="System message"
-            ),
-            gr.Slider(minimum=1, maximum=4096, value=2048, step=1, label="Max new tokens"),
-            gr.Slider(minimum=0.1, maximum=2.0, value=0.7, step=0.1, label="Temperature"),
-            gr.Slider(
                 minimum=0.1,
                 maximum=1.0,
-                value=0.95,
                 step=0.05,
-                label="Top-p (nucleus sampling)",
-            ),
-        ],
     )
-    chatbot.render()
     gr.Markdown("""
     ---

     return base64.b64encode(buffered.getvalue()).decode("utf-8")
+def process_image(image, temperature):
     """
+    Send image to vLLM endpoint and stream the response.
     """
+    if image is None:
+        return "Please upload an image first."
+    # Convert image to base64
+    b64_image = image_to_base64(image)
+    # Build the payload with only image input (no text prompt)
     payload = {
         "model": MODEL,
+        "messages": [
+            {
+                "role": "user",
+                "content": [
+                    {"type": "text", "text": ""},
+                    {"type": "image_url", "image_url": {"url": f"data:image/png;base64,{b64_image}"}}
+                ]
+            }
+        ],
         "temperature": temperature,
         "stream": True
     }
 # Build the Gradio Interface
+with gr.Blocks(title="📖 Image OCR", theme=gr.themes.Soft()) as demo:
     gr.Markdown(
         """
+        # 📖 Image to Text Extraction
         **💡 How to use:**
+        1. Upload an image using the upload box
+        2. Adjust temperature if needed
+        3. Click "Extract Text" to process
+        The model will extract and format text from your image.
         """
     )
+    with gr.Row():
+        with gr.Column(scale=1):
+            image_input = gr.Image(
+                type="pil",
+                label="🖼️ Upload Image",
+                sources=["upload", "clipboard"]
+            )
+            temperature = gr.Slider(
                 minimum=0.1,
                 maximum=1.0,
+                value=0.15,
                 step=0.05,
+                label="Temperature"
+            )
+            submit_btn = gr.Button("Extract Text", variant="primary")
+            clear_btn = gr.Button("Clear", variant="secondary")
+        with gr.Column(scale=2):
+            output_text = gr.Markdown(
+                label="📄 Extracted Text",
+                value="<div style='min-height: 400px; padding: 10px; border: 1px solid #e0e0e0; border-radius: 4px; background-color: #f9f9f9;'><em>Extracted text will appear here...</em></div>"
+            )
+    with gr.Row():
+        raw_output = gr.Textbox(
+            label="Raw Output",
+            placeholder="Raw text will appear here...",
+            lines=10,
+            show_copy_button=True
+        )
+    # Event handlers
+    submit_btn.click(
+        fn=lambda img, temp: (process_image(img, temp), process_image(img, temp)),
+        inputs=[image_input, temperature],
+        outputs=[output_text, raw_output]
     )
+    clear_btn.click(
+        fn=lambda: (None, "", ""),
+        outputs=[image_input, output_text, raw_output]
+    )
     gr.Markdown("""
     ---