etiennebcp commited on
Commit
3750374
·
verified · 1 Parent(s): 85834ed
1 Parent(s): 85834ed

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +22 -26
app.py CHANGED
@@ -7,7 +7,7 @@ import os
7
 
8
  print("=== DEBUG: Starting app.py ===")
9
 
10
- # Get example images
11
  example_dir = os.path.join(os.environ.get("HOME", "/home/user"), "app", "example_images")
12
  example_images = []
13
 
@@ -27,11 +27,9 @@ def encode_image_to_base64(image: Image.Image) -> str:
27
 
28
  def load_image_any(image):
29
  """
30
- Gradio Image can return:
31
- - filepath (str) if type="filepath"
32
- - PIL.Image if type="pil"
33
- - None
34
- We normalize to PIL.Image in RGB.
35
  """
36
  if image is None:
37
  return None
@@ -39,7 +37,7 @@ def load_image_any(image):
39
  return Image.open(image).convert("RGB")
40
  if isinstance(image, Image.Image):
41
  return image.convert("RGB")
42
- # Fallback: try to coerce
43
  return Image.open(image).convert("RGB")
44
 
45
  def query_vllm_api(image, temperature, max_tokens=12_000):
@@ -50,11 +48,11 @@ def query_vllm_api(image, temperature, max_tokens=12_000):
50
  return "No image provided", "No image provided", "Please upload an image first."
51
 
52
  try:
53
- # Optional: Resize image if needed (to avoid huge uploads)
54
  max_size = 2048
55
  if max(pil_img.size) > max_size:
56
  ratio = max_size / max(pil_img.size)
57
- new_size = tuple(int(dim * ratio) for dim in pil_img.size)
58
  pil_img = pil_img.resize(new_size, Image.Resampling.LANCZOS)
59
 
60
  image_b64 = encode_image_to_base64(pil_img)
@@ -84,11 +82,11 @@ def query_vllm_api(image, temperature, max_tokens=12_000):
84
  data = response.json()
85
  result = data["choices"][0]["message"]["content"]
86
 
87
- # Handle the thinking/answer parsing
88
  try:
89
  reasoning = result.split("<think>")[1].split("</think>")[0]
90
  answer = result.split("<answer>")[1].split("</answer>")[0]
91
- except IndexError:
92
  reasoning = "No thinking trace found"
93
  answer = result
94
 
@@ -122,8 +120,8 @@ with gr.Blocks(title="NuMarkdown-8B-Thinking") as demo:
122
  <a href="https://huggingface.co/numind/NuMarkdown-8B-Thinking" target="_blank" rel="noopener noreferrer" style="color: white; text-decoration: none; margin: 0 10px; font-weight: 500;">🤗 Model</a>
123
  </div>
124
  </div>
125
- <p>NuMarkdown-8B-Thinking is the first reasoning OCR VLM. It is specifically trained to convert documents into clean Markdown files, well suited for RAG applications.</p>
126
- <p>NOTE: In this space we downsize large images and restrict the maximum output of the model, so performance could improve if you run the model yourself.</p>
127
  """
128
  )
129
 
@@ -132,17 +130,9 @@ with gr.Blocks(title="NuMarkdown-8B-Thinking") as demo:
132
  temperature = gr.Slider(0.1, 1.5, value=0.4, step=0.1, label="Temperature")
133
  btn = gr.Button("Generate Response", variant="primary", size="lg")
134
 
135
- # ✅ Use filepath so the input panel reliably shows both uploads and Examples
136
  img_in = gr.Image(type="filepath", label="Upload Image")
137
 
138
- # Optional: show what Gradio is actually passing to backend (for debugging)
139
- dbg = gr.Textbox(label="Debug (image value)", interactive=False)
140
-
141
- def debug_image_value(image):
142
- return f"{type(image)}: {image}"
143
-
144
- img_in.change(debug_image_value, inputs=img_in, outputs=dbg)
145
-
146
  with gr.Column(scale=2):
147
  with gr.Accordion("🔍 Model Outputs", open=True):
148
  with gr.Tabs():
@@ -169,7 +159,6 @@ with gr.Blocks(title="NuMarkdown-8B-Thinking") as demo:
169
  outputs=[thinking, raw_answer, output],
170
  )
171
 
172
- # ✅ Examples: list-of-lists + inputs as list, works reliably across Gradio versions
173
  if example_images:
174
  gr.Examples(
175
  examples=[[p] for p in example_images[:5]],
@@ -181,16 +170,23 @@ print("=== DEBUG: Gradio interface created ===")
181
 
182
  if __name__ == "__main__":
183
  print("=== DEBUG: About to launch Gradio ===")
 
 
 
 
 
 
 
 
184
  demo.launch(
185
  server_name="0.0.0.0",
186
  server_port=7860,
187
  share=True,
188
- # Security: allow serving local example files if your environment requires it.
189
- # Harmless if not needed; helpful in many deployments.
190
- allowed_paths=[example_dir] if os.path.exists(example_dir) else None,
191
  theme=gr.themes.Soft(),
 
192
  css="""
193
  * { font-family: 'Inter', 'Segoe UI', 'Roboto', 'Helvetica Neue', Arial, sans-serif !important; }
194
  """,
195
  )
 
196
  print("=== DEBUG: Gradio launched ===")
 
7
 
8
  print("=== DEBUG: Starting app.py ===")
9
 
10
+ # Example images directory
11
  example_dir = os.path.join(os.environ.get("HOME", "/home/user"), "app", "example_images")
12
  example_images = []
13
 
 
27
 
28
  def load_image_any(image):
29
  """
30
+ With gr.Image(type="filepath"), image is a str path.
31
+ With other types it can be PIL.Image.
32
+ Normalize to PIL.Image RGB.
 
 
33
  """
34
  if image is None:
35
  return None
 
37
  return Image.open(image).convert("RGB")
38
  if isinstance(image, Image.Image):
39
  return image.convert("RGB")
40
+ # Best-effort fallback
41
  return Image.open(image).convert("RGB")
42
 
43
  def query_vllm_api(image, temperature, max_tokens=12_000):
 
48
  return "No image provided", "No image provided", "Please upload an image first."
49
 
50
  try:
51
+ # Optional resize to avoid huge uploads
52
  max_size = 2048
53
  if max(pil_img.size) > max_size:
54
  ratio = max_size / max(pil_img.size)
55
+ new_size = (int(pil_img.size[0] * ratio), int(pil_img.size[1] * ratio))
56
  pil_img = pil_img.resize(new_size, Image.Resampling.LANCZOS)
57
 
58
  image_b64 = encode_image_to_base64(pil_img)
 
82
  data = response.json()
83
  result = data["choices"][0]["message"]["content"]
84
 
85
+ # Parse optional <think>/<answer>
86
  try:
87
  reasoning = result.split("<think>")[1].split("</think>")[0]
88
  answer = result.split("<answer>")[1].split("</answer>")[0]
89
+ except Exception:
90
  reasoning = "No thinking trace found"
91
  answer = result
92
 
 
120
  <a href="https://huggingface.co/numind/NuMarkdown-8B-Thinking" target="_blank" rel="noopener noreferrer" style="color: white; text-decoration: none; margin: 0 10px; font-weight: 500;">🤗 Model</a>
121
  </div>
122
  </div>
123
+ <p>NuMarkdown-8B-Thinking converts documents into clean Markdown, well suited for RAG applications.</p>
124
+ <p>NOTE: We downsize large images and restrict max output tokens in this demo.</p>
125
  """
126
  )
127
 
 
130
  temperature = gr.Slider(0.1, 1.5, value=0.4, step=0.1, label="Temperature")
131
  btn = gr.Button("Generate Response", variant="primary", size="lg")
132
 
133
+ # ✅ Use filepath so preview works consistently with Examples and uploads
134
  img_in = gr.Image(type="filepath", label="Upload Image")
135
 
 
 
 
 
 
 
 
 
136
  with gr.Column(scale=2):
137
  with gr.Accordion("🔍 Model Outputs", open=True):
138
  with gr.Tabs():
 
159
  outputs=[thinking, raw_answer, output],
160
  )
161
 
 
162
  if example_images:
163
  gr.Examples(
164
  examples=[[p] for p in example_images[:5]],
 
170
 
171
  if __name__ == "__main__":
172
  print("=== DEBUG: About to launch Gradio ===")
173
+
174
+ # ✅ IMPORTANT:
175
+ # If you set allowed_paths, include Gradio's upload temp dir, otherwise previews break.
176
+ # Uploads typically land in /tmp/gradio/...
177
+ allowed = ["/tmp/gradio"]
178
+ if os.path.exists(example_dir):
179
+ allowed.append(example_dir)
180
+
181
  demo.launch(
182
  server_name="0.0.0.0",
183
  server_port=7860,
184
  share=True,
 
 
 
185
  theme=gr.themes.Soft(),
186
+ allowed_paths=allowed, # ✅ include /tmp/gradio so uploaded previews render
187
  css="""
188
  * { font-family: 'Inter', 'Segoe UI', 'Roboto', 'Helvetica Neue', Arial, sans-serif !important; }
189
  """,
190
  )
191
+
192
  print("=== DEBUG: Gradio launched ===")