enpaiva committed on
Commit
584b383
·
verified ·
1 Parent(s): a4cb188

Update app.py

Files changed (1)
  1. app.py +145 -159
app.py CHANGED
@@ -111,11 +111,11 @@ def nms_custom(boxes, scores, iou_threshold=0.5):
     return torch.tensor(keep, dtype=torch.long)
 
 def load_model(model_name):
-    """Load the selected model."""
+    """Load the selected model automatically."""
     global current_model, current_processor, current_model_name
 
     if current_model_name == model_name:
-        return f"✅ Model {model_name} is already loaded!"
+        return current_model, current_processor
 
     try:
         model_info = MODELS[model_name]
@@ -133,11 +133,11 @@ def load_model(model_name):
         current_model = model
         current_model_name = model_name
 
-        return f"✅ Successfully loaded {model_name}!"
+        return model, processor
 
     except Exception as e:
         print(f"Error loading model: {e}")
-        return f"❌ Error loading {model_name}: {str(e)}"
+        return None, None
 
 def visualize_bbox(image_input, bboxes, classes, scores, id_to_names, alpha=0.3, show_labels=True):
     """Visualize bounding boxes with OpenCV."""
@@ -199,13 +199,15 @@ def visualize_bbox(image_input, bboxes, classes, scores, id_to_names, alpha=0.3, show_labels=True):
 
     return cv2.cvtColor(image, cv2.COLOR_BGR2RGB)
 
-def process_image(input_img, conf_threshold, iou_threshold, nms_method, alpha, show_labels):
+def process_image(input_img, model_name, conf_threshold, iou_threshold, nms_method, alpha, show_labels):
     """Process image with document layout detection."""
     if input_img is None:
         return None, "❌ Please upload an image first."
-
-    if current_model is None or current_processor is None:
-        return None, "❌ Please load a model first."
+
+    # Load model if needed
+    model, processor = load_model(model_name)
+    if model is None or processor is None:
+        return None, f"❌ Error loading model {model_name}."
 
     try:
         # Prepare image
@@ -216,14 +218,14 @@ def process_image(input_img, conf_threshold, iou_threshold, nms_method, alpha, show_labels):
         input_img = input_img.convert('RGB')
 
         # Process with model
-        inputs = current_processor(images=[input_img], return_tensors="pt")
+        inputs = processor(images=[input_img], return_tensors="pt")
         inputs = {k: v.to(device) for k, v in inputs.items()}
 
         with torch.no_grad():
-            outputs = current_model(**inputs)
+            outputs = model(**inputs)
 
         # Post-process results
-        results = current_processor.post_process_object_detection(
+        results = processor.post_process_object_detection(
             outputs,
             target_sizes=torch.tensor([input_img.size[::-1]]),
             threshold=conf_threshold,
@@ -256,7 +258,7 @@ def process_image(input_img, conf_threshold, iou_threshold, nms_method, alpha, show_labels):
         output = visualize_bbox(input_img, boxes, labels, scores, classes_map, alpha=alpha, show_labels=show_labels)
 
         labels_status = "with labels" if show_labels else "without labels"
-        info = f"✅ Found {len(boxes)} detections ({labels_status}) | NMS: {nms_method} | Threshold: {conf_threshold:.2f}"
+        info = f"✅ Found {len(boxes)} detections ({labels_status}) | Model: {model_name} | Confidence: {conf_threshold:.2f}"
 
         return output, info
 
@@ -267,58 +269,54 @@ def process_image(input_img, conf_threshold, iou_threshold, nms_method, alpha, show_labels):
             return np.array(input_img), error_msg
         return np.zeros((512, 512, 3), dtype=np.uint8), error_msg
 
-def reset_interface():
-    """Reset all interface components."""
-    return gr.update(value=None), gr.update(value=None), gr.update(value="")
-
 if __name__ == "__main__":
     print(f"🚀 Starting Document Layout Analysis App")
     print(f"📱 Device: {device}")
     print(f"🤖 Available models: {len(MODELS)}")
 
-    # Custom CSS for full-width layout
+    # Custom CSS for compact layout
     custom_css = """
     .gradio-container {
-        max-width: 100% !important;
+        max-width: 1400px !important;
+        margin: 0 auto !important;
         padding: 20px !important;
     }
 
-    .main-container {
-        width: 100% !important;
-        max-width: none !important;
-    }
-
-    .panel-left, .panel-right {
-        min-height: 600px;
-        padding: 20px;
+    .controls-container {
         background: #f8f9fa;
         border-radius: 12px;
-        border: 1px solid #e9ecef;
+        border: 1px solid #dee2e6;
+        padding: 20px;
+        margin-bottom: 20px;
     }
 
-    .control-section {
-        margin-bottom: 20px;
-        padding: 15px;
-        background: white;
-        border-radius: 8px;
+    .results-container {
+        background: #ffffff;
+        border-radius: 12px;
         border: 1px solid #dee2e6;
+        padding: 20px;
     }
 
-    .status-good { color: #28a745; font-weight: bold; }
-    .status-error { color: #dc3545; font-weight: bold; }
-    .status-info { color: #17a2b8; font-weight: bold; }
+    .section-divider {
+        border-top: 2px solid #e9ecef;
+        margin: 20px 0;
+        padding-top: 20px;
+    }
 
-    .toggle-labels {
+    .analyze-btn {
         background: linear-gradient(45deg, #667eea, #764ba2) !important;
         border: none !important;
         color: white !important;
         font-weight: bold !important;
+        font-size: 18px !important;
+        padding: 15px 30px !important;
+        border-radius: 10px !important;
     }
     """
 
     # Create Gradio interface
     with gr.Blocks(
-        title="📄 Document Layout Analysis - Full Width",
+        title="📄 Document Layout Analysis",
         theme=gr.themes.Soft(),
         css=custom_css
     ) as demo:
@@ -326,138 +324,126 @@ if __name__ == "__main__":
         # Header
         gr.HTML("""
         <div style='text-align: center; padding: 30px; background: linear-gradient(135deg, #667eea 0%, #764ba2 100%); color: white; border-radius: 15px; margin-bottom: 30px;'>
-            <h1 style='margin: 0; font-size: 3em; text-shadow: 2px 2px 4px rgba(0,0,0,0.3);'>🔍 Document Layout Analysis</h1>
-            <p style='margin: 10px 0 0 0; font-size: 1.3em; opacity: 0.9;'>Advanced document structure detection with multiple AI models</p>
+            <h1 style='margin: 0; font-size: 2.5em; text-shadow: 2px 2px 4px rgba(0,0,0,0.3);'>🔍 Document Layout Analysis</h1>
+            <p style='margin: 10px 0 0 0; font-size: 1.2em; opacity: 0.9;'>Compact interface for advanced document structure detection</p>
         </div>
         """)
 
-        # Main content in two columns
-        with gr.Row():
-            # LEFT COLUMN - Controls and Input
-            with gr.Column(scale=1, elem_classes=["panel-left"]):
-
-                # Model Section
-                with gr.Group(elem_classes=["control-section"]):
-                    gr.HTML("<h3>🤖 Model Configuration</h3>")
-
-                    model_dropdown = gr.Dropdown(
-                        choices=list(MODELS.keys()),
-                        value="Egret XLarge",
-                        label="Select Model",
-                        info="Choose the AI model for document analysis",
-                        interactive=True
-                    )
-
-                    with gr.Row():
-                        load_btn = gr.Button("📥 Load Model", variant="primary", scale=1)
-                        clear_btn = gr.Button("🗑️ Clear All", variant="secondary", scale=1)
-
-                    model_status = gr.Textbox(
-                        label="Model Status",
-                        value="🔄 No model loaded. Please select and load a model.",
-                        interactive=False,
-                        lines=2
-                    )
-
-                # Image Upload Section
-                with gr.Group(elem_classes=["control-section"]):
-                    gr.HTML("<h3>📄 Image Input</h3>")
-
-                    input_img = gr.Image(
-                        label="Upload Document Image",
-                        type="pil",
-                        height=400,
-                        interactive=True
-                    )
-
-                    detect_btn = gr.Button("🔍 Analyze Document", variant="primary", size="lg")
+        # Controls Section
+        with gr.Group(elem_classes=["controls-container"]):
+            # 1. Image Upload (First)
+            gr.HTML("<h3 style='margin-top: 0;'>📄 Upload Document</h3>")
+            input_img = gr.Image(
+                label="Document Image",
+                type="pil",
+                height=300,
+                interactive=True
+            )
+
+            # Divider
+            gr.HTML("<div class='section-divider'></div>")
+
+            # 2. Model Selection (Second)
+            gr.HTML("<h3>🤖 Model Selection</h3>")
+            model_dropdown = gr.Dropdown(
+                choices=list(MODELS.keys()),
+                value="Egret XLarge",
+                label="AI Model",
+                info="Model will load automatically when analyzing",
+                interactive=True
+            )
+
+            # Divider
+            gr.HTML("<div class='section-divider'></div>")
+
+            # 3. Detection Parameters (Third)
+            gr.HTML("<h3>⚙️ Detection Settings</h3>")
+
+            with gr.Row():
+                conf_threshold = gr.Slider(
+                    minimum=0.0,
+                    maximum=1.0,
+                    value=0.6,
+                    step=0.05,
+                    label="Confidence Threshold",
+                    info="Minimum confidence for detections"
+                )
 
-            # Parameters Section
-            with gr.Group(elem_classes=["control-section"]):
-                gr.HTML("<h3>⚙️ Detection Parameters</h3>")
-
-                conf_threshold = gr.Slider(
-                    minimum=0.0,
-                    maximum=1.0,
-                    value=0.6,
-                    step=0.05,
-                    label="Confidence Threshold",
-                    info="Minimum confidence for detections"
-                )
-
-                iou_threshold = gr.Slider(
-                    minimum=0.0,
-                    maximum=1.0,
-                    value=0.5,
-                    step=0.05,
-                    label="NMS IoU Threshold",
-                    info="Non-maximum suppression threshold"
-                )
-
-                nms_method = gr.Radio(
-                    choices=["Custom IoMin", "Standard IoU"],
-                    value="Custom IoMin",
-                    label="NMS Algorithm",
-                    info="Choose suppression method"
-                )
-
-                alpha_slider = gr.Slider(
-                    minimum=0.0,
-                    maximum=1.0,
-                    value=0.3,
-                    step=0.1,
-                    label="Overlay Transparency",
-                    info="Transparency of detection overlays"
-                )
+                iou_threshold = gr.Slider(
+                    minimum=0.0,
+                    maximum=1.0,
+                    value=0.5,
+                    step=0.05,
+                    label="NMS IoU Threshold",
+                    info="Non-maximum suppression threshold"
+                )
 
-            # RIGHT COLUMN - Results and Output
-            with gr.Column(scale=1, elem_classes=["panel-right"]):
+            with gr.Row():
+                nms_method = gr.Radio(
+                    choices=["Custom IoMin", "Standard IoU"],
+                    value="Custom IoMin",
+                    label="NMS Algorithm",
+                    info="Choose suppression method"
+                )
 
-                # Results Section
-                with gr.Group(elem_classes=["control-section"]):
-                    gr.HTML("<h3>🎯 Detection Results</h3>")
-
-                    output_img = gr.Image(
-                        label="Analyzed Document",
-                        type="numpy",
-                        height=500,
-                        interactive=False
-                    )
-
-                    detection_info = gr.Textbox(
-                        label="Analysis Summary",
-                        value="",
-                        interactive=False,
-                        lines=3,
-                        placeholder="Detection results will appear here..."
-                    )
-
-                # Visualization Options Section
-                with gr.Group(elem_classes=["control-section"]):
-                    gr.HTML("<h3>🎨 Visualization Options</h3>")
-
-                    show_labels_checkbox = gr.Checkbox(
-                        value=True,
-                        label="Show Class Labels",
-                        info="Display class names and confidence scores on detections",
-                        interactive=True
-                    )
-
-        # Event Handlers
-        load_btn.click(
-            fn=load_model,
-            inputs=[model_dropdown],
-            outputs=[model_status]
-        )
+            alpha_slider = gr.Slider(
+                minimum=0.0,
+                maximum=1.0,
+                value=0.3,
+                step=0.1,
+                label="Overlay Transparency",
+                info="Transparency of detection overlays"
+            )
+
+            show_labels_checkbox = gr.Checkbox(
+                value=True,
+                label="Show Class Labels and Confidence Scores",
+                info="Display detection labels on the output image",
+                interactive=True
+            )
+
+            # Divider
+            gr.HTML("<div class='section-divider'></div>")
+
+            # 4. Analyze Button (Last)
+            detect_btn = gr.Button(
+                "🔍 Analyze Document",
+                variant="primary",
+                size="lg",
+                elem_classes=["analyze-btn"]
+            )
 
-        clear_btn.click(
-            fn=reset_interface,
-            outputs=[input_img, output_img, detection_info]
-        )
+        # Results Section
+        with gr.Group(elem_classes=["results-container"]):
+            gr.HTML("<h3 style='margin-top: 0;'>🎯 Analysis Results</h3>")
+
+            output_img = gr.Image(
+                label="Analyzed Document",
+                type="numpy",
+                height=600,
+                interactive=False
+            )
+
+            detection_info = gr.Textbox(
+                label="Detection Summary",
+                value="Ready for analysis. Upload an image and click 'Analyze Document'.",
+                interactive=False,
+                lines=2,
+                show_copy_button=True
+            )
 
+        # Event Handler
         detect_btn.click(
             fn=process_image,
-            inputs=[input_img, conf_threshold, iou_threshold, nms_method, alpha_slider, show_labels_checkbox],
+            inputs=[
+                input_img,
+                model_dropdown,
+                conf_threshold,
+                iou_threshold,
+                nms_method,
+                alpha_slider,
+                show_labels_checkbox
+            ],
            outputs=[output_img, detection_info]
         )
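The substance of this commit is the switch from a manual "Load Model" button to lazy loading inside `process_image`: `load_model` now returns a cached `(model, processor)` pair instead of a status string, and the dropdown's `model_name` is threaded through the click handler. Below is a minimal self-contained sketch of that pattern. The `transformers` Auto-class loaders and the `MODELS` registry contents are assumptions for illustration; only the function name, the globals, and the caching/return contract come from the diff.

```python
# Sketch of the lazy-loading pattern this commit introduces.
# ASSUMED for illustration: the Auto* loader classes and the MODELS
# registry contents; the diff shows only the caching/return contract.
import torch
from transformers import AutoImageProcessor, AutoModelForObjectDetection

device = "cuda" if torch.cuda.is_available() else "cpu"
MODELS = {"Egret XLarge": {"repo_id": "example/egret-xlarge"}}  # placeholder repo id

current_model = None
current_processor = None
current_model_name = None

def load_model(model_name):
    """Return (model, processor); reload only when the selection changes."""
    global current_model, current_processor, current_model_name
    if current_model_name == model_name:
        # Cache hit: the requested model is already resident, skip the reload.
        return current_model, current_processor
    try:
        repo_id = MODELS[model_name]["repo_id"]
        current_processor = AutoImageProcessor.from_pretrained(repo_id)
        current_model = AutoModelForObjectDetection.from_pretrained(repo_id).to(device)
        current_model_name = model_name
        return current_model, current_processor
    except Exception as e:
        # process_image() turns (None, None) into a user-facing error string.
        print(f"Error loading model: {e}")
        return None, None
```

With this contract, `process_image` can call `load_model(model_name)` on every click and only pays the load cost when the dropdown selection actually changes.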
 
449