Spaces:

iammraat
/

document

Running

App Files Community

iammraat committed 10 days ago

Commit

af3df60

verified ·

1 Parent(s): 583f78a

Update app.py

Browse files

Files changed (1) hide show

app.py +26 -27

app.py CHANGED Viewed

@@ -11,16 +11,14 @@ model_path = snapshot_download(repo_id="PaddlePaddle/PP-DocLayoutV3", allow_patt
 print(f"Model downloaded to: {model_path}")
 # --- STEP 2: Initialize V3 Engine ---
 layout_engine = PPStructureV3(
-    layout_model_dir=model_path,
-    table=False,
-    ocr=False,
-    # show_log=True,  <-- REMOVED (Caused the crash)
-    # In V3, 'use_angle_cls' is often renamed for documents:
     use_doc_orientation_classify=True,
-    enable_mkldnn=False # Keeps the crash fix
 )
 def analyze_layout(input_image):
@@ -31,38 +29,38 @@ def analyze_layout(input_image):
     # Run Inference
     try:
-        # V3 usually returns a generator or list
-        result = layout_engine(image_np)
     except Exception as e:
         return image_np, f"Error running layout analysis: {e}"
     viz_image = image_np.copy()
     detections_text = []
-    if result is None:
         return viz_image, "No layout detected."
-    # Iterate through results
-    for region in result:
-        # V3 Output format usually includes 'layout_bbox' or 'bbox'
-        if isinstance(region, dict):
-            # Try specific v3 keys first, fallback to generic
-            box = region.get('layout_bbox') or region.get('bbox')
-            label = region.get('label', 'unknown')
-        else:
-            continue
         if box is None: continue
-        # Draw the box
         try:
             x1, y1, x2, y2 = int(box[0]), int(box[1]), int(box[2]), int(box[3])
-            # Color coding
-            color = (0, 255, 0) # Default Green
-            if label == 'title': color = (0, 0, 255) # Red
-            elif label == 'figure': color = (255, 0, 0) # Blue
-            elif label == 'table': color = (255, 255, 0) # Cyan
             cv2.rectangle(viz_image, (x1, y1), (x2, y2), color, 3)
             cv2.putText(viz_image, label, (x1, y1 - 10), cv2.FONT_HERSHEY_SIMPLEX, 0.8, color, 2)
@@ -74,6 +72,7 @@ def analyze_layout(input_image):
 with gr.Blocks(title="PP-DocLayoutV3 Explorer") as demo:
     gr.Markdown("## 📄 PP-DocLayoutV3 Explorer")
     with gr.Row():
         with gr.Column():

 print(f"Model downloaded to: {model_path}")
 # --- STEP 2: Initialize V3 Engine ---
+# FIXES:
+# 1. Used 'model_dir' instead of 'layout_model_dir'
+# 2. Removed 'table=False' and 'ocr=False' (Invalid in V3)
+# 3. Kept 'enable_mkldnn=False' (Essential for CPU stability)
 layout_engine = PPStructureV3(
+    model_dir=model_path,
     use_doc_orientation_classify=True,
+    enable_mkldnn=False
 )
 def analyze_layout(input_image):
     # Run Inference
     try:
+        # V3 inference often returns a generator or a list object
+        # We convert to list to be safe
+        results = list(layout_engine(image_np))
     except Exception as e:
         return image_np, f"Error running layout analysis: {e}"
     viz_image = image_np.copy()
     detections_text = []
+    if not results:
         return viz_image, "No layout detected."
+    # --- STEP 3: Visualize V3 Results ---
+    # The structure of V3 results is typically a list of dicts.
+    # Each dict has 'layout_bbox' (or 'bbox') and 'label'.
+    for region in results:
+        if not isinstance(region, dict): continue
+        # Try finding the box with supported keys
+        box = region.get('layout_bbox') or region.get('bbox')
+        label = region.get('label', 'unknown')
         if box is None: continue
         try:
             x1, y1, x2, y2 = int(box[0]), int(box[1]), int(box[2]), int(box[3])
+            # Color coding for different regions
+            color = (0, 255, 0) # Text (Green)
+            if label == 'title': color = (0, 0, 255)   # Title (Red)
+            elif label == 'figure': color = (255, 0, 0) # Figure (Blue)
+            elif label == 'table': color = (255, 255, 0)# Table (Cyan)
             cv2.rectangle(viz_image, (x1, y1), (x2, y2), color, 3)
             cv2.putText(viz_image, label, (x1, y1 - 10), cv2.FONT_HERSHEY_SIMPLEX, 0.8, color, 2)
 with gr.Blocks(title="PP-DocLayoutV3 Explorer") as demo:
     gr.Markdown("## 📄 PP-DocLayoutV3 Explorer")
+    gr.Markdown("Visualizes document structure (Title, Text, Table, Figure).")
     with gr.Row():
         with gr.Column():