Spaces:

iammraat
/

document

Running

App Files Community

iammraat committed on Feb 2

Commit

94c91d9

verified ·

1 Parent(s): af3df60

Update app.py

Browse files

Files changed (1) hide show

app.py +31 -27

app.py CHANGED Viewed

@@ -3,22 +3,28 @@ import cv2
 import numpy as np
 import os
 from huggingface_hub import snapshot_download
-from paddleocr import PPStructureV3
-# --- STEP 1: Download the Model ---
 print("Downloading PP-DocLayoutV3 from Hugging Face...")
 model_path = snapshot_download(repo_id="PaddlePaddle/PP-DocLayoutV3", allow_patterns=["*.pdiparams", "*.pdmodel", "*.yml", "*.json"])
 print(f"Model downloaded to: {model_path}")
-# --- STEP 2: Initialize V3 Engine ---
-# FIXES:
-# 1. Used 'model_dir' instead of 'layout_model_dir'
-# 2. Removed 'table=False' and 'ocr=False' (Invalid in V3)
-# 3. Kept 'enable_mkldnn=False' (Essential for CPU stability)
-layout_engine = PPStructureV3(
-    model_dir=model_path,
-    use_doc_orientation_classify=True,
-    enable_mkldnn=False
 )
 def analyze_layout(input_image):
@@ -29,41 +35,39 @@ def analyze_layout(input_image):
     # Run Inference
     try:
-        # V3 inference often returns a generator or a list object
-        # We convert to list to be safe
-        results = list(layout_engine(image_np))
     except Exception as e:
         return image_np, f"Error running layout analysis: {e}"
     viz_image = image_np.copy()
     detections_text = []
-    if not results:
         return viz_image, "No layout detected."
-    # --- STEP 3: Visualize V3 Results ---
-    # The structure of V3 results is typically a list of dicts.
-    # Each dict has 'layout_bbox' (or 'bbox') and 'label'.
-    for region in results:
-        if not isinstance(region, dict): continue
-        # Try finding the box with supported keys
-        box = region.get('layout_bbox') or region.get('bbox')
-        label = region.get('label', 'unknown')
         if box is None: continue
         try:
             x1, y1, x2, y2 = int(box[0]), int(box[1]), int(box[2]), int(box[3])
-            # Color coding for different regions
             color = (0, 255, 0) # Text (Green)
             if label == 'title': color = (0, 0, 255)   # Title (Red)
             elif label == 'figure': color = (255, 0, 0) # Figure (Blue)
             elif label == 'table': color = (255, 255, 0)# Table (Cyan)
             cv2.rectangle(viz_image, (x1, y1), (x2, y2), color, 3)
-            cv2.putText(viz_image, label, (x1, y1 - 10), cv2.FONT_HERSHEY_SIMPLEX, 0.8, color, 2)
             detections_text.append(f"Found {label} at {box}")
         except Exception:
             pass
@@ -72,7 +76,7 @@ def analyze_layout(input_image):
 with gr.Blocks(title="PP-DocLayoutV3 Explorer") as demo:
     gr.Markdown("## 📄 PP-DocLayoutV3 Explorer")
-    gr.Markdown("Visualizes document structure (Title, Text, Table, Figure).")
     with gr.Row():
         with gr.Column():

 import numpy as np
 import os
 from huggingface_hub import snapshot_download
+# --- KEY FIX: Use the standard PPStructure class ---
+# The 'PPStructureV3' class is currently broken/strict in the PyPI release.
+# The standard 'PPStructure' class is stable and CAN load V3 weights
+# because it reads the architecture from the downloaded inference.yml file.
+from paddleocr import PPStructure
+# --- STEP 1: Download the V3 Model ---
 print("Downloading PP-DocLayoutV3 from Hugging Face...")
 model_path = snapshot_download(repo_id="PaddlePaddle/PP-DocLayoutV3", allow_patterns=["*.pdiparams", "*.pdmodel", "*.yml", "*.json"])
 print(f"Model downloaded to: {model_path}")
+# --- STEP 2: Initialize ---
+# We use the stable class but point 'layout_model_dir' to your V3 download.
+layout_engine = PPStructure(
+    layout_model_dir=model_path, # This argument is valid in the standard class
+    use_angle_cls=True,
+    enable_mkldnn=False,         # Keeps your CPU from crashing
+    show_log=False,              # Explicitly False to avoid "Unknown Argument" error
+    # We disable these extra modules to focus strictly on layout analysis speed
+    table=False,
+    ocr=False
 )
 def analyze_layout(input_image):
     # Run Inference
     try:
+        # The standard class returns a list of results directly
+        result = layout_engine(image_np)
     except Exception as e:
         return image_np, f"Error running layout analysis: {e}"
     viz_image = image_np.copy()
     detections_text = []
+    if not result:
         return viz_image, "No layout detected."
+    # --- STEP 3: Visualize ---
+    for region in result:
+        # PPStructure V2/Standard output format: dict with 'type', 'bbox', 'img'
+        # Note: running the V3 model through the V2 class might name the label key differently,
+        # so the label lookup falls back from 'type' to 'label'; the box is read from 'bbox' only.
+        box = region.get('bbox')
+        label = region.get('type') or region.get('label')
         if box is None: continue
         try:
             x1, y1, x2, y2 = int(box[0]), int(box[1]), int(box[2]), int(box[3])
+            # Color coding
             color = (0, 255, 0) # Text (Green)
             if label == 'title': color = (0, 0, 255)   # Title (Red)
             elif label == 'figure': color = (255, 0, 0) # Figure (Blue)
             elif label == 'table': color = (255, 255, 0)# Table (Cyan)
             cv2.rectangle(viz_image, (x1, y1), (x2, y2), color, 3)
+            cv2.putText(viz_image, str(label), (x1, y1 - 10), cv2.FONT_HERSHEY_SIMPLEX, 0.8, color, 2)
             detections_text.append(f"Found {label} at {box}")
         except Exception:
             pass
 with gr.Blocks(title="PP-DocLayoutV3 Explorer") as demo:
     gr.Markdown("## 📄 PP-DocLayoutV3 Explorer")
+    gr.Markdown("Using **PP-DocLayoutV3** weights via the stable engine.")
     with gr.Row():
         with gr.Column():