iammraat committed on
Commit
0cf77d7
·
verified ·
1 Parent(s): 052257a

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +37 -32
app.py CHANGED
@@ -1,26 +1,27 @@
1
  import gradio as gr
2
  import cv2
3
  import numpy as np
4
- from paddleocr import PPStructure
5
- from huggingface_hub import snapshot_download
6
  import os
 
7
 
8
- # --- STEP 1: Download the Model from Hugging Face ---
9
- # We download the 'main' branch which contains the Paddle inference weights
 
 
 
10
  print("Downloading PP-DocLayoutV3 from Hugging Face...")
11
  model_path = snapshot_download(repo_id="PaddlePaddle/PP-DocLayoutV3", allow_patterns=["*.pdiparams", "*.pdmodel", "*.yml", "*.json"])
12
  print(f"Model downloaded to: {model_path}")
13
 
14
- # --- STEP 2: Initialize the Layout Engine ---
15
- # We use PPStructure, which is PaddleOCR's layout analysis module.
16
- # We point it to the downloaded model folder.
17
- layout_engine = PPStructure(
18
  layout_model_dir=model_path,
19
- table=False, # Disable table structure recognition for speed
20
- ocr=False, # Disable OCR for now (we just want to see layout)
21
  show_log=True,
22
- use_angle_cls=True, # Helps with orientation
23
- enable_mkldnn=False # CRITICAL: Fixes the CPU crash
24
  )
25
 
26
  def analyze_layout(input_image):
@@ -30,42 +31,46 @@ def analyze_layout(input_image):
30
  image_np = np.array(input_image)
31
 
32
  # Run Inference
33
- # result is a list of dictionaries, one per detected region
34
- result = layout_engine(image_np)
 
 
 
35
 
36
  viz_image = image_np.copy()
37
  detections_text = []
38
 
39
- # --- STEP 3: Visualize Results ---
 
 
 
40
  for region in result:
41
- # Extract Box (4 points)
42
- box = region['layout_bbox']
43
- label = region['label']
 
 
 
 
 
44
 
45
- # Convert to numpy format for drawing
46
- # layout_bbox is usually [x1, y1, x2, y2]
47
  x1, y1, x2, y2 = int(box[0]), int(box[1]), int(box[2]), int(box[3])
48
 
49
- # Color coding based on type
50
- color = (0, 255, 0) # Green for Text
51
- if label == 'title': color = (0, 0, 255) # Red for Title
52
- elif label == 'figure': color = (255, 0, 0) # Blue for Figures
53
- elif label == 'table': color = (255, 255, 0) # Cyan for Tables
54
 
55
- # Draw Rectangle
56
  cv2.rectangle(viz_image, (x1, y1), (x2, y2), color, 3)
57
-
58
- # Draw Label
59
  cv2.putText(viz_image, label, (x1, y1 - 10), cv2.FONT_HERSHEY_SIMPLEX, 0.8, color, 2)
60
-
61
  detections_text.append(f"Found {label} at {box}")
62
 
63
  return viz_image, "\n".join(detections_text)
64
 
65
- # --- Gradio UI ---
66
- with gr.Blocks(title="PP-DocLayoutV3 Demo") as demo:
67
  gr.Markdown("## 📄 PP-DocLayoutV3 Explorer")
68
- gr.Markdown("This model detects **layout regions** (Text, Tables, Titles) instead of reading characters. It is excellent for de-warping and segmenting messy documents.")
69
 
70
  with gr.Row():
71
  with gr.Column():
 
1
  import gradio as gr
2
  import cv2
3
  import numpy as np
 
 
4
  import os
5
+ from huggingface_hub import snapshot_download
6
 
7
+ # --- STRICT UPDATE: Use PPStructureV3 directly ---
8
+ # Your logs confirmed this class exists in your installed version.
9
+ from paddleocr import PPStructureV3
10
+
11
+ # --- STEP 1: Download the Model ---
12
  print("Downloading PP-DocLayoutV3 from Hugging Face...")
13
  model_path = snapshot_download(repo_id="PaddlePaddle/PP-DocLayoutV3", allow_patterns=["*.pdiparams", "*.pdmodel", "*.yml", "*.json"])
14
  print(f"Model downloaded to: {model_path}")
15
 
16
+ # --- STEP 2: Initialize V3 Engine ---
17
+ # We instantiate PPStructureV3 directly.
18
+ layout_engine = PPStructureV3(
 
19
  layout_model_dir=model_path,
20
+ table=False,
21
+ ocr=False,
22
  show_log=True,
23
+ use_angle_cls=True,
24
+ enable_mkldnn=False # Keeps the crash fix while using the new model
25
  )
26
 
27
  def analyze_layout(input_image):
 
31
  image_np = np.array(input_image)
32
 
33
  # Run Inference
34
+ try:
35
+ # V3 usually returns a generator or list
36
+ result = layout_engine(image_np)
37
+ except Exception as e:
38
+ return image_np, f"Error running layout analysis: {e}"
39
 
40
  viz_image = image_np.copy()
41
  detections_text = []
42
 
43
+ if result is None:
44
+ return viz_image, "No layout detected."
45
+
46
+ # Iterate through results
47
  for region in result:
48
+ # V3 Output format usually includes 'layout_bbox'
49
+ if isinstance(region, dict):
50
+ box = region.get('layout_bbox') or region.get('bbox')
51
+ label = region.get('label', 'unknown')
52
+ else:
53
+ continue
54
+
55
+ if box is None: continue
56
 
57
+ # Draw the box
 
58
  x1, y1, x2, y2 = int(box[0]), int(box[1]), int(box[2]), int(box[3])
59
 
60
+ # Color coding
61
+ color = (0, 255, 0)
62
+ if label == 'title': color = (0, 0, 255)
63
+ elif label == 'figure': color = (255, 0, 0)
64
+ elif label == 'table': color = (255, 255, 0)
65
 
 
66
  cv2.rectangle(viz_image, (x1, y1), (x2, y2), color, 3)
 
 
67
  cv2.putText(viz_image, label, (x1, y1 - 10), cv2.FONT_HERSHEY_SIMPLEX, 0.8, color, 2)
 
68
  detections_text.append(f"Found {label} at {box}")
69
 
70
  return viz_image, "\n".join(detections_text)
71
 
72
+ with gr.Blocks(title="PP-DocLayoutV3 Explorer") as demo:
 
73
  gr.Markdown("## 📄 PP-DocLayoutV3 Explorer")
 
74
 
75
  with gr.Row():
76
  with gr.Column():