iammraat committed on
Commit
af3df60
·
verified ·
1 Parent(s): 583f78a

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +26 -27
app.py CHANGED
@@ -11,16 +11,14 @@ model_path = snapshot_download(repo_id="PaddlePaddle/PP-DocLayoutV3", allow_patt
11
  print(f"Model downloaded to: {model_path}")
12
 
13
  # --- STEP 2: Initialize V3 Engine ---
 
 
 
 
14
  layout_engine = PPStructureV3(
15
- layout_model_dir=model_path,
16
- table=False,
17
- ocr=False,
18
- # show_log=True, <-- REMOVED (Caused the crash)
19
-
20
- # In V3, 'use_angle_cls' is often renamed for documents:
21
  use_doc_orientation_classify=True,
22
-
23
- enable_mkldnn=False # Keeps the crash fix
24
  )
25
 
26
  def analyze_layout(input_image):
@@ -31,38 +29,38 @@ def analyze_layout(input_image):
31
 
32
  # Run Inference
33
  try:
34
- # V3 usually returns a generator or list
35
- result = layout_engine(image_np)
 
36
  except Exception as e:
37
  return image_np, f"Error running layout analysis: {e}"
38
 
39
  viz_image = image_np.copy()
40
  detections_text = []
41
 
42
- if result is None:
43
  return viz_image, "No layout detected."
44
 
45
- # Iterate through results
46
- for region in result:
47
- # V3 Output format usually includes 'layout_bbox' or 'bbox'
48
- if isinstance(region, dict):
49
- # Try specific v3 keys first, fallback to generic
50
- box = region.get('layout_bbox') or region.get('bbox')
51
- label = region.get('label', 'unknown')
52
- else:
53
- continue
54
-
55
  if box is None: continue
56
 
57
- # Draw the box
58
  try:
59
  x1, y1, x2, y2 = int(box[0]), int(box[1]), int(box[2]), int(box[3])
60
 
61
- # Color coding
62
- color = (0, 255, 0) # Default Green
63
- if label == 'title': color = (0, 0, 255) # Red
64
- elif label == 'figure': color = (255, 0, 0) # Blue
65
- elif label == 'table': color = (255, 255, 0) # Cyan
66
 
67
  cv2.rectangle(viz_image, (x1, y1), (x2, y2), color, 3)
68
  cv2.putText(viz_image, label, (x1, y1 - 10), cv2.FONT_HERSHEY_SIMPLEX, 0.8, color, 2)
@@ -74,6 +72,7 @@ def analyze_layout(input_image):
74
 
75
  with gr.Blocks(title="PP-DocLayoutV3 Explorer") as demo:
76
  gr.Markdown("## 📄 PP-DocLayoutV3 Explorer")
 
77
 
78
  with gr.Row():
79
  with gr.Column():
 
11
  print(f"Model downloaded to: {model_path}")
12
 
13
  # --- STEP 2: Initialize V3 Engine ---
14
+ # FIXES:
15
+ # 1. Used 'model_dir' instead of 'layout_model_dir'
16
+ # 2. Removed 'table=False' and 'ocr=False' (Invalid in V3)
17
+ # 3. Kept 'enable_mkldnn=False' (Essential for CPU stability)
18
  layout_engine = PPStructureV3(
19
+ model_dir=model_path,
 
 
 
 
 
20
  use_doc_orientation_classify=True,
21
+ enable_mkldnn=False
 
22
  )
23
 
24
  def analyze_layout(input_image):
 
29
 
30
  # Run Inference
31
  try:
32
+ # V3 inference often returns a generator or a list object
33
+ # We convert to list to be safe
34
+ results = list(layout_engine(image_np))
35
  except Exception as e:
36
  return image_np, f"Error running layout analysis: {e}"
37
 
38
  viz_image = image_np.copy()
39
  detections_text = []
40
 
41
+ if not results:
42
  return viz_image, "No layout detected."
43
 
44
+ # --- STEP 3: Visualize V3 Results ---
45
+ # The structure of V3 results is typically a list of dicts.
46
+ # Each dict has 'layout_bbox' (or 'bbox') and 'label'.
47
+ for region in results:
48
+ if not isinstance(region, dict): continue
49
+
50
+ # Try finding the box with supported keys
51
+ box = region.get('layout_bbox') or region.get('bbox')
52
+ label = region.get('label', 'unknown')
53
+
54
  if box is None: continue
55
 
 
56
  try:
57
  x1, y1, x2, y2 = int(box[0]), int(box[1]), int(box[2]), int(box[3])
58
 
59
+ # Color coding for different regions
60
+ color = (0, 255, 0) # Text (Green)
61
+ if label == 'title': color = (0, 0, 255) # Title (Red)
62
+ elif label == 'figure': color = (255, 0, 0) # Figure (Blue)
63
+ elif label == 'table': color = (255, 255, 0)# Table (Cyan)
64
 
65
  cv2.rectangle(viz_image, (x1, y1), (x2, y2), color, 3)
66
  cv2.putText(viz_image, label, (x1, y1 - 10), cv2.FONT_HERSHEY_SIMPLEX, 0.8, color, 2)
 
72
 
73
  with gr.Blocks(title="PP-DocLayoutV3 Explorer") as demo:
74
  gr.Markdown("## 📄 PP-DocLayoutV3 Explorer")
75
+ gr.Markdown("Visualizes document structure (Title, Text, Table, Figure).")
76
 
77
  with gr.Row():
78
  with gr.Column():