iammraat committed on
Commit
94c91d9
·
verified ·
1 Parent(s): af3df60

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +31 -27
app.py CHANGED
@@ -3,22 +3,28 @@ import cv2
3
  import numpy as np
4
  import os
5
  from huggingface_hub import snapshot_download
6
- from paddleocr import PPStructureV3
7
 
8
- # --- STEP 1: Download the Model ---
 
 
 
 
 
 
9
  print("Downloading PP-DocLayoutV3 from Hugging Face...")
10
  model_path = snapshot_download(repo_id="PaddlePaddle/PP-DocLayoutV3", allow_patterns=["*.pdiparams", "*.pdmodel", "*.yml", "*.json"])
11
  print(f"Model downloaded to: {model_path}")
12
 
13
- # --- STEP 2: Initialize V3 Engine ---
14
- # FIXES:
15
- # 1. Used 'model_dir' instead of 'layout_model_dir'
16
- # 2. Removed 'table=False' and 'ocr=False' (Invalid in V3)
17
- # 3. Kept 'enable_mkldnn=False' (Essential for CPU stability)
18
- layout_engine = PPStructureV3(
19
- model_dir=model_path,
20
- use_doc_orientation_classify=True,
21
- enable_mkldnn=False
 
22
  )
23
 
24
  def analyze_layout(input_image):
@@ -29,41 +35,39 @@ def analyze_layout(input_image):
29
 
30
  # Run Inference
31
  try:
32
- # V3 inference often returns a generator or a list object
33
- # We convert to list to be safe
34
- results = list(layout_engine(image_np))
35
  except Exception as e:
36
  return image_np, f"Error running layout analysis: {e}"
37
 
38
  viz_image = image_np.copy()
39
  detections_text = []
40
 
41
- if not results:
42
  return viz_image, "No layout detected."
43
 
44
- # --- STEP 3: Visualize V3 Results ---
45
- # The structure of V3 results is typically a list of dicts.
46
- # Each dict has 'layout_bbox' (or 'bbox') and 'label'.
47
- for region in results:
48
- if not isinstance(region, dict): continue
49
-
50
- # Try finding the box with supported keys
51
- box = region.get('layout_bbox') or region.get('bbox')
52
- label = region.get('label', 'unknown')
53
 
 
 
 
54
  if box is None: continue
55
 
56
  try:
57
  x1, y1, x2, y2 = int(box[0]), int(box[1]), int(box[2]), int(box[3])
58
 
59
- # Color coding for different regions
60
  color = (0, 255, 0) # Text (Green)
61
  if label == 'title': color = (0, 0, 255) # Title (Red)
62
  elif label == 'figure': color = (255, 0, 0) # Figure (Blue)
63
  elif label == 'table': color = (255, 255, 0)# Table (Cyan)
64
 
65
  cv2.rectangle(viz_image, (x1, y1), (x2, y2), color, 3)
66
- cv2.putText(viz_image, label, (x1, y1 - 10), cv2.FONT_HERSHEY_SIMPLEX, 0.8, color, 2)
67
  detections_text.append(f"Found {label} at {box}")
68
  except Exception:
69
  pass
@@ -72,7 +76,7 @@ def analyze_layout(input_image):
72
 
73
  with gr.Blocks(title="PP-DocLayoutV3 Explorer") as demo:
74
  gr.Markdown("## 📄 PP-DocLayoutV3 Explorer")
75
- gr.Markdown("Visualizes document structure (Title, Text, Table, Figure).")
76
 
77
  with gr.Row():
78
  with gr.Column():
 
3
  import numpy as np
4
  import os
5
  from huggingface_hub import snapshot_download
 
6
 
7
+ # --- KEY FIX: Use the standard PPStructure class ---
8
+ # The 'PPStructureV3' class is currently broken/strict in the PyPI release.
9
+ # The standard 'PPStructure' class is stable and CAN load V3 weights
10
+ # because it reads the architecture from the downloaded inference.yml file.
11
+ from paddleocr import PPStructure
12
+
13
+ # --- STEP 1: Download the V3 Model ---
14
  print("Downloading PP-DocLayoutV3 from Hugging Face...")
15
  model_path = snapshot_download(repo_id="PaddlePaddle/PP-DocLayoutV3", allow_patterns=["*.pdiparams", "*.pdmodel", "*.yml", "*.json"])
16
  print(f"Model downloaded to: {model_path}")
17
 
18
+ # --- STEP 2: Initialize ---
19
+ # We use the stable class but point 'layout_model_dir' to your V3 download.
20
+ layout_engine = PPStructure(
21
+ layout_model_dir=model_path, # This argument is valid in the standard class
22
+ use_angle_cls=True,
23
+ enable_mkldnn=False, # Keeps your CPU from crashing
24
+ show_log=False, # Explicitly False to avoid "Unknown Argument" error
25
+ # We disable these extra modules to focus strictly on layout analysis speed
26
+ table=False,
27
+ ocr=False
28
  )
29
 
30
  def analyze_layout(input_image):
 
35
 
36
  # Run Inference
37
  try:
38
+ # The standard class returns a list of results directly
39
+ result = layout_engine(image_np)
 
40
  except Exception as e:
41
  return image_np, f"Error running layout analysis: {e}"
42
 
43
  viz_image = image_np.copy()
44
  detections_text = []
45
 
46
+ if not result:
47
  return viz_image, "No layout detected."
48
 
49
+ # --- STEP 3: Visualize ---
50
+ for region in result:
51
+ # PPStructure V2/Standard output format: dict with 'type', 'bbox', 'img'
52
+ # Note: V3 model output via V2 class might label keys slightly differently,
53
+ # so we check for both standard sets of keys.
 
 
 
 
54
 
55
+ box = region.get('bbox')
56
+ label = region.get('type') or region.get('label')
57
+
58
  if box is None: continue
59
 
60
  try:
61
  x1, y1, x2, y2 = int(box[0]), int(box[1]), int(box[2]), int(box[3])
62
 
63
+ # Color coding
64
  color = (0, 255, 0) # Text (Green)
65
  if label == 'title': color = (0, 0, 255) # Title (Red)
66
  elif label == 'figure': color = (255, 0, 0) # Figure (Blue)
67
  elif label == 'table': color = (255, 255, 0)# Table (Cyan)
68
 
69
  cv2.rectangle(viz_image, (x1, y1), (x2, y2), color, 3)
70
+ cv2.putText(viz_image, str(label), (x1, y1 - 10), cv2.FONT_HERSHEY_SIMPLEX, 0.8, color, 2)
71
  detections_text.append(f"Found {label} at {box}")
72
  except Exception:
73
  pass
 
76
 
77
  with gr.Blocks(title="PP-DocLayoutV3 Explorer") as demo:
78
  gr.Markdown("## 📄 PP-DocLayoutV3 Explorer")
79
+ gr.Markdown("Using **PP-DocLayoutV3** weights via the stable engine.")
80
 
81
  with gr.Row():
82
  with gr.Column():