iammraat committed on
Commit
0cf77d7
·
verified ·
1 Parent(s): 052257a

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +37 -32
app.py CHANGED
@@ -1,26 +1,27 @@
1
  import gradio as gr
2
  import cv2
3
  import numpy as np
4
- from paddleocr import PPStructure
5
- from huggingface_hub import snapshot_download
6
  import os
 
7
 
8
- # --- STEP 1: Download the Model from Hugging Face ---
9
- # We download the 'main' branch which contains the Paddle inference weights
 
 
 
10
  print("Downloading PP-DocLayoutV3 from Hugging Face...")
11
  model_path = snapshot_download(repo_id="PaddlePaddle/PP-DocLayoutV3", allow_patterns=["*.pdiparams", "*.pdmodel", "*.yml", "*.json"])
12
  print(f"Model downloaded to: {model_path}")
13
 
14
- # --- STEP 2: Initialize the Layout Engine ---
15
- # We use PPStructure, which is PaddleOCR's layout analysis module.
16
- # We point it to the downloaded model folder.
17
- layout_engine = PPStructure(
18
  layout_model_dir=model_path,
19
- table=False, # Disable table structure recognition for speed
20
- ocr=False, # Disable OCR for now (we just want to see layout)
21
  show_log=True,
22
- use_angle_cls=True, # Helps with orientation
23
- enable_mkldnn=False # CRITICAL: Fixes the CPU crash
24
  )
25
 
26
  def analyze_layout(input_image):
@@ -30,42 +31,46 @@ def analyze_layout(input_image):
30
  image_np = np.array(input_image)
31
 
32
  # Run Inference
33
- # result is a list of dictionaries, one per detected region
34
- result = layout_engine(image_np)
 
 
 
35
 
36
  viz_image = image_np.copy()
37
  detections_text = []
38
 
39
- # --- STEP 3: Visualize Results ---
 
 
 
40
  for region in result:
41
- # Extract Box (4 points)
42
- box = region['layout_bbox']
43
- label = region['label']
 
 
 
 
 
44
 
45
- # Convert to numpy format for drawing
46
- # layout_bbox is usually [x1, y1, x2, y2]
47
  x1, y1, x2, y2 = int(box[0]), int(box[1]), int(box[2]), int(box[3])
48
 
49
- # Color coding based on type
50
- color = (0, 255, 0) # Green for Text
51
- if label == 'title': color = (0, 0, 255) # Red for Title
52
- elif label == 'figure': color = (255, 0, 0) # Blue for Figures
53
- elif label == 'table': color = (255, 255, 0) # Cyan for Tables
54
 
55
- # Draw Rectangle
56
  cv2.rectangle(viz_image, (x1, y1), (x2, y2), color, 3)
57
-
58
- # Draw Label
59
  cv2.putText(viz_image, label, (x1, y1 - 10), cv2.FONT_HERSHEY_SIMPLEX, 0.8, color, 2)
60
-
61
  detections_text.append(f"Found {label} at {box}")
62
 
63
  return viz_image, "\n".join(detections_text)
64
 
65
- # --- Gradio UI ---
66
- with gr.Blocks(title="PP-DocLayoutV3 Demo") as demo:
67
  gr.Markdown("## 📄 PP-DocLayoutV3 Explorer")
68
- gr.Markdown("This model detects **layout regions** (Text, Tables, Titles) instead of reading characters. It is excellent for de-warping and segmenting messy documents.")
69
 
70
  with gr.Row():
71
  with gr.Column():
 
1
  import gradio as gr
2
  import cv2
3
  import numpy as np
 
 
4
  import os
5
+ from huggingface_hub import snapshot_download
6
 
7
+ # --- STRICT UPDATE: Use PPStructureV3 directly ---
8
+ # Your logs confirmed this class exists in your installed version.
9
+ from paddleocr import PPStructureV3
10
+
11
+ # --- STEP 1: Download the Model ---
12
  print("Downloading PP-DocLayoutV3 from Hugging Face...")
13
  model_path = snapshot_download(repo_id="PaddlePaddle/PP-DocLayoutV3", allow_patterns=["*.pdiparams", "*.pdmodel", "*.yml", "*.json"])
14
  print(f"Model downloaded to: {model_path}")
15
 
16
+ # --- STEP 2: Initialize V3 Engine ---
17
+ # We instantiate PPStructureV3 directly.
18
+ layout_engine = PPStructureV3(
 
19
  layout_model_dir=model_path,
20
+ table=False,
21
+ ocr=False,
22
  show_log=True,
23
+ use_angle_cls=True,
24
+ enable_mkldnn=False # Keeps the crash fix while using the new model
25
  )
26
 
27
  def analyze_layout(input_image):
 
31
  image_np = np.array(input_image)
32
 
33
  # Run Inference
34
+ try:
35
+ # V3 usually returns a generator or list
36
+ result = layout_engine(image_np)
37
+ except Exception as e:
38
+ return image_np, f"Error running layout analysis: {e}"
39
 
40
  viz_image = image_np.copy()
41
  detections_text = []
42
 
43
+ if result is None:
44
+ return viz_image, "No layout detected."
45
+
46
+ # Iterate through results
47
  for region in result:
48
+ # V3 Output format usually includes 'layout_bbox'
49
+ if isinstance(region, dict):
50
+ box = region.get('layout_bbox') or region.get('bbox')
51
+ label = region.get('label', 'unknown')
52
+ else:
53
+ continue
54
+
55
+ if box is None: continue
56
 
57
+ # Draw the box
 
58
  x1, y1, x2, y2 = int(box[0]), int(box[1]), int(box[2]), int(box[3])
59
 
60
+ # Color coding
61
+ color = (0, 255, 0)
62
+ if label == 'title': color = (0, 0, 255)
63
+ elif label == 'figure': color = (255, 0, 0)
64
+ elif label == 'table': color = (255, 255, 0)
65
 
 
66
  cv2.rectangle(viz_image, (x1, y1), (x2, y2), color, 3)
 
 
67
  cv2.putText(viz_image, label, (x1, y1 - 10), cv2.FONT_HERSHEY_SIMPLEX, 0.8, color, 2)
 
68
  detections_text.append(f"Found {label} at {box}")
69
 
70
  return viz_image, "\n".join(detections_text)
71
 
72
+ with gr.Blocks(title="PP-DocLayoutV3 Explorer") as demo:
 
73
  gr.Markdown("## 📄 PP-DocLayoutV3 Explorer")
 
74
 
75
  with gr.Row():
76
  with gr.Column():