kebson committed on
Commit
34707d6
·
verified ·
1 Parent(s): 89b60b1

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +26 -19
app.py CHANGED
@@ -6,7 +6,8 @@ from paddleocr import PaddleOCR
6
 
7
  ocr = PaddleOCR(
8
  use_angle_cls=True,
9
- lang="en"
 
10
  )
11
 
12
 
@@ -20,31 +21,37 @@ def extract_descriptions(image: Image.Image):
20
 
21
  words = []
22
 
23
- # 🔴 PARSING ROBUSTE PaddleOCR
24
  for line in result[0]:
25
- # Cas 1 : [box, (text, score)]
26
- if len(line) >= 2 and isinstance(line[1], (list, tuple)):
27
- box = line[0]
28
- text = line[1][0]
29
- score = line[1][1]
30
-
31
- # Cas 2 : [box, text, score]
32
- elif len(line) >= 3:
33
- box = line[0]
34
- text = line[1]
35
- score = line[2]
36
-
37
- else:
 
38
  continue
39
 
40
- if score < 0.5 or not text.strip():
 
 
 
 
 
 
41
  continue
42
 
43
  xs = [p[0] for p in box]
44
  ys = [p[1] for p in box]
45
 
46
  words.append({
47
- "text": text.strip(),
48
  "x": min(xs),
49
  "y": min(ys),
50
  "w": max(xs) - min(xs),
@@ -60,7 +67,7 @@ def extract_descriptions(image: Image.Image):
60
  if header is None:
61
  return "❌ Colonne 'Description' non détectée."
62
 
63
- # 2️⃣ Définir la zone de la colonne
64
  x_min = header["x"] - 15
65
  x_max = header["x"] + header["w"] + 380
66
  y_min = header["y"] + header["h"] + 10
@@ -112,7 +119,7 @@ demo = gr.Interface(
112
  fn=extract_descriptions,
113
  inputs=gr.Image(type="pil"),
114
  outputs=gr.Textbox(lines=20),
115
- title="Extraction colonne Description – PaddleOCR (Stable)"
116
  )
117
 
118
  demo.launch(server_name="0.0.0.0", server_port=7860)
 
6
 
7
  ocr = PaddleOCR(
8
  use_angle_cls=True,
9
+ lang="en",
10
+ use_gpu=False
11
  )
12
 
13
 
 
21
 
22
  words = []
23
 
 
24
  for line in result[0]:
25
+ text, score, box = None, None, None
26
+
27
+ # Parsing défensif PaddleOCR
28
+ if isinstance(line, (list, tuple)):
29
+ if len(line) >= 2 and isinstance(line[1], (list, tuple)):
30
+ box = line[0]
31
+ text = line[1][0]
32
+ score = line[1][1]
33
+ elif len(line) >= 3:
34
+ box = line[0]
35
+ text = line[1]
36
+ score = line[2]
37
+
38
+ if box is None or text is None:
39
  continue
40
 
41
+ # 🔒 Sécurisation du score
42
+ try:
43
+ score = float(score)
44
+ except:
45
+ score = 1.0
46
+
47
+ if score < 0.5 or not str(text).strip():
48
  continue
49
 
50
  xs = [p[0] for p in box]
51
  ys = [p[1] for p in box]
52
 
53
  words.append({
54
+ "text": str(text).strip(),
55
  "x": min(xs),
56
  "y": min(ys),
57
  "w": max(xs) - min(xs),
 
67
  if header is None:
68
  return "❌ Colonne 'Description' non détectée."
69
 
70
+ # 2️⃣ Zone de la colonne
71
  x_min = header["x"] - 15
72
  x_max = header["x"] + header["w"] + 380
73
  y_min = header["y"] + header["h"] + 10
 
119
  fn=extract_descriptions,
120
  inputs=gr.Image(type="pil"),
121
  outputs=gr.Textbox(lines=20),
122
+ title="Extraction colonne Description – PaddleOCR (Production Safe)"
123
  )
124
 
125
  demo.launch(server_name="0.0.0.0", server_port=7860)