MuzammalHussain committed on
Commit
c0ecb78
·
verified ·
1 Parent(s): a53c53c

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +62 -356
app.py CHANGED
@@ -1,365 +1,71 @@
1
- """
2
- app.py -- Hugging Face / Gradio app
3
- Image -> OCR/crack-width extraction -> Word (.docx) report (one image per page)
4
-
5
- Notes:
6
- - Requires Tesseract installed (add to apt.txt: tesseract-ocr, libtesseract-dev)
7
- - Put in requirements.txt: gradio, pytesseract, pillow, opencv-python-headless, numpy, python-docx
8
- - This is best-effort: primary method uses OCR to find explicit 'mm' values. If OCR fails,
9
- a fallback attempts to estimate pixel->mm using ruler tick patterns (may not work on all images).
10
- """
11
-
12
- import io
13
- import re
14
- import os
15
- import tempfile
16
- from typing import List, Tuple, Optional
17
-
18
- import numpy as np
19
- from PIL import Image, ImageOps
20
- import pytesseract
21
- import cv2
22
  import gradio as gr
 
 
23
  from docx import Document
24
- from docx.shared import Inches, Pt
25
-
26
- # -------------------- Utilities --------------------
27
-
28
def pil_from_bytes(b: bytes) -> Image.Image:
    """Decode raw image bytes and return the picture as an RGB PIL image."""
    buffer = io.BytesIO(b)
    decoded = Image.open(buffer)
    return decoded.convert("RGB")
30
-
31
def ocr_text_from_pil(img: Image.Image) -> str:
    """Run Tesseract on *img* and return the raw recognised text.

    If the first call raises, the image is retried once as a NumPy array;
    an error from that second attempt propagates to the caller.
    """
    try:
        raw_text = pytesseract.image_to_string(img)
    except Exception:
        raw_text = pytesseract.image_to_string(np.array(img))
    return raw_text
37
-
38
# A number directly followed by a millimetre unit, e.g. "0.25mm" or "3 mm".
_mm_regex = re.compile(r"([0-9]+(?:\.[0-9]+)?)\s*(?:mm|Millimeter|millimetre|mm\.)", re.IGNORECASE)
# The first number that follows the phrase "crack width".
_crack_regex = re.compile(r"crack\s*width[^0-9]*([0-9]+(?:\.[0-9]+)?)", re.IGNORECASE)
# Any bare integer or decimal number (used only by the last-resort fallback).
_number_regex = re.compile(r"[0-9]+(?:\.[0-9]+)?")

# An unlabelled number larger than this is not accepted as a width in mm.
_MAX_UNLABELLED_WIDTH_MM = 50


def find_width_from_text(text: Optional[str]) -> Optional[float]:
    """Extract a crack width in millimetres from OCR text.

    Patterns are tried in order of decreasing confidence:

    1. a number following the phrase "crack width";
    2. a number directly followed by a millimetre unit;
    3. the first number in the text, accepted only when it does not exceed
       ``_MAX_UNLABELLED_WIDTH_MM`` (risky: the unit is merely assumed).

    Args:
        text: OCR output; may be empty or ``None``.

    Returns:
        The width as a float, or ``None`` when nothing usable is found.
    """
    if not text:
        return None

    # Every capture group matches digits with an optional fraction, so
    # float() cannot fail here and no exception handling is required.
    for pattern in (_crack_regex, _mm_regex):
        match = pattern.search(text)
        if match:
            return float(match.group(1))

    first_number = _number_regex.search(text)
    if first_number:
        value = float(first_number.group(0))
        if value <= _MAX_UNLABELLED_WIDTH_MM:
            return value
    return None
70
-
71
- # -------------------- Fallback pixel-based ruler detection (best-effort) --------------------
72
-
73
def estimate_scale_from_ruler(img_cv_gray: np.ndarray) -> Optional[float]:
    """Estimate the image scale in pixels per millimetre from ruler ticks.

    Heuristic, best-effort procedure:

    1. Canny edge detection on the grayscale image.
    2. Select the rows whose edge response exceeds 25% of the strongest row
       and centre a horizontal band on the median of those rows.
    3. Inside the band, treat columns with a normalised edge response above
       0.35 as tick columns and merge columns at most 2 px apart into one tick.
    4. Return the median spacing between adjacent ticks, assuming that
       neighbouring ticks are 1 mm apart.

    Returns:
        Pixels per millimetre, or ``None`` when too few ticks are found or the
        spacing falls outside the plausible range of 0.5 to 100 px.
    """
    edge_map = cv2.Canny(img_cv_gray, 50, 150)
    height = edge_map.shape[0]

    # Rows that contain a lot of edge pixels are ruler candidates.
    row_strength = edge_map.sum(axis=1)
    busy_rows = np.flatnonzero(row_strength > (0.25 * row_strength.max()))
    if busy_rows.size == 0:
        return None

    # Horizontal band centred on the median candidate row.
    centre_row = int(np.median(busy_rows))
    half_band = max(10, height // 10)
    top = max(0, centre_row - half_band)
    bottom = min(height, centre_row + half_band)
    band = edge_map[top:bottom, :]

    # Column-wise edge response, normalised to [0, 1]; the epsilon avoids a
    # division by zero when every column has the same response.
    col_strength = band.sum(axis=0)
    lowest = col_strength.min()
    normalised = (col_strength - lowest) / (col_strength.max() - lowest + 1e-9)
    tick_cols = np.flatnonzero(normalised > 0.35)
    if tick_cols.size < 3:
        # Not enough repeated ticks.
        return None

    # Split the column indices wherever the gap exceeds 2 px; each run is one
    # tick and is represented by its (truncated) mean column.
    breaks = np.flatnonzero(np.diff(tick_cols) > 2) + 1
    tick_positions = [int(np.mean(run)) for run in np.split(tick_cols, breaks)]
    if len(tick_positions) < 2:
        return None

    # Median pixel distance between adjacent ticks, taken as 1 mm.
    spacing = float(np.median(np.diff(tick_positions)))
    if spacing < 0.5 or spacing > 100:
        return None
    return spacing
134
-
135
def estimate_crack_width_pixels(img_cv_gray: np.ndarray) -> Optional[float]:
    """Estimate the maximum crack thickness in pixels (best-effort).

    The image is contrast-equalised, edges are detected and dilated, and the
    external contour with the largest area is taken to be the crack. Its
    thickness is twice the largest distance from a pixel inside the filled
    contour to the nearest pixel outside it.

    Args:
        img_cv_gray: single-channel image; assumed to be 8-bit, as required
            by ``cv2.equalizeHist`` and ``cv2.distanceTransform``.

    Returns:
        The estimated width in pixels, or ``None`` when no contour is found.
    """
    img_eq = cv2.equalizeHist(img_cv_gray)
    edges = cv2.Canny(img_eq, 30, 120)

    # Dilate so that nearby edge fragments merge into connected regions.
    # NOTE(review): the dilation also thickens the region, so the result is
    # presumably an over-estimate by a few pixels; confirm whether to correct.
    kernel = cv2.getStructuringElement(cv2.MORPH_RECT, (3, 3))
    dilated = cv2.dilate(edges, kernel, iterations=2)

    contours, _ = cv2.findContours(dilated, cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_SIMPLE)
    if not contours:
        return None

    # The largest contour is taken as the crack line.
    largest = max(contours, key=cv2.contourArea)
    mask = np.zeros_like(img_cv_gray)
    cv2.drawContours(mask, [largest], -1, 255, thickness=cv2.FILLED)

    # distanceTransform gives, for every non-zero pixel, the distance to the
    # nearest zero pixel. It must therefore run on the mask itself (crack =
    # non-zero). Running it on the inverted mask would measure how far the
    # background lies from the crack instead of the crack's half-thickness.
    dist = cv2.distanceTransform(mask, cv2.DIST_L2, 5)
    max_thick = float(dist.max()) * 2

    if max_thick <= 0 or max_thick > max(img_cv_gray.shape):
        # Implausible value: fall back to the short side of the bounding box.
        _, _, box_w, box_h = cv2.boundingRect(largest)
        max_thick = float(min(box_w, box_h))
    return max_thick
167
-
168
- # -------------------- Report generation --------------------
169
-
170
# Control characters that are not legal in XML 1.0 text. OCR output can
# contain them (e.g. form feeds between pages), and a single one is expected
# to make the document library reject the whole string.
_XML_ILLEGAL_CHARS = re.compile(r"[\x00-\x08\x0b\x0c\x0e-\x1f]")


def build_docx_from_results(results: List[dict], out_path: str) -> str:
    """Write a Word report with one image and one table per page.

    Args:
        results: one dict per image with the keys
            ``'image_pil'`` (PIL image, required),
            ``'ocr_text'`` (str, optional),
            ``'crack_mm'`` (float or ``None``, optional) and
            ``'location'`` (str, optional, defaults to ``'-'``).
        out_path: destination path of the ``.docx`` file.

    Returns:
        ``out_path``, after the document has been saved.
    """
    doc = Document()
    style = doc.styles['Normal']
    style.font.name = 'Arial'
    style.font.size = Pt(11)

    total = len(results)
    for i, r in enumerate(results, start=1):
        # Hand the picture over as an in-memory PNG stream. This avoids the
        # temporary file that was left behind whenever add_picture raised.
        image_stream = io.BytesIO()
        r['image_pil'].save(image_stream, format="PNG")
        image_stream.seek(0)
        doc.add_picture(image_stream, width=Inches(6.5))  # fit page width

        table = doc.add_table(rows=3, cols=3)
        table.style = 'Table Grid'

        # Row 1: location and running tag number.
        location_cells = table.rows[0].cells
        location_cells[0].text = "Location"
        location_cells[1].text = r.get('location', '-')
        location_cells[2].text = f"Mapping Tag No.\n{i}"

        # Row 2: description assembled from OCR text and the measured width.
        description_cells = table.rows[1].cells
        description_cells[0].text = "Description"
        desc_lines = []
        ocr_text = _XML_ILLEGAL_CHARS.sub("", r.get('ocr_text') or "").strip()
        if ocr_text:
            desc_lines.append(ocr_text)
        desc_lines.append("Detected crack (inspection photo).")
        if r.get('crack_mm') is not None:
            desc_lines.append(f"(Crack width: {r['crack_mm']:.2f} mm)")
        description_cells[1].text = "\n".join(desc_lines)
        description_cells[2].text = ""

        # Row 3: remarks placeholder.
        remarks_cells = table.rows[2].cells
        remarks_cells[0].text = "Remarks"
        remarks_cells[1].text = "-"
        remarks_cells[2].text = ""

        # One image per page: break after every entry except the last.
        if i != total:
            doc.add_page_break()

    doc.save(out_path)
    return out_path
230
-
231
- # -------------------- Main processing pipeline --------------------
232
-
233
def analyze_image_bytes(filename: str, b: bytes) -> dict:
    """Analyse one image and return everything needed for the report.

    OCR is run on three variants of the picture (as loaded, auto-contrasted,
    and hard-thresholded). If no width can be parsed from the text, a
    pixel-based fallback combines the ruler scale estimate with the crack
    thickness estimate.

    Args:
        filename: name stored in the result for logging.
        b: encoded image bytes.

    Returns:
        A dict with the keys ``filename``, ``image_pil``, ``ocr_text``,
        ``crack_mm``, ``pixel_scale_px_per_mm``, ``estimated_width_mm`` and
        ``remarks`` (a ``" | "``-joined string, empty when there are none).
    """
    pil = pil_from_bytes(b)

    # Downscale very large images for speed, keeping the aspect ratio: any
    # image whose longest side exceeds 2000 px is shrunk to a 1000 px longest
    # side. max(1, ...) protects extreme aspect ratios from a zero dimension.
    w, h = pil.size
    longest = max(w, h)
    if longest > 2000:
        new_size = (max(1, int(w * 1000 / longest)), max(1, int(h * 1000 / longest)))
        pil = pil.resize(new_size, Image.LANCZOS)

    # Grayscale copy for the OpenCV-based steps.
    cv_img = cv2.cvtColor(np.array(pil), cv2.COLOR_RGB2GRAY)

    ocr_variants = (
        pil,
        ImageOps.autocontrast(pil),
        # Hard threshold at 200; sometimes helps with printed digits.
        Image.fromarray(cv_img).point(lambda p: 0 if p < 200 else 255),
    )

    # Strip every pass before joining: Tesseract output normally ends with a
    # form-feed page separator, which would otherwise end up in the middle of
    # the combined text. Passes that produce identical text are kept once.
    texts = []
    for variant in ocr_variants:
        cleaned = (ocr_text_from_pil(variant) or "").strip()
        if cleaned and cleaned not in texts:
            texts.append(cleaned)
    ocr_combined = "\n".join(texts)

    crack_mm = find_width_from_text(ocr_combined)

    pixel_per_mm = None
    estimated_width_mm = None
    remarks = []

    if crack_mm is None:
        # OCR gave no width: try the pixel-based fallback.
        pixel_per_mm = estimate_scale_from_ruler(cv_img)
        crack_px = estimate_crack_width_pixels(cv_img)
        if crack_px is not None and pixel_per_mm is not None:
            estimated_width_mm = crack_px / pixel_per_mm
            crack_mm = estimated_width_mm
            remarks.append("Width estimated using ruler-detection fallback.")
        elif crack_px is not None:
            remarks.append("Detected crack pixels but could not detect ruler; scale unknown.")
        else:
            remarks.append("Could not detect crack width via OCR or fallback.")

    return {
        "filename": filename,
        "image_pil": pil,
        "ocr_text": ocr_combined,
        "crack_mm": float(crack_mm) if crack_mm is not None else None,
        "pixel_scale_px_per_mm": float(pixel_per_mm) if pixel_per_mm is not None else None,
        "estimated_width_mm": float(estimated_width_mm) if estimated_width_mm is not None else None,
        "remarks": " | ".join(remarks) if remarks else "",
    }
291
 
292
- # -------------------- Gradio App --------------------
 
 
 
293
 
294
def process_and_make_docx(files: List[tuple]) -> Tuple[str, Optional[str]]:
    """Analyse every uploaded image and build the Word report.

    Args:
        files: list of ``(filename, bytes)`` pairs.

    Returns:
        ``(log_text, path_to_docx)``. The path is ``None`` when no files were
        given, when no image could be analysed, or when writing the report
        failed; the log explains which.
    """
    if not files:
        return ("No files uploaded.", None)

    results = []
    logs = []
    for fname, b in files:
        logs.append(f"Processing {fname} ...")
        try:
            res = analyze_image_bytes(fname, b)
            results.append(res)
            if res['crack_mm'] is not None:
                logs.append(f" -> Found width: {res['crack_mm']:.3f} mm")
            else:
                logs.append(" -> Width: NOT FOUND")
            if res['pixel_scale_px_per_mm'] is not None:
                logs.append(f" -> pixel_per_mm ~ {res['pixel_scale_px_per_mm']:.2f} px/mm")
            if res['remarks']:
                logs.append(f" -> remarks: {res['remarks']}")
        except Exception as e:
            # Best-effort: one bad image must not stop the others.
            logs.append(f"ERROR processing {fname}: {e}")

    if not results:
        # Do not hand out an empty report when every image failed.
        logs.append("No images could be processed; report not generated.")
        return "\n".join(logs), None

    # A unique file per call, so concurrent requests cannot overwrite each
    # other's report (the path used to be a fixed name under /tmp).
    fd, out_path = tempfile.mkstemp(prefix="crack_report_", suffix=".docx")
    os.close(fd)
    try:
        build_docx_from_results(results, out_path)
    except Exception as e:
        os.unlink(out_path)
        logs.append(f"ERROR generating report: {e}")
        return "\n".join(logs), None

    logs.append(f"Generated report: {out_path}")
    return "\n".join(logs), out_path
326
-
327
- # Gradio UI
328
# Gradio UI: upload images, run the pipeline, offer the report for download.
with gr.Blocks(title="Crack Measurement → Word Report") as demo:
    gr.Markdown("# Crack Measurement Report Generator\nUpload images (one or many). App tries to extract crack width (automatically) and generates a Word report (.docx) with one image per page.")
    with gr.Row():
        with gr.Column(scale=2):
            image_files = gr.Files(label="Upload inspection images", file_count="multiple", type="binary")
            proc_btn = gr.Button("Generate Report (.docx)")
            download = gr.File(label="Download generated report")
        with gr.Column(scale=1):
            log_box = gr.Textbox(label="Process log", lines=18)
            preview_json = gr.Textbox(label="OCR (first page) excerpt", lines=8)

    def _process(files):
        """Normalise the uploads to (name, bytes) pairs and run the pipeline.

        Returns the process log, an excerpt (at most 1000 characters) of the
        first image's OCR text, and the path of the generated report.
        """
        wrapped = []
        for idx, f in enumerate(files or [], start=1):
            if isinstance(f, (bytes, bytearray)):
                # type="binary" delivers plain bytes without a file name;
                # these were previously dropped, so nothing got processed.
                wrapped.append((f"image_{idx}", bytes(f)))
            elif isinstance(f, (list, tuple)) and len(f) == 2 and isinstance(f[1], (bytes, bytearray)):
                wrapped.append((f[0], f[1]))
            elif isinstance(f, str):
                # A path on disk (what type="filepath" would deliver).
                with open(f, "rb") as fh:
                    wrapped.append((os.path.basename(f), fh.read()))
            elif hasattr(f, 'name') and hasattr(f, 'read'):
                # File-like object carrying its own name.
                wrapped.append((os.path.basename(getattr(f, 'name')), f.read()))

        log_text, path = process_and_make_docx(wrapped)

        # NOTE(review): the first image is analysed a second time just for
        # the preview; returning the results from process_and_make_docx
        # would avoid the repeated OCR work.
        ocr_excerpt = ""
        if wrapped:
            try:
                first = analyze_image_bytes(wrapped[0][0], wrapped[0][1])
                ocr_excerpt = first.get('ocr_text', '')[:1000]
            except Exception:
                ocr_excerpt = ""
        return log_text, ocr_excerpt, path

    proc_btn.click(_process, inputs=[image_files], outputs=[log_box, preview_json, download])

    gr.Markdown("**Notes:** Tesseract must be installed in the environment. The fallback pixel method is heuristic — best results come when the image contains readable 'mm' text near the ruler or clearly visible ruler ticks.")

if __name__ == "__main__":
    demo.launch(server_name="0.0.0.0", server_port=7860)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
  import gradio as gr
2
+ import pytesseract
3
+ from PIL import Image
4
  from docx import Document
5
+ from docx.shared import Inches
6
+ import io
7
+ import os
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
8
 
9
+ # Function to process images and generate a report
10
def generate_report(images):
    """OCR every uploaded image and assemble a Word report.

    Each image that is processed completely gets its own page with a
    heading, the picture, the extracted text and a three-row table. Images
    that fail are skipped and listed in the returned status message.

    Args:
        images: list of image file paths; may be empty or ``None``.

    Returns:
        ``(status_message, report_path)``. ``report_path`` is ``None`` when
        nothing was uploaded or no image could be processed.
    """
    if not images:
        return "No images uploaded.", None

    # Local import: tempfile is not among this file's top-level imports.
    import tempfile

    # XML 1.0 forbids most C0 control characters; drop them from OCR output
    # so that one stray character cannot abort a page. Tab, LF, CR are kept.
    drop_controls = {code: None for code in range(32) if code not in (9, 10, 13)}

    document = Document()
    has_content = False
    pages_written = 0
    failures = []

    for idx, img_path in enumerate(images):
        try:
            # The context manager closes the file handle that Image.open
            # would otherwise keep open.
            with Image.open(img_path) as image:
                text = pytesseract.image_to_string(image)
            text = text.translate(drop_controls).strip()

            # Break *before* every page except the first, so the report
            # does not end with an empty page.
            if has_content:
                document.add_page_break()
            has_content = True

            document.add_heading(f"Mapping Tag No. {idx + 1}", level=1)
            document.add_picture(img_path, width=Inches(5.5))
            document.add_paragraph(f"Extracted text:\n{text}")

            table = document.add_table(rows=3, cols=3)
            table.style = "Table Grid"

            hdr_cells = table.rows[0].cells
            hdr_cells[0].text = "Location"
            hdr_cells[1].text = "-"
            hdr_cells[2].text = f"Mapping Tag No. {idx + 1}"

            row_cells = table.rows[1].cells
            row_cells[0].text = "Description"
            row_cells[1].merge(row_cells[2])
            row_cells[1].text = text if text else "Text not detected"

            rem_cells = table.rows[2].cells
            rem_cells[0].text = "Remarks"
            rem_cells[1].merge(rem_cells[2])
            rem_cells[1].text = "-"

            pages_written += 1
        except Exception as e:
            # Best-effort: report the failure and carry on with the rest.
            print("Error processing image:", e)
            failures.append(f"{os.path.basename(str(img_path))}: {e}")
            continue

    if pages_written == 0:
        return "No images could be processed:\n" + "\n".join(failures), None

    # Unique output file per call, so concurrent users cannot overwrite
    # each other's report.
    fd, output_path = tempfile.mkstemp(prefix="report_", suffix=".docx")
    os.close(fd)
    document.save(output_path)

    if failures:
        status = (
            f"Report generated for {pages_written} of {len(images)} images. Failed:\n"
            + "\n".join(failures)
        )
    else:
        status = "Report generated successfully!"
    return status, output_path
57
+
58
+ # Define Gradio UI
59
# Gradio front end: a multi-file upload feeding generate_report, with a
# status box and a download slot for the resulting Word document.
_upload_input = gr.Files(label="Upload crack images", file_count="multiple", type="filepath")
_status_output = gr.Textbox(label="Status / Preview")
_report_output = gr.File(label="Download Word Report")

iface = gr.Interface(
    fn=generate_report,
    inputs=_upload_input,
    outputs=[_status_output, _report_output],
    title="Auto Crack Report Generator",
    description="Upload crack ruler images to automatically generate a formatted Word report with OCR data.",
)

if __name__ == "__main__":
    # Start the web app only when the file is run as a script.
    iface.launch()