Spaces:

MuzammalHussain
/

TEST

Sleeping

App Files Files Community

MuzammalHussain committed on Oct 21, 2025

Commit

c0ecb78

verified ·

1 Parent(s): a53c53c

Update app.py

Browse files

Files changed (1) hide show

app.py +62 -356

app.py CHANGED Viewed

@@ -1,365 +1,71 @@
-"""
-app.py -- Hugging Face / Gradio app
-Image -> OCR/crack-width extraction -> Word (.docx) report (one image per page)
-Notes:
-- Requires Tesseract installed (add to apt.txt: tesseract-ocr, libtesseract-dev)
-- Put in requirements.txt: gradio, pytesseract, pillow, opencv-python-headless, numpy, python-docx
-- This is best-effort: primary method uses OCR to find explicit 'mm' values. If OCR fails,
-  a fallback attempts to estimate pixel->mm using ruler tick patterns (may not work on all images).
-"""
-import io
-import re
-import os
-import tempfile
-from typing import List, Tuple, Optional
-import numpy as np
-from PIL import Image, ImageOps
-import pytesseract
-import cv2
 import gradio as gr
 from docx import Document
-from docx.shared import Inches, Pt
-# -------------------- Utilities --------------------
-def pil_from_bytes(b: bytes) -> Image.Image:
-    return Image.open(io.BytesIO(b)).convert("RGB")
-def ocr_text_from_pil(img: Image.Image) -> str:
-    # Basic OCR - return raw text
-    try:
-        return pytesseract.image_to_string(img)
-    except Exception:
-        return pytesseract.image_to_string(np.array(img))
-_mm_regex = re.compile(r"([0-9]+(?:\.[0-9]+)?)\s*(?:mm|Millimeter|millimetre|mm\.)", re.IGNORECASE)
-_crack_regex = re.compile(r"crack\s*width[^0-9]*([0-9]+(?:\.[0-9]+)?)", re.IGNORECASE)
-def find_width_from_text(text: str) -> Optional[float]:
-    # Try patterns like "Crack width: 0.25mm" or "0.25 mm"
-    if not text:
-        return None
-    # First look for explicit 'crack width' phrase
-    m = _crack_regex.search(text)
-    if m:
-        try:
-            return float(m.group(1))
-        except:
-            pass
-    # Look for any mm numbers
-    m2 = _mm_regex.search(text)
-    if m2:
-        try:
-            return float(m2.group(1))
-        except:
-            pass
-    # Fallback: look for any number and assume it's mm (risky)
-    nums = re.findall(r"([0-9]+(?:\.[0-9]+)?)", text)
-    if nums:
-        try:
-            # Don't assume if number is very large (>50)
-            val = float(nums[0])
-            if val <= 50:
-                return val
-        except:
-            pass
-    return None
-# -------------------- Fallback pixel-based ruler detection (best-effort) --------------------
-def estimate_scale_from_ruler(img_cv_gray: np.ndarray) -> Optional[float]:
-    """
-    Attempt to find a ruler region by detecting many short, high-contrast vertical ticks.
-    If found, return pixel_per_mm (pixels per 1 mm).
-    This is heuristic and may fail on many images.
-    """
-    # edge detect and morphological
-    edges = cv2.Canny(img_cv_gray, 50, 150)
-    h, w = edges.shape
-    # Horizontal projection to find candidate rows containing many edges
-    row_sum = edges.sum(axis=1)
-    row_peaks = np.where(row_sum > (0.25 * row_sum.max()))[0]  # rows with lots of edges
-    if len(row_peaks) == 0:
-        return None
-    # Take a band around the densest row
-    row = int(np.median(row_peaks))
-    band_h = max(10, h // 10)
-    r0 = max(0, row - band_h)
-    r1 = min(h, row + band_h)
-    band = edges[r0:r1, :]
-    # Vertical projection on the band to find repeated short ticks
-    col_sum = band.sum(axis=0)
-    # Normalize and find peaks (tick columns)
-    col_norm = (col_sum - col_sum.min()) / (col_sum.max() - col_sum.min() + 1e-9)
-    peaks = np.where(col_norm > 0.35)[0]
-    if len(peaks) < 3:
-        # not enough repeated ticks
-        return None
-    # Clean peaks: keep peaks that are separated (unique tick positions)
-    diffs = np.diff(peaks)
-    groups = []
-    current = [peaks[0]]
-    for i, d in enumerate(diffs):
-        if d <= 2:
-            current.append(peaks[i+1])
-        else:
-            groups.append(current)
-            current = [peaks[i+1]]
-    groups.append(current)
-    tick_positions = [int(np.mean(g)) for g in groups if len(g) >= 1]
-    if len(tick_positions) < 2:
-        return None
-    # compute distances between adjacent tick positions in pixels
-    dists = np.diff(sorted(tick_positions))
-    median_pixel_between_ticks = float(np.median(dists))
-    # We need to guess how many mm are between ticks: common small ruler tick spacing is 1mm.
-    # We'll assume ticks correspond to 1 mm (best-effort). So pixel_per_mm = median_pixel_between_ticks
-    pixel_per_mm = median_pixel_between_ticks
-    # sanity check
-    if pixel_per_mm < 0.5 or pixel_per_mm > 100:
-        return None
-    return pixel_per_mm
-def estimate_crack_width_pixels(img_cv_gray: np.ndarray) -> Optional[float]:
-    """
-    Estimate crack thickness (max width in pixels) using morphological operations.
-    Returns width in pixels (best-effort).
-    """
-    # Enhance contrast
-    img_eq = cv2.equalizeHist(img_cv_gray)
-    # Use a strong edge detection and dilate to form thicker regions
-    edges = cv2.Canny(img_eq, 30, 120)
-    # Dilate edges to close small gaps
-    kernel = cv2.getStructuringElement(cv2.MORPH_RECT, (3, 3))
-    dil = cv2.dilate(edges, kernel, iterations=2)
-    # Find contours
-    contours, _ = cv2.findContours(dil, cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_SIMPLE)
-    if not contours:
-        return None
-    # Choose contour with largest area (likely the crack line)
-    c = max(contours, key=cv2.contourArea)
-    # Use bounding box height or compute thickness by distance transform
-    x, y, w, h = cv2.boundingRect(c)
-    # compute local thickness via distance transform on inverted edges
-    # Create mask for contour region
-    mask = np.zeros_like(img_cv_gray)
-    cv2.drawContours(mask, [c], -1, 255, thickness=cv2.FILLED)
-    # compute distance transform on mask
-    inv = cv2.bitwise_not(mask)
-    dt = cv2.distanceTransform(inv, cv2.DIST_L2, 5)
-    max_thick = dt.max() * 2  # approximate width
-    if max_thick <= 0 or max_thick > max(img_cv_gray.shape):
-        # fallback use bounding box width
-        max_thick = float(min(w, h))
-    return float(max_thick)
-# -------------------- Report generation --------------------
-def build_docx_from_results(results: List[dict], out_path: str) -> str:
-    """
-    results: list of dicts per image:
-      {
-        'filename': str,
-        'ocr_text': str,
-        'crack_mm': float or None,
-        'image_pil': PIL.Image
-      }
-    Creates a Word docx with one image + table per page. Returns path.
-    """
-    doc = Document()
-    style = doc.styles['Normal']
-    style.font.name = 'Arial'
-    style.font.size = Pt(11)
-    for i, r in enumerate(results, start=1):
-        # Add image
-        img = r['image_pil']
-        # Save temp image to insert
-        tmpf = tempfile.NamedTemporaryFile(delete=False, suffix=".png")
-        img.save(tmpf.name, format="PNG")
-        tmpf.close()
-        # Add image large width (fit page width)
-        doc.add_picture(tmpf.name, width=Inches(6.5))  # adjust as needed
-        os.unlink(tmpf.name)
-        # Add a table with required fields
-        table = doc.add_table(rows=3, cols=3)
-        table.style = 'Table Grid'
-        # Row 1
-        row = table.rows[0].cells
-        row[0].text = "Location"
-        row[1].text = r.get('location', '-')
-        row[2].text = f"Mapping Tag No.\n{i}"
-        # Row 2
-        row = table.rows[1].cells
-        row[0].text = "Description"
-        desc_lines = []
-        if r.get('ocr_text'):
-            # use OCR text to create description if possible
-            desc_lines.append(r['ocr_text'].strip())
-        desc_lines.append("Detected crack (inspection photo).")
-        if r.get('crack_mm') is not None:
-            desc_lines.append(f"(Crack width: {r['crack_mm']:.2f} mm)")
-        row[1].text = "\n".join(desc_lines)
-        row[2].text = ""  # optional
-        # Row 3
-        row = table.rows[2].cells
-        row[0].text = "Remarks"
-        row[1].text = "-"
-        row[2].text = ""
-        # Page break after each image except last
-        if i != len(results):
-            doc.add_page_break()
-    doc.save(out_path)
-    return out_path
-# -------------------- Main processing pipeline --------------------
-def analyze_image_bytes(filename: str, b: bytes) -> dict:
-    """
-    Process one image bytes, return dict with keys:
-      filename, image_pil, ocr_text, crack_mm (or None), pixel_scale (pixels per mm or None), remarks
-    """
-    pil = pil_from_bytes(b)
-    # Preprocessing for OCR: convert to RGB/gray and simple thresholding
-    w, h = pil.size
-    # Resize if very big for speed (maintain aspect)
-    if max(w, h) > 2000:
-        pil = pil.resize((int(w * 1000 / max(w, h)), int(h * 1000 / max(w, h))), Image.LANCZOS)
-    # Convert to grayscale for cv ops
-    cv_img = cv2.cvtColor(np.array(pil), cv2.COLOR_RGB2GRAY)
-    # Run OCR on original image and also on a contrast-enhanced version
-    texts = []
-    texts.append(ocr_text_from_pil(pil))
-    # enhance contrast
-    pil_enh = ImageOps.autocontrast(pil)
-    texts.append(ocr_text_from_pil(pil_enh))
-    # small blurred binary for better numeric read sometimes
-    pil_gray = Image.fromarray(cv_img)
-    pil_bw = pil_gray.point(lambda p: 0 if p < 200 else 255)
-    texts.append(ocr_text_from_pil(pil_bw))
-    ocr_combined = "\n".join([t for t in texts if t and t.strip()])
-    # Try to parse mm from OCR
-    crack_mm = find_width_from_text(ocr_combined)
-    pixel_per_mm = None
-    estimated_width_mm = None
-    remarks = []
-    if crack_mm is None:
-        # try pixel-based fallback
-        pixel_per_mm = estimate_scale_from_ruler(cv_img)
-        crack_px = estimate_crack_width_pixels(cv_img)
-        if crack_px is not None and pixel_per_mm is not None:
-            estimated_width_mm = crack_px / pixel_per_mm
-            crack_mm = estimated_width_mm
-            remarks.append("Width estimated using ruler-detection fallback.")
-        elif crack_px is not None and pixel_per_mm is None:
-            remarks.append("Detected crack pixels but could not detect ruler; scale unknown.")
-        else:
-            remarks.append("Could not detect crack width via OCR or fallback.")
-    result = {
-        "filename": filename,
-        "image_pil": pil,
-        "ocr_text": ocr_combined,
-        "crack_mm": float(crack_mm) if crack_mm is not None else None,
-        "pixel_scale_px_per_mm": float(pixel_per_mm) if pixel_per_mm is not None else None,
-        "estimated_width_mm": float(estimated_width_mm) if estimated_width_mm is not None else None,
-        "remarks": " | ".join(remarks) if remarks else ""
-    }
-    return result
-# -------------------- Gradio App --------------------
-def process_and_make_docx(files: List[tuple]) -> Tuple[str, str]:
-    """
-    files: list of (filename, bytes)
-    Returns: (log_text, path_to_docx)
-    """
-    if not files:
-        return ("No files uploaded.", None)
-    results = []
-    logs = []
-    for idx, (fname, b) in enumerate(files, start=1):
-        logs.append(f"Processing {fname} ...")
         try:
-            res = analyze_image_bytes(fname, b)
-            results.append(res)
-            if res['crack_mm'] is not None:
-                logs.append(f" -> Found width: {res['crack_mm']:.3f} mm")
-            else:
-                logs.append(" -> Width: NOT FOUND")
-            if res['pixel_scale_px_per_mm'] is not None:
-                logs.append(f" -> pixel_per_mm ~ {res['pixel_scale_px_per_mm']:.2f} px/mm")
-            if res['remarks']:
-                logs.append(f" -> remarks: {res['remarks']}")
         except Exception as e:
-            logs.append(f"ERROR processing {fname}: {e}")
-    # Build docx
-    out_path = "/tmp/generated_report.docx"
-    build_docx_from_results(results, out_path)
-    logs.append(f"Generated report: {out_path}")
-    log_text = "\n".join(logs)
-    return log_text, out_path
-# Gradio UI
-with gr.Blocks(title="Crack Measurement → Word Report") as demo:
-    gr.Markdown("# Crack Measurement Report Generator\nUpload images (one or many). App tries to extract crack width (automatically) and generates a Word report (.docx) with one image per page.")
-    with gr.Row():
-        with gr.Column(scale=2):
-            image_files = gr.Files(label="Upload inspection images", file_count="multiple", type="binary")
-            proc_btn = gr.Button("Generate Report (.docx)")
-            download = gr.File(label="Download generated report")
-        with gr.Column(scale=1):
-            log_box = gr.Textbox(label="Process log", lines=18)
-            preview_json = gr.Textbox(label="OCR (first page) excerpt", lines=8)
-    def _process(files):
-        # convert files to list of (name, bytes) if needed
-        wrapped = []
-        for f in files or []:
-            # Gradio binary returns tuples (name, bytes)
-            if isinstance(f, (list, tuple)) and len(f) == 2 and isinstance(f[1], (bytes, bytearray)):
-                wrapped.append((f[0], f[1]))
-            elif hasattr(f, 'name'):
-                wrapped.append((os.path.basename(getattr(f, 'name')), f.read()))
-        log_text, path = process_and_make_docx(wrapped)
-        # put first OCR excerpt if any
-        ocr_excerpt = ""
-        if wrapped:
-            try:
-                # show first image OCR
-                first = analyze_image_bytes(wrapped[0][0], wrapped[0][1])
-                ocr_excerpt = first.get('ocr_text', '')[:1000]
-            except Exception:
-                ocr_excerpt = ""
-        return log_text, ocr_excerpt, path
-    proc_btn.click(_process, inputs=[image_files], outputs=[log_box, preview_json, download])
-    gr.Markdown("**Notes:** Tesseract must be installed in the environment. The fallback pixel method is heuristic — best results come when the image contains readable 'mm' text near the ruler or clearly visible ruler ticks.")
 if __name__ == "__main__":
-    demo.launch(server_name="0.0.0.0", server_port=7860)

 import gradio as gr
+import pytesseract
+from PIL import Image
 from docx import Document
+from docx.shared import Inches
+import io
+import os
+# Function to process images and generate a report
+def generate_report(images):
+    """Build a Word report with one OCR'd image per page.
+
+    Args:
+        images: File paths of the uploaded images (the UI passes them via
+            ``gr.Files(type="filepath")``). May be empty or ``None``.
+
+    Returns:
+        A ``(status_message, report_path)`` tuple. ``report_path`` is
+        ``None`` when nothing was uploaded or no image could be processed.
+    """
+    import tempfile  # local import: only needed to pick a per-call output dir
+
+    if not images:
+        return "No images uploaded.", None
+    document = Document()
+    started = False      # True once anything has been written to the document
+    pages_added = 0      # number of images fully written
+    failures = []        # base names of images that raised while processing
+    for idx, img_path in enumerate(images):
         try:
+            # Run OCR before touching the document so an unreadable image
+            # cannot leave a half-written page behind. The context manager
+            # releases the file handle that Image.open keeps open.
+            with Image.open(img_path) as image:
+                text = pytesseract.image_to_string(image).strip()
+            # Break *between* pages rather than after each one; a trailing
+            # break would append an empty page to the end of the report.
+            if started:
+                document.add_page_break()
+            started = True
+            tag = f"Mapping Tag No. {idx + 1}"
+            document.add_heading(tag, level=1)
+            document.add_picture(img_path, width=Inches(5.5))
+            document.add_paragraph(f"Extracted text:\n{text}")
+            # 3x3 grid: Location / Description / Remarks, with the last two
+            # rows spanning columns 2-3.
+            table = document.add_table(rows=3, cols=3)
+            table.style = "Table Grid"
+            hdr_cells = table.rows[0].cells
+            hdr_cells[0].text = "Location"
+            hdr_cells[1].text = "-"
+            hdr_cells[2].text = tag
+            row_cells = table.rows[1].cells
+            row_cells[0].text = "Description"
+            description_cell = row_cells[1].merge(row_cells[2])
+            description_cell.text = text or "Text not detected"
+            rem_cells = table.rows[2].cells
+            rem_cells[0].text = "Remarks"
+            remarks_cell = rem_cells[1].merge(rem_cells[2])
+            remarks_cell.text = "-"
+            pages_added += 1
         except Exception as e:
+            # Best effort: one bad image must not abort the whole report,
+            # but it is recorded so the user is told about it.
+            print("Error processing image:", e)
+            failures.append(os.path.basename(str(img_path)))
+            continue
+    if not pages_added:
+        # Do not hand back an empty document labelled as a success.
+        return (
+            "No report generated: none of the uploaded images could be processed.",
+            None,
+        )
+    # Save into a fresh directory per call so concurrent requests do not
+    # overwrite each other's "report.docx".
+    output_path = os.path.join(tempfile.mkdtemp(), "report.docx")
+    document.save(output_path)
+    status = "Report generated successfully!"
+    if failures:
+        status += f" Skipped {len(failures)} image(s): {', '.join(failures)}"
+    # Return text preview + file download
+    return status, output_path
+# Gradio front end: a multi-file upload goes in; a status line and the
+# generated .docx come out.
+_upload_input = gr.Files(
+    label="Upload crack images",
+    file_count="multiple",
+    type="filepath",
+)
+_status_output = gr.Textbox(label="Status / Preview")
+_report_output = gr.File(label="Download Word Report")
+iface = gr.Interface(
+    fn=generate_report,
+    inputs=_upload_input,
+    outputs=[_status_output, _report_output],
+    title="Auto Crack Report Generator",
+    description="Upload crack ruler images to automatically generate a formatted Word report with OCR data.",
+)
 if __name__ == "__main__":
+    iface.launch()