# Spaces: rbaks / document-readability-scorer (status: Sleeping)
# File size: 12,487 Bytes
# feca759

"""
Document Readability Scorer — Gradio App
=========================================
Interactive UI for scoring documents and calibrating weights.
Upload documents, adjust signal weights with sliders, and see
how the readability score changes in real-time.
"""

import os
import json
import tempfile
import gradio as gr
import numpy as np
from PIL import Image

from document_readability import (
    DocumentReadabilityScorer,
    ScorerConfig,
    ReadabilityResult,
)

# ─── Scoring logic ───────────────────────────────────────────────────────────

def score_document(
    image,
    w_sharpness, w_contrast, w_noise, w_text_presence,
    w_brightness, w_entropy, w_learned_iqa,
    ocr_threshold,
    learned_metric,
):
    """Score a document image and build everything the UI displays.

    The seven signal weights are rescaled so they sum to 1.0, a
    ``ScorerConfig`` is built from them, and a fresh
    ``DocumentReadabilityScorer`` (on CPU) scores the image.

    Args:
        image: The uploaded document. A numpy array is converted to a PIL
            image; any other non-``None`` object is handed to the scorer
            unchanged. ``None`` means nothing has been uploaded yet.
        w_sharpness, w_contrast, w_noise, w_text_presence, w_brightness,
        w_entropy, w_learned_iqa: Relative (un-normalized) signal weights.
        ocr_threshold: Forwarded to ``ScorerConfig.ocr_threshold``.
        learned_metric: Name of the learned IQA metric; the literal string
            ``"disabled"`` is translated to ``None`` for the config.

    Returns:
        A 4-tuple ``(summary_markdown, api_json, config_json, signal_values)``.
        When no image is given, or when all weights sum to zero, only the
        first element (a user-facing message) is set and the rest are ``None``.
    """
    if image is None:
        return (
            "⬆️ Upload a document image to get started",
            None, None, None
        )

    # Normalize weights to sum to 1.0
    weights = [w_sharpness, w_contrast, w_noise, w_text_presence,
               w_brightness, w_entropy, w_learned_iqa]
    total = sum(weights)
    if total == 0:
        # Nothing to normalize against; bail out before dividing by zero.
        return "❌ All weights are zero!", None, None, None
    weights = [w / total for w in weights]

    config = ScorerConfig(
        w_sharpness=weights[0],
        w_contrast=weights[1],
        w_noise=weights[2],
        w_text_presence=weights[3],
        w_brightness=weights[4],
        w_entropy=weights[5],
        w_learned_iqa=weights[6],
        ocr_threshold=ocr_threshold,
        learned_metric=learned_metric if learned_metric != "disabled" else None,
        device="cpu",
    )

    scorer = DocumentReadabilityScorer(config)

    # Convert gradio image (numpy array) to PIL
    if isinstance(image, np.ndarray):
        pil_img = Image.fromarray(image)
    else:
        pil_img = image

    result = scorer.score(pil_img)
    signals = result.signals

    # Read the text-presence flag once so the table row and the diagnostics
    # list can never disagree. A missing flag is treated as "has text" so we
    # do not raise a warning the scorer never produced.
    has_text = signals.get('has_text', True)

    # ── Build the summary ──
    emoji = {"excellent": "🟢", "good": "🟢", "fair": "🟡", "poor": "🟠", "bad": "🔴"}
    e = emoji.get(result.confidence_label, "⚪")

    ocr_status = "✅ Proceed with OCR" if result.ocr_recommended else "⛔ Skip OCR (below threshold)"

    summary = f"""## {e} Readability Score: **{result.readability_score:.3f}** / 1.000

### Verdict: **{result.confidence_label.upper()}** — {ocr_status}

---

### Signal Breakdown

| Signal | Score | Raw Value | Description |
|--------|-------|-----------|-------------|
| 🔍 Sharpness | **{signals['sharpness']:.3f}** | Lap. var = {signals['laplacian_variance']:.1f} | {'Sharp ✓' if signals['sharpness'] > 0.5 else '⚠️ Blurry'} |
| 🎨 Contrast | **{signals['contrast']:.3f}** | RMS = {signals['rms_contrast']:.3f} | {'Good ✓' if signals['contrast'] > 0.4 else '⚠️ Low contrast'} |
| 📡 Noise | **{signals['noise']:.3f}** | σ = {signals['noise_sigma']:.2f} | {'Clean ✓' if signals['noise'] > 0.5 else '⚠️ Noisy'} |
| 📝 Text Presence | **{signals['text_presence']:.3f}** | Coverage = {signals['text_coverage']:.3f} | {'Has text ✓' if has_text else '⚠️ No text detected'} |
| ☀️ Brightness | **{signals['brightness']:.3f}** | Mean = {signals['mean_brightness']:.1f} | {'Normal ✓' if signals['brightness'] > 0.5 else '⚠️ Bad exposure'} |
| 📊 Entropy | **{signals['entropy']:.3f}** | H = {signals['shannon_entropy']:.2f} | {'Content ✓' if signals['entropy'] > 0.3 else '⚠️ Low info'} |
| 🧠 Learned IQA | **{signals['learned_iqa']:.3f}** | {signals.get('metric_name', 'N/A')} | {'Good ✓' if signals['learned_iqa'] > 0.5 else '⚠️ Low quality'} |

---

### Diagnostics
"""
    # Add specific warnings
    issues = []
    if signals['sharpness'] < 0.3:
        issues.append("⚠️ **Blur detected** — document is too blurry for reliable OCR")
    if signals['contrast'] < 0.3:
        issues.append("⚠️ **Low contrast** — text may not be distinguishable from background")
    if signals['noise'] < 0.3:
        issues.append("⚠️ **High noise** — may cause OCR character errors")
    if not has_text:
        issues.append("⚠️ **No text detected** — page may be blank or non-textual")
    if signals['brightness'] < 0.3:
        issues.append("⚠️ **Bad exposure** — document is too dark or over-exposed")
    if signals['entropy'] < 0.15:
        issues.append("⚠️ **Very low information content** — possibly blank page")

    if issues:
        summary += "\n".join(issues)
    else:
        summary += "✅ No major issues detected"

    # ── Per-signal scores, in the same order as the weight sliders ──
    signal_values = [
        signals['sharpness'], signals['contrast'],
        signals['noise'], signals['text_presence'],
        signals['brightness'], signals['entropy'],
        signals['learned_iqa']
    ]

    # ── JSON for API/programmatic use ──
    api_output = json.dumps(result.to_dict(), indent=2)

    # ── Config for reproduction (normalized weights, rounded for display) ──
    config_output = json.dumps({
        "weights": {
            "sharpness": round(weights[0], 4),
            "contrast": round(weights[1], 4),
            "noise": round(weights[2], 4),
            "text_presence": round(weights[3], 4),
            "brightness": round(weights[4], 4),
            "entropy": round(weights[5], 4),
            "learned_iqa": round(weights[6], 4),
        },
        "ocr_threshold": ocr_threshold,
        "learned_metric": learned_metric,
    }, indent=2)

    return summary, api_output, config_output, signal_values


def create_bar_plot(signal_values):
    """Pair each short signal label with its score for plotting.

    Args:
        signal_values: Iterable of signal scores in the order sharpness,
            contrast, noise, text presence, brightness, entropy, learned
            IQA, or ``None`` when no document has been scored yet.

    Returns:
        ``None`` when ``signal_values`` is ``None``; otherwise a dict mapping
        label to score. Labels without a matching value are omitted, so an
        empty or short sequence yields a smaller dict instead of raising.
    """
    if signal_values is None:
        return None
    names = ("Sharp", "Contrast", "Noise", "Text", "Bright", "Entropy", "IQA")
    # zip stops at the shorter input, which avoids indexing past the end.
    return dict(zip(names, signal_values))


# ─── Gradio UI ────────────────────────────────────────────────────────────────

# Markdown rendered at the top of the page via gr.Markdown(DESCRIPTION):
# a short pitch, the table of the seven signals, and a calibration tip.
DESCRIPTION = """
# 📄 Document Readability Scorer

**Pre-screen documents before expensive OCR/LLM inference.** Upload a document image and get a readability score 
with detailed signal breakdown. Adjust weights to calibrate for your specific pipeline.

### How it works
The scorer extracts 7 independent signals from the image and combines them into a single **readability score** (0–1):

| Signal | What it measures | Method |
|--------|-----------------|--------|
| **Sharpness** | Is the text sharp/blurry? | Laplacian variance + FFT high-freq energy |
| **Contrast** | Is text distinguishable from background? | RMS + Michelson contrast |
| **Noise** | How clean is the image? | Immerkær noise estimation |
| **Text Presence** | Is there text on the page? | MSER regions + Sobel edge density |
| **Brightness** | Is exposure appropriate? | Mean brightness + saturation analysis |
| **Entropy** | Is there information content? | Shannon entropy |
| **Learned IQA** | ML-based quality score | CLIP-IQA via pyiqa library |

> 💡 **Calibration**: Adjust the weight sliders to match your pipeline's sensitivity. For example, if your OCR handles blur well but fails on low contrast, increase the contrast weight.
"""

# Markdown shown inside the collapsible "Integration Guide" accordion.
# The fenced snippets are display-only examples; nothing in this module
# executes them.
INTEGRATION_GUIDE = """
### Python Integration

```python
from document_readability import DocumentReadabilityScorer, ScorerConfig

# Use default weights
scorer = DocumentReadabilityScorer()
result = scorer.score("document.png")

if result.ocr_recommended:
    # Proceed with expensive OCR/LLM
    run_ocr_pipeline(document)
else:
    log_rejected(result.signals)  # Log why it was rejected

# Custom calibration
config = ScorerConfig(
    w_sharpness=0.35,      # prioritize sharpness
    w_contrast=0.20,       # important for your docs
    w_noise=0.05,          # your OCR handles noise well
    w_text_presence=0.15,
    w_brightness=0.05,
    w_entropy=0.10,
    w_learned_iqa=0.10,
    ocr_threshold=0.50,    # your calibrated threshold
    learned_metric="clipiqa",  # or "brisque", "topiq_nr", None
)
scorer = DocumentReadabilityScorer(config)

# Batch processing
from document_readability import score_batch
results = score_batch(["doc1.png", "doc2.jpg", "doc3.tiff"])
# → sorted by readability score, highest first
```

### For GPU-accelerated scoring (VLM-based)
If you need even higher accuracy, use `mapo80/DeQA-Doc-Sharpness` (a 7B VLM scorer, SRCC ~0.92 on document quality):
```python
# Requires GPU (16GB VRAM)
from transformers import AutoModelForCausalLM
import torch
model = AutoModelForCausalLM.from_pretrained(
    "mapo80/DeQA-Doc-Sharpness",
    trust_remote_code=True,
    torch_dtype=torch.float16,
    device_map="auto",
)
score = model.score([pil_image]).item()  # 1-5 scale
```
"""

with gr.Blocks(title="Document Readability Scorer", theme=gr.themes.Soft()) as demo:
    # Page header: pitch plus the signal overview table.
    gr.Markdown(DESCRIPTION)

    with gr.Row():
        # ── Input side: image upload, weight sliders, threshold, metric ──
        with gr.Column(scale=1):
            image_input = gr.Image(type="numpy", height=400, label="📄 Upload Document")

            gr.Markdown("### ⚖️ Signal Weights (auto-normalized to sum to 1.0)")

            w_sharpness = gr.Slider(
                minimum=0, maximum=1, value=0.30, step=0.05, label="🔍 Sharpness"
            )
            w_contrast = gr.Slider(
                minimum=0, maximum=1, value=0.15, step=0.05, label="🎨 Contrast"
            )
            w_noise = gr.Slider(
                minimum=0, maximum=1, value=0.10, step=0.05, label="📡 Noise (inverted)"
            )
            w_text_presence = gr.Slider(
                minimum=0, maximum=1, value=0.15, step=0.05, label="📝 Text Presence"
            )
            w_brightness = gr.Slider(
                minimum=0, maximum=1, value=0.05, step=0.05, label="☀️ Brightness"
            )
            w_entropy = gr.Slider(
                minimum=0, maximum=1, value=0.10, step=0.05, label="📊 Entropy"
            )
            w_learned_iqa = gr.Slider(
                minimum=0, maximum=1, value=0.15, step=0.05, label="🧠 Learned IQA"
            )

            ocr_threshold = gr.Slider(
                minimum=0,
                maximum=1,
                value=0.45,
                step=0.05,
                label="🎯 OCR Threshold (score below → skip OCR)",
            )

            learned_metric = gr.Dropdown(
                label="🧠 Learned IQA Metric",
                value="clipiqa",
                choices=["clipiqa", "brisque", "niqe", "topiq_nr", "disabled"],
            )

            score_btn = gr.Button("🔍 Score Document", variant="primary", size="lg")

        # ── Output side: markdown report plus two collapsible JSON views ──
        with gr.Column(scale=1):
            result_md = gr.Markdown("⬆️ Upload a document to get started")

            with gr.Accordion("📋 API Response (JSON)", open=False):
                api_json = gr.Code(language="json", label="API Response")

            with gr.Accordion("⚙️ Current Config (for reproduction)", open=False):
                config_json = gr.Code(language="json", label="Config")

    # Invisible holder for the raw per-signal scores returned by the scorer.
    signal_state = gr.State(value=None)

    with gr.Accordion("📖 Integration Guide", open=False):
        gr.Markdown(INTEGRATION_GUIDE)

    # ── Event wiring ──
    # Order matters: it must line up with score_document's parameters and
    # with the 4-tuple it returns.
    all_inputs = [
        image_input,
        w_sharpness,
        w_contrast,
        w_noise,
        w_text_presence,
        w_brightness,
        w_entropy,
        w_learned_iqa,
        ocr_threshold,
        learned_metric,
    ]
    all_outputs = [result_md, api_json, config_json, signal_state]

    # Every trigger runs the same scoring callback with the same wiring.
    _rescore_kwargs = {
        "fn": score_document,
        "inputs": all_inputs,
        "outputs": all_outputs,
    }

    # Explicit button press.
    score_btn.click(**_rescore_kwargs)

    # Score automatically whenever the uploaded image changes.
    image_input.change(**_rescore_kwargs)

    # Re-score once a slider is let go (not on every intermediate value).
    for slider in (
        w_sharpness,
        w_contrast,
        w_noise,
        w_text_presence,
        w_brightness,
        w_entropy,
        w_learned_iqa,
        ocr_threshold,
    ):
        slider.release(**_rescore_kwargs)

    # Switching the learned metric also triggers a re-score.
    learned_metric.change(**_rescore_kwargs)


if __name__ == "__main__":
    # Start the web server only when run as a script, not on import.
    demo.launch(
        server_name="0.0.0.0",
        server_port=7860,
    )