Spaces:
Sleeping
Sleeping
File size: 12,487 Bytes
feca759 | 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 278 279 280 281 282 283 284 285 286 287 288 289 290 291 292 293 294 295 296 297 298 299 300 301 302 303 304 305 306 307 308 309 | """
Document Readability Scorer — Gradio App
=========================================
Interactive UI for scoring documents and calibrating weights.
Upload documents, adjust signal weights with sliders, and see
how the readability score changes in real-time.
"""
import os
import json
import tempfile
import gradio as gr
import numpy as np
from PIL import Image
from document_readability import (
DocumentReadabilityScorer,
ScorerConfig,
ReadabilityResult,
)
# ─── Scoring logic ───────────────────────────────────────────────────────────
# Signal names, in the order shared by the weight arguments, the reproduction
# config and the returned list of per-signal scores.
_SIGNAL_KEYS = (
    "sharpness", "contrast", "noise", "text_presence",
    "brightness", "entropy", "learned_iqa",
)

# Status dot shown next to the headline score, keyed by confidence label.
_STATUS_EMOJI = {
    "excellent": "🟢",
    "good": "🟢",
    "fair": "🟡",
    "poor": "🟠",
    "bad": "🔴",
}


def _collect_issues(signals):
    """Return the diagnostic warnings triggered by low per-signal scores."""
    checks = [
        (signals['sharpness'] < 0.3,
         "⚠️ **Blur detected** — document is too blurry for reliable OCR"),
        (signals['contrast'] < 0.3,
         "⚠️ **Low contrast** — text may not be distinguishable from background"),
        (signals['noise'] < 0.3,
         "⚠️ **High noise** — may cause OCR character errors"),
        # A missing 'has_text' key is treated as "text present" here.
        (not signals.get('has_text', True),
         "⚠️ **No text detected** — page may be blank or non-textual"),
        (signals['brightness'] < 0.3,
         "⚠️ **Bad exposure** — document is too dark or over-exposed"),
        (signals['entropy'] < 0.15,
         "⚠️ **Very low information content** — possibly blank page"),
    ]
    return [message for triggered, message in checks if triggered]


def _build_summary(result):
    """Render the Markdown report (headline, signal table, diagnostics) for a result."""
    signals = result.signals
    icon = _STATUS_EMOJI.get(result.confidence_label, "⚪")
    if result.ocr_recommended:
        ocr_status = "✅ Proceed with OCR"
    else:
        ocr_status = "❌ Skip OCR (below threshold)"

    # One tuple per table row: (label, normalized score, raw value, verdict).
    rows = [
        ("🔍 Sharpness", signals['sharpness'],
         f"Lap. var = {signals['laplacian_variance']:.1f}",
         "Sharp ✓" if signals['sharpness'] > 0.5 else "⚠️ Blurry"),
        ("🎨 Contrast", signals['contrast'],
         f"RMS = {signals['rms_contrast']:.3f}",
         "Good ✓" if signals['contrast'] > 0.4 else "⚠️ Low contrast"),
        ("📡 Noise", signals['noise'],
         f"σ = {signals['noise_sigma']:.2f}",
         "Clean ✓" if signals['noise'] > 0.5 else "⚠️ Noisy"),
        # Unlike the diagnostics, a missing 'has_text' key reads as "no text" here.
        ("📝 Text Presence", signals['text_presence'],
         f"Coverage = {signals['text_coverage']:.3f}",
         "Has text ✓" if signals.get('has_text') else "⚠️ No text detected"),
        ("☀️ Brightness", signals['brightness'],
         f"Mean = {signals['mean_brightness']:.1f}",
         "Normal ✓" if signals['brightness'] > 0.5 else "⚠️ Bad exposure"),
        ("📊 Entropy", signals['entropy'],
         f"H = {signals['shannon_entropy']:.2f}",
         "Content ✓" if signals['entropy'] > 0.3 else "⚠️ Low info"),
        ("🧠 Learned IQA", signals['learned_iqa'],
         f"{signals.get('metric_name', 'N/A')}",
         "Good ✓" if signals['learned_iqa'] > 0.5 else "⚠️ Low quality"),
    ]
    table_rows = [
        f"| {label} | **{score:.3f}** | {raw} | {verdict} |"
        for label, score, raw, verdict in rows
    ]

    issues = _collect_issues(signals)
    diagnostics = "\n".join(issues) if issues else "✅ No major issues detected"

    # Blank lines keep headings, rules and the table as separate Markdown blocks.
    lines = [
        f"## {icon} Readability Score: **{result.readability_score:.3f}** / 1.000",
        "",
        f"### Verdict: **{result.confidence_label.upper()}** — {ocr_status}",
        "",
        "---",
        "",
        "### Signal Breakdown",
        "",
        "| Signal | Score | Raw Value | Description |",
        "|--------|-------|-----------|-------------|",
        *table_rows,
        "",
        "---",
        "",
        "### Diagnostics",
        "",
        diagnostics,
    ]
    return "\n".join(lines)


def score_document(
    image,
    w_sharpness, w_contrast, w_noise, w_text_presence,
    w_brightness, w_entropy, w_learned_iqa,
    ocr_threshold,
    learned_metric,
):
    """Score a document image with the given weights and build the UI outputs.

    Args:
        image: The uploaded document. A numpy array is converted to a PIL
            image; any other value is handed to the scorer unchanged.
        w_sharpness, w_contrast, w_noise, w_text_presence, w_brightness,
        w_entropy, w_learned_iqa: Raw signal weights. They are divided by
            their sum before being passed to ``ScorerConfig``.
        ocr_threshold: Forwarded to ``ScorerConfig`` and echoed in the
            reproduction config.
        learned_metric: Metric name forwarded to ``ScorerConfig``; the value
            ``"disabled"`` is translated to ``None``.

    Returns:
        A 4-tuple ``(summary_markdown, api_json, config_json, signal_values)``.
        When no image is supplied, or when all weights sum to zero, the first
        element is a short message and the remaining three are ``None``.
    """
    if image is None:
        return (
            "⬆️ Upload a document image to get started",
            None, None, None,
        )

    # Normalize weights so they sum to 1.0.
    raw_weights = [w_sharpness, w_contrast, w_noise, w_text_presence,
                   w_brightness, w_entropy, w_learned_iqa]
    total = sum(raw_weights)
    if total == 0:
        return "❌ All weights are zero!", None, None, None
    weights = dict(zip(_SIGNAL_KEYS, (w / total for w in raw_weights)))

    config = ScorerConfig(
        **{f"w_{key}": value for key, value in weights.items()},
        ocr_threshold=ocr_threshold,
        learned_metric=learned_metric if learned_metric != "disabled" else None,
        device="cpu",
    )
    # NOTE(review): a fresh scorer is constructed on every call; if construction
    # is expensive (e.g. loads the learned metric), consider caching — confirm.
    scorer = DocumentReadabilityScorer(config)

    # Gradio delivers the image as a numpy array; the scorer is given a PIL image.
    pil_img = Image.fromarray(image) if isinstance(image, np.ndarray) else image
    result = scorer.score(pil_img)

    summary = _build_summary(result)

    # Per-signal scores, in _SIGNAL_KEYS order.
    signal_values = [result.signals[key] for key in _SIGNAL_KEYS]

    # JSON for API/programmatic use.
    api_output = json.dumps(result.to_dict(), indent=2)

    # Config needed to reproduce this run.
    config_output = json.dumps({
        "weights": {key: round(value, 4) for key, value in weights.items()},
        "ocr_threshold": ocr_threshold,
        "learned_metric": learned_metric,
    }, indent=2)

    return summary, api_output, config_output, signal_values
def create_bar_plot(signal_values):
    """Map short signal names to their scores for a simple bar plot.

    Args:
        signal_values: Iterable of scores ordered as sharpness, contrast,
            noise, text presence, brightness, entropy, learned IQA; or
            ``None`` when nothing has been scored yet.

    Returns:
        A dict of ``{short_name: score}`` in that order, or ``None`` when
        ``signal_values`` is ``None``. Pairing stops at the shorter of the
        name list and the supplied values.
    """
    if signal_values is None:
        return None
    names = ("Sharp", "Contrast", "Noise", "Text", "Bright", "Entropy", "IQA")
    # zip pairs names with values directly, so any iterable is accepted.
    return dict(zip(names, signal_values))
# ─── Gradio UI ────────────────────────────────────────────────────────────────
# Markdown shown at the top of the app: what the scorer does and which signals
# it combines. Blank lines separate the Markdown blocks so headings, the table
# and the blockquote render independently.
DESCRIPTION = """
# 📄 Document Readability Scorer

**Pre-screen documents before expensive OCR/LLM inference.** Upload a document image and get a readability score
with detailed signal breakdown. Adjust weights to calibrate for your specific pipeline.

### How it works

The scorer extracts 7 independent signals from the image and combines them into a single **readability score** (0–1):

| Signal | What it measures | Method |
|--------|-----------------|--------|
| **Sharpness** | Is the text sharp/blurry? | Laplacian variance + FFT high-freq energy |
| **Contrast** | Is text distinguishable from background? | RMS + Michelson contrast |
| **Noise** | How clean is the image? | Immerkær noise estimation |
| **Text Presence** | Is there text on the page? | MSER regions + Sobel edge density |
| **Brightness** | Is exposure appropriate? | Mean brightness + saturation analysis |
| **Entropy** | Is there information content? | Shannon entropy |
| **Learned IQA** | ML-based quality score | CLIP-IQA via pyiqa library |

> 💡 **Calibration**: Adjust the weight sliders to match your pipeline's sensitivity. For example, if your OCR handles blur well but fails on low contrast, increase the contrast weight.
"""
# Markdown for the "Integration Guide" accordion. The fenced examples keep
# their Python indentation so they can be copied and run as shown.
INTEGRATION_GUIDE = """
### Python Integration

```python
from document_readability import DocumentReadabilityScorer, ScorerConfig

# Use default weights
scorer = DocumentReadabilityScorer()
result = scorer.score("document.png")

if result.ocr_recommended:
    # Proceed with expensive OCR/LLM
    run_ocr_pipeline(document)
else:
    log_rejected(result.signals)  # Log why it was rejected

# Custom calibration
config = ScorerConfig(
    w_sharpness=0.35,  # prioritize sharpness
    w_contrast=0.20,  # important for your docs
    w_noise=0.05,  # your OCR handles noise well
    w_text_presence=0.15,
    w_brightness=0.05,
    w_entropy=0.10,
    w_learned_iqa=0.10,
    ocr_threshold=0.50,  # your calibrated threshold
    learned_metric="clipiqa",  # or "brisque", "topiq_nr", None
)
scorer = DocumentReadabilityScorer(config)

# Batch processing
from document_readability import score_batch
results = score_batch(["doc1.png", "doc2.jpg", "doc3.tiff"])
# → sorted by readability score, highest first
```

### For GPU-accelerated scoring (VLM-based)

If you need even higher accuracy, use `mapo80/DeQA-Doc-Sharpness` (a 7B VLM scorer, SRCC ~0.92 on document quality):

```python
# Requires GPU (16GB VRAM)
from transformers import AutoModelForCausalLM
import torch

model = AutoModelForCausalLM.from_pretrained(
    "mapo80/DeQA-Doc-Sharpness",
    trust_remote_code=True,
    torch_dtype=torch.float16,
    device_map="auto",
)
score = model.score([pil_image]).item()  # 1-5 scale
```
"""
# Build the two-column UI: inputs and weight sliders on the left, the scoring
# report and JSON views on the right. Event listeners are registered inside the
# Blocks context so they attach to this app.
with gr.Blocks(
    title="Document Readability Scorer",
    theme=gr.themes.Soft(),
) as demo:
    gr.Markdown(DESCRIPTION)

    with gr.Row():
        # ── Left column: Input ──
        with gr.Column(scale=1):
            image_input = gr.Image(
                label="📄 Upload Document",
                type="numpy",
                height=400,
            )
            gr.Markdown("### ⚙️ Signal Weights (auto-normalized to sum to 1.0)")
            w_sharpness = gr.Slider(0, 1, value=0.30, step=0.05, label="🔍 Sharpness")
            w_contrast = gr.Slider(0, 1, value=0.15, step=0.05, label="🎨 Contrast")
            w_noise = gr.Slider(0, 1, value=0.10, step=0.05, label="📡 Noise (inverted)")
            w_text_presence = gr.Slider(0, 1, value=0.15, step=0.05, label="📝 Text Presence")
            w_brightness = gr.Slider(0, 1, value=0.05, step=0.05, label="☀️ Brightness")
            w_entropy = gr.Slider(0, 1, value=0.10, step=0.05, label="📊 Entropy")
            w_learned_iqa = gr.Slider(0, 1, value=0.15, step=0.05, label="🧠 Learned IQA")
            ocr_threshold = gr.Slider(
                0, 1, value=0.45, step=0.05,
                label="🎯 OCR Threshold (score below → skip OCR)",
            )
            learned_metric = gr.Dropdown(
                choices=["clipiqa", "brisque", "niqe", "topiq_nr", "disabled"],
                value="clipiqa",
                label="🧠 Learned IQA Metric",
            )
            score_btn = gr.Button("🔍 Score Document", variant="primary", size="lg")

        # ── Right column: Output ──
        with gr.Column(scale=1):
            result_md = gr.Markdown("⬆️ Upload a document to get started")
            with gr.Accordion("📋 API Response (JSON)", open=False):
                api_json = gr.Code(language="json", label="API Response")
            with gr.Accordion("⚙️ Current Config (for reproduction)", open=False):
                config_json = gr.Code(language="json", label="Config")
            # Hidden state holding the latest per-signal scores.
            signal_state = gr.State(None)

    with gr.Accordion("📖 Integration Guide", open=False):
        gr.Markdown(INTEGRATION_GUIDE)

    # ── Event handlers ──
    # Every trigger runs the same scoring call with the same inputs/outputs.
    all_inputs = [
        image_input,
        w_sharpness, w_contrast, w_noise, w_text_presence,
        w_brightness, w_entropy, w_learned_iqa,
        ocr_threshold, learned_metric,
    ]
    all_outputs = [result_md, api_json, config_json, signal_state]

    score_btn.click(fn=score_document, inputs=all_inputs, outputs=all_outputs)

    # Auto-score on image upload.
    image_input.change(fn=score_document, inputs=all_inputs, outputs=all_outputs)

    # Re-score when a slider is released (not on every intermediate value).
    for slider in [w_sharpness, w_contrast, w_noise, w_text_presence,
                   w_brightness, w_entropy, w_learned_iqa, ocr_threshold]:
        slider.release(fn=score_document, inputs=all_inputs, outputs=all_outputs)

    learned_metric.change(fn=score_document, inputs=all_inputs, outputs=all_outputs)
# Script entry point: serve the app on all interfaces, port 7860.
if __name__ == "__main__":
    demo.launch(server_name="0.0.0.0", server_port=7860)