rbaks's picture
Upload app.py
feca759 verified
"""
Document Readability Scorer — Gradio App
=========================================
Interactive UI for scoring documents and calibrating weights.
Upload documents, adjust signal weights with sliders, and see
how the readability score changes in real-time.
"""
import os
import json
import tempfile
import gradio as gr
import numpy as np
from PIL import Image
from document_readability import (
DocumentReadabilityScorer,
ScorerConfig,
ReadabilityResult,
)
# ─── Scoring logic ───────────────────────────────────────────────────────────
def score_document(
    image,
    w_sharpness, w_contrast, w_noise, w_text_presence,
    w_brightness, w_entropy, w_learned_iqa,
    ocr_threshold,
    learned_metric,
):
    """Score a document image and build everything the UI displays.

    The seven weights are rescaled so that they sum to 1.0 before being
    handed to ``ScorerConfig``; only their relative proportions matter.

    Args:
        image: The uploaded page. A NumPy array is converted to a PIL image;
            anything else is passed to the scorer unchanged. ``None`` means
            nothing has been uploaded yet.
        w_sharpness: Relative weight of the sharpness signal.
        w_contrast: Relative weight of the contrast signal.
        w_noise: Relative weight of the noise signal.
        w_text_presence: Relative weight of the text-presence signal.
        w_brightness: Relative weight of the brightness signal.
        w_entropy: Relative weight of the entropy signal.
        w_learned_iqa: Relative weight of the learned IQA signal.
        ocr_threshold: Forwarded to ``ScorerConfig`` unchanged.
        learned_metric: Name of the learned IQA metric. The string
            ``"disabled"`` is translated to ``None`` for the scorer.

    Returns:
        A 4-tuple ``(summary, api_output, config_output, signal_values)``:
        a Markdown report, the JSON dump of ``result.to_dict()``, a JSON dump
        of the normalized weights and settings, and the list of the seven
        signal scores. When no image is given, or when the weights sum to
        zero, the first element is a short message and the rest are ``None``.
    """
    if image is None:
        return (
            "⬆️ Upload a document image to get started",
            None, None, None,
        )

    # Normalize weights to sum to 1.0
    raw_weights = [w_sharpness, w_contrast, w_noise, w_text_presence,
                   w_brightness, w_entropy, w_learned_iqa]
    total = sum(raw_weights)
    if total == 0:
        # Nothing to normalize by; bail out before dividing by zero.
        return "❌ All weights are zero!", None, None, None
    weights = [w / total for w in raw_weights]

    config = ScorerConfig(
        w_sharpness=weights[0],
        w_contrast=weights[1],
        w_noise=weights[2],
        w_text_presence=weights[3],
        w_brightness=weights[4],
        w_entropy=weights[5],
        w_learned_iqa=weights[6],
        ocr_threshold=ocr_threshold,
        learned_metric=learned_metric if learned_metric != "disabled" else None,
        device="cpu",
    )
    # NOTE(review): a fresh scorer is built on every call. If construction
    # loads the learned-IQA model, each slider release pays that cost again;
    # confirm against DocumentReadabilityScorer and consider caching.
    scorer = DocumentReadabilityScorer(config)

    # Convert gradio image (numpy array) to PIL
    pil_img = Image.fromarray(image) if isinstance(image, np.ndarray) else image
    result = scorer.score(pil_img)
    signals = result.signals

    # ── Build the summary ──
    emoji = {"excellent": "🟢", "good": "🟢", "fair": "🟡", "poor": "🟠", "bad": "🔴"}
    badge = emoji.get(result.confidence_label, "⚪")
    ocr_status = "✅ Proceed with OCR" if result.ocr_recommended else "⛔ Skip OCR (below threshold)"

    # NOTE(review): the Text Presence row treats a missing 'has_text' key as
    # "no text", while the diagnostics below treat a missing key as "has
    # text". Left as-is because the intended default is not visible here.
    summary = f"""## {badge} Readability Score: **{result.readability_score:.3f}** / 1.000
### Verdict: **{result.confidence_label.upper()}** — {ocr_status}
---
### Signal Breakdown
| Signal | Score | Raw Value | Description |
|--------|-------|-----------|-------------|
| 🔍 Sharpness | **{signals['sharpness']:.3f}** | Lap. var = {signals['laplacian_variance']:.1f} | {'Sharp ✓' if signals['sharpness'] > 0.5 else '⚠️ Blurry'} |
| 🎨 Contrast | **{signals['contrast']:.3f}** | RMS = {signals['rms_contrast']:.3f} | {'Good ✓' if signals['contrast'] > 0.4 else '⚠️ Low contrast'} |
| 📡 Noise | **{signals['noise']:.3f}** | σ = {signals['noise_sigma']:.2f} | {'Clean ✓' if signals['noise'] > 0.5 else '⚠️ Noisy'} |
| 📝 Text Presence | **{signals['text_presence']:.3f}** | Coverage = {signals['text_coverage']:.3f} | {'Has text ✓' if signals.get('has_text') else '⚠️ No text detected'} |
| ☀️ Brightness | **{signals['brightness']:.3f}** | Mean = {signals['mean_brightness']:.1f} | {'Normal ✓' if signals['brightness'] > 0.5 else '⚠️ Bad exposure'} |
| 📊 Entropy | **{signals['entropy']:.3f}** | H = {signals['shannon_entropy']:.2f} | {'Content ✓' if signals['entropy'] > 0.3 else '⚠️ Low info'} |
| 🧠 Learned IQA | **{signals['learned_iqa']:.3f}** | {signals.get('metric_name', 'N/A')} | {'Good ✓' if signals['learned_iqa'] > 0.5 else '⚠️ Low quality'} |
---
### Diagnostics
"""

    # Add specific warnings
    issues = []
    if signals['sharpness'] < 0.3:
        issues.append("⚠️ **Blur detected** — document is too blurry for reliable OCR")
    if signals['contrast'] < 0.3:
        issues.append("⚠️ **Low contrast** — text may not be distinguishable from background")
    if signals['noise'] < 0.3:
        issues.append("⚠️ **High noise** — may cause OCR character errors")
    if not signals.get('has_text', True):
        issues.append("⚠️ **No text detected** — page may be blank or non-textual")
    if signals['brightness'] < 0.3:
        issues.append("⚠️ **Bad exposure** — document is too dark or over-exposed")
    if signals['entropy'] < 0.15:
        issues.append("⚠️ **Very low information content** — possibly blank page")

    if issues:
        summary += "\n".join(issues)
    else:
        summary += "✅ No major issues detected"

    # ── Bar chart data ──
    signal_values = [
        signals['sharpness'], signals['contrast'],
        signals['noise'], signals['text_presence'],
        signals['brightness'], signals['entropy'],
        signals['learned_iqa'],
    ]

    # ── JSON for API/programmatic use ──
    api_output = json.dumps(result.to_dict(), indent=2)

    # ── Config for reproduction ──
    # The original dropdown value is reported here, so "disabled" appears
    # as-is rather than as null.
    config_output = json.dumps({
        "weights": {
            "sharpness": round(weights[0], 4),
            "contrast": round(weights[1], 4),
            "noise": round(weights[2], 4),
            "text_presence": round(weights[3], 4),
            "brightness": round(weights[4], 4),
            "entropy": round(weights[5], 4),
            "learned_iqa": round(weights[6], 4),
        },
        "ocr_threshold": ocr_threshold,
        "learned_metric": learned_metric,
    }, indent=2)

    return summary, api_output, config_output, signal_values
def create_bar_plot(signal_values):
    """Pair short signal labels with their scores for a bar chart.

    Despite the name, no figure is drawn: the result is a plain mapping
    from label to score.

    Args:
        signal_values: Scores in the same order as the labels below
            (sharpness, contrast, noise, text, brightness, entropy, IQA),
            or ``None`` when nothing has been scored yet.

    Returns:
        ``None`` if ``signal_values`` is ``None``; otherwise a dict mapping
        each label to the value at the same position. Values beyond the
        seventh are ignored, and if fewer are supplied only the leading
        labels appear.
    """
    if signal_values is None:
        return None
    labels = ("Sharp", "Contrast", "Noise", "Text", "Bright", "Entropy", "IQA")
    # zip pairs positionally, replacing the manual index loop.
    return dict(zip(labels, signal_values))
# ─── Gradio UI ────────────────────────────────────────────────────────────────
# Markdown rendered at the top of the page: what the app does and a table of
# the seven signals that feed the readability score.
DESCRIPTION = """
# 📄 Document Readability Scorer
**Pre-screen documents before expensive OCR/LLM inference.** Upload a document image and get a readability score
with detailed signal breakdown. Adjust weights to calibrate for your specific pipeline.
### How it works
The scorer extracts 7 independent signals from the image and combines them into a single **readability score** (0–1):
| Signal | What it measures | Method |
|--------|-----------------|--------|
| **Sharpness** | Is the text sharp/blurry? | Laplacian variance + FFT high-freq energy |
| **Contrast** | Is text distinguishable from background? | RMS + Michelson contrast |
| **Noise** | How clean is the image? | Immerkær noise estimation |
| **Text Presence** | Is there text on the page? | MSER regions + Sobel edge density |
| **Brightness** | Is exposure appropriate? | Mean brightness + saturation analysis |
| **Entropy** | Is there information content? | Shannon entropy |
| **Learned IQA** | ML-based quality score | CLIP-IQA via pyiqa library |
> 💡 **Calibration**: Adjust the weight sliders to match your pipeline's sensitivity. For example, if your OCR handles blur well but fails on low contrast, increase the contrast weight.
"""
# Markdown shown in the "Integration Guide" accordion. The embedded samples
# are display-only text; they are never executed by this app.
INTEGRATION_GUIDE = """
### Python Integration
```python
from document_readability import DocumentReadabilityScorer, ScorerConfig
# Use default weights
scorer = DocumentReadabilityScorer()
result = scorer.score("document.png")
if result.ocr_recommended:
    # Proceed with expensive OCR/LLM
    run_ocr_pipeline(document)
else:
    log_rejected(result.signals) # Log why it was rejected
# Custom calibration
config = ScorerConfig(
    w_sharpness=0.35, # prioritize sharpness
    w_contrast=0.20, # important for your docs
    w_noise=0.05, # your OCR handles noise well
    w_text_presence=0.15,
    w_brightness=0.05,
    w_entropy=0.10,
    w_learned_iqa=0.10,
    ocr_threshold=0.50, # your calibrated threshold
    learned_metric="clipiqa", # or "brisque", "topiq_nr", None
)
scorer = DocumentReadabilityScorer(config)
# Batch processing
from document_readability import score_batch
results = score_batch(["doc1.png", "doc2.jpg", "doc3.tiff"])
# → sorted by readability score, highest first
```
### For GPU-accelerated scoring (VLM-based)
If you need even higher accuracy, use `mapo80/DeQA-Doc-Sharpness` (a 7B VLM scorer, SRCC ~0.92 on document quality):
```python
# Requires GPU (16GB VRAM)
from transformers import AutoModelForCausalLM
import torch
model = AutoModelForCausalLM.from_pretrained(
    "mapo80/DeQA-Doc-Sharpness",
    trust_remote_code=True,
    torch_dtype=torch.float16,
    device_map="auto",
)
score = model.score([pil_image]).item() # 1-5 scale
```
"""
# Build the page: inputs on the left, results on the right, then wire every
# control to ``score_document`` so the report refreshes on any change.
with gr.Blocks(
    title="Document Readability Scorer",
    theme=gr.themes.Soft(),
) as demo:
    gr.Markdown(DESCRIPTION)

    with gr.Row():
        # ── Left column: Input ──
        with gr.Column(scale=1):
            image_input = gr.Image(
                label="📄 Upload Document",
                type="numpy",
                height=400,
            )
            gr.Markdown("### ⚖️ Signal Weights (auto-normalized to sum to 1.0)")
            w_sharpness = gr.Slider(0, 1, value=0.30, step=0.05, label="🔍 Sharpness")
            w_contrast = gr.Slider(0, 1, value=0.15, step=0.05, label="🎨 Contrast")
            w_noise = gr.Slider(0, 1, value=0.10, step=0.05, label="📡 Noise (inverted)")
            w_text_presence = gr.Slider(0, 1, value=0.15, step=0.05, label="📝 Text Presence")
            w_brightness = gr.Slider(0, 1, value=0.05, step=0.05, label="☀️ Brightness")
            w_entropy = gr.Slider(0, 1, value=0.10, step=0.05, label="📊 Entropy")
            w_learned_iqa = gr.Slider(0, 1, value=0.15, step=0.05, label="🧠 Learned IQA")
            ocr_threshold = gr.Slider(
                0, 1, value=0.45, step=0.05,
                label="🎯 OCR Threshold (score below → skip OCR)"
            )
            learned_metric = gr.Dropdown(
                choices=["clipiqa", "brisque", "niqe", "topiq_nr", "disabled"],
                value="clipiqa",
                label="🧠 Learned IQA Metric",
            )
            score_btn = gr.Button("🔍 Score Document", variant="primary", size="lg")

        # ── Right column: Output ──
        with gr.Column(scale=1):
            result_md = gr.Markdown("⬆️ Upload a document to get started")
            with gr.Accordion("📋 API Response (JSON)", open=False):
                api_json = gr.Code(language="json", label="API Response")
            with gr.Accordion("⚙️ Current Config (for reproduction)", open=False):
                config_json = gr.Code(language="json", label="Config")
            # Hidden state for signal values
            signal_state = gr.State(None)

    with gr.Accordion("📖 Integration Guide", open=False):
        gr.Markdown(INTEGRATION_GUIDE)

    # ── Event handlers ──
    # The order of all_inputs must match score_document's positional
    # parameters, and all_outputs must match its 4-tuple return value.
    all_inputs = [
        image_input,
        w_sharpness, w_contrast, w_noise, w_text_presence,
        w_brightness, w_entropy, w_learned_iqa,
        ocr_threshold, learned_metric,
    ]
    all_outputs = [result_md, api_json, config_json, signal_state]

    score_btn.click(fn=score_document, inputs=all_inputs, outputs=all_outputs)

    # Auto-score on image upload
    image_input.change(fn=score_document, inputs=all_inputs, outputs=all_outputs)

    # Re-score when weights change. ``release`` is used rather than
    # ``change`` so a drag triggers one re-score when the handle is let go.
    for slider in [w_sharpness, w_contrast, w_noise, w_text_presence,
                   w_brightness, w_entropy, w_learned_iqa, ocr_threshold]:
        slider.release(fn=score_document, inputs=all_inputs, outputs=all_outputs)

    learned_metric.change(fn=score_document, inputs=all_inputs, outputs=all_outputs)
if __name__ == "__main__":
    # Listen on all interfaces (not just localhost) on port 7860 so the app
    # is reachable from outside the host or container it runs in.
    demo.launch(
        server_name="0.0.0.0",
        server_port=7860,
    )