""" Document Readability Scorer — Gradio App ========================================= Interactive UI for scoring documents and calibrating weights. Upload documents, adjust signal weights with sliders, and see how the readability score changes in real-time. """ import os import json import tempfile import gradio as gr import numpy as np from PIL import Image from document_readability import ( DocumentReadabilityScorer, ScorerConfig, ReadabilityResult, ) # ─── Scoring logic ─────────────────────────────────────────────────────────── def score_document( image, w_sharpness, w_contrast, w_noise, w_text_presence, w_brightness, w_entropy, w_learned_iqa, ocr_threshold, learned_metric, ): """Score a document with the given weights and return results.""" if image is None: return ( "⬆️ Upload a document image to get started", None, None, None ) # Normalize weights to sum to 1.0 weights = [w_sharpness, w_contrast, w_noise, w_text_presence, w_brightness, w_entropy, w_learned_iqa] total = sum(weights) if total == 0: return "❌ All weights are zero!", None, None, None weights = [w / total for w in weights] config = ScorerConfig( w_sharpness=weights[0], w_contrast=weights[1], w_noise=weights[2], w_text_presence=weights[3], w_brightness=weights[4], w_entropy=weights[5], w_learned_iqa=weights[6], ocr_threshold=ocr_threshold, learned_metric=learned_metric if learned_metric != "disabled" else None, device="cpu", ) scorer = DocumentReadabilityScorer(config) # Convert gradio image (numpy array) to PIL if isinstance(image, np.ndarray): pil_img = Image.fromarray(image) else: pil_img = image result = scorer.score(pil_img) # ── Build the summary ── emoji = {"excellent": "🟢", "good": "🟢", "fair": "🟡", "poor": "🟠", "bad": "🔴"} e = emoji.get(result.confidence_label, "⚪") ocr_status = "✅ Proceed with OCR" if result.ocr_recommended else "⛔ Skip OCR (below threshold)" summary = f"""## {e} Readability Score: **{result.readability_score:.3f}** / 1.000 ### Verdict: **{result.confidence_label.upper()}** — {ocr_status} --- ### Signal Breakdown | Signal | Score | Raw Value | Description | |--------|-------|-----------|-------------| | 🔍 Sharpness | **{result.signals['sharpness']:.3f}** | Lap. var = {result.signals['laplacian_variance']:.1f} | {'Sharp ✓' if result.signals['sharpness'] > 0.5 else '⚠️ Blurry'} | | 🎨 Contrast | **{result.signals['contrast']:.3f}** | RMS = {result.signals['rms_contrast']:.3f} | {'Good ✓' if result.signals['contrast'] > 0.4 else '⚠️ Low contrast'} | | 📡 Noise | **{result.signals['noise']:.3f}** | σ = {result.signals['noise_sigma']:.2f} | {'Clean ✓' if result.signals['noise'] > 0.5 else '⚠️ Noisy'} | | 📝 Text Presence | **{result.signals['text_presence']:.3f}** | Coverage = {result.signals['text_coverage']:.3f} | {'Has text ✓' if result.signals.get('has_text') else '⚠️ No text detected'} | | ☀️ Brightness | **{result.signals['brightness']:.3f}** | Mean = {result.signals['mean_brightness']:.1f} | {'Normal ✓' if result.signals['brightness'] > 0.5 else '⚠️ Bad exposure'} | | 📊 Entropy | **{result.signals['entropy']:.3f}** | H = {result.signals['shannon_entropy']:.2f} | {'Content ✓' if result.signals['entropy'] > 0.3 else '⚠️ Low info'} | | 🧠 Learned IQA | **{result.signals['learned_iqa']:.3f}** | {result.signals.get('metric_name', 'N/A')} | {'Good ✓' if result.signals['learned_iqa'] > 0.5 else '⚠️ Low quality'} | --- ### Diagnostics """ # Add specific warnings issues = [] if result.signals['sharpness'] < 0.3: issues.append("⚠️ **Blur detected** — document is too blurry for reliable OCR") if result.signals['contrast'] < 0.3: issues.append("⚠️ **Low contrast** — text may not be distinguishable from background") if result.signals['noise'] < 0.3: issues.append("⚠️ **High noise** — may cause OCR character errors") if not result.signals.get('has_text', True): issues.append("⚠️ **No text detected** — page may be blank or non-textual") if result.signals['brightness'] < 0.3: issues.append("⚠️ **Bad exposure** — document is too dark or over-exposed") if result.signals['entropy'] < 0.15: issues.append("⚠️ **Very low information content** — possibly blank page") if issues: summary += "\n".join(issues) else: summary += "✅ No major issues detected" # ── Bar chart data ── signal_names = ["Sharpness", "Contrast", "Noise", "Text", "Brightness", "Entropy", "IQA"] signal_values = [ result.signals['sharpness'], result.signals['contrast'], result.signals['noise'], result.signals['text_presence'], result.signals['brightness'], result.signals['entropy'], result.signals['learned_iqa'] ] # ── JSON for API/programmatic use ── api_output = json.dumps(result.to_dict(), indent=2) # ── Config for reproduction ── config_output = json.dumps({ "weights": { "sharpness": round(weights[0], 4), "contrast": round(weights[1], 4), "noise": round(weights[2], 4), "text_presence": round(weights[3], 4), "brightness": round(weights[4], 4), "entropy": round(weights[5], 4), "learned_iqa": round(weights[6], 4), }, "ocr_threshold": ocr_threshold, "learned_metric": learned_metric, }, indent=2) return summary, api_output, config_output, signal_values def create_bar_plot(signal_values): """Create a simple bar plot of signal scores.""" if signal_values is None: return None names = ["Sharp", "Contrast", "Noise", "Text", "Bright", "Entropy", "IQA"] bars = {names[i]: signal_values[i] for i in range(len(names))} return bars # ─── Gradio UI ──────────────────────────────────────────────────────────────── DESCRIPTION = """ # 📄 Document Readability Scorer **Pre-screen documents before expensive OCR/LLM inference.** Upload a document image and get a readability score with detailed signal breakdown. Adjust weights to calibrate for your specific pipeline. ### How it works The scorer extracts 7 independent signals from the image and combines them into a single **readability score** (0–1): | Signal | What it measures | Method | |--------|-----------------|--------| | **Sharpness** | Is the text sharp/blurry? | Laplacian variance + FFT high-freq energy | | **Contrast** | Is text distinguishable from background? | RMS + Michelson contrast | | **Noise** | How clean is the image? | Immerkær noise estimation | | **Text Presence** | Is there text on the page? | MSER regions + Sobel edge density | | **Brightness** | Is exposure appropriate? | Mean brightness + saturation analysis | | **Entropy** | Is there information content? | Shannon entropy | | **Learned IQA** | ML-based quality score | CLIP-IQA via pyiqa library | > 💡 **Calibration**: Adjust the weight sliders to match your pipeline's sensitivity. For example, if your OCR handles blur well but fails on low contrast, increase the contrast weight. """ INTEGRATION_GUIDE = """ ### Python Integration ```python from document_readability import DocumentReadabilityScorer, ScorerConfig # Use default weights scorer = DocumentReadabilityScorer() result = scorer.score("document.png") if result.ocr_recommended: # Proceed with expensive OCR/LLM run_ocr_pipeline(document) else: log_rejected(result.signals) # Log why it was rejected # Custom calibration config = ScorerConfig( w_sharpness=0.35, # prioritize sharpness w_contrast=0.20, # important for your docs w_noise=0.05, # your OCR handles noise well w_text_presence=0.15, w_brightness=0.05, w_entropy=0.10, w_learned_iqa=0.10, ocr_threshold=0.50, # your calibrated threshold learned_metric="clipiqa", # or "brisque", "topiq_nr", None ) scorer = DocumentReadabilityScorer(config) # Batch processing from document_readability import score_batch results = score_batch(["doc1.png", "doc2.jpg", "doc3.tiff"]) # → sorted by readability score, highest first ``` ### For GPU-accelerated scoring (VLM-based) If you need even higher accuracy, use `mapo80/DeQA-Doc-Sharpness` (a 7B VLM scorer, SRCC ~0.92 on document quality): ```python # Requires GPU (16GB VRAM) from transformers import AutoModelForCausalLM import torch model = AutoModelForCausalLM.from_pretrained( "mapo80/DeQA-Doc-Sharpness", trust_remote_code=True, torch_dtype=torch.float16, device_map="auto", ) score = model.score([pil_image]).item() # 1-5 scale ``` """ with gr.Blocks( title="Document Readability Scorer", theme=gr.themes.Soft(), ) as demo: gr.Markdown(DESCRIPTION) with gr.Row(): # ── Left column: Input ── with gr.Column(scale=1): image_input = gr.Image( label="📄 Upload Document", type="numpy", height=400, ) gr.Markdown("### ⚖️ Signal Weights (auto-normalized to sum to 1.0)") w_sharpness = gr.Slider(0, 1, value=0.30, step=0.05, label="🔍 Sharpness") w_contrast = gr.Slider(0, 1, value=0.15, step=0.05, label="🎨 Contrast") w_noise = gr.Slider(0, 1, value=0.10, step=0.05, label="📡 Noise (inverted)") w_text_presence = gr.Slider(0, 1, value=0.15, step=0.05, label="📝 Text Presence") w_brightness = gr.Slider(0, 1, value=0.05, step=0.05, label="☀️ Brightness") w_entropy = gr.Slider(0, 1, value=0.10, step=0.05, label="📊 Entropy") w_learned_iqa = gr.Slider(0, 1, value=0.15, step=0.05, label="🧠 Learned IQA") ocr_threshold = gr.Slider( 0, 1, value=0.45, step=0.05, label="🎯 OCR Threshold (score below → skip OCR)" ) learned_metric = gr.Dropdown( choices=["clipiqa", "brisque", "niqe", "topiq_nr", "disabled"], value="clipiqa", label="🧠 Learned IQA Metric", ) score_btn = gr.Button("🔍 Score Document", variant="primary", size="lg") # ── Right column: Output ── with gr.Column(scale=1): result_md = gr.Markdown("⬆️ Upload a document to get started") with gr.Accordion("📋 API Response (JSON)", open=False): api_json = gr.Code(language="json", label="API Response") with gr.Accordion("⚙️ Current Config (for reproduction)", open=False): config_json = gr.Code(language="json", label="Config") # Hidden state for signal values signal_state = gr.State(None) with gr.Accordion("📖 Integration Guide", open=False): gr.Markdown(INTEGRATION_GUIDE) # ── Event handlers ── all_inputs = [ image_input, w_sharpness, w_contrast, w_noise, w_text_presence, w_brightness, w_entropy, w_learned_iqa, ocr_threshold, learned_metric, ] all_outputs = [result_md, api_json, config_json, signal_state] score_btn.click(fn=score_document, inputs=all_inputs, outputs=all_outputs) # Auto-score on image upload image_input.change(fn=score_document, inputs=all_inputs, outputs=all_outputs) # Re-score when weights change for slider in [w_sharpness, w_contrast, w_noise, w_text_presence, w_brightness, w_entropy, w_learned_iqa, ocr_threshold]: slider.release(fn=score_document, inputs=all_inputs, outputs=all_outputs) learned_metric.change(fn=score_document, inputs=all_inputs, outputs=all_outputs) if __name__ == "__main__": demo.launch(server_name="0.0.0.0", server_port=7860)