Spaces:
Sleeping
Sleeping
| """ | |
Document Readability Scorer — Gradio App
=========================================
Interactive UI for scoring documents and calibrating weights.
Upload documents, adjust signal weights with sliders, and see
how the readability score changes in real time.
| """ | |
| import os | |
| import json | |
| import tempfile | |
| import gradio as gr | |
| import numpy as np | |
| from PIL import Image | |
| from document_readability import ( | |
| DocumentReadabilityScorer, | |
| ScorerConfig, | |
| ReadabilityResult, | |
| ) | |
# ─── Scoring logic ───────────────────────────────────────────────────────────
# Canonical signal order. It is shared by the weight sliders, the
# ``ScorerConfig`` ``w_<name>`` fields, the exported config and the list of
# per-signal scores, so the order is defined once instead of being repeated
# by index.
_WEIGHT_NAMES = (
    "sharpness", "contrast", "noise", "text_presence",
    "brightness", "entropy", "learned_iqa",
)


def _normalize_weights(raw_weights):
    """Scale *raw_weights* to sum to 1.0; return None when they sum to zero."""
    total = sum(raw_weights)
    if total == 0:
        return None
    return [w / total for w in raw_weights]


def _collect_issues(signals):
    """Return one warning line for each signal that falls below its alert level."""
    issues = []
    if signals['sharpness'] < 0.3:
        issues.append("β οΈ **Blur detected** β document is too blurry for reliable OCR")
    if signals['contrast'] < 0.3:
        issues.append("β οΈ **Low contrast** β text may not be distinguishable from background")
    if signals['noise'] < 0.3:
        issues.append("β οΈ **High noise** β may cause OCR character errors")
    # NOTE(review): a missing 'has_text' key counts as "has text" here, but the
    # breakdown table in _build_summary treats a missing key as "no text".
    # Confirm which default is intended.
    if not signals.get('has_text', True):
        issues.append("β οΈ **No text detected** β page may be blank or non-textual")
    if signals['brightness'] < 0.3:
        issues.append("β οΈ **Bad exposure** β document is too dark or over-exposed")
    if signals['entropy'] < 0.15:
        issues.append("β οΈ **Very low information content** β possibly blank page")
    return issues


def _build_summary(result):
    """Render the Markdown report (headline, signal table, diagnostics) for *result*."""
    # NOTE(review): the non-ASCII glyphs in these literals look mis-encoded
    # (emoji/dash bytes decoded with the wrong charset). They are kept verbatim
    # here; restore them from a known-good copy of the file.
    badges = {"excellent": "π’", "good": "π’", "fair": "π‘", "poor": "π ", "bad": "π΄"}
    badge = badges.get(result.confidence_label, "βͺ")
    if result.ocr_recommended:
        ocr_status = "β Proceed with OCR"
    else:
        ocr_status = "β Skip OCR (below threshold)"
    signals = result.signals
    report = f"""## {badge} Readability Score: **{result.readability_score:.3f}** / 1.000
### Verdict: **{result.confidence_label.upper()}** β {ocr_status}
---
### Signal Breakdown
| Signal | Score | Raw Value | Description |
|--------|-------|-----------|-------------|
| π Sharpness | **{signals['sharpness']:.3f}** | Lap. var = {signals['laplacian_variance']:.1f} | {'Sharp β' if signals['sharpness'] > 0.5 else 'β οΈ Blurry'} |
| π¨ Contrast | **{signals['contrast']:.3f}** | RMS = {signals['rms_contrast']:.3f} | {'Good β' if signals['contrast'] > 0.4 else 'β οΈ Low contrast'} |
| π‘ Noise | **{signals['noise']:.3f}** | Ο = {signals['noise_sigma']:.2f} | {'Clean β' if signals['noise'] > 0.5 else 'β οΈ Noisy'} |
| π Text Presence | **{signals['text_presence']:.3f}** | Coverage = {signals['text_coverage']:.3f} | {'Has text β' if signals.get('has_text') else 'β οΈ No text detected'} |
| βοΈ Brightness | **{signals['brightness']:.3f}** | Mean = {signals['mean_brightness']:.1f} | {'Normal β' if signals['brightness'] > 0.5 else 'β οΈ Bad exposure'} |
| π Entropy | **{signals['entropy']:.3f}** | H = {signals['shannon_entropy']:.2f} | {'Content β' if signals['entropy'] > 0.3 else 'β οΈ Low info'} |
| π§ Learned IQA | **{signals['learned_iqa']:.3f}** | {signals.get('metric_name', 'N/A')} | {'Good β' if signals['learned_iqa'] > 0.5 else 'β οΈ Low quality'} |
---
### Diagnostics
"""
    issues = _collect_issues(signals)
    if issues:
        report += "\n".join(issues)
    else:
        report += "β No major issues detected"
    return report


def score_document(
    image,
    w_sharpness, w_contrast, w_noise, w_text_presence,
    w_brightness, w_entropy, w_learned_iqa,
    ocr_threshold,
    learned_metric,
):
    """Score a document image with the given weights and format the results.

    Args:
        image: The uploaded image as a numpy array or an object the scorer
            accepts directly; ``None`` when nothing has been uploaded.
        w_sharpness, w_contrast, w_noise, w_text_presence, w_brightness,
        w_entropy, w_learned_iqa: Raw signal weights. They are rescaled to
            sum to 1.0 before being handed to ``ScorerConfig``.
        ocr_threshold: Threshold forwarded unchanged to ``ScorerConfig``.
        learned_metric: Name of the learned IQA metric; the string
            ``"disabled"`` is translated to ``None``.

    Returns:
        A 4-tuple ``(summary, api_output, config_output, signal_values)``:
        the Markdown report, ``result.to_dict()`` as a JSON string, the
        normalized configuration as a JSON string, and the seven per-signal
        scores in ``_WEIGHT_NAMES`` order. When no image is given or all
        weights are zero, the first element is a short message and the other
        three are ``None``.
    """
    if image is None:
        return (
            "β¬οΈ Upload a document image to get started",
            None, None, None
        )

    weights = _normalize_weights([
        w_sharpness, w_contrast, w_noise, w_text_presence,
        w_brightness, w_entropy, w_learned_iqa,
    ])
    if weights is None:
        return "β All weights are zero!", None, None, None
    weight_by_name = dict(zip(_WEIGHT_NAMES, weights))

    config = ScorerConfig(
        **{f"w_{name}": value for name, value in weight_by_name.items()},
        ocr_threshold=ocr_threshold,
        learned_metric=learned_metric if learned_metric != "disabled" else None,
        device="cpu",
    )
    # NOTE(review): a fresh scorer is built on every call, and calls happen on
    # every slider release. If construction is expensive (e.g. it loads the
    # learned IQA model — TODO confirm), consider reusing one per metric.
    scorer = DocumentReadabilityScorer(config)

    # Gradio hands the image over as a numpy array; anything else is passed
    # through to the scorer unchanged.
    pil_img = Image.fromarray(image) if isinstance(image, np.ndarray) else image
    result = scorer.score(pil_img)

    summary = _build_summary(result)

    # Per-signal scores, in canonical order, for downstream charting.
    signal_values = [result.signals[name] for name in _WEIGHT_NAMES]

    # JSON for API/programmatic use.
    api_output = json.dumps(result.to_dict(), indent=2)

    # Normalized configuration, so a run can be reproduced outside the UI.
    config_output = json.dumps({
        "weights": {
            name: round(value, 4) for name, value in weight_by_name.items()
        },
        "ocr_threshold": ocr_threshold,
        "learned_metric": learned_metric,
    }, indent=2)

    return summary, api_output, config_output, signal_values
def create_bar_plot(signal_values):
    """Map per-signal scores onto short display labels for a bar chart.

    Args:
        signal_values: Sequence of scores, positionally matched to the labels
            Sharp, Contrast, Noise, Text, Bright, Entropy, IQA; or ``None``
            when there is nothing to plot.

    Returns:
        A ``{label: score}`` dict in label order, or ``None`` when
        *signal_values* is ``None``. Labels and values are paired by position
        and pairing stops at the shorter of the two, so a short or empty
        sequence gives a partial or empty mapping instead of an
        ``IndexError``, and extra values are ignored.
    """
    if signal_values is None:
        return None
    names = ("Sharp", "Contrast", "Noise", "Text", "Bright", "Entropy", "IQA")
    return dict(zip(names, signal_values))
# ─── Gradio UI ───────────────────────────────────────────────────────────────
# Markdown shown at the top of the page: what the tool is for, the seven
# signals it combines, and a hint on calibrating the weight sliders.
# NOTE(review): some non-ASCII glyphs in this text look mis-encoded (emoji and
# the dash in the score range). They are kept verbatim; restore them from a
# known-good copy of the file.
DESCRIPTION = """
# π Document Readability Scorer
**Pre-screen documents before expensive OCR/LLM inference.** Upload a document image and get a readability score
with detailed signal breakdown. Adjust weights to calibrate for your specific pipeline.
### How it works
The scorer extracts 7 independent signals from the image and combines them into a single **readability score** (0β1):
| Signal | What it measures | Method |
|--------|-----------------|--------|
| **Sharpness** | Is the text sharp/blurry? | Laplacian variance + FFT high-freq energy |
| **Contrast** | Is text distinguishable from background? | RMS + Michelson contrast |
| **Noise** | How clean is the image? | Immerkær noise estimation |
| **Text Presence** | Is there text on the page? | MSER regions + Sobel edge density |
| **Brightness** | Is exposure appropriate? | Mean brightness + saturation analysis |
| **Entropy** | Is there information content? | Shannon entropy |
| **Learned IQA** | ML-based quality score | CLIP-IQA via pyiqa library |
> π‘ **Calibration**: Adjust the weight sliders to match your pipeline's sensitivity. For example, if your OCR handles blur well but fails on low contrast, increase the contrast weight.
"""
# Markdown for the "Integration Guide" accordion: copy-paste examples for
# using the scorer from Python, plus a pointer to a GPU/VLM-based alternative.
# The embedded samples are indented as real Python so the text users copy out
# of the rendered guide parses as-is.
# NOTE(review): the arrow-like glyph in the batch-processing comment looks
# mis-encoded; it is kept verbatim.
INTEGRATION_GUIDE = """
### Python Integration
```python
from document_readability import DocumentReadabilityScorer, ScorerConfig
# Use default weights
scorer = DocumentReadabilityScorer()
result = scorer.score("document.png")
if result.ocr_recommended:
    # Proceed with expensive OCR/LLM
    run_ocr_pipeline(document)
else:
    log_rejected(result.signals) # Log why it was rejected
# Custom calibration
config = ScorerConfig(
    w_sharpness=0.35, # prioritize sharpness
    w_contrast=0.20, # important for your docs
    w_noise=0.05, # your OCR handles noise well
    w_text_presence=0.15,
    w_brightness=0.05,
    w_entropy=0.10,
    w_learned_iqa=0.10,
    ocr_threshold=0.50, # your calibrated threshold
    learned_metric="clipiqa", # or "brisque", "topiq_nr", None
)
scorer = DocumentReadabilityScorer(config)
# Batch processing
from document_readability import score_batch
results = score_batch(["doc1.png", "doc2.jpg", "doc3.tiff"])
# β sorted by readability score, highest first
```
### For GPU-accelerated scoring (VLM-based)
If you need even higher accuracy, use `mapo80/DeQA-Doc-Sharpness` (a 7B VLM scorer, SRCC ~0.92 on document quality):
```python
# Requires GPU (16GB VRAM)
from transformers import AutoModelForCausalLM
import torch
model = AutoModelForCausalLM.from_pretrained(
    "mapo80/DeQA-Doc-Sharpness",
    trust_remote_code=True,
    torch_dtype=torch.float16,
    device_map="auto",
)
score = model.score([pil_image]).item() # 1-5 scale
```
"""
# Page layout and event wiring. Inputs and calibration controls sit in the
# left column, results in the right column, and the integration guide below.
# NOTE(review): the nesting was reconstructed from a copy of this file whose
# indentation had been stripped — confirm the Row/Column placement.
# NOTE(review): non-ASCII glyphs in the labels look mis-encoded and are kept
# verbatim; restore them from a known-good copy of the file.
with gr.Blocks(
    title="Document Readability Scorer",
    theme=gr.themes.Soft(),
) as demo:
    gr.Markdown(DESCRIPTION)

    with gr.Row():
        # Left column: image upload and calibration controls.
        with gr.Column(scale=1):
            image_input = gr.Image(
                label="π Upload Document",
                type="numpy",
                height=400,
            )

            gr.Markdown("### βοΈ Signal Weights (auto-normalized to sum to 1.0)")
            w_sharpness = gr.Slider(
                minimum=0, maximum=1, value=0.30, step=0.05, label="π Sharpness")
            w_contrast = gr.Slider(
                minimum=0, maximum=1, value=0.15, step=0.05, label="π¨ Contrast")
            w_noise = gr.Slider(
                minimum=0, maximum=1, value=0.10, step=0.05, label="π‘ Noise (inverted)")
            w_text_presence = gr.Slider(
                minimum=0, maximum=1, value=0.15, step=0.05, label="π Text Presence")
            w_brightness = gr.Slider(
                minimum=0, maximum=1, value=0.05, step=0.05, label="βοΈ Brightness")
            w_entropy = gr.Slider(
                minimum=0, maximum=1, value=0.10, step=0.05, label="π Entropy")
            w_learned_iqa = gr.Slider(
                minimum=0, maximum=1, value=0.15, step=0.05, label="π§ Learned IQA")

            ocr_threshold = gr.Slider(
                minimum=0, maximum=1, value=0.45, step=0.05,
                label="π― OCR Threshold (score below β skip OCR)",
            )
            learned_metric = gr.Dropdown(
                choices=["clipiqa", "brisque", "niqe", "topiq_nr", "disabled"],
                value="clipiqa",
                label="π§ Learned IQA Metric",
            )
            score_btn = gr.Button("π Score Document", variant="primary", size="lg")

        # Right column: Markdown report plus collapsible JSON views.
        with gr.Column(scale=1):
            result_md = gr.Markdown("β¬οΈ Upload a document to get started")
            with gr.Accordion("π API Response (JSON)", open=False):
                api_json = gr.Code(language="json", label="API Response")
            with gr.Accordion("βοΈ Current Config (for reproduction)", open=False):
                config_json = gr.Code(language="json", label="Config")
            # Invisible holder for the per-signal scores returned by score_document.
            signal_state = gr.State(None)

    with gr.Accordion("π Integration Guide", open=False):
        gr.Markdown(INTEGRATION_GUIDE)

    # Component lists, in the same order as score_document's parameters and
    # its returned tuple.
    all_inputs = [
        image_input,
        w_sharpness, w_contrast, w_noise, w_text_presence,
        w_brightness, w_entropy, w_learned_iqa,
        ocr_threshold, learned_metric,
    ]
    all_outputs = [result_md, api_json, config_json, signal_state]

    # Every trigger re-runs the same scoring call: the button, an image
    # change, releasing any slider, and switching the learned metric.
    weight_and_threshold_sliders = (
        w_sharpness, w_contrast, w_noise, w_text_presence,
        w_brightness, w_entropy, w_learned_iqa, ocr_threshold,
    )
    rescore_triggers = [
        score_btn.click,
        image_input.change,
        *(slider.release for slider in weight_and_threshold_sliders),
        learned_metric.change,
    ]
    for trigger in rescore_triggers:
        trigger(fn=score_document, inputs=all_inputs, outputs=all_outputs)
if __name__ == "__main__":
    # When run as a script, serve the app on all interfaces at port 7860.
    launch_options = {"server_name": "0.0.0.0", "server_port": 7860}
    demo.launch(**launch_options)