Spaces:

rbaks
/

document-readability-scorer

Sleeping

App Files Files Community

document-readability-scorer / app.py

rbaks

Upload app.py

feca759 verified 28 days ago

raw

history blame contribute delete

12.5 kB

	"""
	Document Readability Scorer — Gradio App
	=========================================
	Interactive UI for scoring documents and calibrating weights.
	Upload documents, adjust signal weights with sliders, and see
	how the readability score changes in real-time.
	"""

	import os
	import json
	import tempfile
	import gradio as gr
	import numpy as np
	from PIL import Image

	from document_readability import (
	DocumentReadabilityScorer,
	ScorerConfig,
	ReadabilityResult,
	)

	# ─── Scoring logic ───────────────────────────────────────────────────────────

	def _signal_table_rows(signals):
	    """Return one Markdown table row per signal: label, score, raw value, verdict."""
	    # Each spec: (label, score key, raw-value cell, passes?, text if ok, text if not).
	    specs = (
	        ("🔍 Sharpness", "sharpness",
	         f"Lap. var = {signals['laplacian_variance']:.1f}",
	         signals['sharpness'] > 0.5, "Sharp ✓", "⚠️ Blurry"),
	        ("🎨 Contrast", "contrast",
	         f"RMS = {signals['rms_contrast']:.3f}",
	         signals['contrast'] > 0.4, "Good ✓", "⚠️ Low contrast"),
	        ("📡 Noise", "noise",
	         f"σ = {signals['noise_sigma']:.2f}",
	         signals['noise'] > 0.5, "Clean ✓", "⚠️ Noisy"),
	        # A missing 'has_text' key counts as "no text" in the table.
	        ("📝 Text Presence", "text_presence",
	         f"Coverage = {signals['text_coverage']:.3f}",
	         signals.get('has_text'), "Has text ✓", "⚠️ No text detected"),
	        ("☀️ Brightness", "brightness",
	         f"Mean = {signals['mean_brightness']:.1f}",
	         signals['brightness'] > 0.5, "Normal ✓", "⚠️ Bad exposure"),
	        ("📊 Entropy", "entropy",
	         f"H = {signals['shannon_entropy']:.2f}",
	         signals['entropy'] > 0.3, "Content ✓", "⚠️ Low info"),
	        ("🧠 Learned IQA", "learned_iqa",
	         f"{signals.get('metric_name', 'N/A')}",
	         signals['learned_iqa'] > 0.5, "Good ✓", "⚠️ Low quality"),
	    )
	    return [
	        f"| {label} | {signals[key]:.3f} | {raw} | {good if ok else bad} |"
	        for label, key, raw, ok, good, bad in specs
	    ]


	def _collect_issues(signals):
	    """Return the warning lines for every signal that falls below its alert threshold."""
	    checks = (
	        (signals['sharpness'] < 0.3,
	         "⚠️ Blur detected — document is too blurry for reliable OCR"),
	        (signals['contrast'] < 0.3,
	         "⚠️ Low contrast — text may not be distinguishable from background"),
	        (signals['noise'] < 0.3,
	         "⚠️ High noise — may cause OCR character errors"),
	        # Unlike the table, a missing 'has_text' key is treated as "text present" here.
	        (not signals.get('has_text', True),
	         "⚠️ No text detected — page may be blank or non-textual"),
	        (signals['brightness'] < 0.3,
	         "⚠️ Bad exposure — document is too dark or over-exposed"),
	        (signals['entropy'] < 0.15,
	         "⚠️ Very low information content — possibly blank page"),
	    )
	    return [message for failed, message in checks if failed]


	def _build_summary(result):
	    """Render the Markdown report: headline score, verdict, signal table, diagnostics."""
	    badges = {"excellent": "🟢", "good": "🟢", "fair": "🟡", "poor": "🟠", "bad": "🔴"}
	    badge = badges.get(result.confidence_label, "⚪")
	    if result.ocr_recommended:
	        ocr_status = "✅ Proceed with OCR"
	    else:
	        ocr_status = "⛔ Skip OCR (below threshold)"

	    issues = _collect_issues(result.signals)
	    # The report is assembled from explicit lines rather than one multi-line
	    # literal so that (a) table pipes are plain "|" (a "\|" is an invalid
	    # Python escape and is not a Markdown column delimiter) and (b) no source
	    # indentation can leak into the rendered Markdown.
	    lines = [
	        f"## {badge} Readability Score: {result.readability_score:.3f} / 1.000",
	        "",
	        f"### Verdict: {result.confidence_label.upper()} — {ocr_status}",
	        "",
	        "---",
	        "",
	        "### Signal Breakdown",
	        "",
	        "| Signal | Score | Raw Value | Description |",
	        "|--------|-------|-----------|-------------|",
	        *_signal_table_rows(result.signals),
	        "",
	        "---",
	        "",
	        "### Diagnostics",
	        "\n".join(issues) if issues else "✅ No major issues detected",
	    ]
	    return "\n".join(lines)


	def score_document(
	    image,
	    w_sharpness, w_contrast, w_noise, w_text_presence,
	    w_brightness, w_entropy, w_learned_iqa,
	    ocr_threshold,
	    learned_metric,
	):
	    """Score a document image with the given signal weights.

	    Args:
	        image: The uploaded document as a numpy array (converted to a PIL
	            image here) or any other object, which is handed to the scorer
	            unchanged. ``None`` means nothing has been uploaded yet.
	        w_sharpness, w_contrast, w_noise, w_text_presence, w_brightness,
	        w_entropy, w_learned_iqa: Raw signal weights. They are divided by
	            their sum before use, so only their ratios matter.
	        ocr_threshold: Passed to ``ScorerConfig`` unchanged.
	        learned_metric: Name of the learned IQA metric; the value
	            ``"disabled"`` is passed to ``ScorerConfig`` as ``None``.

	    Returns:
	        A 4-tuple ``(summary_markdown, api_json, config_json, signal_values)``.
	        When there is no image, or every weight is zero, the first element is
	        a status message and the other three are ``None``.
	    """
	    if image is None:
	        return (
	            "⬆️ Upload a document image to get started",
	            None, None, None
	        )

	    # Normalize weights to sum to 1.0
	    raw_weights = (
	        w_sharpness, w_contrast, w_noise, w_text_presence,
	        w_brightness, w_entropy, w_learned_iqa,
	    )
	    total = sum(raw_weights)
	    if total == 0:
	        return "❌ All weights are zero!", None, None, None
	    weights = [w / total for w in raw_weights]

	    config = ScorerConfig(
	        w_sharpness=weights[0],
	        w_contrast=weights[1],
	        w_noise=weights[2],
	        w_text_presence=weights[3],
	        w_brightness=weights[4],
	        w_entropy=weights[5],
	        w_learned_iqa=weights[6],
	        ocr_threshold=ocr_threshold,
	        learned_metric=learned_metric if learned_metric != "disabled" else None,
	        device="cpu",
	    )

	    # NOTE(review): a new scorer is constructed on every call; if construction
	    # loads the learned metric this may be slow per UI event — confirm.
	    scorer = DocumentReadabilityScorer(config)

	    # Convert gradio image (numpy array) to PIL
	    pil_img = Image.fromarray(image) if isinstance(image, np.ndarray) else image

	    result = scorer.score(pil_img)
	    summary = _build_summary(result)

	    # Per-signal scores in a fixed order: sharpness, contrast, noise,
	    # text presence, brightness, entropy, learned IQA.
	    signal_keys = (
	        'sharpness', 'contrast', 'noise', 'text_presence',
	        'brightness', 'entropy', 'learned_iqa',
	    )
	    signal_values = [result.signals[key] for key in signal_keys]

	    # ── JSON for API/programmatic use ──
	    api_output = json.dumps(result.to_dict(), indent=2)

	    # ── Config for reproduction ──
	    config_output = json.dumps({
	        "weights": {
	            "sharpness": round(weights[0], 4),
	            "contrast": round(weights[1], 4),
	            "noise": round(weights[2], 4),
	            "text_presence": round(weights[3], 4),
	            "brightness": round(weights[4], 4),
	            "entropy": round(weights[5], 4),
	            "learned_iqa": round(weights[6], 4),
	        },
	        "ocr_threshold": ocr_threshold,
	        "learned_metric": learned_metric,
	    }, indent=2)

	    return summary, api_output, config_output, signal_values


	def create_bar_plot(signal_values):
	    """Pair the per-signal scores with short display labels for a bar chart.

	    Args:
	        signal_values: Scores in the order sharpness, contrast, noise, text
	            presence, brightness, entropy, learned IQA (the order
	            ``score_document`` returns them in), or ``None`` when nothing
	            has been scored yet.

	    Returns:
	        A dict mapping each label to its score, in label order, or ``None``
	        when ``signal_values`` is ``None``. Values beyond the seventh are
	        ignored; if fewer than seven are given, only the labels that have a
	        value are included.
	    """
	    if signal_values is None:
	        return None
	    names = ("Sharp", "Contrast", "Noise", "Text", "Bright", "Entropy", "IQA")
	    # zip pairs labels with values positionally, replacing manual indexing.
	    return dict(zip(names, signal_values))


	# ─── Gradio UI ────────────────────────────────────────────────────────────────

	# Intro Markdown shown at the top of the app.
	# Built from explicit lines so the table uses plain "|" delimiters (a "\|"
	# is an invalid Python escape and Markdown reads it as a literal pipe, not a
	# column separator) and so no source indentation ends up in the text, where
	# Markdown would treat it as an indented code block.
	DESCRIPTION = "\n".join((
	    "",
	    "# 📄 Document Readability Scorer",
	    "",
	    "Pre-screen documents before expensive OCR/LLM inference. Upload a document image and get a readability score",
	    "with detailed signal breakdown. Adjust weights to calibrate for your specific pipeline.",
	    "",
	    "### How it works",
	    "The scorer extracts 7 independent signals from the image and combines them into a single readability score (0–1):",
	    "",
	    "| Signal | What it measures | Method |",
	    "|--------|-----------------|--------|",
	    "| Sharpness | Is the text sharp/blurry? | Laplacian variance + FFT high-freq energy |",
	    "| Contrast | Is text distinguishable from background? | RMS + Michelson contrast |",
	    "| Noise | How clean is the image? | Immerkær noise estimation |",
	    "| Text Presence | Is there text on the page? | MSER regions + Sobel edge density |",
	    "| Brightness | Is exposure appropriate? | Mean brightness + saturation analysis |",
	    "| Entropy | Is there information content? | Shannon entropy |",
	    "| Learned IQA | ML-based quality score | CLIP-IQA via pyiqa library |",
	    "",
	    "> 💡 Calibration: Adjust the weight sliders to match your pipeline's sensitivity. For example, if your OCR handles blur well but fails on low contrast, increase the contrast weight.",
	    "",
	))

	# Markdown for the "Integration Guide" accordion.
	# Built from explicit lines so the fenced Python samples keep their
	# indentation (the `if`/`else` bodies and call arguments were flush-left,
	# which is not valid Python) independently of how this file is indented.
	INTEGRATION_GUIDE = "\n".join((
	    "",
	    "### Python Integration",
	    "",
	    "```python",
	    "from document_readability import DocumentReadabilityScorer, ScorerConfig",
	    "",
	    "# Use default weights",
	    "scorer = DocumentReadabilityScorer()",
	    'result = scorer.score("document.png")',
	    "",
	    "if result.ocr_recommended:",
	    "    # Proceed with expensive OCR/LLM",
	    "    run_ocr_pipeline(document)",
	    "else:",
	    "    log_rejected(result.signals) # Log why it was rejected",
	    "",
	    "# Custom calibration",
	    "config = ScorerConfig(",
	    "    w_sharpness=0.35, # prioritize sharpness",
	    "    w_contrast=0.20, # important for your docs",
	    "    w_noise=0.05, # your OCR handles noise well",
	    "    w_text_presence=0.15,",
	    "    w_brightness=0.05,",
	    "    w_entropy=0.10,",
	    "    w_learned_iqa=0.10,",
	    "    ocr_threshold=0.50, # your calibrated threshold",
	    '    learned_metric="clipiqa", # or "brisque", "topiq_nr", None',
	    ")",
	    "scorer = DocumentReadabilityScorer(config)",
	    "",
	    "# Batch processing",
	    "from document_readability import score_batch",
	    'results = score_batch(["doc1.png", "doc2.jpg", "doc3.tiff"])',
	    "# → sorted by readability score, highest first",
	    "```",
	    "",
	    "### For GPU-accelerated scoring (VLM-based)",
	    "If you need even higher accuracy, use `mapo80/DeQA-Doc-Sharpness` (a 7B VLM scorer, SRCC ~0.92 on document quality):",
	    "```python",
	    "# Requires GPU (16GB VRAM)",
	    "from transformers import AutoModelForCausalLM",
	    "import torch",
	    "model = AutoModelForCausalLM.from_pretrained(",
	    '    "mapo80/DeQA-Doc-Sharpness",',
	    "    trust_remote_code=True,",
	    "    torch_dtype=torch.float16,",
	    '    device_map="auto",',
	    ")",
	    "score = model.score([pil_image]).item() # 1-5 scale",
	    "```",
	    "",
	))

	# Build the Gradio Blocks app and bind it to `demo`.
	# Layout: intro Markdown, an input side (image upload, seven weight sliders,
	# OCR threshold slider, metric dropdown, score button), an output side
	# (result Markdown, two JSON code panes, a hidden state) and an integration
	# guide accordion.
	# NOTE(review): indentation is flattened in this copy of the file, so the
	# exact nesting of the `with` blocks below cannot be read off here — confirm
	# it against the original layout before editing.
	with gr.Blocks(
	title="Document Readability Scorer",
	theme=gr.themes.Soft(),
	) as demo:
	gr.Markdown(DESCRIPTION)

	with gr.Row():
	# ── Left column: Input ──
	with gr.Column(scale=1):
	# type="numpy" means score_document receives the upload as a numpy array.
	image_input = gr.Image(
	label="📄 Upload Document",
	type="numpy",
	height=400,
	)

	gr.Markdown("### ⚖️ Signal Weights (auto-normalized to sum to 1.0)")

	# Weight sliders: range 0–1 in steps of 0.05. The defaults below add up to
	# 1.00; score_document divides by the sum, so any non-zero mix is accepted.
	w_sharpness = gr.Slider(0, 1, value=0.30, step=0.05, label="🔍 Sharpness")
	w_contrast = gr.Slider(0, 1, value=0.15, step=0.05, label="🎨 Contrast")
	w_noise = gr.Slider(0, 1, value=0.10, step=0.05, label="📡 Noise (inverted)")
	w_text_presence = gr.Slider(0, 1, value=0.15, step=0.05, label="📝 Text Presence")
	w_brightness = gr.Slider(0, 1, value=0.05, step=0.05, label="☀️ Brightness")
	w_entropy = gr.Slider(0, 1, value=0.10, step=0.05, label="📊 Entropy")
	w_learned_iqa = gr.Slider(0, 1, value=0.15, step=0.05, label="🧠 Learned IQA")

	ocr_threshold = gr.Slider(
	0, 1, value=0.45, step=0.05,
	label="🎯 OCR Threshold (score below → skip OCR)"
	)

	# "disabled" is translated to learned_metric=None inside score_document.
	learned_metric = gr.Dropdown(
	choices=["clipiqa", "brisque", "niqe", "topiq_nr", "disabled"],
	value="clipiqa",
	label="🧠 Learned IQA Metric",
	)

	score_btn = gr.Button("🔍 Score Document", variant="primary", size="lg")

	# ── Right column: Output ──
	with gr.Column(scale=1):
	result_md = gr.Markdown("⬆️ Upload a document to get started")

	with gr.Accordion("📋 API Response (JSON)", open=False):
	api_json = gr.Code(language="json", label="API Response")

	with gr.Accordion("⚙️ Current Config (for reproduction)", open=False):
	config_json = gr.Code(language="json", label="Config")

	# Hidden state for signal values
	# (receives the fourth value returned by score_document).
	signal_state = gr.State(None)

	with gr.Accordion("📖 Integration Guide", open=False):
	gr.Markdown(INTEGRATION_GUIDE)

	# ── Event handlers ──
	# Every trigger below calls score_document with the same inputs and outputs.
	# The order of all_inputs must match score_document's parameter order, and
	# all_outputs must match the order of its 4-tuple return value.
	all_inputs = [
	image_input,
	w_sharpness, w_contrast, w_noise, w_text_presence,
	w_brightness, w_entropy, w_learned_iqa,
	ocr_threshold, learned_metric,
	]
	all_outputs = [result_md, api_json, config_json, signal_state]

	score_btn.click(fn=score_document, inputs=all_inputs, outputs=all_outputs)

	# Auto-score on image upload
	image_input.change(fn=score_document, inputs=all_inputs, outputs=all_outputs)

	# Re-score when weights change
	# (bound to the sliders' `release` event rather than `change`).
	for slider in [w_sharpness, w_contrast, w_noise, w_text_presence,
	w_brightness, w_entropy, w_learned_iqa, ocr_threshold]:
	slider.release(fn=score_document, inputs=all_inputs, outputs=all_outputs)

	# Re-score when a different learned metric is selected.
	learned_metric.change(fn=score_document, inputs=all_inputs, outputs=all_outputs)


	# Script entry point: start the Gradio server only when this file is run
	# directly, listening on every network interface at port 7860.
	if __name__ == "__main__":
	    launch_options = {"server_name": "0.0.0.0", "server_port": 7860}
	    demo.launch(**launch_options)