""" OCR Confidence Visualization - Gradio Application. Upload a document image to extract text with confidence visualization. Supports deployment to HuggingFace Spaces with ZeroGPU via @spaces.GPU decorator. The decorator is effect-free in non-ZeroGPU environments for local development. """ import html import json from typing import Generator import gradio as gr from PIL import Image # Import spaces for ZeroGPU support (effect-free outside HuggingFace Spaces) try: import spaces SPACES_AVAILABLE = True except ImportError: SPACES_AVAILABLE = False from model import generate_with_logprobs, load_model, TokenData, AVAILABLE_MODELS, DEFAULT_MODEL def gpu_decorator(duration: int = 120): """ Return @spaces.GPU decorator if available, otherwise a no-op decorator. This allows the code to work both locally and on HuggingFace Spaces. """ if SPACES_AVAILABLE: return spaces.GPU(duration=duration) return lambda fn: fn def probability_to_color(prob: float) -> str: """ Map probability to a color for text and underline styling. Args: prob: Confidence probability (0.0 to 1.0) Returns: Hex color string """ if prob > 0.99: return "#3b82f6" # Blue - very high confidence elif prob > 0.95: return "#16a34a" # Dark Green - high confidence elif prob > 0.85: return "#65a30d" # Darker Light Green - good confidence (darkened for readability) elif prob > 0.70: return "#ca8a04" # Darker Yellow - moderate confidence (darkened for readability) elif prob > 0.50: return "#ef4444" # Red - low confidence else: return "#a855f7" # Purple - very low confidence def entropy_to_color(entropy: float) -> str: """ Map entropy (in bits) to a color for visualization. Higher entropy = more uncertainty = warmer colors. Args: entropy: Shannon entropy in bits (0.0 = certain) Returns: Hex color string """ if entropy < 0.1: return "#3b82f6" # Blue - very certain elif entropy < 0.3: return "#16a34a" # Dark Green - certain elif entropy < 0.7: return "#65a30d" # Green - fairly certain elif entropy < 1.5: return "#ca8a04" # Amber - some uncertainty elif entropy < 2.5: return "#ef4444" # Red - uncertain else: return "#a855f7" # Purple - very uncertain def build_html_output(tokens: list[TokenData], mode: str = "probability") -> str: """ Build HTML output from accumulated tokens with confidence coloring. Args: tokens: List of TokenData objects mode: "probability" for confidence coloring, "entropy" for uncertainty coloring Returns: HTML string with styled token spans """ # Font stack with emoji support font_family = "'Cascadia Code', 'Fira Code', Consolas, monospace, 'Apple Color Emoji', 'Segoe UI Emoji', 'Noto Color Emoji'" # CSS for hover underline effect style_tag = '' if not tokens: return f'{style_tag}
' spans = [] for token_data in tokens: # Escape HTML entities in token text token_text = html.escape(token_data.token) # Handle newlines - convert to
if "\n" in token_text: token_text = token_text.replace("\n", "
") spans.append(token_text) else: # Get color based on mode if mode == "entropy": color = entropy_to_color(token_data.entropy) else: color = probability_to_color(token_data.probability) # Encode alternatives as JSON for data attribute alternatives_json = html.escape(json.dumps(token_data.alternatives)) # Build styled span with color (underline on hover via CSS) span = ( f'' f'{token_text}' ) spans.append(span) html_content = "".join(spans) return f'{style_tag}
{html_content}
' @gpu_decorator(duration=120) def transcribe_full(image: Image.Image, model_name: str = None) -> list[TokenData]: """ Run full OCR inference on GPU and return all tokens. On HuggingFace Spaces with ZeroGPU, this function is decorated with @spaces.GPU to allocate GPU resources for the duration of inference. The GPU is released when the function returns. Args: image: PIL Image to process model_name: Which model to use for inference Returns: List of TokenData with token strings, probabilities, and alternatives """ return list(generate_with_logprobs(image, model_name=model_name)) def transcribe_streaming(image: Image.Image, model_name: str = None) -> Generator[tuple[str, str], None, None]: """ Stream OCR transcription with progressive HTML output for both views. This function separates GPU-bound inference from HTML rendering: 1. Shows a "Processing..." indicator during inference 2. Runs full inference in a single GPU-decorated call 3. Streams HTML rendering from pre-computed tokens (no GPU needed) This architecture is required for HuggingFace ZeroGPU, which allocates GPU resources per decorated function call rather than for streaming. Args: image: PIL Image to process model_name: Which model to use for inference Yields: Tuple of (probability_html, entropy_html) as tokens stream """ if image is None: empty = '
Please upload an image.
' yield empty, empty return # Show processing indicator during GPU inference loading = f'''
Processing image with {model_name or DEFAULT_MODEL}...
''' yield loading, loading # Run full inference (GPU allocated here on ZeroGPU) tokens = transcribe_full(image, model_name=model_name) # Stream HTML rendering (no GPU needed) accumulated: list[TokenData] = [] for token in tokens: accumulated.append(token) prob_html = build_html_output(accumulated, mode="probability") entropy_html = build_html_output(accumulated, mode="entropy") yield prob_html, entropy_html # JavaScript for token alternatives panel (loaded via launch js parameter) TOKEN_ALTERNATIVES_JS = """ (function() { document.addEventListener('click', function(e) { var token = e.target.closest('[data-alternatives]'); if (!token || !token.dataset.alternatives) return; var panel = document.getElementById('alternatives-panel'); if (!panel) return; var prob = parseFloat(token.dataset.prob) || 0; var alts = JSON.parse(token.dataset.alternatives); var tokenText = token.textContent; // Build panel content var html = '
' + 'Selected: "' + tokenText + '" (' + (prob * 100).toFixed(2) + '%)' + '
'; if (alts.length === 0) { html += '
No alternatives available
'; } else { html += '
Top ' + Math.min(alts.length, 10) + ' alternatives:
'; for (var i = 0; i < Math.min(alts.length, 10); i++) { var alt = alts[i]; var altProb = (alt.probability * 100).toFixed(2); var barWidth = Math.max(alt.probability * 100, 1); html += '
' + '' + alt.token.replace(//g,'>') + '' + '' + altProb + '%' + '
' + '
' + '
'; } } panel.innerHTML = html; }); })(); """ # Initial HTML for alternatives panel ALTERNATIVES_PANEL_INITIAL = '''
Click on any token above to see alternative predictions.
''' # Build Gradio interface with gr.Blocks(title="OCR Confidence Visualization") as demo: gr.Markdown("# OCR Confidence Visualization") gr.Markdown("Upload a document image to extract text with token streaming.") with gr.Row(): with gr.Column(scale=1): model_selector = gr.Radio( choices=list(AVAILABLE_MODELS.keys()), value=DEFAULT_MODEL, label="Model", ) image_input = gr.Image(type="pil", label="Upload Document") submit_btn = gr.Button("Transcribe", variant="primary") with gr.Column(scale=2): with gr.Tabs(): with gr.TabItem("Probability"): output_html_prob = gr.HTML( value='
Upload an image and click Transcribe to start.
', ) with gr.TabItem("Entropy"): output_html_entropy = gr.HTML( value='
Upload an image and click Transcribe to start.
', ) gr.Markdown("### Token Alternatives") alternatives_html = gr.HTML( value=ALTERNATIVES_PANEL_INITIAL, ) submit_btn.click( fn=transcribe_streaming, inputs=[image_input, model_selector], outputs=[output_html_prob, output_html_entropy], ) if __name__ == "__main__": # Preload model at startup for local development # On HuggingFace Spaces with ZeroGPU, model loading happens on first request # when GPU is allocated by the @spaces.GPU decorator if not SPACES_AVAILABLE: print("Preloading model (local development)...") load_model() else: print("ZeroGPU detected - model will load on first inference request") print("Starting Gradio server...") demo.launch(server_port=7860, js=TOKEN_ALTERNATIVES_JS)