# NOTE: HuggingFace Spaces page-header scrape residue removed (Space status: "Sleeping").
| """ | |
| OCR Confidence Visualization - Gradio Application. | |
| Upload a document image to extract text with confidence visualization. | |
| Supports deployment to HuggingFace Spaces with ZeroGPU via @spaces.GPU decorator. | |
| The decorator is effect-free in non-ZeroGPU environments for local development. | |
| """ | |
# Standard library
import html
import json
from typing import Generator

# Third-party UI / imaging
import gradio as gr
from PIL import Image

# Import spaces for ZeroGPU support (effect-free outside HuggingFace Spaces)
try:
    import spaces
    SPACES_AVAILABLE = True  # running where the `spaces` package is installed (HF Spaces)
except ImportError:
    SPACES_AVAILABLE = False  # local development: no ZeroGPU decorator available

# Project-local model helpers: inference generator, eager loader, per-token
# result type, and the model registry with its default entry.
from model import generate_with_logprobs, load_model, TokenData, AVAILABLE_MODELS, DEFAULT_MODEL
def gpu_decorator(duration: int = 120):
    """
    Return @spaces.GPU decorator if available, otherwise a no-op decorator.

    This allows the code to work both locally and on HuggingFace Spaces.

    Args:
        duration: Maximum GPU allocation time in seconds, forwarded to
            spaces.GPU when running on ZeroGPU.
    """
    if not SPACES_AVAILABLE:
        # Local development: hand the function back untouched.
        return lambda fn: fn
    return spaces.GPU(duration=duration)
def probability_to_color(prob: float) -> str:
    """
    Map probability to a color for text and underline styling.

    Args:
        prob: Confidence probability (0.0 to 1.0)

    Returns:
        Hex color string
    """
    # Descending confidence bands: (exclusive lower bound, color).
    bands = (
        (0.99, "#3b82f6"),  # Blue - very high confidence
        (0.95, "#16a34a"),  # Dark Green - high confidence
        (0.85, "#65a30d"),  # Darker Light Green - good confidence (darkened for readability)
        (0.70, "#ca8a04"),  # Darker Yellow - moderate confidence (darkened for readability)
        (0.50, "#ef4444"),  # Red - low confidence
    )
    for lower_bound, hex_color in bands:
        if prob > lower_bound:
            return hex_color
    return "#a855f7"  # Purple - very low confidence
def entropy_to_color(entropy: float) -> str:
    """
    Map entropy (in bits) to a color for visualization.

    Higher entropy = more uncertainty = warmer colors.

    Args:
        entropy: Shannon entropy in bits (0.0 = certain)

    Returns:
        Hex color string
    """
    # Ascending uncertainty bands: (exclusive upper bound, color).
    bands = (
        (0.1, "#3b82f6"),  # Blue - very certain
        (0.3, "#16a34a"),  # Dark Green - certain
        (0.7, "#65a30d"),  # Green - fairly certain
        (1.5, "#ca8a04"),  # Amber - some uncertainty
        (2.5, "#ef4444"),  # Red - uncertain
    )
    for upper_bound, hex_color in bands:
        if entropy < upper_bound:
            return hex_color
    return "#a855f7"  # Purple - very uncertain
def build_html_output(tokens: "list[TokenData]", mode: str = "probability") -> str:
    """
    Build HTML output from accumulated tokens with confidence coloring.

    Args:
        tokens: List of TokenData objects (uses their .token, .probability,
            .entropy, and .alternatives attributes)
        mode: "probability" for confidence coloring, "entropy" for
            uncertainty coloring

    Returns:
        HTML string with styled token spans inside a container div
    """
    # Font stack with emoji support
    font_family = "'Cascadia Code', 'Fira Code', Consolas, monospace, 'Apple Color Emoji', 'Segoe UI Emoji', 'Noto Color Emoji'"
    # CSS for hover underline effect
    style_tag = '<style>.token-span:hover { text-decoration: underline !important; }</style>'
    # Single container style shared by the empty and populated states.
    # (Previously the empty state used line-height 1.8 and omitted
    # white-space: pre-wrap, so the layout jumped when the first token arrived.)
    container_style = f"font-family: {font_family}; line-height: 1.6; padding: 10px; white-space: pre-wrap;"
    if not tokens:
        return f'{style_tag}<div class="token-container" style="{container_style}"></div>'
    spans = []
    for token_data in tokens:
        # Escape HTML entities in token text
        token_text = html.escape(token_data.token)
        if "\n" in token_text:
            # Newline-bearing tokens become plain <br> breaks (uncolored,
            # no alternatives popup).
            spans.append(token_text.replace("\n", "<br>"))
            continue
        # Pick the color scale requested by `mode`.
        if mode == "entropy":
            color = entropy_to_color(token_data.entropy)
        else:
            color = probability_to_color(token_data.probability)
        # Encode alternatives as JSON for the data attribute consumed by the
        # click handler in TOKEN_ALTERNATIVES_JS.
        alternatives_json = html.escape(json.dumps(token_data.alternatives))
        # Styled span with color (underline on hover via the CSS above).
        spans.append(
            f'<span class="token-span" style="color: {color}; '
            f'text-decoration-color: {color}; cursor: pointer;" '
            f'data-prob="{token_data.probability}" '
            f'data-entropy="{token_data.entropy}" '
            f'data-alternatives="{alternatives_json}">'
            f'{token_text}</span>'
        )
    html_content = "".join(spans)
    return f'{style_tag}<div class="token-container" style="{container_style}">{html_content}</div>'
@gpu_decorator(duration=120)
def transcribe_full(image: Image.Image, model_name: str = None) -> list[TokenData]:
    """
    Run full OCR inference on GPU and return all tokens.

    On HuggingFace Spaces with ZeroGPU, this function is decorated with
    @spaces.GPU (via gpu_decorator) to allocate GPU resources for the
    duration of inference. The GPU is released when the function returns.
    Locally the decorator is a no-op.

    Args:
        image: PIL Image to process
        model_name: Which model to use for inference (None selects the default)

    Returns:
        List of TokenData with token strings, probabilities, and alternatives
    """
    # Materialize the generator so the GPU is held only for this single call;
    # the docstring promised @spaces.GPU decoration but the decorator was
    # missing — restored above so ZeroGPU actually allocates a GPU here.
    return list(generate_with_logprobs(image, model_name=model_name))
def transcribe_streaming(image: Image.Image, model_name: str = None) -> Generator[tuple[str, str], None, None]:
    """
    Stream OCR transcription with progressive HTML output for both views.

    GPU-bound inference is separated from HTML rendering:
      1. A "Processing..." indicator is shown during inference.
      2. All tokens are produced by a single GPU-decorated call.
      3. HTML rendering is then streamed from the pre-computed tokens
         (no GPU needed).

    This architecture is required for HuggingFace ZeroGPU, which allocates
    GPU resources per decorated function call rather than for streaming.

    Args:
        image: PIL Image to process
        model_name: Which model to use for inference

    Yields:
        Tuple of (probability_html, entropy_html) as tokens stream
    """
    if image is None:
        placeholder = '<div style="color: #666; padding: 10px;">Please upload an image.</div>'
        yield placeholder, placeholder
        return
    # Spinner shown in both tabs while the GPU call below is in flight.
    spinner = f'''<div style="color: #60a5fa; padding: 10px; display: flex; align-items: center; gap: 10px;">
<div style="width: 20px; height: 20px; border: 2px solid #60a5fa; border-top-color: transparent; border-radius: 50%; animation: spin 1s linear infinite;"></div>
<style>@keyframes spin {{ to {{ transform: rotate(360deg); }} }}</style>
Processing image with {model_name or DEFAULT_MODEL}...
</div>'''
    yield spinner, spinner
    # Single GPU-decorated call produces every token up front (ZeroGPU-safe).
    all_tokens = transcribe_full(image, model_name=model_name)
    # Render growing prefixes of the token list — pure CPU work, no GPU needed.
    for count in range(1, len(all_tokens) + 1):
        prefix = all_tokens[:count]
        yield (
            build_html_output(prefix, mode="probability"),
            build_html_output(prefix, mode="entropy"),
        )
# JavaScript for token alternatives panel (loaded via launch js parameter).
# A delegated click handler reads data-prob / data-alternatives off the
# clicked token span and renders a probability-bar list into the
# #alternatives-panel div. Token text is HTML-escaped before innerHTML
# insertion (the original escape `replace(/</g,'<')` was a no-op because
# the &lt;/&gt; entities had been lost).
TOKEN_ALTERNATIVES_JS = """
(function() {
    function esc(s) {
        return s.replace(/&/g, '&amp;').replace(/</g, '&lt;').replace(/>/g, '&gt;');
    }
    document.addEventListener('click', function(e) {
        var token = e.target.closest('[data-alternatives]');
        if (!token || !token.dataset.alternatives) return;
        var panel = document.getElementById('alternatives-panel');
        if (!panel) return;
        var prob = parseFloat(token.dataset.prob) || 0;
        var alts = JSON.parse(token.dataset.alternatives);
        var tokenText = token.textContent;
        // Build panel content (escape all token text before innerHTML insertion)
        var html = '<div style="font-weight:600;margin-bottom:12px;padding-bottom:8px;border-bottom:1px solid #374151;">' +
            'Selected: "<span style="color:#60a5fa">' + esc(tokenText) + '</span>" (' + (prob * 100).toFixed(2) + '%)' +
            '</div>';
        if (alts.length === 0) {
            html += '<div style="color:#9ca3af;font-style:italic">No alternatives available</div>';
        } else {
            html += '<div style="font-size:12px;color:#9ca3af;margin-bottom:8px;">Top ' + Math.min(alts.length, 10) + ' alternatives:</div>';
            for (var i = 0; i < Math.min(alts.length, 10); i++) {
                var alt = alts[i];
                var altProb = (alt.probability * 100).toFixed(2);
                var barWidth = Math.max(alt.probability * 100, 1);
                html += '<div style="display:flex;align-items:center;margin:6px 0;">' +
                    '<span style="width:80px;overflow:hidden;text-overflow:ellipsis;white-space:nowrap;font-family:monospace;">' +
                    esc(alt.token) + '</span>' +
                    '<span style="width:55px;text-align:right;color:#9ca3af;font-size:12px;margin-right:10px;">' +
                    altProb + '%</span>' +
                    '<div style="flex:1;height:10px;background:#374151;border-radius:5px;overflow:hidden;">' +
                    '<div style="width:' + barWidth + '%;height:100%;background:#60a5fa;border-radius:5px;"></div>' +
                    '</div></div>';
            }
        }
        panel.innerHTML = html;
    });
})();
"""
# Initial HTML for alternatives panel.
# Rendered into the `alternatives_html` gr.HTML component; the div id
# "alternatives-panel" is the target that TOKEN_ALTERNATIVES_JS writes into
# when a token span is clicked.
ALTERNATIVES_PANEL_INITIAL = '''
<div id="alternatives-panel" style="
    padding: 16px;
    background: #1f2937;
    border-radius: 8px;
    color: #e5e7eb;
    font-family: system-ui, -apple-system, sans-serif;
    font-size: 14px;
    min-height: 100px;
">
    <div style="color: #9ca3af; font-style: italic;">
        Click on any token above to see alternative predictions.
    </div>
</div>
'''
# Build Gradio interface: model picker + image upload on the left,
# tabbed probability/entropy renderings plus the alternatives panel on the right.
with gr.Blocks(title="OCR Confidence Visualization") as demo:
    gr.Markdown("# OCR Confidence Visualization")
    gr.Markdown("Upload a document image to extract text with token streaming.")
    with gr.Row():
        with gr.Column(scale=1):
            # Inputs column: model choice, document image, trigger button.
            model_selector = gr.Radio(
                choices=list(AVAILABLE_MODELS.keys()),
                value=DEFAULT_MODEL,
                label="Model",
            )
            image_input = gr.Image(type="pil", label="Upload Document")
            submit_btn = gr.Button("Transcribe", variant="primary")
        with gr.Column(scale=2):
            # Output column: one HTML view per coloring mode, in tabs.
            with gr.Tabs():
                with gr.TabItem("Probability"):
                    output_html_prob = gr.HTML(
                        value='<div style="color: #666; padding: 10px;">Upload an image and click Transcribe to start.</div>',
                    )
                with gr.TabItem("Entropy"):
                    output_html_entropy = gr.HTML(
                        value='<div style="color: #666; padding: 10px;">Upload an image and click Transcribe to start.</div>',
                    )
            # Panel populated client-side by TOKEN_ALTERNATIVES_JS on token click.
            gr.Markdown("### Token Alternatives")
            alternatives_html = gr.HTML(
                value=ALTERNATIVES_PANEL_INITIAL,
            )
    # Generator callback: streams (probability_html, entropy_html) tuples,
    # so both tabs update together as tokens are rendered.
    submit_btn.click(
        fn=transcribe_streaming,
        inputs=[image_input, model_selector],
        outputs=[output_html_prob, output_html_entropy],
    )
| if __name__ == "__main__": | |
| # Preload model at startup for local development | |
| # On HuggingFace Spaces with ZeroGPU, model loading happens on first request | |
| # when GPU is allocated by the @spaces.GPU decorator | |
| if not SPACES_AVAILABLE: | |
| print("Preloading model (local development)...") | |
| load_model() | |
| else: | |
| print("ZeroGPU detected - model will load on first inference request") | |
| print("Starting Gradio server...") | |
| demo.launch(server_port=7860, js=TOKEN_ALTERNATIVES_JS) | |