""" Visualization utilities for displaying similarity results. """ import plotly.graph_objects as go import plotly.express as px from typing import List, Dict, Any import difflib from models.similarity import SimilarityReport, ModalityScore def create_similarity_gauge(score: float, title: str = "Overall Similarity") -> go.Figure: """ Create a gauge chart showing similarity score. Args: score: Similarity score (0.0 to 1.0) title: Chart title Returns: Plotly figure """ # Determine color based on score if score >= 0.7: color = "green" elif score >= 0.4: color = "orange" else: color = "red" fig = go.Figure(go.Indicator( mode="gauge+number+delta", value=score * 100, # Convert to percentage domain={'x': [0, 1], 'y': [0, 1]}, title={'text': title, 'font': {'size': 24}}, number={'suffix': "%", 'font': {'size': 40}}, gauge={ 'axis': {'range': [None, 100], 'tickwidth': 1, 'tickcolor': "darkblue"}, 'bar': {'color': color}, 'bgcolor': "white", 'borderwidth': 2, 'bordercolor': "gray", 'steps': [ {'range': [0, 40], 'color': '#ffcccc'}, {'range': [40, 70], 'color': '#fff4cc'}, {'range': [70, 100], 'color': '#ccffcc'} ], 'threshold': { 'line': {'color': "red", 'width': 4}, 'thickness': 0.75, 'value': 90 } } )) fig.update_layout( height=300, margin=dict(l=20, r=20, t=60, b=20) ) return fig def create_modality_breakdown_chart(report: SimilarityReport) -> go.Figure: """ Create a bar chart showing per-modality similarity scores. Args: report: SimilarityReport object Returns: Plotly figure """ modalities = [] scores = [] weights = [] # Add all available modalities if report.text_score: modalities.append("Text") scores.append(report.text_score.score * 100) weights.append(report.weights_used.get("text", 0) * 100) if report.table_score: modalities.append("Table") scores.append(report.table_score.score * 100) weights.append(report.weights_used.get("table", 0) * 100) if report.image_score: modalities.append("Image") scores.append(report.image_score.score * 100) weights.append(report.weights_used.get("image", 0) * 100) if report.layout_score: modalities.append("Layout") scores.append(report.layout_score.score * 100) weights.append(report.weights_used.get("layout", 0) * 100) if report.metadata_score: modalities.append("Metadata") scores.append(report.metadata_score.score * 100) weights.append(report.weights_used.get("metadata", 0) * 100) # Create bar chart fig = go.Figure() fig.add_trace(go.Bar( name='Similarity Score', x=modalities, y=scores, marker_color='lightblue', text=[f"{s:.1f}%" for s in scores], textposition='auto', )) fig.add_trace(go.Bar( name='Weight', x=modalities, y=weights, marker_color='lightcoral', text=[f"{w:.0f}%" for w in weights], textposition='auto', )) fig.update_layout( title="Per-Modality Similarity Breakdown", xaxis_title="Modality", yaxis_title="Percentage (%)", yaxis_range=[0, 100], barmode='group', height=400, showlegend=True ) return fig def format_matched_sections(matched_sections: List[Dict[str, Any]]) -> str: """ Format matched sections for display. Args: matched_sections: List of matched section dictionaries Returns: Formatted string """ if not matched_sections: return "No matched sections found." output = [] for idx, section in enumerate(matched_sections, start=1): section_type = section.get("type", "unknown") similarity = section.get("similarity", 0.0) output.append(f"**Match {idx}** ({section_type.upper()}) - Similarity: {similarity:.2%}") output.append("") if section_type == "text": output.append(f"📄 Doc 1 (Page {section.get('doc1_page', '?')}):") output.append(f"```\n{section.get('doc1_content', '')}\n```") output.append("") output.append(f"📄 Doc 2 (Page {section.get('doc2_page', '?')}):") output.append(f"```\n{section.get('doc2_content', '')}\n```") elif section_type == "table": output.append(f"📊 Doc 1 Table (Page {section.get('doc1_page', '?')}):") output.append(f"_{section.get('doc1_schema', '')}_") output.append("") output.append(f"📊 Doc 2 Table (Page {section.get('doc2_page', '?')}):") output.append(f"_{section.get('doc2_schema', '')}_") elif section_type == "image": output.append(f"🖼️ Doc 1 Image (Page {section.get('doc1_page', '?')}):") output.append(f"_Image ID: {section.get('doc1_image_id', 'N/A')}_") output.append("") output.append(f"🖼️ Doc 2 Image (Page {section.get('doc2_page', '?')}):") output.append(f"_Image ID: {section.get('doc2_image_id', 'N/A')}_") elif section_type == "metadata": output.append(f"📋 Field: **{section.get('field', 'unknown').title()}**") output.append(f"- Doc 1: {section.get('doc1_value', 'N/A')}") output.append(f"- Doc 2: {section.get('doc2_value', 'N/A')}") output.append("") output.append("---") output.append("") return "\n".join(output) def generate_diff_html(text1: str, text2: str) -> str: """ Generate HTML diff highlighting differences between two texts. Args: text1: First text text2: Second text Returns: HTML string with diff highlighting """ # Split into words for better diff words1 = text1.split() words2 = text2.split() # Generate diff diff = difflib.ndiff(words1, words2) html_parts = [] html_parts.append('
') for item in diff: if item.startswith(' '): # Unchanged word = item[2:] html_parts.append(f'{word} ') elif item.startswith('- '): # Removed from text1 word = item[2:] html_parts.append(f'{word} ') elif item.startswith('+ '): # Added in text2 word = item[2:] html_parts.append(f'{word} ') html_parts.append('
') return ''.join(html_parts) def create_score_legend() -> str: """ Create a legend explaining similarity scores. Returns: Markdown formatted legend """ legend = """ ### 📊 Similarity Score Guide - **90-100%**: Nearly identical documents - **70-89%**: Very similar with minor differences - **40-69%**: Moderately similar with notable differences - **0-39%**: Significantly different documents """ return legend