Spaces:
Sleeping
Sleeping
| <html lang="en"> | |
| <head> | |
| <meta charset="UTF-8"> | |
| <meta name="viewport" content="width=device-width, initial-scale=1.0"> | |
| <title>RAG Evaluation Dashboard</title> | |
| <script src="https://cdnjs.cloudflare.com/ajax/libs/plotly.js/2.26.0/plotly.min.js"></script> | |
| <style> | |
| /* ----------------------------- | |
| Design tokens (matching index.html) | |
| ------------------------------*/ | |
| :root { | |
| --surface: #ffffff; | |
| --surface-subtle: #fafafa; | |
| --bg-main: #f5f7fb; | |
| --card-bg: #ffffff; | |
| --accent: #2563eb; | |
| --accent-soft: #eff6ff; | |
| --text-main: #111827; | |
| --text-muted: #6b7280; | |
| --border-soft: #e5e7eb; | |
| --success: #16a34a; | |
| --error: #dc2626; | |
| --warning: #f59e0b; | |
| --info: #2563eb; | |
| --radius-sm: 6px; | |
| --radius-md: 10px; | |
| --radius-lg: 14px; | |
| } | |
| /* Dark mode */ | |
| [data-theme="dark"] { | |
| --surface: #1f2937; | |
| --surface-subtle: #111827; | |
| --bg-main: #0f172a; | |
| --card-bg: #1e293b; | |
| --accent: #60a5fa; | |
| --accent-soft: #1e3a5f; | |
| --text-main: #f1f5f9; | |
| --text-muted: #94a3b8; | |
| --border-soft: #334155; | |
| } | |
| /* ----------------------------- | |
| Reset | |
| ------------------------------*/ | |
| * { | |
| margin: 0; | |
| padding: 0; | |
| box-sizing: border-box; | |
| } | |
| /* ----------------------------- | |
| Base | |
| ------------------------------*/ | |
| body { | |
| font-family: Inter, -apple-system, BlinkMacSystemFont, "Segoe UI", Roboto, Helvetica, Arial, sans-serif; | |
| background: var(--bg-main); | |
| min-height: 100vh; | |
| padding: 24px; | |
| color: var(--text-main); | |
| } | |
| /* ----------------------------- | |
| Container | |
| ------------------------------*/ | |
| .container { | |
| max-width: 1200px; | |
| margin: 0 auto; | |
| } | |
| /* ----------------------------- | |
| Header | |
| ------------------------------*/ | |
| header { | |
| text-align: center; | |
| margin-bottom: 36px; | |
| } | |
| header h1 { | |
| font-size: 2.2rem; | |
| font-weight: 600; | |
| letter-spacing: -0.02em; | |
| margin-bottom: 8px; | |
| } | |
| header p { | |
| font-size: 1rem; | |
| color: var(--text-muted); | |
| margin-bottom: 20px; | |
| } | |
/* Row of header navigation links; wraps on narrow screens. */
.nav-buttons {
  display: flex;
  justify-content: center;
  gap: 12px;
  flex-wrap: wrap;
  margin-bottom: 12px;
}

/* Primary navigation button (accent fill). */
.nav-btn {
  display: inline-flex;
  align-items: center;
  gap: 8px;
  padding: 10px 18px;
  background: var(--accent);
  color: white;
  text-decoration: none;
  border-radius: var(--radius-md);
  font-size: 0.9rem;
  font-weight: 500;
  transition: background 0.15s ease, transform 0.15s ease;
  border: none;
  cursor: pointer;
}

.nav-btn:hover {
  background: #1d4ed8;
  transform: translateY(-1px);
}

/* Secondary variant: surface-coloured with a soft border. */
.nav-btn.secondary {
  background: var(--surface);
  color: var(--text-main);
  border: 1px solid var(--border-soft);
}

.nav-btn.secondary:hover {
  background: var(--surface-subtle);
}

/*
 * Dark theme overrides for the primary button only.
 * `[data-theme="dark"] .nav-btn` has the same specificity as
 * `.nav-btn.secondary` and is declared later, so without `:not(.secondary)`
 * it would repaint the secondary button with the accent fill in dark mode.
 */
[data-theme="dark"] .nav-btn:not(.secondary) {
  background: #60a5fa;
}

[data-theme="dark"] .nav-btn:not(.secondary):hover {
  background: #3b82f6;
}
| /* Theme toggle */ | |
| .theme-toggle { | |
| position: fixed; | |
| top: 20px; | |
| right: 20px; | |
| padding: 8px 14px; | |
| background: var(--surface); | |
| border: 1px solid var(--border-soft); | |
| border-radius: var(--radius-md); | |
| cursor: pointer; | |
| font-size: 0.85rem; | |
| color: var(--text-main); | |
| z-index: 100; | |
| } | |
| /* ----------------------------- | |
| Tab Navigation | |
| ------------------------------*/ | |
| .tab-nav { | |
| display: flex; | |
| justify-content: center; | |
| gap: 8px; | |
| margin-bottom: 24px; | |
| flex-wrap: wrap; | |
| } | |
| .tab-btn { | |
| padding: 10px 20px; | |
| background: var(--surface); | |
| border: 1px solid var(--border-soft); | |
| color: var(--text-main); | |
| border-radius: var(--radius-md); | |
| cursor: pointer; | |
| font-size: 0.9rem; | |
| transition: all 0.15s ease; | |
| } | |
| .tab-btn:hover { | |
| background: var(--surface-subtle); | |
| } | |
| .tab-btn.active { | |
| background: var(--accent); | |
| color: white; | |
| border-color: var(--accent); | |
| } | |
| /* ----------------------------- | |
| Cards | |
| ------------------------------*/ | |
| .card { | |
| background: var(--surface); | |
| border-radius: var(--radius-lg); | |
| padding: 24px; | |
| border: 1px solid var(--border-soft); | |
| box-shadow: 0 4px 12px rgba(0, 0, 0, 0.05); | |
| margin-bottom: 24px; | |
| } | |
| .card h2 { | |
| font-size: 1.1rem; | |
| font-weight: 600; | |
| margin-bottom: 16px; | |
| color: var(--text-main); | |
| } | |
| /* ----------------------------- | |
| Metrics Grid | |
| ------------------------------*/ | |
| .metrics-grid { | |
| display: grid; | |
| grid-template-columns: repeat(auto-fit, minmax(180px, 1fr)); | |
| gap: 16px; | |
| margin-bottom: 24px; | |
| } | |
| .metric-card { | |
| background: var(--accent); | |
| color: white; | |
| padding: 20px; | |
| border-radius: var(--radius-md); | |
| text-align: center; | |
| transition: transform 0.15s ease; | |
| } | |
| .metric-card:hover { | |
| transform: translateY(-2px); | |
| } | |
| .metric-card.success { | |
| background: var(--success); | |
| } | |
| .metric-card.warning { | |
| background: var(--warning); | |
| } | |
| .metric-card.error { | |
| background: var(--error); | |
| } | |
| .metric-label { | |
| font-size: 0.8rem; | |
| opacity: 0.9; | |
| margin-bottom: 8px; | |
| text-transform: uppercase; | |
| letter-spacing: 0.5px; | |
| } | |
| .metric-value { | |
| font-size: 1.8rem; | |
| font-weight: 700; | |
| } | |
| .metric-unit { | |
| font-size: 0.75rem; | |
| opacity: 0.8; | |
| margin-top: 4px; | |
| } | |
| /* ----------------------------- | |
| Charts | |
| ------------------------------*/ | |
| .chart-container { | |
| background: var(--surface); | |
| border: 1px solid var(--border-soft); | |
| border-radius: var(--radius-md); | |
| padding: 16px; | |
| margin-bottom: 20px; | |
| min-height: 350px; | |
| } | |
| .chart-title { | |
| font-size: 1rem; | |
| font-weight: 600; | |
| margin-bottom: 12px; | |
| color: var(--text-main); | |
| } | |
| .two-column { | |
| display: grid; | |
| grid-template-columns: 1fr 1fr; | |
| gap: 20px; | |
| } | |
| @media (max-width: 900px) { | |
| .two-column { | |
| grid-template-columns: 1fr; | |
| } | |
| } | |
| /* ----------------------------- | |
| Sections | |
| ------------------------------*/ | |
| .section { | |
| display: none; | |
| } | |
| .section.active { | |
| display: block; | |
| animation: fadeIn 0.3s ease; | |
| } | |
| @keyframes fadeIn { | |
| from { | |
| opacity: 0; | |
| transform: translateY(10px); | |
| } | |
| to { | |
| opacity: 1; | |
| transform: translateY(0); | |
| } | |
| } | |
| /* ----------------------------- | |
| Info Box | |
| ------------------------------*/ | |
| .info-box { | |
| background: var(--accent-soft); | |
| border-left: 4px solid var(--accent); | |
| padding: 14px; | |
| border-radius: var(--radius-sm); | |
| margin-bottom: 20px; | |
| } | |
| .info-box p { | |
| font-size: 0.9rem; | |
| color: var(--text-muted); | |
| } | |
| /* ----------------------------- | |
| Button Group | |
| ------------------------------*/ | |
| .button-group { | |
| display: flex; | |
| gap: 10px; | |
| margin-bottom: 20px; | |
| flex-wrap: wrap; | |
| } | |
| .btn { | |
| padding: 10px 18px; | |
| background: var(--accent); | |
| color: white; | |
| border: none; | |
| border-radius: var(--radius-md); | |
| cursor: pointer; | |
| font-size: 0.9rem; | |
| font-weight: 500; | |
| transition: background 0.15s ease; | |
| } | |
| .btn:hover { | |
| background: #1d4ed8; | |
| } | |
| .btn-secondary { | |
| background: var(--surface); | |
| color: var(--text-main); | |
| border: 1px solid var(--border-soft); | |
| } | |
| .btn-secondary:hover { | |
| background: var(--surface-subtle); | |
| } | |
| /* ----------------------------- | |
| Loading | |
| ------------------------------*/ | |
| .loading { | |
| display: none; | |
| text-align: center; | |
| padding: 40px; | |
| color: var(--accent); | |
| } | |
| .spinner { | |
| border: 4px solid var(--border-soft); | |
| border-top: 4px solid var(--accent); | |
| border-radius: 50%; | |
| width: 40px; | |
| height: 40px; | |
| animation: spin 1s linear infinite; | |
| margin: 0 auto 15px; | |
| } | |
| @keyframes spin { | |
| 0% { | |
| transform: rotate(0deg); | |
| } | |
| 100% { | |
| transform: rotate(360deg); | |
| } | |
| } | |
| /* ----------------------------- | |
| Failure Items | |
| ------------------------------*/ | |
| .failure-item { | |
| background: var(--surface-subtle); | |
| border-left: 4px solid var(--error); | |
| padding: 12px; | |
| margin-bottom: 10px; | |
| border-radius: var(--radius-sm); | |
| } | |
| .failure-item-query { | |
| font-size: 0.9rem; | |
| color: var(--text-main); | |
| margin-bottom: 4px; | |
| } | |
| .failure-item-score { | |
| font-size: 0.8rem; | |
| color: var(--error); | |
| font-weight: 500; | |
| } | |
| /* ----------------------------- | |
| Status Colors | |
| ------------------------------*/ | |
| .status-good { | |
| color: var(--success); | |
| } | |
| .status-warning { | |
| color: var(--warning); | |
| } | |
| .status-critical { | |
| color: var(--error); | |
| } | |
| /* Plotly chart background fix for dark mode */ | |
| [data-theme="dark"] .js-plotly-plot .plotly .bg { | |
| fill: var(--surface) ; | |
| } | |
| </style> | |
| </head> | |
| <body> | |
<button class="theme-toggle" id="themeToggle">🌙 Dark mode</button>
| <div class="container"> | |
| <header> | |
<h1>📊 RAG Evaluation Dashboard</h1>
| <p>Real-time metrics and analysis for your RAG pipeline</p> | |
| <div class="nav-buttons"> | |
<a href="/" class="nav-btn secondary">← Back to Main</a>
<a href="/ragas-demo" class="nav-btn">🔬 RAGAS Evaluation</a>
| </div> | |
| </header> | |
| <!-- Tab Navigation --> | |
| <div class="tab-nav"> | |
| <button class="tab-btn active" onclick="showSection('overview')">Overview</button> | |
| <button class="tab-btn" onclick="showSection('retrieval')">Retrieval</button> | |
| <button class="tab-btn" onclick="showSection('generation')">Generation</button> | |
| <button class="tab-btn" onclick="showSection('faithfulness')">Faithfulness</button> | |
| <button class="tab-btn" onclick="showSection('performance')">Performance</button> | |
| <button class="tab-btn" onclick="showSection('failures')">Failures</button> | |
| </div> | |
| <div class="loading" id="loading"> | |
| <div class="spinner"></div> | |
| <p>Loading evaluation data...</p> | |
| </div> | |
| <!-- Overview Section --> | |
| <div class="section active" id="overview"> | |
| <div class="metrics-grid"> | |
| <div class="metric-card"> | |
| <div class="metric-label">Total Evaluations</div> | |
| <div class="metric-value" id="totalEvaluations">0</div> | |
| </div> | |
| <div class="metric-card success"> | |
| <div class="metric-label">Avg Precision</div> | |
| <div class="metric-value" id="overviewPrecision">-</div> | |
| </div> | |
| <div class="metric-card"> | |
| <div class="metric-label">Avg Recall</div> | |
| <div class="metric-value" id="overviewRecall">-</div> | |
| </div> | |
| <div class="metric-card success"> | |
| <div class="metric-label">Avg BERTScore</div> | |
| <div class="metric-value" id="overviewBert">-</div> | |
| </div> | |
| <div class="metric-card"> | |
| <div class="metric-label">Avg Faithfulness</div> | |
| <div class="metric-value" id="overviewFaith">-</div> | |
| </div> | |
| <div class="metric-card warning"> | |
| <div class="metric-label">Hallucination Rate</div> | |
| <div class="metric-value" id="overviewHalluc">-</div> | |
| </div> | |
| <div class="metric-card"> | |
| <div class="metric-label">Avg Latency</div> | |
| <div class="metric-value" id="overviewLatency">-</div> | |
| <div class="metric-unit">ms</div> | |
| </div> | |
| <div class="metric-card"> | |
| <div class="metric-label">MRR</div> | |
| <div class="metric-value" id="overviewMRR">-</div> | |
| </div> | |
| </div> | |
| <div class="button-group"> | |
<button class="btn" onclick="loadMetrics()">🔄 Refresh</button>
<button class="btn btn-secondary" onclick="exportResults()">📥 Export CSV</button>
<button class="btn btn-secondary" onclick="clearResults()">🗑️ Clear Results</button>
| </div> | |
| <div class="info-box"> | |
| <p><strong>How to use:</strong> This dashboard shows evaluation metrics from your RAG pipeline. | |
| Use the tabs above to explore different aspects of performance. | |
| Try the <a href="/ragas-demo">RAGAS Evaluation</a> to run live evaluations.</p> | |
| </div> | |
| </div> | |
| <!-- Retrieval Section --> | |
| <div class="section" id="retrieval"> | |
| <div class="card"> | |
| <h2>Retrieval Metrics Over Time</h2> | |
| <div class="chart-container" id="retrievalChart"></div> | |
| </div> | |
| <div class="metrics-grid"> | |
| <div class="metric-card"> | |
| <div class="metric-label">MRR</div> | |
| <div class="metric-value" id="mrrValue">-</div> | |
| </div> | |
| <div class="metric-card success"> | |
| <div class="metric-label">Avg Precision</div> | |
| <div class="metric-value" id="avgPrecision">-</div> | |
| </div> | |
| <div class="metric-card"> | |
| <div class="metric-label">Avg Recall</div> | |
| <div class="metric-value" id="avgRecall">-</div> | |
| </div> | |
| </div> | |
| </div> | |
| <!-- Generation Section --> | |
| <div class="section" id="generation"> | |
| <div class="two-column"> | |
| <div class="card"> | |
| <h2>Generation Quality Over Time</h2> | |
| <div class="chart-container" id="generationChart"></div> | |
| </div> | |
| <div class="card"> | |
| <h2>Average Scores</h2> | |
| <div class="chart-container" id="generationBars"></div> | |
| </div> | |
| </div> | |
| </div> | |
| <!-- Faithfulness Section --> | |
| <div class="section" id="faithfulness"> | |
| <div class="two-column"> | |
| <div class="card"> | |
| <h2>Hallucination Rate</h2> | |
| <div class="chart-container" id="hallucinationChart"></div> | |
| </div> | |
| <div class="card"> | |
| <h2>Faithfulness Over Time</h2> | |
| <div class="chart-container" id="faithfulnessChart"></div> | |
| </div> | |
| </div> | |
| </div> | |
| <!-- Performance Section --> | |
| <div class="section" id="performance"> | |
| <div class="card"> | |
| <h2>Latency Distribution</h2> | |
| <div class="chart-container" id="latencyChart"></div> | |
| </div> | |
| <div class="metrics-grid"> | |
| <div class="metric-card"> | |
| <div class="metric-label">P50 Latency</div> | |
| <div class="metric-value" id="p50Value">-</div> | |
| <div class="metric-unit">ms</div> | |
| </div> | |
| <div class="metric-card warning"> | |
| <div class="metric-label">P95 Latency</div> | |
| <div class="metric-value" id="p95Value">-</div> | |
| <div class="metric-unit">ms</div> | |
| </div> | |
| <div class="metric-card"> | |
| <div class="metric-label">P99 Latency</div> | |
| <div class="metric-value" id="p99Value">-</div> | |
| <div class="metric-unit">ms</div> | |
| </div> | |
| <div class="metric-card"> | |
| <div class="metric-label">Cost per Query</div> | |
| <div class="metric-value" id="costValue">-</div> | |
| <div class="metric-unit">USD</div> | |
| </div> | |
| </div> | |
| </div> | |
| <!-- Failures Section --> | |
| <div class="section" id="failures"> | |
| <div class="two-column"> | |
| <div class="card"> | |
| <h2>Failure Modes</h2> | |
| <div class="chart-container" id="failureChart"></div> | |
| </div> | |
| <div class="card"> | |
| <h2>Failure Counts</h2> | |
| <div class="metrics-grid" style="margin-top: 20px;"> | |
| <div class="metric-card error"> | |
| <div class="metric-label">Total Failures</div> | |
| <div class="metric-value" id="totalFailures">0</div> | |
| </div> | |
| <div class="metric-card error"> | |
| <div class="metric-label">Hallucinations</div> | |
| <div class="metric-value" id="hallCount">0</div> | |
| </div> | |
| <div class="metric-card warning"> | |
| <div class="metric-label">Low Retrieval</div> | |
| <div class="metric-value" id="retCount">0</div> | |
| </div> | |
| <div class="metric-card warning"> | |
| <div class="metric-label">Low Generation</div> | |
| <div class="metric-value" id="genCount">0</div> | |
| </div> | |
| </div> | |
| </div> | |
| </div> | |
| <div class="card"> | |
| <h2>Recent Failures</h2> | |
| <div id="failureList"> | |
<p style="color: var(--text-muted); padding: 20px;">No failures detected! 🎉</p>
| </div> | |
| </div> | |
| </div> | |
| </div> | |
| <script> | |
// Origin of the page serving this dashboard; every API path below is appended to it.
const API_URL = window.location.origin;

// Most recently fetched JSON payloads, shared by the section loaders.
// Each starts as null and is filled in by the loader that fetches it.
let metricsData = null,
    timeseriesData = null,
    failureData = null;
// Tab navigation
/**
 * Switch the dashboard to one tab: show the matching `.section`, highlight
 * its tab button, and kick off that section's chart loader.
 *
 * @param {string} sectionId - `id` of the `.section` element to show
 *   (e.g. 'overview', 'retrieval', 'generation', 'faithfulness',
 *   'performance', 'failures').
 */
function showSection(sectionId) {
  const target = document.getElementById(sectionId);
  if (!target) {
    // Bail out before touching any class so an unknown id cannot hide every section.
    console.warn(`showSection: no section with id "${sectionId}"`);
    return;
  }

  document.querySelectorAll('.section').forEach(s => s.classList.remove('active'));
  target.classList.add('active');

  // Locate the tab button from the section id instead of the implicit global
  // `event`, which is deprecated and not available when this function is
  // called from script rather than from a click handler.
  const tabButtons = Array.from(document.querySelectorAll('.tab-btn'));
  tabButtons.forEach(b => b.classList.remove('active'));
  const inlineCall = `showSection('${sectionId}')`;
  const activeTab = tabButtons.find(b =>
    b.dataset.section === sectionId ||
    (b.getAttribute('onclick') || '').includes(inlineCall)
  );
  if (activeTab) {
    activeTab.classList.add('active');
  }

  // Load section-specific data ('overview' is filled by loadMetrics()).
  switch (sectionId) {
    case 'retrieval': loadRetrievalCharts(); break;
    case 'generation': loadGenerationCharts(); break;
    case 'faithfulness': loadFaithfulnessCharts(); break;
    case 'performance': loadPerformanceCharts(); break;
    case 'failures': loadFailureCharts(); break;
  }
}
/**
 * Fetch aggregate metrics from `/evaluation/metrics`, store them in
 * `metricsData`, and fill the Overview cards.
 *
 * The loading indicator is shown for the duration of the request. Failures
 * are logged to the console and not rethrown; on failure `metricsData` and
 * the cards keep their previous contents.
 *
 * @returns {Promise<void>}
 */
async function loadMetrics() {
  // Format an optional number with fixed decimals; anything non-numeric becomes '-'.
  const fmt = (value, digits) =>
    (typeof value === 'number' && Number.isFinite(value) ? value.toFixed(digits) : '-');
  const setText = (id, text) => {
    document.getElementById(id).textContent = text;
  };

  showLoading(true);
  try {
    const response = await fetch(`${API_URL}/evaluation/metrics`);
    if (!response.ok) {
      // fetch() resolves on HTTP errors too; without this check an error
      // payload would be stored as if it were metrics.
      throw new Error(`GET /evaluation/metrics failed with HTTP ${response.status}`);
    }
    metricsData = await response.json();

    // Update overview
    setText('totalEvaluations', metricsData.total_evaluations ?? 0);
    setText('overviewPrecision', fmt(metricsData.retrieval_precision_mean, 3));
    setText('overviewRecall', fmt(metricsData.retrieval_recall_mean, 3));
    setText('overviewBert', fmt(metricsData.bert_score_mean, 3));
    setText('overviewFaith', fmt(metricsData.faithfulness_mean, 3));
    // A missing rate is displayed as 0.0%.
    setText('overviewHalluc', ((metricsData.hallucination_rate || 0) * 100).toFixed(1) + '%');
    setText('overviewLatency', fmt(metricsData.latency_mean, 0));
    setText('overviewMRR', fmt(metricsData.mrr, 3));
  } catch (e) {
    console.error('Error loading metrics:', e);
  } finally {
    showLoading(false);
  }
}
/**
 * Render the Retrieval tab: a precision/recall line chart over query index
 * plus the MRR / precision / recall summary cards.
 *
 * Time-series data is fetched from `/evaluation/timeseries` on first use and
 * kept in `timeseriesData`. The summary cards are only filled when
 * `metricsData` is already loaded. Errors are logged, not rethrown.
 *
 * @returns {Promise<void>}
 */
async function loadRetrievalCharts() {
  try {
    if (!timeseriesData) {
      const response = await fetch(`${API_URL}/evaluation/timeseries`);
      if (!response.ok) {
        // Never cache an error body: the `!timeseriesData` gate above would
        // then keep serving it instead of refetching.
        throw new Error(`GET /evaluation/timeseries failed with HTTP ${response.status}`);
      }
      timeseriesData = await response.json();
    }

    if (!timeseriesData.query_idx || timeseriesData.query_idx.length === 0) {
      document.getElementById('retrievalChart').innerHTML = '<p style="padding: 40px; text-align: center; color: var(--text-muted);">No data yet. Run some evaluations first.</p>';
      return;
    }

    const precisionTrace = {
      x: timeseriesData.query_idx,
      y: timeseriesData.retrieval_precision,
      name: 'Precision',
      mode: 'lines+markers',
      line: { color: '#2563eb' }
    };
    const recallTrace = {
      x: timeseriesData.query_idx,
      y: timeseriesData.retrieval_recall,
      name: 'Recall',
      mode: 'lines+markers',
      line: { color: '#16a34a' }
    };
    const layout = {
      xaxis: { title: 'Query Index' },
      yaxis: { title: 'Score' },
      hovermode: 'x unified',
      paper_bgcolor: 'rgba(0,0,0,0)',
      plot_bgcolor: 'rgba(0,0,0,0)',
      // Take the text colour from the active theme's CSS variable.
      font: { color: getComputedStyle(document.body).getPropertyValue('--text-main').trim() }
    };
    // `responsive` is a Plotly config option (4th argument), not a layout key.
    Plotly.newPlot('retrievalChart', [precisionTrace, recallTrace], layout, { responsive: true });

    if (metricsData) {
      const fmt = value => (typeof value === 'number' ? value.toFixed(3) : '-');
      document.getElementById('mrrValue').textContent = fmt(metricsData.mrr);
      document.getElementById('avgPrecision').textContent = fmt(metricsData.retrieval_precision_mean);
      document.getElementById('avgRecall').textContent = fmt(metricsData.retrieval_recall_mean);
    }
  } catch (e) {
    console.error('Error loading retrieval charts:', e);
  }
}
/**
 * Render the Generation tab: ROUGE-L / BERTScore lines over query index and,
 * when `metricsData` is loaded, a bar chart of the mean generation scores.
 *
 * Time-series data is fetched from `/evaluation/timeseries` on first use and
 * kept in `timeseriesData`. Errors are logged, not rethrown.
 *
 * @returns {Promise<void>}
 */
async function loadGenerationCharts() {
  try {
    if (!timeseriesData) {
      const response = await fetch(`${API_URL}/evaluation/timeseries`);
      if (!response.ok) {
        // Never cache an error body: the `!timeseriesData` gate above would
        // then keep serving it instead of refetching.
        throw new Error(`GET /evaluation/timeseries failed with HTTP ${response.status}`);
      }
      timeseriesData = await response.json();
    }

    if (!timeseriesData.query_idx || timeseriesData.query_idx.length === 0) {
      // Same empty-state message as the Retrieval tab instead of a blank panel.
      document.getElementById('generationChart').innerHTML = '<p style="padding: 40px; text-align: center; color: var(--text-muted);">No data yet. Run some evaluations first.</p>';
      return;
    }

    // Take the text colour from the active theme's CSS variable.
    const textColor = getComputedStyle(document.body).getPropertyValue('--text-main').trim();
    // `responsive` is a Plotly config option (4th argument), not a layout key.
    const plotConfig = { responsive: true };

    const rougeTrace = {
      x: timeseriesData.query_idx,
      y: timeseriesData.rouge_l,
      name: 'ROUGE-L',
      mode: 'lines+markers',
      line: { color: '#f59e0b' }
    };
    const bertTrace = {
      x: timeseriesData.query_idx,
      y: timeseriesData.bert_score,
      name: 'BERTScore',
      mode: 'lines+markers',
      line: { color: '#16a34a' }
    };
    Plotly.newPlot('generationChart', [rougeTrace, bertTrace], {
      xaxis: { title: 'Query Index' },
      yaxis: { title: 'Score' },
      hovermode: 'x unified',
      paper_bgcolor: 'rgba(0,0,0,0)',
      plot_bgcolor: 'rgba(0,0,0,0)',
      font: { color: textColor }
    }, plotConfig);

    if (metricsData) {
      const barsTrace = {
        x: ['ROUGE-L', 'BERTScore', 'Answer Relevance'],
        y: [metricsData.rouge_l_mean, metricsData.bert_score_mean, metricsData.answer_relevance_mean],
        type: 'bar',
        marker: { color: ['#f59e0b', '#16a34a', '#2563eb'] }
      };
      Plotly.newPlot('generationBars', [barsTrace], {
        yaxis: { title: 'Score' },
        showlegend: false,
        paper_bgcolor: 'rgba(0,0,0,0)',
        plot_bgcolor: 'rgba(0,0,0,0)',
        font: { color: textColor }
      }, plotConfig);
    }
  } catch (e) {
    console.error('Error loading generation charts:', e);
  }
}
/**
 * Render the Faithfulness tab: a faithful-vs-hallucinated pie chart built
 * from `metricsData.hallucination_rate`, and a faithfulness line chart over
 * query index when time-series data exists.
 *
 * Loads metrics via `loadMetrics()` and time-series data from
 * `/evaluation/timeseries` if they are not cached yet. Errors are logged,
 * not rethrown.
 *
 * @returns {Promise<void>}
 */
async function loadFaithfulnessCharts() {
  try {
    if (!metricsData) await loadMetrics();
    if (!metricsData) {
      // loadMetrics() logs and swallows its own errors, so a failed load
      // leaves metricsData null; there is nothing to chart in that case.
      console.warn('Faithfulness charts skipped: metrics are not available.');
      return;
    }
    if (!timeseriesData) {
      const response = await fetch(`${API_URL}/evaluation/timeseries`);
      if (!response.ok) {
        // Never cache an error body: the `!timeseriesData` gate above would
        // then keep serving it instead of refetching.
        throw new Error(`GET /evaluation/timeseries failed with HTTP ${response.status}`);
      }
      timeseriesData = await response.json();
    }

    // Take the text colour from the active theme's CSS variable.
    const textColor = getComputedStyle(document.body).getPropertyValue('--text-main').trim();
    // `responsive` is a Plotly config option (4th argument), not a layout key.
    const plotConfig = { responsive: true };

    // A missing rate is treated as 0, i.e. the pie shows 100% faithful.
    const hallRate = metricsData.hallucination_rate || 0;
    const faithfulRate = 1 - hallRate;
    const pieTrace = {
      labels: ['Faithful Answers', 'Hallucinations'],
      values: [faithfulRate * 100, hallRate * 100],
      type: 'pie',
      marker: { colors: ['#16a34a', '#dc2626'] }
    };
    Plotly.newPlot('hallucinationChart', [pieTrace], {
      paper_bgcolor: 'rgba(0,0,0,0)',
      font: { color: textColor }
    }, plotConfig);

    if (timeseriesData.query_idx && timeseriesData.query_idx.length > 0) {
      const faithTrace = {
        x: timeseriesData.query_idx,
        y: timeseriesData.faithfulness,
        name: 'Faithfulness',
        mode: 'lines+markers',
        line: { color: '#2563eb', width: 2 },
        marker: { size: 6 }
      };
      Plotly.newPlot('faithfulnessChart', [faithTrace], {
        xaxis: { title: 'Query Index' },
        yaxis: { title: 'Score (0-1)' },
        paper_bgcolor: 'rgba(0,0,0,0)',
        plot_bgcolor: 'rgba(0,0,0,0)',
        font: { color: textColor }
      }, plotConfig);
    }
  } catch (e) {
    console.error('Error loading faithfulness charts:', e);
  }
}
/**
 * Render the Performance tab: a latency line chart over query index (when
 * time-series data exists) and the P50/P95/P99 latency and cost cards.
 *
 * Loads metrics via `loadMetrics()` and time-series data from
 * `/evaluation/timeseries` if they are not cached yet. Errors are logged,
 * not rethrown.
 *
 * @returns {Promise<void>}
 */
async function loadPerformanceCharts() {
  try {
    if (!metricsData) await loadMetrics();
    if (!metricsData) {
      // loadMetrics() logs and swallows its own errors, so a failed load
      // leaves metricsData null; there is nothing to show in that case.
      console.warn('Performance charts skipped: metrics are not available.');
      return;
    }
    if (!timeseriesData) {
      const response = await fetch(`${API_URL}/evaluation/timeseries`);
      if (!response.ok) {
        // Never cache an error body: the `!timeseriesData` gate above would
        // then keep serving it instead of refetching.
        throw new Error(`GET /evaluation/timeseries failed with HTTP ${response.status}`);
      }
      timeseriesData = await response.json();
    }

    if (timeseriesData.query_idx && timeseriesData.query_idx.length > 0) {
      const latencyTrace = {
        x: timeseriesData.query_idx,
        y: timeseriesData.latency_ms,
        mode: 'lines+markers',
        line: { color: '#2563eb' },
        marker: { size: 6 }
      };
      // `responsive` is a Plotly config option (4th argument), not a layout key.
      Plotly.newPlot('latencyChart', [latencyTrace], {
        xaxis: { title: 'Query Index' },
        yaxis: { title: 'Latency (ms)' },
        paper_bgcolor: 'rgba(0,0,0,0)',
        plot_bgcolor: 'rgba(0,0,0,0)',
        // Take the text colour from the active theme's CSS variable.
        font: { color: getComputedStyle(document.body).getPropertyValue('--text-main').trim() }
      }, { responsive: true });
    }

    const wholeNumber = value => (typeof value === 'number' ? value.toFixed(0) : '-');
    document.getElementById('p50Value').textContent = wholeNumber(metricsData.latency_p50);
    document.getElementById('p95Value').textContent = wholeNumber(metricsData.latency_p95);
    document.getElementById('p99Value').textContent = wholeNumber(metricsData.latency_p99);
    // NOTE(review): the value is divided by 100 before being shown under a
    // "USD" label — presumably the API reports cents; confirm against the backend.
    // A cost of 0 (or a missing cost) is displayed as '-'.
    document.getElementById('costValue').textContent = metricsData.cost_per_query ? (metricsData.cost_per_query / 100).toFixed(4) : '-';
  } catch (e) {
    console.error('Error loading performance charts:', e);
  }
}
/**
 * Render the Failures tab: a bar chart of failure-mode counts, the failure
 * count cards, and a short list of recent failures (up to 3 hallucinations
 * followed by up to 2 low-retrieval cases).
 *
 * Data is fetched from `/evaluation/failures` on every call and stored in
 * `failureData`. Errors are logged, not rethrown.
 *
 * @returns {Promise<void>}
 */
async function loadFailureCharts() {
  try {
    const response = await fetch(`${API_URL}/evaluation/failures`);
    if (!response.ok) {
      throw new Error(`GET /evaluation/failures failed with HTTP ${response.status}`);
    }
    failureData = await response.json();

    // Tolerate payloads that omit either map so one missing key does not
    // abort the whole tab.
    const modes = failureData.failure_modes || {};
    const details = failureData.failure_details || {};

    const failureChart = {
      x: Object.keys(modes),
      y: Object.values(modes),
      type: 'bar',
      marker: { color: '#dc2626' }
    };
    // `responsive` is a Plotly config option (4th argument), not a layout key.
    Plotly.newPlot('failureChart', [failureChart], {
      yaxis: { title: 'Count' },
      showlegend: false,
      paper_bgcolor: 'rgba(0,0,0,0)',
      plot_bgcolor: 'rgba(0,0,0,0)',
      // Take the text colour from the active theme's CSS variable.
      font: { color: getComputedStyle(document.body).getPropertyValue('--text-main').trim() }
    }, { responsive: true });

    document.getElementById('totalFailures').textContent = failureData.total_failures ?? 0;
    document.getElementById('hallCount').textContent = modes.hallucinations ?? 0;
    document.getElementById('retCount').textContent = modes.low_retrieval ?? 0;
    document.getElementById('genCount').textContent = modes.low_generation ?? 0;

    // Show recent failures
    const failureList = document.getElementById('failureList');
    const recentFailures = [
      ...(details.hallucinations || []).slice(0, 3),
      ...(details.low_retrieval || []).slice(0, 2)
    ];

    if (recentFailures.length === 0) {
      failureList.innerHTML = '<p style="padding: 20px; color: var(--text-muted);">No failures detected! 🎉</p>';
      return;
    }

    // Build the rows as DOM nodes and assign the query via textContent: the
    // query text comes from the API, and interpolating it into innerHTML
    // would let markup inside a query be parsed and executed.
    const rows = recentFailures.map(f => {
      const row = document.createElement('div');
      row.className = 'failure-item';

      const queryEl = document.createElement('div');
      queryEl.className = 'failure-item-query';
      queryEl.textContent = f.query ?? '';

      const scoreEl = document.createElement('div');
      scoreEl.className = 'failure-item-score';
      scoreEl.textContent = `Score: ${typeof f.score === 'number' ? f.score.toFixed(3) : '-'}`;

      row.append(queryEl, scoreEl);
      return row;
    });
    failureList.textContent = '';
    failureList.append(...rows);
  } catch (e) {
    console.error('Error loading failure analysis:', e);
  }
}
/**
 * Show or hide the `#loading` indicator.
 *
 * @param {*} show - truthy to display the indicator, falsy to hide it.
 */
function showLoading(show) {
  const indicator = document.getElementById('loading');
  if (show) {
    indicator.style.display = 'block';
  } else {
    indicator.style.display = 'none';
  }
}
/**
 * Download the evaluation results from `/evaluation/export` as a file named
 * `rag_evaluation_<YYYY-MM-DD>.csv` (date taken from the current UTC time).
 *
 * Any failure, including an HTTP error status, is reported with `alert`.
 *
 * @returns {Promise<void>}
 */
async function exportResults() {
  try {
    const response = await fetch(`${API_URL}/evaluation/export`);
    if (!response.ok) {
      // fetch() resolves on HTTP errors too; without this check the error
      // body would be saved to disk as if it were the CSV.
      throw new Error(`export request failed with HTTP ${response.status}`);
    }
    const blob = await response.blob();
    const url = window.URL.createObjectURL(blob);

    // Trigger the download through a temporary <a download> element.
    const link = document.createElement('a');
    link.href = url;
    link.download = `rag_evaluation_${new Date().toISOString().split('T')[0]}.csv`;
    link.click();

    window.URL.revokeObjectURL(url);
  } catch (e) {
    alert('Error exporting results: ' + e);
  }
}
/**
 * After user confirmation, POST to `/evaluation/reset`, drop the cached
 * `metricsData` / `timeseriesData` / `failureData`, and reload the overview
 * metrics.
 *
 * Any failure, including an HTTP error status, is reported with `alert` and
 * leaves the cached data in place.
 *
 * @returns {Promise<void>}
 */
async function clearResults() {
  if (!confirm('Are you sure you want to clear all results?')) {
    return;
  }
  try {
    const response = await fetch(`${API_URL}/evaluation/reset`, { method: 'POST' });
    if (!response.ok) {
      // fetch() resolves on HTTP errors too; do not claim success or discard
      // the caches when the server rejected the reset.
      throw new Error(`reset request failed with HTTP ${response.status}`);
    }
    metricsData = null;
    timeseriesData = null;
    failureData = null;
    alert('Results cleared!');
    loadMetrics();
  } catch (e) {
    alert('Error clearing results: ' + e);
  }
}
// Dark mode toggle
const themeToggle = document.getElementById("themeToggle");
const root = document.documentElement;

// Accessing localStorage can throw (e.g. a SecurityError when the browser
// blocks storage). An uncaught throw here would abort this script before the
// page-load listener at the bottom is registered, so both accesses are guarded.
function readStoredTheme() {
  try {
    return localStorage.getItem("theme");
  } catch (e) {
    return null;
  }
}
function storeTheme(theme) {
  try {
    localStorage.setItem("theme", theme);
  } catch (e) {
    // The theme still applies for this page view; it just will not persist.
  }
}

// Initial theme: the stored choice wins, otherwise follow the OS preference.
const savedTheme = readStoredTheme();
const prefersDark = window.matchMedia("(prefers-color-scheme: dark)").matches;
if (savedTheme) {
  root.setAttribute("data-theme", savedTheme);
} else if (prefersDark) {
  root.setAttribute("data-theme", "dark");
}

// The button label names the theme a click will switch TO.
function updateToggleText() {
  const isDark = root.getAttribute("data-theme") === "dark";
  themeToggle.textContent = isDark ? "☀️ Light mode" : "🌙 Dark mode";
}
updateToggleText();

themeToggle.addEventListener("click", () => {
  const isDark = root.getAttribute("data-theme") === "dark";
  const newTheme = isDark ? "light" : "dark";
  root.setAttribute("data-theme", newTheme);
  storeTheme(newTheme);
  updateToggleText();
});

// Load metrics on page load
window.addEventListener('load', loadMetrics);
| </script> | |
| </body> | |
| </html> |