Spaces:

Abdallah4Zain
/

TasteEngine

Sleeping

App Files Files Community

TasteEngine / templates /evaluation.html

Abdallah4z

Refactor explainer methods and enhance evaluation template with detailed CF approach explanations

b1410eb 22 days ago

raw

history blame contribute delete

24.7 kB

	{% extends "base.html" %}
	{% block title %}TasteEngine — Evaluation{% endblock %}

	{% block head_extra %}
	<script src="https://cdn.jsdelivr.net/npm/chart.js@4.4.7/dist/chart.umd.min.js"></script>
	{% endblock %}

	{% block breadcrumb %}
	{# Breadcrumb trail (Home / Evaluate). The separator is decorative, so it is hidden from assistive tech; the last item is flagged as the current page. #}
	<ul class="breadcrumb">
	  <li><a href="/">Home</a><span class="sep" aria-hidden="true">/</span></li>
	  <li class="current" aria-current="page">Evaluate</li>
	</ul>
	{% endblock %}

	{% block content %}
	{% from "macros.html" import glass, spinner %}

	<div x-data="evalApp()">
	{% call glass("Evaluation & Comparison Dashboard") %}
	{# Intro card: run button plus two progress bars (overall step, and the streamed sub-step of the method currently running). #}
	<p class="text-muted" style="font-size: 0.9rem;">
	  Comprehensive analysis of all recommendation methods and approaches using 6 evaluation metrics.
	  Methods are evaluated one at a time — results appear as they complete.
	</p>
	<div class="mt-2">
	  {# Explicit type so the button can never act as a form submit if the card is ever placed inside a form. #}
	  <button type="button" class="btn btn-primary" @click="runEvaluation()" :disabled="running">
	    <span x-show="!running">Run Evaluation</span>
	    <span x-show="running">Running...</span>
	  </button>
	</div>

	<div x-show="running" x-cloak x-transition class="mt-2">
	  <div class="flex-between gap-sm" style="margin-bottom:0.5rem;">
	    <span class="text-muted" style="font-size:0.85rem;">
	      <span x-text="progressText"></span>
	      <span x-show="subProgress.total > 0" class="text-dim" style="font-size:0.75rem;">
	        — <span x-text="subProgress.current + '/' + subProgress.total"></span>
	      </span>
	    </span>
	    <span class="text-dim" style="font-size:0.8rem;" x-text="progress.current + '/' + progress.total"></span>
	  </div>
	  {# Overall progress track; exposed as a progressbar so screen readers can announce it. #}
	  <div role="progressbar" aria-label="Overall evaluation progress" aria-valuemin="0" aria-valuemax="100" :aria-valuenow="Math.round(progressPct)" style="height:4px;background:rgba(255,255,255,0.08);border-radius:2px;overflow:hidden;">
	    <div style="height:100%;border-radius:2px;background:linear-gradient(90deg,var(--primary),var(--secondary));transition:width 0.4s ease;" :style="{ width: progressPct + '%' }"></div>
	  </div>
	  {# Sub-progress track; only shown while the current method reports a non-zero total. #}
	  <div x-show="subProgress.total > 0" class="mt-1" role="progressbar" aria-label="Current method progress" aria-valuemin="0" aria-valuemax="100" :aria-valuenow="Math.round(subPct)" style="height:3px;background:rgba(255,255,255,0.04);border-radius:2px;overflow:hidden;">
	    <div style="height:100%;border-radius:2px;background:var(--primary);transition:width 0.2s ease;" :style="{ width: subPct + '%' }"></div>
	  </div>
	</div>
	{% endcall %}

	<div x-show="hasResults || running" x-cloak x-transition.duration.400ms>

	{# KPI summary cards. Each card falls back to a dash while no best method / approach is known, instead of rendering "via null". #}
	<div class="kpi-grid" x-show="hasResults" x-cloak>
	  <div class="kpi-card kpi-primary">
	    <div class="kpi-label">Best RMSE</div>
	    <div class="kpi-value"><span x-text="animatedKpis.bestRmse"></span></div>
	    <div class="kpi-unit" x-text="bestCfMethod ? 'via ' + methodLabel(bestCfMethod) : '—'"></div>
	  </div>
	  <div class="kpi-card kpi-success">
	    <div class="kpi-label">Best Method</div>
	    <div class="kpi-value" style="font-size:1rem;" x-text="bestCfMethod ? methodLabel(bestCfMethod) : '—'"></div>
	    <div class="kpi-unit">Lowest RMSE</div>
	  </div>
	  <div class="kpi-card kpi-secondary">
	    <div class="kpi-label">Coverage</div>
	    <div class="kpi-value"><span x-text="animatedKpis.coverage"></span></div>
	    <div class="kpi-unit" x-text="bestCfMethod ? 'by ' + methodLabel(bestCfMethod) : '—'"></div>
	  </div>
	  <div class="kpi-card kpi-warning">
	    <div class="kpi-label">Best Precision@5</div>
	    <div class="kpi-value"><span x-text="animatedKpis.bestPrecision"></span></div>
	    <div class="kpi-unit" x-text="bestApproach || '—'"></div>
	  </div>
	</div>

	{% call glass("Collaborative Filtering — Method Comparison") %}
	{# Per-method metrics table (one row per finished CF method) followed by the RMSE/MAE bar chart. #}
	<div class="flex-between mb-2">
	  <span class="text-dim" style="font-size:0.8rem;" x-show="hasResults">
	    Rows appear as each method finishes
	  </span>
	  <button type="button" class="btn btn-ghost btn-sm" @click="exportCSV()" x-show="hasResults">Export CSV</button>
	</div>
	<div style="overflow-x: auto;">
	  <table class="eval-table">
	    <thead>
	      <tr>
	        <th scope="col">Method</th>
	        <th scope="col">RMSE ↓</th>
	        <th scope="col">MAE ↓</th>
	        <th scope="col">Precision@5 ↑</th>
	        <th scope="col">Recall@5 ↑</th>
	        <th scope="col">F1@5 ↑</th>
	        <th scope="col">Coverage ↑</th>
	      </tr>
	    </thead>
	    <tbody>
	      <template x-for="row in cfData" :key="row.method">
	        <tr :class="{ 'best-row': row.method === bestCfMethod }" x-cloak x-transition>
	          <td>
	            <strong x-text="methodLabel(row.method)"></strong>
	            <span x-show="row.method === bestCfMethod" class="badge-best">BEST</span>
	          </td>
	          <td x-text="row.RMSE?.toFixed(4)"></td>
	          <td x-text="row.MAE?.toFixed(4)"></td>
	          <td x-text="row['Precision@5']?.toFixed(4)"></td>
	          <td x-text="row['Recall@5']?.toFixed(4)"></td>
	          <td x-text="row['F1@5']?.toFixed(4)"></td>
	          <td x-text="row.Coverage?.toFixed(4)"></td>
	        </tr>
	      </template>
	      {# NOTE(review): this card lives inside a container that is hidden until hasResults or running is set, so this row can only appear after a run that produced no rows. #}
	      <template x-if="cfData.length === 0 && !running">
	        <tr><td colspan="7" class="text-center text-dim" style="padding:2rem;">Click "Run Evaluation" to start</td></tr>
	      </template>
	    </tbody>
	  </table>
	</div>

	<div class="mt-2" x-show="bestCfMethod" x-cloak>
	  <span class="badge-best">Best CF Method: <span x-text="methodLabel(bestCfMethod)"></span></span>
	</div>

	<div class="mt-2 chart-container" x-show="chartReady" x-cloak>
	  {# Canvas content is invisible to assistive tech; give the chart an accessible name. #}
	  <canvas id="cfChart" role="img" aria-label="Bar chart of RMSE and MAE for each collaborative filtering method"></canvas>
	</div>
	{% endcall %}

	<div x-show="approachesLoaded" x-cloak x-transition>
	{% call glass("Approach Comparison (CF vs Content-Based vs Knowledge-Based)") %}
	{# Precision/recall table and bar chart for the three recommendation approaches. #}
	<div style="overflow-x: auto;">
	  <table class="eval-table">
	    <thead>
	      <tr>
	        <th scope="col">Approach</th>
	        <th scope="col">Precision@5 ↑</th>
	        <th scope="col">Recall@5 ↑</th>
	      </tr>
	    </thead>
	    <tbody>
	      <template x-for="row in approachData" :key="row.approach">
	        <tr :class="{ 'best-row': row.approach === bestApproach }">
	          <td>
	            <strong x-text="row.approach"></strong>
	            <span x-show="row.approach === bestApproach" class="badge-best">BEST</span>
	          </td>
	          {# toFixed() yields a non-empty string, so the fallback only applies when the metric is missing. #}
	          <td x-text="row['Precision@5']?.toFixed(4) || 'N/A'"></td>
	          <td x-text="row['Recall@5']?.toFixed(4) || 'N/A'"></td>
	        </tr>
	      </template>
	    </tbody>
	  </table>
	</div>
	<div class="mt-2 chart-container">
	  <canvas id="approachChart" role="img" aria-label="Bar chart of Precision@5 and Recall@5 for each recommendation approach"></canvas>
	</div>
	{% endcall %}

	{% call glass("Multi-Method Radar Comparison") %}
	{# Radar chart drawn by renderRadarChart(); the canvas gets an accessible name because its pixels are opaque to assistive tech. #}
	<p class="text-muted" style="font-size: 0.85rem; margin-bottom: 1rem;">
	  Normalized comparison across all metrics for every CF method.
	</p>
	<div class="chart-container">
	  <canvas id="radarChart" role="img" aria-label="Radar chart of normalized metrics for each collaborative filtering method"></canvas>
	</div>
	{% endcall %}

	{% call glass("Analysis & Insights") %}
	{# Narrative card. The first two paragraphs are injected at runtime through x-html from analysis.bestMethod and analysis.bestApproach; the last two blocks are static copy. #}
	<div class="analysis-block">
	<h4>Which method performs best?</h4>
	<p x-html="analysis.bestMethod"></p>
	</div>
	<div class="analysis-block mt-2">
	<h4>Which approach performs best?</h4>
	<p x-html="analysis.bestApproach"></p>
	</div>
	{# Static guidance: which approach suits which data condition. #}
	<div class="analysis-block mt-2">
	<h4>Under what conditions does each perform better?</h4>
	<ul>
	<li><strong>Dense user data:</strong> Collaborative Filtering (leverages peer patterns)</li>
	<li><strong>Cold-start user:</strong> Knowledge-Based (no history needed)</li>
	<li><strong>Cold-start item:</strong> Content-Based (matches item features)</li>
	<li><strong>Explicit constraints:</strong> Knowledge-Based (precise filtering)</li>
	<li><strong>Niche categories:</strong> Content-Based (item features override sparsity)</li>
	</ul>
	</div>
	<div class="analysis-block mt-2">
	<h4>Why do differences occur?</h4>
	<p>Differences arise from algorithmic biases: CF relies on the collective behavior of users, making it powerful for popular items but weak for new users/items. Content-Based depends on feature representation quality and tends to overspecialize. Knowledge-Based is deterministic and transparent but requires explicit user input and domain rules. The choice depends on data availability, user context, and the specific recommendation goal.</p>
	</div>
	{% endcall %}
	</div>

	</div>
	</div>
	{% endblock %}

	{% block scripts %}
	<script>
	// Human-readable names for the collaborative-filtering method ids.
	// Insertion order is significant: it is also the order in which the methods are evaluated.
	const methodLabels = {
	  'user_based': 'User-Based',
	  'item_based': 'Item-Based',
	  'svd': 'SVD',
	  'knn': 'KNN',
	  'slope_one': 'Slope One',
	};
	// Evaluation order, derived from the label table so the two cannot drift apart.
	const cfMethodNames = Object.keys(methodLabels);

	document.addEventListener('alpine:init', () => {
	Alpine.data('evalApp', () => ({
	// ---- Reactive state for the evaluation dashboard ----
	// True while runEvaluation() is in flight; bound to the run button's disabled state and the progress area.
	running: false,
	// Set to true when a run starts; reveals the results area.
	hasResults: false,
	// One result object per CF method that returned data, pushed as each method finishes.
	cfData: [],
	// Rows of the approach comparison, taken from the /api/evaluate/approaches response.
	approachData: [],
	// `method` id of the CF result with the lowest RMSE seen so far (null until one exists).
	bestCfMethod: null,
	// `best_approach` value from the approaches response.
	bestApproach: null,
	// Chart.js instances, kept so the previous chart can be destroyed before re-rendering.
	cfChartInstance: null,
	approachChartInstance: null,
	radarChartInstance: null,
	// Controls visibility of the CF chart container.
	chartReady: false,
	// Controls visibility of the approach / radar / analysis cards.
	approachesLoaded: false,
	// HTML snippets rendered with x-html in the "Analysis & Insights" card.
	analysis: {
	bestMethod: 'Run evaluation to find out.',
	bestApproach: 'Run evaluation to find out.'
	},
	// Strings shown in the KPI cards; animateCounter() rewrites them frame by frame.
	animatedKpis: { bestRmse: '—', coverage: '—', bestPrecision: '—' },
	// Overall step counter. `total` is replaced with cfMethodNames.length + 1 when a run starts.
	// NOTE(review): `label` is written by runEvaluation() but not read anywhere in the visible template.
	progress: { current: 0, total: 5, label: '' },
	// Progress within the currently streaming method; total === 0 hides the secondary bar.
	subProgress: { current: 0, total: 0 },

	get progressText() {
	if (!this.running) return '';
	const labels = { user_based: 'User-Based', item_based: 'Item-Based', svd: 'SVD', knn: 'KNN', slope_one: 'Slope One' };
	if (this.progress.current < cfMethodNames.length) {
	const name = cfMethodNames[this.progress.current];
	return `Evaluating ${labels[name] \|\| name}...`;
	}
	if (this.progress.current === cfMethodNames.length) return 'Comparing approaches...';
	return 'Done';
	},

	get progressPct() {
	if (this.progress.total === 0) return 0;
	return (this.progress.current / this.progress.total) * 100;
	},

	get subPct() {
	if (this.subProgress.total === 0) return 0;
	return (this.subProgress.current / this.subProgress.total) * 100;
	},

	methodLabel(id) {
	return methodLabels[id] \|\| id;
	},

	async evaluateMethod(method) {
	const streamMethods = ['svd', 'slope_one'];

	if (streamMethods.includes(method)) {
	return await this.evaluateWithStream(method);
	}

	const controller = new AbortController();
	const timeoutId = setTimeout(() => controller.abort(), 35000);

	try {
	const r = await fetch(`/api/evaluate/cf/${method}`, { signal: controller.signal });
	clearTimeout(timeoutId);
	const data = await r.json();
	if (!data.error) {
	return data;
	} else {
	toast(`${methodLabels[method] \|\| method}: ${data.error}`, 'error');
	return null;
	}
	} catch (err) {
	clearTimeout(timeoutId);
	if (err.name === 'AbortError') {
	toast(`${methodLabels[method] \|\| method} timed out (35s limit), skipping`, 'warning');
	} else {
	toast(`Error evaluating ${methodLabels[method] \|\| method}: ${err.message}`, 'error');
	}
	return null;
	}
	},

	async evaluateWithStream(method) {
	const label = method === 'svd' ? 'epoch' : 'item';
	this.subProgress = { current: 0, total: 0 };

	try {
	const r = await fetch(`/api/evaluate/cf/${method}/stream`);
	const reader = r.body.getReader();
	const decoder = new TextDecoder();
	let buffer = '';

	while (true) {
	const { done, value } = await reader.read();
	if (done) break;

	buffer += decoder.decode(value, { stream: true });
	const lines = buffer.split('\n');
	buffer = lines.pop() \|\| '';

	for (const line of lines) {
	if (!line.trim()) continue;
	try {
	const msg = JSON.parse(line);

	if (msg.type === 'progress') {
	this.subProgress = { current: msg.current, total: msg.total };
	} else if (msg.type === 'phase') {
	this.subProgress = { current: 0, total: 0 };
	} else if (msg.type === 'result') {
	return msg.data;
	} else if (msg.type === 'error') {
	toast(`${methodLabels[method] \|\| method}: ${msg.message}`, 'error');
	return null;
	}
	} catch (e) {
	// skip malformed lines
	}
	}
	}
	return null;
	} catch (err) {
	toast(`Error evaluating ${methodLabels[method] \|\| method}: ${err.message}`, 'error');
	return null;
	}
	},

	async runEvaluation() {
	this.running = true;
	this.hasResults = true;
	this.cfData = [];
	this.bestCfMethod = null;
	this.chartReady = false;
	this.approachesLoaded = false;
	this.progress = { current: 0, total: cfMethodNames.length + 1, label: '' };
	this.subProgress = { current: 0, total: 0 };

	for (let i = 0; i < cfMethodNames.length; i++) {
	const method = cfMethodNames[i];
	this.progress.current = i;
	this.progress.label = method;

	if (method === 'svd' \|\| method === 'slope_one') {
	this.subProgress = { current: 0, total: 0 };
	}

	const result = await this.evaluateMethod(method);
	if (result) {
	this.cfData.push(result);
	if (!this.bestCfMethod \|\| result.RMSE < this.getBestRmse()) {
	this.bestCfMethod = result.method;
	}
	}
	}

	this.subProgress = { current: 0, total: 0 };
	this.progress.current = cfMethodNames.length;
	this.progress.label = 'approaches';

	await this.$nextTick();
	this.renderCfChart();

	const best = this.cfData.find(r => r.method === this.bestCfMethod);
	if (best) {
	this.animatedKpis.bestRmse = '0.0000';
	this.animateCounter(this.animatedKpis, 'bestRmse', best.RMSE, 4, 800);
	this.animatedKpis.coverage = '0.0000';
	this.animateCounter(this.animatedKpis, 'coverage', best.Coverage, 4, 800);
	}

	try {
	const r = await fetch('/api/evaluate/approaches');
	const data = await r.json();
	if (!data.error) {
	this.approachData = data.approaches \|\| [];
	this.bestApproach = data.best_approach;
	this.approachesLoaded = true;
	this.analysis.bestMethod = this.buildBestMethodAnalysis();
	this.analysis.bestApproach = this.buildBestApproachAnalysis(data);

	await this.$nextTick();
	this.renderApproachChart(data);
	this.renderRadarChart(data);

	const bestApp = data.approaches?.find(a => a.approach === data.best_approach);
	if (bestApp) {
	this.animatedKpis.bestPrecision = '0.0000';
	this.animateCounter(this.animatedKpis, 'bestPrecision', bestApp['Precision@5'] \|\| 0, 4, 800);
	}
	toast('Evaluation complete', 'success');
	}
	} catch (err) {
	toast('Approach comparison error: ' + err.message, 'error');
	}

	this.running = false;
	this.chartReady = true;
	},

	getBestRmse() {
	const best = this.cfData.find(r => r.method === this.bestCfMethod);
	return best ? best.RMSE : Infinity;
	},

	buildBestMethodAnalysis() {
	const best = this.cfData.find(r => r.method === this.bestCfMethod);
	if (!best) return 'Run evaluation to find out.';

	const methodExplanations = {
	user_based: 'simplicity and direct peer comparison — works well when the target user has enough similar neighbors.',
	item_based: 'direct item-item similarity — effective on dense data where co-rated item pairs are plentiful.',
	svd: 'latent factor modeling — captures hidden patterns in the user-item matrix, excelling on sparse, large-scale data.',
	knn: 'nearest-neighbor aggregation — robust to noise, but performance depends on the quality of the neighbor set.',
	slope_one: 'simplicity and speed of prediction — performs well when rating deviations are consistent across users.',
	};
	const why = methodExplanations[this.bestCfMethod] \|\| 'strong performance on this dataset.';

	return `<strong>${methodLabels[this.bestCfMethod] \|\| this.bestCfMethod}</strong> achieves the lowest RMSE (${best.RMSE.toFixed(4)}) among all CF methods. ` +
	`${methodLabels[this.bestCfMethod] \|\| this.bestCfMethod} excels because of ${why}`;
	},

	buildBestApproachAnalysis(data) {
	if (!data.best_approach) return 'Run evaluation to find out.';
	const bestApp = data.approaches?.find(a => a.approach === data.best_approach);
	const precVal = bestApp ? bestApp['Precision@5'].toFixed(4) : '—';
	const second = [...(data.approaches \|\| [])].sort((a, b) => (b['Precision@5'] \|\| 0) - (a['Precision@5'] \|\| 0));
	const runnerUp = second.length > 1 ? second[1].approach : null;

	const explanations = {
	'Collaborative Filtering': `strongest when rating data is dense (${this.cfData.length} methods averaged). Works by leveraging peer behavior patterns.`,
	'Content-Based': `best when item features are informative (${data.approaches?.length \|\| 0} approaches compared). Shines for cold-start items and niche categories where collaborative signals are weak.`,
	'Knowledge-Based': `ideal for cold-start users with explicit constraints. Deterministic and transparent, but limited by the quality of hand-crafted rules.`,
	};
	const why = explanations[data.best_approach] \|\| 'strong performance on this dataset.';

	let text = `<strong>${data.best_approach}</strong> achieves the highest Precision@5 (${precVal}) on this dataset. ` +
	`${data.best_approach} performs ${why}`;
	if (runnerUp) {
	text += ` <strong>${runnerUp}</strong> follows as the runner-up.`;
	}
	return text;
	},

	animateCounter(obj, key, target, decimals = 4, duration = 800) {
	const start = performance.now();
	const step = (now) => {
	const elapsed = now - start;
	const pct = Math.min(elapsed / duration, 1);
	const eased = 1 - Math.pow(1 - pct, 3);
	obj[key] = (target * eased).toFixed(decimals);
	if (pct < 1) requestAnimationFrame(step);
	};
	requestAnimationFrame(step);
	},

	exportCSV() {
	if (!this.cfData.length) { toast('No data to export', 'warning'); return; }
	let csv = 'Method,RMSE,MAE,Precision@5,Recall@5,F1@5,Coverage\n';
	this.cfData.forEach(r => {
	csv += `${methodLabels[r.method] \|\| r.method},${r.RMSE},${r.MAE},${r['Precision@5']},${r['Recall@5']},${r['F1@5']},${r.Coverage}\n`;
	});
	if (this.approachData.length) {
	csv += '\nApproach,Precision@5,Recall@5\n';
	this.approachData.forEach(r => {
	csv += `${r.approach},${r['Precision@5'] \|\| 'N/A'},${r['Recall@5'] \|\| 'N/A'}\n`;
	});
	}
	const blob = new Blob([csv], { type: 'text/csv' });
	const url = URL.createObjectURL(blob);
	const a = document.createElement('a');
	a.href = url; a.download = 'tasteengine_evaluation.csv';
	a.click(); URL.revokeObjectURL(url);
	toast('CSV exported', 'success');
	},

	renderCfChart() {
	if (this.cfChartInstance) this.cfChartInstance.destroy();
	if (this.cfData.length === 0) return;

	const labels = this.cfData.map(r => methodLabels[r.method] \|\| r.method);
	const ctx = document.getElementById('cfChart').getContext('2d');

	this.cfChartInstance = new Chart(ctx, {
	type: 'bar',
	data: {
	labels: labels,
	datasets: [
	{
	label: 'RMSE (lower is better)',
	data: this.cfData.map(r => r.RMSE),
	backgroundColor: 'rgba(108, 99, 255, 0.6)',
	borderColor: '#6C63FF',
	borderWidth: 1
	},
	{
	label: 'MAE (lower is better)',
	data: this.cfData.map(r => r.MAE),
	backgroundColor: 'rgba(255, 101, 132, 0.6)',
	borderColor: '#FF6584',
	borderWidth: 1
	}
	]
	},
	options: {
	responsive: true, maintainAspectRatio: false,
	plugins: { legend: { labels: { color: 'rgba(255,255,255,0.7)' } } },
	scales: {
	x: { ticks: { color: 'rgba(255,255,255,0.5)' }, grid: { color: 'rgba(255,255,255,0.05)' } },
	y: { beginAtZero: true, ticks: { color: 'rgba(255,255,255,0.5)' }, grid: { color: 'rgba(255,255,255,0.05)' } }
	}
	}
	});
	},

	renderApproachChart(data) {
	if (this.approachChartInstance) this.approachChartInstance.destroy();
	const approaches = data.approaches \|\| [];
	if (approaches.length === 0) return;

	const ctx = document.getElementById('approachChart').getContext('2d');
	this.approachChartInstance = new Chart(ctx, {
	type: 'bar',
	data: {
	labels: approaches.map(r => r.approach),
	datasets: [
	{ label: 'Precision@5', data: approaches.map(r => r['Precision@5'] \|\| 0), backgroundColor: 'rgba(108, 99, 255, 0.6)', borderColor: '#6C63FF', borderWidth: 1 },
	{ label: 'Recall@5', data: approaches.map(r => r['Recall@5'] \|\| 0), backgroundColor: 'rgba(255, 101, 132, 0.6)', borderColor: '#FF6584', borderWidth: 1 }
	]
	},
	options: {
	responsive: true, maintainAspectRatio: false,
	plugins: { legend: { labels: { color: 'rgba(255,255,255,0.7)' } } },
	scales: {
	x: { ticks: { color: 'rgba(255,255,255,0.5)' }, grid: { color: 'rgba(255,255,255,0.05)' } },
	y: { beginAtZero: true, ticks: { color: 'rgba(255,255,255,0.5)' }, grid: { color: 'rgba(255,255,255,0.05)' } }
	}
	}
	});
	},

	renderRadarChart(data) {
	if (this.radarChartInstance) this.radarChartInstance.destroy();
	if (this.cfData.length === 0) return;

	const metrics = ['RMSE', 'MAE', 'Precision@5', 'Recall@5', 'F1@5', 'Coverage'];
	const colors = ['rgba(108, 99, 255, 0.3)', 'rgba(255, 101, 132, 0.3)', 'rgba(34, 197, 94, 0.3)', 'rgba(245, 158, 11, 0.3)', 'rgba(59, 130, 246, 0.3)'];
	const borders = ['#6C63FF', '#FF6584', '#22C55E', '#F59E0B', '#3B82F6'];

	const ctx = document.getElementById('radarChart').getContext('2d');
	this.radarChartInstance = new Chart(ctx, {
	type: 'radar',
	data: {
	labels: metrics,
	datasets: this.cfData.map((m, i) => ({
	label: methodLabels[m.method] \|\| m.method,
	data: metrics.map(metric => {
	const vals = this.cfData.map(x => x[metric] \|\| 0);
	const mn = Math.min(...vals);
	const mx = Math.max(...vals);
	return mx === mn ? 1 : ((m[metric] \|\| 0) - mn) / (mx - mn);
	}),
	backgroundColor: colors[i % colors.length],
	borderColor: borders[i % borders.length],
	borderWidth: 2,
	pointBackgroundColor: borders[i % borders.length]
	}))
	},
	options: {
	responsive: true, maintainAspectRatio: false,
	plugins: { legend: { labels: { color: 'rgba(255,255,255,0.7)' } } },
	scales: {
	r: {
	beginAtZero: true, max: 1,
	ticks: { color: 'rgba(255,255,255,0.4)', backdropColor: 'transparent' },
	grid: { color: 'rgba(255,255,255,0.08)' },
	angleLines: { color: 'rgba(255,255,255,0.08)' },
	pointLabels: { color: 'rgba(255,255,255,0.7)', font: { size: 11 } }
	}
	}
	}
	});
	},
	}));
	});
	</script>
	{% endblock %}