// EDU_Recommender/server/src/recommender.js
// (Captured HF Spaces page header — commit 5bd3663, "Add EduRecommender HuggingFace Spaces app" by Omarrran — converted to a comment so the file parses.)
/**
* Recommendation pipeline.
*
* Steps:
* 1. Compute semantic similarity via HF Inference API (all-MiniLM-L6-v2).
* 2. Retrieve top-5 candidates by cosine similarity.
* 3. LLM re-ranking (Kimi-K2.5 via HF Router) -> top-3 with explanations.
* Falls back to rule-based ranking if LLM call fails.
*/
import { CONTENT_ITEMS } from "./data.js";
import { retrieveTopK } from "./embeddings.js";
// ---------------------------------------------------------------------------
// Style -> preferred formats mapping
// ---------------------------------------------------------------------------
/**
 * Maps a learner's `learning_style` to the content formats that suit it.
 * Frozen so the shared lookup table cannot be mutated at runtime (note: the
 * inner Sets themselves are still mutable — freeze is shallow).
 */
const STYLE_FORMAT_MAP = Object.freeze({
  visual: new Set(["video"]),
  reading: new Set(["slides", "lecture"]),
  "hands-on": new Set(["video", "lecture"]),
});
/** Ordinal difficulty scale so "distance" between difficulties is numeric. */
const DIFFICULTY_ORDER = Object.freeze({ Beginner: 0, Intermediate: 1, Advanced: 2 });
// ---------------------------------------------------------------------------
// Rule-based re-rank (fallback)
// ---------------------------------------------------------------------------
/**
 * Rule-based fallback ranking.
 *
 * Filters out already-viewed items and items longer than the learner's daily
 * time budget, then scores each remaining candidate as:
 *   interest-tag overlap ratio
 *   + 0.2 format bonus when the format suits the learning style
 *   - 0.15 per step of difficulty distance from the preferred difficulty
 * and returns the top 3 with a templated explanation.
 *
 * @param {object} profile - learner profile (interest_tags, learning_style,
 *   preferred_difficulty, time_per_day, viewed_content_ids).
 * @param {object[]} candidates - retrieval candidates to score.
 * @returns {object[]} up to 3 ranked recommendation objects.
 */
function ruleRerank(profile, candidates) {
  const userTags = new Set(profile.interest_tags || []);
  const prefFormats = STYLE_FORMAT_MAP[profile.learning_style] || new Set();
  // Unknown difficulty labels default to Intermediate (1).
  const prefDiff = DIFFICULTY_ORDER[profile.preferred_difficulty] ?? 1;
  // Set gives O(1) membership checks instead of Array.includes per candidate.
  const viewed = new Set(profile.viewed_content_ids || []);
  const scored = candidates
    .filter(
      (c) => !viewed.has(c.id) && c.duration_minutes <= profile.time_per_day
    )
    .map((item) => {
      const itemTags = new Set(item.tags);
      // Compute the shared tags once; the original computed this twice.
      const common = [...userTags].filter((t) => itemTags.has(t));
      // Normalise by the learner's tag count, guarding against division by 0.
      const overlap = common.length / Math.max(userTags.size, 1);
      const fmtBonus = prefFormats.has(item.format) ? 0.2 : 0;
      const diffPenalty =
        Math.abs((DIFFICULTY_ORDER[item.difficulty] ?? 1) - prefDiff) * 0.15;
      return { item, score: overlap + fmtBonus - diffPenalty, common };
    })
    .sort((a, b) => b.score - a.score)
    .slice(0, 3);
  return scored.map((s, i) => ({
    rank: i + 1,
    id: s.item.id,
    title: s.item.title,
    format: s.item.format,
    difficulty: s.item.difficulty,
    duration_minutes: s.item.duration_minutes,
    tags: s.item.tags,
    explanation: `Matched on tags (${s.common.join(", ")}), format fits your ${profile.learning_style} style, and difficulty is ${s.item.difficulty}.`,
    match_score: Math.round(s.score * 1000) / 1000,
  }));
}
// ---------------------------------------------------------------------------
// LLM re-rank via HuggingFace Router
// ---------------------------------------------------------------------------
// Endpoint and credential configuration, each overridable via the environment.
// Empty-string env values intentionally fall through to the defaults (||).
const {
  HF_TOKEN,
  OPENROUTER_API_KEY,
  LLM_MODEL: MODEL_FROM_ENV,
  LLM_BASE_URL: BASE_URL_FROM_ENV,
} = process.env;
const LLM_API_KEY = HF_TOKEN || OPENROUTER_API_KEY || "";
const LLM_MODEL = MODEL_FROM_ENV || "moonshotai/Kimi-K2.5:novita";
const LLM_BASE_URL =
  BASE_URL_FROM_ENV || "https://router.huggingface.co/v1/chat/completions";
// Instructions sent as the system message; the model must reply with a bare
// JSON array of exactly three ranked picks.
const SYSTEM_PROMPT = `You are an expert educational content recommender. Re-rank the candidate items for the learner below and return the top 3 as a strict JSON array.
CONSTRAINTS:
1. Prefer items matching the learner's preferred difficulty.
2. Each item's duration_minutes must fit the learner's time_per_day budget.
3. Favour formats that suit the learning style (visual -> video, reading -> slides/lecture, hands-on -> video/lecture).
4. Never recommend already-viewed content.
5. Provide a concise, personalised explanation (1-2 sentences) per pick.
Return ONLY a JSON array with exactly 3 objects, each having:
rank (int 1-3), id (int), title (str), format (str), difficulty (str),
duration_minutes (int), tags (list[str]), explanation (str).
No text outside the JSON array.`;
/**
 * LLM-based re-ranking via the chat-completions endpoint at LLM_BASE_URL.
 *
 * Sends the learner profile plus the candidates (with the internal
 * `_simScore` field stripped) and parses a strict JSON array of top-3 picks
 * out of the model reply, tolerating markdown fences and surrounding text.
 * Missing fields on a pick are back-filled from the matching candidate.
 *
 * @param {object} profile - learner profile used to build the user prompt.
 * @param {object[]} candidates - retrieval candidates to re-rank.
 * @returns {Promise<object[]>} up to 3 ranked recommendation objects.
 * @throws {Error} on a non-2xx HTTP status, an empty completion, or when no
 *   parseable JSON array can be extracted from the model output.
 */
async function llmRerank(profile, candidates) {
  const userPrompt = [
    "### Learner Profile",
    `- Name: ${profile.name}`,
    `- Goal: ${profile.goal}`,
    `- Learning style: ${profile.learning_style}`,
    `- Preferred difficulty: ${profile.preferred_difficulty}`,
    `- Time per day: ${profile.time_per_day} minutes`,
    `- Interests: ${(profile.interest_tags || []).join(", ")}`,
    `- Already viewed IDs: ${JSON.stringify(profile.viewed_content_ids || [])}`,
    "",
    "### Candidate Items",
    "```json",
    JSON.stringify(
      // Drop the internal similarity score before showing items to the LLM.
      candidates.map(({ _simScore, ...rest }) => rest),
      null,
      2
    ),
    "```",
    "",
    "Re-rank and return the top 3 as a JSON array.",
  ].join("\n");
  const resp = await fetch(LLM_BASE_URL, {
    method: "POST",
    headers: {
      Authorization: `Bearer ${LLM_API_KEY}`,
      "Content-Type": "application/json",
    },
    body: JSON.stringify({
      model: LLM_MODEL,
      messages: [
        { role: "system", content: SYSTEM_PROMPT },
        { role: "user", content: userPrompt },
      ],
      temperature: 0.3,
      max_tokens: 2048,
    }),
  });
  if (!resp.ok) {
    const body = await resp.text();
    throw new Error(`LLM returned ${resp.status}: ${body}`);
  }
  const data = await resp.json();
  const msg = data.choices?.[0]?.message;
  // Some router backends put the text under `reasoning` instead of `content`.
  let text = (msg?.content || msg?.reasoning || "").trim();
  if (!text) throw new Error("LLM returned empty response.");
  // Strip markdown fences
  text = text.replace(/```(?:json)?\s*/g, "").replace(/```/g, "").trim();
  // Extract the first [...] span; tolerates prose around the JSON array.
  const match = text.match(/\[[\s\S]*\]/);
  if (!match) throw new Error("No JSON array in LLM response.");
  const parsed = JSON.parse(match[0]);
  if (!Array.isArray(parsed) || parsed.length === 0) {
    throw new Error("Invalid LLM JSON.");
  }
  // id -> candidate lookup for back-filling fields the LLM omitted.
  const candMap = Object.fromEntries(candidates.map((c) => [c.id, c]));
  return parsed.slice(0, 3).map((item, i) => {
    const fb = candMap[item.id];
    return {
      // Re-number locally so rank gaps/duplicates from the LLM are ignored.
      rank: i + 1,
      id: item.id,
      title: item.title || fb?.title || "",
      format: item.format || fb?.format || "",
      difficulty: item.difficulty || fb?.difficulty || "",
      // ?? (not ||) so legitimate falsy values (0 minutes, empty tag list)
      // from the LLM are not silently replaced by the fallback.
      duration_minutes: item.duration_minutes ?? fb?.duration_minutes ?? 0,
      tags: item.tags ?? fb?.tags ?? [],
      explanation: item.explanation || "Recommended based on your profile.",
    };
  });
}
// ---------------------------------------------------------------------------
// Main pipeline
// ---------------------------------------------------------------------------
/**
 * Main recommendation pipeline.
 *
 * 1. Semantic retrieval: fetch the top-5 most similar unseen items via
 *    retrieveTopK (which reports which retrieval method it used).
 * 2. Re-ranking: LLM-based when an API key is configured and the call
 *    succeeds; otherwise the deterministic rule-based scorer.
 *
 * @param {object} profile - learner profile; `viewed_content_ids` are
 *   excluded from retrieval and `user_id` defaults to "custom".
 * @returns {Promise<object>} { user_id, recommendations, method,
 *   pipeline_log, llm_reasoning, total_duration_ms }.
 */
export async function recommend(profile) {
  const pipelineLog = [];
  const tStart = performance.now();
  // Steps 1-2 — semantic similarity retrieval via all-MiniLM-L6-v2
  const t0 = performance.now();
  const retrieval = await retrieveTopK(
    profile,
    CONTENT_ITEMS,
    5,
    profile.viewed_content_ids || []
  );
  const candidates = retrieval.candidates;
  const retrievalMs = Math.round(performance.now() - t0);
  // NOTE(review): assumes every viewed id exists in CONTENT_ITEMS; a stale id
  // would under-count the eligible pool (display-only, so harmless).
  const eligible = CONTENT_ITEMS.length - (profile.viewed_content_ids || []).length;
  pipelineLog.push({
    step: "Semantic similarity retrieval",
    status: "done",
    detail: `Retrieved top-5 from ${eligible} items via ${retrieval.method} (${retrievalMs}ms)`,
    duration_ms: retrievalMs,
  });
  // Step 3 — LLM re-rank, with rule-based fallback on any LLM failure.
  let recommendations = null;
  let reasoning = null;
  let method = "rule-based";
  if (LLM_API_KEY) {
    try {
      const t3 = performance.now();
      recommendations = await llmRerank(profile, candidates);
      const llmMs = Math.round(performance.now() - t3);
      method = "llm";
      reasoning = "LLM-based constraint-aware re-ranking.";
      pipelineLog.push({
        step: "LLM re-ranking",
        status: "done",
        detail: `Ranked via LLM -> top 3 (${llmMs}ms)`,
        duration_ms: llmMs,
      });
    } catch (err) {
      // Deliberate best-effort: log and fall through to the rule-based path.
      console.warn(
        "[llm_rerank] Failed:",
        err.message,
        "— falling back to rules."
      );
      recommendations = null;
    }
  }
  if (!recommendations) {
    const t4 = performance.now();
    recommendations = ruleRerank(profile, candidates);
    const ruleMs = Math.round(performance.now() - t4);
    method = "rule-based";
    reasoning =
      "Rule-based scoring: tag overlap + format match + difficulty proximity.";
    pipelineLog.push({
      step: "Rule-based ranking",
      status: "done",
      detail: `Ranked via rules -> top 3 (${ruleMs}ms)`,
      duration_ms: ruleMs,
    });
  }
  const totalMs = Math.round(performance.now() - tStart);
  return {
    user_id: profile.user_id || "custom",
    recommendations,
    // Fix: `method` was tracked but never returned; expose which ranking
    // path ("llm" or "rule-based") produced the results.
    method,
    pipeline_log: pipelineLog,
    llm_reasoning: reasoning,
    total_duration_ms: totalMs,
  };
}