// NOTE(review): a Hugging Face Spaces status banner ("Spaces: Sleeping") was
// captured into this file during extraction; it is not part of the module.
/**
 * Recommendation pipeline.
 *
 * Steps:
 *  1. Compute semantic similarity via HF Inference API (all-MiniLM-L6-v2).
 *  2. Retrieve top-5 candidates by cosine similarity.
 *  3. LLM re-ranking (Kimi-K2.5 via HF Router) -> top-3 with explanations.
 *     Falls back to rule-based ranking if LLM call fails.
 */
import { CONTENT_ITEMS } from "./data.js";
import { retrieveTopK } from "./embeddings.js";
// ---------------------------------------------------------------------------
// Style -> preferred formats mapping
// ---------------------------------------------------------------------------

/**
 * Maps a learner's `learning_style` to the content formats that suit it.
 * Frozen so the shared lookup table cannot be mutated at runtime (freeze is
 * shallow, but the Sets are never reassigned).
 * @type {Readonly<Record<string, Set<string>>>}
 */
const STYLE_FORMAT_MAP = Object.freeze({
  visual: new Set(["video"]),
  reading: new Set(["slides", "lecture"]),
  "hands-on": new Set(["video", "lecture"]),
});

/** Ordinal positions for difficulty levels, used to measure proximity. */
const DIFFICULTY_ORDER = Object.freeze({
  Beginner: 0,
  Intermediate: 1,
  Advanced: 2,
});

// ---------------------------------------------------------------------------
// Rule-based re-rank (fallback)
// ---------------------------------------------------------------------------

/**
 * Deterministic fallback ranking used when the LLM path is unavailable.
 *
 * Filters out already-viewed items and items exceeding the learner's daily
 * time budget, then scores the remainder by:
 *   tag-overlap ratio + format bonus (0.2) - difficulty distance * 0.15
 *
 * @param {object} profile - Learner profile; reads interest_tags,
 *   learning_style, preferred_difficulty, time_per_day, viewed_content_ids.
 * @param {Array<object>} candidates - Candidate content items.
 * @returns {Array<object>} Up to 3 recommendations with rank, item metadata,
 *   a templated explanation, and match_score rounded to 3 decimals.
 */
function ruleRerank(profile, candidates) {
  const userTags = new Set(profile.interest_tags || []);
  const prefFormats = STYLE_FORMAT_MAP[profile.learning_style] || new Set();
  const prefDiff = DIFFICULTY_ORDER[profile.preferred_difficulty] ?? 1;
  // Set lookup is O(1); the original ran Array.includes per candidate.
  const viewed = new Set(profile.viewed_content_ids || []);

  const scored = candidates
    .filter(
      (c) => !viewed.has(c.id) && c.duration_minutes <= profile.time_per_day
    )
    .map((item) => {
      const itemTags = new Set(item.tags);
      // Intersect once; the original computed this filter twice (for the
      // overlap ratio and again for the explanation's `common` list).
      const common = [...userTags].filter((t) => itemTags.has(t));
      const overlap = common.length / Math.max(userTags.size, 1);
      const fmtBonus = prefFormats.has(item.format) ? 0.2 : 0;
      const diffPenalty =
        Math.abs((DIFFICULTY_ORDER[item.difficulty] ?? 1) - prefDiff) * 0.15;
      return { item, score: overlap + fmtBonus - diffPenalty, common };
    })
    .sort((a, b) => b.score - a.score)
    .slice(0, 3);

  return scored.map((s, i) => ({
    rank: i + 1,
    id: s.item.id,
    title: s.item.title,
    format: s.item.format,
    difficulty: s.item.difficulty,
    duration_minutes: s.item.duration_minutes,
    tags: s.item.tags,
    explanation: `Matched on tags (${s.common.join(", ")}), format fits your ${profile.learning_style} style, and difficulty is ${s.item.difficulty}.`,
    match_score: Math.round(s.score * 1000) / 1000,
  }));
}
// ---------------------------------------------------------------------------
// LLM re-rank via HuggingFace Router
// ---------------------------------------------------------------------------

// API key: prefer HF_TOKEN, fall back to OPENROUTER_API_KEY. An empty string
// disables the LLM path (recommend() checks truthiness before calling us).
const LLM_API_KEY =
  process.env.HF_TOKEN || process.env.OPENROUTER_API_KEY || "";
// Chat model id routed through the HF Router; override with LLM_MODEL.
const LLM_MODEL = process.env.LLM_MODEL || "moonshotai/Kimi-K2.5:novita";
// OpenAI-compatible chat-completions endpoint; override with LLM_BASE_URL.
const LLM_BASE_URL =
  process.env.LLM_BASE_URL ||
  "https://router.huggingface.co/v1/chat/completions";

// Instructs the model to emit ONLY a strict JSON array of exactly 3 objects
// so llmRerank below can machine-parse the reply.
const SYSTEM_PROMPT = `You are an expert educational content recommender. Re-rank the candidate items for the learner below and return the top 3 as a strict JSON array.
CONSTRAINTS:
1. Prefer items matching the learner's preferred difficulty.
2. Each item's duration_minutes must fit the learner's time_per_day budget.
3. Favour formats that suit the learning style (visual -> video, reading -> slides/lecture, hands-on -> video/lecture).
4. Never recommend already-viewed content.
5. Provide a concise, personalised explanation (1-2 sentences) per pick.
Return ONLY a JSON array with exactly 3 objects, each having:
rank (int 1-3), id (int), title (str), format (str), difficulty (str),
duration_minutes (int), tags (list[str]), explanation (str).
No text outside the JSON array.`;

/**
 * Re-rank candidates via the LLM and return the top 3 with explanations.
 *
 * Builds a profile + candidates prompt, POSTs to the chat-completions
 * endpoint, then parses a JSON array out of the reply (tolerating markdown
 * fences and surrounding prose). Missing fields on each returned item are
 * back-filled from the matching candidate.
 *
 * @param {object} profile - Learner profile (name, goal, learning_style,
 *   preferred_difficulty, time_per_day, interest_tags, viewed_content_ids).
 * @param {Array<object>} candidates - Retrieval candidates; an internal
 *   `_simScore` property, if present, is stripped before prompting.
 * @returns {Promise<Array<object>>} Top-3 recommendation objects.
 * @throws {Error} On non-2xx HTTP status, an empty completion, or a reply
 *   with no parseable JSON array — recommend() catches this and falls back.
 */
async function llmRerank(profile, candidates) {
  const userPrompt = [
    "### Learner Profile",
    `- Name: ${profile.name}`,
    `- Goal: ${profile.goal}`,
    `- Learning style: ${profile.learning_style}`,
    `- Preferred difficulty: ${profile.preferred_difficulty}`,
    `- Time per day: ${profile.time_per_day} minutes`,
    `- Interests: ${(profile.interest_tags || []).join(", ")}`,
    `- Already viewed IDs: ${JSON.stringify(profile.viewed_content_ids || [])}`,
    "",
    "### Candidate Items",
    "```json",
    JSON.stringify(
      candidates.map(({ _simScore, ...rest }) => rest),
      null,
      2
    ),
    "```",
    "",
    "Re-rank and return the top 3 as a JSON array.",
  ].join("\n");

  const resp = await fetch(LLM_BASE_URL, {
    method: "POST",
    headers: {
      Authorization: `Bearer ${LLM_API_KEY}`,
      "Content-Type": "application/json",
    },
    body: JSON.stringify({
      model: LLM_MODEL,
      messages: [
        { role: "system", content: SYSTEM_PROMPT },
        { role: "user", content: userPrompt },
      ],
      temperature: 0.3,
      max_tokens: 2048,
    }),
  });
  if (!resp.ok) {
    const body = await resp.text();
    throw new Error(`LLM returned ${resp.status}: ${body}`);
  }

  const data = await resp.json();
  const msg = data.choices?.[0]?.message;
  // Some router backends put the text in `reasoning` instead of `content`.
  let text = (msg?.content || msg?.reasoning || "").trim();
  if (!text) throw new Error("LLM returned empty response.");
  // Strip markdown fences, then extract the outermost JSON array.
  text = text.replace(/```(?:json)?\s*/g, "").replace(/```/g, "").trim();
  const match = text.match(/\[[\s\S]*\]/);
  if (!match) throw new Error("No JSON array in LLM response.");
  const parsed = JSON.parse(match[0]);
  if (!Array.isArray(parsed) || parsed.length === 0) {
    throw new Error("Invalid LLM JSON.");
  }

  // Back-fill any fields the model omitted from the original candidate.
  const candMap = Object.fromEntries(candidates.map((c) => [c.id, c]));
  return parsed.slice(0, 3).map((item, i) => {
    const fb = candMap[item.id];
    return {
      rank: i + 1, // re-number locally; don't trust the model's rank field
      id: item.id,
      title: item.title || fb?.title || "",
      format: item.format || fb?.format || "",
      difficulty: item.difficulty || fb?.difficulty || "",
      // `??` (not `||`): a legitimate 0-minute value must not be replaced.
      duration_minutes: item.duration_minutes ?? fb?.duration_minutes ?? 0,
      tags: item.tags ?? fb?.tags ?? [],
      explanation: item.explanation || "Recommended based on your profile.",
    };
  });
}
// ---------------------------------------------------------------------------
// Main pipeline
// ---------------------------------------------------------------------------

/**
 * Full recommendation pipeline for one learner profile.
 *
 * Steps 1-2: semantic retrieval (embed + cosine top-5 via retrieveTopK).
 * Step 3:    LLM re-ranking when an API key is configured; any LLM failure
 *            falls back to the deterministic rule-based ranker.
 *
 * @param {object} profile - Learner profile; see ruleRerank for fields read.
 * @returns {Promise<object>} { user_id, recommendations, pipeline_log,
 *   llm_reasoning, ranking_method, total_duration_ms }
 */
export async function recommend(profile) {
  const pipelineLog = [];
  const tStart = performance.now();

  // Steps 1-2 — semantic similarity retrieval via all-MiniLM-L6-v2
  const t0 = performance.now();
  const retrieval = await retrieveTopK(
    profile,
    CONTENT_ITEMS,
    5,
    profile.viewed_content_ids || []
  );
  const candidates = retrieval.candidates;
  const retrievalMs = Math.round(performance.now() - t0);
  const eligible =
    CONTENT_ITEMS.length - (profile.viewed_content_ids || []).length;
  pipelineLog.push({
    step: "Semantic similarity retrieval",
    status: "done",
    detail: `Retrieved top-5 from ${eligible} items via ${retrieval.method} (${retrievalMs}ms)`,
    duration_ms: retrievalMs,
  });

  // Step 3 — LLM re-rank, or rule-based fallback on any failure.
  let recommendations;
  let reasoning = null;
  let method = "rule-based";
  if (LLM_API_KEY) {
    try {
      const t3 = performance.now();
      recommendations = await llmRerank(profile, candidates);
      const llmMs = Math.round(performance.now() - t3);
      method = "llm";
      reasoning = "LLM-based constraint-aware re-ranking.";
      pipelineLog.push({
        step: "LLM re-ranking",
        status: "done",
        detail: `Ranked via LLM -> top 3 (${llmMs}ms)`,
        duration_ms: llmMs,
      });
    } catch (err) {
      // Deliberate best-effort: log and fall through to the rule ranker.
      console.warn(
        "[llm_rerank] Failed:",
        err.message,
        "— falling back to rules."
      );
      recommendations = null;
    }
  }
  if (!recommendations) {
    const t4 = performance.now();
    recommendations = ruleRerank(profile, candidates);
    const ruleMs = Math.round(performance.now() - t4);
    method = "rule-based";
    reasoning =
      "Rule-based scoring: tag overlap + format match + difficulty proximity.";
    pipelineLog.push({
      step: "Rule-based ranking",
      status: "done",
      detail: `Ranked via rules -> top 3 (${ruleMs}ms)`,
      duration_ms: ruleMs,
    });
  }

  const totalMs = Math.round(performance.now() - tStart);
  return {
    user_id: profile.user_id || "custom",
    recommendations,
    pipeline_log: pipelineLog,
    llm_reasoning: reasoning,
    // `method` was previously computed but never returned — expose which
    // ranker produced the list (backward-compatible addition).
    ranking_method: method,
    total_duration_ms: totalMs,
  };
}