/**
* Recommendation pipeline.
*
* Steps:
* 1. Compute semantic similarity via HF Inference API (all-MiniLM-L6-v2).
* 2. Retrieve top-5 candidates by cosine similarity.
* 3. LLM re-ranking (Kimi-K2.5 via HF Router) -> top-3 with explanations.
* Falls back to rule-based ranking if LLM call fails.
*/
import { CONTENT_ITEMS } from "./data.js";
import { retrieveTopK } from "./embeddings.js";
// ---------------------------------------------------------------------------
// Style -> preferred formats mapping
// ---------------------------------------------------------------------------
// Preferred content formats per learning style. Frozen: shared lookup tables
// that must never be mutated by callers.
const STYLE_FORMAT_MAP = Object.freeze({
  visual: new Set(["video"]),
  reading: new Set(["slides", "lecture"]),
  "hands-on": new Set(["video", "lecture"]),
});

// Numeric ordering so difficulty distance can be measured arithmetically.
const DIFFICULTY_ORDER = Object.freeze({
  Beginner: 0,
  Intermediate: 1,
  Advanced: 2,
});

// ---------------------------------------------------------------------------
// Rule-based re-rank (fallback)
// ---------------------------------------------------------------------------

/**
 * Deterministic fallback ranking used when the LLM call is unavailable or
 * fails.
 *
 * Hard filters: drop already-viewed items and items that exceed the learner's
 * daily time budget. Score = tag-overlap ratio + format bonus (0.2 when the
 * format suits the learning style) - difficulty penalty (0.15 per level of
 * distance from the preferred difficulty). Unknown difficulties default to
 * Intermediate.
 *
 * @param {object} profile - Learner profile: interest_tags, learning_style,
 *   preferred_difficulty, time_per_day, viewed_content_ids.
 * @param {object[]} candidates - Retrieved content items.
 * @returns {object[]} Up to 3 recommendation records (rank, id, title, format,
 *   difficulty, duration_minutes, tags, explanation, match_score).
 */
function ruleRerank(profile, candidates) {
  const userTags = new Set(profile.interest_tags || []);
  const prefFormats = STYLE_FORMAT_MAP[profile.learning_style] || new Set();
  const prefDiff = DIFFICULTY_ORDER[profile.preferred_difficulty] ?? 1;
  const scored = candidates
    .filter((c) => {
      if ((profile.viewed_content_ids || []).includes(c.id)) return false;
      if (c.duration_minutes > profile.time_per_day) return false;
      return true;
    })
    .map((item) => {
      const itemTags = new Set(item.tags);
      // Compute the shared tags once; the original derived this intersection
      // twice (once for the ratio, once for the explanation).
      const common = [...userTags].filter((t) => itemTags.has(t));
      const overlap = common.length / Math.max(userTags.size, 1);
      const fmtBonus = prefFormats.has(item.format) ? 0.2 : 0;
      const diffPenalty =
        Math.abs((DIFFICULTY_ORDER[item.difficulty] ?? 1) - prefDiff) * 0.15;
      return { item, score: overlap + fmtBonus - diffPenalty, common };
    })
    .sort((a, b) => b.score - a.score)
    .slice(0, 3);
  return scored.map((s, i) => ({
    rank: i + 1,
    id: s.item.id,
    title: s.item.title,
    format: s.item.format,
    difficulty: s.item.difficulty,
    duration_minutes: s.item.duration_minutes,
    tags: s.item.tags,
    explanation: `Matched on tags (${s.common.join(", ")}), format fits your ${profile.learning_style} style, and difficulty is ${s.item.difficulty}.`,
    match_score: Math.round(s.score * 1000) / 1000,
  }));
}
// ---------------------------------------------------------------------------
// LLM re-rank via HuggingFace Router
// ---------------------------------------------------------------------------
// Credential for the router; an HF token takes precedence, then an
// OpenRouter key. Empty string disables the LLM path (see recommend()).
const LLM_API_KEY =
  process.env.HF_TOKEN || process.env.OPENROUTER_API_KEY || "";
// Model slug; the ":novita" suffix selects the provider on the HF Router.
const LLM_MODEL = process.env.LLM_MODEL || "moonshotai/Kimi-K2.5:novita";
// OpenAI-compatible chat-completions endpoint (overridable via env).
const LLM_BASE_URL =
  process.env.LLM_BASE_URL ||
  "https://router.huggingface.co/v1/chat/completions";
// System prompt sent verbatim to the LLM: constrains it to return a strict
// JSON array of exactly 3 picks. Do not edit casually — llmRerank's parser
// relies on the array-only output contract.
const SYSTEM_PROMPT = `You are an expert educational content recommender. Re-rank the candidate items for the learner below and return the top 3 as a strict JSON array.
CONSTRAINTS:
1. Prefer items matching the learner's preferred difficulty.
2. Each item's duration_minutes must fit the learner's time_per_day budget.
3. Favour formats that suit the learning style (visual -> video, reading -> slides/lecture, hands-on -> video/lecture).
4. Never recommend already-viewed content.
5. Provide a concise, personalised explanation (1-2 sentences) per pick.
Return ONLY a JSON array with exactly 3 objects, each having:
rank (int 1-3), id (int), title (str), format (str), difficulty (str),
duration_minutes (int), tags (list[str]), explanation (str).
No text outside the JSON array.`;
/**
 * Re-rank candidates with an LLM via the HF Router (OpenAI-compatible API).
 *
 * Builds a learner-profile + candidates prompt, requests a strict JSON array
 * of 3 picks, and defensively parses the response (strips markdown fences,
 * extracts the first bracketed array). Missing fields in the LLM output are
 * backfilled from the original candidate records.
 *
 * @param {object} profile - Learner profile (name, goal, learning_style, ...).
 * @param {object[]} candidates - Retrieval candidates; `_simScore` is stripped
 *   before sending.
 * @returns {Promise<object[]>} Up to 3 ranked recommendation records.
 * @throws {Error} On HTTP errors, timeout, empty response, or unparsable JSON
 *   — the caller falls back to rule-based ranking.
 */
async function llmRerank(profile, candidates) {
  const userPrompt = [
    "### Learner Profile",
    `- Name: ${profile.name}`,
    `- Goal: ${profile.goal}`,
    `- Learning style: ${profile.learning_style}`,
    `- Preferred difficulty: ${profile.preferred_difficulty}`,
    `- Time per day: ${profile.time_per_day} minutes`,
    `- Interests: ${(profile.interest_tags || []).join(", ")}`,
    `- Already viewed IDs: ${JSON.stringify(profile.viewed_content_ids || [])}`,
    "",
    "### Candidate Items",
    "```json",
    JSON.stringify(
      // Drop the internal similarity score before showing items to the LLM.
      candidates.map(({ _simScore, ...rest }) => rest),
      null,
      2
    ),
    "```",
    "",
    "Re-rank and return the top 3 as a JSON array.",
  ].join("\n");
  const resp = await fetch(LLM_BASE_URL, {
    method: "POST",
    headers: {
      Authorization: `Bearer ${LLM_API_KEY}`,
      "Content-Type": "application/json",
    },
    body: JSON.stringify({
      model: LLM_MODEL,
      messages: [
        { role: "system", content: SYSTEM_PROMPT },
        { role: "user", content: userPrompt },
      ],
      temperature: 0.3,
      max_tokens: 2048,
    }),
    // Fail fast instead of hanging the pipeline indefinitely; the resulting
    // TimeoutError is caught upstream and triggers the rule-based fallback.
    signal: AbortSignal.timeout(60_000),
  });
  if (!resp.ok) {
    const body = await resp.text();
    throw new Error(`LLM returned ${resp.status}: ${body}`);
  }
  const data = await resp.json();
  const msg = data.choices?.[0]?.message;
  // Some router backends put the text in `reasoning` instead of `content`.
  let text = (msg?.content || msg?.reasoning || "").trim();
  if (!text) throw new Error("LLM returned empty response.");
  // Strip markdown fences the model may wrap the JSON in.
  text = text.replace(/```(?:json)?\s*/g, "").replace(/```/g, "").trim();
  // Extract the outermost JSON array (tolerates stray prose around it).
  const match = text.match(/\[[\s\S]*\]/);
  if (!match) throw new Error("No JSON array in LLM response.");
  const parsed = JSON.parse(match[0]);
  if (!Array.isArray(parsed) || parsed.length === 0) {
    throw new Error("Invalid LLM JSON.");
  }
  // Index candidates by id so hallucinated/missing fields can be backfilled.
  const candMap = Object.fromEntries(candidates.map((c) => [c.id, c]));
  return parsed.slice(0, 3).map((item, i) => {
    const fb = candMap[item.id];
    return {
      // Re-number ranks locally; don't trust the LLM's rank field.
      rank: i + 1,
      id: item.id,
      title: item.title || fb?.title || "",
      format: item.format || fb?.format || "",
      difficulty: item.difficulty || fb?.difficulty || "",
      // ?? (not ||) so a legitimate 0-minute duration is preserved.
      duration_minutes: item.duration_minutes ?? fb?.duration_minutes ?? 0,
      tags: item.tags || fb?.tags || [],
      explanation: item.explanation || "Recommended based on your profile.",
    };
  });
}
// ---------------------------------------------------------------------------
// Main pipeline
// ---------------------------------------------------------------------------
/**
 * Run the full recommendation pipeline for one learner.
 *
 * 1-2. Semantic retrieval: embed + cosine top-5 via retrieveTopK.
 * 3.   Re-rank: LLM when an API key is configured (with rule-based fallback
 *      on any LLM failure), otherwise rule-based directly.
 *
 * @param {object} profile - Learner profile; `viewed_content_ids` and
 *   `user_id` are optional.
 * @returns {Promise<object>} { user_id, recommendations, pipeline_log,
 *   llm_reasoning, total_duration_ms }.
 */
export async function recommend(profile) {
  const pipelineLog = [];
  const tStart = performance.now();
  // Steps 1-2 — semantic similarity retrieval via all-MiniLM-L6-v2
  const t0 = performance.now();
  const retrieval = await retrieveTopK(
    profile,
    CONTENT_ITEMS,
    5,
    profile.viewed_content_ids || []
  );
  const candidates = retrieval.candidates;
  const retrievalMs = Math.round(performance.now() - t0);
  // NOTE(review): assumes every viewed id exists in CONTENT_ITEMS; stale ids
  // would undercount `eligible`. Display-only, so harmless — but confirm.
  const eligible = CONTENT_ITEMS.length - (profile.viewed_content_ids || []).length;
  pipelineLog.push({
    step: "Semantic similarity retrieval",
    status: "done",
    detail: `Retrieved top-5 from ${eligible} items via ${retrieval.method} (${retrievalMs}ms)`,
    duration_ms: retrievalMs,
  });
  // Step 3 — LLM re-rank or rule-based fallback
  let recommendations;
  let reasoning = null;
  let method = "rule-based";
  if (LLM_API_KEY) {
    try {
      const t3 = performance.now();
      recommendations = await llmRerank(profile, candidates);
      const llmMs = Math.round(performance.now() - t3);
      method = "llm";
      reasoning = "LLM-based constraint-aware re-ranking.";
      pipelineLog.push({
        step: "LLM re-ranking",
        status: "done",
        detail: `Ranked via LLM -> top 3 (${llmMs}ms)`,
        duration_ms: llmMs,
      });
    } catch (err) {
      // Deliberate best-effort: log and fall through to the rule path below.
      console.warn(
        "[llm_rerank] Failed:",
        err.message,
        "— falling back to rules."
      );
      recommendations = null;
    }
  }
  // Fallback path: runs when no API key is set or the LLM call failed.
  if (!recommendations) {
    const t4 = performance.now();
    recommendations = ruleRerank(profile, candidates);
    const ruleMs = Math.round(performance.now() - t4);
    method = "rule-based";
    reasoning =
      "Rule-based scoring: tag overlap + format match + difficulty proximity.";
    pipelineLog.push({
      step: "Rule-based ranking",
      status: "done",
      detail: `Ranked via rules -> top 3 (${ruleMs}ms)`,
      duration_ms: ruleMs,
    });
  }
  const totalMs = Math.round(performance.now() - tStart);
  return {
    user_id: profile.user_id || "custom",
    recommendations,
    pipeline_log: pipelineLog,
    llm_reasoning: reasoning,
    total_duration_ms: totalMs,
  };
}