/**
 * Persistent cache for app moderation verdicts, backed by a
 * HuggingFace dataset. Twin of `categoryCache.js` - same rationale
 * (the Docker Space filesystem is wiped on every rebuild, so we
 * persist to a dataset to avoid re-running the whole LLM sweep on
 * each cold start), same in-memory-hot / dataset-cold tiering.
 *
 * Storage shape
 * ─────────────
 *   <dataset>/cache/moderation.json
 *
 *   {
 *     "version": 1,
 *     "policyVersion": 1,
 *     "updatedAt": "2026-05-29T09:00:00Z",
 *     "entries": {
 *       "<spaceId>": {
 *         "lastModified": "2026-05-08T22:13:01Z",
 *         "decision": "allow" | "block" | "review",
 *         "category": "none",
 *         "reason": "llm: allow/none",
 *         "severity": null | "hard" | "soft",
 *         "source": "regex" | "llm",
 *         "moderatedAt": "2026-05-29T09:00:00Z",
 *         "policyVersion": 1
 *       }
 *     }
 *   }
 *
 * `entries` holds the automated verdicts (regex + LLM), re-computed
 * when a Space's README changes or the policy version bumps. The
 * MANUAL killswitch lives elsewhere: a hand-edited
 * `config/blocked-app-list.json` on the official dataset (see `index.js`),
 * so anyone with dataset write access can block an app without
 * touching this cache.
 */

import { commit, createRepo } from '@huggingface/hub';

import { MODERATION_POLICY_VERSION } from './moderate.js';

// Single store control-plane dataset (shared with official-app-list.json,
// blocked-app-list.json and categories.json - see index.js `STORE_DATASET`).
// The HF_TOKEN must have WRITE access here since this cache commits
// `moderation.json`. Precedence: a dedicated `HF_MODERATION_DATASET`
// wins (escape hatch), else the unified `STORE_DATASET`, else the
// pollen-robotics default.
const DEFAULT_DATASET = 'pollen-robotics/reachy_mini_store_data';

// Path of the cache file inside the dataset repo. Used both to build
// the read URL in `load()` and as the commit target in `flush()`.
const CACHE_FILE_PATH = 'cache/moderation.json';
// Written to the top-level `version` field of the serialized payload.
// Note: `load()` does not read this field back; staleness is decided
// per entry via `policyVersion` instead.
const CACHE_FORMAT_VERSION = 1;

/**
 * In-memory map of moderation verdicts keyed by Space id, with a
 * best-effort load from, and flush to, a single JSON file in a
 * HuggingFace dataset.
 */
class ModerationCache {
  /**
   * Resolve the target dataset from the environment
   * (`HF_MODERATION_DATASET`, then `STORE_DATASET`, then the built-in
   * default) and start with an empty, clean cache.
   */
  constructor() {
    this.entries = new Map();
    this.repoName =
      process.env.HF_MODERATION_DATASET ||
      process.env.STORE_DATASET ||
      DEFAULT_DATASET;
    this.loaded = false;
    // Promise of the in-flight / completed load, shared by every caller
    // of `load()` so none of them resolves before the data is in memory.
    this.loadPromise = null;
    this.dirty = false;
    this.flushing = false;
  }

  /**
   * Load the dataset cache into memory. Best-effort: a missing
   * dataset / 404 / malformed JSON collapses to "start empty, the
   * warmup repopulates". Never rejects.
   *
   * Only the first call triggers the fetch. Calls made while that fetch
   * is still in flight wait for the same promise instead of returning
   * immediately with an empty cache.
   *
   * @returns {Promise<void>} Resolves once the load attempt has finished.
   */
  async load() {
    // `loadPromise` is null when `loaded` was set by hand; in that case
    // this returns immediately, exactly like a completed load.
    if (this.loaded) return this.loadPromise;
    this.loaded = true;
    this.loadPromise = this.loadFromDataset();
    return this.loadPromise;
  }

  /**
   * Internal worker for `load()`: fetch the cache file and merge every
   * usable entry into `this.entries`. All failures are logged and
   * swallowed so the returned promise never rejects.
   *
   * @returns {Promise<void>}
   */
  async loadFromDataset() {
    const url = `https://huggingface.co/datasets/${this.repoName}/resolve/main/${CACHE_FILE_PATH}`;
    try {
      const res = await fetch(url, {
        headers: process.env.HF_TOKEN
          ? { Authorization: `Bearer ${process.env.HF_TOKEN}` }
          : undefined,
      });
      if (!res.ok) {
        if (res.status === 404) {
          console.log(
            `[ModerationCache] Dataset ${this.repoName} or ${CACHE_FILE_PATH} not found yet - starting empty.`,
          );
        } else {
          console.warn(
            `[ModerationCache] HTTP ${res.status} loading cache from ${this.repoName}, starting empty.`,
          );
        }
        return;
      }
      const data = await res.json();

      const entries = data?.entries || {};
      let kept = 0;
      let stale = 0;
      let invalid = 0;
      for (const [id, raw] of Object.entries(entries)) {
        if (!raw || typeof raw !== 'object') continue;
        // Verdicts from an older policy version must be re-moderated
        // (the prompt/regex moved), so they are not loaded.
        if (raw.policyVersion !== MODERATION_POLICY_VERSION) {
          stale++;
          continue;
        }
        // Mirror `set()`, which refuses verdicts without a decision.
        // Loading one would make `needsModeration()` answer "no" for a
        // Space that has no usable verdict.
        if (typeof raw.decision !== 'string' || raw.decision === '') {
          invalid++;
          continue;
        }
        this.entries.set(id, {
          lastModified: raw.lastModified || null,
          decision: raw.decision,
          category: raw.category || 'none',
          reason: raw.reason || '',
          severity: raw.severity ?? null,
          source: raw.source || 'llm',
          moderatedAt: raw.moderatedAt || null,
          policyVersion: raw.policyVersion,
        });
        kept++;
      }

      console.log(
        `[ModerationCache] Loaded ${kept} verdicts from ${this.repoName}` +
          (stale ? ` (dropped ${stale} stale policy)` : '') +
          (invalid ? ` (skipped ${invalid} without a decision)` : ''),
      );
    } catch (err) {
      console.warn(
        `[ModerationCache] Load failed (${err?.message || err}); starting empty.`,
      );
    }
  }

  /**
   * @param {string} spaceId
   * @returns {object|null} The cached verdict, or `null` when none is stored.
   */
  get(spaceId) {
    return this.entries.get(spaceId) || null;
  }

  /**
   * Does `spaceId` need a fresh moderation call? Yes when we have no
   * verdict, the policy version moved, or the Space's `lastModified`
   * differs from our cached one (the README may have changed).
   *
   * @param {string} spaceId
   * @param {string|null|undefined} lastModified - Current timestamp of
   *   the Space; a falsy value skips the timestamp comparison.
   * @returns {boolean}
   */
  needsModeration(spaceId, lastModified) {
    const entry = this.entries.get(spaceId);
    if (!entry) return true;
    if (entry.policyVersion !== MODERATION_POLICY_VERSION) return true;
    if (lastModified && entry.lastModified !== lastModified) return true;
    return false;
  }

  /**
   * Store a verdict for `spaceId` and mark the cache dirty. A verdict
   * without a `decision` is ignored. So is one whose `lastModified`,
   * policy version, `decision` and `category` all match the stored
   * entry; this avoids a dataset commit for an unchanged verdict.
   *
   * @param {string} spaceId
   * @param {object} verdict
   * @param {string} verdict.decision
   * @param {string} [verdict.category] - Defaults to `'none'`.
   * @param {string} [verdict.reason] - Defaults to `''`.
   * @param {string|null} [verdict.severity] - Defaults to `null`.
   * @param {string} [verdict.source] - Defaults to `'llm'`.
   * @param {string|null} [verdict.lastModified] - Defaults to `null`.
   */
  set(spaceId, { decision, category, reason, severity, source, lastModified }) {
    if (!decision) return;
    const next = {
      lastModified: lastModified || null,
      decision,
      category: category || 'none',
      reason: reason || '',
      severity: severity ?? null,
      source: source || 'llm',
      moderatedAt: new Date().toISOString(),
      policyVersion: MODERATION_POLICY_VERSION,
    };
    const prev = this.entries.get(spaceId);
    if (
      prev &&
      prev.lastModified === next.lastModified &&
      prev.policyVersion === next.policyVersion &&
      prev.decision === next.decision &&
      prev.category === next.category
    ) {
      return; // no material change - skip the dirty flag / commit
    }
    this.entries.set(spaceId, next);
    this.dirty = true;
  }

  /**
   * Persist the in-memory cache to the dataset (one commit, one
   * file). No-op when nothing changed, when a flush is already
   * running, or when `HF_TOKEN` is missing. Auto-creates the dataset
   * on first write so a fresh `HF_MODERATION_DATASET` bootstraps
   * cleanly. Failures are logged, never thrown; the cache then stays
   * dirty so a later call retries.
   *
   * @returns {Promise<void>}
   */
  async flush() {
    if (!this.dirty || this.flushing) return;
    if (!process.env.HF_TOKEN) {
      console.warn('[ModerationCache] HF_TOKEN missing; skipping flush.');
      return;
    }
    this.flushing = true;
    // Clear the flag BEFORE taking the snapshot. A `set()` that lands
    // while the commit is in flight is not part of this payload; it
    // re-marks the cache dirty and that mark must survive this flush.
    this.dirty = false;
    try {
      const payload = this.serialize();
      const flushedCount = this.entries.size;
      const blob = new Blob([JSON.stringify(payload, null, 2)], {
        type: 'application/json',
      });

      const repo = { type: 'dataset', name: this.repoName };
      const credentials = { accessToken: process.env.HF_TOKEN };

      try {
        await commit({
          repo,
          credentials,
          title: `Update moderation (${flushedCount} verdicts)`,
          operations: [
            { operation: 'addOrUpdate', path: CACHE_FILE_PATH, content: blob },
          ],
        });
      } catch (err) {
        // Heuristic on the error text: only a "repo is missing" failure
        // falls through to creating the dataset; anything else is a
        // real flush failure.
        const msg = err?.message || '';
        const looksMissing =
          msg.includes('404') ||
          msg.toLowerCase().includes('not found') ||
          msg.toLowerCase().includes('does not exist');
        if (!looksMissing) throw err;
        console.log(
          `[ModerationCache] Dataset ${this.repoName} missing - creating it.`,
        );
        await createRepo({
          repo,
          credentials,
          private: false,
          files: [{ path: CACHE_FILE_PATH, content: await blob.arrayBuffer() }],
        });
      }

      console.log(
        `[ModerationCache] Flushed ${flushedCount} verdicts to ${this.repoName}`,
      );
    } catch (err) {
      // Nothing was persisted: restore the flag so the next flush retries.
      this.dirty = true;
      console.error(`[ModerationCache] Flush failed: ${err?.message || err}`);
    } finally {
      this.flushing = false;
    }
  }

  /**
   * Build the JSON-serializable payload written to the dataset.
   *
   * @returns {{version: number, policyVersion: number, updatedAt: string, entries: object}}
   */
  serialize() {
    return {
      version: CACHE_FORMAT_VERSION,
      policyVersion: MODERATION_POLICY_VERSION,
      updatedAt: new Date().toISOString(),
      entries: Object.fromEntries(this.entries),
    };
  }

  /**
   * Diagnostic snapshot for the `/api/js-apps` `moderation`
   * sub-payload. Counts are over the verdict cache only.
   *
   * @returns {{total: number, blocked: number, review: number, dataset: string, policyVersion: number}}
   */
  stats() {
    let blocked = 0;
    let review = 0;
    for (const entry of this.entries.values()) {
      if (entry.decision === 'block') blocked++;
      else if (entry.decision === 'review') review++;
    }
    return {
      total: this.entries.size,
      blocked,
      review,
      dataset: this.repoName,
      policyVersion: MODERATION_POLICY_VERSION,
    };
  }
}

// Singleton: one cache per server process. Every importer of this
// module shares this instance and its in-memory entries. The
// constructor resolves the dataset name from the environment when the
// module is first evaluated.
export const moderationCache = new ModerationCache();