import fs from "node:fs";
import path from "node:path";
import { logger } from "./logger";

const DATA_DIR = path.resolve(process.cwd(), "data");
const STATE_FILE = path.join(DATA_DIR, "self_improve_state.json");
const REPAIR_LOG = path.join(DATA_DIR, "repair_log.jsonl");

/** Upper bound on remembered weak queries; the oldest entries are evicted first. */
const MAX_WEAK_QUERIES = 200;

/**
 * Keywords this short must match a whole word (optionally pluralised);
 * longer keywords only need to match at the start of a word.
 */
const SHORT_KEYWORD_MAX_LENGTH = 3;

// ── Domain definitions ────────────────────────────────────────────────────────

export type Domain =
  | "science"
  | "history"
  | "technology"
  | "medicine"
  | "geography"
  | "philosophy"
  | "mathematics"
  | "creative"
  | "code"
  | "law"
  | "economics"
  | "culture"
  | "sports"
  | "news"
  | "general";

/** Lower-case keywords whose presence in a query votes for the given domain. */
const DOMAIN_KEYWORDS: Record<Domain, string[]> = {
  science: ["physics","quantum","biology","chemistry","astronomy","molecule","atom","dna","cell","evolution","gravity","relativity","electron","photon","enzyme"],
  history: ["war","century","ancient","historical","founded","empire","dynasty","king","queen","revolution","treaty","civilization","medieval","renaissance"],
  technology: ["software","hardware","algorithm","computer","internet","network","semiconductor","processor","cloud","blockchain","ai","machine learning","neural","database"],
  medicine: ["disease","treatment","drug","vaccine","symptom","diagnosis","surgery","cancer","virus","bacteria","clinical","therapy","patient","genome","cell"],
  geography: ["country","city","capital","river","mountain","continent","ocean","island","population","climate","latitude","longitude","region","territory"],
  philosophy: ["ethics","consciousness","metaphysics","epistemology","morality","free will","existence","logic","argument","kant","aristotle","plato","nietzsche","phenomenology"],
  mathematics: ["theorem","proof","equation","integral","derivative","algebra","geometry","topology","probability","statistics","matrix","vector","prime","fibonacci"],
  creative: ["art","music","literature","poetry","painting","sculpture","novel","film","architecture","design","composition","aesthetic","style","genre","narrative"],
  code: ["script","function","class","library","framework","api","deploy","debug","variable","loop","array","object","interface","method","parameter"],
  law: ["legal","court","statute","regulation","contract","constitution","jurisdiction","precedent","plaintiff","defendant","legislation","rights","liability"],
  economics: ["market","gdp","inflation","trade","supply","demand","investment","monetary","fiscal","recession","growth","price","currency","interest","equity"],
  culture: ["tradition","society","religion","ritual","language","custom","festival","heritage","identity","community","belief","myth","folklore","ethnicity"],
  sports: ["championship","athlete","tournament","league","season","record","score","team","coach","stadium","training","olympic","medal","competition"],
  news: ["president","election","government","policy","summit","crisis","conflict","agreement","sanctions","diplomatic","minister","parliament","vote","poll"],
  general: [],
};

/** Escapes regex metacharacters so a keyword can be embedded in a pattern literally. */
function escapeRegExp(text: string): string {
  return text.replace(/[.*+?^${}()|[\]\\]/g, "\\$&");
}

/**
 * Builds the matcher for one keyword.
 *
 * Plain substring matching made "software" count as "war", "start" as "art"
 * and "explain" as "ai". Every keyword is therefore anchored to the start of
 * a word, and very short keywords additionally have to be a whole word
 * (with an optional plural suffix) so that "ai" does not match "air".
 */
function keywordPattern(keyword: string): RegExp {
  const escaped = escapeRegExp(keyword);
  return keyword.length <= SHORT_KEYWORD_MAX_LENGTH
    ? new RegExp(`\\b${escaped}(?:s|es)?\\b`)
    : new RegExp(`\\b${escaped}`);
}

/** Pre-compiled keyword matchers per domain, in declaration order; empty domains are skipped. */
const DOMAIN_MATCHERS: [Domain, RegExp[]][] = (Object.entries(DOMAIN_KEYWORDS) as [Domain, string[]][])
  .filter(([, keywords]) => keywords.length > 0)
  .map(([domain, keywords]): [Domain, RegExp[]] => [domain, keywords.map(keywordPattern)]);

// ── State ─────────────────────────────────────────────────────────────────────

interface DomainStats {
  queries: number;
  successes: number;
  failures: number;
  avgResponseMs: number;
  avgSourceCount: number;
  lastTuneAt: number;
  // Adaptive parameters
  minCoverage: number; // fragment filter threshold
  targetSentences: number; // synthesis target
  sourceBoosts: Record<string, number>; // extra weight for specific sources
}

/** A query that failed (or received negative feedback) and is kept for re-testing. */
export interface WeakQuery {
  query: string;
  domain: Domain;
  failedAt: number;
  retries: number;
}

interface SelfImproveState {
  version: number;
  domains: Partial<Record<Domain, DomainStats>>;
  weakQueries: WeakQuery[];
  totalRepairs: number;
  lastRepairAt: number;
}

const DEFAULT_DOMAIN_STATS: Omit<DomainStats, "lastTuneAt"> = {
  queries: 0,
  successes: 0,
  failures: 0,
  avgResponseMs: 0,
  avgSourceCount: 0,
  minCoverage: 0.35,
  targetSentences: 20,
  sourceBoosts: {},
};

let state: SelfImproveState = {
  version: 1,
  domains: {},
  weakQueries: [],
  totalRepairs: 0,
  lastRepairAt: 0,
};

// ── Domain detection ─────────────────────────────────────────────────────────

/**
 * Guesses the domain of a query by counting how many of each domain's
 * keywords occur in it (case-insensitively).
 *
 * @param query Free-text user query.
 * @returns The domain with the most keyword hits; the earliest-declared
 *   domain wins ties, and "general" is returned when nothing matches.
 */
export function detectDomain(query: string): Domain {
  const q = query.toLowerCase();
  let bestDomain: Domain = "general";
  let bestScore = 0;
  for (const [domain, patterns] of DOMAIN_MATCHERS) {
    const score = patterns.filter(pattern => pattern.test(q)).length;
    if (score > bestScore) {
      bestScore = score;
      bestDomain = domain;
    }
  }
  return bestDomain;
}

// ── Persistence ───────────────────────────────────────────────────────────────

/** Writes the in-memory state to STATE_FILE; failures are logged, never thrown. */
function saveState(): void {
  try {
    fs.mkdirSync(DATA_DIR, { recursive: true });
    fs.writeFileSync(STATE_FILE, JSON.stringify(state, null, 2), "utf-8");
  } catch (err) {
    logger.warn({ err }, "self-improve: failed to save state");
  }
}

/** True for plain (non-null, non-array) objects. */
function isRecord(value: unknown): value is Record<string, unknown> {
  return typeof value === "object" && value !== null && !Array.isArray(value);
}

/** True when the value is one of the known domain names. */
function isDomain(value: unknown): value is Domain {
  return typeof value === "string" && Object.prototype.hasOwnProperty.call(DOMAIN_KEYWORDS, value);
}

/** Returns the value when it is a finite number, otherwise the fallback. */
function finiteOr(value: unknown, fallback: number): number {
  return typeof value === "number" && Number.isFinite(value) ? value : fallback;
}

/** Rebuilds one persisted domain entry, filling missing or invalid fields with defaults. */
function normalizeDomainStats(raw: unknown): DomainStats | undefined {
  if (!isRecord(raw)) return undefined;
  const sourceBoosts: Record<string, number> = {};
  const rawBoosts = raw.sourceBoosts;
  if (isRecord(rawBoosts)) {
    for (const [src, boost] of Object.entries(rawBoosts)) {
      if (typeof boost === "number" && Number.isFinite(boost)) sourceBoosts[src] = boost;
    }
  }
  return {
    queries: finiteOr(raw.queries, DEFAULT_DOMAIN_STATS.queries),
    successes: finiteOr(raw.successes, DEFAULT_DOMAIN_STATS.successes),
    failures: finiteOr(raw.failures, DEFAULT_DOMAIN_STATS.failures),
    avgResponseMs: finiteOr(raw.avgResponseMs, DEFAULT_DOMAIN_STATS.avgResponseMs),
    avgSourceCount: finiteOr(raw.avgSourceCount, DEFAULT_DOMAIN_STATS.avgSourceCount),
    lastTuneAt: finiteOr(raw.lastTuneAt, Date.now()),
    minCoverage: finiteOr(raw.minCoverage, DEFAULT_DOMAIN_STATS.minCoverage),
    targetSentences: finiteOr(raw.targetSentences, DEFAULT_DOMAIN_STATS.targetSentences),
    sourceBoosts,
  };
}

/** Rebuilds the persisted per-domain map, dropping unknown domains and malformed entries. */
function normalizeDomains(raw: Record<string, unknown>): Partial<Record<Domain, DomainStats>> {
  const domains: Partial<Record<Domain, DomainStats>> = {};
  for (const [name, stats] of Object.entries(raw)) {
    if (!isDomain(name)) continue;
    const normalized = normalizeDomainStats(stats);
    if (normalized) domains[name] = normalized;
  }
  return domains;
}

/** Rebuilds the persisted weak-query list, dropping malformed entries and enforcing the cap. */
function normalizeWeakQueries(raw: unknown[]): WeakQuery[] {
  const queries: WeakQuery[] = [];
  for (const entry of raw) {
    if (!isRecord(entry)) continue;
    const query = entry.query;
    if (typeof query !== "string") continue;
    const domain = entry.domain;
    queries.push({
      query,
      domain: isDomain(domain) ? domain : "general",
      failedAt: finiteOr(entry.failedAt, 0),
      retries: finiteOr(entry.retries, 0),
    });
  }
  // Keep the tail: entries are appended over time, so the newest sit at the end.
  return queries.slice(-MAX_WEAK_QUERIES);
}

/**
 * Validates parsed JSON against the state shape. Any top-level field that is
 * missing or has the wrong type keeps the value from `base`.
 */
function normalizeState(raw: Record<string, unknown>, base: SelfImproveState): SelfImproveState {
  const rawDomains = raw.domains;
  const rawWeakQueries = raw.weakQueries;
  return {
    version: finiteOr(raw.version, base.version),
    domains: isRecord(rawDomains) ? normalizeDomains(rawDomains) : base.domains,
    weakQueries: Array.isArray(rawWeakQueries) ? normalizeWeakQueries(rawWeakQueries) : base.weakQueries,
    totalRepairs: finiteOr(raw.totalRepairs, base.totalRepairs),
    lastRepairAt: finiteOr(raw.lastRepairAt, base.lastRepairAt),
  };
}

/**
 * Loads persisted state from STATE_FILE if it exists.
 *
 * The file content is validated before it replaces the in-memory state, so a
 * truncated or hand-edited file cannot leave the module with `null` maps or
 * missing fields. On any read/parse/shape error the current state is kept
 * and a warning is logged; nothing is thrown.
 */
export function loadSelfImproveState(): void {
  try {
    fs.mkdirSync(DATA_DIR, { recursive: true });
    if (!fs.existsSync(STATE_FILE)) return;
    const parsed: unknown = JSON.parse(fs.readFileSync(STATE_FILE, "utf-8"));
    if (!isRecord(parsed)) {
      throw new Error("state file does not contain a JSON object");
    }
    state = normalizeState(parsed, state);
    logger.info(
      { domains: Object.keys(state.domains).length, repairs: state.totalRepairs },
      "Self-improve state loaded",
    );
  } catch (err) {
    logger.warn({ err }, "self-improve: failed to load state (starting fresh)");
  }
}

// ── Domain stats access ────────────────────────────────────────────────────────

/** Returns the mutable stats record for a domain, creating it with defaults on first use. */
function getDomainStats(domain: Domain): DomainStats {
  let stats = state.domains[domain];
  if (!stats) {
    // Fresh sourceBoosts object so domains never share the default's map.
    stats = { ...DEFAULT_DOMAIN_STATS, lastTuneAt: Date.now(), sourceBoosts: {} };
    state.domains[domain] = stats;
  }
  return stats;
}

/**
 * Returns the current adaptive parameters for a domain.
 *
 * Note that `sourceBoosts` is the live map held in the state, not a copy.
 */
export function getAdaptiveParams(domain: Domain): {
  minCoverage: number;
  targetSentences: number;
  sourceBoosts: Record<string, number>;
} {
  const s = getDomainStats(domain);
  return { minCoverage: s.minCoverage, targetSentences: s.targetSentences, sourceBoosts: s.sourceBoosts };
}

/**
 * Fraction of recorded outcomes that were failures, in [0, 1].
 *
 * Feedback handlers add successes/failures without adding queries, so the
 * rate is taken over `successes + failures` rather than `queries`; for stats
 * fed only by recordQueryResult the two denominators are equal.
 */
function failureRate(s: DomainStats): number {
  const outcomes = s.successes + s.failures;
  return outcomes > 0 ? s.failures / outcomes : 0;
}

/**
 * Ensures a query is on the weak-query list.
 *
 * @returns The existing entry when the query was already tracked, otherwise
 *   `undefined` after appending a new entry (and evicting the oldest entries
 *   beyond MAX_WEAK_QUERIES).
 */
function addWeakQuery(query: string, domain: Domain): WeakQuery | undefined {
  const existing = state.weakQueries.find(w => w.query === query);
  if (existing) return existing;
  state.weakQueries.push({ query, domain, failedAt: Date.now(), retries: 0 });
  const overflow = state.weakQueries.length - MAX_WEAK_QUERIES;
  if (overflow > 0) state.weakQueries.splice(0, overflow);
  return undefined;
}

// ── Result recording ──────────────────────────────────────────────────────────

export interface QueryResult {
  query: string;
  domain: Domain;
  success: boolean;
  responseMs: number;
  sourceCount: number;
  answerLength: number;
}

/**
 * Records the outcome of one answered query.
 *
 * Updates the domain's counters and running averages, tracks failed queries
 * for re-testing, triggers auto-tuning when the domain has at least 5 queries,
 * a failure rate above 40% and was not tuned in the last 60 seconds, and
 * persists the state every 10th query of the domain.
 */
export function recordQueryResult(r: QueryResult): void {
  const s = getDomainStats(r.domain);
  s.queries++;
  if (r.success) {
    s.successes++;
  } else {
    s.failures++;
    // Add to weak queries for re-test; a repeat failure counts as a retry.
    const existing = addWeakQuery(r.query, r.domain);
    if (existing) existing.retries++;
  }

  // Running average. Non-finite samples are skipped: a single NaN would
  // otherwise poison the average permanently.
  const n = s.queries;
  if (Number.isFinite(r.responseMs)) {
    s.avgResponseMs = (s.avgResponseMs * (n - 1) + r.responseMs) / n;
  }
  if (Number.isFinite(r.sourceCount)) {
    s.avgSourceCount = (s.avgSourceCount * (n - 1) + r.sourceCount) / n;
  }

  // Auto-tune if enough data
  if (s.queries >= 5 && failureRate(s) > 0.4 && Date.now() - s.lastTuneAt > 60_000) {
    autotune(r.domain, s);
  }

  // Periodic save
  if (s.queries % 10 === 0) saveState();
}

// ── Auto-tuning ────────────────────────────────────────────────────────────────

/** Per-domain source weights that auto-tuning gradually blends into sourceBoosts. */
const DOMAIN_SOURCE_HINTS: Partial<Record<Domain, Record<string, number>>> = {
  science: { arXiv: 0.3, "Semantic Scholar": 0.25, "Europe PMC": 0.2, "OpenAlex": 0.2, Wikipedia: 0.1 },
  history: { Wikipedia: 0.3, "Simple Wikipedia": 0.2, "Open Library": 0.15, Wikidata: 0.1 },
  technology: { "MDN": 0.25, "Stack Overflow": 0.2, "GitHub": 0.2, Wikipedia: 0.1, "npm": 0.1 },
  medicine: { "Europe PMC": 0.3, "PubMed": 0.25, "OpenAlex": 0.2, arXiv: 0.1 },
  geography: { Wikipedia: 0.3, "OpenStreetMap": 0.25, Wikidata: 0.2, "Simple Wikipedia": 0.1 },
  philosophy: { Wikipedia: 0.25, "Open Library": 0.2, "Internet Archive": 0.15 },
  mathematics: { Wikipedia: 0.2, arXiv: 0.2, "Semantic Scholar": 0.15, "Open Library": 0.1 },
  economics: { Wikipedia: 0.2, "World Bank": 0.2, "Data.gov": 0.15, "The Guardian": 0.1 },
  law: { Wikipedia: 0.2, "Open Library": 0.15 },
  creative: { Wikipedia: 0.2, "Open Library": 0.2, "Internet Archive": 0.15 },
  code: { "MDN": 0.3, "Stack Overflow": 0.25, "GitHub": 0.2, "npm": 0.15 },
  sports: { Wikipedia: 0.3, Wikidata: 0.15 },
  news: { "The Guardian": 0.3, "DuckDuckGo": 0.2, "Reddit": 0.1 },
  culture: { Wikipedia: 0.25, "Open Library": 0.2, Wikidata: 0.1 },
};

/**
 * Adjusts a domain's adaptive parameters based on its failure rate.
 *
 * - Failure rate above 50%: lower minCoverage by 0.05 (floor 0.15) and raise
 *   targetSentences by 1 (ceiling 24).
 * - Failure rate below 20%: raise minCoverage by 0.02 (ceiling 0.45).
 * - Always: add half of each domain source hint to sourceBoosts, capped at 0.6.
 *
 * Each run is counted as a repair, appended to REPAIR_LOG as one JSON line,
 * logged, and followed by a state save.
 */
function autotune(domain: Domain, s: DomainStats): void {
  const failRate = failureRate(s);
  const prevCoverage = s.minCoverage;
  const prevTarget = s.targetSentences;

  if (failRate > 0.5) {
    // Loosen coverage threshold if failing too much
    s.minCoverage = Math.max(0.15, s.minCoverage - 0.05);
    s.targetSentences = Math.min(24, s.targetSentences + 1);
  } else if (failRate < 0.2 && s.minCoverage < 0.45) {
    // Tighten if doing well
    s.minCoverage = Math.min(0.45, s.minCoverage + 0.02);
  }

  // Apply domain-specific source hints
  const hints = DOMAIN_SOURCE_HINTS[domain];
  if (hints) {
    for (const [src, boost] of Object.entries(hints)) {
      const cur = s.sourceBoosts[src] ?? 0;
      s.sourceBoosts[src] = Math.min(cur + boost * 0.5, 0.6);
    }
  }

  const now = Date.now();
  s.lastTuneAt = now;
  state.totalRepairs++;
  state.lastRepairAt = now;

  const repairEntry = {
    ts: now,
    domain,
    failRate: Number(failRate.toFixed(3)),
    minCoverage: { from: prevCoverage, to: s.minCoverage },
    targetSentences: { from: prevTarget, to: s.targetSentences },
    appliedHints: !!hints,
  };
  try {
    // The data directory may not exist yet if nothing has been saved or loaded.
    fs.mkdirSync(DATA_DIR, { recursive: true });
    fs.appendFileSync(REPAIR_LOG, JSON.stringify(repairEntry) + "\n", "utf-8");
  } catch (err) {
    // Best effort: the repair log is diagnostic only, so tuning proceeds.
    logger.warn({ err }, "self-improve: failed to append repair log");
  }
  logger.info(repairEntry, `self-improve: auto-tuned domain=${domain}`);
  saveState();
}

// ── Negative feedback trigger ─────────────────────────────────────────────────

/**
 * Handles a thumbs-down on an answer.
 *
 * Counts a failure for the query's detected domain, tracks the query for
 * re-testing, lowers the boost of every listed source by 0.1 (floor -0.3),
 * auto-tunes when the domain has at least 3 failures and was not tuned in
 * the last 30 seconds, and saves the state.
 *
 * @param query The query that produced the disliked answer.
 * @param sources Names of the sources used in that answer.
 */
export function onNegativeFeedback(query: string, sources: string[]): void {
  const domain = detectDomain(query);
  const s = getDomainStats(domain);
  s.failures++;
  addWeakQuery(query, domain);

  // Penalize sources that were in a thumbs-down
  for (const src of sources) {
    const cur = s.sourceBoosts[src] ?? 0;
    s.sourceBoosts[src] = Math.max(cur - 0.1, -0.3);
  }

  if (s.failures >= 3 && Date.now() - s.lastTuneAt > 30_000) {
    autotune(domain, s);
  }
  saveState();
}

/**
 * Handles a thumbs-up on an answer.
 *
 * Counts a success for the query's detected domain, raises the boost of
 * every listed source by 0.08 (ceiling 0.5), removes the query from the
 * weak-query list, and saves the state.
 *
 * @param query The query that produced the liked answer.
 * @param sources Names of the sources used in that answer.
 */
export function onPositiveFeedback(query: string, sources: string[]): void {
  const domain = detectDomain(query);
  const s = getDomainStats(domain);
  s.successes++;

  // Reinforce sources that were in a thumbs-up
  for (const src of sources) {
    const cur = s.sourceBoosts[src] ?? 0;
    s.sourceBoosts[src] = Math.min(cur + 0.08, 0.5);
  }

  // Remove from weak queries
  state.weakQueries = state.weakQueries.filter(w => w.query !== query);
  saveState();
}

// ── Get weak queries for re-testing ──────────────────────────────────────────

/**
 * Returns the most recently failed weak queries, newest first.
 *
 * @param limit Maximum number of entries to return (negative values yield none).
 * @returns A new array; the entries themselves are the live state objects.
 */
export function getWeakQueries(limit = 20): WeakQuery[] {
  return [...state.weakQueries]
    .sort((a, b) => b.failedAt - a.failedAt)
    .slice(0, Math.max(0, limit));
}

/**
 * Summarises the self-improvement state for reporting.
 *
 * `successRate` is the share of successes among all recorded outcomes
 * (query results and feedback), rounded to 3 decimals, or `null` when the
 * domain has no outcomes yet.
 */
export function getSelfImproveStats() {
  return {
    totalRepairs: state.totalRepairs,
    lastRepairAt: state.lastRepairAt ? new Date(state.lastRepairAt).toISOString() : null,
    weakQueryCount: state.weakQueries.length,
    domains: Object.entries(state.domains).flatMap(([domain, s]) => {
      if (!s) return [];
      const outcomes = s.successes + s.failures;
      return [
        {
          domain,
          queries: s.queries,
          successRate: outcomes > 0 ? Number((s.successes / outcomes).toFixed(3)) : null,
          avgResponseMs: Math.round(s.avgResponseMs),
          minCoverage: s.minCoverage,
          targetSentences: s.targetSentences,
        },
      ];
    }),
  };
}