import fs from "fs-extra";
import path from "path";
import matter from "gray-matter";
import { askOpenAI } from "./llm.js";
import { addDocuments } from "./vector.js";

// Directory used when the caller's config does not provide OUTPUT_DIR.
const DEFAULT_OUTPUT_DIR = "/data/context-tree";

// Topic assigned when the model response cannot be used.
const FALLBACK_TOPIC = "general";

/**
 * Turns raw text into a curated knowledge entry: asks the LLM for a JSON
 * description, writes the entry as a markdown file with front matter, and
 * registers it with the vector store.
 *
 * A failure to register the document is logged and does not fail the call;
 * the markdown file has already been written at that point.
 *
 * @param {string} rawText - Text to extract knowledge from.
 * @param {{ OUTPUT_DIR?: string }} [config] - Optional settings; OUTPUT_DIR
 *   overrides the default output directory.
 * @returns {Promise<{id: string, title: string, topic: string, type: string,
 *   summary: string, content: string, facts: string[], filePath: string}>}
 */
export async function curateText(rawText, config) {
  // Tolerate a missing config object instead of throwing on property access.
  const OUTPUT_DIR = (config && config.OUTPUT_DIR) || DEFAULT_OUTPUT_DIR;

  // Pieces are joined with single spaces, which yields the same prompt text
  // as the original one-line template.
  const prompt = [
    "Extract knowledge from this text and return ONLY JSON, nothing else.",
    `TEXT: ${rawText}`,
    "RETURN THIS JSON (no markdown, no explanation):",
    "{",
    '"title": "clear title (max 8 words)",',
    '"topic": "ONE WORD: auth OR database OR api OR infrastructure OR OTHER",',
    '"type": "ONE: fact OR decision OR context",',
    '"summary": "one sentence",',
    '"content": "key details as markdown",',
    '"facts": ["fact 1", "fact 2"]',
    "}",
  ].join(" ");

  // NOTE(review): rawText is passed both on its own and inside the prompt;
  // confirm against askOpenAI's signature that this is intended.
  const raw = await askOpenAI(rawText, prompt);
  const parsed = parseJSON(raw, rawText);
  const id = makeId(parsed.topic, parsed.title);

  const filePath = await writeMarkdownFile(
    {
      id,
      title: parsed.title,
      topic: parsed.topic,
      type: parsed.type,
      content: parsed.content,
      facts: parsed.facts,
    },
    OUTPUT_DIR,
  );

  try {
    await addDocuments({
      documents: [
        {
          id,
          text: `${parsed.title}\n\n${parsed.content}`,
          metadata: {
            title: parsed.title,
            topic: parsed.topic,
            type: parsed.type,
            filePath,
            summary: parsed.summary,
          },
        },
      ],
    });
  } catch (err) {
    // Best effort: indexing problems must not discard the written file.
    console.error("Warning: Failed to add to ChromaDB:", err.message);
  }

  return {
    id,
    title: parsed.title,
    topic: parsed.topic,
    type: parsed.type,
    summary: parsed.summary,
    content: parsed.content,
    facts: parsed.facts,
    filePath,
  };
}

/**
 * Coerces a "facts" value into an array of non-blank strings.
 *
 * A lone string becomes a one-element array (rather than being spread into
 * characters later); anything that is neither an array nor a string yields
 * an empty array.
 *
 * @param {unknown} value - Facts as returned by the model or read from disk.
 * @returns {string[]} Cleaned list of facts.
 */
function normalizeFacts(value) {
  let items = [];
  if (Array.isArray(value)) {
    items = value;
  } else if (typeof value === "string") {
    items = [value];
  }
  return items.filter((item) => typeof item === "string" && item.trim() !== "");
}

/**
 * Parses the model response into an entry object, falling back to an entry
 * derived from the raw text when the response is unusable.
 *
 * The JSON object is taken from the first "{" to the last "}" of the
 * response, so wrapping code fences or stray prose around it are ignored
 * while backticks inside the JSON values are left intact.
 *
 * @param {string} rawResponse - Text returned by the model.
 * @param {string} fallbackText - Original input, used to build the fallback.
 * @returns {{title: string, topic: string, type: string, summary: string,
 *   content: string, facts: string[]}} Entry with `content` guaranteed to be
 *   a string and `facts` guaranteed to be an array of strings.
 */
function parseJSON(rawResponse, fallbackText) {
  const sourceText = fallbackText == null ? "" : String(fallbackText);

  try {
    if (typeof rawResponse !== "string" || rawResponse.trim() === "") {
      throw new Error("Empty response");
    }

    const start = rawResponse.indexOf("{");
    const end = rawResponse.lastIndexOf("}");
    if (start === -1 || end < start) {
      throw new Error("Invalid JSON format");
    }

    const parsed = JSON.parse(rawResponse.slice(start, end + 1));
    if (!parsed.title || !parsed.topic || !parsed.type) {
      throw new Error("Missing required fields");
    }

    return Object.assign({}, parsed, {
      // The markdown writer and the vector text both need a string body.
      content: typeof parsed.content === "string" ? parsed.content : sourceText,
      facts: normalizeFacts(parsed.facts),
    });
  } catch (err) {
    console.warn(
      "Warning: Could not parse LLM response, using raw text fallback:",
      err.message,
    );

    const firstLine =
      sourceText.split("\n").find((line) => line.trim()) || sourceText;

    return {
      title: firstLine.slice(0, 50),
      topic: FALLBACK_TOPIC,
      type: "context",
      summary: sourceText.slice(0, 120),
      content: sourceText,
      facts: normalizeFacts([sourceText.slice(0, 100)]),
    };
  }
}

/**
 * Builds a lowercase, hyphen-separated identifier from topic and title,
 * limited to 60 characters.
 *
 * @param {string} topic - Entry topic.
 * @param {string} title - Entry title.
 * @returns {string} Slug containing only a-z, 0-9 and "-"; "untitled" when
 *   topic and title contain no such characters at all.
 */
function makeId(topic, title) {
  const slug = `${topic}-${title}`
    .toLowerCase()
    .replace(/[^a-z0-9]+/g, "-")
    .replace(/^-|-$/g, "")
    .slice(0, 60);

  // An empty slug would produce a file named ".md" and an empty document id.
  return slug || "untitled";
}

/**
 * Converts a topic into a single safe directory name.
 *
 * The topic comes from model output, so path separators and dots are
 * replaced to keep the resulting path inside the output directory. Topics
 * made only of letters, digits, "_" and "-" are returned unchanged.
 *
 * @param {unknown} topic - Topic value from the entry.
 * @returns {string} Directory name; the fallback topic when nothing usable
 *   remains.
 */
function toSafeDirName(topic) {
  const cleaned = String(topic == null ? "" : topic)
    .trim()
    .replace(/[^A-Za-z0-9_-]+/g, "-")
    .replace(/^-+|-+$/g, "");
  return cleaned || FALLBACK_TOPIC;
}

/**
 * Writes an entry as `<outputDir>/<topic>/<id>.md` with YAML front matter.
 *
 * When the file already exists, its stored importance is raised by one
 * (capped at 10) and its facts are merged with the new ones, without
 * duplicates. New files start at importance 5.
 *
 * @param {{id: string, title: string, topic: string, type: string,
 *   content: string, facts: string[]}} entry - Entry to persist.
 * @param {string} outputDir - Root directory for topic folders.
 * @returns {Promise<string>} Path of the written file.
 */
async function writeMarkdownFile(entry, outputDir) {
  const { id, title, topic, type, content, facts } = entry;

  const dir = path.join(outputDir, toSafeDirName(topic));
  await fs.ensureDir(dir);
  const filePath = path.join(dir, `${id}.md`);

  let importance = 5;
  let existingFacts = [];
  if (await fs.pathExists(filePath)) {
    const existing = matter(await fs.readFile(filePath, "utf-8"));
    // Number() keeps a hand-edited string value from turning "+ 1" into
    // string concatenation.
    const previous = Number(existing.data.importance) || 5;
    importance = Math.min(10, previous + 1);
    existingFacts = normalizeFacts(existing.data.facts);
  }

  const allFacts = [...new Set([...existingFacts, ...normalizeFacts(facts)])];
  const body = typeof content === "string" ? content : "";

  const fileContent = matter.stringify(body, {
    id,
    title,
    topic,
    type,
    importance,
    facts: allFacts,
    updatedAt: new Date().toISOString(),
  });

  await fs.writeFile(filePath, fileContent, "utf-8");
  return filePath;
}