Spaces:
Build error
Build error
| import Database from 'better-sqlite3'; | |
| import fs from 'fs'; | |
| import path from 'path'; | |
/**
 * One row of the `documents` table as read by the export query in `main`.
 *
 * Nullable columns are typed `| null` because SQLite hands back `null`
 * (never `undefined`) for NULL values; the `?` additionally lets callers
 * build partial rows by hand.
 */
type DocRow = {
  /** Primary identifier of the document. */
  id: string;
  /** Identifier of the document on the Yuque side; nullable like the other optional columns. */
  yuque_id?: number | null;
  /** Human-readable document title. */
  title: string;
  /** Document slug; `main` derives the exported file name from it. */
  slug: string;
  /** Source URL of the document, if recorded. */
  url?: string | null;
  /** Namespace the document belongs to; `main` uses it as the output sub-directory. */
  namespace?: string | null;
  /** Stored text of the document; `main` writes it as the body of the exported `.md` file. */
  content_preview?: string | null;
  /** Word count as stored in the database. */
  word_count?: number | null;
  /** Last-update timestamp (numeric; unit not visible here — TODO confirm). */
  updated_at?: number | null;
  /** Creation timestamp (numeric; unit not visible here — TODO confirm). */
  created_at?: number | null;
  /** Tags as a single string (serialization format not visible here — TODO confirm). */
  tags?: string | null;
  /** Position used for ordering documents within a namespace. */
  sort_order?: number | null;
};
/**
 * One row of the `knowledge_bases` table as read by the export query in
 * `main`; rows are written verbatim into `knowledge_bases.json`.
 */
type KbRow = {
  /** Namespace that identifies the knowledge base. */
  namespace: string;

  /** Display name of the knowledge base. */
  name: string;

  /** Free-text description, when one is stored. */
  description?: string | null;

  /** Sync timestamp; the export query sorts on it, newest first. */
  synced_at: number;

  /** Stored sync offset (exact meaning not visible here — TODO confirm). */
  last_offset?: number | null;
};
/**
 * Converts a slug into text that is safe to use as one file-name component.
 *
 * Each backslash or forward slash is swapped for an underscore, then every
 * run of whitespace is collapsed into a single hyphen. Nothing else changes.
 */
function safeSlug(s: string) {
  const separatorsReplaced = s.replace(/[\\/]/g, '_');
  const hyphenated = separatorsReplaced.replace(/\s+/g, '-');
  return hyphenated;
}
/**
 * Makes sure the directory `p` exists, creating any missing parents.
 *
 * `mkdirSync` with `recursive: true` is already a no-op for an existing
 * directory, so no separate existence check is needed (and skipping it
 * avoids a check-then-create race). If `p` exists but is not a directory,
 * the call throws instead of silently succeeding.
 *
 * @param p Path of the directory to create.
 * @throws If the directory cannot be created or `p` is an existing non-directory.
 */
function ensureDir(p: string) {
  fs.mkdirSync(p, { recursive: true });
}
/**
 * Serializes `data` as JSON indented by two spaces and writes it to
 * `filePath` as UTF-8, replacing any existing file content.
 */
function writeJson(filePath: string, data: unknown) {
  const serialized = JSON.stringify(data, null, 2);
  fs.writeFileSync(filePath, serialized, 'utf8');
}
/**
 * Exports the contents of `rag-kb.db` (in the current working directory) to
 * `../hf_dataset_rag-kb-system`:
 *
 * - `files/<namespace>/<slug>.md` — one file per document, containing its
 *   `content_preview`;
 * - `metadata/documents.json` — every document row minus `content_preview`;
 * - `metadata/knowledge_bases.json` — every knowledge-base row;
 * - `dataset_summary.json` — generation time, directory names and the list
 *   of namespaces.
 *
 * The database is opened read-only, must already exist, and is closed as
 * soon as both queries have run.
 *
 * @throws If the database is missing or unreadable, if a namespace would
 *   resolve outside the `files` directory, or if any file-system write fails.
 */
async function main() {
  const cwd = process.cwd();
  const dbPath = path.join(cwd, 'rag-kb.db');
  const outDir = path.join(cwd, '..', 'hf_dataset_rag-kb-system');
  const filesDir = path.join(outDir, 'files');
  const metaDir = path.join(outDir, 'metadata');

  // Read everything up front so the handle can be released before any output
  // is written. `fileMustExist` stops a missing database from being silently
  // created as an empty file; `readonly` because the export only runs SELECTs.
  const db = new Database(dbPath, { readonly: true, fileMustExist: true });
  let docs: DocRow[];
  let kbs: KbRow[];
  try {
    docs = db
      .prepare(
        `SELECT id, yuque_id, title, slug, url, namespace, content_preview, word_count, updated_at, created_at, tags, sort_order
         FROM documents
         ORDER BY namespace ASC, sort_order ASC, synced_at DESC`
      )
      .all() as DocRow[];
    kbs = db
      .prepare(
        `SELECT namespace, name, description, synced_at, last_offset
         FROM knowledge_bases
         ORDER BY synced_at DESC`
      )
      .all() as KbRow[];
  } finally {
    db.close();
  }

  ensureDir(outDir);
  ensureDir(filesDir);
  ensureDir(metaDir);

  const index: Array<Omit<DocRow, 'content_preview'>> = [];
  // Namespace directories already created, so each one is only created once.
  const createdDirs = new Set<string>();

  for (const d of docs) {
    // Split the body text from the metadata that goes into the index.
    const { content_preview: preview, ...meta } = d;

    // `||` (not `??`) on purpose: an empty namespace also maps to 'UNKNOWN'.
    const ns = d.namespace || 'UNKNOWN';
    const nsDir = path.join(filesDir, ns);

    // The namespace comes from synced data; refuse `..` segments that would
    // place files outside the export's `files` directory.
    const rel = path.relative(filesDir, nsDir);
    if (rel === '..' || rel.startsWith(`..${path.sep}`) || path.isAbsolute(rel)) {
      throw new Error(`Namespace ${JSON.stringify(ns)} resolves outside of ${filesDir}`);
    }

    if (!createdDirs.has(nsDir)) {
      ensureDir(nsDir);
      createdDirs.add(nsDir);
    }

    const slug = safeSlug(d.slug);
    const filePath = path.join(nsDir, `${slug}.md`);
    fs.writeFileSync(filePath, preview ?? '', 'utf8');

    index.push(meta);
  }

  writeJson(path.join(metaDir, 'documents.json'), {
    count: index.length,
    documents: index,
  });
  writeJson(path.join(metaDir, 'knowledge_bases.json'), {
    count: kbs.length,
    knowledge_bases: kbs,
  });

  const summary = {
    generated_at: new Date().toISOString(),
    files_dir: 'files',
    metadata_dir: 'metadata',
    namespaces: Array.from(new Set(index.map((d) => d.namespace || 'UNKNOWN'))),
  };
  writeJson(path.join(outDir, 'dataset_summary.json'), summary);

  console.log(`Exported ${index.length} documents to: ${outDir}`);
}
// Script entry point: run the export and report any failure.
// `process.exitCode` is set instead of calling `process.exit(1)` so that
// Node finishes flushing the error output before exiting with a failure code.
main().catch((e: unknown) => {
  console.error(e);
  process.exitCode = 1;
});