// src/question/question_core.mjs
import { preview } from '../pipeline/util.mjs';

/** Questions shorter than this many characters are treated as noise. */
const MIN_QUESTION_LENGTH = 10;

/** Bare XML/HTML-ish tags such as `<foo>` or `</foo>` (no attributes). */
const TAG_PATTERN = /<\/?[a-zA-Z0-9_:-]+>/g;

/** Leading list markers: "1. ", "1) ", "- ", "* ". */
const LIST_PREFIX_PATTERN = /^(?:\d+\s*[.)]\s*|[-*]\s*)/;

/** A candidate must contain at least one ASCII letter. */
const HAS_LETTER_PATTERN = /[a-zA-Z]/;

/**
 * Safely parse JSON.
 *
 * @param {unknown} raw - Candidate JSON text.
 * @returns {any|null} The parsed value on success; null when `raw` is not a
 *   non-empty string, does not start with `{` or `[` after trimming, or is
 *   not valid JSON.
 */
function tryParseJson(raw) {
  if (!raw || typeof raw !== 'string') return null;

  const trimmed = raw.trim();

  // Quick sanity check: only object or array roots are accepted.
  if (!trimmed.startsWith('{') && !trimmed.startsWith('[')) {
    return null;
  }

  try {
    return JSON.parse(trimmed);
  } catch {
    // Malformed JSON is an expected outcome; the caller falls back to
    // plain-text extraction, so the parse error is deliberately not rethrown.
    return null;
  }
}

/**
 * Extract questions from a plain-text response.
 *
 * This is designed to handle real LLM outputs like:
 *
 *   What is the primary purpose of practicing presence according to the text?
 *   How does Q'uo characterize the physical vehicle's limitations?
 *   What is the role of pain and struggle in spiritual growth?
 *
 * as well as numbered/bulleted lists:
 *
 *   1. What is ... ?
 *   - How does ... ?
 *   * Why is ... ?
 *
 * Each line contributes at most one question: the text up to its first `?`.
 * If no line yields a question, the whole text is split on `?` as a fallback.
 *
 * @param {unknown} rawText - Raw model output.
 * @returns {string[]} Unique questions in order of first appearance; empty
 *   when `rawText` is not a non-empty string or nothing question-like is found.
 */
function extractQuestionsFromText(rawText) {
  if (!rawText || typeof rawText !== 'string') return [];

  // Replace bare tags with a space so that wrapped content stays separated.
  const stripped = rawText.replace(TAG_PATTERN, ' ');

  const lines = stripped
    .split(/\r?\n/)
    .map((line) => line.trim())
    .filter(Boolean);

  const questions = [];

  for (const line of lines) {
    // Must contain a question mark somewhere.
    if (!line.includes('?')) continue;

    const cleaned = line.replace(LIST_PREFIX_PATTERN, '').trim();

    // Take everything up to the first '?' as the question body.
    const body = cleaned.split('?')[0].trim();
    if (!body) continue;

    const question = `${body}?`;

    // Filter out tiny or degenerate candidates.
    if (question.length < MIN_QUESTION_LENGTH) continue;
    if (!HAS_LETTER_PATTERN.test(question)) continue;

    questions.push(question);
  }

  // Fallback: nothing was found line by line, so split the whole text on '?'
  // and recover sentence-like chunks (these may span several lines).
  if (questions.length === 0) {
    // The last segment has no '?' after it, so it is never a question.
    const segments = stripped.split('?').slice(0, -1);

    for (const segment of segments) {
      const body = segment.trim();

      // Here the length check applies to the body without the trailing '?'.
      if (body.length < MIN_QUESTION_LENGTH) continue;
      if (!HAS_LETTER_PATTERN.test(body)) continue;

      questions.push(`${body}?`);
    }
  }

  // A Set iterates in insertion order, so this dedupes while preserving order.
  return [...new Set(questions)];
}

/**
 * Locate the list of question candidates inside a parsed JSON value.
 *
 * @param {any} parsed - Value returned by `tryParseJson`.
 * @returns {any[]|null} `parsed.questions` when it is an array, `parsed`
 *   itself when it is an array, otherwise null.
 */
function findQuestionList(parsed) {
  if (
    parsed &&
    typeof parsed === 'object' &&
    Array.isArray(parsed.questions)
  ) {
    return parsed.questions;
  }

  if (Array.isArray(parsed)) return parsed;

  return null;
}

/**
 * Turn one JSON list entry into a trimmed question string.
 *
 * @param {any} item - A string, or an object carrying a string `question` or
 *   `question_text` property.
 * @returns {string} The trimmed text, or '' when no text can be found.
 */
function normalizeQuestionItem(item) {
  if (typeof item === 'string') return item.trim();

  if (item && typeof item === 'object') {
    if (typeof item.question === 'string') return item.question.trim();
    if (typeof item.question_text === 'string') {
      return item.question_text.trim();
    }
  }

  return '';
}

/**
 * Normalize a JSON list and keep only entries that end with '?'.
 *
 * @param {any[]} items - Raw list entries.
 * @returns {string[]} Question strings in their original order.
 */
function collectJsonQuestions(items) {
  return items
    .map((item) => normalizeQuestionItem(item))
    .filter((question) => question && question.endsWith('?'));
}

/**
 * Apply the optional question cap.
 *
 * @param {string[]} questions - Questions to cap.
 * @param {number} [maxQuestions] - Maximum number to keep. Missing, zero,
 *   negative or non-finite values mean "no limit".
 * @returns {string[]} The first `maxQuestions` entries, or `questions`
 *   unchanged when no limit applies.
 */
function limitQuestions(questions, maxQuestions) {
  const limit = Number(maxQuestions);

  // A negative end index would make slice() drop items from the tail, which
  // is never what a "max" option means, so treat it like "no limit".
  if (!Number.isFinite(limit) || limit <= 0) return questions;

  return questions.slice(0, limit);
}

/**
 * Core helper: take the raw model string and return:
 *   {
 *     questions: string[],
 *     raw: string,
 *     parsed: any | { error: string, rawSnippet?: string }
 *   }
 *
 * - Tries JSON first: { questions: [...] } or an array root. In both shapes
 *   an entry may be a string or an object with a string `question` /
 *   `question_text`; only entries ending in '?' are kept.
 * - Otherwise falls back to text-based extraction and sets `parsed.error` to
 *   'invalid_json' (not parseable) or 'unrecognized_json_shape' (parseable
 *   but neither supported shape), with a 200-character `rawSnippet`.
 * - A missing or non-string `raw` yields no questions and
 *   `parsed.error === 'empty_response'`.
 *
 * @param {unknown} raw - Raw model output.
 * @param {{ maxQuestions?: number }} [options] - Optional cap on the number
 *   of questions returned; non-positive values mean "no limit".
 * @returns {{ questions: string[], raw: any, parsed: any }} Parse result.
 */
export function parseQuestionResponse(raw, { maxQuestions } = {}) {
  const result = {
    questions: [],
    raw: raw ?? '',
    parsed: null,
  };

  if (!raw || typeof raw !== 'string') {
    result.parsed = { error: 'empty_response' };
    return result;
  }

  const parsed = tryParseJson(raw);
  const items = findQuestionList(parsed);

  if (items) {
    // Recognized JSON shape: no text fallback, even if nothing qualifies.
    result.parsed = parsed;
    result.questions = limitQuestions(
      collectJsonQuestions(items),
      maxQuestions,
    );
    return result;
  }

  result.parsed = {
    error: parsed == null ? 'invalid_json' : 'unrecognized_json_shape',
    rawSnippet: preview(raw, 200),
  };

  // Fallback: extract questions from plain text.
  result.questions = limitQuestions(
    extractQuestionsFromText(raw),
    maxQuestions,
  );
  return result;
}

/**
 * High-level helper used by the pipeline:
 *
 *   const { questions, raw, parsed } =
 *     await runQuestionGenerator(contextText, provider, { maxQuestions })
 *
 * Builds a question-generation prompt around `contextText`, sends it to
 * `provider.generate`, and parses the reply with `parseQuestionResponse`.
 *
 * @param {string} contextText - Source text the questions must be based on.
 * @param {{ generate: Function }} provider - Object whose `generate(prompt)`
 *   result is awaited and used as the raw model reply.
 * @param {{ maxQuestions?: number }} [options] - Cap on returned questions
 *   (default 5).
 * @returns {Promise<{ questions: string[], raw: any, parsed: any }>} The
 *   parse result, or an empty result with `parsed.error === 'empty_context'`
 *   when `contextText` is empty or whitespace-only.
 * @throws {Error} When `provider` has no `generate` function.
 */
export async function runQuestionGenerator(
  contextText,
  provider,
  { maxQuestions = 5 } = {},
) {
  if (!provider || typeof provider.generate !== 'function') {
    throw new Error('Question provider must implement .generate(prompt)');
  }

  if (!contextText || !contextText.trim()) {
    return { questions: [], raw: '', parsed: { error: 'empty_context' } };
  }

  // Minimal built-in prompt; if you have a richer prompt file, you can
  // load it and inject {{CONTEXT}} before calling provider.generate.
  const prompt = [
    'You are a question generation assistant.',
    '',
    'You will be given a chunk of spiritual teaching text as CONTEXT.',
    'Generate diverse, high-quality questions that:',
    '- are answerable from the context only,',
    '- require some thinking, not just copying a sentence,',
    '- are phrased as clear, direct questions.',
    '',
    'Return either:',
    '- JSON: { "questions": ["Q1?", "Q2?", ...] }',
    ' or an array of question-like objects/strings; OR',
    '- Plain text with one question per line.',
    '',
    '---',
    'CONTEXT:',
    contextText,
    '---',
  ].join('\n');

  const raw = await provider.generate(prompt);
  return parseQuestionResponse(raw, { maxQuestions });
}