// src/question/question_core.mjs
import { preview } from '../pipeline/util.mjs';
/**
 * Safely parse a JSON object or array out of a raw model response.
 *
 * Only object (`{...}`) and array (`[...]`) roots are accepted; anything that
 * does not start with `{` or `[` is rejected up front so ordinary prose is
 * never handed to `JSON.parse`.
 *
 * A response wrapped in a single Markdown code fence (e.g. ```json ... ```)
 * is unwrapped before parsing, because chat models frequently fence their
 * JSON output and the fence alone would otherwise make valid JSON unparseable.
 *
 * @param {unknown} raw - Raw response text.
 * @returns {object | Array | null} The parsed value, or `null` when `raw` is
 *   not a non-empty string, does not look like a JSON object/array, or fails
 *   to parse. This function never throws.
 */
function tryParseJson(raw) {
  if (!raw || typeof raw !== 'string') return null;

  const FENCE = '```';
  let candidate = raw.trim();

  // Unwrap one surrounding code fence, dropping an optional language tag
  // ("json", "js", ...) that directly follows the opening fence.
  if (
    candidate.length >= FENCE.length * 2 &&
    candidate.startsWith(FENCE) &&
    candidate.endsWith(FENCE)
  ) {
    candidate = candidate
      .slice(FENCE.length, -FENCE.length)
      .replace(/^[a-zA-Z0-9_-]*/, '')
      .trim();
  }

  // Quick sanity: must start with { or [
  if (!candidate.startsWith('{') && !candidate.startsWith('[')) {
    return null;
  }

  try {
    return JSON.parse(candidate);
  } catch {
    // Malformed JSON is an expected outcome; callers fall back to text parsing.
    return null;
  }
}
/**
 * Pull question sentences out of a free-form (non-JSON) model response.
 *
 * Handles one-question-per-line output such as
 *
 *   What is the primary purpose of practicing presence according to the text?
 *   How does Q'uo characterize the physical vehicle's limitations?
 *
 * including numbered or bulleted variants ("1. ...", "2) ...", "- ...", "* ...").
 *
 * Strategy:
 *  1. Simple tags like <analysis> or </reasoning> are replaced by a space.
 *  2. Every line containing '?' contributes the text up to and including its
 *     first '?', after any list prefix is removed. Results shorter than 10
 *     characters or without an ASCII letter are discarded.
 *  3. Only if step 2 yields nothing, the whole text is cut at each '?' and
 *     every chunk of at least 10 characters (not counting the '?') that
 *     contains an ASCII letter is kept. Such chunks may span several lines.
 *  4. Exact duplicates are dropped; first-seen order is kept.
 *
 * @param {unknown} rawText - Model output; anything but a non-empty string yields [].
 * @returns {string[]} Unique questions, each ending in '?'.
 */
function extractQuestionsFromText(rawText) {
  if (typeof rawText !== 'string' || rawText.length === 0) return [];

  const MIN_CHARS = 10;
  const containsLetter = (s) => /[a-zA-Z]/.test(s);

  const text = rawText.replace(/<\/?[a-zA-Z0-9_:-]+>/g, ' ');

  // Pass 1: at most one question per line.
  const found = text.split(/\r?\n/).flatMap((rawLine) => {
    const line = rawLine.trim();
    if (!line.includes('?')) return [];

    // Drop list markers: "1. ", "1) ", "- ", "* "
    const body = line.replace(/^(?:\d+\s*[.)]\s*|[-*]\s*)/, '').trim();

    // The prefix pattern can never consume a '?', so one is still present.
    const stem = body.slice(0, body.indexOf('?')).trim();
    if (stem === '') return [];

    const question = `${stem}?`;
    return question.length >= MIN_CHARS && containsLetter(question)
      ? [question]
      : [];
  });

  // Pass 2 (fallback): recover questions that were broken across lines.
  if (found.length === 0) {
    const chunks = text.split('?');
    chunks.pop(); // whatever follows the final '?' is not a question

    for (const chunk of chunks) {
      const stem = chunk.trim();
      // Unlike pass 1, the length threshold here applies before '?' is appended.
      if (stem.length < MIN_CHARS || !containsLetter(stem)) continue;
      found.push(`${stem}?`);
    }
  }

  // A Set keeps insertion order, so this is an order-preserving dedupe.
  return [...new Set(found)];
}
/**
 * Core helper: turn a raw model response into a list of questions.
 *
 * Resolution order:
 *  1. JSON (via tryParseJson) in one of two shapes:
 *       - `{ "questions": [...] }`
 *       - a root array `[...]`
 *     In both shapes each item may be a string, or an object carrying the
 *     text in a `question` or `question_text` string property. Items are
 *     trimmed and kept only if they end with '?'. When a recognised shape is
 *     found the function returns immediately, even if no item qualified.
 *  2. Otherwise (invalid JSON, or JSON of another shape) the questions are
 *     recovered from plain text via extractQuestionsFromText.
 *
 * @param {unknown} raw - Raw model output.
 * @param {object} [options]
 * @param {number} [options.maxQuestions] - Upper bound on returned questions.
 *   Only a positive value is applied; omitted, zero, negative or NaN means
 *   "no limit".
 * @returns {{ questions: string[], raw: unknown, parsed: any }}
 *   - `questions`: the extracted questions (possibly empty).
 *   - `raw`: the input as given, or '' when it was null/undefined.
 *   - `parsed`: the parsed JSON value when a recognised shape was found;
 *     otherwise one of
 *       `{ error: 'empty_response' }` (raw is empty or not a string),
 *       `{ error: 'invalid_json', rawSnippet }`,
 *       `{ error: 'unrecognized_json_shape', rawSnippet }`.
 */
export function parseQuestionResponse(raw, { maxQuestions } = {}) {
  const result = {
    questions: [],
    raw: raw ?? '',
    parsed: null,
  };

  if (!raw || typeof raw !== 'string') {
    result.parsed = { error: 'empty_response' };
    return result;
  }

  // A non-positive limit must not reach slice(): a negative end index would
  // silently drop questions from the tail instead of capping the list.
  const limit = Number(maxQuestions);
  const applyLimit = (list) => (limit > 0 ? list.slice(0, limit) : list);

  // Accept the same item forms regardless of where the list was found.
  const toQuestionText = (item) => {
    if (typeof item === 'string') return item.trim();
    if (item && typeof item === 'object') {
      if (typeof item.question === 'string') return item.question.trim();
      if (typeof item.question_text === 'string') {
        return item.question_text.trim();
      }
    }
    return '';
  };

  const parsed = tryParseJson(raw);

  if (parsed != null) {
    let items = null;
    if (Array.isArray(parsed)) {
      items = parsed;
    } else if (typeof parsed === 'object' && Array.isArray(parsed.questions)) {
      items = parsed.questions;
    }

    if (items) {
      result.parsed = parsed;
      result.questions = applyLimit(
        items.map(toQuestionText).filter((q) => q.endsWith('?')),
      );
      return result;
    }

    // Parsed JSON but not in a recognized shape
    result.parsed = {
      error: 'unrecognized_json_shape',
      rawSnippet: preview(raw, 200),
    };
  } else {
    // Not valid JSON at all
    result.parsed = {
      error: 'invalid_json',
      rawSnippet: preview(raw, 200),
    };
  }

  // Fallback: extract questions from plain text
  result.questions = applyLimit(extractQuestionsFromText(raw));
  return result;
}
/**
 * High-level helper used by the pipeline: build the question-generation
 * prompt around `contextText`, send it to `provider`, and parse the reply.
 *
 *   const { questions, raw, parsed } =
 *     await runQuestionGenerator(contextText, provider, { maxQuestions });
 *
 * @param {unknown} contextText - Source text to generate questions about.
 *   A value that is not a string, or is blank after trimming, short-circuits
 *   with an `empty_context` result and the provider is not called.
 * @param {{ generate: (prompt: string) => any }} provider - Object exposing a
 *   `generate(prompt)` function; its awaited result is treated as the raw
 *   model output.
 * @param {object} [options]
 * @param {number} [options.maxQuestions=5] - Forwarded to
 *   parseQuestionResponse as the cap on returned questions.
 * @returns {Promise<{ questions: string[], raw: any, parsed: any }>} Whatever
 *   parseQuestionResponse returns for the provider's output, or
 *   `{ questions: [], raw: '', parsed: { error: 'empty_context' } }`.
 * @throws {Error} If `provider` is missing or has no `generate` function.
 *   Errors raised by `provider.generate` propagate unchanged.
 */
export async function runQuestionGenerator(
  contextText,
  provider,
  { maxQuestions = 5 } = {},
) {
  if (!provider || typeof provider.generate !== 'function') {
    throw new Error('Question provider must implement .generate(prompt)');
  }

  // Check the type before trimming: a truthy non-string (number, object, ...)
  // has no .trim() and would otherwise surface as an opaque TypeError.
  if (typeof contextText !== 'string' || contextText.trim() === '') {
    return { questions: [], raw: '', parsed: { error: 'empty_context' } };
  }

  // Minimal built-in prompt; if you have a richer prompt file, you can
  // load it and inject {{CONTEXT}} before calling provider.generate.
  const prompt = [
    'You are a question generation assistant.',
    '',
    'You will be given a chunk of spiritual teaching text as CONTEXT.',
    'Generate diverse, high-quality questions that:',
    '- are answerable from the context only,',
    '- require some thinking, not just copying a sentence,',
    '- are phrased as clear, direct questions.',
    '',
    'Return either:',
    '- JSON: { "questions": ["Q1?", "Q2?", ...] }',
    ' or an array of question-like objects/strings; OR',
    '- Plain text with one question per line.',
    '',
    '---',
    'CONTEXT:',
    contextText,
    '---',
  ].join('\n');

  const raw = await provider.generate(prompt);
  return parseQuestionResponse(raw, { maxQuestions });
}
|