|
|
|
|
|
import { preview } from '../pipeline/util.mjs'; |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
/**
 * Best-effort JSON parse for model output.
 *
 * Only object- or array-shaped payloads are attempted; bare JSON primitives
 * (numbers, quoted strings, ...) are deliberately rejected so that ordinary
 * prose is never mistaken for a structured response. A payload wrapped in a
 * single markdown code fence (``` or ```json) is unwrapped first.
 *
 * @param {unknown} raw - Candidate response text.
 * @returns {object|Array|null} The parsed value, or `null` when `raw` is not
 *   a non-empty string, does not look like an object/array, or is invalid JSON.
 */
function tryParseJson(raw) {
  if (!raw || typeof raw !== 'string') return null;

  const trimmed = raw.trim();

  // Unwrap the payload when the whole response is one fenced block; without
  // this a fenced JSON answer would be reported as "not JSON".
  const fence = /^```[\w-]*[ \t]*\r?\n([\s\S]*?)\r?\n?[ \t]*```$/.exec(trimmed);
  const candidate = fence ? fence[1].trim() : trimmed;

  // Cheap shape check: skip JSON.parse entirely for anything that cannot be
  // an object or an array.
  if (!candidate.startsWith('{') && !candidate.startsWith('[')) {
    return null;
  }

  try {
    return JSON.parse(candidate);
  } catch {
    // Malformed JSON is an expected outcome here; `null` is the sentinel the
    // caller uses to decide on a fallback.
    return null;
  }
}
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
/**
 * Scrape question sentences out of free-form (non-JSON) text.
 *
 * Strategy:
 *  1. Replace simple XML/HTML-like tags (no attributes) with a space.
 *  2. Line pass: for every line containing `?`, drop a leading list marker
 *     (`1.`, `2)`, `-`, `*`) and keep the text up to the first `?`.
 *  3. Fallback, only when the line pass found nothing: split the whole text
 *     on `?` so that questions wrapped across several lines are still found.
 *  4. Remove exact duplicates, preserving first-seen order.
 *
 * A candidate is kept only if it is at least 10 characters long and contains
 * at least one letter (any script). Runs of whitespace inside a question are
 * collapsed to a single space.
 *
 * @param {unknown} rawText - Text to scan.
 * @returns {string[]} Unique questions, each ending in `?`; empty when
 *   `rawText` is not a non-empty string or nothing qualifies.
 */
function extractQuestionsFromText(rawText) {
  if (!rawText || typeof rawText !== 'string') return [];

  const MIN_QUESTION_LENGTH = 10;
  // \p{L} instead of [a-zA-Z] so questions in non-Latin scripts are not dropped.
  const hasLetter = (text) => /\p{L}/u.test(text);
  // Tag removal and wrapped lines leave double spaces / newlines behind.
  const collapse = (text) => text.replace(/\s+/g, ' ').trim();

  const stripped = rawText.replace(/<\/?[a-zA-Z0-9_:-]+>/g, ' ');

  const questions = [];

  for (const rawLine of stripped.split(/\r?\n/)) {
    const line = rawLine.trim();
    if (!line.includes('?')) continue;

    const withoutMarker = line.replace(/^(?:\d+\s*[.)]\s*|[-*]\s*)/, '');

    // Only the text before the first `?` is kept; anything after it on the
    // same line is ignored.
    const stem = collapse(withoutMarker.split('?')[0]);
    if (!stem) continue;

    const question = `${stem}?`;
    if (question.length < MIN_QUESTION_LENGTH) continue;
    if (!hasLetter(question)) continue;

    questions.push(question);
  }

  if (questions.length === 0) {
    // The piece after the last `?` is not terminated by a question mark, so
    // it is excluded via slice(0, -1).
    for (const segment of stripped.split('?').slice(0, -1)) {
      const stem = collapse(segment);
      if (stem.length < MIN_QUESTION_LENGTH) continue;
      if (!hasLetter(stem)) continue;
      questions.push(`${stem}?`);
    }
  }

  // Set keeps insertion order, so this is an order-preserving dedupe.
  return [...new Set(questions)];
}
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
/**
 * Turn a raw model response into a list of questions.
 *
 * Resolution order:
 *  1. Not a non-empty string -> `parsed: { error: 'empty_response' }`, no questions.
 *  2. Valid JSON (per `tryParseJson`) that is either `{ questions: [...] }`
 *     or a top-level array -> entries are normalised and only trimmed strings
 *     ending in `?` are kept. Each entry may be a string or an object with a
 *     string `question` / `question_text` field.
 *  3. Valid JSON of any other shape -> `parsed.error = 'unrecognized_json_shape'`,
 *     then the plain-text fallback runs.
 *  4. Not JSON -> `parsed.error = 'invalid_json'`, then the plain-text
 *     fallback (`extractQuestionsFromText`) runs.
 *
 * @param {unknown} raw - Response text from the provider.
 * @param {object} [options]
 * @param {number} [options.maxQuestions] - Upper bound on returned questions.
 *   Missing, zero, negative or non-finite values mean "no limit".
 * @returns {{questions: string[], raw: unknown, parsed: unknown}}
 *   `raw` echoes the input (`''` when null/undefined); `parsed` is the decoded
 *   JSON value on success or an `{ error, rawSnippet? }` diagnostic object.
 */
export function parseQuestionResponse(raw, { maxQuestions } = {}) {
  // A negative limit would make slice() drop items from the END of the list,
  // so only a positive finite limit is honoured.
  const applyLimit = (questions) => {
    const limit = Number(maxQuestions);
    return Number.isFinite(limit) && limit > 0
      ? questions.slice(0, limit)
      : questions;
  };

  // Normalise one JSON entry to its question text ('' when unusable).
  const toQuestionText = (item) => {
    if (typeof item === 'string') return item.trim();
    if (item && typeof item === 'object') {
      if (typeof item.question === 'string') return item.question.trim();
      if (typeof item.question_text === 'string') {
        return item.question_text.trim();
      }
    }
    return '';
  };

  const result = {
    questions: [],
    raw: raw ?? '',
    parsed: null,
  };

  if (!raw || typeof raw !== 'string') {
    result.parsed = { error: 'empty_response' };
    return result;
  }

  const parsed = tryParseJson(raw);

  if (parsed != null) {
    result.parsed = parsed;

    // Both supported shapes reduce to "a list of entries".
    let entries = null;
    if (Array.isArray(parsed)) {
      entries = parsed;
    } else if (typeof parsed === 'object' && Array.isArray(parsed.questions)) {
      entries = parsed.questions;
    }

    if (entries) {
      result.questions = applyLimit(
        entries.map(toQuestionText).filter((q) => q && q.endsWith('?')),
      );
      return result;
    }

    // Valid JSON but not a shape we understand: record that and fall through
    // to the text scraper below.
    result.parsed = {
      error: 'unrecognized_json_shape',
      rawSnippet: preview(raw, 200),
    };
  } else {
    result.parsed = {
      error: 'invalid_json',
      rawSnippet: preview(raw, 200),
    };
  }

  result.questions = applyLimit(extractQuestionsFromText(raw));
  return result;
}
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
/**
 * Ask a provider to generate questions about a chunk of context text and
 * parse its answer.
 *
 * @param {string} contextText - Source text the questions must be answerable
 *   from. It is embedded verbatim in the prompt.
 * @param {{generate: (prompt: string) => (string|Promise<string>)}} provider -
 *   Object exposing a `generate(prompt)` method whose (awaited) result is the
 *   raw response.
 * @param {object} [options]
 * @param {number} [options.maxQuestions=5] - Forwarded to
 *   `parseQuestionResponse` to cap the number of questions returned.
 * @returns {Promise<{questions: string[], raw: unknown, parsed: unknown}>}
 *   The result of `parseQuestionResponse`, or an `empty_context` result
 *   (without calling the provider) when `contextText` is missing, blank or
 *   not a string.
 * @throws {Error} When `provider` has no `generate` function.
 */
export async function runQuestionGenerator(
  contextText,
  provider,
  { maxQuestions = 5 } = {},
) {
  if (!provider || typeof provider.generate !== 'function') {
    throw new Error('Question provider must implement .generate(prompt)');
  }

  // The typeof guard keeps a non-string context (number, object, ...) from
  // blowing up on .trim(); it is treated like an empty context instead.
  if (typeof contextText !== 'string' || !contextText.trim()) {
    return { questions: [], raw: '', parsed: { error: 'empty_context' } };
  }

  const prompt = [
    'You are a question generation assistant.',
    '',
    'You will be given a chunk of spiritual teaching text as CONTEXT.',
    'Generate diverse, high-quality questions that:',
    '- are answerable from the context only,',
    '- require some thinking, not just copying a sentence,',
    '- are phrased as clear, direct questions.',
    '',
    'Return either:',
    '- JSON: { "questions": ["Q1?", "Q2?", ...] }',
    '  or an array of question-like objects/strings; OR',
    '- Plain text with one question per line.',
    '',
    '---',
    'CONTEXT:',
    contextText,
    '---',
  ].join('\n');

  const raw = await provider.generate(prompt);
  return parseQuestionResponse(raw, { maxQuestions });
}
|
|
|