import { pipeline } from '@xenova/transformers';

/** Splits text after sentence-ending punctuation that is followed by whitespace. */
const SENTENCE_BOUNDARY = /(?<=[.!?])\s+/;

/** Sentences at or below this length are dropped before grouping. */
const MIN_SENTENCE_LENGTH = 30;

/** Number of sentences joined into each prompt passage. */
const SENTENCES_PER_CHUNK = 2;

/** Generated outputs at or below this length are rejected. */
const MIN_QUESTION_LENGTH = 10;

/** Maximum number of lead chunks combined by getLeadSectionText. */
const MAX_LEAD_CHUNKS = 3;

/** Loaded text2text-generation pipeline, or null until loading has finished. */
let generator = null;

/** In-flight load shared by concurrent callers so the model is only loaded once. */
let generatorPromise = null;

/**
 * Load the pipeline, store it in the module-level `generator`, and return it.
 *
 * @returns {Promise<Function>} - The loaded pipeline
 */
async function loadGenerator() {
	console.log( 'Loading question generation model (LaMini-Flan-T5-248M)...' );
	const loaded = await pipeline( 'text2text-generation', 'Xenova/LaMini-Flan-T5-248M' );
	generator = loaded;
	console.log( 'Question generation model loaded.' );
	return loaded;
}

/**
 * Initialize the question generation model (LaMini-Flan-T5-248M)
 *
 * Safe to call repeatedly and concurrently: callers that arrive while a load
 * is in progress wait for that same load instead of starting another one.
 * If loading fails the error is rethrown and the next call retries.
 *
 * @returns {Promise<Function>} - The loaded pipeline
 */
export async function initQuestionGenerator() {
	if ( generator ) {
		return generator;
	}

	if ( !generatorPromise ) {
		generatorPromise = loadGenerator();
	}

	const pending = generatorPromise;
	try {
		return await pending;
	} catch ( error ) {
		// Only clear the promise this call waited on, so a retry that another
		// caller has already started is not thrown away.
		if ( generatorPromise === pending ) {
			generatorPromise = null;
		}
		throw error;
	}
}

/**
 * Generate a single question from a text passage
 *
 * Expects the module-level `generator` to be loaded already;
 * generateQuestions awaits initQuestionGenerator before calling this.
 *
 * @param {string} text - The passage to generate a question about
 * @returns {Promise<string|null>} - Generated question ending in '?', or null
 *   when the output is missing or not longer than MIN_QUESTION_LENGTH
 */
async function generateSingleQuestion( text ) {
	// More specific prompt to encourage factual questions
	const prompt = `Ask a specific factual question that can be answered by the following passage: ${ text }`;

	const result = await generator( prompt, {
		max_new_tokens: 60,
		num_beams: 2,
		do_sample: false
	} );

	// Treat an unexpected result shape as "no question" rather than throwing.
	const first = Array.isArray( result ) ? result[ 0 ] : null;
	const generated = first ? first.generated_text : null;
	if ( typeof generated !== 'string' ) {
		return null;
	}

	const output = generated.trim();
	if ( output.length <= MIN_QUESTION_LENGTH ) {
		return null;
	}

	if ( output.endsWith( '?' ) ) {
		return output;
	}

	// Drop trailing punctuation first so "What is X." becomes "What is X?"
	// instead of "What is X.?".
	return `${ output.replace( /[\s.!,;:]+$/, '' ) }?`;
}

/**
 * Group sentences into chunks of N for more context
 *
 * @param {string[]} sentences - Array of sentences
 * @param {number} groupSize - Number of sentences per group (positive integer)
 * @returns {string[]} - Array of grouped sentence strings, joined with a space
 * @throws {RangeError} If groupSize is not a positive integer
 */
function groupSentences( sentences, groupSize = 3 ) {
	// A step of zero or less would never advance the loop below.
	if ( !Number.isInteger( groupSize ) || groupSize < 1 ) {
		throw new RangeError( `groupSize must be a positive integer, got ${ groupSize }` );
	}

	const groups = [];
	for ( let start = 0; start < sentences.length; start += groupSize ) {
		groups.push( sentences.slice( start, start + groupSize ).join( ' ' ) );
	}
	return groups;
}

/**
 * Generate questions from a text passage
 *
 * The text is split into sentences, short sentences are dropped, and the rest
 * are grouped into passages. At most `numQuestions * 2` passages are tried, in
 * order, until `numQuestions` distinct questions (compared case-insensitively)
 * have been collected.
 *
 * If generation fails part-way through, the error is logged and the questions
 * collected so far are returned. A failure to load the model is not caught
 * here and rejects the returned promise.
 *
 * @param {string} text - The passage to generate questions about
 * @param {number} numQuestions - Number of questions to generate (default: 5)
 * @returns {Promise<string[]>} - Array of generated questions; empty when
 *   `text` is not a non-blank string or `numQuestions` is not positive
 */
export async function generateQuestions( text, numQuestions = 5 ) {
	// Nothing can be generated from these inputs, so skip loading the model.
	if ( typeof text !== 'string' || text.trim() === '' || !( numQuestions > 0 ) ) {
		return [];
	}

	await initQuestionGenerator();

	// Split text into sentences
	const sentences = text
		.split( SENTENCE_BOUNDARY )
		.filter( ( sentence ) => sentence.length > MIN_SENTENCE_LENGTH );

	// Group sentences for more context per question
	const chunks = groupSentences( sentences, SENTENCES_PER_CHUNK );

	// Try up to twice as many passages as requested, to leave room for
	// rejected or duplicate outputs.
	const sampleSize = Math.min( numQuestions * 2, chunks.length );
	const sampled = chunks.slice( 0, sampleSize );

	const questions = [];
	const seen = new Set();

	try {
		for ( const chunk of sampled ) {
			if ( questions.length >= numQuestions ) {
				break;
			}

			const question = await generateSingleQuestion( chunk );
			if ( !question ) {
				continue;
			}

			const key = question.toLowerCase();
			if ( !seen.has( key ) ) {
				seen.add( key );
				questions.push( question );
			}
		}
	} catch ( error ) {
		// Keep whatever was generated before the failure.
		console.error( 'Question generation failed:', error );
	}

	return questions;
}

/**
 * Extract lead section text from chunks
 *
 * A chunk belongs to the lead when its `sectionTitle` is 'Introduction' or
 * its `sectionId` is strictly null.
 *
 * @param {Array} chunks - Article chunks with sectionTitle, sectionId and text
 * @returns {string} - Text of up to the first 3 lead chunks joined with a
 *   space; empty string when `chunks` is not an array
 */
export function getLeadSectionText( chunks ) {
	if ( !Array.isArray( chunks ) ) {
		return '';
	}

	return chunks
		.filter( ( chunk ) => chunk.sectionTitle === 'Introduction' || chunk.sectionId === null )
		.slice( 0, MAX_LEAD_CHUNKS )
		.map( ( chunk ) => chunk.text )
		.join( ' ' );
}