question-explorer-api / services /questionGenerator.js
Eric Gardner
Use LaMini-Flan-T5-248M (distilled model)
8067185
import { pipeline } from '@xenova/transformers';
// Loaded pipeline instance; stays null until the model has finished loading.
let generator = null;
// In-flight load shared by concurrent callers so the model is only loaded once.
let generatorLoading = null;

/**
 * Initialize the question generation model (LaMini-Flan-T5-248M).
 *
 * The pipeline is created at most once and cached in the module-level
 * `generator`. Calls made while a load is still in progress receive the same
 * pending promise instead of starting a second load. If loading fails, the
 * pending promise is discarded so that a later call can retry.
 *
 * @returns {Promise<Function>} - The loaded text2text-generation pipeline
 */
export async function initQuestionGenerator() {
	if ( generator ) {
		return generator;
	}
	if ( !generatorLoading ) {
		console.log( 'Loading question generation model (LaMini-Flan-T5-248M)...' );
		generatorLoading = pipeline( 'text2text-generation', 'Xenova/LaMini-Flan-T5-248M' )
			.then( ( loaded ) => {
				generator = loaded;
				console.log( 'Question generation model loaded.' );
				return loaded;
			} )
			.finally( () => {
				// Clear the in-flight marker on success and failure alike:
				// on success `generator` is used from now on, on failure the
				// next call starts a fresh load.
				generatorLoading = null;
			} );
	}
	return generatorLoading;
}
/**
 * Generate a single question from a text passage.
 *
 * Relies on the module-level `generator` having been loaded already (see
 * `initQuestionGenerator`).
 *
 * @param {string} text - The passage to generate a question about
 * @returns {Promise<string|null>} - Generated question ending in '?', or null
 *   when the model returns nothing usable (missing output, or 10 characters
 *   or fewer after trimming)
 */
async function generateSingleQuestion( text ) {
	// More specific prompt to encourage factual questions
	const prompt = `Ask a specific factual question that can be answered by the following passage: ${ text }`;
	const result = await generator( prompt, {
		max_new_tokens: 60,
		num_beams: 2,
		do_sample: false
	} );

	// An empty or malformed result means "no question", not a crash.
	const first = Array.isArray( result ) ? result[ 0 ] : undefined;
	if ( !first || typeof first.generated_text !== 'string' ) {
		return null;
	}

	const output = first.generated_text.trim();
	// Very short outputs are not treated as questions.
	if ( output.length <= 10 ) {
		return null;
	}
	if ( output.endsWith( '?' ) ) {
		return output;
	}

	// Drop trailing punctuation before appending the question mark so that
	// "Name the capital." becomes "Name the capital?" rather than "...capital.?".
	const stem = output.replace( /[.!,;:\s]+$/, '' );
	return stem.length > 0 ? `${ stem }?` : null;
}
/**
 * Group sentences into chunks of N for more context.
 *
 * Consecutive sentences are joined with a single space. The last group may
 * hold fewer than `groupSize` sentences.
 *
 * @param {string[]} sentences - Array of sentences
 * @param {number} groupSize - Number of sentences per group (positive integer, default: 3)
 * @returns {string[]} - Array of grouped sentence strings
 * @throws {RangeError} If `groupSize` is not a positive integer
 */
function groupSentences( sentences, groupSize = 3 ) {
	// A step of zero or less would never advance the index and loop forever.
	if ( !Number.isInteger( groupSize ) || groupSize < 1 ) {
		throw new RangeError( `groupSize must be a positive integer, got ${ groupSize }` );
	}
	const groups = [];
	for ( let start = 0; start < sentences.length; start += groupSize ) {
		groups.push( sentences.slice( start, start + groupSize ).join( ' ' ) );
	}
	return groups;
}
/**
 * Generate questions from a text passage.
 *
 * The passage is split into sentences, sentences of 30 characters or fewer
 * are dropped, and the remainder is grouped in pairs. At most
 * `numQuestions * 2` of the leading groups are sent to the model, one at a
 * time, until `numQuestions` distinct questions (compared case-insensitively)
 * have been collected.
 *
 * If generation fails part-way through, the error is logged and the questions
 * collected so far are returned.
 *
 * @param {string} text - The passage to generate questions about
 * @param {number} numQuestions - Number of questions to generate (default: 5)
 * @returns {Promise<string[]>} - Array of generated questions (possibly empty)
 */
export async function generateQuestions( text, numQuestions = 5 ) {
	if ( !generator ) {
		await initQuestionGenerator();
	}

	// Nothing to ask about; treat like any other "no questions" outcome.
	if ( typeof text !== 'string' ) {
		return [];
	}

	// Split text into sentences, keeping only those long enough to carry content
	const sentences = text
		.split( /(?<=[.!?])\s+/ )
		.filter( ( s ) => s.length > 30 );

	// Group sentences in pairs for more context per question
	const chunks = groupSentences( sentences, 2 );

	// Try up to twice as many chunks as questions requested, from the start
	// of the text, to leave room for rejected or duplicate outputs
	const sampleSize = Math.min( numQuestions * 2, chunks.length );
	const sampled = chunks.slice( 0, sampleSize );

	const questions = [];
	const seen = new Set();

	try {
		for ( const chunk of sampled ) {
			if ( questions.length >= numQuestions ) {
				break;
			}
			const question = await generateSingleQuestion( chunk );
			if ( !question ) {
				continue;
			}
			const key = question.toLowerCase();
			if ( !seen.has( key ) ) {
				seen.add( key );
				questions.push( question );
			}
		}
	} catch ( error ) {
		// Keep whatever was generated before the failure instead of
		// discarding it.
		console.error( 'Question generation failed:', error );
	}

	return questions;
}
/**
 * Build the lead-section text of an article from its chunks.
 *
 * A chunk belongs to the lead when its `sectionTitle` is exactly
 * 'Introduction' or its `sectionId` is strictly `null`.
 *
 * @param {Array} chunks - Article chunks; `sectionTitle`, `sectionId` and `text` are read
 * @returns {string} - Text of the first three lead chunks, joined with single spaces
 */
export function getLeadSectionText( chunks ) {
	const leadTexts = [];
	chunks.forEach( ( entry ) => {
		const belongsToLead = entry.sectionTitle === 'Introduction' || entry.sectionId === null;
		// Only the first 3 lead paragraphs are kept
		if ( belongsToLead && leadTexts.length < 3 ) {
			leadTexts.push( entry.text );
		}
	} );
	return leadTexts.join( ' ' );
}