Spaces:
Running
Running
| import { pipeline } from '@xenova/transformers'; | |
// Loaded text2text-generation pipeline; null until the first successful load.
let generator = null;

// Promise for a model load that is currently in flight, shared by concurrent
// callers so the model is only requested once. Null when no load is running.
let generatorLoading = null;

/**
 * Initialize the question generation model (LaMini-Flan-T5-248M).
 *
 * The loaded pipeline is cached in the module-level `generator`, so later
 * calls resolve immediately. Calls made while a load is still in progress
 * share that load instead of starting another one. If the load fails, the
 * in-flight state is cleared so that a later call can retry.
 *
 * @returns {Promise<Function>} - The loaded text2text-generation pipeline
 */
export async function initQuestionGenerator() {
	if ( generator ) {
		return generator;
	}

	if ( !generatorLoading ) {
		console.log( 'Loading question generation model (LaMini-Flan-T5-248M)...' );
		// The handlers run asynchronously, i.e. after the assignment below has
		// completed, so clearing `generatorLoading` inside them is safe.
		generatorLoading = pipeline( 'text2text-generation', 'Xenova/LaMini-Flan-T5-248M' ).then(
			( loaded ) => {
				generator = loaded;
				generatorLoading = null;
				console.log( 'Question generation model loaded.' );
				return loaded;
			},
			( error ) => {
				// Drop the failed load so the next call starts a fresh attempt.
				generatorLoading = null;
				throw error;
			}
		);
	}

	return generatorLoading;
}
/**
 * Generate a single question from a text passage.
 *
 * Calls the module-level `generator` pipeline, which must already be loaded
 * (see initQuestionGenerator). Outputs of 10 characters or fewer after
 * trimming are rejected. Accepted outputs always end with exactly one
 * question mark: trailing whitespace and sentence punctuation are removed
 * before a missing '?' is appended, so "…France." becomes "…France?"
 * rather than "…France.?".
 *
 * @param {string} text - The passage to generate a question about
 * @returns {Promise<string|null>} - Generated question, or null when the
 *   model returned nothing usable
 */
async function generateSingleQuestion( text ) {
	// More specific prompt to encourage factual questions
	const prompt = `Ask a specific factual question that can be answered by the following passage: ${ text }`;
	const result = await generator( prompt, {
		max_new_tokens: 60,
		num_beams: 2,
		do_sample: false
	} );

	// An empty or malformed result is an expected "no question" outcome, so it
	// maps to null instead of raising a TypeError.
	const generated = result?.[ 0 ]?.generated_text;
	if ( typeof generated !== 'string' ) {
		return null;
	}

	const output = generated.trim();
	if ( output.length <= 10 ) {
		return null;
	}
	if ( output.endsWith( '?' ) ) {
		return output;
	}

	// Strip trailing punctuation so the appended '?' does not follow a '.' or '!'.
	return `${ output.replace( /[\s.!,;:]+$/, '' ) }?`;
}
/**
 * Group sentences into chunks of N for more context.
 *
 * Consecutive sentences are joined with a single space. The last group may
 * contain fewer than `groupSize` sentences.
 *
 * @param {string[]} sentences - Array of sentences
 * @param {number} groupSize - Number of sentences per group (positive integer)
 * @returns {string[]} - Array of grouped sentence strings
 * @throws {RangeError} If groupSize is not a positive integer
 */
function groupSentences( sentences, groupSize = 3 ) {
	// A zero or negative step would never advance the loop below, and a
	// fractional step would produce overlapping groups.
	if ( !Number.isInteger( groupSize ) || groupSize < 1 ) {
		throw new RangeError( `groupSize must be a positive integer, got ${ String( groupSize ) }` );
	}

	const groups = [];
	for ( let start = 0; start < sentences.length; start += groupSize ) {
		groups.push( sentences.slice( start, start + groupSize ).join( ' ' ) );
	}
	return groups;
}
/**
 * Generate questions from a text passage.
 *
 * The model is loaded first if necessary. The passage is split into
 * sentences, sentences of 30 characters or fewer are dropped, and the rest
 * are grouped in pairs. Questions are generated one chunk at a time from the
 * first chunks until `numQuestions` distinct questions (compared
 * case-insensitively) have been collected.
 *
 * A missing, non-string or blank `text` yields an empty array. If generation
 * throws, the error is logged and an empty array is returned.
 *
 * @param {string} text - The passage to generate questions about
 * @param {number} numQuestions - Number of questions to generate (default: 5)
 * @returns {Promise<string[]>} - Array of generated questions
 */
export async function generateQuestions( text, numQuestions = 5 ) {
	// Resolves immediately when the model is already loaded.
	await initQuestionGenerator();

	// Nothing to ask about; also avoids a TypeError from calling split() on null/undefined.
	if ( typeof text !== 'string' || text.trim() === '' ) {
		return [];
	}

	// Split text into sentences (break after ., ! or ? followed by whitespace)
	const sentences = text
		.split( /(?<=[.!?])\s+/ )
		.filter( ( sentence ) => sentence.length > 30 );

	// Group sentences in pairs for more context per question
	const chunks = groupSentences( sentences, 2 );

	// Use the first chunks only; twice as many as requested, so rejected or
	// duplicate questions can be made up for.
	const sampleSize = Math.min( numQuestions * 2, chunks.length );
	const sampled = chunks.slice( 0, sampleSize );

	const questions = [];
	const seen = new Set();

	try {
		for ( const chunk of sampled ) {
			if ( questions.length >= numQuestions ) {
				break;
			}

			const question = await generateSingleQuestion( chunk );
			if ( !question ) {
				continue;
			}

			// Case-insensitive de-duplication
			const key = question.toLowerCase();
			if ( !seen.has( key ) ) {
				seen.add( key );
				questions.push( question );
			}
		}
		return questions;
	} catch ( error ) {
		console.error( 'Question generation failed:', error );
		return [];
	}
}
/**
 * Build the lead-section text of an article from its chunks.
 *
 * A chunk counts as part of the lead when its `sectionTitle` is exactly
 * 'Introduction' or its `sectionId` is strictly `null`. The `text` of the
 * first three such chunks is joined with single spaces.
 *
 * @param {Array} chunks - Article chunks with sectionTitle
 * @returns {string} - Combined text from the introduction/lead section
 */
export function getLeadSectionText( chunks ) {
	// Only the first few lead paragraphs are used.
	const MAX_LEAD_PARAGRAPHS = 3;

	const belongsToLead = ( candidate ) => {
		if ( candidate.sectionTitle === 'Introduction' ) {
			return true;
		}
		return candidate.sectionId === null;
	};

	const leadParagraphs = chunks.filter( belongsToLead ).slice( 0, MAX_LEAD_PARAGRAPHS );
	const paragraphTexts = leadParagraphs.map( ( paragraph ) => paragraph.text );

	return paragraphTexts.join( ' ' );
}