File size: 3,392 Bytes
be647a4
 
 
 
 
8067185
be647a4
 
 
8067185
 
be647a4
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
import { pipeline } from '@xenova/transformers';

let generator = null;

// Promise for a model load that has been started but may not have finished.
// Shared so that overlapping callers wait on one load instead of each
// starting their own.
let generatorLoading = null;

/**
 * Initialize the question generation model (LaMini-Flan-T5-248M)
 *
 * The loaded pipeline is cached in the module-level `generator`, so the
 * model is only requested once. Calls that arrive while a load is still in
 * progress wait for that same load. If loading fails, the failure is
 * propagated to every waiting caller and the next call starts a new attempt.
 *
 * @returns {Promise<Function>} - The text2text-generation pipeline
 */
export async function initQuestionGenerator() {
	if ( generator ) {
		return generator;
	}

	if ( !generatorLoading ) {
		console.log( 'Loading question generation model (LaMini-Flan-T5-248M)...' );
		generatorLoading = pipeline( 'text2text-generation', 'Xenova/LaMini-Flan-T5-248M' );
	}
	const loading = generatorLoading;

	try {
		const loaded = await loading;
		// Only the first waiter to resume publishes the model and logs.
		if ( !generator ) {
			generator = loaded;
			console.log( 'Question generation model loaded.' );
		}
	} catch ( error ) {
		// Forget the failed attempt (unless a newer one already replaced it)
		// so that a later call can retry.
		if ( generatorLoading === loading ) {
			generatorLoading = null;
		}
		throw error;
	}

	return generator;
}

/**
 * Ask the loaded model for one question about a passage.
 *
 * The passage is appended to a fixed instruction and sent to the
 * module-level `generator`. The first generated text is trimmed; outputs of
 * 10 characters or fewer are rejected, and a trailing '?' is appended when
 * the model did not produce one.
 *
 * @param {string} text - The passage to generate a question about
 * @returns {Promise<string|null>} - Generated question or null
 */
async function generateSingleQuestion( text ) {
	const instruction = 'Ask a specific factual question that can be answered by the following passage: ';
	const generationOptions = {
		max_new_tokens: 60,
		num_beams: 2,
		do_sample: false
	};

	const outputs = await generator( `${ instruction }${ text }`, generationOptions );
	const candidate = outputs[ 0 ].generated_text.trim();

	// Too short to be a usable question
	if ( candidate.length <= 10 ) {
		return null;
	}

	if ( candidate.endsWith( '?' ) ) {
		return candidate;
	}
	return `${ candidate }?`;
}

/**
 * Group sentences into chunks of N for more context
 *
 * Consecutive sentences are joined with a single space. The final group may
 * hold fewer than `groupSize` sentences.
 *
 * @param {string[]} sentences - Array of sentences
 * @param {number} groupSize - Number of sentences per group (positive integer)
 * @returns {string[]} - Array of grouped sentence strings
 * @throws {RangeError} If groupSize is not a positive integer
 */
function groupSentences( sentences, groupSize = 3 ) {
	// A step of zero or less would never advance the loop below (it would
	// spin forever), and a fractional or non-numeric step would produce
	// uneven or nonsensical groups.
	if ( !Number.isInteger( groupSize ) || groupSize < 1 ) {
		throw new RangeError( `groupSize must be a positive integer, got ${ String( groupSize ) }` );
	}

	const groups = [];
	for ( let i = 0; i < sentences.length; i += groupSize ) {
		groups.push( sentences.slice( i, i + groupSize ).join( ' ' ) );
	}
	return groups;
}

/**
 * Generate questions from a text passage
 *
 * The passage is split into sentences, short sentences (30 characters or
 * fewer) are dropped, and the rest are paired up into chunks. Chunks are
 * passed to the model one at a time, in order, until enough distinct
 * questions have been collected. Duplicates are detected case-insensitively.
 *
 * If generation fails partway through, the error is logged and the
 * questions collected so far are returned. The model is only loaded when
 * there is at least one chunk to generate from.
 *
 * @param {string} text - The passage to generate questions about
 * @param {number} numQuestions - Number of questions to generate (default: 5)
 * @returns {Promise<string[]>} - Array of generated questions
 */
export async function generateQuestions( text, numQuestions = 5 ) {
	// Nothing usable to work from: return before touching the model.
	if ( typeof text !== 'string' || !( numQuestions > 0 ) ) {
		return [];
	}

	// Split text into sentences
	const sentences = text
		.split( /(?<=[.!?])\s+/ )
		.filter( ( s ) => s.length > 30 );

	// Group sentences two at a time for more context per question
	const chunks = groupSentences( sentences, 2 );

	// Consider up to twice as many chunks as questions wanted, leaving room
	// for rejected or duplicate outputs
	const sampleSize = Math.min( numQuestions * 2, chunks.length );
	const sampled = chunks.slice( 0, sampleSize );
	if ( sampled.length === 0 ) {
		return [];
	}

	if ( !generator ) {
		await initQuestionGenerator();
	}

	const questions = [];
	const seen = new Set();

	try {
		for ( const chunk of sampled ) {
			if ( questions.length >= numQuestions ) {
				break;
			}

			const question = await generateSingleQuestion( chunk );
			if ( !question ) {
				continue;
			}

			const key = question.toLowerCase();
			if ( !seen.has( key ) ) {
				seen.add( key );
				questions.push( question );
			}
		}
	} catch ( error ) {
		// Keep whatever was generated before the failure instead of
		// discarding it.
		console.error( 'Question generation failed:', error );
	}

	return questions;
}

/**
 * Build the lead-section text for an article.
 *
 * A chunk counts as part of the lead when its `sectionTitle` is exactly
 * 'Introduction' or its `sectionId` is strictly `null`. The `text` of the
 * first three such chunks is joined with single spaces.
 *
 * @param {Array} chunks - Article chunks with sectionTitle
 * @returns {string} - Combined text from the introduction/lead section
 */
export function getLeadSectionText( chunks ) {
	const MAX_LEAD_CHUNKS = 3;
	const isLead = ( entry ) => entry.sectionTitle === 'Introduction' || entry.sectionId === null;

	const texts = [];
	for ( const entry of chunks.filter( isLead ) ) {
		// Stop once the first few lead chunks have been collected
		if ( texts.length === MAX_LEAD_CHUNKS ) {
			break;
		}
		texts.push( entry.text );
	}

	return texts.join( ' ' );
}