Spaces:
Sleeping
Sleeping
File size: 3,392 Bytes
be647a4 8067185 be647a4 8067185 be647a4 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 |
import { pipeline } from '@xenova/transformers';
// Cached text2text-generation pipeline shared by this module. It stays null
// until initQuestionGenerator() has finished loading the model.
let generator = null;
// In-flight model load, shared by concurrent callers so the model is only
// loaded once. Reset to null as soon as the load settles.
let generatorLoadPromise = null;

/**
 * Load the LaMini-Flan-T5-248M pipeline and store it in the module cache.
 *
 * @returns {Promise<Function>} - The loaded text2text-generation pipeline
 */
async function loadQuestionGenerator() {
	console.log( 'Loading question generation model (LaMini-Flan-T5-248M)...' );
	const loaded = await pipeline( 'text2text-generation', 'Xenova/LaMini-Flan-T5-248M' );
	generator = loaded;
	console.log( 'Question generation model loaded.' );
	return loaded;
}

/**
 * Initialize the question generation model (LaMini-Flan-T5-248M)
 *
 * The pipeline is created at most once: later calls return the cached
 * instance, and calls made while a load is still running wait for that same
 * load instead of starting another one. If loading fails, the rejection is
 * propagated to every waiting caller and the next call retries.
 *
 * @returns {Promise<Function>} - The text2text-generation pipeline
 */
export async function initQuestionGenerator() {
	if ( generator ) {
		return generator;
	}
	if ( !generatorLoadPromise ) {
		generatorLoadPromise = loadQuestionGenerator();
	}
	const pending = generatorLoadPromise;
	try {
		return await pending;
	} finally {
		// Only clear the slot we were waiting on, so a newer load started by
		// another caller is never discarded by mistake.
		if ( generatorLoadPromise === pending ) {
			generatorLoadPromise = null;
		}
	}
}
/**
 * Generate a single question from a text passage
 *
 * Calls the module-level `generator` pipeline, so the model must already be
 * loaded. Outputs of 10 characters or fewer (after trimming) are rejected.
 * An output that does not end in a question mark has its trailing
 * punctuation and whitespace removed before "?" is appended, so
 * "What is X." becomes "What is X?" rather than "What is X.?".
 *
 * @param {string} text - The passage to generate a question about
 * @returns {Promise<string|null>} - Generated question or null
 */
async function generateSingleQuestion( text ) {
	// More specific prompt to encourage factual questions
	const prompt = `Ask a specific factual question that can be answered by the following passage: ${ text }`;
	const result = await generator( prompt, {
		max_new_tokens: 60,
		num_beams: 2,
		do_sample: false
	} );
	const output = result[ 0 ].generated_text.trim();
	if ( output.length <= 10 ) {
		return null;
	}
	if ( output.endsWith( '?' ) ) {
		return output;
	}
	// Drop any trailing terminal punctuation before adding the question mark.
	const stem = output.replace( /[\s.!?,;:]+$/, '' );
	// An output made only of punctuation leaves nothing to ask.
	return stem.length > 0 ? `${ stem }?` : null;
}
/**
 * Group sentences into chunks of N for more context
 *
 * Consecutive sentences are joined with a single space. The last group may
 * hold fewer than `groupSize` sentences.
 *
 * @param {string[]} sentences - Array of sentences
 * @param {number} groupSize - Number of sentences per group (positive integer)
 * @returns {string[]} - Array of grouped sentence strings
 * @throws {RangeError} If groupSize is not a positive integer
 */
function groupSentences( sentences, groupSize = 3 ) {
	// A step of zero or less would never advance the loop below, and a
	// fractional step would produce unevenly sized groups.
	if ( !Number.isInteger( groupSize ) || groupSize < 1 ) {
		throw new RangeError( `groupSize must be a positive integer, got ${ String( groupSize ) }` );
	}
	const groups = [];
	for ( let start = 0; start < sentences.length; start += groupSize ) {
		groups.push( sentences.slice( start, start + groupSize ).join( ' ' ) );
	}
	return groups;
}
/**
 * Generate questions from a text passage
 *
 * The passage is split into sentences, sentences of 30 characters or fewer
 * are dropped, and the rest are grouped in pairs. One question is requested
 * per pair, in order, until `numQuestions` unique questions (compared
 * case-insensitively) have been collected or `numQuestions * 2` pairs have
 * been tried.
 *
 * Returns an empty array, without loading the model, when `text` is not a
 * string, when `numQuestions` is not greater than zero, or when no sentence
 * is long enough. If generation throws, the error is logged and an empty
 * array is returned.
 *
 * @param {string} text - The passage to generate questions about
 * @param {number} numQuestions - Number of questions to generate (default: 5)
 * @returns {Promise<string[]>} - Array of generated questions
 */
export async function generateQuestions( text, numQuestions = 5 ) {
	// Reject unusable input up front so it yields [] instead of a TypeError.
	if ( typeof text !== 'string' || !( numQuestions > 0 ) ) {
		return [];
	}
	// Split text into sentences
	const sentences = text
		.split( /(?<=[.!?])\s+/ )
		.filter( ( s ) => s.length > 30 );
	// Group sentences two at a time for more context per question
	const chunks = groupSentences( sentences, 2 );
	if ( chunks.length === 0 ) {
		return [];
	}
	// Only pay for the model load once there is something to ask about.
	if ( !generator ) {
		await initQuestionGenerator();
	}
	// Try at most twice as many chunks as questions wanted, leaving room for
	// rejected or duplicate outputs.
	const sampleSize = Math.min( numQuestions * 2, chunks.length );
	const sampled = chunks.slice( 0, sampleSize );
	const questions = [];
	const seen = new Set();
	try {
		for ( const chunk of sampled ) {
			if ( questions.length >= numQuestions ) {
				break;
			}
			const question = await generateSingleQuestion( chunk );
			if ( !question ) {
				continue;
			}
			// Deduplicate case-insensitively.
			const key = question.toLowerCase();
			if ( !seen.has( key ) ) {
				seen.add( key );
				questions.push( question );
			}
		}
		return questions;
	} catch ( error ) {
		console.error( 'Question generation failed:', error );
		return [];
	}
}
/**
 * Build the lead (introduction) text of an article from its chunks
 *
 * A chunk counts as part of the lead when its `sectionTitle` is exactly
 * 'Introduction' or its `sectionId` is strictly null. The `text` of the first
 * three such chunks is joined with single spaces.
 *
 * @param {Array} chunks - Article chunks with sectionTitle
 * @returns {string} - Combined text from the introduction/lead section
 */
export function getLeadSectionText( chunks ) {
	const belongsToLead = ( entry ) => {
		if ( entry.sectionTitle === 'Introduction' ) {
			return true;
		}
		return entry.sectionId === null;
	};
	const leadEntries = chunks.filter( belongsToLead );
	// Keep no more than the first three lead paragraphs
	const texts = [];
	for ( let index = 0; index < leadEntries.length && index < 3; index += 1 ) {
		texts.push( leadEntries[ index ].text );
	}
	return texts.join( ' ' );
}
|