import Anthropic from '@anthropic-ai/sdk';

/** Rough characters-per-token ratio used for every token estimate in this module. */
const CHARS_PER_TOKEN = 4;

/**
 * Estimated-token budget for the article text sent to Claude.
 * Claude can handle ~100k tokens, but we stay conservative.
 */
const MAX_ARTICLE_TOKENS = 50000;

/**
 * A partially included (cut) section is only worth adding when more than this
 * many estimated tokens of budget remain after its header.
 */
const MIN_PARTIAL_SECTION_TOKENS = 500;

/** Section title used for chunks that carry no `sectionTitle`. */
const DEFAULT_SECTION_TITLE = 'Introduction';

/** Leading list markers ("1. ", "2) ", "- ", "* ", bullet) that may precede a question. */
const LIST_MARKER_PATTERN = /^(?:[-*\u2022]|\d+[.)])\s+/;

/** A whole line wrapped in straight or curly double quotes. */
const WRAPPING_QUOTES_PATTERN = /^["\u201C](.*)["\u201D]$/;

/** Lazily created, module-wide Anthropic client (see getClient). */
let client = null;

/**
 * Return the shared Anthropic client, creating it on first use.
 *
 * @returns {Anthropic} - The shared client instance
 * @throws {Error} If the ANTHROPIC_API_KEY environment variable is not set
 */
function getClient() {
	if ( !client ) {
		const apiKey = process.env.ANTHROPIC_API_KEY;
		if ( !apiKey ) {
			throw new Error( 'ANTHROPIC_API_KEY environment variable is required for Claude question generation' );
		}
		client = new Anthropic( { apiKey } );
	}
	return client;
}

/**
 * Estimate the number of tokens in a piece of text from its length.
 *
 * @param {string} text - Text to measure
 * @returns {number} - Estimated token count (length / CHARS_PER_TOKEN, rounded up)
 */
function estimateTokens( text ) {
	return Math.ceil( text.length / CHARS_PER_TOKEN );
}

/**
 * Group chunk texts by section title, preserving first-seen section order.
 *
 * Chunks without a `sectionTitle` are filed under DEFAULT_SECTION_TITLE.
 * A missing/non-array `chunks` value and null entries are tolerated and ignored.
 *
 * @param {Array} chunks - Article chunks with `text` and optional `sectionTitle`
 * @returns {Map<string, Array>} - Section title -> list of chunk texts
 */
function groupChunksBySection( chunks ) {
	const sections = new Map();
	if ( !Array.isArray( chunks ) ) {
		return sections;
	}

	for ( const chunk of chunks ) {
		if ( !chunk ) {
			continue;
		}
		const sectionTitle = chunk.sectionTitle || DEFAULT_SECTION_TITLE;
		if ( !sections.has( sectionTitle ) ) {
			sections.set( sectionTitle, [] );
		}
		sections.get( sectionTitle ).push( chunk.text );
	}

	return sections;
}

/**
 * Cut `text` to at most `maxChars` UTF-16 units without leaving a dangling
 * high surrogate (half of an astral character such as an emoji) at the end.
 *
 * @param {string} text - Text to cut
 * @param {number} maxChars - Maximum length of the result
 * @returns {string} - Prefix of `text`, at most `maxChars` long
 */
function sliceAtCodePointBoundary( text, maxChars ) {
	const sliced = text.slice( 0, maxChars );
	const lastUnit = sliced.charCodeAt( sliced.length - 1 );
	const endsOnHighSurrogate = lastUnit >= 0xD800 && lastUnit <= 0xDBFF;

	return sliced.length < text.length && endsOnHighSurrogate ? sliced.slice( 0, -1 ) : sliced;
}

/**
 * Build the user prompt sent to Claude.
 *
 * The prompt is four newline-separated parts: task instructions, style and
 * content guidelines, the article text, and the output-format reminder.
 *
 * @param {string} articleTitle - The title of the article
 * @param {number} numQuestions - Number of questions to ask for
 * @param {string} articleContent - Formatted (possibly truncated) article text
 * @returns {string} - The complete prompt
 */
function buildQuestionPrompt( articleTitle, numQuestions, articleContent ) {
	// The trailing space at the end of this paragraph is part of the existing prompt text.
	const instructions =
		'You are helping create an interactive Wikipedia reading experience. ' +
		`Given the following Wikipedia article about "${ articleTitle }", generate ${ numQuestions } short, simple questions that invite readers to explore the article. ` +
		'**CRITICAL: Base questions ONLY on the provided article text.** ' +
		'You must generate questions answerable using ONLY information in the article below. ' +
		'Do not use external knowledge. ' +
		`If you know facts about "${ articleTitle }" not mentioned in this text, do NOT ask about them. `;

	const guidelines =
		'**Question style:** ' +
		'- **Keep it short** - Questions should be 5-10 words. Simple, open-ended phrasing. ' +
		'- **Use plain language** - Write for casual readers, not academics. ' +
		'- **Be inviting, not testing** - Questions should spark curiosity, not feel like a quiz. ' +
		'Good examples: ' +
		'- "Why did Plato write about this?" ' +
		'- "What happened to the search expeditions?" ' +
		'- "How did this influence later writers?" ' +
		'Avoid: ' +
		'- Long, complex questions with multiple clauses ' +
		'- Academic or formal phrasing ' +
		'- Questions answered in the opening paragraph ' +
		'**Content guidelines:** ' +
		'- Look for interesting details deeper in the article, not just the lead ' +
		'- Reference specific things mentioned in the text ' +
		'- Vary the topics covered across your questions';

	const outputFormat =
		`Generate exactly ${ numQuestions } questions, one per line. ` +
		'Output only the questions, no numbering. ' +
		'Keep each question short and simple.';

	return [ instructions, guidelines, articleContent, outputFormat ].join( '\n' );
}

/**
 * Collect the text of every text block in a Messages API response.
 *
 * Non-text blocks and a missing/empty `content` array are ignored instead of
 * causing a TypeError, so an empty response simply yields an empty string.
 *
 * @param {Object} response - Response returned by `anthropic.messages.create`
 * @returns {string} - Text of all text blocks, joined by newlines
 */
function extractResponseText( response ) {
	const blocks = response && Array.isArray( response.content ) ? response.content : [];

	return blocks
		.filter( ( block ) => block && block.type === 'text' && typeof block.text === 'string' )
		.map( ( block ) => block.text )
		.join( '\n' );
}

/**
 * Normalize one line of model output into a bare question.
 *
 * Strips surrounding whitespace, a leading list marker, and double quotes
 * wrapping the whole line, since the model may add these despite the prompt.
 *
 * @param {string} line - One raw output line
 * @returns {string} - The cleaned line
 */
function cleanQuestionLine( line ) {
	const withoutMarker = line.trim().replace( LIST_MARKER_PATTERN, '' );
	const wrapped = WRAPPING_QUOTES_PATTERN.exec( withoutMarker );

	return wrapped ? wrapped[ 1 ].trim() : withoutMarker;
}

/**
 * Extract questions from the model's raw text output.
 *
 * A line counts as a question when, after cleaning, it is longer than
 * 10 characters and ends with a question mark.
 *
 * @param {string} text - Raw model output, one candidate question per line
 * @returns {Array<string>} - All questions found, in output order
 */
function parseQuestions( text ) {
	return text
		.split( '\n' )
		.map( cleanQuestionLine )
		.filter( ( question ) => question.length > 10 && question.endsWith( '?' ) );
}

/**
 * Generate questions using Claude based on the full article text.
 *
 * This approach reads the entire article and generates questions designed to
 * draw readers deeper into the content, beyond surface-level facts.
 *
 * @param {Array} chunks - Article chunks with text and section info
 * @param {string} articleTitle - The title of the article
 * @param {number} numQuestions - Number of questions to generate (default: 5)
 * @returns {Promise<Array<string>>} - At most `numQuestions` generated questions;
 *   an empty array when there is no article text to ask about
 * @throws {Error} If ANTHROPIC_API_KEY is not set, or if the API request fails
 *   (the original error is logged and rethrown)
 */
export async function generateQuestionsWithClaude( chunks, articleTitle, numQuestions = 5 ) {
	const anthropic = getClient();

	// Build a structured representation of the article
	const articleContent = buildArticleContent( chunks );

	// Without article text every question would have to come from outside
	// knowledge, which the prompt forbids, so skip the API call entirely.
	if ( articleContent.trim() === '' ) {
		console.warn( 'No article content provided; skipping Claude question generation' );
		return [];
	}

	const estimatedTokens = estimateTokens( articleContent );
	console.log( `Article content: ~${ estimatedTokens } tokens estimated` );

	// If the article is very long, cut it down to the token budget
	const contentToUse = estimatedTokens > MAX_ARTICLE_TOKENS ?
		truncateArticleContent( chunks, MAX_ARTICLE_TOKENS ) :
		articleContent;

	const prompt = buildQuestionPrompt( articleTitle, numQuestions, contentToUse );

	try {
		const response = await anthropic.messages.create( {
			model: 'claude-sonnet-4-5',
			max_tokens: 1024,
			messages: [
				{ role: 'user', content: prompt }
			]
		} );

		const questions = parseQuestions( extractResponseText( response ) );
		console.log( `Claude generated ${ questions.length } questions` );

		return questions.slice( 0, numQuestions );
	} catch ( error ) {
		console.error( 'Claude question generation failed:', error.message );
		throw error;
	}
}

/**
 * Build a structured text representation of the article from chunks.
 *
 * Each section becomes a `## title` header followed by its chunk texts
 * separated by blank lines.
 *
 * @param {Array} chunks - Article chunks
 * @returns {string} - Formatted article content
 */
function buildArticleContent( chunks ) {
	const parts = [];

	for ( const [ sectionTitle, texts ] of groupChunksBySection( chunks ) ) {
		parts.push( `## ${ sectionTitle }\n`, texts.join( '\n\n' ), '' );
	}

	return parts.join( '\n' );
}

/**
 * Truncate article content to fit within a token budget.
 *
 * Sections are added whole, in order, while they fit. The first section that
 * does not fit is included in cut-down form (ending in '...') if enough budget
 * remains for it to be useful; everything after it is dropped.
 *
 * @param {Array} chunks - Article chunks
 * @param {number} maxTokens - Maximum estimated tokens
 * @returns {string} - Truncated content
 */
function truncateArticleContent( chunks, maxTokens ) {
	const parts = [];
	let estimatedTokens = 0;

	for ( const [ sectionTitle, texts ] of groupChunksBySection( chunks ) ) {
		const header = `## ${ sectionTitle }\n`;
		const sectionContent = texts.join( '\n\n' );
		const headerTokens = estimateTokens( header );
		const contentTokens = estimateTokens( sectionContent );

		if ( estimatedTokens + headerTokens + contentTokens < maxTokens ) {
			parts.push( header, sectionContent, '' );
			estimatedTokens += headerTokens + contentTokens;
			continue;
		}

		// This section overflows the budget: include a cut-down version when
		// there is still a meaningful amount of room, then stop either way.
		if ( estimatedTokens + headerTokens + MIN_PARTIAL_SECTION_TOKENS < maxTokens ) {
			const availableChars = ( maxTokens - estimatedTokens - headerTokens ) * CHARS_PER_TOKEN;
			const excerpt = sliceAtCodePointBoundary( sectionContent, availableChars );
			const wasCut = excerpt.length < sectionContent.length;
			parts.push( header, wasCut ? excerpt + '...' : excerpt, '' );
		}
		break;
	}

	return parts.join( '\n' );
}

/**
 * Check if Claude question generation is available.
 *
 * @returns {boolean} - True if ANTHROPIC_API_KEY is set
 */
export function isClaudeAvailable() {
	return Boolean( process.env.ANTHROPIC_API_KEY );
}