Spaces:
Sleeping
Sleeping
| import Anthropic from '@anthropic-ai/sdk'; | |
// Lazily created Anthropic client shared by every call in this module.
let client = null;

/**
 * Return the shared Anthropic client, creating it on first use.
 *
 * The instance is cached in the module-level `client` variable, so the
 * environment is only consulted until a client has been created.
 *
 * @returns {Anthropic} The cached client instance
 * @throws {Error} If the ANTHROPIC_API_KEY environment variable is unset or empty
 */
function getClient() {
	if ( client ) {
		return client;
	}

	const apiKey = process.env.ANTHROPIC_API_KEY;
	if ( !apiKey ) {
		throw new Error( 'ANTHROPIC_API_KEY environment variable is required for Claude question generation' );
	}

	client = new Anthropic( { apiKey } );
	return client;
}
/**
 * Generate reader-facing questions about an article using Claude.
 *
 * The article chunks are rendered to text, embedded in a prompt and sent to the
 * Anthropic Messages API. The reply is split into lines, cleaned up, and only
 * lines that look like questions are returned.
 *
 * @param {Array} chunks - Article chunks with text and section info
 * @param {string} articleTitle - The title of the article
 * @param {number} numQuestions - Number of questions to generate (default: 5)
 * @returns {Promise<string[]>} - At most `numQuestions` questions; fewer if the
 *   reply contained fewer usable lines
 * @throws {Error} If the client cannot be created, the API call fails, or the
 *   reply contains no text. API and parsing failures are logged before being rethrown.
 */
export async function generateQuestionsWithClaude( chunks, articleTitle, numQuestions = 5 ) {
	// Conservative cap on the estimated number of article tokens put in the prompt.
	const maxArticleTokens = 50000;

	const anthropic = getClient();

	// Build a structured representation of the article
	const articleContent = buildArticleContent( chunks );

	// Rough estimate: about four characters per token.
	const estimatedTokens = Math.ceil( articleContent.length / 4 );
	console.log( `Article content: ~${ estimatedTokens } tokens estimated` );

	// If the article is over the cap, send a truncated version instead.
	const contentToUse = estimatedTokens > maxArticleTokens ?
		truncateArticleContent( chunks, maxArticleTokens ) :
		articleContent;

	const prompt = `You are helping create an interactive Wikipedia reading experience. Given the following Wikipedia article about "${articleTitle}", generate ${numQuestions} short, simple questions that invite readers to explore the article.
**CRITICAL: Base questions ONLY on the provided article text.**
You must generate questions answerable using ONLY information in the article below. Do not use external knowledge. If you know facts about "${articleTitle}" not mentioned in this text, do NOT ask about them.
**Question style:**
- **Keep it short** - Questions should be 5-10 words. Simple, open-ended phrasing.
- **Use plain language** - Write for casual readers, not academics.
- **Be inviting, not testing** - Questions should spark curiosity, not feel like a quiz.
Good examples:
- "Why did Plato write about this?"
- "What happened to the search expeditions?"
- "How did this influence later writers?"
Avoid:
- Long, complex questions with multiple clauses
- Academic or formal phrasing
- Questions answered in the opening paragraph
**Content guidelines:**
- Look for interesting details deeper in the article, not just the lead
- Reference specific things mentioned in the text
- Vary the topics covered across your questions
<article>
${contentToUse}
</article>
Generate exactly ${numQuestions} questions, one per line. Output only the questions, no numbering. Keep each question short and simple.`;

	try {
		const response = await anthropic.messages.create( {
			model: 'claude-sonnet-4-5',
			max_tokens: 1024,
			messages: [
				{
					role: 'user',
					content: prompt
				}
			]
		} );

		// The reply is a list of typed content blocks; only text blocks carry
		// the questions, and the first block is not guaranteed to be one.
		const contentBlocks = Array.isArray( response.content ) ? response.content : [];
		const text = contentBlocks
			.filter( ( block ) => block && block.type === 'text' && typeof block.text === 'string' )
			.map( ( block ) => block.text )
			.join( '\n' );

		if ( text.trim() === '' ) {
			throw new Error( 'Claude response contained no text content' );
		}

		const questions = parseQuestionLines( text );
		console.log( `Claude generated ${ questions.length } questions` );
		return questions.slice( 0, numQuestions );
	} catch ( error ) {
		console.error( 'Claude question generation failed:', error.message );
		throw error;
	}
}

/**
 * Split raw model output into clean question strings.
 *
 * Leading list markers ("1.", "2)", "-", "*", "•") and a pair of wrapping
 * quotes are removed, so a question formatted that way is kept rather than
 * rejected by the trailing "?" check.
 *
 * @param {string} text - Raw text returned by the model
 * @returns {string[]} - Lines longer than 10 characters that end with "?"
 */
function parseQuestionLines( text ) {
	return text
		.split( '\n' )
		.map( ( line ) => line
			.trim()
			.replace( /^(?:[-*\u2022]|\d+[.)])\s+/, '' )
			.replace( /^["'\u201C\u2018](.+)["'\u201D\u2019]$/, '$1' )
			.trim()
		)
		.filter( ( line ) => line.length > 10 && line.endsWith( '?' ) );
}
/**
 * Build a structured text representation of the article from chunks.
 *
 * Chunks are grouped by `sectionTitle` (chunks without one go under
 * "Introduction"), in order of each section's first appearance. Every section
 * is rendered as a "## Title" header followed by its chunk texts separated by
 * blank lines.
 *
 * @param {Array} chunks - Article chunks; a missing value is treated as empty
 * @returns {string} - Formatted article content ('' when there is nothing to render)
 */
function buildArticleContent( chunks ) {
	const textsBySection = new Map();

	// Group chunks by section
	for ( const chunk of chunks || [] ) {
		const text = chunk ? chunk.text : undefined;
		// Skip chunks with no usable text so they cannot produce empty
		// paragraphs or header-only sections.
		if ( text === undefined || text === null || String( text ).trim() === '' ) {
			continue;
		}

		const title = chunk.sectionTitle || 'Introduction';
		if ( !textsBySection.has( title ) ) {
			textsBySection.set( title, [] );
		}
		textsBySection.get( title ).push( text );
	}

	// Build formatted content: header, body, then an empty entry that becomes
	// the line break after the section once everything is joined.
	const parts = [];
	for ( const [ title, texts ] of textsBySection ) {
		parts.push( `## ${ title }\n`, texts.join( '\n\n' ), '' );
	}
	return parts.join( '\n' );
}
/**
 * Truncate article content to fit within a token budget.
 *
 * Sections are emitted in order of first appearance, using an estimate of four
 * characters per token. Whole sections are kept while they fit. The first
 * section that does not fit is cut to the remaining budget and suffixed with
 * "...", provided more than 500 tokens remain. Nothing after it is emitted.
 *
 * @param {Array} chunks - Article chunks; a missing value is treated as empty
 * @param {number} maxTokens - Maximum estimated tokens
 * @returns {string} - Truncated content
 */
function truncateArticleContent( chunks, maxTokens ) {
	const charsPerToken = 4;
	// A section is only cut short when more than this many tokens remain.
	const minPartialTokens = 500;

	// Group chunks by section
	const textsBySection = new Map();
	for ( const chunk of chunks || [] ) {
		const text = chunk ? chunk.text : undefined;
		// Skip chunks with no usable text so they cannot produce empty
		// paragraphs or header-only sections.
		if ( text === undefined || text === null || String( text ).trim() === '' ) {
			continue;
		}

		const title = chunk.sectionTitle || 'Introduction';
		if ( !textsBySection.has( title ) ) {
			textsBySection.set( title, [] );
		}
		textsBySection.get( title ).push( text );
	}

	const parts = [];
	let usedTokens = 0;

	for ( const [ title, texts ] of textsBySection ) {
		const header = `## ${ title }\n`;
		const body = texts.join( '\n\n' );
		const headerTokens = Math.ceil( header.length / charsPerToken );
		const bodyTokens = Math.ceil( body.length / charsPerToken );

		if ( usedTokens + headerTokens + bodyTokens < maxTokens ) {
			// The whole section fits.
			parts.push( header, body, '' );
			usedTokens += headerTokens + bodyTokens;
			continue;
		}

		if ( usedTokens + headerTokens + minPartialTokens < maxTokens ) {
			// Include the header and as much of the body as the budget allows.
			const availableChars = ( maxTokens - usedTokens - headerTokens ) * charsPerToken;
			let excerpt = body.slice( 0, availableChars );

			// Do not end on the first half of a surrogate pair: that would
			// leave an invalid lone surrogate in the prompt text.
			const lastUnit = excerpt.charCodeAt( excerpt.length - 1 );
			if ( lastUnit >= 0xD800 && lastUnit <= 0xDBFF ) {
				excerpt = excerpt.slice( 0, -1 );
			}

			parts.push( header, excerpt + '...', '' );
		}

		// Budget exhausted: stop at the first section that does not fully fit.
		break;
	}

	return parts.join( '\n' );
}
/**
 * Check if Claude question generation is available.
 *
 * Only tests whether the ANTHROPIC_API_KEY environment variable holds a
 * non-empty value; the key itself is not validated.
 *
 * @returns {boolean} - True if ANTHROPIC_API_KEY is set to a non-empty value
 */
export function isClaudeAvailable() {
	const { ANTHROPIC_API_KEY: apiKey } = process.env;
	return Boolean( apiKey );
}