Spaces:

egardner
/

question-explorer-api

Sleeping

File size: 6,022 Bytes

ce30646

import Anthropic from '@anthropic-ai/sdk';

// Lazily created Anthropic client shared by every call in this module.
let client = null;

/**
 * Return the shared Anthropic client, constructing it on first use.
 *
 * The API key is read from the ANTHROPIC_API_KEY environment variable at the
 * moment the client is first created; later calls reuse the cached instance.
 *
 * @returns {Anthropic} The cached client instance
 * @throws {Error} If ANTHROPIC_API_KEY is unset or empty and no client exists yet
 */
function getClient() {
	// Fast path: a client was already built by an earlier call.
	if ( client ) {
		return client;
	}

	const { ANTHROPIC_API_KEY: apiKey } = process.env;
	if ( !apiKey ) {
		throw new Error( 'ANTHROPIC_API_KEY environment variable is required for Claude question generation' );
	}

	client = new Anthropic( { apiKey } );
	return client;
}

/**
 * Generate questions using Claude based on the full article text.
 *
 * The article is rebuilt from its chunks, embedded in a prompt that asks for
 * short, curiosity-driven questions, and Claude's reply is parsed into one
 * question per line. When the rough size estimate of the article exceeds the
 * budget, a truncated version of the article is sent instead.
 *
 * @param {Array} chunks - Article chunks with text and section info
 * @param {string} articleTitle - The title of the article
 * @param {number} numQuestions - Number of questions to generate (default: 5)
 * @returns {Promise<string[]>} - Array of generated questions (at most numQuestions)
 * @throws {Error} If the client cannot be created, the request fails, or the
 *   reply contains no text content
 */
export async function generateQuestionsWithClaude( chunks, articleTitle, numQuestions = 5 ) {
	const anthropic = getClient();

	// Estimated-token budget above which the article is truncated
	const maxArticleTokens = 50000;

	// Build a structured representation of the article
	const articleContent = buildArticleContent( chunks );

	// Rough size estimate assuming ~4 characters per token
	const estimatedTokens = Math.ceil( articleContent.length / 4 );
	console.log( `Article content: ~${ estimatedTokens } tokens estimated` );

	// If the article is very long, send a truncated version instead
	const contentToUse = estimatedTokens > maxArticleTokens
		? truncateArticleContent( chunks, maxArticleTokens )
		: articleContent;

	const prompt = `You are helping create an interactive Wikipedia reading experience. Given the following Wikipedia article about "${articleTitle}", generate ${numQuestions} short, simple questions that invite readers to explore the article.

**CRITICAL: Base questions ONLY on the provided article text.**

You must generate questions answerable using ONLY information in the article below. Do not use external knowledge. If you know facts about "${articleTitle}" not mentioned in this text, do NOT ask about them.

**Question style:**

- **Keep it short** - Questions should be 5-10 words. Simple, open-ended phrasing.
- **Use plain language** - Write for casual readers, not academics.
- **Be inviting, not testing** - Questions should spark curiosity, not feel like a quiz.

Good examples:
- "Why did Plato write about this?"
- "What happened to the search expeditions?"
- "How did this influence later writers?"

Avoid:
- Long, complex questions with multiple clauses
- Academic or formal phrasing
- Questions answered in the opening paragraph

**Content guidelines:**

- Look for interesting details deeper in the article, not just the lead
- Reference specific things mentioned in the text
- Vary the topics covered across your questions

<article>
${contentToUse}
</article>

Generate exactly ${numQuestions} questions, one per line. Output only the questions, no numbering. Keep each question short and simple.`;

	try {
		const response = await anthropic.messages.create( {
			model: 'claude-sonnet-4-5',
			max_tokens: 1024,
			messages: [
				{
					role: 'user',
					content: prompt
				}
			]
		} );

		const questions = parseQuestionLines( extractResponseText( response ) );

		console.log( `Claude generated ${ questions.length } questions` );
		return questions.slice( 0, numQuestions );

	} catch ( error ) {
		console.error( 'Claude question generation failed:', error.message );
		throw error;
	}
}

/**
 * Concatenate the text of every text-bearing content block in a response.
 *
 * The reply is not assumed to start with a text block: any block carrying a
 * string `text` property contributes, and blocks without one are skipped.
 *
 * @param {Object} response - Value returned by `messages.create`
 * @returns {string} - All text blocks joined with newlines
 * @throws {Error} If the response has no text-bearing content block
 */
function extractResponseText( response ) {
	const blocks = response && Array.isArray( response.content ) ? response.content : [];
	const texts = blocks
		.filter( ( block ) => block && typeof block.text === 'string' )
		.map( ( block ) => block.text );

	if ( texts.length === 0 ) {
		throw new Error( 'Claude response contained no text content' );
	}
	return texts.join( '\n' );
}

/**
 * Split a reply into candidate questions, one per line.
 *
 * Each line is trimmed, a leading bullet or "1." / "1)" style number is
 * removed, and a pair of surrounding quotes is stripped, so that questions
 * formatted as a list or quoted like the prompt's examples are not discarded.
 * Only lines longer than 10 characters that end with "?" are kept.
 *
 * @param {string} text - Raw reply text
 * @returns {string[]} - Cleaned question strings in reply order
 */
function parseQuestionLines( text ) {
	return text
		.split( '\n' )
		.map( ( line ) => line
			.trim()
			// Leading list marker: "- ", "* ", "\u2022 ", "1. " or "1) "
			.replace( /^(?:[-*\u2022]|\d+[.)])\s+/, '' )
			// Quotes are removed only as a surrounding pair, never one-sided
			.replace( /^["'\u201C\u2018](.+)["'\u201D\u2019]$/, '$1' )
			.trim()
		)
		.filter( ( q ) => q.length > 10 && q.endsWith( '?' ) );
}

/**
 * Render article chunks as a single text document with one heading per section.
 *
 * Chunk texts are grouped under their `sectionTitle` in first-seen order;
 * chunks without a section title are filed under "Introduction". Each section
 * becomes a "## <title>" heading followed by its texts separated by blank lines.
 *
 * @param {Array} chunks - Article chunks
 * @returns {string} - Formatted article content
 */
function buildArticleContent( chunks ) {
	// Map keeps insertion order, so sections appear in the order first encountered
	const textsBySection = new Map();

	for ( const chunk of chunks ) {
		const key = chunk.sectionTitle || 'Introduction';
		let bucket = textsBySection.get( key );
		if ( bucket === undefined ) {
			bucket = [];
			textsBySection.set( key, bucket );
		}
		bucket.push( chunk.text );
	}

	// Each section contributes three pieces: heading, body, and a blank separator
	const pieces = Array.from( textsBySection ).flatMap(
		( [ title, texts ] ) => [ `## ${title}\n`, texts.join( '\n\n' ), '' ]
	);

	return pieces.join( '\n' );
}

/**
 * Truncate article content to fit within an estimated token budget.
 *
 * Chunk texts are grouped by section in first-seen order (chunks without a
 * section title go under "Introduction"). Whole sections are emitted while
 * they fit the budget. The first section that does not fit is cut down to the
 * remaining budget, provided more than 500 estimated tokens remain after its
 * heading; every section after it is dropped. Tokens are estimated at 4
 * characters per token.
 *
 * @param {Array} chunks - Article chunks
 * @param {number} maxTokens - Maximum estimated tokens
 * @returns {string} - Truncated content
 */
function truncateArticleContent( chunks, maxTokens ) {
	const sections = new Map();

	// Group chunks by section
	for ( const chunk of chunks ) {
		const sectionTitle = chunk.sectionTitle || 'Introduction';
		if ( !sections.has( sectionTitle ) ) {
			sections.set( sectionTitle, [] );
		}
		sections.get( sectionTitle ).push( chunk.text );
	}

	// Emit sections in order until the estimated budget is used up
	const parts = [];
	let estimatedTokens = 0;
	const charsPerToken = 4;

	for ( const [ sectionTitle, texts ] of sections ) {
		const header = `## ${sectionTitle}\n`;
		const sectionContent = texts.join( '\n\n' );

		const headerTokens = Math.ceil( header.length / charsPerToken );
		const contentTokens = Math.ceil( sectionContent.length / charsPerToken );

		if ( estimatedTokens + headerTokens + contentTokens < maxTokens ) {
			parts.push( header );
			parts.push( sectionContent );
			parts.push( '' );
			estimatedTokens += headerTokens + contentTokens;
		} else if ( estimatedTokens + headerTokens + 500 < maxTokens ) {
			// Include header and truncated content
			parts.push( header );
			const availableChars = ( maxTokens - estimatedTokens - headerTokens ) * charsPerToken;
			let excerpt = sectionContent.slice( 0, availableChars );

			// slice() counts UTF-16 code units, so the cut can land between the two
			// halves of a surrogate pair; drop a dangling high surrogate so the
			// excerpt stays well-formed text.
			const lastUnit = excerpt.charCodeAt( excerpt.length - 1 );
			if ( lastUnit >= 0xD800 && lastUnit <= 0xDBFF ) {
				excerpt = excerpt.slice( 0, -1 );
			}

			// Mark the cut only when text was actually removed
			const wasCut = excerpt.length < sectionContent.length;
			parts.push( wasCut ? excerpt + '...' : excerpt );
			parts.push( '' );
			break;
		} else {
			break;
		}
	}

	return parts.join( '\n' );
}

/**
 * Report whether Claude-based question generation can be used.
 *
 * Only the presence of a non-empty ANTHROPIC_API_KEY environment variable is
 * checked; the key itself is not validated.
 *
 * @returns {boolean} - True if ANTHROPIC_API_KEY is set to a non-empty value
 */
export function isClaudeAvailable() {
	const { ANTHROPIC_API_KEY: apiKey } = process.env;
	return typeof apiKey === 'string' && apiKey.length > 0;
}