Spaces:
Sleeping
Sleeping
File size: 6,022 Bytes
ce30646 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 |
import Anthropic from '@anthropic-ai/sdk';
// Module-level cache: holds the Anthropic client once it has been constructed.
let client = null;

/**
 * Return the shared Anthropic client, constructing it on the first call.
 *
 * The API key is read from the ANTHROPIC_API_KEY environment variable only
 * when no client has been created yet; later calls return the cached instance.
 *
 * @returns {Anthropic} The cached client instance
 * @throws {Error} If no client exists yet and ANTHROPIC_API_KEY is unset or empty
 */
function getClient() {
	// Fast path: a client was already built by an earlier call.
	if ( client ) {
		return client;
	}

	const { ANTHROPIC_API_KEY: apiKey } = process.env;
	if ( !apiKey ) {
		throw new Error( 'ANTHROPIC_API_KEY environment variable is required for Claude question generation' );
	}

	client = new Anthropic( { apiKey } );
	return client;
}
/**
 * Generate questions using Claude based on the full article text.
 *
 * The article is rendered to text with buildArticleContent(); when its size is
 * estimated above 50,000 tokens (at ~4 characters per token) the text produced
 * by truncateArticleContent() is sent instead. The model's reply is split into
 * lines, each line is cleaned of list markers and wrapping quotes, and only
 * lines longer than 10 characters that end in '?' are kept.
 *
 * @param {Array} chunks - Article chunks with text and section info
 * @param {string} articleTitle - The title of the article
 * @param {number} numQuestions - Number of questions to generate (default: 5)
 * @returns {Promise<string[]>} - At most numQuestions generated questions;
 *   may be fewer (or empty) if the reply contains fewer usable lines
 * @throws {Error} Errors from getClient() propagate as-is; errors raised while
 *   calling the API or parsing its reply are logged and then rethrown
 */
export async function generateQuestionsWithClaude( chunks, articleTitle, numQuestions = 5 ) {
	const anthropic = getClient();

	// Build a structured representation of the article
	const articleContent = buildArticleContent( chunks );

	// Rough size estimate (~4 characters per token), compared against a
	// deliberately conservative 50,000-token budget.
	const estimatedTokens = Math.ceil( articleContent.length / 4 );
	console.log( `Article content: ~${ estimatedTokens } tokens estimated` );

	// Very long articles are truncated to the budget (they are not summarized).
	const contentToUse = estimatedTokens > 50000
		? truncateArticleContent( chunks, 50000 )
		: articleContent;

	const prompt = `You are helping create an interactive Wikipedia reading experience. Given the following Wikipedia article about "${articleTitle}", generate ${numQuestions} short, simple questions that invite readers to explore the article.
**CRITICAL: Base questions ONLY on the provided article text.**
You must generate questions answerable using ONLY information in the article below. Do not use external knowledge. If you know facts about "${articleTitle}" not mentioned in this text, do NOT ask about them.
**Question style:**
- **Keep it short** - Questions should be 5-10 words. Simple, open-ended phrasing.
- **Use plain language** - Write for casual readers, not academics.
- **Be inviting, not testing** - Questions should spark curiosity, not feel like a quiz.
Good examples:
- "Why did Plato write about this?"
- "What happened to the search expeditions?"
- "How did this influence later writers?"
Avoid:
- Long, complex questions with multiple clauses
- Academic or formal phrasing
- Questions answered in the opening paragraph
**Content guidelines:**
- Look for interesting details deeper in the article, not just the lead
- Reference specific things mentioned in the text
- Vary the topics covered across your questions
<article>
${contentToUse}
</article>
Generate exactly ${numQuestions} questions, one per line. Output only the questions, no numbering. Keep each question short and simple.`;

	try {
		const response = await anthropic.messages.create( {
			model: 'claude-sonnet-4-5',
			max_tokens: 1024,
			messages: [
				{
					role: 'user',
					content: prompt
				}
			]
		} );

		// The reply is a list of content blocks. Do not assume the first block
		// exists or is text: collect the text of every block that has some.
		const blocks = Array.isArray( response.content ) ? response.content : [];
		const text = blocks
			.filter( ( block ) => block && typeof block.text === 'string' )
			.map( ( block ) => block.text )
			.join( '\n' );

		const questions = text
			.split( '\n' )
			.map( normalizeQuestionLine )
			.filter( ( q ) => q.length > 10 && q.endsWith( '?' ) );

		console.log( `Claude generated ${ questions.length } questions` );
		return questions.slice( 0, numQuestions );
	} catch ( error ) {
		console.error( 'Claude question generation failed:', error.message );
		throw error;
	}
}

/**
 * Clean one line of model output so it can be tested as a question.
 *
 * Removes surrounding whitespace, a leading list marker ("- ", "* ", "1. ",
 * "2) ") and one pair of wrapping quotes. The prompt's own examples are quoted
 * bullet items, so a reply that copies that format would otherwise fail the
 * "ends with '?'" check.
 *
 * @param {string} line - A single raw line from the model's reply
 * @returns {string} - The cleaned line
 */
function normalizeQuestionLine( line ) {
	const withoutMarker = line.trim().replace( /^(?:[-*\u2022]|\d+[.)])\s+/, '' );
	// Only unwrap when the line both starts and ends with a quote character,
	// so apostrophes inside an unquoted question are left alone.
	const quoted = withoutMarker.match( /^["'\u201C\u2018](.*)["'\u201D\u2019]$/ );
	return ( quoted ? quoted[ 1 ] : withoutMarker ).trim();
}
/**
 * Render article chunks as one text document with a heading per section.
 *
 * Chunks sharing a section title are merged (in first-seen section order) and
 * their texts separated by a blank line; chunks without a section title are
 * filed under "Introduction".
 *
 * @param {Array} chunks - Article chunks
 * @returns {string} - Formatted article content
 */
function buildArticleContent( chunks ) {
	// Section title -> texts of every chunk belonging to that section
	const textsBySection = new Map();
	for ( const chunk of chunks ) {
		const title = chunk.sectionTitle || 'Introduction';
		const bucket = textsBySection.get( title );
		if ( bucket ) {
			bucket.push( chunk.text );
		} else {
			textsBySection.set( title, [ chunk.text ] );
		}
	}

	// Each section contributes three pieces: heading, body, and an empty
	// string that becomes the blank separator line once everything is joined.
	return [ ...textsBySection ]
		.flatMap( ( [ title, paragraphs ] ) => [
			`## ${title}\n`,
			paragraphs.join( '\n\n' ),
			''
		] )
		.join( '\n' );
}
/**
 * Truncate article content to fit within an estimated token budget.
 *
 * Chunks are grouped by section title (chunks without one go under
 * "Introduction") and sections are emitted whole, in first-seen order, for as
 * long as they fit. The first section that does not fit is cut down to the
 * remaining budget and suffixed with '...', provided more than 500 tokens
 * remain after its header; otherwise it is skipped. Either way, every later
 * section is dropped. Tokens are estimated at 4 characters each.
 *
 * @param {Array} chunks - Article chunks
 * @param {number} maxTokens - Maximum estimated tokens
 * @returns {string} - Truncated content
 */
function truncateArticleContent( chunks, maxTokens ) {
	const sections = new Map();

	// Group chunks by section
	for ( const chunk of chunks ) {
		const sectionTitle = chunk.sectionTitle || 'Introduction';
		if ( !sections.has( sectionTitle ) ) {
			sections.set( sectionTitle, [] );
		}
		sections.get( sectionTitle ).push( chunk.text );
	}

	// Emit whole sections until the budget runs out
	const parts = [];
	let estimatedTokens = 0;
	const charsPerToken = 4;

	for ( const [ sectionTitle, texts ] of sections ) {
		const header = `## ${sectionTitle}\n`;
		const sectionContent = texts.join( '\n\n' );
		const headerTokens = Math.ceil( header.length / charsPerToken );
		const contentTokens = Math.ceil( sectionContent.length / charsPerToken );

		if ( estimatedTokens + headerTokens + contentTokens < maxTokens ) {
			parts.push( header );
			parts.push( sectionContent );
			parts.push( '' );
			estimatedTokens += headerTokens + contentTokens;
		} else if ( estimatedTokens + headerTokens + 500 < maxTokens ) {
			// Include header and truncated content
			parts.push( header );
			let cutAt = ( maxTokens - estimatedTokens - headerTokens ) * charsPerToken;

			// Strings are indexed in UTF-16 code units. If the last unit kept
			// is a high surrogate, its partner would be cut off and the output
			// would contain a lone surrogate, so stop one unit earlier.
			const lastUnit = sectionContent.charCodeAt( cutAt - 1 );
			if ( lastUnit >= 0xD800 && lastUnit <= 0xDBFF ) {
				cutAt -= 1;
			}

			parts.push( sectionContent.slice( 0, cutAt ) + '...' );
			parts.push( '' );
			break;
		} else {
			// Too little budget left for a useful excerpt
			break;
		}
	}

	return parts.join( '\n' );
}
/**
 * Report whether Claude question generation can be used.
 *
 * Only checks that the ANTHROPIC_API_KEY environment variable holds a
 * non-empty value; the key itself is not validated.
 *
 * @returns {boolean} - True if ANTHROPIC_API_KEY is set
 */
export function isClaudeAvailable() {
	const { ANTHROPIC_API_KEY: apiKey } = process.env;
	return !!apiKey;
}
|