Spaces:

egardner
/

question-explorer-api

Sleeping

App Files Files Community

Eric Gardner committed on Dec 19, 2025

Commit

ce30646

1 Parent(s): 8067185

Use Claude API for question generation

Browse files

Files changed (6) hide show

Dockerfile +4 -0
index.js +9 -8
package-lock.json +62 -0
package.json +4 -2
routes/article.js +12 -12
services/claudeQuestionGenerator.js +193 -0

Dockerfile CHANGED Viewed

@@ -17,6 +17,10 @@ RUN mkdir -p /app/cache && chmod 777 /app/cache
 # Hugging Face Spaces uses port 7860
 ENV PORT=7860
 # Expose the port
 EXPOSE 7860

 # Hugging Face Spaces uses port 7860
 ENV PORT=7860
+# Anthropic API key for question generation
+# Set this as a secret in your deployment platform (e.g., HF Spaces secrets)
+ENV ANTHROPIC_API_KEY=""
 # Expose the port
 EXPOSE 7860

index.js CHANGED Viewed

@@ -1,9 +1,10 @@
 import express from 'express';
 import cors from 'cors';
 import articleRoutes from './routes/article.js';
 import searchRoutes from './routes/search.js';
 import { initEmbedder } from './services/embedder.js';
-import { initQuestionGenerator } from './services/questionGenerator.js';
 const app = express();
 const PORT = process.env.PORT || 3000;
@@ -23,16 +24,16 @@ app.get( '/api/health', ( _, res ) => {
 	res.json( { status: 'ok' } );
 } );
-// Pre-warm the models on startup
-console.log( 'Starting server and loading models...' );
-Promise.all( [
-	initEmbedder(),
-	initQuestionGenerator()
-] ).then( () => {
 	app.listen( PORT, () => {
 		console.log( `Server running on http://localhost:${ PORT }` );
 	} );
 } ).catch( ( err ) => {
-	console.error( 'Failed to initialize models:', err );
 	process.exit( 1 );
 } );

+import 'dotenv/config';
 import express from 'express';
 import cors from 'cors';
 import articleRoutes from './routes/article.js';
 import searchRoutes from './routes/search.js';
 import { initEmbedder } from './services/embedder.js';
+import { isClaudeAvailable } from './services/claudeQuestionGenerator.js';
 const app = express();
 const PORT = process.env.PORT || 3000;
 	res.json( { status: 'ok' } );
 } );
+// Pre-warm the embedding model on startup
+console.log( 'Starting server and loading embedding model...' );
+initEmbedder().then( () => {
+	if ( !isClaudeAvailable() ) {
+		console.warn( 'Warning: ANTHROPIC_API_KEY not set. Question generation will be disabled.' );
+	}
 	app.listen( PORT, () => {
 		console.log( `Server running on http://localhost:${ PORT }` );
 	} );
 } ).catch( ( err ) => {
+	console.error( 'Failed to initialize embedding model:', err );
 	process.exit( 1 );
 } );

package-lock.json CHANGED Viewed

@@ -8,12 +8,34 @@
       "name": "question-explorer-server",
       "version": "1.0.0",
       "dependencies": {
         "@xenova/transformers": "^2.17.2",
         "cors": "^2.8.5",
         "express": "^4.18.2",
         "jsdom": "^24.1.0"
       }
     },
     "node_modules/@asamuzakjp/css-color": {
       "version": "3.2.0",
       "resolved": "https://registry.npmjs.org/@asamuzakjp/css-color/-/css-color-3.2.0.tgz",
@@ -27,6 +49,15 @@
         "lru-cache": "^10.4.3"
       }
     },
     "node_modules/@csstools/color-helpers": {
       "version": "5.1.0",
       "resolved": "https://registry.npmjs.org/@csstools/color-helpers/-/color-helpers-5.1.0.tgz",
@@ -711,6 +742,18 @@
         "node": ">=8"
       }
     },
     "node_modules/dunder-proto": {
       "version": "1.0.1",
       "resolved": "https://registry.npmjs.org/dunder-proto/-/dunder-proto-1.0.1.tgz",
@@ -1273,6 +1316,19 @@
         }
       }
     },
     "node_modules/long": {
       "version": "4.0.0",
       "resolved": "https://registry.npmjs.org/long/-/long-4.0.0.tgz",
@@ -2119,6 +2175,12 @@
         "node": ">=18"
       }
     },
     "node_modules/tunnel-agent": {
       "version": "0.6.0",
       "resolved": "https://registry.npmjs.org/tunnel-agent/-/tunnel-agent-0.6.0.tgz",

       "name": "question-explorer-server",
       "version": "1.0.0",
       "dependencies": {
+        "@anthropic-ai/sdk": "^0.71.2",
         "@xenova/transformers": "^2.17.2",
         "cors": "^2.8.5",
+        "dotenv": "^16.4.5",
         "express": "^4.18.2",
         "jsdom": "^24.1.0"
       }
     },
+    "node_modules/@anthropic-ai/sdk": {
+      "version": "0.71.2",
+      "resolved": "https://registry.npmjs.org/@anthropic-ai/sdk/-/sdk-0.71.2.tgz",
+      "integrity": "sha512-TGNDEUuEstk/DKu0/TflXAEt+p+p/WhTlFzEnoosvbaDU2LTjm42igSdlL0VijrKpWejtOKxX0b8A7uc+XiSAQ==",
+      "license": "MIT",
+      "dependencies": {
+        "json-schema-to-ts": "^3.1.1"
+      },
+      "bin": {
+        "anthropic-ai-sdk": "bin/cli"
+      },
+      "peerDependencies": {
+        "zod": "^3.25.0 || ^4.0.0"
+      },
+      "peerDependenciesMeta": {
+        "zod": {
+          "optional": true
+        }
+      }
+    },
     "node_modules/@asamuzakjp/css-color": {
       "version": "3.2.0",
       "resolved": "https://registry.npmjs.org/@asamuzakjp/css-color/-/css-color-3.2.0.tgz",
         "lru-cache": "^10.4.3"
       }
     },
+    "node_modules/@babel/runtime": {
+      "version": "7.28.4",
+      "resolved": "https://registry.npmjs.org/@babel/runtime/-/runtime-7.28.4.tgz",
+      "integrity": "sha512-Q/N6JNWvIvPnLDvjlE1OUBLPQHH6l3CltCEsHIujp45zQUSSh8K+gHnaEX45yAT1nyngnINhvWtzN+Nb9D8RAQ==",
+      "license": "MIT",
+      "engines": {
+        "node": ">=6.9.0"
+      }
+    },
     "node_modules/@csstools/color-helpers": {
       "version": "5.1.0",
       "resolved": "https://registry.npmjs.org/@csstools/color-helpers/-/color-helpers-5.1.0.tgz",
         "node": ">=8"
       }
     },
+    "node_modules/dotenv": {
+      "version": "16.6.1",
+      "resolved": "https://registry.npmjs.org/dotenv/-/dotenv-16.6.1.tgz",
+      "integrity": "sha512-uBq4egWHTcTt33a72vpSG0z3HnPuIl6NqYcTrKEg2azoEyl2hpW0zqlxysq2pK9HlDIHyHyakeYaYnSAwd8bow==",
+      "license": "BSD-2-Clause",
+      "engines": {
+        "node": ">=12"
+      },
+      "funding": {
+        "url": "https://dotenvx.com"
+      }
+    },
     "node_modules/dunder-proto": {
       "version": "1.0.1",
       "resolved": "https://registry.npmjs.org/dunder-proto/-/dunder-proto-1.0.1.tgz",
         }
       }
     },
+    "node_modules/json-schema-to-ts": {
+      "version": "3.1.1",
+      "resolved": "https://registry.npmjs.org/json-schema-to-ts/-/json-schema-to-ts-3.1.1.tgz",
+      "integrity": "sha512-+DWg8jCJG2TEnpy7kOm/7/AxaYoaRbjVB4LFZLySZlWn8exGs3A4OLJR966cVvU26N7X9TWxl+Jsw7dzAqKT6g==",
+      "license": "MIT",
+      "dependencies": {
+        "@babel/runtime": "^7.18.3",
+        "ts-algebra": "^2.0.0"
+      },
+      "engines": {
+        "node": ">=16"
+      }
+    },
     "node_modules/long": {
       "version": "4.0.0",
       "resolved": "https://registry.npmjs.org/long/-/long-4.0.0.tgz",
         "node": ">=18"
       }
     },
+    "node_modules/ts-algebra": {
+      "version": "2.0.0",
+      "resolved": "https://registry.npmjs.org/ts-algebra/-/ts-algebra-2.0.0.tgz",
+      "integrity": "sha512-FPAhNPFMrkwz76P7cdjdmiShwMynZYN6SgOujD1urY4oNm80Ou9oMdmbR45LotcKOXoy7wSmHkRFE6Mxbrhefw==",
+      "license": "MIT"
+    },
     "node_modules/tunnel-agent": {
       "version": "0.6.0",
       "resolved": "https://registry.npmjs.org/tunnel-agent/-/tunnel-agent-0.6.0.tgz",

package.json CHANGED Viewed

@@ -7,9 +7,11 @@
     "start": "node index.js"
   },
   "dependencies": {
-    "express": "^4.18.2",
-    "cors": "^2.8.5",
     "@xenova/transformers": "^2.17.2",
     "jsdom": "^24.1.0"
   }
 }

     "start": "node index.js"
   },
   "dependencies": {
+    "@anthropic-ai/sdk": "^0.71.2",
     "@xenova/transformers": "^2.17.2",
+    "cors": "^2.8.5",
+    "dotenv": "^16.4.5",
+    "express": "^4.18.2",
     "jsdom": "^24.1.0"
   }
 }

routes/article.js CHANGED Viewed

@@ -5,7 +5,7 @@ import { embedTexts, embedSingle } from '../services/embedder.js';
 import { search } from '../services/vectorSearch.js';
 import { getCached, setCache, isCacheValid } from '../services/cache.js';
 import { getProcessingState, setProcessing } from '../services/processingState.js';
-import { generateQuestions, getLeadSectionText } from '../services/questionGenerator.js';
 const router = Router();
@@ -204,15 +204,13 @@ async function processArticle( title, revisionId ) {
 		chunk.embedding = embeddings[ i ];
 	} );
-	// Generate suggested questions from the lead section
 	let suggestedQuestions = [];
-	try {
-		const leadText = getLeadSectionText( chunks );
-		console.log( `Lead text length: ${ leadText.length } chars` );
-		if ( leadText.length > 100 ) {
-			console.log( 'Generating suggested questions...' );
-			const rawQuestions = await generateQuestions( leadText, 5 );
-			console.log( `Raw questions from model:`, rawQuestions );
 			// Validate questions by checking if they match article content
 			const validatedQuestions = [];
@@ -234,11 +232,13 @@ async function processArticle( title, revisionId ) {
 				}
 			}
-			suggestedQuestions = validatedQuestions.slice( 0, 3 );
 			console.log( `Generated ${ suggestedQuestions.length } validated questions` );
 		}
-	} catch ( err ) {
-		console.warn( 'Question generation failed, continuing without suggestions:', err.message );
 	}
 	// Save to cache

 import { search } from '../services/vectorSearch.js';
 import { getCached, setCache, isCacheValid } from '../services/cache.js';
 import { getProcessingState, setProcessing } from '../services/processingState.js';
+import { generateQuestionsWithClaude, isClaudeAvailable } from '../services/claudeQuestionGenerator.js';
 const router = Router();
 		chunk.embedding = embeddings[ i ];
 	} );
+	// Generate suggested questions using Claude
 	let suggestedQuestions = [];
+	if ( isClaudeAvailable() ) {
+		try {
+			console.log( 'Generating questions with Claude...' );
+			const rawQuestions = await generateQuestionsWithClaude( chunks, articleData.title, 5 );
+			console.log( `Claude generated questions:`, rawQuestions );
 			// Validate questions by checking if they match article content
 			const validatedQuestions = [];
 				}
 			}
+			suggestedQuestions = validatedQuestions.slice( 0, 5 );
 			console.log( `Generated ${ suggestedQuestions.length } validated questions` );
+		} catch ( err ) {
+			console.warn( 'Question generation failed, continuing without suggestions:', err.message );
 		}
+	} else {
+		console.log( 'ANTHROPIC_API_KEY not set, skipping question generation' );
 	}
 	// Save to cache

services/claudeQuestionGenerator.js ADDED Viewed

	@@ -0,0 +1,193 @@

+import Anthropic from '@anthropic-ai/sdk';
+let client = null;
+/**
+ * Return the shared Anthropic client, creating it on first use.
+ *
+ * The instance is stored in the module-level `client` variable, so later
+ * calls reuse it instead of constructing a new one. The API key is read
+ * from `process.env.ANTHROPIC_API_KEY` only when the client is first built.
+ *
+ * @returns {Anthropic} The cached client instance
+ * @throws {Error} If ANTHROPIC_API_KEY is unset or empty when the client is first created
+ */
+function getClient() {
+	if ( !client ) {
+		const apiKey = process.env.ANTHROPIC_API_KEY;
+		if ( !apiKey ) {
+			throw new Error( 'ANTHROPIC_API_KEY environment variable is required for Claude question generation' );
+		}
+		client = new Anthropic( { apiKey } );
+	}
+	return client;
+}
+/**
+ * Generate questions using Claude based on the full article text.
+ *
+ * This approach reads the entire article and generates questions designed to
+ * draw readers deeper into the content, beyond surface-level facts.
+ *
+ * @param {Array} chunks - Article chunks with text and section info
+ * @param {string} articleTitle - The title of the article
+ * @param {number} numQuestions - Number of questions to generate (default: 5)
+ * @returns {Promise<string[]>} - Array of generated questions
+ */
+export async function generateQuestionsWithClaude( chunks, articleTitle, numQuestions = 5 ) {
+	const anthropic = getClient();
+	// Build a structured representation of the article
+	const articleContent = buildArticleContent( chunks );
+	// Estimate token count - Claude can handle ~100k tokens, but we'll be conservative
+	const estimatedTokens = Math.ceil( articleContent.length / 4 );
+	console.log( `Article content: ~${ estimatedTokens } tokens estimated` );
+	// If article is very long, summarize sections
+	const contentToUse = estimatedTokens > 50000
+		? truncateArticleContent( chunks, 50000 )
+		: articleContent;
+	const prompt = `You are helping create an interactive Wikipedia reading experience. Given the following Wikipedia article about "${articleTitle}", generate ${numQuestions} short, simple questions that invite readers to explore the article.
+**CRITICAL: Base questions ONLY on the provided article text.**
+You must generate questions answerable using ONLY information in the article below. Do not use external knowledge. If you know facts about "${articleTitle}" not mentioned in this text, do NOT ask about them.
+**Question style:**
+- **Keep it short** - Questions should be 5-10 words. Simple, open-ended phrasing.
+- **Use plain language** - Write for casual readers, not academics.
+- **Be inviting, not testing** - Questions should spark curiosity, not feel like a quiz.
+Good examples:
+- "Why did Plato write about this?"
+- "What happened to the search expeditions?"
+- "How did this influence later writers?"
+Avoid:
+- Long, complex questions with multiple clauses
+- Academic or formal phrasing
+- Questions answered in the opening paragraph
+**Content guidelines:**
+- Look for interesting details deeper in the article, not just the lead
+- Reference specific things mentioned in the text
+- Vary the topics covered across your questions
+<article>
+${contentToUse}
+</article>
+Generate exactly ${numQuestions} questions, one per line. Output only the questions, no numbering. Keep each question short and simple.`;
+	try {
+		const response = await anthropic.messages.create( {
+			model: 'claude-sonnet-4-5',
+			max_tokens: 1024,
+			messages: [
+				{
+					role: 'user',
+					content: prompt
+				}
+			]
+		} );
+		const text = response.content[ 0 ].text;
+		const questions = text
+			.split( '\n' )
+			.map( ( q ) => q.trim() )
+			.filter( ( q ) => q.length > 10 && q.endsWith( '?' ) );
+		console.log( `Claude generated ${ questions.length } questions` );
+		return questions.slice( 0, numQuestions );
+	} catch ( error ) {
+		console.error( 'Claude question generation failed:', error.message );
+		throw error;
+	}
+}
+/**
+ * Build a structured text representation of the article from chunks.
+ *
+ * Chunks are grouped by their `sectionTitle`; a chunk without one is filed
+ * under 'Introduction'. Sections are emitted in the order their title first
+ * appears, so chunks that share a title end up in the same section even if
+ * they are not adjacent in the input. Each section is rendered as a
+ * `## <title>` heading followed by its chunk texts separated by blank lines.
+ *
+ * @param {Array} chunks - Article chunks; `sectionTitle` and `text` are read from each
+ * @returns {string} - Formatted article content (empty string when there are no chunks)
+ */
+function buildArticleContent( chunks ) {
+	const sections = new Map();
+	// Group chunk texts by section title, falling back to 'Introduction'
+	for ( const chunk of chunks ) {
+		const sectionTitle = chunk.sectionTitle || 'Introduction';
+		if ( !sections.has( sectionTitle ) ) {
+			sections.set( sectionTitle, [] );
+		}
+		sections.get( sectionTitle ).push( chunk.text );
+	}
+	// Emit each section as a "## title" heading, its texts, then an empty separator entry
+	const parts = [];
+	for ( const [ sectionTitle, texts ] of sections ) {
+		parts.push( `## ${sectionTitle}\n` );
+		parts.push( texts.join( '\n\n' ) );
+		parts.push( '' );
+	}
+	return parts.join( '\n' );
+}
+/**
+ * Build article content that fits within an estimated token budget.
+ *
+ * Chunks are grouped by section title (falling back to 'Introduction').
+ * Sections are then added whole, in order, while the running estimate
+ * (4 characters per token) stays below maxTokens. The first section that
+ * does not fit is cut to the remaining budget and suffixed with '...',
+ * provided more than 500 estimated tokens remain after its header;
+ * otherwise it is dropped. No later sections are included in either case.
+ *
+ * @param {Array} chunks - Article chunks; `sectionTitle` and `text` are read from each
+ * @param {number} maxTokens - Maximum estimated tokens
+ * @returns {string} - Truncated content
+ */
+function truncateArticleContent( chunks, maxTokens ) {
+	const sections = new Map();
+	// Group chunk texts by section title, falling back to 'Introduction'
+	for ( const chunk of chunks ) {
+		const sectionTitle = chunk.sectionTitle || 'Introduction';
+		if ( !sections.has( sectionTitle ) ) {
+			sections.set( sectionTitle, [] );
+		}
+		sections.get( sectionTitle ).push( chunk.text );
+	}
+	// Add whole sections in order until the estimated budget would be exceeded
+	const parts = [];
+	let estimatedTokens = 0;
+	const charsPerToken = 4;
+	for ( const [ sectionTitle, texts ] of sections ) {
+		const header = `## ${sectionTitle}\n`;
+		const sectionContent = texts.join( '\n\n' );
+		const headerTokens = Math.ceil( header.length / charsPerToken );
+		const contentTokens = Math.ceil( sectionContent.length / charsPerToken );
+		if ( estimatedTokens + headerTokens + contentTokens < maxTokens ) {
+			parts.push( header );
+			parts.push( sectionContent );
+			parts.push( '' );
+			estimatedTokens += headerTokens + contentTokens;
+		} else if ( estimatedTokens + headerTokens + 500 < maxTokens ) {
+			// Section does not fit: keep its header and as much text as the remaining budget allows, then stop
+			parts.push( header );
+			const availableChars = ( maxTokens - estimatedTokens - headerTokens ) * charsPerToken;
+			parts.push( sectionContent.slice( 0, availableChars ) + '...' );
+			parts.push( '' );
+			break;
+		} else {
+			break;
+		}
+	}
+	return parts.join( '\n' );
+}
+/**
+ * Check if Claude question generation is available.
+ *
+ * Only checks that the environment variable holds a non-empty value; it
+ * does not validate the key or contact the API.
+ *
+ * @returns {boolean} - True if ANTHROPIC_API_KEY is set to a non-empty value
+ */
+export function isClaudeAvailable() {
+	return Boolean( process.env.ANTHROPIC_API_KEY );
+}