Spaces:
Sleeping
Sleeping
Eric Gardner
committed on
Commit
·
ce30646
1
Parent(s):
8067185
Use Claude API for question generation
Browse files- Dockerfile +4 -0
- index.js +9 -8
- package-lock.json +62 -0
- package.json +4 -2
- routes/article.js +12 -12
- services/claudeQuestionGenerator.js +193 -0
Dockerfile
CHANGED
|
@@ -17,6 +17,10 @@ RUN mkdir -p /app/cache && chmod 777 /app/cache
|
|
| 17 |
# Hugging Face Spaces uses port 7860
|
| 18 |
ENV PORT=7860
|
| 19 |
|
|
|
|
|
|
|
|
|
|
|
|
|
| 20 |
# Expose the port
|
| 21 |
EXPOSE 7860
|
| 22 |
|
|
|
|
| 17 |
# Hugging Face Spaces uses port 7860
|
| 18 |
ENV PORT=7860
|
| 19 |
|
| 20 |
+
# Anthropic API key for question generation
|
| 21 |
+
# Set this as a secret in your deployment platform (e.g., HF Spaces secrets)
|
| 22 |
+
ENV ANTHROPIC_API_KEY=""
|
| 23 |
+
|
| 24 |
# Expose the port
|
| 25 |
EXPOSE 7860
|
| 26 |
|
index.js
CHANGED
|
@@ -1,9 +1,10 @@
|
|
|
|
|
| 1 |
import express from 'express';
|
| 2 |
import cors from 'cors';
|
| 3 |
import articleRoutes from './routes/article.js';
|
| 4 |
import searchRoutes from './routes/search.js';
|
| 5 |
import { initEmbedder } from './services/embedder.js';
|
| 6 |
-
import {
|
| 7 |
|
| 8 |
const app = express();
|
| 9 |
const PORT = process.env.PORT || 3000;
|
|
@@ -23,16 +24,16 @@ app.get( '/api/health', ( _, res ) => {
|
|
| 23 |
res.json( { status: 'ok' } );
|
| 24 |
} );
|
| 25 |
|
| 26 |
-
// Pre-warm the
|
| 27 |
-
console.log( 'Starting server and loading
|
| 28 |
-
|
| 29 |
-
|
| 30 |
-
|
| 31 |
-
|
| 32 |
app.listen( PORT, () => {
|
| 33 |
console.log( `Server running on http://localhost:${ PORT }` );
|
| 34 |
} );
|
| 35 |
} ).catch( ( err ) => {
|
| 36 |
-
console.error( 'Failed to initialize
|
| 37 |
process.exit( 1 );
|
| 38 |
} );
|
|
|
|
| 1 |
+
import 'dotenv/config';
|
| 2 |
import express from 'express';
|
| 3 |
import cors from 'cors';
|
| 4 |
import articleRoutes from './routes/article.js';
|
| 5 |
import searchRoutes from './routes/search.js';
|
| 6 |
import { initEmbedder } from './services/embedder.js';
|
| 7 |
+
import { isClaudeAvailable } from './services/claudeQuestionGenerator.js';
|
| 8 |
|
| 9 |
const app = express();
|
| 10 |
const PORT = process.env.PORT || 3000;
|
|
|
|
| 24 |
res.json( { status: 'ok' } );
|
| 25 |
} );
|
| 26 |
|
| 27 |
+
// Pre-warm the embedding model on startup
|
| 28 |
+
console.log( 'Starting server and loading embedding model...' );
|
| 29 |
+
initEmbedder().then( () => {
|
| 30 |
+
if ( !isClaudeAvailable() ) {
|
| 31 |
+
console.warn( 'Warning: ANTHROPIC_API_KEY not set. Question generation will be disabled.' );
|
| 32 |
+
}
|
| 33 |
app.listen( PORT, () => {
|
| 34 |
console.log( `Server running on http://localhost:${ PORT }` );
|
| 35 |
} );
|
| 36 |
} ).catch( ( err ) => {
|
| 37 |
+
console.error( 'Failed to initialize embedding model:', err );
|
| 38 |
process.exit( 1 );
|
| 39 |
} );
|
package-lock.json
CHANGED
|
@@ -8,12 +8,34 @@
|
|
| 8 |
"name": "question-explorer-server",
|
| 9 |
"version": "1.0.0",
|
| 10 |
"dependencies": {
|
|
|
|
| 11 |
"@xenova/transformers": "^2.17.2",
|
| 12 |
"cors": "^2.8.5",
|
|
|
|
| 13 |
"express": "^4.18.2",
|
| 14 |
"jsdom": "^24.1.0"
|
| 15 |
}
|
| 16 |
},
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 17 |
"node_modules/@asamuzakjp/css-color": {
|
| 18 |
"version": "3.2.0",
|
| 19 |
"resolved": "https://registry.npmjs.org/@asamuzakjp/css-color/-/css-color-3.2.0.tgz",
|
|
@@ -27,6 +49,15 @@
|
|
| 27 |
"lru-cache": "^10.4.3"
|
| 28 |
}
|
| 29 |
},
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 30 |
"node_modules/@csstools/color-helpers": {
|
| 31 |
"version": "5.1.0",
|
| 32 |
"resolved": "https://registry.npmjs.org/@csstools/color-helpers/-/color-helpers-5.1.0.tgz",
|
|
@@ -711,6 +742,18 @@
|
|
| 711 |
"node": ">=8"
|
| 712 |
}
|
| 713 |
},
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 714 |
"node_modules/dunder-proto": {
|
| 715 |
"version": "1.0.1",
|
| 716 |
"resolved": "https://registry.npmjs.org/dunder-proto/-/dunder-proto-1.0.1.tgz",
|
|
@@ -1273,6 +1316,19 @@
|
|
| 1273 |
}
|
| 1274 |
}
|
| 1275 |
},
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1276 |
"node_modules/long": {
|
| 1277 |
"version": "4.0.0",
|
| 1278 |
"resolved": "https://registry.npmjs.org/long/-/long-4.0.0.tgz",
|
|
@@ -2119,6 +2175,12 @@
|
|
| 2119 |
"node": ">=18"
|
| 2120 |
}
|
| 2121 |
},
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 2122 |
"node_modules/tunnel-agent": {
|
| 2123 |
"version": "0.6.0",
|
| 2124 |
"resolved": "https://registry.npmjs.org/tunnel-agent/-/tunnel-agent-0.6.0.tgz",
|
|
|
|
| 8 |
"name": "question-explorer-server",
|
| 9 |
"version": "1.0.0",
|
| 10 |
"dependencies": {
|
| 11 |
+
"@anthropic-ai/sdk": "^0.71.2",
|
| 12 |
"@xenova/transformers": "^2.17.2",
|
| 13 |
"cors": "^2.8.5",
|
| 14 |
+
"dotenv": "^16.4.5",
|
| 15 |
"express": "^4.18.2",
|
| 16 |
"jsdom": "^24.1.0"
|
| 17 |
}
|
| 18 |
},
|
| 19 |
+
"node_modules/@anthropic-ai/sdk": {
|
| 20 |
+
"version": "0.71.2",
|
| 21 |
+
"resolved": "https://registry.npmjs.org/@anthropic-ai/sdk/-/sdk-0.71.2.tgz",
|
| 22 |
+
"integrity": "sha512-TGNDEUuEstk/DKu0/TflXAEt+p+p/WhTlFzEnoosvbaDU2LTjm42igSdlL0VijrKpWejtOKxX0b8A7uc+XiSAQ==",
|
| 23 |
+
"license": "MIT",
|
| 24 |
+
"dependencies": {
|
| 25 |
+
"json-schema-to-ts": "^3.1.1"
|
| 26 |
+
},
|
| 27 |
+
"bin": {
|
| 28 |
+
"anthropic-ai-sdk": "bin/cli"
|
| 29 |
+
},
|
| 30 |
+
"peerDependencies": {
|
| 31 |
+
"zod": "^3.25.0 || ^4.0.0"
|
| 32 |
+
},
|
| 33 |
+
"peerDependenciesMeta": {
|
| 34 |
+
"zod": {
|
| 35 |
+
"optional": true
|
| 36 |
+
}
|
| 37 |
+
}
|
| 38 |
+
},
|
| 39 |
"node_modules/@asamuzakjp/css-color": {
|
| 40 |
"version": "3.2.0",
|
| 41 |
"resolved": "https://registry.npmjs.org/@asamuzakjp/css-color/-/css-color-3.2.0.tgz",
|
|
|
|
| 49 |
"lru-cache": "^10.4.3"
|
| 50 |
}
|
| 51 |
},
|
| 52 |
+
"node_modules/@babel/runtime": {
|
| 53 |
+
"version": "7.28.4",
|
| 54 |
+
"resolved": "https://registry.npmjs.org/@babel/runtime/-/runtime-7.28.4.tgz",
|
| 55 |
+
"integrity": "sha512-Q/N6JNWvIvPnLDvjlE1OUBLPQHH6l3CltCEsHIujp45zQUSSh8K+gHnaEX45yAT1nyngnINhvWtzN+Nb9D8RAQ==",
|
| 56 |
+
"license": "MIT",
|
| 57 |
+
"engines": {
|
| 58 |
+
"node": ">=6.9.0"
|
| 59 |
+
}
|
| 60 |
+
},
|
| 61 |
"node_modules/@csstools/color-helpers": {
|
| 62 |
"version": "5.1.0",
|
| 63 |
"resolved": "https://registry.npmjs.org/@csstools/color-helpers/-/color-helpers-5.1.0.tgz",
|
|
|
|
| 742 |
"node": ">=8"
|
| 743 |
}
|
| 744 |
},
|
| 745 |
+
"node_modules/dotenv": {
|
| 746 |
+
"version": "16.6.1",
|
| 747 |
+
"resolved": "https://registry.npmjs.org/dotenv/-/dotenv-16.6.1.tgz",
|
| 748 |
+
"integrity": "sha512-uBq4egWHTcTt33a72vpSG0z3HnPuIl6NqYcTrKEg2azoEyl2hpW0zqlxysq2pK9HlDIHyHyakeYaYnSAwd8bow==",
|
| 749 |
+
"license": "BSD-2-Clause",
|
| 750 |
+
"engines": {
|
| 751 |
+
"node": ">=12"
|
| 752 |
+
},
|
| 753 |
+
"funding": {
|
| 754 |
+
"url": "https://dotenvx.com"
|
| 755 |
+
}
|
| 756 |
+
},
|
| 757 |
"node_modules/dunder-proto": {
|
| 758 |
"version": "1.0.1",
|
| 759 |
"resolved": "https://registry.npmjs.org/dunder-proto/-/dunder-proto-1.0.1.tgz",
|
|
|
|
| 1316 |
}
|
| 1317 |
}
|
| 1318 |
},
|
| 1319 |
+
"node_modules/json-schema-to-ts": {
|
| 1320 |
+
"version": "3.1.1",
|
| 1321 |
+
"resolved": "https://registry.npmjs.org/json-schema-to-ts/-/json-schema-to-ts-3.1.1.tgz",
|
| 1322 |
+
"integrity": "sha512-+DWg8jCJG2TEnpy7kOm/7/AxaYoaRbjVB4LFZLySZlWn8exGs3A4OLJR966cVvU26N7X9TWxl+Jsw7dzAqKT6g==",
|
| 1323 |
+
"license": "MIT",
|
| 1324 |
+
"dependencies": {
|
| 1325 |
+
"@babel/runtime": "^7.18.3",
|
| 1326 |
+
"ts-algebra": "^2.0.0"
|
| 1327 |
+
},
|
| 1328 |
+
"engines": {
|
| 1329 |
+
"node": ">=16"
|
| 1330 |
+
}
|
| 1331 |
+
},
|
| 1332 |
"node_modules/long": {
|
| 1333 |
"version": "4.0.0",
|
| 1334 |
"resolved": "https://registry.npmjs.org/long/-/long-4.0.0.tgz",
|
|
|
|
| 2175 |
"node": ">=18"
|
| 2176 |
}
|
| 2177 |
},
|
| 2178 |
+
"node_modules/ts-algebra": {
|
| 2179 |
+
"version": "2.0.0",
|
| 2180 |
+
"resolved": "https://registry.npmjs.org/ts-algebra/-/ts-algebra-2.0.0.tgz",
|
| 2181 |
+
"integrity": "sha512-FPAhNPFMrkwz76P7cdjdmiShwMynZYN6SgOujD1urY4oNm80Ou9oMdmbR45LotcKOXoy7wSmHkRFE6Mxbrhefw==",
|
| 2182 |
+
"license": "MIT"
|
| 2183 |
+
},
|
| 2184 |
"node_modules/tunnel-agent": {
|
| 2185 |
"version": "0.6.0",
|
| 2186 |
"resolved": "https://registry.npmjs.org/tunnel-agent/-/tunnel-agent-0.6.0.tgz",
|
package.json
CHANGED
|
@@ -7,9 +7,11 @@
|
|
| 7 |
"start": "node index.js"
|
| 8 |
},
|
| 9 |
"dependencies": {
|
| 10 |
-
"
|
| 11 |
-
"cors": "^2.8.5",
|
| 12 |
"@xenova/transformers": "^2.17.2",
|
|
|
|
|
|
|
|
|
|
| 13 |
"jsdom": "^24.1.0"
|
| 14 |
}
|
| 15 |
}
|
|
|
|
| 7 |
"start": "node index.js"
|
| 8 |
},
|
| 9 |
"dependencies": {
|
| 10 |
+
"@anthropic-ai/sdk": "^0.71.2",
|
|
|
|
| 11 |
"@xenova/transformers": "^2.17.2",
|
| 12 |
+
"cors": "^2.8.5",
|
| 13 |
+
"dotenv": "^16.4.5",
|
| 14 |
+
"express": "^4.18.2",
|
| 15 |
"jsdom": "^24.1.0"
|
| 16 |
}
|
| 17 |
}
|
routes/article.js
CHANGED
|
@@ -5,7 +5,7 @@ import { embedTexts, embedSingle } from '../services/embedder.js';
|
|
| 5 |
import { search } from '../services/vectorSearch.js';
|
| 6 |
import { getCached, setCache, isCacheValid } from '../services/cache.js';
|
| 7 |
import { getProcessingState, setProcessing } from '../services/processingState.js';
|
| 8 |
-
import {
|
| 9 |
|
| 10 |
const router = Router();
|
| 11 |
|
|
@@ -204,15 +204,13 @@ async function processArticle( title, revisionId ) {
|
|
| 204 |
chunk.embedding = embeddings[ i ];
|
| 205 |
} );
|
| 206 |
|
| 207 |
-
// Generate suggested questions
|
| 208 |
let suggestedQuestions = [];
|
| 209 |
-
|
| 210 |
-
|
| 211 |
-
|
| 212 |
-
|
| 213 |
-
console.log(
|
| 214 |
-
const rawQuestions = await generateQuestions( leadText, 5 );
|
| 215 |
-
console.log( `Raw questions from model:`, rawQuestions );
|
| 216 |
|
| 217 |
// Validate questions by checking if they match article content
|
| 218 |
const validatedQuestions = [];
|
|
@@ -234,11 +232,13 @@ async function processArticle( title, revisionId ) {
|
|
| 234 |
}
|
| 235 |
}
|
| 236 |
|
| 237 |
-
suggestedQuestions = validatedQuestions.slice( 0,
|
| 238 |
console.log( `Generated ${ suggestedQuestions.length } validated questions` );
|
|
|
|
|
|
|
| 239 |
}
|
| 240 |
-
}
|
| 241 |
-
console.
|
| 242 |
}
|
| 243 |
|
| 244 |
// Save to cache
|
|
|
|
| 5 |
import { search } from '../services/vectorSearch.js';
|
| 6 |
import { getCached, setCache, isCacheValid } from '../services/cache.js';
|
| 7 |
import { getProcessingState, setProcessing } from '../services/processingState.js';
|
| 8 |
+
import { generateQuestionsWithClaude, isClaudeAvailable } from '../services/claudeQuestionGenerator.js';
|
| 9 |
|
| 10 |
const router = Router();
|
| 11 |
|
|
|
|
| 204 |
chunk.embedding = embeddings[ i ];
|
| 205 |
} );
|
| 206 |
|
| 207 |
+
// Generate suggested questions using Claude
|
| 208 |
let suggestedQuestions = [];
|
| 209 |
+
if ( isClaudeAvailable() ) {
|
| 210 |
+
try {
|
| 211 |
+
console.log( 'Generating questions with Claude...' );
|
| 212 |
+
const rawQuestions = await generateQuestionsWithClaude( chunks, articleData.title, 5 );
|
| 213 |
+
console.log( `Claude generated questions:`, rawQuestions );
|
|
|
|
|
|
|
| 214 |
|
| 215 |
// Validate questions by checking if they match article content
|
| 216 |
const validatedQuestions = [];
|
|
|
|
| 232 |
}
|
| 233 |
}
|
| 234 |
|
| 235 |
+
suggestedQuestions = validatedQuestions.slice( 0, 5 );
|
| 236 |
console.log( `Generated ${ suggestedQuestions.length } validated questions` );
|
| 237 |
+
} catch ( err ) {
|
| 238 |
+
console.warn( 'Question generation failed, continuing without suggestions:', err.message );
|
| 239 |
}
|
| 240 |
+
} else {
|
| 241 |
+
console.log( 'ANTHROPIC_API_KEY not set, skipping question generation' );
|
| 242 |
}
|
| 243 |
|
| 244 |
// Save to cache
|
services/claudeQuestionGenerator.js
ADDED
|
@@ -0,0 +1,193 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
import Anthropic from '@anthropic-ai/sdk';
|
| 2 |
+
|
| 3 |
+
let client = null;
|
| 4 |
+
|
| 5 |
+
/**
 * Return the module's shared Anthropic client, constructing it on first use.
 *
 * The API key is read from the ANTHROPIC_API_KEY environment variable at the
 * time of the first call; later calls reuse the cached instance.
 *
 * @returns {Anthropic} - The cached client instance
 * @throws {Error} - If ANTHROPIC_API_KEY is unset or empty when the client must be created
 */
function getClient() {
	// Fast path: a client was already built by an earlier call.
	if ( client ) {
		return client;
	}

	const { ANTHROPIC_API_KEY: apiKey } = process.env;
	if ( !apiKey ) {
		throw new Error( 'ANTHROPIC_API_KEY environment variable is required for Claude question generation' );
	}

	client = new Anthropic( { apiKey } );
	return client;
}
|
| 18 |
+
|
| 19 |
+
/**
 * Generate reader-facing questions with Claude from the article's chunk text.
 *
 * The chunks are rendered into a sectioned plain-text document (truncated when
 * a rough size estimate exceeds the budget), embedded in a prompt, and sent as
 * a single user message. The reply is split into lines; list markers and
 * wrapping quotes are stripped, and only lines that look like questions are kept.
 *
 * @param {Array} chunks - Article chunks with text and section info
 * @param {string} articleTitle - The title of the article (interpolated into the prompt)
 * @param {number} numQuestions - Number of questions to request, and the maximum returned (default: 5)
 * @returns {Promise<string[]>} - Array of generated questions (possibly fewer than requested)
 * @throws {Error} - If the client cannot be created or the API request fails (logged, then rethrown)
 */
export async function generateQuestionsWithClaude( chunks, articleTitle, numQuestions = 5 ) {
	const anthropic = getClient();

	// Rough budget for the article portion of the prompt, in estimated tokens.
	const maxArticleTokens = 50000;

	// Build a structured representation of the article
	const articleContent = buildArticleContent( chunks );

	// Crude estimate (4 characters per token); kept conservative so the prompt
	// stays well inside the model's context window.
	const estimatedTokens = Math.ceil( articleContent.length / 4 );
	console.log( `Article content: ~${ estimatedTokens } tokens estimated` );

	// If the article is very long, truncate it to the budget (no summarization happens here)
	const contentToUse = estimatedTokens > maxArticleTokens
		? truncateArticleContent( chunks, maxArticleTokens )
		: articleContent;

	const prompt = `You are helping create an interactive Wikipedia reading experience. Given the following Wikipedia article about "${articleTitle}", generate ${numQuestions} short, simple questions that invite readers to explore the article.

**CRITICAL: Base questions ONLY on the provided article text.**

You must generate questions answerable using ONLY information in the article below. Do not use external knowledge. If you know facts about "${articleTitle}" not mentioned in this text, do NOT ask about them.

**Question style:**

- **Keep it short** - Questions should be 5-10 words. Simple, open-ended phrasing.
- **Use plain language** - Write for casual readers, not academics.
- **Be inviting, not testing** - Questions should spark curiosity, not feel like a quiz.

Good examples:
- "Why did Plato write about this?"
- "What happened to the search expeditions?"
- "How did this influence later writers?"

Avoid:
- Long, complex questions with multiple clauses
- Academic or formal phrasing
- Questions answered in the opening paragraph

**Content guidelines:**

- Look for interesting details deeper in the article, not just the lead
- Reference specific things mentioned in the text
- Vary the topics covered across your questions

<article>
${contentToUse}
</article>

Generate exactly ${numQuestions} questions, one per line. Output only the questions, no numbering. Keep each question short and simple.`;

	try {
		const response = await anthropic.messages.create( {
			model: 'claude-sonnet-4-5',
			max_tokens: 1024,
			messages: [
				{
					role: 'user',
					content: prompt
				}
			]
		} );

		// The reply is a list of content blocks; do not assume the first one
		// exists or is a text block. Concatenate whatever text blocks are present.
		const blocks = Array.isArray( response.content ) ? response.content : [];
		const text = blocks
			.filter( ( block ) => block && block.type === 'text' && typeof block.text === 'string' )
			.map( ( block ) => block.text )
			.join( '\n' );

		const questions = parseQuestionLines( text );

		console.log( `Claude generated ${ questions.length } questions` );
		return questions.slice( 0, numQuestions );

	} catch ( error ) {
		console.error( 'Claude question generation failed:', error.message );
		throw error;
	}
}

/**
 * Split a model reply into clean question strings.
 *
 * The prompt's own examples are bulleted and quoted, so a reply may mirror
 * that format even though plain lines were requested. Leading list markers
 * ("- ", "* ", "1. ", "2) ") and one pair of wrapping quotes are removed
 * before the question filter is applied.
 *
 * @param {string} text - Raw text returned by the model
 * @returns {string[]} - Lines longer than 10 characters that end with '?'
 */
function parseQuestionLines( text ) {
	const listMarker = /^(?:[-*\u2022]|\d+[.)])\s+/;
	const wrappingQuotes = /^["'\u201C\u2018](.+)["'\u201D\u2019]$/;

	return text
		.split( '\n' )
		.map( ( line ) => line
			.trim()
			.replace( listMarker, '' )
			.replace( wrappingQuotes, '$1' )
			.trim()
		)
		.filter( ( q ) => q.length > 10 && q.endsWith( '?' ) );
}
|
| 105 |
+
|
| 106 |
+
/**
 * Render article chunks as plain text with one "## <section>" heading per section.
 *
 * Chunks that share a sectionTitle are gathered under a single heading, in
 * order of the title's first appearance. Chunks with no sectionTitle are
 * placed under 'Introduction'.
 *
 * @param {Array} chunks - Article chunks (reads `sectionTitle` and `text`)
 * @returns {string} - Formatted article content
 */
function buildArticleContent( chunks ) {
	const textsBySection = new Map();

	// Bucket each chunk's text under its section heading
	for ( const { sectionTitle, text } of chunks ) {
		const heading = sectionTitle || 'Introduction';
		const bucket = textsBySection.get( heading );
		if ( bucket ) {
			bucket.push( text );
		} else {
			textsBySection.set( heading, [ text ] );
		}
	}

	// Each section contributes: heading line, body paragraphs, blank separator
	return [ ...textsBySection ]
		.flatMap( ( [ heading, texts ] ) => [
			`## ${heading}\n`,
			texts.join( '\n\n' ),
			''
		] )
		.join( '\n' );
}
|
| 134 |
+
|
| 135 |
+
/**
 * Build section-formatted article text that stays within a rough token budget.
 *
 * Sections are emitted whole, in order of first appearance, for as long as
 * they fit. The first section that does not fit is cut down to the remaining
 * budget and suffixed with '...', but only if more than 500 estimated tokens
 * remain after its heading; every section after that point is dropped.
 *
 * Tokens are estimated at 4 characters each. The newlines used to join the
 * pieces and the '...' suffix are not counted, so the budget is approximate.
 *
 * @param {Array} chunks - Article chunks (reads `sectionTitle` and `text`)
 * @param {number} maxTokens - Maximum estimated tokens
 * @returns {string} - Truncated content ('' when nothing fits)
 */
function truncateArticleContent( chunks, maxTokens ) {
	const sections = new Map();

	// Group chunks by section
	for ( const chunk of chunks ) {
		const sectionTitle = chunk.sectionTitle || 'Introduction';
		if ( !sections.has( sectionTitle ) ) {
			sections.set( sectionTitle, [] );
		}
		sections.get( sectionTitle ).push( chunk.text );
	}

	// Emit whole sections while they fit, then at most one truncated section
	const parts = [];
	let estimatedTokens = 0;
	const charsPerToken = 4;

	for ( const [ sectionTitle, texts ] of sections ) {
		const header = `## ${sectionTitle}\n`;
		const sectionContent = texts.join( '\n\n' );

		const headerTokens = Math.ceil( header.length / charsPerToken );
		const contentTokens = Math.ceil( sectionContent.length / charsPerToken );

		if ( estimatedTokens + headerTokens + contentTokens < maxTokens ) {
			parts.push( header );
			parts.push( sectionContent );
			parts.push( '' );
			estimatedTokens += headerTokens + contentTokens;
		} else if ( estimatedTokens + headerTokens + 500 < maxTokens ) {
			// Include header and truncated content
			parts.push( header );
			const availableChars = ( maxTokens - estimatedTokens - headerTokens ) * charsPerToken;
			parts.push( sliceOnCodePointBoundary( sectionContent, availableChars ) + '...' );
			parts.push( '' );
			break;
		} else {
			break;
		}
	}

	return parts.join( '\n' );
}

/**
 * Take a prefix of `text` without ending on half of a UTF-16 surrogate pair.
 *
 * A plain slice can stop between the two code units of an astral character
 * (e.g. an emoji), leaving a lone high surrogate at the end of the result.
 * When that would happen the prefix is shortened by one code unit.
 *
 * @param {string} text - Source text
 * @param {number} maxLength - Maximum number of UTF-16 code units to keep
 * @returns {string} - Prefix of at most maxLength code units
 */
function sliceOnCodePointBoundary( text, maxLength ) {
	let end = Math.min( Math.floor( maxLength ), text.length );

	if ( end > 0 && end < text.length ) {
		const lastUnit = text.charCodeAt( end - 1 );
		// 0xD800-0xDBFF is the high-surrogate range; its partner would be cut off
		if ( lastUnit >= 0xD800 && lastUnit <= 0xDBFF ) {
			end -= 1;
		}
	}

	return text.slice( 0, end );
}
|
| 185 |
+
|
| 186 |
+
/**
 * Report whether Claude-backed question generation can be used.
 *
 * Only the presence of a non-empty ANTHROPIC_API_KEY environment variable is
 * checked; the key itself is not validated.
 *
 * @returns {boolean} - True if ANTHROPIC_API_KEY is set to a non-empty value
 */
export function isClaudeAvailable() {
	const { ANTHROPIC_API_KEY: apiKey } = process.env;
	// Environment values are strings when present, so "unset" and "empty" are the only falsy cases.
	return apiKey !== undefined && apiKey !== '';
}
|