// Scraped upload-page metadata (non-code), kept as a comment so the file parses:
// lenzcom's picture
// Upload folder using huggingface_hub
// e706de2 verified
/**
 * Exercise 3 Solution: Streaming Responses
 */
import {HumanMessage, SystemMessage, LlamaCppLLM} from '../../../../src/index.js';
/**
 * Demonstrates the LlamaCppLLM streaming API in four parts:
 * basic streaming, character counting, chunk accumulation, and a
 * timing comparison between stream() and invoke().
 * The model is always disposed, even when a part throws.
 */
async function exercise3() {
  console.log('=== Exercise 3: Streaming Responses ===\n');

  const llm = new LlamaCppLLM({
    modelPath: './models/Meta-Llama-3.1-8B-Instruct-Q5_K_S.gguf',
    temperature: 0.7,
    maxTokens: 200
  });

  // Drain a stream into one string without printing anything.
  const collect = async (input) => {
    let text = '';
    for await (const piece of llm.stream(input)) {
      text += piece.content;
    }
    return text;
  };

  try {
    // Part 1: print each chunk the moment it arrives.
    console.log('Part 1: Basic streaming');
    console.log('Question: Tell me a long fun fact about space.\n');
    console.log('Response: ');
    for await (const piece of llm.stream("Tell me a long fun fact about space.")) {
      process.stdout.write(piece.content); // stdout.write avoids the newline console.log adds
    }
    console.log('\n');

    // Part 2: same as Part 1, but also tally how many characters streamed by.
    console.log('Part 2: Streaming with progress indicator');
    console.log('Question: Explain what a black hole is in 2-3 sentences.\n');
    let printedChars = 0;
    console.log('Progress: ');
    console.log('Response: ');
    for await (const piece of llm.stream("Explain what a black hole is in 2-3 sentences.")) {
      process.stdout.write(piece.content);
      printedChars += piece.content.length;
    }
    console.log(`\n\nTotal characters streamed: ${printedChars}`);
    console.log();

    // Part 3: accumulate chunks into one complete response string.
    console.log('Part 3: Collecting full response from stream');
    const messages = [
      new SystemMessage("You are a helpful assistant"),
      new HumanMessage("What are the three primary colors? Answer briefly.")
    ];
    const fullResponse = await collect(messages);
    console.log('Full response:', fullResponse);
    console.log();

    // Part 4: wall-clock comparison of stream() vs invoke() on one prompt.
    console.log('Part 4: Streaming vs Regular invoke');
    const question = "What is JavaScript? Answer in one sentence.";

    // Timed streaming run.
    console.log('Streaming:');
    const streamBegan = Date.now();
    const streamedText = await collect(question);
    const streamElapsed = Date.now() - streamBegan;
    console.log(`Time: ${streamElapsed}ms`);
    console.log(`Response: ${streamedText}`);
    console.log();

    // Timed blocking run.
    console.log('Regular invoke:');
    const invokeBegan = Date.now();
    const invokeResponse = await llm.invoke(question);
    const invokeElapsed = Date.now() - invokeBegan;
    console.log(`Time: ${invokeElapsed}ms`);
    console.log(`Response: ${invokeResponse.content}`);
    console.log(`\nTime difference: ${Math.abs(streamElapsed - invokeElapsed)}ms`);
    console.log('Note: Streaming feels faster because you see results immediately!');
  } finally {
    // Release the model's native resources no matter how we exit the try.
    await llm.dispose();
  }
  console.log('\n✓ Exercise 3 complete!');
}
// Entry point: run the exercise and report any unhandled failure on stderr.
exercise3().catch((err) => {
  console.error(err);
});
/**
* Key Takeaways:
*
* 1. Streaming API:
* - for await (const chunk of llm.stream(input)) { }
* - Each chunk is an AIMessage with partial content
* - Use process.stdout.write() to print without newlines
*
* 2. User Experience:
* - Streaming shows immediate feedback
* - Users see progress as it happens
* - Feels faster even if total time is similar
* - Essential for long responses
*
* 3. Collection pattern:
* - Initialize empty string: let full = ''
* - Accumulate: full += chunk.content
* - Use when you need the complete response
*
* 4. When to stream:
* - Long-form content generation
* - Interactive chat interfaces
* - When user experience matters
* - When you want to show progress
*
* 5. When NOT to stream:
* - Need to parse complete response
* - Batch processing
* - Automated testing
* - Response needs to be processed as a whole
*/