// lenzcom's picture
// Upload folder using huggingface_hub
// e706de2 verified
/**
 * Exercise 3: Streaming Responses
 *
 * Goal: Learn to stream LLM responses in real-time
 *
 * In this exercise, you'll:
 * 1. Stream a response and print it character by character
 * 2. Build a progress indicator while streaming
 * 3. Collect chunks into a full response
 * 4. Compare streaming vs non-streaming
 *
 * This creates the "ChatGPT typing effect"!
 */
import {HumanMessage, SystemMessage, LlamaCppLLM} from '../../../../src/index.js';
/**
 * Exercise 3: stream LLM responses four ways — raw chunks, with a
 * progress indicator, collected into one string, and timed against a
 * regular invoke() call.
 *
 * NOTE(review): assumes llm.stream(...) yields an async iterable of
 * partial-message chunks whose text is on chunk.content, and that
 * llm.invoke(...) resolves to a message with .content — per the TODO
 * hints and the learning notes at the bottom of this file. Confirm
 * against the LlamaCppLLM API.
 */
async function exercise3() {
  console.log('=== Exercise 3: Streaming Responses ===\n');
  const llm = new LlamaCppLLM({
    modelPath: './models/Meta-Llama-3.1-8B-Instruct-Q5_K_S.gguf',
    temperature: 0.7,
    maxTokens: 200
  });
  try {
    // Part 1: Basic streaming — print each chunk as it arrives so the
    // text appears incrementally (the "ChatGPT typing effect").
    console.log('Part 1: Basic streaming');
    console.log('Question: Tell me a short fun fact about space.\n');
    console.log('Response: ');
    for await (const chunk of await llm.stream('Tell me a short fun fact about space.')) {
      // write(), not log(): chunks are partial text, so keep them inline.
      process.stdout.write(chunk.content);
    }
    console.log('\n');

    // Part 2: Streaming with a progress indicator — print a dot every
    // 10th character while accumulating the response text.
    console.log('Part 2: Streaming with progress indicator');
    console.log('Question: Explain what a black hole is in 2-3 sentences.\n');
    let charCount = 0;
    let blackHoleText = '';
    console.log('Progress: ');
    for await (const chunk of await llm.stream('Explain what a black hole is in 2-3 sentences.')) {
      blackHoleText += chunk.content;
      for (let i = 0; i < chunk.content.length; i += 1) {
        charCount += 1;
        if (charCount % 10 === 0) {
          process.stdout.write('.');
        }
      }
    }
    console.log(`\nResponse: ${blackHoleText}`);
    console.log(`\n\nTotal characters streamed: ${charCount}`);
    console.log();

    // Part 3: Collect every streamed chunk into one full response string.
    console.log('Part 3: Collecting full response from stream');
    const messages = [
      new SystemMessage("You are a helpful assistant"),
      new HumanMessage("What are the three primary colors? Answer briefly.")
    ];
    let fullResponse = '';
    for await (const chunk of await llm.stream(messages)) {
      fullResponse += chunk.content;
    }
    console.log('Full response:', fullResponse);
    console.log();

    // Part 4: Compare wall-clock time of streaming vs regular invoke.
    console.log('Part 4: Streaming vs Regular invoke');
    const question = "What is JavaScript? Answer in one sentence.";

    console.log('Streaming:');
    const streamStart = Date.now();
    let streamedText = '';
    for await (const chunk of await llm.stream(question)) {
      streamedText += chunk.content;
    }
    const streamTime = Date.now() - streamStart;
    console.log(`Time: ${streamTime}ms`);
    console.log(`Response: ${streamedText}`);
    console.log();

    console.log('Regular invoke:');
    const invokeStart = Date.now();
    const invokeResult = await llm.invoke(question);
    const invokeTime = Date.now() - invokeStart;
    console.log(`Time: ${invokeTime}ms`);
    console.log(`Response: ${invokeResult.content}`);
    // Total time is usually similar; streaming just surfaces output earlier.
    console.log(`\nTime difference: ${Math.abs(streamTime - invokeTime)}ms`);
  } finally {
    // Always release the native model resources, even if a part throws.
    await llm.dispose();
  }
  console.log('\n✓ Exercise 3 complete!');
}
// Run the exercise, surfacing any rejection instead of leaving it unhandled.
exercise3().catch((err) => {
  console.error(err);
});
/**
* Expected Output:
* - Part 1: Text appearing character by character
* - Part 2: Progress dots while streaming
* - Part 3: Full collected response
* - Part 4: Similar times for both methods (streaming shows progress)
*
* Learning Points:
* 1. Streaming shows results as they generate (better UX)
* 2. for await...of loop handles async generators
* 3. Each chunk is an AIMessage with partial content
* 4. Total time similar, but perceived as faster
*/