File size: 4,011 Bytes
e706de2
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
/**
 * Exercise 3: Streaming Responses
 *
 * Goal: Learn to stream LLM responses in real-time
 *
 * In this exercise, you'll:
 * 1. Stream a response and print it character by character
 * 2. Build a progress indicator while streaming
 * 3. Collect chunks into a full response
 * 4. Compare streaming vs non-streaming
 *
 * This creates the "ChatGPT typing effect"!
 */

import {HumanMessage, SystemMessage, LlamaCppLLM} from '../../../../src/index.js';

/**
 * Runs Exercise 3: four hands-on parts covering LLM response streaming.
 * Students fill in the `(code here)` gaps; the scaffolding prints the
 * prompts, timings, and section headers around their implementations.
 */
async function exercise3() {
    console.log('=== Exercise 3: Streaming Responses ===\n');

    // Local llama.cpp model — released in the finally block below.
    const model = new LlamaCppLLM({
        modelPath: './models/Meta-Llama-3.1-8B-Instruct-Q5_K_S.gguf',
        temperature: 0.7,
        maxTokens: 200
    });

    try {
        // Part 1: Basic streaming
        console.log('Part 1: Basic streaming');
        console.log('Question: Tell me a short fun fact about space.\n');
        console.log('Response: ');

        // TODO: Call model.stream() and iterate the async generator with
        // for await...of, writing each chunk via process.stdout.write so
        // no newline is inserted between chunks.
        // (code here)

        console.log('\n');

        // Part 2: Streaming with progress indicator
        console.log('Part 2: Streaming with progress indicator');
        console.log('Question: Explain what a black hole is in 2-3 sentences.\n');

        let streamedChars = 0;
        // TODO: Stream the response while counting characters; print a
        // dot (.) after every 10th character as progress, and print the
        // actual response text as well.
        // Hint: check streamedChars % 10 === 0
        console.log('Progress: ');
        // (code here)

        console.log(`\n\nTotal characters streamed: ${streamedChars}`);
        console.log();

        // Part 3: Collecting streamed chunks
        console.log('Part 3: Collecting full response from stream');

        const conversation = [
            new SystemMessage("You are a helpful assistant"),
            new HumanMessage("What are the three primary colors? Answer briefly.")
        ];

        // TODO: Stream the response for `conversation` and accumulate
        // each chunk.content into `collected` using a for await loop.
        let collected = '';
        // (code here)

        console.log('Full response:', collected);
        console.log();

        // Part 4: Compare streaming vs regular invoke
        console.log('Part 4: Streaming vs Regular invoke');
        const question = "What is JavaScript? Answer in one sentence.";

        // TODO: Time a streaming response
        console.log('Streaming:');
        const streamBegan = Date.now();
        let streamedText = '';
        // (code here - stream and collect)
        const streamElapsed = Date.now() - streamBegan;
        console.log(`Time: ${streamElapsed}ms`);
        console.log(`Response: ${streamedText}`);
        console.log();

        // TODO: Time a regular invoke
        console.log('Regular invoke:');
        const invokeBegan = Date.now();
        // (code here - use invoke)
        const invokeElapsed = Date.now() - invokeBegan;
        console.log(`Time: ${invokeElapsed}ms`);
        // console.log response

        console.log(`\nTime difference: ${Math.abs(streamElapsed - invokeElapsed)}ms`);

    } finally {
        // Always free the model, even if one of the parts above throws.
        await model.dispose();
    }

    console.log('\n✓ Exercise 3 complete!');
}

// Entry point: run the exercise and surface any unhandled error on stderr.
exercise3().catch((err) => console.error(err));

/**
 * Expected Output:
 * - Part 1: Text appearing character by character
 * - Part 2: Progress dots while streaming
 * - Part 3: Full collected response
 * - Part 4: Similar times for both methods (streaming shows progress)
 *
 * Learning Points:
 * 1. Streaming shows results as they generate (better UX)
 * 2. for await...of loop handles async generators
 * 3. Each chunk is an AIMessage with partial content
 * 4. Total time similar, but perceived as faster
 */