File size: 4,213 Bytes
e706de2
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
/**
 * Exercise 3 Solution: Streaming Responses
 */

import {HumanMessage, SystemMessage, LlamaCppLLM} from '../../../../src/index.js';

/**
 * Exercise 3 solution: demonstrates four streaming patterns against a local
 * llama.cpp model — basic chunk-by-chunk printing, streaming with a character
 * count, collecting a full response from a stream, and a timing comparison
 * between streaming and a regular invoke.
 *
 * All output goes to stdout/console; the LLM is always disposed, even if a
 * part throws (errors propagate to the caller after cleanup).
 *
 * @returns {Promise<void>} resolves when all four parts have completed
 */
async function exercise3() {
    console.log('=== Exercise 3: Streaming Responses ===\n');

    const llm = new LlamaCppLLM({
        modelPath: './models/Meta-Llama-3.1-8B-Instruct-Q5_K_S.gguf',
        temperature: 0.7,
        maxTokens: 200
    });

    try {
        // Part 1: Basic streaming — write each chunk as it arrives.
        console.log('Part 1: Basic streaming');
        console.log('Question: Tell me a long fun fact about space.\n');
        console.log('Response: ');

        for await (const chunk of llm.stream("Tell me a long fun fact about space.")) {
            process.stdout.write(chunk.content); // No newline between chunks
        }

        console.log('\n');

        // Part 2: Streaming with progress tracking.
        // FIX: the original printed a static "Progress: " label that was never
        // updated while streaming — a dead label. The running character count
        // is the actual progress metric, reported once streaming finishes.
        console.log('Part 2: Streaming with progress indicator');
        console.log('Question: Explain what a black hole is in 2-3 sentences.\n');

        let charCount = 0;
        console.log('Response: ');

        for await (const chunk of llm.stream("Explain what a black hole is in 2-3 sentences.")) {
            process.stdout.write(chunk.content);
            charCount += chunk.content.length;
        }

        console.log(`\n\nTotal characters streamed: ${charCount}`);
        console.log();

        // Part 3: Collecting streamed chunks into one complete response string.
        console.log('Part 3: Collecting full response from stream');

        const messages = [
            new SystemMessage("You are a helpful assistant"),
            new HumanMessage("What are the three primary colors? Answer briefly.")
        ];

        let fullResponse = '';
        for await (const chunk of llm.stream(messages)) {
            fullResponse += chunk.content;
        }

        console.log('Full response:', fullResponse);
        console.log();

        // Part 4: Compare wall-clock time of streaming vs a regular invoke.
        console.log('Part 4: Streaming vs Regular invoke');
        const question = "What is JavaScript? Answer in one sentence.";

        // Streaming: accumulate chunks and time the whole stream.
        console.log('Streaming:');
        const streamStart = Date.now();
        let streamedText = '';
        for await (const chunk of llm.stream(question)) {
            streamedText += chunk.content;
        }
        const streamTime = Date.now() - streamStart;
        console.log(`Time: ${streamTime}ms`);
        console.log(`Response: ${streamedText}`);
        console.log();

        // Regular invoke: one await, full response at once.
        console.log('Regular invoke:');
        const invokeStart = Date.now();
        const invokeResponse = await llm.invoke(question);
        const invokeTime = Date.now() - invokeStart;
        console.log(`Time: ${invokeTime}ms`);
        console.log(`Response: ${invokeResponse.content}`);

        console.log(`\nTime difference: ${Math.abs(streamTime - invokeTime)}ms`);
        console.log('Note: Streaming feels faster because you see results immediately!');

    } finally {
        // Always release the model, even when a part above throws.
        await llm.dispose();
    }

    console.log('\n✓ Exercise 3 complete!');
}

// Run the solution; surface any unhandled rejection on stderr instead of
// letting Node terminate with an unhandled-promise warning.
exercise3().catch(console.error);

/**
 * Key Takeaways:
 *
 * 1. Streaming API:
 *    - for await (const chunk of llm.stream(input)) { }
 *    - Each chunk is an AIMessage with partial content
 *    - Use process.stdout.write() to print without newlines
 *
 * 2. User Experience:
 *    - Streaming shows immediate feedback
 *    - Users see progress as it happens
 *    - Feels faster even if total time is similar
 *    - Essential for long responses
 *
 * 3. Collection pattern:
 *    - Initialize empty string: let full = ''
 *    - Accumulate: full += chunk.content
 *    - Use when you need the complete response
 *
 * 4. When to stream:
 *    - Long-form content generation
 *    - Interactive chat interfaces
 *    - When user experience matters
 *    - When you want to show progress
 *
 * 5. When NOT to stream:
 *    - Need to parse complete response
 *    - Batch processing
 *    - Automated testing
 *    - Response needs to be processed as a whole
 */