// lenzcom's picture
// Upload folder using huggingface_hub
// e706de2 verified
/**
 * Exercise 3: Streaming Responses
 *
 * Goal: Learn to stream LLM responses in real-time
 *
 * In this exercise, you'll:
 * 1. Stream a response and print it character by character
 * 2. Build a progress indicator while streaming
 * 3. Collect chunks into a full response
 * 4. Compare streaming vs non-streaming
 *
 * This creates the "ChatGPT typing effect"!
 */
import {HumanMessage, SystemMessage, LlamaCppLLM} from '../../../../src/index.js';
/**
 * Exercise 3: stream LLM responses four ways — raw chunks, with a
 * progress indicator, collected into one string, and timed against a
 * regular invoke() call.
 *
 * NOTE(review): assumes llm.stream(...) yields an async iterable of
 * partial-message chunks whose text is on chunk.content, and that
 * llm.invoke(...) resolves to a message with .content — per the TODO
 * hints and the learning notes at the bottom of this file. Confirm
 * against the LlamaCppLLM API.
 */
async function exercise3() {
  console.log('=== Exercise 3: Streaming Responses ===\n');
  const llm = new LlamaCppLLM({
    modelPath: './models/Meta-Llama-3.1-8B-Instruct-Q5_K_S.gguf',
    temperature: 0.7,
    maxTokens: 200
  });
  try {
    // Part 1: Basic streaming — print each chunk as it arrives so the
    // text appears incrementally (the "ChatGPT typing effect").
    console.log('Part 1: Basic streaming');
    console.log('Question: Tell me a short fun fact about space.\n');
    console.log('Response: ');
    for await (const chunk of await llm.stream('Tell me a short fun fact about space.')) {
      // write(), not log(): chunks are partial text, so keep them inline.
      process.stdout.write(chunk.content);
    }
    console.log('\n');

    // Part 2: Streaming with a progress indicator — print a dot every
    // 10th character while accumulating the response text.
    console.log('Part 2: Streaming with progress indicator');
    console.log('Question: Explain what a black hole is in 2-3 sentences.\n');
    let charCount = 0;
    let blackHoleText = '';
    console.log('Progress: ');
    for await (const chunk of await llm.stream('Explain what a black hole is in 2-3 sentences.')) {
      blackHoleText += chunk.content;
      for (let i = 0; i < chunk.content.length; i += 1) {
        charCount += 1;
        if (charCount % 10 === 0) {
          process.stdout.write('.');
        }
      }
    }
    console.log(`\nResponse: ${blackHoleText}`);
    console.log(`\n\nTotal characters streamed: ${charCount}`);
    console.log();

    // Part 3: Collect every streamed chunk into one full response string.
    console.log('Part 3: Collecting full response from stream');
    const messages = [
      new SystemMessage("You are a helpful assistant"),
      new HumanMessage("What are the three primary colors? Answer briefly.")
    ];
    let fullResponse = '';
    for await (const chunk of await llm.stream(messages)) {
      fullResponse += chunk.content;
    }
    console.log('Full response:', fullResponse);
    console.log();

    // Part 4: Compare wall-clock time of streaming vs regular invoke.
    console.log('Part 4: Streaming vs Regular invoke');
    const question = "What is JavaScript? Answer in one sentence.";

    console.log('Streaming:');
    const streamStart = Date.now();
    let streamedText = '';
    for await (const chunk of await llm.stream(question)) {
      streamedText += chunk.content;
    }
    const streamTime = Date.now() - streamStart;
    console.log(`Time: ${streamTime}ms`);
    console.log(`Response: ${streamedText}`);
    console.log();

    console.log('Regular invoke:');
    const invokeStart = Date.now();
    const invokeResult = await llm.invoke(question);
    const invokeTime = Date.now() - invokeStart;
    console.log(`Time: ${invokeTime}ms`);
    console.log(`Response: ${invokeResult.content}`);
    // Total time is usually similar; streaming just surfaces output earlier.
    console.log(`\nTime difference: ${Math.abs(streamTime - invokeTime)}ms`);
  } finally {
    // Always release the native model resources, even if a part throws.
    await llm.dispose();
  }
  console.log('\n✓ Exercise 3 complete!');
}
// Run the exercise, surfacing any rejection instead of leaving it unhandled.
exercise3().catch((err) => {
  console.error(err);
});
/**
* Expected Output:
* - Part 1: Text appearing character by character
* - Part 2: Progress dots while streaming
* - Part 3: Full collected response
* - Part 4: Similar times for both methods (streaming shows progress)
*
* Learning Points:
* 1. Streaming shows results as they generate (better UX)
* 2. for await...of loop handles async generators
* 3. Each chunk is an AIMessage with partial content
* 4. Total time similar, but perceived as faster
*/