File size: 12,572 Bytes
e706de2
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
/**

 * Solution 22: Contact Information Extractor

 *

 * Difficulty: ⭐⭐☆☆ (Intermediate)

 *

 * Skills gained:

 * - JSON extraction from unstructured text

 * - List parsing from various formats

 * - Including format instructions

 * - Schema validation

 */

import {Runnable, PromptTemplate, JsonOutputParser, ListOutputParser} from '../../../../src/index.js';
import {LlamaCppLLM} from '../../../../src/llm/llama-cpp-llm.js';
import {QwenChatWrapper} from "node-llama-cpp";

// Sample text snippets with contact information.
// Each entry is one free-form sentence mentioning a name, an email address,
// a phone number and a city. extractContactInfo() passes them one at a time
// to the contact extraction chain as the `text` input.
const TEXT_SAMPLES = [
    "Contact John Smith at john.smith@email.com or call 555-0123. He's based in New York.",
    "For inquiries, reach out to Sarah Johnson (sarah.j@company.com), phone: 555-9876, located in San Francisco.",
    "Please contact Dr. Michael Chen at m.chen@hospital.org or 555-4567. Office in Boston."
];

/**
 * Build a chain that extracts structured contact information from free text.
 *
 * The chain is: PromptTemplate -> LlamaCppLLM -> JsonOutputParser. The parser
 * is configured with a schema describing four fields:
 * - name
 * - email
 * - phone
 * - location
 *
 * Every field is declared as a string. Phone numbers such as "555-0123"
 * contain hyphens and leading zeros, so they cannot be represented as a JSON
 * number without losing information.
 *
 * @returns {Promise<object>} Resolves to the composed chain; call
 *          `invoke({text})` on it to run an extraction.
 */
async function createContactExtractor() {
    const parser = new JsonOutputParser({
        schema: {
            name: 'string',
            email: 'string',
            // A phone number is an identifier, not a quantity: keep it textual.
            phone: 'string',
            location: 'string'
        }
    });

    // The parser's own format instructions are baked into the prompt so the
    // caller only has to supply `text`.
    const prompt = new PromptTemplate({
        template: `Extract info from: {text}



{format_instructions}`,
        inputVariables: ["text"],
        partialVariables: {
            format_instructions: parser.getFormatInstructions()
        }
    });

    const llm = new LlamaCppLLM({
        modelPath: './models/Qwen3-1.7B-Q6_K.gguf',
        chatWrapper: new QwenChatWrapper({
            thoughts: 'discourage'  // Prevents the model from outputting thinking tokens
        }),
    });

    return prompt.pipe(llm).pipe(parser);
}

/**
 * Creates the skills-extraction chain.
 *
 * The prompt asks for a numbered list of the skills found in `description`
 * and appends the ListOutputParser's format instructions. The pieces are
 * composed as prompt -> LLM -> list parser.
 *
 * @returns {Promise<object>} Resolves to the composed chain; the automated
 *          tests in this file expect `invoke({description})` to yield an
 *          array of strings.
 */
async function createSkillsExtractor() {
    const listParser = new ListOutputParser();

    const skillsTemplate = `List skill found in this text numbered: {description}



{format_instructions}`;

    const skillsPrompt = new PromptTemplate({
        template: skillsTemplate,
        inputVariables: ["description"],
        partialVariables: {format_instructions: listParser.getFormatInstructions()}
    });

    // 'discourage' keeps the model from emitting thinking tokens.
    const chatWrapper = new QwenChatWrapper({thoughts: 'discourage'});
    const model = new LlamaCppLLM({
        modelPath: './models/Qwen3-1.7B-Q6_K.gguf',
        chatWrapper
    });

    return skillsPrompt.pipe(model).pipe(listParser);
}

/**
 * Creates the company-information chain.
 *
 * The prompt requests the company name, industry, founding year and employee
 * count from `text`, followed by the JsonOutputParser's format instructions.
 * The parser is created without a schema. The pieces are composed as
 * prompt -> LLM -> JSON parser.
 *
 * @returns {Promise<object>} Resolves to the composed chain; call
 *          `invoke({text})` on it to run an extraction.
 */
async function createCompanyExtractor() {
    const jsonParser = new JsonOutputParser();

    const companyPrompt = new PromptTemplate({
        template: `From this text: {text} i need following information extracted: company name, industry, year founded, employee count. {format_instructions}`,
        inputVariables: ["text"],
        partialVariables: {format_instructions: jsonParser.getFormatInstructions()}
    });

    // 'discourage' keeps the model from emitting thinking tokens.
    const chatWrapper = new QwenChatWrapper({thoughts: 'discourage'});
    const model = new LlamaCppLLM({
        modelPath: './models/Qwen3-1.7B-Q6_K.gguf',
        chatWrapper
    });

    return companyPrompt.pipe(model).pipe(jsonParser);
}

/**
 * Runs the three demo extractions and prints their results to the console.
 *
 * Steps:
 * 1. Build the contact, skills and company chains.
 * 2. Invoke the contact chain once per entry in TEXT_SAMPLES.
 * 3. Invoke the skills chain on a sample job description.
 * 4. Invoke the company chain on a sample company blurb.
 *
 * @returns {Promise<{contactChain: object, skillsChain: object, companyChain: object}>}
 *          The three chains, so the caller can run further checks on them.
 */
async function extractContactInfo() {
    console.log('=== Exercise 22: Contact Information Extractor ===\n');

    // Built one after another: each factory creates its own LLM instance.
    const contactChain = await createContactExtractor();
    const skillsChain = await createSkillsExtractor();
    const companyChain = await createCompanyExtractor();

    // Test 1: Extract contact info
    console.log('--- Test 1: Extracting Contact Information ---\n');

    // Samples are processed sequentially so the output stays in order.
    for (const [index, text] of TEXT_SAMPLES.entries()) {
        console.log(`Text ${index + 1}: "${text}"`);

        const contact = await contactChain.invoke({text});

        console.log('Extracted:', contact);
        console.log();
    }

    // Test 2: Extract skills from job description
    console.log('--- Test 2: Extracting Skills List ---\n');

    const description = `We're looking for a Full Stack Developer with experience in:

JavaScript, Python, React, Node.js, PostgreSQL, Docker, AWS, and Git.

Strong communication and problem-solving skills required.`;

    console.log(`Job Description: "${description}"\n`);

    const skills = await skillsChain.invoke({description});

    console.log('Extracted Skills:', skills);
    console.log();

    // Test 3: Extract company info
    console.log('--- Test 3: Extracting Company Information ---\n');

    const companyText = `TechCorp is a leading software company in the cloud computing industry.

Founded in 2010, the company now employs over 500 people across three continents.`;

    console.log(`Company Text: "${companyText}"\n`);

    const companyInfo = await companyChain.invoke({text: companyText});

    console.log('Extracted Info:', companyInfo);
    console.log();

    // Same exercise number as the banner printed at the start of the run.
    console.log('✓ Exercise 22 Complete!');

    return {contactChain, skillsChain, companyChain};
}

// Run the exercise: the demo runs first, then the chains it resolves with are
// handed to runTests (a function declaration further down, so it is hoisted and
// usable here). A rejection from either step is printed with console.error
// instead of being rethrown.
extractContactInfo()
    .then(runTests)
    .catch(console.error);

// ============================================================================
// AUTOMATED TESTS
// ============================================================================

/**
 * Runs the automated checks against the three extraction chains and prints a
 * pass/fail line per check followed by a summary.
 *
 * Every check goes through the local `test` helper, which counts the outcome
 * and never lets an assertion error escape. Checks that need a chain are
 * counted as failed when that chain is missing.
 *
 * @param {{contactChain?: object, skillsChain?: object, companyChain?: object}} results
 *        The chains to check, as returned by extractContactInfo(). A missing
 *        `results` value is treated as "no chains created".
 * @returns {Promise<{passed: number, failed: number}>} The final counters.
 */
async function runTests(results) {
    const {contactChain, skillsChain, companyChain} = results ?? {};
    const divider = '='.repeat(60);

    console.log('\n' + divider);
    console.log('RUNNING AUTOMATED TESTS');
    console.log(divider + '\n');

    const assert = (await import('assert')).default;
    let passed = 0;
    let failed = 0;

    // Runs one check and records whether it passed or threw.
    async function test(name, fn) {
        try {
            await fn();
            passed++;
            console.log(`✅ ${name}`);
        } catch (error) {
            failed++;
            console.error(`❌ ${name}`);
            console.error(`   ${error.message}\n`);
        }
    }

    // Records a failure for each check that cannot run because its chain is missing.
    function failMissing(chainName, testNames) {
        for (const name of testNames) {
            failed++;
            console.error(`❌ ${name}`);
            console.error(`   Cannot test - ${chainName} is not created\n`);
        }
    }

    // `typeof null` is also 'object', so null has to be excluded explicitly.
    const isNonNullObject = (value) => typeof value === 'object' && value !== null;

    // Test 1: Chains created
    await test('Contact extractor chain created', async () => {
        assert(contactChain != null, 'Create contactChain');
        assert(contactChain instanceof Runnable, 'Should be Runnable');
    });

    await test('Skills extractor chain created', async () => {
        assert(skillsChain != null, 'Create skillsChain');
        assert(skillsChain instanceof Runnable, 'Should be Runnable');
    });

    await test('Company extractor chain created', async () => {
        assert(companyChain != null, 'Create companyChain');
        assert(companyChain instanceof Runnable, 'Should be Runnable');
    });

    // Test 2: Contact extraction (only run if chain exists)
    if (contactChain != null) {
        await test('Contact extractor returns object', async () => {
            const result = await contactChain.invoke({
                text: "Contact Alice at alice@email.com, phone 555-1234, in Seattle"
            });
            assert(isNonNullObject(result), 'Should return object');
            assert(!Array.isArray(result), 'Should not be array');
        });

        await test('Contact object has required fields', async () => {
            const result = await contactChain.invoke({
                text: "Contact Bob at bob@email.com, phone 555-5678, in Portland"
            });
            assert('name' in result, 'Should have name field');
            assert('email' in result, 'Should have email field');
            assert('phone' in result, 'Should have phone field');
        });

        await test('Contact fields are strings', async () => {
            const result = await contactChain.invoke({
                text: "Contact Carol at carol@email.com"
            });
            // Fields are only type-checked when the model actually filled them in.
            if (result.name) assert(typeof result.name === 'string', 'name should be string');
            if (result.email) assert(typeof result.email === 'string', 'email should be string');
        });
    } else {
        failMissing('contactChain', [
            'Contact extractor returns object',
            'Contact object has required fields',
            'Contact fields are strings',
        ]);
    }

    // Test 3: Skills extraction (only run if chain exists)
    if (skillsChain != null) {
        await test('Skills extractor returns array', async () => {
            const result = await skillsChain.invoke({
                description: "Looking for: JavaScript, Python, SQL"
            });
            assert(Array.isArray(result), 'Should return array');
        });

        await test('Skills array contains strings', async () => {
            const result = await skillsChain.invoke({
                description: "Requirements: Java, C++, Git, Docker"
            });
            assert(result.length > 0, 'Should extract at least one skill');
            assert(
                result.every((skill) => typeof skill === 'string'),
                'All skills should be strings'
            );
        });

        await test('Skills array has no empty strings', async () => {
            const result = await skillsChain.invoke({
                description: "Skills: React, Node.js, MongoDB"
            });
            assert(
                result.every((skill) => skill.trim().length > 0),
                'Should have no empty strings'
            );
        });
    } else {
        failMissing('skillsChain', [
            'Skills extractor returns array',
            'Skills array contains strings',
            'Skills array has no empty strings',
        ]);
    }

    // Test 4: Company extraction (only run if chain exists)
    if (companyChain != null) {
        await test('Company extractor returns object', async () => {
            const result = await companyChain.invoke({
                text: "CloudTech was founded in 2015 in the SaaS industry with 100 employees"
            });
            assert(isNonNullObject(result), 'Should return object');
        });
    } else {
        failMissing('companyChain', ['Company extractor returns object']);
    }

    // Test 5: JSON parsing robustness (always run - tests parser capability)
    await test('JsonParser handles markdown code blocks', async () => {
        // This only verifies that the parser class can be instantiated and
        // exposes a parse method; it does not feed it a ```json block.
        const parser = new JsonOutputParser();
        assert(parser !== null, 'JsonOutputParser should be instantiable');
        assert(typeof parser.parse === 'function', 'Parser should have parse method');
    });

    // Summary
    console.log('\n' + divider);
    console.log('TEST SUMMARY');
    console.log(divider);
    console.log(`Total: ${passed + failed}`);
    console.log(`✅ Passed: ${passed}`);
    console.log(`❌ Failed: ${failed}`);
    console.log(divider);

    if (failed === 0) {
        console.log('\n🎉 All tests passed!\n');
        console.log('📚 What you learned:');
        console.log('  • JsonOutputParser extracts structured data reliably');
        console.log('  • ListOutputParser handles multiple list formats');
        console.log('  • getFormatInstructions() tells LLM what you expect');
        console.log('  • Schema validation ensures data quality');
        console.log('  • Parsers handle markdown and extra text gracefully\n');
    } else {
        console.log('\n⚠️  Some tests failed. Check your implementation.\n');
    }

    return {passed, failed};
}