File size: 50,818 Bytes
b386992
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488
489
490
491
492
493
494
495
496
497
498
499
500
501
502
503
504
505
506
507
508
509
510
511
512
513
514
515
516
517
518
519
520
521
522
523
524
525
526
527
528
529
530
531
532
533
534
535
536
537
538
539
540
541
542
543
544
545
546
547
548
549
550
551
552
553
554
555
556
557
558
559
560
561
562
563
564
565
566
567
568
569
570
571
572
573
574
575
576
577
578
579
580
581
582
583
584
585
586
587
588
589
590
591
592
593
594
595
596
597
598
599
600
601
602
603
604
605
606
607
608
609
610
611
612
613
614
615
616
617
618
619
620
621
622
623
624
625
626
627
628
629
630
631
632
633
634
635
636
637
638
639
640
641
642
643
644
645
646
647
648
649
650
651
652
653
654
655
656
657
658
659
660
661
662
663
664
665
666
667
668
669
670
671
672
673
674
675
676
677
678
679
680
681
682
683
684
685
686
687
688
689
690
691
692
693
694
695
696
697
698
699
700
701
702
703
704
705
706
707
708
709
710
711
712
713
714
715
716
717
718
719
720
721
722
723
724
725
726
727
728
729
730
731
732
733
734
735
736
737
738
739
740
741
742
743
744
745
746
747
748
749
750
751
752
753
754
755
756
757
758
759
760
761
762
763
764
765
766
767
768
769
770
771
772
773
774
775
776
777
778
779
780
781
782
783
784
785
786
787
788
789
790
791
792
793
794
795
796
797
798
799
800
801
802
803
804
805
806
807
808
809
810
811
812
813
814
815
816
817
818
819
820
821
822
823
824
825
826
827
828
829
830
831
832
833
834
835
836
837
838
839
840
841
842
843
844
845
846
847
848
849
850
851
852
853
854
855
856
857
858
859
860
861
862
863
864
865
866
867
868
869
870
871
872
873
874
875
876
877
878
879
880
881
882
883
884
885
886
887
888
889
890
891
892
893
894
895
896
897
898
899
900
901
902
903
904
905
906
907
908
909
910
911
912
913
914
915
916
917
918
919
920
921
922
923
924
925
926
927
928
929
930
931
932
933
934
935
936
937
938
939
940
941
942
943
944
945
946
947
948
949
950
951
952
953
954
955
956
957
958
959
960
961
962
963
964
965
966
967
968
969
970
971
972
973
974
975
976
977
978
979
980
981
982
983
984
985
986
987
988
989
990
991
992
993
994
995
996
997
998
999
1000
1001
1002
1003
1004
1005
1006
1007
1008
1009
1010
1011
1012
1013
1014
1015
1016
1017
1018
1019
1020
1021
1022
1023
1024
1025
1026
1027
1028
1029
1030
1031
1032
1033
1034
1035
{
 "cells": [
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "# Fine-Tuning LLMs for Function Calling\n",
    "\n",
    "In the application of LLMs, agents represent an exciting field. They are intelligent systems capable of simulating human-like intelligent behavior to perform specific tasks or services. LLM-based agents can leverage the powerful comprehension and generation capabilities of LLMs while also incorporating the planning and function-calling abilities to accomplish many complex tasks.\n",
    "\n",
    "In this tutorial, we will demonstrate how to perform Supervised Fine-Tuning (SFT) and Parameter Efficient Fine-Tuning (PEFT) to learn function-calling (tool learning) using NeMo 2.0. NeMo 2.0 introduces Python-based configurations, PyTorch Lightning’s modular abstractions, and NeMo-Run for scaling experiments across multiple GPUs. In this notebook, we will use NeMo-Run to streamline the configuration and execution of our experiments."
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "# NeMo Tools and Resources\n",
    "\n",
    "* [NeMo Framework](https://docs.nvidia.com/nemo-framework/user-guide/latest/overview.html)\n",
    "\n",
    "# Software Requirements\n",
    "\n",
    "* Access to latest NeMo Framework NGC Containers\n",
    "\n",
    "\n",
    "# Hardware Requirements\n",
    "\n",
    "* This playbook has been tested on the following hardware: Single A6000, Single H100, 2xA6000, 8xH100. It can be scaled to multiple GPUs as well as multiple nodes by modifying the appropriate parameters."
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "#### Launch the NeMo Framework container as follows: \n",
    "\n",
    "Depending on the number of GPUs, `--gpus` might need to be adjusted accordingly:\n",
    "```\n",
    "docker run -it -p 8080:8080 -p 8088:8088 --rm --gpus '\"device=0,1\"' --ipc=host --network host -v $(pwd):/workspace nvcr.io/nvidia/nemo:25.02\n",
    "```\n",
    "\n",
    "#### Launch Jupyter Notebook as follows: \n",
    "```\n",
    "jupyter notebook --allow-root --ip 0.0.0.0 --port 8088 --no-browser --NotebookApp.token=''\n",
    "\n",
    "```"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## Step 1. Construct the Dataset\n",
    "\n",
    "An LLM agent is a system that leverages an LLM as its core engine, and is capable of executing specific tasks by invoking external functions or tools. These tools can be APIs, databases, calculators, etc., allowing the agent to obtain the necessary information from external sources while completing tasks. As shown in an example dataset below, an LLM understands the usage of each tool via three text fields: `name`, `description`, `parameters`."
   ]
  },
  {
   "cell_type": "code",
   "metadata": {},
   "source": [
    "tool1 = {\n",
    "    'name': 'strategy_query', \n",
    "    'description': 'Check the initial quotes for financial products.', \n",
    "    'parameters': {'product': {'type': 'string', 'description': 'Product type.'},\n",
    "                   'term': {'type': 'string', 'description': 'Term.'}}\n",
    "}\n",
    "tool2 = {}\n",
    "tools = [tool1, tool2]"
   ],
   "outputs": [],
   "execution_count": null
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "To perform fine-tuning in NeMo 2.0, we should first transform the training dataset into a predefined format in NeMo 2.0. According to the different training strategies you use, there are two types of function-calling dataset formats:\n",
    "\n",
    "* The first type is a single-turn function-calling dataset. For each piece of data, the assistant only calls the function once, and the conversation does not record the function-calling execution result. We focus on training the LLM to correctly select the function and its parameters.\n",
    "\n",
    "* The second type is a multi-turn function-calling dataset. For each piece of data, the assistant calls the function once or more than once, and the conversation records the function-calling execution result. The assistant will make the next generation based on the function's return values. This is closer to a real-life, interactive chatbot scenario.\n",
    "\n",
    "Both types of datasets are supported in NeMo function-calling fine-tuning. You can choose one of them or a combination of them, as long as it's suitable for your target.\n",
    "\n",
    "#### Option 1: Use the Single-Turn Function-Calling Dataset\n",
    "\n",
    "Let's take a data from [glaiveai/glaive-function-calling-v2](https://huggingface.co/datasets/glaiveai/glaive-function-calling-v2) as an example. The original data format is shown below."
   ]
  },
  {
   "cell_type": "code",
   "metadata": {},
   "source": [
    "{\n",
    "    \"system\": \"SYSTEM: You are a helpful assistant with access to the following functions. Use them if required -\\n{\\n    \\\"name\\\": \\\"generate_password\\\",\\n    \\\"description\\\": \\\"Generate a random password\\\",\\n    \\\"parameters\\\": {\\n        \\\"type\\\": \\\"object\\\",\\n        \\\"properties\\\": {\\n            \\\"length\\\": {\\n                \\\"type\\\": \\\"integer\\\",\\n                \\\"description\\\": \\\"The length of the password\\\"\\n            },\\n            \\\"include_symbols\\\": {\\n                \\\"type\\\": \\\"boolean\\\",\\n                \\\"description\\\": \\\"Whether to include symbols in the password\\\"\\n            }\\n        },\\n        \\\"required\\\": [\\n            \\\"length\\\"\\n        ]\\n    }\\n}\\n\",\n",
    "    \"chat\": \"USER: Hi, I need a new password for my account. Can you generate one for me?\\n\\n\\nASSISTANT: Of course, I can help with that. How long would you like your password to be? And do you want it to include symbols? <|endoftext|>\\n\\n\\nUSER: I would like it to be 12 characters long and yes, please include symbols.\\n\\n\\nASSISTANT: <functioncall> {\\\"name\\\": \\\"generate_password\\\", \\\"arguments\\\": '{\\\"length\\\": 12, \\\"include_symbols\\\": true}'} <|endoftext|>\\n\\n\\nFUNCTION RESPONSE: {\\\"password\\\": \\\"4&7jB#9L!2Z%\\\"}\\n\\n\\nASSISTANT: Here is your new password: 4&7jB#9L!2Z%. Please make sure to save it in a secure place. <|endoftext|>\\n\\n\\nUSER: Great, thanks! Can you also help me book a flight to New York?\\n\\n\\nASSISTANT: I'm sorry, but as an AI, I don't have the capability to book flights or perform other external tasks. My primary function is to assist with tasks related to the functions provided to me. In this case, I can help generate passwords. For booking a flight, I recommend using a trusted travel website or contacting your travel agent. <|endoftext|>\\n\\n\\n\"\n",
    "},"
   ],
   "outputs": [],
   "execution_count": null
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "We should transform it into the NeMo chat dataset format, which consists of three fields: `mask`, `system` and `conversations`. \n",
    "\n",
    "* `mask`: The role that needs to be masked out to prevent the role from participating in loss calculation.\n",
    "\n",
    "* `system`: System prompt.\n",
    "\n",
    "* `conversations`: For each role, the conversation consists of two fields `from` and `value`.\n",
    "\n",
    "We can transform the original data into the format shown below. Since we're constructing a single-turn function-calling dataset, you can end the conversation at the point of the tool call."
   ]
  },
  {
   "cell_type": "code",
   "metadata": {},
   "source": [
    "{ \n",
    "    \"mask\": \"User\", \n",
    "    \"system\": \"\",\n",
    "    \"conversations\": [\n",
    "        {\n",
    "            \"from\": \"User\", \n",
    "            \"value\": \"You are an expert in composing functions. You are given a question and a set of possible functions. Based on the question, you will need to make one or more function/tool calls to achieve the purpose. If none of the function can be used, point it out. If the given question lacks the parameters required by the function, also point it out. You should only return the function call in tools call sections. Here is a list of functions in JSON format that you can invoke.\\n<AVAILABLE_TOOLS>\\n{\\n    \\\"name\\\": \\\"generate_password\\\",\\n    \\\"description\\\": \\\"Generate a random password\\\",\\n    \\\"parameters\\\": {\\n        \\\"type\\\": \\\"object\\\",\\n        \\\"properties\\\": {\\n            \\\"length\\\": {\\n                \\\"type\\\": \\\"integer\\\",\\n                \\\"description\\\": \\\"The length of the password\\\"\\n            },\\n            \\\"include_symbols\\\": {\\n                \\\"type\\\": \\\"boolean\\\",\\n                \\\"description\\\": \\\"Whether to include symbols in the password\\\"\\n            }\\n        },\\n        \\\"required\\\": [\\n            \\\"length\\\"\\n        ]\\n    }\\n}\\n\\n{\\n    \\\"name\\\": \\\"create_task\\\",\\n    \\\"description\\\": \\\"Create a new task in a task management system\\\",\\n    \\\"parameters\\\": {\\n        \\\"type\\\": \\\"object\\\",\\n        \\\"properties\\\": {\\n            \\\"title\\\": {\\n                \\\"type\\\": \\\"string\\\",\\n                \\\"description\\\": \\\"The title of the task\\\"\\n            },\\n            \\\"due_date\\\": {\\n                \\\"type\\\": \\\"string\\\",\\n                \\\"format\\\": \\\"date\\\",\\n                \\\"description\\\": \\\"The due date of the task\\\"\\n            },\\n            \\\"priority\\\": {\\n                \\\"type\\\": \\\"string\\\",\\n                \\\"enum\\\": [\\n                    \\\"low\\\",\\n                    
\\\"medium\\\",\\n                    \\\"high\\\"\\n                ],\\n                \\\"description\\\": \\\"The priority of the task\\\"\\n            }\\n        },\\n        \\\"required\\\": [\\n            \\\"title\\\",\\n            \\\"due_date\\\",\\n            \\\"priority\\\"\\n        ]\\n    }\\n}\\n\\n</AVAILABLE_TOOLS>\\nIf you decide to invoke any of the function(s), put it in the format of <TOOLCALL>[func_name1(params_name1=params_value1, params_name2=params_value2...), func_name2(params)]</TOOLCALL>\\nYou SHOULD NOT include any other information in the response.\\n\\nI need a new password. Can you generate one for me?\"\n",
    "        }, \n",
    "        {\n",
    "            \"from\": \"Assistant\", \n",
    "            \"value\": \"Of course. How long would you like your password to be? And would you like it to include symbols?\"\n",
    "        }, \n",
    "        {\n",
    "            \"from\": \"User\", \n",
    "            \"value\": \"I would like it to be 12 characters long and yes, please include symbols.\"\n",
    "        }, \n",
    "        {\n",
    "            \"from\": \"Assistant\", \n",
    "            \"value\": \"<TOOLCALL>[generate_password(length=12, include_symbols=True)]</TOOLCALL>\"\n",
    "        }\n",
    "    ]\n",
    "},"
   ],
   "outputs": [],
   "execution_count": null
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "Using the single-turn function-calling datasets described above, we've successfully fine-tuned [nvidia/Mistral-NeMo-Minitron-8B-Instruct](https://huggingface.co/nvidia/Mistral-NeMo-Minitron-8B-Instruct) based on [nvidia/Mistral-NeMo-Minitron-8B-Base](https://huggingface.co/nvidia/Mistral-NeMo-Minitron-8B-Base), which has a general function-calling ability. If you're interested, you can quickly experience its capabilities [NIM online](https://build.nvidia.com/nvidia/mistral-nemo-minitron-8b-8k-instruct). If you want to reproduce a model like [nvidia/Mistral-NeMo-Minitron-8B-Instruct](https://huggingface.co/nvidia/Mistral-NeMo-Minitron-8B-Instruct) using NeMo, you can refer to the three open-source datasets we used. \n",
    "*Note that we also used some internal datasets that are not open-sourced.*\n",
    "\n",
    "* [nvidia/Daring-Anteater](https://huggingface.co/datasets/nvidia/Daring-Anteater)\n",
    "\n",
    "* [glaiveai/glaive-function-calling-v2](https://huggingface.co/datasets/glaiveai/glaive-function-calling-v2)\n",
    "\n",
    "* [Salesforce/xlam-function-calling-60k](https://huggingface.co/datasets/Salesforce/xlam-function-calling-60k)\n",
    "\n",
    "* lr=1e-6 and 3 epochs.\n"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## Step 1.1 Download the HuggingFace Dataset\n",
    "For the purposes of this tutorial, we are going to download [Salesforce/xlam-function-calling-60k](https://huggingface.co/datasets/Salesforce/xlam-function-calling-60k).\n",
    "First let's download the datasets from Hugging Face. You need to have a valid Hugging Face token in order to access this gated repo."
   ]
  },
  {
   "cell_type": "code",
   "metadata": {},
   "source": [
    "!huggingface-cli login --token <HF_TOKEN>"
   ],
   "outputs": [],
   "execution_count": null
  },
  {
   "cell_type": "code",
   "metadata": {},
   "source": [
    "from datasets import load_dataset\n",
    "xlam_ds = load_dataset('Salesforce/xlam-function-calling-60k', split='train')\n",
    "xlam_ds.to_json('xlam.jsonl')"
   ],
   "outputs": [],
   "execution_count": null
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "You should now have `Salesforce/xlam-function-calling-60k` raw dataset file downloaded as `xlam.jsonl`."
   ]
  },
  {
   "cell_type": "code",
   "metadata": {},
   "source": [
    "!ls"
   ],
   "outputs": [],
   "execution_count": null
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## Step 1.2 Convert the Dataset to NeMo Chat SFT Dataset\n",
    "We now convert the raw dataset to NeMo format using the following data transformation script.\n",
    "NeMo's `ChatDataModule` requires `data_root` to contain one `training.jsonl` and `validation.jsonl` for training and validation sets.\n",
    "\n",
    "Let's first define some helper functions:"
   ]
  },
  {
   "cell_type": "code",
   "metadata": {},
   "source": [
    "import json\n",
    "import random\n",
    "import ast, copy\n",
    "\n",
    "random.seed(1234)\n",
    "\n",
    "possible_headers = [\n",
    "    \"You are an expert in composing functions. You are given a question and a set of possible functions. Based on the question, you will need to make one or more function/tool calls to achieve the purpose. If none of the function can be used, point it out. If the given question lacks the parameters required by the function, also point it out. You should only return the function call in tools call sections. Here is a list of functions in JSON format that you can invoke.\\n\",\n",
    "    \"You are a function-calling assistant. Your task is to identify and execute the appropriate functions from a given list based on the user's question. If no suitable function is available, specify this. If required parameters are missing, indicate this as well. Return only the function call in the specified format. Here is the list of available functions in JSON format\",\n",
    "    \"Imagine you are an AI designed to call functions. Given a question and a set of functions, your role is to make the necessary function calls. If a function cannot be used, state this. If parameters are missing, mention it. Here are the available functions\\n\",\n",
    "    \"You are an AI agent specialized in executing function calls. Your mission is to interpret questions and determine the correct functions to execute from a provided list. If no function applies, or if parameters are missing, you must indicate this. Below are the functions you can call\\n\",\n",
    "    \"You are an intelligent agent capable of invoking functions based on user queries. Given a question and a list of functions, your task is to identify and execute the appropriate functions. If no function is suitable, specify this. If required parameters are missing, indicate this as well. Return only the function call in the specified format. Here is the list of available functions in JSON format\\n\",\n",
    "    \"As an AI assistant, you are tasked with determining the appropriate function calls based on a question and a list of available functions. If no function can be used, or if parameters are missing, indicate this. Return only the function calls in the specified format. Functions are detailed in JSON format.\"\n",
    "]\n",
    "rejection_prompts = [\n",
    "    \"I'm sorry, but after reviewing the available tools, I couldn't find a function that suits your request. Please provide more information or specify a different function. If you need assistance with anything else, feel free to ask.\",\n",
    "    \"<TOOLCALL>[]</TOOLCALL>\"\n",
    "]\n",
    "def process_system_turn(j):\n",
    "    if random.choice([0,1]) == 0:\n",
    "        j[\"tools\"] = json.loads(j[\"tools\"])\n",
    "    header = random.choice(possible_headers)\n",
    "    tools = json.dumps(j[\"tools\"], indent=4) if isinstance(j[\"tools\"], dict) else j[\"tools\"]\n",
    "    if isinstance(tools, list):\n",
    "        tools = str(tools)\n",
    "    if random.choice([0,1]) == 0:\n",
    "        system = header + \"<AVAILABLE_TOOLS>\\n\" + tools + \"</AVAILABLE_TOOLS>\" + '\\n' + \"\"\"If you decide to invoke any of the function(s), put it in the format of <TOOLCALL>[func_name1(params_name1=params_value1, params_name2=params_value2...), func_name2(params)]</TOOLCALL>\\nYou SHOULD NOT include any other information in the response.\"\"\"\n",
    "    else:\n",
    "        system = header +  \"\"\"If you decide to invoke any of the function(s), put it in the format of <TOOLCALL>[func_name1(params_name1=params_value1, params_name2=params_value2...), func_name2(params)]</TOOLCALL>\\nYou SHOULD NOT include any other information in the response.\\n\"\"\" + \"<AVAILABLE_TOOLS>\\n\" + json.dumps(j[\"tools\"], indent=4) + \"</AVAILABLE_TOOLS>\" \n",
    "\n",
    "\n",
    "    return system\n",
    "\n",
    "def put_system_ito_user(j):\n",
    "    \n",
    "    system = j[\"system\"]\n",
    "    j[\"system\"] = \"\"\n",
    "    j[\"conversations\"][0][\"value\"] = system + '\\n\\n' + j[\"conversations\"][0][\"value\"]\n",
    "\n",
    "    return j\n",
    "\n",
    "def get_all_functions(jlines, arg=\"tools\"):\n",
    "    functions = []\n",
    "    for j in jlines:\n",
    "        f = get_functions(j, arg)\n",
    "        functions += f\n",
    "    return functions\n",
    "\n",
    "def get_functions(j, arg=\"tools\"):\n",
    "    try:\n",
    "        f = ast.literal_eval(j[arg])\n",
    "    except:\n",
    "        f = json.loads(j[arg])\n",
    "    if not isinstance(f, list):\n",
    "        f = [f]\n",
    "    \n",
    "    return f\n",
    "\n",
    "def process_function(functions):\n",
    "    try:\n",
    "        functions = ast.literal_eval(functions)\n",
    "    except:\n",
    "        try:\n",
    "            functions = json.loads(functions)\n",
    "        except:\n",
    "            print(functions)\n",
    "            return None\n",
    "\n",
    "    outputs=[]\n",
    "    for function in functions:\n",
    "        out = \"\"\n",
    "        name = function[\"name\"]\n",
    "        out += name + \"(\"\n",
    "        try:\n",
    "            arguments = json.loads(function[\"arguments\"]) if isinstance(function[\"arguments\"], str) else function[\"arguments\"]\n",
    "            if len(arguments) == 0:\n",
    "                return out + \")\"\n",
    "\n",
    "            for arg, v in arguments.items():\n",
    "                if isinstance(v, str):\n",
    "                    out += arg + \"=\" + '\"' + str(v) + '\", '\n",
    "                else:\n",
    "                    out += arg + \"=\" + str(v) + ', '\n",
    "            out = out[:-2] + \")\"\n",
    "            outputs.append(out)\n",
    "        except:\n",
    "            print(\"sec error\", function)\n",
    "\n",
    "    return \"<TOOLCALL>[\" + \", \".join(outputs) + \"]</TOOLCALL>\"\n",
    "\n",
    "def write_nemo_datasetfile(json_objects, output_folder, rejection_rate=0.3, train_ratio=0.95):\n",
    "    all_function = get_all_functions(json_objects, \"tools\")\n",
    "    os.makedirs(output_folder, exist_ok=True)\n",
    "    #augmentation: add more functions to increase difficulty\n",
    "    for j in json_objects:\n",
    "        tools = json.loads(j[\"tools\"])\n",
    "        n = random.choice([0]*10 + [i for i in range(10)])\n",
    "        aug_f = random.sample(all_function, n)\n",
    "        diff_f = [f for f in aug_f if f not in tools]\n",
    "        tools += diff_f\n",
    "        random.shuffle(tools)\n",
    "        j[\"tools\"] = json.dumps(tools)\n",
    "\n",
    "    # augmentation: add rejection\n",
    "    rejs = []\n",
    "    for j in random.sample(json_objects, int(rejection_rate * len(json_objects))):\n",
    "        tools = json.loads(j[\"tools\"])\n",
    "        n = len(tools)\n",
    "        aug_f = random.sample(all_function, n)\n",
    "        diff_f = [f for f in aug_f if f not in tools]\n",
    "        tools = diff_f\n",
    "        if len(tools) == 0:\n",
    "            continue\n",
    "        new_j = copy.deepcopy(j)\n",
    "        new_j[\"tools\"] = json.dumps(tools)\n",
    "        new_j[\"rejection\"] = True\n",
    "        rejs.append(new_j)\n",
    "\n",
    "    # Adding the rejections to the list\n",
    "    json_objects += rejs\n",
    "    output = []\n",
    "    for j in json_objects:\n",
    "        d = {}\n",
    "        d[\"system\"] = process_system_turn(j)\n",
    "        d[\"mask\"] = \"User\"\n",
    "        if j.get(\"rejection\", False):\n",
    "            answer = random.choice(rejection_prompts)\n",
    "        else:\n",
    "            answer = process_function(j[\"answers\"])\n",
    "    \n",
    "        if answer == None:\n",
    "            continue\n",
    "        q = j[\"query\"]\n",
    "        d[\"conversations\"] = [{\"from\":\"User\", \"value\": q}, {\"from\":\"Assistant\", \"value\": answer}]\n",
    "    \n",
    "        output.append(d)\n",
    "        d = put_system_ito_user(d)\n",
    "        output.append(d)\n",
    "\n",
    "    # Split into train/val set\n",
    "    train_fout = open(f'{output_folder}/training.jsonl', 'w')\n",
    "    validation_fout = open(f'{output_folder}/validation.jsonl', 'w')\n",
    "    split_index = int(len(output) * train_ratio)\n",
    "    random.shuffle(output)\n",
    "    train_objects = output[:split_index]\n",
    "    val_objects = output[split_index:]\n",
    "\n",
    "    with open(f'{output_folder}/training.jsonl', 'w') as f:\n",
    "        for obj in train_objects:\n",
    "            f.write(json.dumps(obj) + '\\n')\n",
    "    with open(f'{output_folder}/validation.jsonl', 'w') as f:\n",
    "        for obj in val_objects:\n",
    "            f.write(json.dumps(obj) + '\\n')\n",
    "    print(f'Saved training.jsonl and validation.jsonl to {output_folder}.')\n"
   ],
   "outputs": [],
   "execution_count": null
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "We split the raw dataset into a training set and a validation set using a 95%/5% split:"
   ]
  },
  {
   "cell_type": "code",
   "metadata": {},
   "source": [
    "import os\n",
    "\n",
    "f_input = open(\"xlam.jsonl\")\n",
    "train_ratio = 0.90\n",
    "all_objects = [json.loads(l) for l in f_input.readlines()][:10000]\n",
    "\n",
    "write_nemo_datasetfile(all_objects, 'xlam_dataset', train_ratio=0.95)\n",
    "\n"
   ],
   "outputs": [],
   "execution_count": null
  },
  {
   "cell_type": "code",
   "metadata": {},
   "source": [
    "!ls xlam_dataset"
   ],
   "outputs": [],
   "execution_count": null
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "#### Option 2: Use the Multi-Turn Function-Calling Dataset\n",
    "\n",
    "For multi-turn function-calling dataset construction, the process is similar to constructing a single-turn function-calling dataset. The only difference is that we need to add one more role, 'Function,' to represent the function-calling return. Let's take the data below as an example."
   ]
  },
  {
   "cell_type": "code",
   "metadata": {},
   "source": [
    "{\n",
    "    \"chat\": [\n",
    "            {\n",
    "                \"role\": \"user\",\n",
    "                \"content\": \"Is there overnight lending available?\"\n",
    "            },\n",
    "            {\n",
    "                \"role\": \"assistant\",\n",
    "                \"func_call\": {\n",
    "                    \"function\": \"strategy_query\",\n",
    "                    \"params\": {\n",
    "                        \"term\": \"overnight\"\n",
    "                    }\n",
    "                }\n",
    "            },\n",
    "            {\n",
    "                \"role\": \"assistant\",\n",
    "                \"func_return\": {\n",
    "                    \"strategy_query\": [\n",
    "                        {\n",
    "                            \"product\": \"lending\",\n",
    "                            \"term\": \"overnight\",\n",
    "                            \"amount\": \"1 billion\",\n",
    "                            \"interest_rate\": \"2.0%\"\n",
    "                        }\n",
    "                    ]\n",
    "                }\n",
    "            },\n",
    "            {\n",
    "                \"role\": \"assistant\",\n",
    "                \"content\": \"Yes,1 billion,2.0%. Are you interested?\"\n",
    "            },\n",
    "            {\n",
    "                \"role\": \"user\",\n",
    "                \"content\": \"2.0% is too high. I have to think about it.\"\n",
    "            },\n",
    "            {\n",
    "                \"role\": \"assistant\",\n",
    "                \"func_call\": {\n",
    "                    \"function\": \"transaction_cancel\",\n",
    "                    \"params\": {}\n",
    "                }\n",
    "            },\n",
    "            {\n",
    "                \"role\": \"assistant\",\n",
    "                \"func_return\": {\n",
    "                    \"response\": \"The transaction has been cancelled.\"\n",
    "                }\n",
    "            }\n",
    "        ]\n",
    "}"
   ],
   "outputs": [],
   "execution_count": null
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "We transform the original data into the NeMo chat format shown below. Note that, compared to the single-turn function-calling dataset, we add the 'Function' role in conversations to record the function-calling execution result. We should also mask out the user and function roles to prevent them from participating in loss calculation."
   ]
  },
  {
   "cell_type": "code",
   "metadata": {},
   "source": [
    "{\n",
    "    \"mask\": \"User,Function\",\n",
    "    \"system\": \"Answer the following questions as best you can. You have access to the following tools:\\n\\ninquiry: Call this tool to interact with the inquiry API. What is the inquiry API useful for? 查询金融产品的价格。 Parameters: [{'name': 'product', 'type': 'string', 'description': '产品类型', 'required': True},\\n                  {'name': 'term', 'type': 'string', 'description': '期限', 'required': True},\\n                  {'name': 'amount', 'type': 'string', 'description': '交易额度', 'required': True},\\n                  {'name': 'interest_rate', 'type': 'string', 'description': '利率', 'required': True}]\\nstrategy_query: Call this tool to interact with the strategy_query API. What is the strategy_query API useful for? 查询金融产品的交易策略。 Parameters: [{'name': 'product', 'type': 'string', 'description': '产品类型', 'required': True},\\n                  {'name': 'term', 'type': 'string', 'description': '期限', 'required': True}]\\ntransaction_confirm: Call this tool to interact with the transaction_confirm API. What is the transaction_confirm API useful for? 确认交易。 Parameters: [{'name': 'product', 'type': 'string', 'description': '产品类型', 'required': True},\\n                  {'name': 'term', 'type': 'string', 'description': '期限', 'required': True},\\n                  {'name': 'amount', 'type': 'string', 'description': '交易额度', 'required': True},\\n                  {'name': 'interest_rate', 'type': 'string', 'description': '利率', 'required': True}]\\ntransaction_cancel: Call this tool to interact with the transaction_cancel API. What is the transaction_cancel API useful for? 取消交易。 Parameters: []\\n\\nOutput the following format:\\n\\nAction: the action to take, should be one of [inquiry, strategy_query, transaction_confirm, transaction_cancel]\\nAction Input: the input to the action\",\n",
    "    \"conversations\": [\n",
    "        {\n",
    "            \"from\": \"User\",\n",
    "            \"value\": \"Is there overnight lending available?\"\n",
    "        },\n",
    "        {\n",
    "            \"from\": \"Assistant\",\n",
    "            \"value\": \"Action: strategy_query\\\\nAction Input: {\\\\n \\\"term\\\": \\\"overnight\\\"\\\\n}\"\n",
    "        },\n",
    "        {\n",
    "            \"from\": \"Function\",\n",
    "            \"value\": \"{\\\"error\\\": \\\"\\\", \\\"response\\\": {\\\\n \\\"product\\\": \\\"lending\\\",\\\\n \\\"term\\\": \\\"overnight\\\",\\\\n \\\"amount\\\": \\\"1 billion\\\",\\\\n \\\"interest_rate\\\": \\\"2.0%\\\"\\\\n}}\"\n",
    "        },\n",
    "        {\n",
    "            \"from\": \"Assistant\",\n",
    "            \"value\": \"Yes, 1 billion, 2.0%. Are you interested?\"\n",
    "        },\n",
    "        {\n",
    "            \"from\": \"User\",\n",
    "            \"value\": \"2.0% is too high. I have to think about it.\"\n",
    "        },\n",
    "        {\n",
    "            \"from\": \"Assistant\",\n",
    "            \"value\": \"Action: transaction_cancel\\\\nAction Input: {}\"\n",
    "        },\n",
    "        {\n",
    "            \"from\": \"Function\",\n",
    "            \"value\": \"{\\\"error\\\": \\\"\\\", \\\"response\\\": \\\"The transaction has been cancelled.\\\"}\"\n",
    "        }\n",
    "    ]\n",
    "}"
   ],
   "outputs": [],
   "execution_count": null
  },
  {
   "metadata": {},
   "cell_type": "markdown",
   "source": "For the rest of the notebook, we will proceed with type 1: single-turn function calling dataset, since the dataset is publicly available on Hugging Face."
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## Step 2. Use NeMo-Run with NeMo2 Recipe\n",
    "\n",
    "After transforming the datasets, we should split and save the datasets into `training.jsonl`, `validation.jsonl` and `test.jsonl` under a folder. We can now start fine-tuning using NeMo-Run and assign the datasets directory path to `dataset_root` in `nemo_run.Config`. NeMo-Run will automatically tokenize the datasets and save the binary under the same data folder. Despite the different dataset formats, whether it is a single-turn function-calling dataset or a multi-turn function-calling dataset, the training script using NeMo-Run remains the same.\n",
    "\n",
    "For this tutorial, we will showcase function-calling capabilities using the Baichuan2-7B-Base model. You can see the list of all available models and their recipes [here](https://docs.nvidia.com/nemo-framework/user-guide/latest/llms/index.html).\n",
    "For Baichuan, we need to install `bitsandbytes` in the container."
   ]
  },
  {
   "cell_type": "code",
   "metadata": {},
   "source": [
    "# Install Baichuan dependency\n",
    "!pip install bitsandbytes"
   ],
   "outputs": [],
   "execution_count": null
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "### Step 2.1: Auto Download and Convert the Baichuan2 7B model to NeMo2\n",
    "Baichuan2 7B model can be automatically downloaded and converted to the NeMo2 format with the following script:"
   ]
  },
  {
   "cell_type": "code",
   "metadata": {},
   "source": [
    "%%writefile import_baichuan2_7b.py\n",
    "from nemo.collections import llm\n",
    "\n",
    "if __name__ == '__main__':\n",
    "    llm.import_ckpt(\n",
    "        model=llm.Baichuan2Model(config=llm.Baichuan2Config7B()),\n",
    "        source=\"hf://baichuan-inc/Baichuan2-7B-Base\",\n",
    "        overwrite=True,\n",
    "    )"
   ],
   "outputs": [],
   "execution_count": null
  },
  {
   "cell_type": "code",
   "metadata": {},
   "source": [
    "!torchrun import_baichuan2_7b.py"
   ],
   "outputs": [],
   "execution_count": null
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "The above script \n",
    "- Downloads the Baichuan2 7B model from Hugging Face (if not already downloaded).\n",
    "- Automatically converts it into the NeMo format.\n",
    "\n",
    "Note:\n",
    "- The script can only run in a Python environment, not in a Jupyter notebook.\n",
    "- You need to have access to `baichuan-inc/Baichuan2-7B-Base` [repo on Hugging Face](https://huggingface.co/baichuan-inc/Baichuan2-7B-Base).\n",
    "\n",
    "The conversion will create a `baichuan-inc/Baichuan2-7B-Base` folder in the default `$NEMO_HOME/models` directory. \n",
    "`$NEMO_HOME` centralizes and stores all models and datasets used for NeMo training. By default, `$NEMO_HOME` points to `/root/.cache/nemo`."
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "### Step 2.2: Finetuning Baichuan2 7B using Function-Calling Dataset\n",
    "\n",
    "For this step we use the NeMo 2 predefined recipe. \n",
    "\n",
    "First we define the recipe and executor for using NeMo 2. The predefined recipe uses LoRA fine-tuning to run on one A6000 GPU. If you would like to perform full-parameter fine-tuning, you can set `peft_scheme=None`. You can also use larger or smaller models depending on your needs and compute resources.\n"
   ]
  },
  {
   "cell_type": "code",
   "metadata": {},
   "source": [
    "import nemo_run as run\n",
    "from nemo.collections import llm\n",
    "\n",
    "def configure_recipe(nodes: int = 1, gpus_per_node: int = 1):\n",
    "    recipe = llm.recipes.baichuan2_7b.finetune_recipe(\n",
    "        num_nodes=nodes,\n",
    "        num_gpus_per_node=gpus_per_node,\n",
    "        peft_scheme='lora',\n",
    "    )\n",
    "    return recipe\n",
    "\n",
    "def local_executor_torchrun(devices: int = 1) -> run.LocalExecutor:\n",
    "    executor = run.LocalExecutor(ntasks_per_node=devices, launcher=\"torchrun\")\n",
    "    return executor"
   ],
   "outputs": [],
   "execution_count": null
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "You can learn more about NeMo Executor [here](https://github.com/NVIDIA/NeMo-Run/blob/main/docs/source/guides/execution.md).\n",
    "\n"
   ]
  },
  {
   "cell_type": "code",
   "metadata": {},
   "source": [
    "# Instantiate the recipe\n",
    "# Make sure you set the gpus_per_node as expected\n",
    "recipe = configure_recipe(gpus_per_node=8) "
   ],
   "outputs": [],
   "execution_count": null
  },
  {
   "metadata": {},
   "cell_type": "markdown",
   "source": "Now, we modify the recipe to use our function-calling chat dataset. For this tutorial, we will only train for 40 steps. You can adjust other hyperparameters as needed. We launch training with NeMo-Run's local executor."
  },
  {
   "cell_type": "code",
   "metadata": {},
   "source": [
    "recipe.resume.restore_config.path = \"nemo://baichuan-inc/Baichuan2-7B-Base\"\n",
    "recipe.data = run.Config(\n",
    "    llm.ChatDataModule,\n",
    "    dataset_root=\"xlam_dataset\",\n",
    "    seq_length=4096,\n",
    "    micro_batch_size=1,\n",
    "    global_batch_size=32,\n",
    ")\n",
    "recipe.trainer.limit_val_batches = 0\n",
    "recipe.trainer.max_steps = 40\n",
    "recipe.log.use_datetime_version = False\n",
    "recipe.log.explicit_log_dir = 'chat_sft_function_calling_demo'\n",
    "# adjust other hyperparameters as needed\n",
    "# for example:\n",
    "# recipe.optim.config.lr = 1e-6\n",
    "# recipe.trainer.strategy.tensor_model_parallel_size = 2\n",
    "# recipe.log.ckpt.save_top_k = 3\n",
    "\n",
    "executor = local_executor_torchrun(devices=recipe.trainer.devices)\n",
    "run.run(recipe, executor=executor)"
   ],
   "outputs": [],
   "execution_count": null
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": "When the training finishes, you should see the logs and find the final checkpoint location:"
  },
  {
   "cell_type": "code",
   "metadata": {},
   "source": [
    "!ls chat_sft_function_calling_demo/checkpoints/"
   ],
   "outputs": [],
   "execution_count": null
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## Step 3. Evaluate the Trained Model\n",
    "\n",
    "After successfully training a checkpoint, we should evaluate the effectiveness of the trained model. First, as a sanity check, we can quickly check the trained model performance via NeMo in-framework inference. \n",
    "\n",
    "### Run NeMo Framework Inference\n"
   ]
  },
  {
   "cell_type": "code",
   "metadata": {},
   "source": [
    "%%writefile nemo_inference.py\n",
    "\n",
    "import torch.distributed\n",
    "from megatron.core.inference.common_inference_params import CommonInferenceParams\n",
    "import nemo.lightning as nl\n",
    "import re\n",
    "\n",
    "strategy = nl.MegatronStrategy(\n",
    "    tensor_model_parallel_size=1,\n",
    "    pipeline_model_parallel_size=1,\n",
    "    context_parallel_size=1,\n",
    "    sequence_parallel=False,\n",
    "    setup_optimizers=False,\n",
    "    store_optimizer_states=False,\n",
    ")\n",
    "\n",
    "trainer = nl.Trainer(\n",
    "    accelerator=\"gpu\",\n",
    "    devices=1,\n",
    "    num_nodes=1,\n",
    "    strategy=strategy,\n",
    "    plugins=nl.MegatronMixedPrecision(\n",
    "        precision=\"bf16-mixed\",\n",
    "        params_dtype=torch.bfloat16,\n",
    "        pipeline_dtype=torch.bfloat16,\n",
    "        autocast_enabled=False,\n",
    "        grad_reduce_in_fp32=False,\n",
    "    ),\n",
    ")\n",
    "\n",
    "source = {\n",
    "    \"mask\": \"User\",\n",
    "    \"system\": \"\",\n",
    "    \"conversations\": [\n",
    "        {\n",
    "            \"from\": \"User\",\n",
    "            \"value\": \"Imagine you are an AI designed to call functions. Given a question and a set of functions, your role is to make the necessary function calls. If a function cannot be used, state this. If parameters are missing, mention it. Here are the available functions\\n<AVAILABLE_TOOLS>\\n[{\\\"name\\\": \\\"player_statistics_seasons\\\", \\\"description\\\": \\\"Fetch the season statistics for a given player using the SofaScores API.\\\", \\\"parameters\\\": {\\\"player_id\\\": {\\\"description\\\": \\\"The unique identifier for the player whose statistics are to be fetched.\\\", \\\"type\\\": \\\"int\\\", \\\"default\\\": \\\"12994\\\"}}}, {\\\"name\\\": \\\"matchstreakodds\\\", \\\"description\\\": \\\"Fetch odds data related to streaks for a specific football match using its ID.\\\", \\\"parameters\\\": {\\\"is_id\\\": {\\\"description\\\": \\\"The ID of the match for which the streaks odds data is to be retrieved.\\\", \\\"type\\\": \\\"int\\\", \\\"default\\\": 10114139}}}]</AVAILABLE_TOOLS>\\nIf you decide to invoke any of the function(s), put it in the format of <TOOLCALL>[func_name1(params_name1=params_value1, params_name2=params_value2...), func_name2(params)]</TOOLCALL>\\nYou SHOULD NOT include any other information in the response.\\n\\nFetch the season statistics for player with ID 67890.\"\n",
    "        },\n",
    "    ]\n",
    "}\n",
    "special_tokens = {\n",
    "                \"system_turn_start\": \"<extra_id_0>\",\n",
    "                \"turn_start\": \"<extra_id_1>\",\n",
    "                \"label_start\": \"<extra_id_2>\",\n",
    "                \"end_of_turn\": \"\\n\",\n",
    "                \"end_of_name\": \"\\n\",\n",
    "            }\n",
    "from nemo.collections.nlp.data.language_modeling.megatron.gpt_sft_chat_dataset import _get_header_conversation_type_mask_role\n",
    "# Apply prompt template to be the same format as training\n",
    "header, conversation, data_type, mask_role = _get_header_conversation_type_mask_role(source, special_tokens)\n",
    "prompts = [conversation]\n",
    "\n",
    "from nemo.collections.llm import api\n",
    "results = api.generate(\n",
    "    path=\"chat_sft_function_calling_demo/checkpoints/model_name=0--val_loss=0.00-step=39-consumed_samples=1280.0-last\",\n",
    "    prompts=prompts,\n",
    "    trainer=trainer,\n",
    "    inference_params=CommonInferenceParams(\n",
    "        temperature=1.0,\n",
    "        top_p=0,  # greedy decoding\n",
    "        top_k=1,  # greedy decoding\n",
    "        num_tokens_to_generate=50,\n",
    "    ),\n",
    "    text_only=True,\n",
    ")\n",
    "if torch.distributed.get_rank() == 0:\n",
    "    for i, r in enumerate(results):\n",
    "        print(\"=\" * 50)\n",
    "        print(prompts[i])\n",
    "        print(\"*\" * 50)\n",
    "        match = re.search(r'(<TOOLCALL>.*?</TOOLCALL>)', r, re.DOTALL)\n",
    "        if match:\n",
    "            print(match.group(0))\n",
    "        else:\n",
    "            print(r)\n",
    "        print(\"=\" * 50)\n",
    "        print(\"\\n\\n\")"
   ],
   "outputs": [],
   "execution_count": null
  },
  {
   "cell_type": "code",
   "metadata": {},
   "source": [
    "!torchrun nemo_inference.py"
   ],
   "outputs": [],
   "execution_count": null
  },
  {
   "metadata": {},
   "cell_type": "markdown",
   "source": [
    "We can see that the model has correctly generated the function call `<TOOLCALL>[player_statistics_seasons(player_id=67890)]</TOOLCALL>`.\n",
    "\n",
    "After the initial verification, we can now convert the checkpoint back to a Hugging Face checkpoint to deploy for inference, perform benchmark testing, and verify on downstream tasks.\n"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "### Convert NeMo2 Model to HuggingFace Format\n",
    "\n",
    "If you're satisfied with the trained model's performance, we can continue. For the benchmark and downstream task assessment in the next two steps, the applications we will use only accept OpenAI API format inference requests. Therefore, we should first convert the saved checkpoint to a Hugging Face checkpoint for further deployment."
   ]
  },
  {
   "cell_type": "code",
   "metadata": {},
   "source": [
    "%%writefile convert_to_hf.py\n",
    "from pathlib import Path\n",
    "from nemo.collections.llm import export_ckpt\n",
    "\n",
    "if __name__ == \"__main__\":\n",
    "    export_ckpt(\n",
    "        path=Path(\"chat_sft_function_calling_demo/checkpoints/model_name=0--val_loss=0.00-step=39-consumed_samples=1280.0-last\"),\n",
    "        target=\"hf\",\n",
    "        output_path=Path(\"chat_sft_function_calling_demo/sft_hf\"),\n",
    "        overwrite=True,\n",
    "    )"
   ],
   "outputs": [],
   "execution_count": null
  },
  {
   "cell_type": "code",
   "metadata": {},
   "source": [
    "!torchrun convert_to_hf.py"
   ],
   "outputs": [],
   "execution_count": null
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "Then, we can follow the steps in [TensorRT-LLM](https://github.com/NVIDIA/TensorRT-LLM/tree/main/examples/apps) to establish an OpenAI API, allowing us to deploy the model and handle inference requests efficiently.\n",
    "\n",
    "### Benchmark the Fine-Tuned Model\n",
    "\n",
    "To benchmark the function-calling ability of the fine-tuned LLM, you can refer to the [berkeley-function-call-leaderboard](https://github.com/ShishirPatil/gorilla/tree/main/berkeley-function-call-leaderboard). On a converged model, you will get benchmark results similar to those shown below.\n",
    "\n",
    "...\n",
    "\n",
    "🔍 Running test: multiple\n",
    "\n",
    "✅ Test completed: multiple. 🎯 Accuracy: 0.89\n",
    "\n",
    "🔍 Running test: parallel\n",
    "\n",
    "✅ Test completed: parallel. 🎯 Accuracy: 0.87\n",
    "\n",
    "🔍 Running test: parallel_multiple\n",
    "\n",
    "✅ Test completed: parallel_multiple. 🎯 Accuracy: 0.835\n",
    "\n",
    "...\n"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "### Perform Downstream Task Assessment\n",
    "\n",
    "If you do not have your own agent ready, we provide an agent demo below for a quick assessment. The prompts in this section come from a different dataset on financial Q&A. \n",
    "\n",
    "Since the returns of the function calls are hard-coded in this demo, we recommend you use the conversations below and input them in order:\n",
    "\n",
    "> Do you have overnight call loan?\n",
    "\n",
    "> If the interest rate can drop to 1.5%, I will proceed.\n",
    "\n",
    "> Okay, confirm the transaction."
   ]
  },
  {
   "cell_type": "code",
   "metadata": {},
   "source": [
    "!pip install -U \"qwen-agent[gui,rag,code_interpreter,python_executor]\""
   ],
   "outputs": [],
   "execution_count": null
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "Note that you should modify the LLM service configuration according to your model name and server address, such as `http://10.123.123.123:8000/v1`. If you want to try LLMs running on [NIM online](https://build.nvidia.com/explore/discover), you need to apply for a free API key and use the server address `https://integrate.api.nvidia.com/v1`."
   ]
  },
  {
   "cell_type": "code",
   "metadata": {},
   "source": [
    "%%writefile start_app.py\n",
    "\n",
    "import os\n",
    "from qwen_agent.agents import ReActChat\n",
    "from qwen_agent.gui import WebUI\n",
    "\n",
    "from qwen_agent.tools.base import BaseTool, register_tool\n",
    "import json\n",
    "\n",
    "def init_agent_service():\n",
    "    llm_cfg = {\n",
    "        'model': 'nvidia/mistral-nemo-minitron-8b-instruct',\n",
    "        'model_server': 'https://integrate.api.nvidia.com/v1', # http://10.137.164.245:8000/v1\n",
    "        'api_key': \"nvapi-YOUR-API-KEY\",\n",
    "    }\n",
    "    tools = ['inquiry', 'strategy_query', 'transaction_confirm', 'transaction_cancel']\n",
    "    bot = ReActChat(llm=llm_cfg,\n",
    "                    name='match transaction agent',\n",
    "                    description='This agent can help to match transaction.',\n",
    "                    function_list=tools)\n",
    "    return bot\n",
    "\n",
    "@register_tool('inquiry')\n",
    "class Inquiry(BaseTool): \n",
    "    description = 'After the initial quote, if the customer negotiates, use this tool to check the prices available for financial products.'\n",
    "    parameters = [{'name': 'product', 'type': 'string', 'description': 'Product type.', 'required': True},\n",
    "                  {'name': 'term', 'type': 'string', 'description': 'Term.', 'required': True},\n",
    "                  {'name': 'amount', 'type': 'string', 'description': 'Transaction amount.', 'required': True},\n",
    "                  {'name': 'interest_rate', 'type': 'string', 'description': 'Interest rate.', 'required': True},]\n",
    "    \n",
    "    def call(self, params: str, **kwargs) -> str:\n",
    "        return json.dumps({'term': 'overnight', 'amount': '1 billion', 'interest_rate': '1.5%'},\n",
    "                ensure_ascii=False)\n",
    "\n",
    "@register_tool('strategy_query')\n",
    "class StrategyQuery(BaseTool): \n",
    "    description = 'Check the initial quotes for financial products.'\n",
    "    parameters = [{'name': 'product', 'type': 'string', 'description': 'Product type.', 'required': True},\n",
    "                  {'name': 'term', 'type': 'string', 'description': 'Term.', 'required': True},]\n",
    "    def call(self, params: str, **kwargs) -> str:\n",
    "        return json.dumps({'product': 'call loan', 'term': 'overnight', 'amount': '1 billion', 'interest_rate': '1.6%'},\n",
    "                ensure_ascii=False)\n",
    "\n",
    "@register_tool('transaction_confirm')\n",
    "class TransactionConfirm(BaseTool):\n",
    "    description = 'Confirm the transaction.'\n",
    "    parameters = [{'name': 'product', 'type': 'string', 'description': 'Product type.', 'required': True},\n",
    "                  {'name': 'term', 'type': 'string', 'description': 'Term.', 'required': True},\n",
    "                  {'name': 'amount', 'type': 'string', 'description': 'Transaction amount.', 'required': True},\n",
    "                  {'name': 'interest_rate', 'type': 'string', 'description': 'Interest rate.', 'required': True},]\n",
    "    def call(self, params: str, **kwargs) -> str:\n",
    "        return json.dumps({'response': 'success'},\n",
    "                ensure_ascii=False)\n",
    "\n",
    "@register_tool('transaction_cancel')\n",
    "class TransactionCancel(BaseTool): \n",
    "    description = 'Cancel the transaction.'\n",
    "    parameters = []\n",
    "    def call(self, params: str, **kwargs) -> str:\n",
    "        return json.dumps({'response': 'success'},\n",
    "                ensure_ascii=False)\n",
    "\n",
    "\n",
    "def app_gui():\n",
    "    bot = init_agent_service()\n",
    "    chatbot_config = {\n",
    "        'prompt.suggestions': ['Do you have overnight call loan?', 'If the interest rate can drop to 1.5%, I will proceed.', 'Okay, confirm the transaction.']\n",
    "    }\n",
    "    WebUI(bot, chatbot_config=chatbot_config).run(share=True)\n",
    "\n",
    "\n",
    "if __name__ == '__main__':\n",
    "    app_gui()\n"
   ],
   "outputs": [],
   "execution_count": null
  },
  {
   "cell_type": "code",
   "metadata": {},
   "source": [
    "## Please Run When You're Done!\n",
    "import IPython\n",
    "app = IPython.Application.instance()\n",
    "app.kernel.do_shutdown(True)"
   ],
   "outputs": [],
   "execution_count": null
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "Python 3 (ipykernel)",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.12.3"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 4
}