File size: 60,580 Bytes
1227663
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488
489
490
491
492
493
494
495
496
497
498
499
500
501
502
503
504
505
506
507
508
509
510
511
512
513
514
515
516
517
518
519
520
521
522
523
524
525
526
527
528
529
530
531
532
533
534
535
536
537
538
539
540
541
542
543
544
545
546
547
548
549
550
551
552
553
554
555
556
557
558
559
560
561
562
563
564
565
566
567
568
569
570
571
572
573
574
575
576
577
578
579
580
581
582
583
584
585
586
587
588
589
590
591
592
593
594
595
596
597
598
599
600
601
602
603
604
605
606
607
608
609
610
611
612
613
614
615
616
617
618
619
620
621
622
623
624
625
626
627
628
629
630
631
632
633
634
635
636
637
638
639
640
641
642
643
644
645
646
647
648
649
650
651
652
653
654
655
656
657
658
659
660
661
662
663
664
665
666
667
668
669
670
671
672
673
674
675
676
677
678
679
680
681
682
683
684
685
686
687
688
689
690
691
692
693
694
695
696
697
698
699
700
701
702
703
704
705
706
707
708
709
710
711
712
713
714
715
716
717
718
719
720
721
722
723
724
725
726
727
728
729
730
731
732
733
734
735
736
737
738
739
740
741
742
743
744
745
746
747
748
749
750
751
752
753
754
755
756
757
758
759
760
761
762
763
764
765
766
767
768
769
770
771
772
773
774
775
776
777
778
779
780
781
782
783
784
785
786
787
788
789
790
791
792
793
794
795
796
797
798
799
800
801
802
803
804
805
806
807
808
809
810
811
812
813
814
815
816
817
818
819
820
821
822
823
824
825
826
827
828
829
830
831
832
833
834
835
836
837
838
839
840
841
842
843
844
845
846
847
848
849
850
851
852
853
854
855
856
857
858
859
860
861
862
863
864
865
866
867
868
869
870
871
872
873
874
875
876
877
878
879
880
881
882
883
884
885
886
887
888
889
890
891
892
893
894
895
896
897
898
899
900
901
902
903
904
905
906
907
908
909
910
911
912
913
914
915
916
917
918
919
920
921
922
923
924
925
926
927
928
929
930
931
932
933
934
935
936
937
938
939
940
941
942
943
944
945
946
947
948
949
950
951
952
953
954
955
956
957
958
959
960
961
962
963
964
965
966
967
968
969
970
971
972
973
974
975
976
977
978
979
980
981
982
983
984
985
986
987
988
989
990
991
992
993
994
995
996
997
998
999
1000
1001
1002
1003
1004
1005
1006
1007
1008
1009
1010
1011
1012
1013
1014
1015
1016
1017
1018
1019
1020
1021
1022
1023
1024
1025
1026
1027
1028
1029
1030
1031
1032
1033
1034
1035
1036
1037
1038
1039
1040
1041
1042
1043
1044
1045
1046
1047
1048
1049
1050
1051
1052
1053
1054
1055
1056
1057
1058
1059
1060
1061
1062
1063
1064
1065
1066
1067
1068
1069
1070
1071
1072
1073
1074
1075
1076
1077
1078
1079
1080
1081
1082
1083
1084
1085
1086
1087
1088
1089
1090
1091
1092
1093
1094
1095
1096
1097
1098
1099
1100
1101
1102
1103
1104
1105
1106
1107
1108
1109
1110
1111
1112
1113
1114
1115
1116
1117
1118
1119
1120
1121
1122
1123
1124
1125
1126
1127
1128
1129
1130
1131
1132
1133
1134
1135
1136
1137
1138
1139
1140
1141
1142
1143
1144
1145
1146
1147
1148
1149
1150
1151
1152
1153
1154
1155
1156
1157
1158
1159
1160
1161
1162
1163
1164
1165
1166
1167
1168
1169
1170
1171
1172
1173
1174
1175
1176
1177
1178
1179
1180
1181
1182
1183
1184
1185
1186
1187
1188
1189
1190
1191
1192
1193
1194
1195
1196
1197
1198
1199
1200
1201
1202
1203
1204
1205
1206
1207
1208
1209
1210
1211
1212
1213
1214
1215
1216
1217
1218
1219
1220
1221
1222
1223
1224
1225
1226
1227
1228
1229
1230
1231
1232
1233
1234
1235
1236
1237
1238
1239
1240
1241
1242
1243
1244
1245
1246
1247
1248
1249
1250
1251
1252
1253
1254
1255
1256
1257
1258
1259
1260
1261
1262
1263
1264
1265
1266
1267
1268
1269
1270
1271
1272
1273
1274
1275
1276
1277
1278
1279
1280
1281
1282
1283
1284
1285
1286
1287
1288
1289
1290
1291
1292
1293
1294
1295
1296
1297
1298
1299
1300
1301
1302
1303
1304
1305
1306
1307
1308
1309
1310
1311
1312
1313
1314
1315
1316
1317
1318
1319
1320
1321
1322
1323
1324
1325
1326
1327
1328
1329
1330
1331
1332
1333
1334
1335
1336
1337
1338
1339
1340
1341
1342
1343
1344
1345
1346
1347
1348
1349
1350
1351
1352
1353
1354
1355
1356
1357
1358
1359
1360
1361
1362
1363
1364
1365
1366
1367
1368
1369
1370
1371
1372
1373
1374
1375
1376
1377
1378
1379
1380
1381
1382
{
  "cells": [
    {
      "cell_type": "markdown",
      "metadata": {
        "id": "lElF3o5PR6ys"
      },
      "source": [
        "# Your First RAG Application\n",
        "\n",
        "In this notebook, we'll walk you through each of the components that are involved in a simple RAG application.\n",
        "\n",
        "We won't be leveraging any fancy tools, just the OpenAI Python SDK, Numpy, and some classic Python.\n",
        "\n",
        "> NOTE: This was done with Python 3.11.4.\n",
        "\n",
        "> NOTE: There might be [compatibility issues](https://github.com/wandb/wandb/issues/7683) if you're on NVIDIA driver >552.44 As an interim solution - you can rollback your drivers to the 552.44."
      ]
    },
    {
      "cell_type": "markdown",
      "metadata": {
        "id": "5CtcL8P8R6yt"
      },
      "source": [
        "## Table of Contents:\n",
        "\n",
        "- Task 1: Imports and Utilities\n",
        "- Task 2: Documents\n",
        "- Task 3: Embeddings and Vectors\n",
        "- Task 4: Prompts\n",
        "- Task 5: Retrieval Augmented Generation\n",
        "  - 🚧 Activity #1: Augment RAG"
      ]
    },
    {
      "cell_type": "markdown",
      "metadata": {
        "id": "1Dz6GYilR6yt"
      },
      "source": [
        "Let's look at a rather complicated looking visual representation of a basic RAG application.\n",
        "\n",
        "<img src=\"https://i.imgur.com/vD8b016.png\" />"
      ]
    },
    {
      "cell_type": "markdown",
      "metadata": {
        "id": "PjmC0KFtR6yt"
      },
      "source": [
        "## Task 1: Imports and Utility\n",
        "\n",
        "We're just doing some imports and enabling `async` to work within the Jupyter environment here, nothing too crazy!"
      ]
    },
    {
      "cell_type": "code",
      "execution_count": 1,
      "metadata": {
        "id": "Z1dyrG4hR6yt"
      },
      "outputs": [],
      "source": [
        "from aimakerspace.text_utils import TextFileLoader, CharacterTextSplitter\n",
        "from aimakerspace.vectordatabase import VectorDatabase\n",
        "import asyncio"
      ]
    },
    {
      "cell_type": "code",
      "execution_count": 2,
      "metadata": {
        "id": "9OrFZRnER6yt"
      },
      "outputs": [],
      "source": [
        "import nest_asyncio\n",
        "nest_asyncio.apply()"
      ]
    },
    {
      "cell_type": "markdown",
      "metadata": {
        "id": "M0jGnpQsR6yu"
      },
      "source": [
        "## Task 2: Documents\n",
        "\n",
        "We'll be concerning ourselves with this part of the flow in the following section:\n",
        "\n",
        "<img src=\"https://i.imgur.com/jTm9gjk.png\" />"
      ]
    },
    {
      "cell_type": "markdown",
      "metadata": {
        "id": "-SFPWvRUR6yu"
      },
      "source": [
        "### Loading Source Documents\n",
        "\n",
        "So, first things first, we need some documents to work with.\n",
        "\n",
        "While we could work directly with the `.txt` files (or whatever file-types you wanted to extend this to) we can instead do some batch processing of those documents at the beginning in order to store them in a more machine compatible format.\n",
        "\n",
        "In this case, we're going to parse our text file into a single document in memory.\n",
        "\n",
        "Let's look at the relevant bits of the `TextFileLoader` class:\n",
        "\n",
        "```python\n",
        "def load_file(self):\n",
        "        with open(self.path, \"r\", encoding=self.encoding) as f:\n",
        "            self.documents.append(f.read())\n",
        "```\n",
        "\n",
        "We're simply loading the document using the built in `open` method, and storing that output in our `self.documents` list.\n",
        "\n",
        "> NOTE: We're using blogs from PMarca (Marc Andreessen) as our sample data. This data is largely irrelevant as we want to focus on the mechanisms of RAG, which includes out data's shape and quality - but not specifically what the contents of the data are. \n"
      ]
    },
    {
      "cell_type": "code",
      "execution_count": 3,
      "metadata": {
        "colab": {
          "base_uri": "https://localhost:8080/"
        },
        "id": "Ia2sUEuGR6yu",
        "outputId": "84937ecc-c35f-4c4a-a4ab-9da72625954c"
      },
      "outputs": [
        {
          "data": {
            "text/plain": [
              "1"
            ]
          },
          "execution_count": 3,
          "metadata": {},
          "output_type": "execute_result"
        }
      ],
      "source": [
        "text_loader = TextFileLoader(\"data/PMarcaBlogs.txt\")\n",
        "documents = text_loader.load_documents()\n",
        "len(documents)"
      ]
    },
    {
      "cell_type": "code",
      "execution_count": 4,
      "metadata": {
        "colab": {
          "base_uri": "https://localhost:8080/"
        },
        "id": "bV-tj5WFR6yu",
        "outputId": "674eb315-1ff3-4597-bcf5-38ece0a812ac"
      },
      "outputs": [
        {
          "name": "stdout",
          "output_type": "stream",
          "text": [
            "\n",
            "The Pmarca Blog Archives\n",
            "(select posts from 2007-2009)\n",
            "Marc Andreessen\n",
            "copyright: Andreessen Horow\n"
          ]
        }
      ],
      "source": [
        "print(documents[0][:100])"
      ]
    },
    {
      "cell_type": "markdown",
      "metadata": {
        "id": "nHlTvCzYR6yu"
      },
      "source": [
        "### Splitting Text Into Chunks\n",
        "\n",
        "As we can see, there is one massive document.\n",
        "\n",
        "We'll want to chunk the document into smaller parts so it's easier to pass the most relevant snippets to the LLM.\n",
        "\n",
        "There is no fixed way to split/chunk documents - and you'll need to rely on some intuition as well as knowing your data *very* well in order to build the most robust system.\n",
        "\n",
        "For this toy example, we'll just split blindly on length.\n",
        "\n",
        ">There's an opportunity to clear up some terminology here, for this course we will be stick to the following:\n",
        ">\n",
        ">- \"source documents\" : The `.txt`, `.pdf`, `.html`, ..., files that make up the files and information we start with in its raw format\n",
        ">- \"document(s)\" : single (or more) text object(s)\n",
        ">- \"corpus\" : the combination of all of our documents"
      ]
    },
    {
      "cell_type": "markdown",
      "metadata": {
        "id": "2G6Voc0jR6yv"
      },
      "source": [
        "As you can imagine (though it's not specifically true in this toy example) the idea of splitting documents is to break them into managable sized chunks that retain the most relevant local context."
      ]
    },
    {
      "cell_type": "code",
      "execution_count": 5,
      "metadata": {
        "colab": {
          "base_uri": "https://localhost:8080/"
        },
        "id": "UMC4tsEmR6yv",
        "outputId": "08689c0b-57cd-4040-942a-8193e997f5cb"
      },
      "outputs": [
        {
          "data": {
            "text/plain": [
              "373"
            ]
          },
          "execution_count": 5,
          "metadata": {},
          "output_type": "execute_result"
        }
      ],
      "source": [
        "text_splitter = CharacterTextSplitter()\n",
        "split_documents = text_splitter.split_texts(documents)\n",
        "len(split_documents)"
      ]
    },
    {
      "cell_type": "markdown",
      "metadata": {
        "id": "W2wKT0WLR6yv"
      },
      "source": [
        "Let's take a look at some of the documents we've managed to split."
      ]
    },
    {
      "cell_type": "code",
      "execution_count": 6,
      "metadata": {
        "colab": {
          "base_uri": "https://localhost:8080/"
        },
        "id": "vcYMwWJoR6yv",
        "outputId": "20d69876-feca-4826-b4be-32915276987a"
      },
      "outputs": [
        {
          "data": {
            "text/plain": [
              "['\\ufeff\\nThe Pmarca Blog Archives\\n(select posts from 2007-2009)\\nMarc Andreessen\\ncopyright: Andreessen Horowitz\\ncover design: Jessica Hagy\\nproduced using: Pressbooks\\nContents\\nTHE PMARCA GUIDE TO STARTUPS\\nPart 1: Why not to do a startup 2\\nPart 2: When the VCs say \"no\" 10\\nPart 3: \"But I don\\'t know any VCs!\" 18\\nPart 4: The only thing that matters 25\\nPart 5: The Moby Dick theory of big companies 33\\nPart 6: How much funding is too little? Too much? 41\\nPart 7: Why a startup\\'s initial business plan doesn\\'t\\nmatter that much\\n49\\nTHE PMARCA GUIDE TO HIRING\\nPart 8: Hiring, managing, promoting, and Dring\\nexecutives\\n54\\nPart 9: How to hire a professional CEO 68\\nHow to hire the best people you\\'ve ever worked\\nwith\\n69\\nTHE PMARCA GUIDE TO BIG COMPANIES\\nPart 1: Turnaround! 82\\nPart 2: Retaining great people 86\\nTHE PMARCA GUIDE TO CAREER, PRODUCTIVITY,\\nAND SOME OTHER THINGS\\nIntroduction 97\\nPart 1: Opportunity 99\\nPart 2: Skills and education 107\\nPart 3: Where to go and why 120\\nThe Pmarca Guide to Personal Productivi']"
            ]
          },
          "execution_count": 6,
          "metadata": {},
          "output_type": "execute_result"
        }
      ],
      "source": [
        "split_documents[0:1]"
      ]
    },
    {
      "cell_type": "markdown",
      "metadata": {
        "id": "HOU-RFP_R6yv"
      },
      "source": [
        "## Task 3: Embeddings and Vectors\n",
        "\n",
        "Next, we have to convert our corpus into a \"machine readable\" format as we explored in the Embedding Primer notebook.\n",
        "\n",
        "Today, we're going to talk about the actual process of creating, and then storing, these embeddings, and how we can leverage that to intelligently add context to our queries."
      ]
    },
    {
      "cell_type": "markdown",
      "metadata": {},
      "source": [
        "### OpenAI API Key\n",
        "\n",
        "In order to access OpenAI's APIs, we'll need to provide our OpenAI API Key!\n",
        "\n",
        "You can work through the folder \"OpenAI API Key Setup\" for more information on this process if you don't already have an API Key!"
      ]
    },
    {
      "cell_type": "code",
      "execution_count": 7,
      "metadata": {},
      "outputs": [],
      "source": [
        "import os\n",
        "import openai\n",
        "from getpass import getpass\n",
        "\n",
        "openai.api_key = getpass(\"OpenAI API Key: \")\n",
        "os.environ[\"OPENAI_API_KEY\"] = openai.api_key"
      ]
    },
    {
      "cell_type": "markdown",
      "metadata": {},
      "source": [
        "### Vector Database\n",
        "\n",
        "Let's set up our vector database to hold all our documents and their embeddings!"
      ]
    },
    {
      "cell_type": "markdown",
      "metadata": {
        "id": "kDQrfAR1R6yv"
      },
      "source": [
        "While this is all baked into 1 call - we can look at some of the code that powers this process to get a better understanding:\n",
        "\n",
        "Let's look at our `VectorDatabase().__init__()`:\n",
        "\n",
        "```python\n",
        "def __init__(self, embedding_model: EmbeddingModel = None):\n",
        "        self.vectors = defaultdict(np.array)\n",
        "        self.embedding_model = embedding_model or EmbeddingModel()\n",
        "```\n",
        "\n",
        "As you can see - our vectors are merely stored as a dictionary of `np.array` objects.\n",
        "\n",
        "Secondly, our `VectorDatabase()` has a default `EmbeddingModel()` which is a wrapper for OpenAI's `text-embedding-3-small` model.\n",
        "\n",
        "> **Quick Info About `text-embedding-3-small`**:\n",
        "> - It has a context window of **8191** tokens\n",
        "> - It returns vectors with dimension **1536**"
      ]
    },
    {
      "cell_type": "markdown",
      "metadata": {
        "id": "L273pRdeR6yv"
      },
      "source": [
        "#### ❓Question #1:\n",
        "\n",
        "The default embedding dimension of `text-embedding-3-small` is 1536, as noted above. \n",
        "\n",
        "1. Is there any way to modify this dimension?\n",
        "Yes you can reduce dimension but not increase it. Helps with speeding up searches and reducing cost. We can reduce it up to 256\n",
        "2. What technique does OpenAI use to achieve this?\n",
        "OpenAI uses Principal Component Analysis(PCA) the model first generates a 1536 then uses PCA to reduce the dimension\n",
        "Reducing retains all the important information and benefits relationships between texts reduces resources needed and has good performance even at lower dimensions\n",
        "\n",
        "\n",
        "> NOTE: Check out this [API documentation](https://platform.openai.com/docs/api-reference/embeddings/create) for the answer to question #1, and [this documentation](https://platform.openai.com/docs/guides/embeddings/use-cases) for an answer to question #2!"
      ]
    },
    {
      "cell_type": "markdown",
      "metadata": {
        "id": "w5FZY7K3R6yv"
      },
      "source": [
        "We can call the `async_get_embeddings` method of our `EmbeddingModel()` on a list of `str` and receive a list of `float` back!\n",
        "\n",
        "```python\n",
        "async def async_get_embeddings(self, list_of_text: List[str]) -> List[List[float]]:\n",
        "        return await aget_embeddings(\n",
        "            list_of_text=list_of_text, engine=self.embeddings_model_name\n",
        "        )\n",
        "```"
      ]
    },
    {
      "cell_type": "markdown",
      "metadata": {
        "id": "cSct6X0aR6yv"
      },
      "source": [
        "We cast those to `np.array` when we build our `VectorDatabase()`:\n",
        "\n",
        "```python\n",
        "async def abuild_from_list(self, list_of_text: List[str]) -> \"VectorDatabase\":\n",
        "        embeddings = await self.embedding_model.async_get_embeddings(list_of_text)\n",
        "        for text, embedding in zip(list_of_text, embeddings):\n",
        "            self.insert(text, np.array(embedding))\n",
        "        return self\n",
        "```\n",
        "\n",
        "And that's all we need to do!"
      ]
    },
    {
      "cell_type": "code",
      "execution_count": 8,
      "metadata": {
        "id": "O4KoLbVDR6yv"
      },
      "outputs": [],
      "source": [
        "vector_db = VectorDatabase()\n",
        "vector_db = asyncio.run(vector_db.abuild_from_list(split_documents))"
      ]
    },
    {
      "cell_type": "markdown",
      "metadata": {
        "id": "SSZwaGvpR6yv"
      },
      "source": [
        "#### ❓Question #2:\n",
        "\n",
        "What are the benefits of using an `async` approach to collecting our embeddings?\n",
        "\n",
        "Improves scalability, uses resources more efficiently and has faster processing when having many concurrent requests\n",
        "In our case the multiple 100s of api calls and that way our chunks can be processed faster instead of waiting for each chunk to process and sending the next. The scalability of async is better for our examples.\n",
        "\n",
        "\n",
        "> NOTE: Determining the core difference between `async` and `sync` will be useful! If you get stuck - ask ChatGPT!"
      ]
    },
    {
      "cell_type": "markdown",
      "metadata": {
        "id": "nRBdIt-xR6yw"
      },
      "source": [
        "So, to review what we've done so far in natural language:\n",
        "\n",
        "1. We load source documents\n",
        "2. We split those source documents into smaller chunks (documents)\n",
        "3. We send each of those documents to the `text-embedding-3-small` OpenAI API endpoint\n",
        "4. We store each of the text representations with the vector representations as keys/values in a dictionary"
      ]
    },
    {
      "cell_type": "markdown",
      "metadata": {
        "id": "4-vWANZyR6yw"
      },
      "source": [
        "### Semantic Similarity\n",
        "\n",
        "The next step is to be able to query our `VectorDatabase()` with a `str` and have it return to us vectors and text that is most relevant from our corpus.\n",
        "\n",
        "We're going to use the following process to achieve this in our toy example:\n",
        "\n",
        "1. We need to embed our query with the same `EmbeddingModel()` as we used to construct our `VectorDatabase()`\n",
        "2. We loop through every vector in our `VectorDatabase()` and use a distance measure to compare how related they are\n",
        "3. We return a list of the top `k` closest vectors, with their text representations\n",
        "\n",
        "There's some very heavy optimization that can be done at each of these steps - but let's just focus on the basic pattern in this notebook.\n",
        "\n",
        "> We are using [cosine similarity](https://www.engati.com/glossary/cosine-similarity) as a distance metric in this example - but there are many many distance metrics you could use - like [these](https://flavien-vidal.medium.com/similarity-distances-for-natural-language-processing-16f63cd5ba55)\n",
        "\n",
        "> We are using a rather inefficient way of calculating relative distance between the query vector and all other vectors - there are more advanced approaches that are much more efficient, like [ANN](https://towardsdatascience.com/comprehensive-guide-to-approximate-nearest-neighbors-algorithms-8b94f057d6b6)"
      ]
    },
    {
      "cell_type": "code",
      "execution_count": 9,
      "metadata": {
        "colab": {
          "base_uri": "https://localhost:8080/"
        },
        "id": "76d96uavR6yw",
        "outputId": "bbfccc31-20a2-41c7-c14d-46554a43ed2d"
      },
      "outputs": [
        {
          "data": {
            "text/plain": [
              "[('ordingly.\\nSeventh, when hiring the executive to run your former specialty, be\\ncareful you don’t hire someone weak on purpose.\\nThis sounds silly, but you wouldn’t believe how oaen it happens.\\nThe CEO who used to be a product manager who has a weak\\nproduct management executive. The CEO who used to be in\\nsales who has a weak sales executive. The CEO who used to be\\nin marketing who has a weak marketing executive.\\nI call this the “Michael Eisner Memorial Weak Executive Problem” — aaer the CEO of Disney who had previously been a brilliant TV network executive. When he bought ABC at Disney, it\\npromptly fell to fourth place. His response? “If I had an extra\\ntwo days a week, I could turn around ABC myself.” Well, guess\\nwhat, he didn’t have an extra two days a week.\\nA CEO — or a startup founder — oaen has a hard time letting\\ngo of the function that brought him to the party. The result: you\\nhire someone weak into the executive role for that function so\\nthat you can continue to be “the man” — cons',\n",
              "  np.float64(0.6539043027545371)),\n",
              " ('m. They have areas where they are truly deXcient in judgment or skill set. That’s just life. Almost nobody is brilliant\\nat everything. When hiring and when Hring executives, you\\nmust therefore focus on strength rather than lack of weakness. Everybody has severe weaknesses even if you can’t see\\nthem yet. When managing, it’s oaen useful to micromanage and\\nto provide remedial training around these weaknesses. Doing so\\nmay make the diWerence between an executive succeeding or\\nfailing.\\nFor example, you might have a brilliant engineering executive\\nwho generates excellent team loyalty, has terriXc product judgment and makes the trains run on time. This same executive\\nmay be very poor at relating to the other functions in the company. She may generate far more than her share of cross-functional conYicts, cut herself oW from critical information, and\\nsigniXcantly impede your ability to sell and market eWectively.\\nYour alternatives are:\\n(a) Macro-manage and give her an annual or quarterly object',\n",
              "  np.float64(0.5036247837648782)),\n",
              " ('ed?\\nIn reality — as opposed to Marc’s warped view of reality — it will\\nbe extremely helpful for Marc [if he were actually the CEO,\\nwhich he is not] to meet with the new head of engineering daily\\nwhen she comes on board and review all of her thinking and\\ndecisions. This level of micromanagement will accelerate her\\ntraining and improve her long-term eWectiveness. It will make\\nher seem smarter to the rest of the organization which will build\\ncredibility and conXdence while she comes up to speed. Micromanaging new executives is generally a good idea for a limited\\nperiod of time.\\nHowever, that is not the only time that it makes sense to micro66 The Pmarca Blog Archives\\nmanage executives. It turns out that just about every executive\\nin the world has a few things that are seriously wrong with\\nthem. They have areas where they are truly deXcient in judgment or skill set. That’s just life. Almost nobody is brilliant\\nat everything. When hiring and when Hring executives, you\\nmust therefore focus o',\n",
              "  np.float64(0.4814861061791066))]"
            ]
          },
          "execution_count": 9,
          "metadata": {},
          "output_type": "execute_result"
        }
      ],
      "source": [
        "vector_db.search_by_text(\"What is the Michael Eisner Memorial Weak Executive Problem?\", k=3)"
      ]
    },
    {
      "cell_type": "markdown",
      "metadata": {
        "id": "TehsfIiKR6yw"
      },
      "source": [
        "## Task 4: Prompts\n",
        "\n",
        "In the following section, we'll be looking at the role of prompts - and how they help us to guide our application in the right direction.\n",
        "\n",
        "In this notebook, we're going to rely on the idea of \"zero-shot in-context learning\".\n",
        "\n",
        "This is a lot of words to say: \"We will ask it to perform our desired task in the prompt, and provide no examples.\""
      ]
    },
    {
      "cell_type": "markdown",
      "metadata": {
        "id": "yXpA0UveR6yw"
      },
      "source": [
        "### XYZRolePrompt\n",
        "\n",
        "Before we do that, let's stop and think a bit about how OpenAI's chat models work.\n",
        "\n",
        "We know they have roles - as is indicated in the following API [documentation](https://platform.openai.com/docs/api-reference/chat/create#chat/create-messages)\n",
        "\n",
        "There are three roles, and they function as follows (taken directly from [OpenAI](https://platform.openai.com/docs/guides/gpt/chat-completions-api)):\n",
        "\n",
        "- `{\"role\" : \"system\"}` : The system message helps set the behavior of the assistant. For example, you can modify the personality of the assistant or provide specific instructions about how it should behave throughout the conversation. However note that the system message is optional and the model’s behavior without a system message is likely to be similar to using a generic message such as \"You are a helpful assistant.\"\n",
        "- `{\"role\" : \"user\"}` : The user messages provide requests or comments for the assistant to respond to.\n",
        "- `{\"role\" : \"assistant\"}` : Assistant messages store previous assistant responses, but can also be written by you to give examples of desired behavior.\n",
        "\n",
        "The main idea is this:\n",
        "\n",
        "1. You start with a system message that outlines how the LLM should respond, what kind of behaviours you can expect from it, and more\n",
        "2. Then, you can provide a few examples in the form of \"assistant\"/\"user\" pairs\n",
        "3. Then, you prompt the model with the true \"user\" message.\n",
        "\n",
        "In this example, we'll be forgoing the 2nd step for simplicities sake."
      ]
    },
    {
      "cell_type": "markdown",
      "metadata": {
        "id": "gdZ2KWKSR6yw"
      },
      "source": [
        "#### Utility Functions\n",
        "\n",
        "You'll notice that we're using some utility functions from the `aimakerspace` module - let's take a peek at these and see what they're doing!"
      ]
    },
    {
      "cell_type": "markdown",
      "metadata": {
        "id": "GFbeJDDsR6yw"
      },
      "source": [
        "##### XYZRolePrompt"
      ]
    },
    {
      "cell_type": "markdown",
      "metadata": {
        "id": "5mojJSE3R6yw"
      },
      "source": [
        "Here we have our `system`, `user`, and `assistant` role prompts.\n",
        "\n",
        "Let's take a peek at what they look like:\n",
        "\n",
        "```python\n",
        "class BasePrompt:\n",
        "    def __init__(self, prompt):\n",
        "        \"\"\"\n",
        "        Initializes the BasePrompt object with a prompt template.\n",
        "\n",
        "        :param prompt: A string that can contain placeholders within curly braces\n",
        "        \"\"\"\n",
        "        self.prompt = prompt\n",
        "        self._pattern = re.compile(r\"\\{([^}]+)\\}\")\n",
        "\n",
        "    def format_prompt(self, **kwargs):\n",
        "        \"\"\"\n",
        "        Formats the prompt string using the keyword arguments provided.\n",
        "\n",
        "        :param kwargs: The values to substitute into the prompt string\n",
        "        :return: The formatted prompt string\n",
        "        \"\"\"\n",
        "        matches = self._pattern.findall(self.prompt)\n",
        "        return self.prompt.format(**{match: kwargs.get(match, \"\") for match in matches})\n",
        "\n",
        "    def get_input_variables(self):\n",
        "        \"\"\"\n",
        "        Gets the list of input variable names from the prompt string.\n",
        "\n",
        "        :return: List of input variable names\n",
        "        \"\"\"\n",
        "        return self._pattern.findall(self.prompt)\n",
        "```\n",
        "\n",
        "Then we have our `RolePrompt` which laser focuses us on the role pattern found in most API endpoints for LLMs.\n",
        "\n",
        "```python\n",
        "class RolePrompt(BasePrompt):\n",
        "    def __init__(self, prompt, role: str):\n",
        "        \"\"\"\n",
        "        Initializes the RolePrompt object with a prompt template and a role.\n",
        "\n",
        "        :param prompt: A string that can contain placeholders within curly braces\n",
        "        :param role: The role for the message ('system', 'user', or 'assistant')\n",
        "        \"\"\"\n",
        "        super().__init__(prompt)\n",
        "        self.role = role\n",
        "\n",
        "    def create_message(self, **kwargs):\n",
        "        \"\"\"\n",
        "        Creates a message dictionary with a role and a formatted message.\n",
        "\n",
        "        :param kwargs: The values to substitute into the prompt string\n",
        "        :return: Dictionary containing the role and the formatted message\n",
        "        \"\"\"\n",
        "        return {\"role\": self.role, \"content\": self.format_prompt(**kwargs)}\n",
        "```\n",
        "\n",
        "We'll look at how the `SystemRolePrompt` is constructed to get a better idea of how that extension works:\n",
        "\n",
        "```python\n",
        "class SystemRolePrompt(RolePrompt):\n",
        "    def __init__(self, prompt: str):\n",
        "        super().__init__(prompt, \"system\")\n",
        "```\n",
        "\n",
        "That pattern is repeated for our `UserRolePrompt` and our `AssistantRolePrompt` as well."
      ]
    },
    {
      "cell_type": "markdown",
      "metadata": {
        "id": "D361R6sMR6yw"
      },
      "source": [
        "##### ChatOpenAI"
      ]
    },
    {
      "cell_type": "markdown",
      "metadata": {
        "id": "HJVQ2Pm8R6yw"
      },
      "source": [
        "Next we have our model, which is converted to a format analagous to libraries like LangChain and LlamaIndex.\n",
        "\n",
        "Let's take a peek at how that is constructed:\n",
        "\n",
        "```python\n",
        "class ChatOpenAI:\n",
        "    def __init__(self, model_name: str = \"gpt-4o-mini\"):\n",
        "        self.model_name = model_name\n",
        "        self.openai_api_key = os.getenv(\"OPENAI_API_KEY\")\n",
        "        if self.openai_api_key is None:\n",
        "            raise ValueError(\"OPENAI_API_KEY is not set\")\n",
        "\n",
        "    def run(self, messages, text_only: bool = True):\n",
        "        if not isinstance(messages, list):\n",
        "            raise ValueError(\"messages must be a list\")\n",
        "\n",
        "        openai.api_key = self.openai_api_key\n",
        "        response = openai.ChatCompletion.create(\n",
        "            model=self.model_name, messages=messages\n",
        "        )\n",
        "\n",
        "        if text_only:\n",
        "            return response.choices[0].message.content\n",
        "\n",
        "        return response\n",
        "```"
      ]
    },
    {
      "cell_type": "markdown",
      "metadata": {
        "id": "qCU7FfhIR6yw"
      },
      "source": [
        "#### ❓ Question #3:\n",
        "\n",
        "When calling the OpenAI API - are there any ways we can achieve more reproducible outputs?\n",
        "\n",
        "We can have the temperature setting be fixed and it controls the randomness of the model\n",
        "Example: temperature of 1 returns more creative responses vs temperature 0 the responses are more focused\n",
        "combining this with top_p of 1 and the low temperature we can achieve more reproducible outputs \n",
        "\n",
        "> NOTE: Check out [this section](https://platform.openai.com/docs/guides/text-generation/) of the OpenAI documentation for the answer!"
      ]
    },
    {
      "cell_type": "markdown",
      "metadata": {
        "id": "c5wcjMLCR6yw"
      },
      "source": [
        "### Creating and Prompting OpenAI's `gpt-4o-mini`!\n",
        "\n",
        "Let's tie all these together and use it to prompt `gpt-4o-mini`!"
      ]
    },
    {
      "cell_type": "code",
      "execution_count": 10,
      "metadata": {
        "id": "WIfpIot7R6yw"
      },
      "outputs": [],
      "source": [
        "from aimakerspace.openai_utils.prompts import (\n",
        "    UserRolePrompt,\n",
        "    SystemRolePrompt,\n",
        "    AssistantRolePrompt,\n",
        ")\n",
        "\n",
        "from aimakerspace.openai_utils.chatmodel import ChatOpenAI\n",
        "\n",
        "chat_openai = ChatOpenAI()\n",
        "user_prompt_template = \"{content}\"\n",
        "user_role_prompt = UserRolePrompt(user_prompt_template)\n",
        "system_prompt_template = (\n",
        "    \"You are an expert in {expertise}, you always answer in a kind way.\"\n",
        ")\n",
        "system_role_prompt = SystemRolePrompt(system_prompt_template)\n",
        "\n",
        "messages = [\n",
        "    system_role_prompt.create_message(expertise=\"Python\"),\n",
        "    user_role_prompt.create_message(\n",
        "        content=\"What is the best way to write a loop?\"\n",
        "    ),\n",
        "]\n",
        "\n",
        "response = chat_openai.run(messages)"
      ]
    },
    {
      "cell_type": "code",
      "execution_count": 11,
      "metadata": {
        "colab": {
          "base_uri": "https://localhost:8080/"
        },
        "id": "dHo7lssNR6yw",
        "outputId": "1d3823fa-bb6b-45f6-ddba-b41686388324"
      },
      "outputs": [
        {
          "name": "stdout",
          "output_type": "stream",
          "text": [
            "The \"best\" way to write a loop in Python often depends on the specific use case and what you're trying to accomplish. However, I can share some common practices and examples for writing efficient, readable loops.\n",
            "\n",
            "1. **Using a `for` loop**:\n",
            "   This is typically used for iterating over a sequence (like a list, tuple, string, or range).\n",
            "\n",
            "   ```python\n",
            "   # Example: Iterating over a list\n",
            "   fruits = ['apple', 'banana', 'cherry']\n",
            "   for fruit in fruits:\n",
            "       print(fruit)\n",
            "   ```\n",
            "\n",
            "2. **Using a `while` loop**:\n",
            "   This is useful when the number of iterations isn't known ahead of time and depends on a condition.\n",
            "\n",
            "   ```python\n",
            "   # Example: Simple counter\n",
            "   count = 0\n",
            "   while count < 5:\n",
            "       print(count)\n",
            "       count += 1\n",
            "   ```\n",
            "\n",
            "3. **Using list comprehensions**:\n",
            "   If you're creating a new list based on an existing one, list comprehensions are a more concise and often more efficient way to write a loop.\n",
            "\n",
            "   ```python\n",
            "   # Example: Creating a list of squared numbers\n",
            "   numbers = [1, 2, 3, 4, 5]\n",
            "   squares = [x**2 for x in numbers]\n",
            "   print(squares)\n",
            "   ```\n",
            "\n",
            "4. **Using `enumerate()`**:\n",
            "   If you need both the index and the value when looping through a list, `enumerate()` can be very handy.\n",
            "\n",
            "   ```python\n",
            "   # Example: Iterating with index\n",
            "   fruits = ['apple', 'banana', 'cherry']\n",
            "   for index, fruit in enumerate(fruits):\n",
            "       print(f\"{index}: {fruit}\")\n",
            "   ```\n",
            "\n",
            "5. **Avoiding deep nesting**:\n",
            "   If you find yourself needing deeply nested loops, consider whether you can flatten your logic or use functions to keep your code readable.\n",
            "\n",
            "Here’s an example of combining some of these practices for better structure:\n",
            "\n",
            "```python\n",
            "# Example: Using a function to process elements\n",
            "def process_fruits(fruits):\n",
            "    for index, fruit in enumerate(fruits):\n",
            "        print(f\"{index}: {fruit.upper()}\")\n",
            "\n",
            "fruits = ['apple', 'banana', 'cherry']\n",
            "process_fruits(fruits)\n",
            "```\n",
            "\n",
            "Each of these methods has its own advantages depending on the scenario. The key is to choose the one that makes your code easy to read and maintain while being efficient. If you have a specific use case in mind, feel free to share, and I’d be happy to help you refine your loop further!\n"
          ]
        }
      ],
      "source": [
        "print(response)"
      ]
    },
    {
      "cell_type": "markdown",
      "metadata": {
        "id": "r2nxxhB2R6yy"
      },
      "source": [
        "## Task 5: Retrieval Augmented Generation\n",
        "\n",
        "Now we can create a RAG prompt - which will help our system behave in a way that makes sense!\n",
        "\n",
        "There is much you could do here, many tweaks and improvements to be made!"
      ]
    },
    {
      "cell_type": "code",
      "execution_count": 12,
      "metadata": {
        "id": "D1hamzGaR6yy"
      },
      "outputs": [],
      "source": [
        "RAG_PROMPT_TEMPLATE = \"\"\" \\\n",
        "Use the provided context to answer the user's query.\n",
        "\n",
        "You may not answer the user's query unless there is specific context in the following text.\n",
        "\n",
        "If you do not know the answer, or cannot answer, please respond with \"I don't know\".\n",
        "\"\"\"\n",
        "\n",
        "rag_prompt = SystemRolePrompt(RAG_PROMPT_TEMPLATE)\n",
        "\n",
        "USER_PROMPT_TEMPLATE = \"\"\" \\\n",
        "Context:\n",
        "{context}\n",
        "\n",
        "User Query:\n",
        "{user_query}\n",
        "\"\"\"\n",
        "\n",
        "\n",
        "user_prompt = UserRolePrompt(USER_PROMPT_TEMPLATE)\n",
        "\n",
        "class RetrievalAugmentedQAPipeline:\n",
        "    def __init__(self, llm: ChatOpenAI(), vector_db_retriever: VectorDatabase) -> None:\n",
        "        self.llm = llm\n",
        "        self.vector_db_retriever = vector_db_retriever\n",
        "\n",
        "    def run_pipeline(self, user_query: str) -> str:\n",
        "        context_list = self.vector_db_retriever.search_by_text(user_query, k=4)\n",
        "\n",
        "        context_prompt = \"\"\n",
        "        for context in context_list:\n",
        "            context_prompt += context[0] + \"\\n\"\n",
        "\n",
        "        formatted_system_prompt = rag_prompt.create_message()\n",
        "\n",
        "        formatted_user_prompt = user_prompt.create_message(user_query=user_query, context=context_prompt)\n",
        "\n",
        "        return {\"response\" : self.llm.run([formatted_system_prompt, formatted_user_prompt]), \"context\" : context_list}"
      ]
    },
    {
      "cell_type": "markdown",
      "metadata": {
        "id": "zZIJI19uR6yz"
      },
      "source": [
        "#### ❓ Question #4:\n",
        "\n",
        "What prompting strategies could you use to make the LLM have a more thoughtful, detailed response?\n",
        "\n",
        "What is that strategy called?\n",
        "\n",
        "Instruction prompting, we get the results we want by directly instructing the model how we want it to respond\n",
        "\n",
        "\n",
        "> NOTE: You can look through [\"Accessing GPT-3.5-turbo Like a Developer\"](https://colab.research.google.com/drive/1mOzbgf4a2SP5qQj33ZxTz2a01-5eXqk2?usp=sharing) for an answer to this question if you get stuck!"
      ]
    },
    {
      "cell_type": "code",
      "execution_count": 13,
      "metadata": {
        "id": "kqbE9fZ6R6yz"
      },
      "outputs": [],
      "source": [
        "retrieval_augmented_qa_pipeline = RetrievalAugmentedQAPipeline(\n",
        "    vector_db_retriever=vector_db,\n",
        "    llm=chat_openai\n",
        ")"
      ]
    },
    {
      "cell_type": "code",
      "execution_count": 14,
      "metadata": {
        "colab": {
          "base_uri": "https://localhost:8080/"
        },
        "id": "jAGhaCGOR6yz",
        "outputId": "e4fb3a1b-d2bc-4e18-ec31-dc0adf767163"
      },
      "outputs": [
        {
          "data": {
            "text/plain": [
              "{'response': 'The \\'Michael Eisner Memorial Weak Executive Problem\\' refers to the tendency of a CEO or founder, who has a strong background in a particular function (such as product management, sales, or marketing), to hire a weak executive in that same area. This often occurs because the CEO wants to retain control over that function, thus hiring someone less competent to ensure they remain \"the man.\" The term references Michael Eisner, the former CEO of Disney, who faced challenges when he bought ABC, which struggled under his leadership despite his expertise in television. The implication is that hiring weak executives can hinder the overall effectiveness of an organization.',\n",
              " 'context': [('ordingly.\\nSeventh, when hiring the executive to run your former specialty, be\\ncareful you don’t hire someone weak on purpose.\\nThis sounds silly, but you wouldn’t believe how oaen it happens.\\nThe CEO who used to be a product manager who has a weak\\nproduct management executive. The CEO who used to be in\\nsales who has a weak sales executive. The CEO who used to be\\nin marketing who has a weak marketing executive.\\nI call this the “Michael Eisner Memorial Weak Executive Problem” — aaer the CEO of Disney who had previously been a brilliant TV network executive. When he bought ABC at Disney, it\\npromptly fell to fourth place. His response? “If I had an extra\\ntwo days a week, I could turn around ABC myself.” Well, guess\\nwhat, he didn’t have an extra two days a week.\\nA CEO — or a startup founder — oaen has a hard time letting\\ngo of the function that brought him to the party. The result: you\\nhire someone weak into the executive role for that function so\\nthat you can continue to be “the man” — cons',\n",
              "   np.float64(0.6582349170764753)),\n",
              "  ('m. They have areas where they are truly deXcient in judgment or skill set. That’s just life. Almost nobody is brilliant\\nat everything. When hiring and when Hring executives, you\\nmust therefore focus on strength rather than lack of weakness. Everybody has severe weaknesses even if you can’t see\\nthem yet. When managing, it’s oaen useful to micromanage and\\nto provide remedial training around these weaknesses. Doing so\\nmay make the diWerence between an executive succeeding or\\nfailing.\\nFor example, you might have a brilliant engineering executive\\nwho generates excellent team loyalty, has terriXc product judgment and makes the trains run on time. This same executive\\nmay be very poor at relating to the other functions in the company. She may generate far more than her share of cross-functional conYicts, cut herself oW from critical information, and\\nsigniXcantly impede your ability to sell and market eWectively.\\nYour alternatives are:\\n(a) Macro-manage and give her an annual or quarterly object',\n",
              "   np.float64(0.5088668708442031)),\n",
              "  ('ed?\\nIn reality — as opposed to Marc’s warped view of reality — it will\\nbe extremely helpful for Marc [if he were actually the CEO,\\nwhich he is not] to meet with the new head of engineering daily\\nwhen she comes on board and review all of her thinking and\\ndecisions. This level of micromanagement will accelerate her\\ntraining and improve her long-term eWectiveness. It will make\\nher seem smarter to the rest of the organization which will build\\ncredibility and conXdence while she comes up to speed. Micromanaging new executives is generally a good idea for a limited\\nperiod of time.\\nHowever, that is not the only time that it makes sense to micro66 The Pmarca Blog Archives\\nmanage executives. It turns out that just about every executive\\nin the world has a few things that are seriously wrong with\\nthem. They have areas where they are truly deXcient in judgment or skill set. That’s just life. Almost nobody is brilliant\\nat everything. When hiring and when Hring executives, you\\nmust therefore focus o',\n",
              "   np.float64(0.4790308520865477)),\n",
              "  ('nYicts, cut herself oW from critical information, and\\nsigniXcantly impede your ability to sell and market eWectively.\\nYour alternatives are:\\n(a) Macro-manage and give her an annual or quarterly objective\\nto Xx it, or…\\n(b) Intensively micromanage her interactions until she learns\\nthe fundamental interpersonal skills required to be an eWective\\nexecutive.\\nI am arguing that doing (a) will likely result in weak performance. The reason is that she very likely has no idea how to be\\neWective with her peers. If somebody is an executive, it’s very\\nlikely that somewhere along the line somebody gave her feedback — perhaps abstractly — about all of her weaknesses. Yet\\nthe weakness remains. As a result, executives generally require\\nmore hands-on management than lower level employees to\\nimprove weak areas.\\nSo, micromanagement is like Xne wine. A little at the right times\\nwill really enhance things; too much all the time and you’ll end\\nup in rehab.\\nPart 8: Hiring, managing, promoting, and Dring execut',\n",
              "   np.float64(0.46812418038478015))]}"
            ]
          },
          "execution_count": 14,
          "metadata": {},
          "output_type": "execute_result"
        }
      ],
      "source": [
        "retrieval_augmented_qa_pipeline.run_pipeline(\"What is the 'Michael Eisner Memorial Weak Executive Problem'?\")"
      ]
    },
    {
      "cell_type": "markdown",
      "metadata": {},
      "source": [
        "### 🏗️ Activity #1:\n",
        "\n",
        "Enhance your RAG application in some way! \n",
        "\n",
        "Suggestions are: \n",
        "\n",
        "- Allow it to work with PDF files\n",
        "- Implement a new distance metric\n",
        "- Add metadata support to the vector database\n",
        "\n",
        "While these are suggestions, you should feel free to make whatever augmentations you desire! \n",
        "\n",
        "> NOTE: These additions might require you to work within the `aimakerspace` library - that's expected!"
      ]
    },
    {
      "cell_type": "code",
      "execution_count": 41,
      "metadata": {},
      "outputs": [
        {
          "name": "stdout",
          "output_type": "stream",
          "text": [
            "Response: He has a total of 27,649 days to expect to live.\n",
            "\n",
            "Context used:\n",
            "\n",
            "Context 1:\n",
            "oved ones, learn, \n",
            "build for the future, and help others. Whatever you’re doing \n",
            "today, is it worth 1/30,000 of your life?How many days is a \n",
            "typical human lifespan?Maybe you’re good at math; I’m sure...\n",
            "\n",
            "Context 2:\n",
            " AI or other events, \n",
            "use discussion boards, and work on finding some. If your mentors or manager don’t support \n",
            "your growth, find ones who do. I’m also working on how to grow a supportive AI communit...\n",
            "\n",
            "Context 3:\n",
            "nt to keep up.\n",
            "How can you maintain a steady pace of learning for years? If you can cultivate the habit of \n",
            "learning a little bit every week, you can make significant progress with what feels like les...\n",
            "\n",
            "Context 4:\n",
            "deepen \n",
            "your technical knowledge. I’ve known many machine learning engineers who benefitted from \n",
            "deeper skills in an application area such as natural language processing or computer vision, or in \n",
            "a ...\n"
          ]
        }
      ],
      "source": [
        "from aimakerspace.text_utils import PDFLoader, CharacterTextSplitter\n",
        "from aimakerspace.vectordatabase import VectorDatabase\n",
        "from aimakerspace.openai_utils.prompts import SystemRolePrompt, UserRolePrompt\n",
        "from aimakerspace.openai_utils.chatmodel import ChatOpenAI\n",
        "from aimakerspace.openai_utils.embedding import EmbeddingModel\n",
        "import asyncio\n",
        "import os\n",
        "from getpass import getpass\n",
        "\n",
        "# Set up OpenAI API key\n",
        "api_key = os.getenv(\"OPENAI_API_KEY\")\n",
        "if not api_key:\n",
        "    api_key = getpass(\"Enter your OpenAI API key: \")\n",
        "    os.environ[\"OPENAI_API_KEY\"] = api_key\n",
        "\n",
        "\n",
        "\n",
        "\n",
        "# Load the PDF\n",
        "pdf_loader = PDFLoader(\"data/How-to-Build-a-Career-in-AI.pdf\")\n",
        "documents = pdf_loader.load_documents()\n",
        "\n",
        "# Split the documents into chunks with optimized parameters\n",
        "text_splitter = CharacterTextSplitter(chunk_size=1500, chunk_overlap=300)\n",
        "split_documents = text_splitter.split_texts(documents)\n",
        "\n",
        "# Create and populate the vector database with explicit embedding model\n",
        "embedding_model = EmbeddingModel()\n",
        "vector_db = VectorDatabase(embedding_model=embedding_model)\n",
        "vector_db = asyncio.run(vector_db.abuild_from_list(split_documents))\n",
        "\n",
        "# Set up the RAG prompt templates\n",
        "RAG_PROMPT_TEMPLATE = \"\"\" \\\n",
        "Use the provided context to answer the user's query.\n",
        "\n",
        "You may not answer the user's query unless there is specific context in the following text.\n",
        "\n",
        "If you do not know the answer, or cannot answer, please respond with \"I don't know\".\n",
        "\"\"\"\n",
        "\n",
        "rag_prompt = SystemRolePrompt(RAG_PROMPT_TEMPLATE)\n",
        "\n",
        "USER_PROMPT_TEMPLATE = \"\"\" \\\n",
        "Context:\n",
        "{context}\n",
        "\n",
        "User Query:\n",
        "{user_query}\n",
        "\"\"\"\n",
        "\n",
        "user_prompt = UserRolePrompt(USER_PROMPT_TEMPLATE)\n",
        "\n",
        "# Create the ChatOpenAI instance\n",
        "chat_openai = ChatOpenAI()\n",
        "\n",
        "# Create the RAG pipeline\n",
        "class RetrievalAugmentedQAPipeline:\n",
        "    def __init__(self, llm: ChatOpenAI, vector_db_retriever: VectorDatabase) -> None:\n",
        "        self.llm = llm\n",
        "        self.vector_db_retriever = vector_db_retriever\n",
        "\n",
        "    def run_pipeline(self, user_query: str) -> str:\n",
        "        context_list = self.vector_db_retriever.search_by_text(user_query, k=4)\n",
        "\n",
        "        context_prompt = \"\"\n",
        "        for context in context_list:\n",
        "            context_prompt += context[0] + \"\\n\"\n",
        "\n",
        "        formatted_system_prompt = rag_prompt.create_message()\n",
        "        formatted_user_prompt = user_prompt.create_message(\n",
        "            user_query=user_query, \n",
        "            context=context_prompt\n",
        "        )\n",
        "\n",
        "        return {\n",
        "            \"response\": self.llm.run([formatted_system_prompt, formatted_user_prompt]),\n",
        "            \"context\": context_list\n",
        "        }\n",
        "\n",
        "# Create and run the pipeline\n",
        "retrieval_augmented_qa_pipeline = RetrievalAugmentedQAPipeline(\n",
        "    vector_db_retriever=vector_db,\n",
        "    llm=chat_openai\n",
        ")\n",
        "\n",
        "# Test \n",
        "result = retrieval_augmented_qa_pipeline.run_pipeline(\"how many days does he have\")\n",
        "print(\"Response:\", result[\"response\"])\n",
        "print(\"\\nContext used:\")\n",
        "for i, context in enumerate(result[\"context\"], 1):\n",
        "    print(f\"\\nContext {i}:\")\n",
        "    print(context[0][:200] + \"...\")"
      ]
    }
  ],
  "metadata": {
    "colab": {
      "provenance": []
    },
    "kernelspec": {
      "display_name": ".venv",
      "language": "python",
      "name": "python3"
    },
    "language_info": {
      "codemirror_mode": {
        "name": "ipython",
        "version": 3
      },
      "file_extension": ".py",
      "mimetype": "text/x-python",
      "name": "python",
      "nbconvert_exporter": "python",
      "pygments_lexer": "ipython3",
      "version": "3.13.2"
    },
    "orig_nbformat": 4,
    "widgets": {
      "application/vnd.jupyter.widget-state+json": {
        "1ce393d9afcf427d9d352259c5d32678": {
          "model_module": "@jupyter-widgets/controls",
          "model_module_version": "1.5.0",
          "model_name": "FloatProgressModel",
          "state": {
            "_dom_classes": [],
            "_model_module": "@jupyter-widgets/controls",
            "_model_module_version": "1.5.0",
            "_model_name": "FloatProgressModel",
            "_view_count": null,
            "_view_module": "@jupyter-widgets/controls",
            "_view_module_version": "1.5.0",
            "_view_name": "ProgressView",
            "bar_style": "",
            "description": "",
            "description_tooltip": null,
            "layout": "IPY_MODEL_4e6efd99f7d346e485b002fb0fa85cc7",
            "max": 1,
            "min": 0,
            "orientation": "horizontal",
            "style": "IPY_MODEL_3dfb67c39958461da6071e4c19c3fa41",
            "value": 1
          }
        },
        "3a4ba348cb004f8ab7b2b1395539c81b": {
          "model_module": "@jupyter-widgets/controls",
          "model_module_version": "1.5.0",
          "model_name": "LabelModel",
          "state": {
            "_dom_classes": [],
            "_model_module": "@jupyter-widgets/controls",
            "_model_module_version": "1.5.0",
            "_model_name": "LabelModel",
            "_view_count": null,
            "_view_module": "@jupyter-widgets/controls",
            "_view_module_version": "1.5.0",
            "_view_name": "LabelView",
            "description": "",
            "description_tooltip": null,
            "layout": "IPY_MODEL_d2ea5009dd16442cb5d8a0ac468e50a8",
            "placeholder": "​",
            "style": "IPY_MODEL_5f00135fe1044051a50ee5e841cbb8e3",
            "value": "0.018 MB of 0.018 MB uploaded\r"
          }
        },
        "3dfb67c39958461da6071e4c19c3fa41": {
          "model_module": "@jupyter-widgets/controls",
          "model_module_version": "1.5.0",
          "model_name": "ProgressStyleModel",
          "state": {
            "_model_module": "@jupyter-widgets/controls",
            "_model_module_version": "1.5.0",
            "_model_name": "ProgressStyleModel",
            "_view_count": null,
            "_view_module": "@jupyter-widgets/base",
            "_view_module_version": "1.2.0",
            "_view_name": "StyleView",
            "bar_color": null,
            "description_width": ""
          }
        },
        "4e6efd99f7d346e485b002fb0fa85cc7": {
          "model_module": "@jupyter-widgets/base",
          "model_module_version": "1.2.0",
          "model_name": "LayoutModel",
          "state": {
            "_model_module": "@jupyter-widgets/base",
            "_model_module_version": "1.2.0",
            "_model_name": "LayoutModel",
            "_view_count": null,
            "_view_module": "@jupyter-widgets/base",
            "_view_module_version": "1.2.0",
            "_view_name": "LayoutView",
            "align_content": null,
            "align_items": null,
            "align_self": null,
            "border": null,
            "bottom": null,
            "display": null,
            "flex": null,
            "flex_flow": null,
            "grid_area": null,
            "grid_auto_columns": null,
            "grid_auto_flow": null,
            "grid_auto_rows": null,
            "grid_column": null,
            "grid_gap": null,
            "grid_row": null,
            "grid_template_areas": null,
            "grid_template_columns": null,
            "grid_template_rows": null,
            "height": null,
            "justify_content": null,
            "justify_items": null,
            "left": null,
            "margin": null,
            "max_height": null,
            "max_width": null,
            "min_height": null,
            "min_width": null,
            "object_fit": null,
            "object_position": null,
            "order": null,
            "overflow": null,
            "overflow_x": null,
            "overflow_y": null,
            "padding": null,
            "right": null,
            "top": null,
            "visibility": null,
            "width": null
          }
        },
        "56a8e24025594e5e9ff3b8581c344691": {
          "model_module": "@jupyter-widgets/base",
          "model_module_version": "1.2.0",
          "model_name": "LayoutModel",
          "state": {
            "_model_module": "@jupyter-widgets/base",
            "_model_module_version": "1.2.0",
            "_model_name": "LayoutModel",
            "_view_count": null,
            "_view_module": "@jupyter-widgets/base",
            "_view_module_version": "1.2.0",
            "_view_name": "LayoutView",
            "align_content": null,
            "align_items": null,
            "align_self": null,
            "border": null,
            "bottom": null,
            "display": null,
            "flex": null,
            "flex_flow": null,
            "grid_area": null,
            "grid_auto_columns": null,
            "grid_auto_flow": null,
            "grid_auto_rows": null,
            "grid_column": null,
            "grid_gap": null,
            "grid_row": null,
            "grid_template_areas": null,
            "grid_template_columns": null,
            "grid_template_rows": null,
            "height": null,
            "justify_content": null,
            "justify_items": null,
            "left": null,
            "margin": null,
            "max_height": null,
            "max_width": null,
            "min_height": null,
            "min_width": null,
            "object_fit": null,
            "object_position": null,
            "order": null,
            "overflow": null,
            "overflow_x": null,
            "overflow_y": null,
            "padding": null,
            "right": null,
            "top": null,
            "visibility": null,
            "width": null
          }
        },
        "5f00135fe1044051a50ee5e841cbb8e3": {
          "model_module": "@jupyter-widgets/controls",
          "model_module_version": "1.5.0",
          "model_name": "DescriptionStyleModel",
          "state": {
            "_model_module": "@jupyter-widgets/controls",
            "_model_module_version": "1.5.0",
            "_model_name": "DescriptionStyleModel",
            "_view_count": null,
            "_view_module": "@jupyter-widgets/base",
            "_view_module_version": "1.2.0",
            "_view_name": "StyleView",
            "description_width": ""
          }
        },
        "bb904e05ece143c79ecc4f20de482f45": {
          "model_module": "@jupyter-widgets/controls",
          "model_module_version": "1.5.0",
          "model_name": "VBoxModel",
          "state": {
            "_dom_classes": [],
            "_model_module": "@jupyter-widgets/controls",
            "_model_module_version": "1.5.0",
            "_model_name": "VBoxModel",
            "_view_count": null,
            "_view_module": "@jupyter-widgets/controls",
            "_view_module_version": "1.5.0",
            "_view_name": "VBoxView",
            "box_style": "",
            "children": [
              "IPY_MODEL_3a4ba348cb004f8ab7b2b1395539c81b",
              "IPY_MODEL_1ce393d9afcf427d9d352259c5d32678"
            ],
            "layout": "IPY_MODEL_56a8e24025594e5e9ff3b8581c344691"
          }
        },
        "d2ea5009dd16442cb5d8a0ac468e50a8": {
          "model_module": "@jupyter-widgets/base",
          "model_module_version": "1.2.0",
          "model_name": "LayoutModel",
          "state": {
            "_model_module": "@jupyter-widgets/base",
            "_model_module_version": "1.2.0",
            "_model_name": "LayoutModel",
            "_view_count": null,
            "_view_module": "@jupyter-widgets/base",
            "_view_module_version": "1.2.0",
            "_view_name": "LayoutView",
            "align_content": null,
            "align_items": null,
            "align_self": null,
            "border": null,
            "bottom": null,
            "display": null,
            "flex": null,
            "flex_flow": null,
            "grid_area": null,
            "grid_auto_columns": null,
            "grid_auto_flow": null,
            "grid_auto_rows": null,
            "grid_column": null,
            "grid_gap": null,
            "grid_row": null,
            "grid_template_areas": null,
            "grid_template_columns": null,
            "grid_template_rows": null,
            "height": null,
            "justify_content": null,
            "justify_items": null,
            "left": null,
            "margin": null,
            "max_height": null,
            "max_width": null,
            "min_height": null,
            "min_width": null,
            "object_fit": null,
            "object_position": null,
            "order": null,
            "overflow": null,
            "overflow_x": null,
            "overflow_y": null,
            "padding": null,
            "right": null,
            "top": null,
            "visibility": null,
            "width": null
          }
        }
      }
    }
  },
  "nbformat": 4,
  "nbformat_minor": 0
}