File size: 30,426 Bytes
86beb7d
 
 
 
 
 
 
 
 
 
 
 
0693e1a
86beb7d
 
 
 
 
 
0693e1a
 
86beb7d
 
 
 
0693e1a
86beb7d
 
 
 
 
0693e1a
 
86beb7d
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
0693e1a
86beb7d
 
 
 
 
 
 
0693e1a
86beb7d
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
0693e1a
86beb7d
 
 
 
 
 
0693e1a
 
86beb7d
 
0693e1a
86beb7d
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
0693e1a
86beb7d
 
 
 
 
 
 
0693e1a
86beb7d
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
0693e1a
86beb7d
 
0693e1a
 
 
 
 
 
 
 
 
86beb7d
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
0693e1a
86beb7d
 
0693e1a
 
 
 
 
 
 
 
 
86beb7d
 
 
 
 
 
 
 
 
 
 
 
 
0693e1a
 
86beb7d
 
 
 
0693e1a
86beb7d
 
 
 
 
 
0693e1a
 
86beb7d
 
0693e1a
86beb7d
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
0693e1a
86beb7d
 
0693e1a
 
 
 
 
 
 
 
 
86beb7d
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488
489
490
491
492
493
494
495
496
497
498
499
500
501
502
503
504
505
506
{
 "cells": [
  {
   "cell_type": "code",
   "execution_count": 1,
   "id": "4d1ab110",
   "metadata": {},
   "outputs": [],
   "source": [
    "import os\n",
    "from dotenv import load_dotenv\n",
    "from langchain_mistralai import ChatMistralAI\n",
    "from langchain_google_genai import ChatGoogleGenerativeAI\n",
    "from langchain_core.messages import SystemMessage, HumanMessage\n",
    "from langgraph.prebuilt import create_react_agent\n",
    "\n",
    "from custom_tools import custom_tools\n",
    "\n",
    "# Load API keys from .env and fail fast with a clear message when one is\n",
    "# missing: `os.environ[...] = None` would otherwise raise an opaque TypeError.\n",
    "load_dotenv()\n",
    "for target, dotenv_key in ((\"MISTRAL_API_KEY\", \"MISTRAL\"), (\"GOOGLE_API_KEY\", \"GOOGLE\")):\n",
    "    value = os.getenv(dotenv_key)\n",
    "    if value is None:\n",
    "        raise RuntimeError(f\"Required key {dotenv_key!r} not found in environment or .env\")\n",
    "    os.environ[target] = value"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 2,
   "id": "7df8bade",
   "metadata": {},
   "outputs": [],
   "source": [
    "# LLM Configuration\n",
    "# Swap in the commented alternatives to run against Mistral instead of Gemini.\n",
    "llm = ChatGoogleGenerativeAI( # ChatMistralAI\n",
    "    model=\"gemini-2.5-flash\", # mistral-small-latest\n",
    "    temperature=0,\n",
    "    max_retries=5\n",
    ")\n",
    "\n",
    "# System prompt: 'FINAL ANSWER:' formatting rules for the benchmark answers,\n",
    "# plus an instruction to prefer the provided tools over unaided reasoning.\n",
    "sys_prompt = \"You are a general AI assistant. I will ask you a question. Report your thoughts, and finish your answer with the following template: FINAL ANSWER: [YOUR FINAL ANSWER]. YOUR FINAL ANSWER should be a number OR as few words as possible OR a comma separated list of numbers and/or strings. If you are asked for a number, DON'T use comma to write your number NEITHER use units such as $ or percent sign unless specified otherwise. If you are asked for a string, DON'T use articles, NEITHER abbreviations (e.g. for cities), and write the digits in plain text unless specified otherwise. If you are asked for a comma separated list, apply the above rules depending of whether the element to be put in the list is a number or a string.\\n\\n\\n\\\n",
    "You will be provided with tools to help you answer questions. If you are asked to look for an information or make a calculation, absolutely use the tools provided to you. You should AVOID calculating by yourself and ABSOLUTELY use appropriate tools. If needed, use one tool first, then use the output of that tool as an input to another thinking then to the use of another tool.\""
   ]
  },
  {
   "cell_type": "markdown",
   "id": "0c930d3c",
   "metadata": {},
   "source": [
    "## Reasoning question test"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 3,
   "id": "0ce2dd73",
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "b,e\n"
     ]
    }
   ],
   "source": [
    "hum_prompt = \"\"\"\n",
    "\"task_id\": \"6f37996b-2ac7-44b0-8e68-6d28256631b4\",\n",
    "        \"question\": \"Given this table defining * on the set S = {a, b, c, d, e}\\n\\n|*|a|b|c|d|e|\\n|---|---|---|---|---|---|\\n|a|a|b|c|b|d|\\n|b|b|c|a|e|c|\\n|c|c|a|b|b|a|\\n|d|b|e|b|e|d|\\n|e|d|b|a|d|c|\\n\\nprovide the subset of S involved in any possible counter-examples that prove * is not commutative. Provide your answer as a comma separated list of the elements in the set in alphabetical order.\",\n",
    "        \"Level\": \"1\",\n",
    "        \"file_name\": \"\"\n",
    "        \"\"\"\n",
    "\n",
    "# Expected answer: b, e\n",
    "\n",
    "# Build a ReAct agent wired to the shared LLM, custom tools and system prompt.\n",
    "agent = create_react_agent(\n",
    "    model=llm,\n",
    "    tools=custom_tools,\n",
    "    prompt=sys_prompt,\n",
    ")\n",
    "\n",
    "response = agent.invoke(\n",
    "    {\"messages\": HumanMessage(content=hum_prompt)}\n",
    ")\n",
    "\n",
    "# Keep only the text after the 'FINAL ANSWER: ' marker mandated by sys_prompt.\n",
    "print(response[\"messages\"][-1].content.split(\"FINAL ANSWER: \")[-1])"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 4,
   "id": "17dc251b",
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "{'messages': [HumanMessage(content='\\n\"task_id\": \"6f37996b-2ac7-44b0-8e68-6d28256631b4\",\\n        \"question\": \"Given this table defining * on the set S = {a, b, c, d, e}\\n\\n|*|a|b|c|d|e|\\n|---|---|---|---|---|---|\\n|a|a|b|c|b|d|\\n|b|b|c|a|e|c|\\n|c|c|a|b|b|a|\\n|d|b|e|b|e|d|\\n|e|d|b|a|d|c|\\n\\nprovide the subset of S involved in any possible counter-examples that prove * is not commutative. Provide your answer as a comma separated list of the elements in the set in alphabetical order.\",\\n        \"Level\": \"1\",\\n        \"file_name\": \"\"\\n        ', additional_kwargs={}, response_metadata={}, id='17e37204-e33b-4a13-8a35-5b843358f3a4'),\n",
       "  AIMessage(content='FINAL ANSWER: b,e', additional_kwargs={}, response_metadata={'prompt_feedback': {'block_reason': 0, 'safety_ratings': []}, 'finish_reason': 'STOP', 'model_name': 'gemini-2.5-flash', 'safety_ratings': []}, id='run--30a88cdb-6076-4b29-822e-c5ded1b678e9-0', usage_metadata={'input_tokens': 1436, 'output_tokens': 6, 'total_tokens': 1864, 'input_token_details': {'cache_read': 0}, 'output_token_details': {'reasoning': 422}})]}"
      ]
     },
     "execution_count": 4,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "response"
   ]
  },
  {
   "cell_type": "markdown",
   "id": "158a8b84",
   "metadata": {},
   "source": [
    "## Image question test"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 6,
   "id": "4b7ca6fa",
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Qh4+\n"
     ]
    }
   ],
   "source": [
    "# Image-based task: file_name points at a chess-position screenshot the\n",
    "# agent must fetch and analyze via its tools.\n",
    "hum_prompt = \"\"\"{\n",
    "        \"task_id\": \"cca530fc-4052-43b2-b130-b30968d8aa44\",\n",
    "        \"question\": \"Review the chess position provided in the image. It is black's turn. Provide the correct next move for black which guarantees a win. Please provide your response in algebraic notation.\",\n",
    "        \"Level\": \"1\",\n",
    "        \"file_name\": \"cca530fc-4052-43b2-b130-b30968d8aa44.png\"\n",
    "    }\"\"\"\n",
    "\n",
    "# Rebuild the agent with the shared LLM, tools and system prompt.\n",
    "agent = create_react_agent(\n",
    "    model=llm,\n",
    "    tools=custom_tools,\n",
    "    prompt=sys_prompt,\n",
    ")\n",
    "\n",
    "response = agent.invoke(\n",
    "    {\"messages\": HumanMessage(content=hum_prompt)}\n",
    ")\n",
    "\n",
    "# Keep only the text after the 'FINAL ANSWER: ' marker mandated by sys_prompt.\n",
    "print(response[\"messages\"][-1].content.split(\"FINAL ANSWER: \")[-1])"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 4,
   "id": "5c80d303",
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "{'messages': [HumanMessage(content='{\\n        \"task_id\": \"cca530fc-4052-43b2-b130-b30968d8aa44\",\\n        \"question\": \"Review the chess position provided in the image. It is black\\'s turn. Provide the correct next move for black which guarantees a win. Please provide your response in algebraic notation.\",\\n        \"Level\": \"1\",\\n        \"file_name\": \"cca530fc-4052-43b2-b130-b30968d8aa44.png\"\\n    }', additional_kwargs={}, response_metadata={}, id='2ca8ec9c-b5b6-40e8-9082-2a386955c2c0'),\n",
       "  AIMessage(content='', additional_kwargs={'tool_calls': [{'id': 'xMCGT4mac', 'function': {'name': 'analyze_image', 'arguments': '{\"task_id\": \"cca530fc-4052-43b2-b130-b30968d8aa44\", \"file_name\": \"cca530fc-4052-43b2-b130-b30968d8aa44.png\", \"question\": \"Review the chess position provided in the image. It is black\\'s turn. Provide the correct next move for black which guarantees a win. Please provide your response in algebraic notation.\"}'}, 'index': 0}]}, response_metadata={'token_usage': {'prompt_tokens': 1386, 'total_tokens': 1505, 'completion_tokens': 119}, 'model_name': 'mistral-small-latest', 'model': 'mistral-small-latest', 'finish_reason': 'tool_calls'}, id='run--d95f4dbd-23b7-46fe-a1c1-0f25ffb8d882-0', tool_calls=[{'name': 'analyze_image', 'args': {'task_id': 'cca530fc-4052-43b2-b130-b30968d8aa44', 'file_name': 'cca530fc-4052-43b2-b130-b30968d8aa44.png', 'question': \"Review the chess position provided in the image. It is black's turn. Provide the correct next move for black which guarantees a win. Please provide your response in algebraic notation.\"}, 'id': 'xMCGT4mac', 'type': 'tool_call'}], usage_metadata={'input_tokens': 1386, 'output_tokens': 119, 'total_tokens': 1505}),\n",
       "  ToolMessage(content='Qd1#', name='analyze_image', id='df44830e-6b9c-476e-a129-d1625d90f607', tool_call_id='xMCGT4mac'),\n",
       "  AIMessage(content='FINAL ANSWER: Qd1#', additional_kwargs={}, response_metadata={'token_usage': {'prompt_tokens': 1523, 'total_tokens': 1534, 'completion_tokens': 11}, 'model_name': 'mistral-small-latest', 'model': 'mistral-small-latest', 'finish_reason': 'stop'}, id='run--462b434d-6416-4424-bd0d-f5d686137f35-0', usage_metadata={'input_tokens': 1523, 'output_tokens': 11, 'total_tokens': 1534})]}"
      ]
     },
     "execution_count": 4,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "response"
   ]
  },
  {
   "cell_type": "markdown",
   "id": "e0075e0f",
   "metadata": {},
   "source": [
    "## Wikipedia question test"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 7,
   "id": "69cf16b8",
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Claus\n"
     ]
    }
   ],
   "source": [
    "# Web-lookup task — printed answer for this run was 'Claus'.\n",
    "hum_prompt = \"What is the first name of the only Malko Competition recipient from the 20th Century (after 1977) whose nationality on record is a country that no longer exists?\"\n",
    "\n",
    "# Rebuild the agent with the shared LLM, tools and system prompt.\n",
    "agent = create_react_agent(\n",
    "    model=llm,\n",
    "    tools=custom_tools,\n",
    "    prompt=sys_prompt,\n",
    ")\n",
    "\n",
    "response = agent.invoke(\n",
    "    {\"messages\": HumanMessage(content=hum_prompt)}\n",
    ")\n",
    "\n",
    "# Keep only the text after the 'FINAL ANSWER: ' marker mandated by sys_prompt.\n",
    "print(response[\"messages\"][-1].content.split(\"FINAL ANSWER: \")[-1])"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 31,
   "id": "30ab19ba",
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "content='What is the first name of the only Malko Competition recipient from the 20th Century (after 1977) whose nationality on record is a country that no longer exists?' additional_kwargs={} response_metadata={} id='0849f09f-04b6-4205-bb3e-2ae333e009a6'\n",
      "content='To answer this question, I need to follow these steps:\\n\\n1. Identify the Malko Competition recipients from the 20th century after 1977.\\n2. Filter the recipients based on their nationality, focusing on those whose nationality is a country that no longer exists.\\n3. Determine the first name of the recipient who meets the criteria.\\n\\nFirst, I will search for information about the Malko Competition recipients.' additional_kwargs={'tool_calls': [{'id': 'YGVjSpxN7', 'function': {'name': 'wiki_search', 'arguments': '{\"query\": \"Malko Competition recipients\"}'}, 'index': 0}]} response_metadata={'token_usage': {'prompt_tokens': 397, 'total_tokens': 500, 'completion_tokens': 103}, 'model_name': 'mistral-small-latest', 'model': 'mistral-small-latest', 'finish_reason': 'tool_calls'} id='run--a7a76884-fe23-4c92-8990-a39d72eb0696-0' tool_calls=[{'name': 'wiki_search', 'args': {'query': 'Malko Competition recipients'}, 'id': 'YGVjSpxN7', 'type': 'tool_call'}] usage_metadata={'input_tokens': 397, 'output_tokens': 103, 'total_tokens': 500}\n",
      "content='International competition for young conductors\\nThe\\nMalko Competition\\nis an international competition for young\\nconductors\\n. It is held every three years by the\\nDanish Radio Symphony Orchestra\\n, to commemorate its founding conductor,\\nNicolai Malko\\n.\\n[\\n1\\n]\\nRecipients\\n[\\nedit\\n]\\nYear\\nRecipient\\nLifetime\\nNationality\\nNotes\\n1965\\nRalf Weikert\\nb. 1940\\nAustria\\n1968\\nAvi Ostrowsky\\nb. 1939\\nIsrael\\n1971\\nWinston Dan Vogel\\nb. 1943\\nUnited States\\n1974\\nGotthard Lienicke\\n1977\\nPhilip Barry Greenberg\\nUnited States\\n[\\n2\\n]\\n1980\\nMaximiano Valdés\\nb. 1949\\nChile\\n[\\n3\\n]\\n1983\\nClaus Peter Flor\\nb. 1953\\nEast Germany\\n1986\\nKazufumi Yamashita\\nb. 1961\\nJapan\\n[\\n4\\n]\\n1989\\nFabio Mechetti\\nb. 1957\\nBrazil\\n[\\n5\\n]\\n1992\\nJin Wang\\nb. 1960\\nAustria\\n1995\\nJan Wagner\\nVenezuela\\n[\\n6\\n]\\n1998\\nSeikyo Kim\\nb. 1970\\nJapan\\n[\\n7\\n]\\n2001\\nJosep Caballé Domenech\\n[\\nnote 1\\n]\\nb. 1973\\nSpain\\n2005\\nMei-Ann Chen\\nb. 1973\\nUnited States\\n[\\n8\\n]\\n2009\\nJoshua Weilerstein\\nb. 1987\\nUnited States\\n[\\n9\\n]\\n2012\\nRafael Payare\\nb. 1980\\nVenezuela\\n[\\n10\\n]\\n2015\\nTung-Chieh Chuang\\nb. 1982\\nTaiwan\\n[\\n11\\n]\\n2018\\nRyan Bancroft\\nb. 1989\\nUnited States\\n[\\n12\\n]\\n2021\\nDmitry Matvienko\\nb. 1990\\nBelarus\\n[\\n13\\n]\\n2024\\nSamuel Seungwon Lee\\nb. 1990\\nSouth Korea\\nNotes\\n[\\nedit\\n]\\n^\\nNo first prize was awarded in 2001, and Caballé-Domenech was appointed the highest (2nd) prize.\\nReferences\\n[\\nedit\\n]\\n^\\n\"Denmark\\'s top orchestra plays\"\\n.\\nColumbus Ledger-Enquirer\\n. Vol.\\xa0165, no.\\xa0313 (Final\\xa0ed.). April 9, 1993. p.\\xa0B-1.\\n^\\nWritten at\\nCopenhagen\\n.\\n\"Award to Greenberg\"\\n.\\nDetroit Free Press\\n. Vol.\\xa0147, no.\\xa012 (metro\\xa0ed.).\\nDetroit\\n.\\nAssociated Press\\n. May 16, 1977. 
p.\\xa016-B.\\n^\\nWritten at\\nCopenhagen\\n.\\n\"Chilean named top conductor\"\\n.\\nThe Montana Standard\\n. Vol.\\xa0104, no.\\xa0356.\\nButte, Montana\\n.\\nAssociated Press\\n. May 21, 1980. p.\\xa02.\\n^\\n\"Japanese Maestro Top Prize Winner\"\\n.\\nLos Angeles Times\\n. July 1, 1986\\n. Retrieved\\nAugust 9,\\n2012\\n.\\n^\\nMacMillan, Kyle (February 3, 1994).\\n\"Brazilian Is Faithful to Composers\"\\n.\\nOmaha World-Herald\\n. Vol.\\xa0129. pp.\\n31–\\n32.\\n^\\n\"Hot conductor\"\\n. the ticket.\\nThe Miami Herald\\n. Vol.\\xa085, no.\\xa0288 (Palm Beach\\xa0ed.). September 14, 1995. p.\\xa07E.\\n^\\n\"ARTS & ENTERTAINMENT IN BRIEF 21/7\"\\n.\\nLook at Vietnam\\n. July 21, 2010. Archived from\\nthe original\\non September 25, 2010\\n. Retrieved\\nAugust 9,\\n2012\\n.\\n^\\nJohnson, Lawrence A. (4 August 2010).\\n\"Mei-Ann Chen named music director of the Chicago Sinfonietta\"\\n.\\nChicago Classical Review\\n. Chicago\\n. Retrieved\\n17 December\\n2017\\n.\\n^\\nEriksen, Jon Bonde (1 May 2015).\\n\"Former winner: Malko was the start of my conducting career\"\\n.\\ndr.dk\\n. Retrieved\\n17 December\\n2017\\n.\\n^\\nMellor, Andrew (14 May 2012).\\n\"Venezuelan Rafael Payare wins Malko Competition\"\\n.\\nGramophone\\n. Haymarket Media Group\\n. Retrieved\\n9 August\\n2012\\n.\\n^\\n\"Tung-Chieh Chuang er vinder af Malko Konkurrencen 2015\"\\n.\\nDR\\n(in Danish). 1 May 2015.\\n^\\n\"28-årige Ryan tager 1. plads i stor dansk musikkonkurrence: Nu vil jeg fejre det med en middag!\"\\n.\\nDR\\n(in Danish)\\n. Retrieved\\n28 April\\n2018\\n.\\n^\\n\"Congratulations to the winners of the Malko competition 2021!\"\\n.\\nMalko Competition\\n. Retrieved\\n12 June\\n2021\\n.\\nExternal links\\n[\\nedit\\n]\\nClassical music portal\\nOfficial website\\nThis music event–related article is a\\nstub\\n. 
You can help Wikipedia by\\nexpanding it\\n.\\nv\\nt\\ne\\nRetrieved from \"\\nhttps://en.wikipedia.org/w/index.php?title=Malko_Competition&oldid=1240218934\\n\"' name='wiki_search' id='3e0382ab-b8eb-466d-8535-f862a30fa791' tool_call_id='YGVjSpxN7'\n",
      "content='The only Malko Competition recipient from the 20th Century (after 1977) whose nationality on record is a country that no longer exists is Claus Peter Flor, who was awarded in 1983 and his nationality was East Germany.\\n\\nFINAL ANSWER: Claus Peter' additional_kwargs={} response_metadata={'token_usage': {'prompt_tokens': 1785, 'total_tokens': 1848, 'completion_tokens': 63}, 'model_name': 'mistral-small-latest', 'model': 'mistral-small-latest', 'finish_reason': 'stop'} id='run--eb173fbd-bc80-4b3f-b08f-2c5e7ab2f147-0' usage_metadata={'input_tokens': 1785, 'output_tokens': 63, 'total_tokens': 1848}\n"
     ]
    }
   ],
   "source": [
    "# Print every message in the trace (human, AI, tool) one per line.\n",
    "for res in response[\"messages\"]:\n",
    "    print(res)"
   ]
  },
  {
   "cell_type": "markdown",
   "id": "2f633f93",
   "metadata": {},
   "source": [
    "## Excel question test"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 8,
   "id": "8eb59a66",
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "\n"
     ]
    }
   ],
   "source": [
    "# Spreadsheet task: the agent must read the attached .xlsx via its tools.\n",
    "hum_prompt = \"\"\"{'task_id': '7bd855d8-463d-4ed5-93ca-5fe35145f733', 'question': 'The attached Excel file contains the sales of menu items for a local fast-food chain. What were the total sales that the chain made from food (not including drinks)? Express your answer in USD with two decimal places.', 'Level': '1', 'file_name': '7bd855d8-463d-4ed5-93ca-5fe35145f733.xlsx'}\"\"\"\n",
    "\n",
    "# Rebuild the agent with the shared LLM, tools and system prompt.\n",
    "agent = create_react_agent(\n",
    "    model=llm,\n",
    "    tools=custom_tools,\n",
    "    prompt=sys_prompt,\n",
    ")\n",
    "\n",
    "response = agent.invoke(\n",
    "    {\"messages\": HumanMessage(content=hum_prompt)}\n",
    ")\n",
    "\n",
    "# Keep only the text after the 'FINAL ANSWER: ' marker mandated by sys_prompt.\n",
    "# NOTE(review): this run printed an empty answer — the Gemini call stopped on\n",
    "# MAX_TOKENS (see the trace in the next cell); needs a retry or token budget.\n",
    "print(response[\"messages\"][-1].content.split(\"FINAL ANSWER: \")[-1])\n",
    "# Expected answer: 89 706"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 9,
   "id": "2ca6f955",
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "{'messages': [HumanMessage(content=\"{'task_id': '7bd855d8-463d-4ed5-93ca-5fe35145f733', 'question': 'The attached Excel file contains the sales of menu items for a local fast-food chain. What were the total sales that the chain made from food (not including drinks)? Express your answer in USD with two decimal places.', 'Level': '1', 'file_name': '7bd855d8-463d-4ed5-93ca-5fe35145f733.xlsx'}\", additional_kwargs={}, response_metadata={}, id='62ca610d-ba20-4584-81bd-232fe6b6ab66'),\n",
       "  AIMessage(content='', additional_kwargs={}, response_metadata={'prompt_feedback': {'block_reason': 0, 'safety_ratings': []}, 'finish_reason': 'MAX_TOKENS', 'model_name': 'gemini-2.5-flash', 'safety_ratings': []}, id='run--50d067b9-d6cc-44bf-a52c-1ac15bdd42dd-0', usage_metadata={'input_tokens': 1352, 'output_tokens': 0, 'total_tokens': 66887, 'input_token_details': {'cache_read': 0}, 'output_token_details': {'reasoning': 65535}})]}"
      ]
     },
     "execution_count": 9,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "response"
   ]
  },
  {
   "cell_type": "markdown",
   "id": "f6d75183",
   "metadata": {},
   "source": [
    "## YouTube transcript question test"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 10,
   "id": "f80a471d",
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "extremely\n"
     ]
    }
   ],
   "source": [
    "# Transcript task — printed answer for this run was 'extremely'.\n",
    "hum_prompt = \"\"\"{'task_id': '9d191bce-651d-4746-be2d-7ef8ecadb9c2', 'question': 'Examine the video at https://www.youtube.com/watch?v=1htKBjuUWec.\\n\\nWhat does Teal\\'c say in response to the question \"Isn\\'t that hot?\"', 'Level': '1', 'file_name': ''}\"\"\"\n",
    "\n",
    "# Rebuild the agent with the shared LLM, tools and system prompt.\n",
    "agent = create_react_agent(\n",
    "    model=llm,\n",
    "    tools=custom_tools,\n",
    "    prompt=sys_prompt,\n",
    ")\n",
    "\n",
    "response = agent.invoke(\n",
    "    {\"messages\": HumanMessage(content=hum_prompt)}\n",
    ")\n",
    "\n",
    "# Keep only the text after the 'FINAL ANSWER: ' marker mandated by sys_prompt.\n",
    "print(response[\"messages\"][-1].content.split(\"FINAL ANSWER: \")[-1])"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 7,
   "id": "ff321c19",
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "{'messages': [HumanMessage(content='{\\'task_id\\': \\'9d191bce-651d-4746-be2d-7ef8ecadb9c2\\', \\'question\\': \\'Examine the video at https://www.youtube.com/watch?v=1htKBjuUWec.\\n\\nWhat does Teal\\'c say in response to the question \"Isn\\'t that hot?\"\\', \\'Level\\': \\'1\\', \\'file_name\\': \\'\\'}', additional_kwargs={}, response_metadata={}, id='7d8e75fe-5813-4525-9111-5bf79ac45cb2'),\n",
       "  AIMessage(content='To answer the question, I need to retrieve the transcript of the YouTube video at the provided URL and then search for the relevant dialogue. Here are the steps I will take:\\n\\n1. Use the `youtube_transcript` tool to get the transcript of the video.\\n2. Analyze the transcript to find the dialogue where Teal\\'c responds to the question \"Isn\\'t that hot?\".\\n\\nLet\\'s proceed with these steps.', additional_kwargs={'tool_calls': [{'id': 'tZ8u0MxYO', 'function': {'name': 'youtube_transcript', 'arguments': '{\"url\": \"https://www.youtube.com/watch?v=1htKBjuUWec\"}'}, 'index': 0}]}, response_metadata={'token_usage': {'prompt_tokens': 1109, 'total_tokens': 1225, 'completion_tokens': 116}, 'model_name': 'mistral-small-latest', 'model': 'mistral-small-latest', 'finish_reason': 'tool_calls'}, id='run--1050bf56-fe88-4997-ac36-3cd0d5d828cb-0', tool_calls=[{'name': 'youtube_transcript', 'args': {'url': 'https://www.youtube.com/watch?v=1htKBjuUWec'}, 'id': 'tZ8u0MxYO', 'type': 'tool_call'}], usage_metadata={'input_tokens': 1109, 'output_tokens': 116, 'total_tokens': 1225}),\n",
       "  ToolMessage(content=\"Wow this coffee's great I was just thinking that yeah is that cinnamon chicory tea oak [Music] isn't that hot extremely\", name='youtube_transcript', id='1eda14f2-419f-46fc-9198-5d83df050d9a', tool_call_id='tZ8u0MxYO'),\n",
       "  AIMessage(content='FINAL ANSWER: extremely', additional_kwargs={}, response_metadata={'token_usage': {'prompt_tokens': 1183, 'total_tokens': 1191, 'completion_tokens': 8}, 'model_name': 'mistral-small-latest', 'model': 'mistral-small-latest', 'finish_reason': 'stop'}, id='run--d9cd2ac0-e1f0-4dc6-93b2-cea04ce7d98d-0', usage_metadata={'input_tokens': 1183, 'output_tokens': 8, 'total_tokens': 1191})]}"
      ]
     },
     "execution_count": 7,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "response"
   ]
  },
  {
   "cell_type": "markdown",
   "id": "e7e23b98",
   "metadata": {},
   "source": [
    "## Python code understanding question test"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 6,
   "id": "2ec6b62a",
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "0\n"
     ]
    }
   ],
   "source": [
    "# Code-comprehension task — printed answer for this run was '0'.\n",
    "hum_prompt = \"\"\"{'task_id': 'f918266a-b3e0-4914-865d-4faa564f1aef', 'question': 'What is the final numeric output from the attached Python code?', 'Level': '1', 'file_name': 'f918266a-b3e0-4914-865d-4faa564f1aef.py'}\"\"\"\n",
    "\n",
    "# Rebuild the agent with the shared LLM, tools and system prompt.\n",
    "agent = create_react_agent(\n",
    "    model=llm,\n",
    "    tools=custom_tools,\n",
    "    prompt=sys_prompt,\n",
    ")\n",
    "\n",
    "response = agent.invoke(\n",
    "    {\"messages\": HumanMessage(content=hum_prompt)}\n",
    ")\n",
    "\n",
    "# Keep only the text after the 'FINAL ANSWER: ' marker mandated by sys_prompt.\n",
    "print(response[\"messages\"][-1].content.split(\"FINAL ANSWER: \")[-1])"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 7,
   "id": "5ea71b09",
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "{'messages': [HumanMessage(content=\"{'task_id': 'f918266a-b3e0-4914-865d-4faa564f1aef', 'question': 'What is the final numeric output from the attached Python code?', 'Level': '1', 'file_name': 'f918266a-b3e0-4914-865d-4faa564f1aef.py'}\", additional_kwargs={}, response_metadata={}, id='6a2762fc-e5dd-49d9-9a91-4ef9ee3f8be3'),\n",
       "  AIMessage(content='', additional_kwargs={'tool_calls': [{'id': 'c9Hw2xnQW', 'function': {'name': 'read_file', 'arguments': '{\"task_id\": \"f918266a-b3e0-4914-865d-4faa564f1aef\", \"file_name\": \"f918266a-b3e0-4914-865d-4faa564f1aef.py\"}'}, 'index': 0}]}, response_metadata={'token_usage': {'prompt_tokens': 1256, 'total_tokens': 1340, 'completion_tokens': 84}, 'model_name': 'mistral-small-latest', 'model': 'mistral-small-latest', 'finish_reason': 'tool_calls'}, id='run--223d11df-6446-4428-80a9-66bf0fa668ef-0', tool_calls=[{'name': 'read_file', 'args': {'task_id': 'f918266a-b3e0-4914-865d-4faa564f1aef', 'file_name': 'f918266a-b3e0-4914-865d-4faa564f1aef.py'}, 'id': 'c9Hw2xnQW', 'type': 'tool_call'}], usage_metadata={'input_tokens': 1256, 'output_tokens': 84, 'total_tokens': 1340}),\n",
       "  ToolMessage(content='from random import randint\\nimport time\\n\\nclass UhOh(Exception):\\n    pass\\n\\nclass Hmm:\\n    def __init__(self):\\n        self.value = randint(-100, 100)\\n\\n    def Yeah(self):\\n        if self.value == 0:\\n            return True\\n        else:\\n            raise UhOh()\\n\\ndef Okay():\\n    while True:\\n        yield Hmm()\\n\\ndef keep_trying(go, first_try=True):\\n    maybe = next(go)\\n    try:\\n        if maybe.Yeah():\\n            return maybe.value\\n    except UhOh:\\n        if first_try:\\n            print(\"Working...\")\\n            print(\"Please wait patiently...\")\\n        time.sleep(0.1)\\n        return keep_trying(go, first_try=False)\\n\\nif __name__ == \"__main__\":\\n    go = Okay()\\n    print(f\"{keep_trying(go)}\")\\n', name='read_file', id='1278381a-8f14-4679-85d2-b9e2c77bd0b5', tool_call_id='c9Hw2xnQW'),\n",
       "  AIMessage(content='The code defines a few classes and functions, and then executes a main block that prints the result of the `keep_trying` function.\\n\\nHere\\'s a step-by-step explanation of what the code does:\\n\\n1. **Imports**:\\n   - `randint` from the `random` module to generate random integers.\\n   - `time` module to introduce a delay using `time.sleep`.\\n\\n2. **Exception and Class Definitions**:\\n   - `UhOh`: A custom exception class.\\n   - `Hmm`: A class with an `__init__` method that initializes `self.value` with a random integer between -100 and 100. The `Yeah` method checks if `self.value` is 0. If it is, it returns `True`; otherwise, it raises the `UhOh` exception.\\n\\n3. **Generator Function**:\\n   - `Okay`: A generator function that yields instances of the `Hmm` class indefinitely.\\n\\n4. **Recursive Function**:\\n   - `keep_trying`: A recursive function that takes a generator `go` and a boolean `first_try`. It gets the next item from the generator, tries to call the `Yeah` method on it. If `Yeah` returns `True`, it returns the value of `maybe.value`. If an `UhOh` exception is raised, it prints a message on the first try, waits for 0.1 seconds, and recursively calls itself with `first_try` set to `False`.\\n\\n5. **Main Block**:\\n   - The `if __name__ == \"__main__\":` block initializes the generator `go` by calling `Okay()`, then prints the result of `keep_trying(go)`.\\n\\nThe code will keep generating new `Hmm` instances until it finds one where `value` is 0, then it will print 0.\\n\\nFINAL ANSWER: 0', additional_kwargs={}, response_metadata={'token_usage': {'prompt_tokens': 1547, 'total_tokens': 1946, 'completion_tokens': 399}, 'model_name': 'mistral-small-latest', 'model': 'mistral-small-latest', 'finish_reason': 'stop'}, id='run--399990c6-7aeb-40ce-a65b-e992bba2f8d5-0', usage_metadata={'input_tokens': 1547, 'output_tokens': 399, 'total_tokens': 1946})]}"
      ]
     },
     "execution_count": 7,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "response"
   ]
  },
  {
   "cell_type": "markdown",
   "id": "153e2ed8",
   "metadata": {},
   "source": [
    "## YouTube video analysis test"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 3,
   "id": "0b351378",
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "3\n"
     ]
    }
   ],
   "source": [
    "# Video-analysis task — printed answer for this run was '3'.\n",
    "hum_prompt = \"In the video https://www.youtube.com/watch?v=L1vXCYZAYYM, what is the highest number of bird species to be on camera simultaneously?\"\n",
    "\n",
    "# Rebuild the agent with the shared LLM, tools and system prompt.\n",
    "agent = create_react_agent(\n",
    "    model=llm,\n",
    "    tools=custom_tools,\n",
    "    prompt=sys_prompt,\n",
    ")\n",
    "\n",
    "response = agent.invoke(\n",
    "    {\"messages\": HumanMessage(content=hum_prompt)}\n",
    ")\n",
    "\n",
    "# Keep only the text after the 'FINAL ANSWER: ' marker mandated by sys_prompt.\n",
    "print(response[\"messages\"][-1].content.split(\"FINAL ANSWER: \")[-1])"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 4,
   "id": "32a58cbb",
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "{'messages': [HumanMessage(content='In the video https://www.youtube.com/watch?v=L1vXCYZAYYM, what is the highest number of bird species to be on camera simultaneously?', additional_kwargs={}, response_metadata={}, id='42a3f876-04f0-414d-919a-9c55dfebe467'),\n",
       "  AIMessage(content='', additional_kwargs={'tool_calls': [{'id': 'rptE7ZSgP', 'function': {'name': 'analyse_youtube_video', 'arguments': '{\"url\": \"https://www.youtube.com/watch?v=L1vXCYZAYYM\", \"video_question\": \"What is the highest number of bird species to be on camera simultaneously?\"}'}, 'index': 0}]}, response_metadata={'token_usage': {'prompt_tokens': 941, 'total_tokens': 990, 'completion_tokens': 49}, 'model_name': 'mistral-small-latest', 'model': 'mistral-small-latest', 'finish_reason': 'tool_calls'}, id='run--dee8089a-9f3c-47b0-a030-e10980a9ea53-0', tool_calls=[{'name': 'analyse_youtube_video', 'args': {'url': 'https://www.youtube.com/watch?v=L1vXCYZAYYM', 'video_question': 'What is the highest number of bird species to be on camera simultaneously?'}, 'id': 'rptE7ZSgP', 'type': 'tool_call'}], usage_metadata={'input_tokens': 941, 'output_tokens': 49, 'total_tokens': 990}),\n",
       "  ToolMessage(content='3', name='analyse_youtube_video', id='3daf5ce9-c6b2-4d98-b616-82b2b37a3a33', tool_call_id='rptE7ZSgP'),\n",
       "  AIMessage(content='FINAL ANSWER: 3', additional_kwargs={}, response_metadata={'token_usage': {'prompt_tokens': 1011, 'total_tokens': 1020, 'completion_tokens': 9}, 'model_name': 'mistral-small-latest', 'model': 'mistral-small-latest', 'finish_reason': 'stop'}, id='run--747a6804-7425-42cc-8d49-76dce6d3e0ee-0', usage_metadata={'input_tokens': 1011, 'output_tokens': 9, 'total_tokens': 1020})]}"
      ]
     },
     "execution_count": 4,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "response"
   ]
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "Python 3",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.11.13"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 5
}