neznib committed on
Commit
1691123
·
1 Parent(s): 5fbf8f6
agent.py CHANGED
@@ -139,11 +139,34 @@ def get_graph(llm):
139
  query: The search query."""
140
  print("\n-------------------- Tool (Wikipedia) has been called --------------------\n")
141
  search_docs = WikipediaLoader(query=query, load_max_docs=2).load()
142
- formatted_search_docs = "\n\n---\n\n".join(
143
- [
144
- f'<Document source="{doc.metadata["source"]}" page="{doc.metadata["title"]} page="{doc.metadata.get("page", "")}"/>\n{doc.page_content}\n</Document>'
145
- for doc in search_docs
146
- ])
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
147
  return formatted_search_docs
148
 
149
  @tool
 
139
  query: The search query."""
140
  print("\n-------------------- Tool (Wikipedia) has been called --------------------\n")
141
  search_docs = WikipediaLoader(query=query, load_max_docs=2).load()
142
+
143
+ parts: list[str] = []
144
+
145
+ for doc in search_docs:
146
+ parts.append(
147
+ f'<Document source="{doc.metadata["source"]}" '
148
+ f'title="{doc.metadata["title"]}" '
149
+ f'page="{doc.metadata.get("page", "")}">\n'
150
+ f'{doc.page_content}\n</Document>'
151
+ )
152
+
153
+ try:
154
+ print("---------------------------------")
155
+ print("Loading tables from: ", doc.metadata["source"])
156
+ print("---------------------------------")
157
+ tables = WikipediaTableLoader(url=doc.metadata["source"], title=doc.metadata["title"]).load()
158
+ for i, table in enumerate(tables):
159
+ parts.append(
160
+ f'<Document source="{table.metadata["source"]}" '
161
+ f'title="{table.metadata["title"]}" '
162
+ f'table_index="{i}">\n'
163
+ f'{table.page_content}\n</Document>'
164
+ )
165
+ except Exception:
166
+ pass
167
+
168
+ formatted_search_docs = "\n\n---\n\n".join(parts)
169
+
170
  return formatted_search_docs
171
 
172
  @tool
langchain_custom.py CHANGED
@@ -1,3 +1,5 @@
 
 
1
  from langchain_community.document_loaders.base import BaseLoader
2
  from langchain_core.documents import Document
3
  import pandas as pd
@@ -12,7 +14,7 @@ class WikipediaTableLoader(BaseLoader):
12
  def load(self):
13
 
14
  html = requests.get(self.url, timeout=10).text
15
- dfs = pd.read_html(html) # grab the tables
16
  docs = []
17
 
18
  for i, df in enumerate(dfs):
 
1
+ from io import StringIO
2
+
3
  from langchain_community.document_loaders.base import BaseLoader
4
  from langchain_core.documents import Document
5
  import pandas as pd
 
14
  def load(self):
15
 
16
  html = requests.get(self.url, timeout=10).text
17
+ dfs = pd.read_html(StringIO(html)) # grab the tables
18
  docs = []
19
 
20
  for i, df in enumerate(dfs):
playground_api.ipynb CHANGED
@@ -226,8 +226,8 @@
226
  {
227
  "metadata": {
228
  "ExecuteTime": {
229
- "end_time": "2025-04-30T13:04:59.351968Z",
230
- "start_time": "2025-04-30T13:04:59.338937Z"
231
  }
232
  },
233
  "cell_type": "code",
@@ -242,13 +242,13 @@
242
  ],
243
  "id": "799bcaab50ebff55",
244
  "outputs": [],
245
- "execution_count": 16
246
  },
247
  {
248
  "metadata": {
249
  "ExecuteTime": {
250
- "end_time": "2025-04-30T13:04:59.536418Z",
251
- "start_time": "2025-04-30T13:04:59.526232Z"
252
  }
253
  },
254
  "cell_type": "code",
@@ -495,12 +495,12 @@
495
  "</div>"
496
  ]
497
  },
498
- "execution_count": 17,
499
  "metadata": {},
500
  "output_type": "execute_result"
501
  }
502
  ],
503
- "execution_count": 17
504
  },
505
  {
506
  "metadata": {
 
226
  {
227
  "metadata": {
228
  "ExecuteTime": {
229
+ "end_time": "2025-05-03T17:49:49.209881Z",
230
+ "start_time": "2025-05-03T17:49:48.876295Z"
231
  }
232
  },
233
  "cell_type": "code",
 
242
  ],
243
  "id": "799bcaab50ebff55",
244
  "outputs": [],
245
+ "execution_count": 1
246
  },
247
  {
248
  "metadata": {
249
  "ExecuteTime": {
250
+ "end_time": "2025-05-03T17:49:49.230082Z",
251
+ "start_time": "2025-05-03T17:49:49.221606Z"
252
  }
253
  },
254
  "cell_type": "code",
 
495
  "</div>"
496
  ]
497
  },
498
+ "execution_count": 2,
499
  "metadata": {},
500
  "output_type": "execute_result"
501
  }
502
  ],
503
+ "execution_count": 2
504
  },
505
  {
506
  "metadata": {
requirements.txt CHANGED
@@ -17,4 +17,6 @@ matplotlib ~=3.10.1
17
  arxiv ~=2.2.0
18
  pymupdf ~=1.25.5
19
  youtube-transcript-api ~=1.0.3
20
- assemblyai ~=0.40.2
 
 
 
17
  arxiv ~=2.2.0
18
  pymupdf ~=1.25.5
19
  youtube-transcript-api ~=1.0.3
20
+ assemblyai ~=0.40.2
21
+ lxml ~=5.4.0
22
+ tabulate ~=0.9.0
test_playground.ipynb ADDED
@@ -0,0 +1,190 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "cells": [
3
+ {
4
+ "cell_type": "code",
5
+ "id": "initial_id",
6
+ "metadata": {
7
+ "collapsed": true,
8
+ "ExecuteTime": {
9
+ "end_time": "2025-05-03T18:40:33.108625Z",
10
+ "start_time": "2025-05-03T18:40:29.698376Z"
11
+ }
12
+ },
13
+ "source": [
14
+ "from io import StringIO\n",
15
+ "\n",
16
+ "from dotenv import load_dotenv\n",
17
+ "from gradio.server_messages import BaseMessage\n",
18
+ "from jinja2 import BaseLoader\n",
19
+ "\n",
20
+ "from agent import *\n",
21
+ "\n",
22
+ "load_dotenv()\n",
23
+ "\n",
24
+ "llm = get_llm()\n",
25
+ "\n",
26
+ "graph = get_graph(llm)\n",
27
+ "\n",
28
+ "# graph.invoke({\"messages\": [HumanMessage(content=question),]})\n"
29
+ ],
30
+ "outputs": [
31
+ {
32
+ "name": "stderr",
33
+ "output_type": "stream",
34
+ "text": [
35
+ "/Users/dennis/PycharmProjects/Final_Assignment_Template/.venv/lib/python3.11/site-packages/tqdm/auto.py:21: TqdmWarning: IProgress not found. Please update jupyter and ipywidgets. See https://ipywidgets.readthedocs.io/en/stable/user_install.html\n",
36
+ " from .autonotebook import tqdm as notebook_tqdm\n"
37
+ ]
38
+ }
39
+ ],
40
+ "execution_count": 1
41
+ },
42
+ {
43
+ "metadata": {
44
+ "ExecuteTime": {
45
+ "end_time": "2025-05-03T18:41:12.685273Z",
46
+ "start_time": "2025-05-03T18:40:33.126333Z"
47
+ }
48
+ },
49
+ "cell_type": "code",
50
+ "source": [
51
+ "result = graph.invoke(\n",
52
+ " {\n",
53
+ " \"messages\": [\n",
54
+ " HumanMessage(\n",
55
+ " content=\"How many studio albums were published by Mercedes Sosa between 2000 and 2009 (included)? You can use the latest 2022 version of english wikipedia.\"\n",
56
+ " ),\n",
57
+ " ]\n",
58
+ " }\n",
59
+ ")"
60
+ ],
61
+ "id": "96cdb32d9145e6b3",
62
+ "outputs": [
63
+ {
64
+ "name": "stdout",
65
+ "output_type": "stream",
66
+ "text": [
67
+ "\n",
68
+ "-------------------- Starting to create a plan --------------------\n",
69
+ "\n",
70
+ "Waiting for 5 seconds...\n",
71
+ "The plan is: 1. Access the English Wikipedia (2022 version if available).\n",
72
+ "2. Search for \"Mercedes Sosa discography\".\n",
73
+ "3. Identify the section listing studio albums.\n",
74
+ "4. Filter the list to include only albums released between 2000 and 2009 (inclusive).\n",
75
+ "5. Count the number of albums that meet the criteria.\n",
76
+ "6. Provide the count as the answer.\n",
77
+ "\n",
78
+ "-------------------- Agent has been called -----------------------------------\n",
79
+ "\n",
80
+ "Waiting for 5 seconds...\n",
81
+ "Agent has made a decision:\n",
82
+ " [{'name': 'wiki_search', 'args': {'query': 'Mercedes Sosa discography'}, 'id': '416cac28-6fa8-4f45-a5db-f6bc6d8e1d1a', 'type': 'tool_call'}]\n",
83
+ "\n",
84
+ "-------------------- Decision of forwarding has been made --------------------\n",
85
+ "\n",
86
+ "Waiting for 2 seconds...\n",
87
+ "This is round: 2\n",
88
+ "The last message is: content='' additional_kwargs={'function_call': {'name': 'wiki_search', 'arguments': '{\"query\": \"Mercedes Sosa discography\"}'}} response_metadata={'prompt_feedback': {'block_reason': 0, 'safety_ratings': []}, 'finish_reason': 'STOP', 'model_name': 'gemini-2.0-flash', 'safety_ratings': []} id='run-80c57808-f76d-495c-bc1a-e22b89556475-0' tool_calls=[{'name': 'wiki_search', 'args': {'query': 'Mercedes Sosa discography'}, 'id': '416cac28-6fa8-4f45-a5db-f6bc6d8e1d1a', 'type': 'tool_call'}] usage_metadata={'input_tokens': 622, 'output_tokens': 7, 'total_tokens': 629, 'input_token_details': {'cache_read': 0}}\n",
89
+ "\n",
90
+ "-------------------- Tool (Wikipedia) has been called --------------------\n",
91
+ "\n",
92
+ "---------------------------------\n",
93
+ "Loading tables from: https://en.wikipedia.org/wiki/Mercedes_Sosa\n",
94
+ "---------------------------------\n",
95
+ "---------------------------------\n",
96
+ "Loading tables from: https://en.wikipedia.org/wiki/Joan_Baez_discography\n",
97
+ "---------------------------------\n",
98
+ "\n",
99
+ "-------------------- Agent has been called -----------------------------------\n",
100
+ "\n",
101
+ "Waiting for 5 seconds...\n",
102
+ "Agent has made a decision:\n",
103
+ " [{'name': 'wiki_search', 'args': {'query': 'Mercedes Sosa discography'}, 'id': '77eb4086-528b-48ea-b139-70382e84a80c', 'type': 'tool_call'}]\n",
104
+ "\n",
105
+ "-------------------- Decision of forwarding has been made --------------------\n",
106
+ "\n",
107
+ "Waiting for 2 seconds...\n",
108
+ "This is round: 3\n",
109
+ "The last message is: content='' additional_kwargs={'function_call': {'name': 'wiki_search', 'arguments': '{\"query\": \"Mercedes Sosa discography\"}'}} response_metadata={'prompt_feedback': {'block_reason': 0, 'safety_ratings': []}, 'finish_reason': 'STOP', 'model_name': 'gemini-2.0-flash', 'safety_ratings': []} id='run-3240832b-3a9d-48b5-9c58-0f9aa5dd8658-0' tool_calls=[{'name': 'wiki_search', 'args': {'query': 'Mercedes Sosa discography'}, 'id': '77eb4086-528b-48ea-b139-70382e84a80c', 'type': 'tool_call'}] usage_metadata={'input_tokens': 17925, 'output_tokens': 7, 'total_tokens': 17932, 'input_token_details': {'cache_read': 0}}\n",
110
+ "\n",
111
+ "-------------------- Tool (Wikipedia) has been called --------------------\n",
112
+ "\n",
113
+ "---------------------------------\n",
114
+ "Loading tables from: https://en.wikipedia.org/wiki/Mercedes_Sosa\n",
115
+ "---------------------------------\n",
116
+ "---------------------------------\n",
117
+ "Loading tables from: https://en.wikipedia.org/wiki/Joan_Baez_discography\n",
118
+ "---------------------------------\n",
119
+ "\n",
120
+ "-------------------- Agent has been called -----------------------------------\n",
121
+ "\n",
122
+ "Waiting for 5 seconds...\n",
123
+ "Agent has made a decision:\n",
124
+ " Based on the Wikipedia search results for \"Mercedes Sosa discography\", the studio albums published by Mercedes Sosa between 2000 and 2009 (inclusive) are:\n",
125
+ "\n",
126
+ "* 2005 - Corazón Libre\n",
127
+ "* 2009 - Cantora 1\n",
128
+ "* 2009 - Cantora 2\n",
129
+ "\n",
130
+ "Therefore, the answer is 3. []\n",
131
+ "\n",
132
+ "-------------------- Decision of forwarding has been made --------------------\n",
133
+ "\n",
134
+ "Waiting for 2 seconds...\n",
135
+ "This is round: 4\n",
136
+ "The last message is: content='Based on the Wikipedia search results for \"Mercedes Sosa discography\", the studio albums published by Mercedes Sosa between 2000 and 2009 (inclusive) are:\\n\\n* 2005 - Corazón Libre\\n* 2009 - Cantora 1\\n* 2009 - Cantora 2\\n\\nTherefore, the answer is 3.' additional_kwargs={} response_metadata={'prompt_feedback': {'block_reason': 0, 'safety_ratings': []}, 'finish_reason': 'STOP', 'model_name': 'gemini-2.0-flash', 'safety_ratings': []} id='run-59b32104-a935-46ed-a8fc-c530bbe2816e-0' usage_metadata={'input_tokens': 35228, 'output_tokens': 79, 'total_tokens': 35307, 'input_token_details': {'cache_read': 0}}\n",
137
+ "\n",
138
+ "-------------------- Generating Answer -----------------------------------\n",
139
+ "\n",
140
+ "Waiting for 5 seconds...\n",
141
+ "The final answer is: 3\n"
142
+ ]
143
+ }
144
+ ],
145
+ "execution_count": 2
146
+ },
147
+ {
148
+ "metadata": {
149
+ "ExecuteTime": {
150
+ "end_time": "2025-05-03T18:30:59.650996Z",
151
+ "start_time": "2025-05-03T18:30:59.647709Z"
152
+ }
153
+ },
154
+ "cell_type": "code",
155
+ "source": "",
156
+ "id": "ce8f566e12feb242",
157
+ "outputs": [],
158
+ "execution_count": null
159
+ },
160
+ {
161
+ "metadata": {},
162
+ "cell_type": "code",
163
+ "outputs": [],
164
+ "execution_count": null,
165
+ "source": "",
166
+ "id": "27e7946b3191e98f"
167
+ }
168
+ ],
169
+ "metadata": {
170
+ "kernelspec": {
171
+ "display_name": "Python 3",
172
+ "language": "python",
173
+ "name": "python3"
174
+ },
175
+ "language_info": {
176
+ "codemirror_mode": {
177
+ "name": "ipython",
178
+ "version": 2
179
+ },
180
+ "file_extension": ".py",
181
+ "mimetype": "text/x-python",
182
+ "name": "python",
183
+ "nbconvert_exporter": "python",
184
+ "pygments_lexer": "ipython2",
185
+ "version": "2.7.6"
186
+ }
187
+ },
188
+ "nbformat": 4,
189
+ "nbformat_minor": 5
190
+ }