commit
Browse files- app.py +4 -4
- my_agent.py +1 -1
- my_base_wiki_api.py +124 -15
- my_tool_wiki_filter_tables.py +3 -1
- test_tools.py +18 -0
app.py
CHANGED
|
@@ -12,11 +12,11 @@ DEFAULT_API_URL = "https://agents-course-unit4-scoring.hf.space"
|
|
| 12 |
# testing --------------------------------------------
|
| 13 |
testing_mode = True
|
| 14 |
questions_to_run = [
|
| 15 |
-
"8e867cd7-cff9-4e6c-867a-ff5ddc2550be", #
|
| 16 |
-
#"a1e91b78-d3d8-4675-bb8d-62741b4b68a6",
|
| 17 |
#"2d83110e-a098-4ebb-9987-066c06fa42d0", # OK
|
| 18 |
-
#
|
| 19 |
-
|
| 20 |
#"6f37996b-2ac7-44b0-8e68-6d28256631b4",
|
| 21 |
#"9d191bce-651d-4746-be2d-7ef8ecadb9c2",
|
| 22 |
#"cabe07ed-9eca-40ea-8ead-410ef5e83f91",
|
|
|
|
| 12 |
# testing --------------------------------------------
|
| 13 |
testing_mode = True
|
| 14 |
questions_to_run = [
|
| 15 |
+
#"8e867cd7-cff9-4e6c-867a-ff5ddc2550be", # not using tools
|
| 16 |
+
#"a1e91b78-d3d8-4675-bb8d-62741b4b68a6", # (!)
|
| 17 |
#"2d83110e-a098-4ebb-9987-066c06fa42d0", # OK
|
| 18 |
+
#"cca530fc-4052-43b2-b130-b30968d8aa44", # OK, need to remove FINAL_ANSWER string
|
| 19 |
+
"4fc2f1ae-8625-45b5-ab34-ad4433bc21f8", # <--------
|
| 20 |
#"6f37996b-2ac7-44b0-8e68-6d28256631b4",
|
| 21 |
#"9d191bce-651d-4746-be2d-7ef8ecadb9c2",
|
| 22 |
#"cabe07ed-9eca-40ea-8ead-410ef5e83f91",
|
my_agent.py
CHANGED
|
@@ -84,7 +84,7 @@ class MyAgent:
|
|
| 84 |
"my_tool_image_load",
|
| 85 |
"my_tool_reverse_string",
|
| 86 |
"my_tool_wiki_page_section",
|
| 87 |
-
"
|
| 88 |
],
|
| 89 |
)
|
| 90 |
#web_search_agent = CodeAgent(
|
|
|
|
| 84 |
"my_tool_image_load",
|
| 85 |
"my_tool_reverse_string",
|
| 86 |
"my_tool_wiki_page_section",
|
| 87 |
+
"_my_tool_wiki_table_filter"
|
| 88 |
],
|
| 89 |
)
|
| 90 |
#web_search_agent = CodeAgent(
|
my_base_wiki_api.py
CHANGED
|
@@ -43,6 +43,108 @@ class MyWikiAPI:
|
|
| 43 |
}).json()
|
| 44 |
return response["parse"][format]["*"]
|
| 45 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 46 |
def get_page_section(self, page_title, section_title):
|
| 47 |
section_id = self.__find_section_on_page__(page_title, section_title)
|
| 48 |
_ret = ""
|
|
@@ -53,29 +155,36 @@ class MyWikiAPI:
|
|
| 53 |
|
| 54 |
def filter_section_and_table(self, section_content, sub_section_name, year_start, year_end):
|
| 55 |
parsed = wtp.parse(section_content)
|
| 56 |
-
#print(f"***KS*** Parsed section:\n{parsed}")
|
| 57 |
-
|
| 58 |
sections = parsed.sections
|
| 59 |
-
print(f"Sections {len(sections)}")
|
| 60 |
section_found = None
|
| 61 |
for sec in sections:
|
| 62 |
-
print("---------------------------------------------------------------------")
|
| 63 |
-
print(f"{sec.title if sec.title is not None else ''}")
|
| 64 |
if sec.title is not None and sec.title.find(sub_section_name) >= 0:
|
| 65 |
section_found = sec
|
| 66 |
|
| 67 |
-
print(f"Found matching subsection: {section_found}")
|
| 68 |
-
|
| 69 |
-
print(f"Section has tables: {section_found.tables}")
|
| 70 |
|
| 71 |
-
table_data = section_found.tables[0].data()
|
| 72 |
-
print(f"Table has data: {table_data}")
|
| 73 |
rows_collected = []
|
| 74 |
-
|
| 75 |
-
|
| 76 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 77 |
|
| 78 |
-
|
| 79 |
|
|
|
|
|
|
|
|
|
|
|
|
|
| 80 |
|
| 81 |
-
|
|
|
|
| 43 |
}).json()
|
| 44 |
return response["parse"][format]["*"]
|
| 45 |
|
| 46 |
+
def __get_category_pages__(self, category, year):
    """Query the members of a Wikipedia category for one calendar year.

    Sends a single ``action=query&list=categorymembers`` request, sorted by
    timestamp and bounded by ``cmstart``/``cmend`` built from ``year``
    (``{year}-01-01T00:00:00Z`` to ``{year}-12-31T23:59:59Z``).

    Args:
        category: Category name without the ``Category:`` prefix
            (the prefix is added here).
        year: Year used to build the timestamp bounds.

    Returns:
        The decoded JSON response exactly as returned by the API. On
        success the members are under ``['query']['categorymembers']``.

    Note:
        Only one request is made with ``cmlimit=500``; the ``continue``
        token in the response is not followed, so larger result sets are
        truncated to the first page.
    """
    # References:
    #   https://en.wikipedia.org/w/api.php?action=help&modules=main#main/datatype/timestamp
    #   https://www.mediawiki.org/wiki/API:Query#Generators
    #   https://www.mediawiki.org/wiki/API:Continue
    cmstart = f"{year}-01-01T00:00:00Z"
    cmend = f"{year}-12-31T23:59:59Z"

    print(f"Listing pages in category {category} from {cmstart} to {cmend}")

    response = requests.get(
        self.WIKI_BASE_URL,
        headers=self.user_agent_headers,
        params={
            'action': 'query',
            'format': 'json',
            'list': 'categorymembers',
            'cmtitle': f'Category:{category}',
            'cmsort': 'timestamp',
            'cmprop': 'ids|title|timestamp',
            'cmstart': cmstart,
            'cmend': cmend,
            'cmlimit': '500',
        }).json()
    print(f"Response: \n{json.dumps(response,indent=4)}")

    # An error payload has no 'query' key; the diagnostic count below must
    # not turn that into a KeyError before the caller sees the response.
    members = response.get('query', {}).get('categorymembers', [])
    print(f"Got artickles: {len(members)}")

    return response
|
| 135 |
+
|
| 136 |
+
def __is_int__(self, s):
    """Report whether ``int(s)`` succeeds.

    Args:
        s: Any value; typically a table cell.

    Returns:
        True if ``int(s)`` does not raise, False otherwise. Values that
        ``int`` rejects by type (``None``, lists, ...) yield False instead
        of propagating ``TypeError``.
    """
    try:
        int(s)
    except (ValueError, TypeError):
        # ValueError: non-numeric text; TypeError: None or other non-convertible types.
        return False
    return True
|
| 143 |
+
|
| 144 |
+
def get_category(self, category, year):
    """Fetch the category listing for ``year``, log it, and return it.

    Args:
        category: Category name, passed through to
            ``__get_category_pages__``.
        year: Year, passed through to ``__get_category_pages__``.

    Returns:
        Whatever ``__get_category_pages__`` returned. Previously the
        result was only printed and the method returned None.
    """
    ret = self.__get_category_pages__(category, year)
    print(f"Got category: {category}\n{ret}")
    return ret
|
| 147 |
+
|
| 148 |
def get_page_section(self, page_title, section_title):
|
| 149 |
section_id = self.__find_section_on_page__(page_title, section_title)
|
| 150 |
_ret = ""
|
|
|
|
| 155 |
|
| 156 |
def filter_section_and_table(self, section_content, sub_section_name, year_start, year_end):
    """Return rows of a subsection's first table whose first cell is a year in range.

    Args:
        section_content: Wikitext to parse with ``wtp.parse``.
        sub_section_name: Substring looked for in each section title.
        year_start: Lower bound (inclusive) for the first-column value.
        year_end: Upper bound (inclusive) for the first-column value.

    Returns:
        A list of table rows (as produced by the table's ``data()``) whose
        first cell parses as an int within ``[year_start, year_end]``.
        Empty when no section title matches, the matched section has no
        table, or no row qualifies.
    """
    parsed = wtp.parse(section_content)

    # Deliberately no ``break``: if several titles contain the substring,
    # the last matching section is used.
    section_found = None
    for sec in parsed.sections:
        if sec.title is not None and sec.title.find(sub_section_name) >= 0:
            section_found = sec

    # Nothing matched: return early instead of dereferencing None below.
    if section_found is None:
        print(f"No subsection matching: {sub_section_name}")
        return []

    print(f"Found matching subsection: {section_found.title}")

    tables = section_found.tables
    if not tables:
        return []

    rows_collected = []
    for row in tables[0].data():
        # Defensive: skip empty rows and rows whose first cell is missing,
        # which would otherwise raise IndexError / TypeError.
        if not row or row[0] is None:
            continue
        if self.__is_int__(row[0]) and year_start <= int(row[0]) <= year_end:
            rows_collected.append(row)

    return rows_collected
|
| 174 |
+
|
| 175 |
+
def get_featured_articles(self, year):
    """Fetch "today's featured article" from the Wikimedia feed for 1 January of ``year``.

    The request URL is fixed to ``.../featured/{year}/01/01``; only the
    year varies.

    Args:
        year: Year substituted into the feed URL.

    Returns:
        A ``(title, extract)`` tuple taken from the response's ``tfa``
        entry (``tfa['titles']['canonical']`` and ``tfa['extract']``), or
        None when the response contains no ``tfa`` entry. Previously the
        values were only printed and the method returned None.
    """
    featured_url = f"https://api.wikimedia.org/feed/v1/wikipedia/en/featured/{year}/01/01"

    response = requests.get(
        featured_url,
        headers=self.user_agent_headers,
    ).json()

    # A response without 'tfa' (e.g. an error payload) used to raise KeyError.
    tfa = response.get('tfa')
    if tfa is None:
        print(f"No tfa entry in response from {featured_url}")
        return None

    print(f"tfa: {json.dumps(tfa,indent=4)}")
    title = tfa['titles']['canonical']
    extract = tfa['extract']

    print(f"tfa:\ntitle: {title}\nextract: {extract}")
    return title, extract
|
my_tool_wiki_filter_tables.py
CHANGED
|
@@ -56,4 +56,6 @@ class MyWikiTableFilterTool(Tool):
|
|
| 56 |
self.wiki = MyWikiAPI()
|
| 57 |
|
| 58 |
def forward(self, section_content, sub_section_name, year_start, year_end) -> str:
|
| 59 |
-
|
|
|
|
|
|
|
|
|
| 56 |
self.wiki = MyWikiAPI()
|
| 57 |
|
| 58 |
def forward(self, section_content, sub_section_name, year_start, year_end) -> str:
    """Delegate to ``self.wiki.filter_section_and_table`` and return its result.

    All four arguments are passed through unchanged. The result is also
    printed for debugging.

    NOTE(review): the annotation says ``str`` but the delegate's result is
    returned as-is without conversion -- confirm what callers expect.
    """
    filtered_rows = self.wiki.filter_section_and_table(
        section_content,
        sub_section_name,
        year_start,
        year_end,
    )
    print(f"***KS*** Wiki table filter tool, result: {filtered_rows}")
    return filtered_rows
|
test_tools.py
CHANGED
|
@@ -5,12 +5,27 @@ from my_tool_fen import FENTool
|
|
| 5 |
from my_tool_chess_analysis import ChessAnalysisTool
|
| 6 |
from my_tool_wiki_page_section import MyWikiPageSectionTool
|
| 7 |
from my_tool_wiki_filter_tables import MyWikiTableFilterTool
|
|
|
|
| 8 |
import pytest
|
| 9 |
import matplotlib as mp
|
| 10 |
|
| 11 |
#pytest --capture=no
|
| 12 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 13 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 14 |
@pytest.mark.parametrize("_page,_section,_sub_section,_year_start,_year_end,_exp",
|
| 15 |
[("Mercedes Sosa", "Discography", "Studio albums", 2000, 2009, 3)])
|
| 16 |
def test_tool_wiki_page_section(_page, _section, _sub_section, _year_start, _year_end, _exp):
|
|
@@ -28,6 +43,7 @@ def test_tool_wiki_page_section(_page, _section, _sub_section, _year_start, _yea
|
|
| 28 |
def test_tool_reverse_string(_inp,_exp):
|
| 29 |
assert ReverseStringTool().forward(_inp) == _exp
|
| 30 |
|
|
|
|
| 31 |
@pytest.mark.skip(reason="disabled")
|
| 32 |
@pytest.mark.parametrize("_task_id,_exp", [("cca530fc-4052-43b2-b130-b30968d8aa44", "")])
|
| 33 |
def test_tool_image_load(_task_id,_exp):
|
|
@@ -39,6 +55,7 @@ def test_tool_image_load(_task_id,_exp):
|
|
| 39 |
#plt.imshow(result)
|
| 40 |
#plt.show()
|
| 41 |
|
|
|
|
| 42 |
@pytest.mark.skip(reason="disabled")
|
| 43 |
@pytest.mark.parametrize("_task_id,_exp", [("cca530fc-4052-43b2-b130-b30968d8aa44",
|
| 44 |
"1K1111111PP11111P11RBBqP1111n111Q1111111p11b11111pp111pp1k11r111")])
|
|
@@ -67,6 +84,7 @@ def test_tool_fen(_pieces_list,_exp):
|
|
| 67 |
print(f"Got result: {fen}")
|
| 68 |
assert fen == _exp
|
| 69 |
|
|
|
|
| 70 |
@pytest.mark.skip(reason="disabled")
|
| 71 |
@pytest.mark.parametrize("_fen,_exp",
|
| 72 |
[
|
|
|
|
| 5 |
from my_tool_chess_analysis import ChessAnalysisTool
|
| 6 |
from my_tool_wiki_page_section import MyWikiPageSectionTool
|
| 7 |
from my_tool_wiki_filter_tables import MyWikiTableFilterTool
|
| 8 |
+
from my_base_wiki_api import MyWikiAPI
|
| 9 |
import pytest
|
| 10 |
import matplotlib as mp
|
| 11 |
|
| 12 |
#pytest --capture=no
|
| 13 |
|
| 14 |
+
@pytest.mark.parametrize("_year,_exp", [(2023, 3)])
def test_tool_wiki_contributions(_year, _exp):
    """Exercise ``MyWikiAPI.get_category`` for two featured-article categories.

    This is a smoke test: it only checks that the calls complete.
    """
    wiki_api = MyWikiAPI()
    #wiki_api.get_featured_articles(_year)
    # https://en.wikipedia.org/wiki/Category:Featured_articles
    for category_name in ('Featured articles', 'Former featured articles'):
        wiki_api.get_category(category_name, _year)

    # NOTE(review): placeholder assertion -- always true, ``_exp`` is not
    # compared against any result yet.
    assert _exp == _exp
|
| 26 |
+
|
| 27 |
+
|
| 28 |
+
@pytest.mark.skip(reason="disabled")
|
| 29 |
@pytest.mark.parametrize("_page,_section,_sub_section,_year_start,_year_end,_exp",
|
| 30 |
[("Mercedes Sosa", "Discography", "Studio albums", 2000, 2009, 3)])
|
| 31 |
def test_tool_wiki_page_section(_page, _section, _sub_section, _year_start, _year_end, _exp):
|
|
|
|
| 43 |
def test_tool_reverse_string(_inp,_exp):
|
| 44 |
assert ReverseStringTool().forward(_inp) == _exp
|
| 45 |
|
| 46 |
+
|
| 47 |
@pytest.mark.skip(reason="disabled")
|
| 48 |
@pytest.mark.parametrize("_task_id,_exp", [("cca530fc-4052-43b2-b130-b30968d8aa44", "")])
|
| 49 |
def test_tool_image_load(_task_id,_exp):
|
|
|
|
| 55 |
#plt.imshow(result)
|
| 56 |
#plt.show()
|
| 57 |
|
| 58 |
+
|
| 59 |
@pytest.mark.skip(reason="disabled")
|
| 60 |
@pytest.mark.parametrize("_task_id,_exp", [("cca530fc-4052-43b2-b130-b30968d8aa44",
|
| 61 |
"1K1111111PP11111P11RBBqP1111n111Q1111111p11b11111pp111pp1k11r111")])
|
|
|
|
| 84 |
print(f"Got result: {fen}")
|
| 85 |
assert fen == _exp
|
| 86 |
|
| 87 |
+
|
| 88 |
@pytest.mark.skip(reason="disabled")
|
| 89 |
@pytest.mark.parametrize("_fen,_exp",
|
| 90 |
[
|