krzsam committed on
Commit
6d959c6
·
1 Parent(s): 834b7c1
Files changed (5) hide show
  1. app.py +4 -4
  2. my_agent.py +1 -1
  3. my_base_wiki_api.py +124 -15
  4. my_tool_wiki_filter_tables.py +3 -1
  5. test_tools.py +18 -0
app.py CHANGED
@@ -12,11 +12,11 @@ DEFAULT_API_URL = "https://agents-course-unit4-scoring.hf.space"
12
  # testing --------------------------------------------
13
  testing_mode = True
14
  questions_to_run = [
15
- "8e867cd7-cff9-4e6c-867a-ff5ddc2550be", # good answer, wrong albums
16
- #"a1e91b78-d3d8-4675-bb8d-62741b4b68a6",
17
  #"2d83110e-a098-4ebb-9987-066c06fa42d0", # OK
18
- # "cca530fc-4052-43b2-b130-b30968d8aa44", # OK, need to remove FINAL_ANSWER string
19
- #"4fc2f1ae-8625-45b5-ab34-ad4433bc21f8", # <--------
20
  #"6f37996b-2ac7-44b0-8e68-6d28256631b4",
21
  #"9d191bce-651d-4746-be2d-7ef8ecadb9c2",
22
  #"cabe07ed-9eca-40ea-8ead-410ef5e83f91",
 
12
  # testing --------------------------------------------
13
  testing_mode = True
14
  questions_to_run = [
15
+ #"8e867cd7-cff9-4e6c-867a-ff5ddc2550be", # not using tools
16
+ #"a1e91b78-d3d8-4675-bb8d-62741b4b68a6", # (!)
17
  #"2d83110e-a098-4ebb-9987-066c06fa42d0", # OK
18
+ #"cca530fc-4052-43b2-b130-b30968d8aa44", # OK, need to remove FINAL_ANSWER string
19
+ "4fc2f1ae-8625-45b5-ab34-ad4433bc21f8", # <--------
20
  #"6f37996b-2ac7-44b0-8e68-6d28256631b4",
21
  #"9d191bce-651d-4746-be2d-7ef8ecadb9c2",
22
  #"cabe07ed-9eca-40ea-8ead-410ef5e83f91",
my_agent.py CHANGED
@@ -84,7 +84,7 @@ class MyAgent:
84
  "my_tool_image_load",
85
  "my_tool_reverse_string",
86
  "my_tool_wiki_page_section",
87
- "my_tool_wiki_filter_tables"
88
  ],
89
  )
90
  #web_search_agent = CodeAgent(
 
84
  "my_tool_image_load",
85
  "my_tool_reverse_string",
86
  "my_tool_wiki_page_section",
87
+ "_my_tool_wiki_table_filter"
88
  ],
89
  )
90
  #web_search_agent = CodeAgent(
my_base_wiki_api.py CHANGED
@@ -43,6 +43,108 @@ class MyWikiAPI:
43
  }).json()
44
  return response["parse"][format]["*"]
45
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
46
  def get_page_section(self, page_title, section_title):
47
  section_id = self.__find_section_on_page__(page_title, section_title)
48
  _ret = ""
@@ -53,29 +155,36 @@ class MyWikiAPI:
53
 
54
  def filter_section_and_table(self, section_content, sub_section_name, year_start, year_end):
55
  parsed = wtp.parse(section_content)
56
- #print(f"***KS*** Parsed section:\n{parsed}")
57
-
58
  sections = parsed.sections
59
- print(f"Sections {len(sections)}")
60
  section_found = None
61
  for sec in sections:
62
- print("---------------------------------------------------------------------")
63
- print(f"{sec.title if sec.title is not None else ''}")
64
  if sec.title is not None and sec.title.find(sub_section_name) >= 0:
65
  section_found = sec
66
 
67
- print(f"Found matching subsection: {section_found}")
68
-
69
- print(f"Section has tables: {section_found.tables}")
70
 
71
- table_data = section_found.tables[0].data()
72
- print(f"Table has data: {table_data}")
73
  rows_collected = []
74
- for row in table_data:
75
- print("---------------------------------------")
76
- print(f"Table has data: {row}")
 
 
 
 
 
 
 
 
 
 
 
 
77
 
78
- # TODO do filtering of rows and return the filtered table
79
 
 
 
 
 
80
 
81
- return ["aaa"]
 
43
  }).json()
44
  return response["parse"][format]["*"]
45
 
46
def __get_category_pages__(self, category, year):
    """Return the members of a Wikipedia category bounded to one calendar year.

    Queries the MediaWiki ``list=categorymembers`` API sorted by timestamp and
    restricted to *year*, and follows the ``cmcontinue`` token so categories
    with more than 500 members (the per-request API maximum) are retrieved
    completely.  The previous version issued a single request, so large
    categories were silently truncated.

    References:
      https://www.mediawiki.org/wiki/API:Categorymembers
      https://www.mediawiki.org/wiki/API:Continue

    :param category: category name WITHOUT the ``Category:`` prefix.
    :param year: calendar year used to bound the member timestamps.
    :return: the last raw API response dict, with
             ``['query']['categorymembers']`` replaced by the accumulated
             list of all members across continuation pages.
    """
    # MediaWiki timestamp format, e.g. 2001-01-15T14:56:00Z
    cmstart = f"{year}-01-01T00:00:00Z"
    cmend = f"{year}-12-31T23:59:59Z"

    print(f"Listing pages in category {category} from {cmstart} to {cmend}")

    params = {
        'action': 'query',
        'format': 'json',
        'list': 'categorymembers',
        'cmtitle': f'Category:{category}',
        'cmsort': 'timestamp',
        'cmprop': 'ids|title|timestamp',
        'cmstart': cmstart,
        'cmend': cmend,
        'cmlimit': '500',  # API maximum per request for anonymous clients
    }

    members = []
    while True:
        response = requests.get(
            self.WIKI_BASE_URL,
            headers=self.user_agent_headers,
            params=params).json()
        members.extend(response.get('query', {}).get('categorymembers', []))
        # Follow the continuation token until the listing is exhausted.
        if "continue" in response and "cmcontinue" in response["continue"]:
            params['cmcontinue'] = response["continue"]["cmcontinue"]
        else:
            break

    print(f"Got articles: {len(members)}")

    # Preserve the raw-response shape callers expect, but with the full list.
    response['query']['categorymembers'] = members
    return response
135
+
136
+ def __is_int__(self, s):
137
+ try:
138
+ int(s)
139
+ except ValueError:
140
+ return False
141
+ else:
142
+ return True
143
+
144
def get_category(self, category, year):
    """Fetch and return the members of *category* added during *year*.

    Thin wrapper around ``__get_category_pages__`` that also logs the raw
    result.

    :param category: category name WITHOUT the ``Category:`` prefix.
    :param year: calendar year bounding the member timestamps.
    :return: the raw API response dict.  The previous version discarded the
             fetched data and implicitly returned ``None``, making the method
             useless to callers.
    """
    ret = self.__get_category_pages__(category, year)
    print(f"Got category: {category}\n{ret}")
    return ret
147
+
148
  def get_page_section(self, page_title, section_title):
149
  section_id = self.__find_section_on_page__(page_title, section_title)
150
  _ret = ""
 
155
 
156
def filter_section_and_table(self, section_content, sub_section_name, year_start, year_end):
    """Filter a wikitext section down to table rows within a year range.

    Parses *section_content* as wikitext, finds the (last) sub-section whose
    title contains *sub_section_name*, takes the first table of that
    sub-section, and returns the rows whose first cell is an integer year in
    ``[year_start, year_end]`` (inclusive).

    :param section_content: raw wikitext of the page section.
    :param sub_section_name: substring matched against sub-section titles.
    :param year_start: inclusive lower bound for the year in column 0.
    :param year_end: inclusive upper bound for the year in column 0.
    :return: list of matching table rows; empty when no sub-section or table
             was found, or no row is in range.
    """
    parsed = wtp.parse(section_content)
    sections = parsed.sections
    section_found = None
    for sec in sections:
        if sec.title is not None and sec.title.find(sub_section_name) >= 0:
            section_found = sec  # keep the last match, as before

    # Bug fix: the previous version printed section_found.title
    # unconditionally and raised AttributeError when nothing matched,
    # despite guarding for None just below.
    if section_found is not None:
        print(f"Found matching subsection: {section_found.title}")
    else:
        print(f"No subsection matching '{sub_section_name}' found")

    rows_collected = []
    if section_found is not None and section_found.tables is not None and len(section_found.tables) > 0:
        table_data = section_found.tables[0].data()
        for row in table_data:
            # Header rows are skipped naturally: their first cell is not an int.
            if self.__is_int__(row[0]) and year_start <= int(row[0]) <= year_end:
                rows_collected.append(row)

    return rows_collected
174
+
175
def get_featured_articles(self, year, month=1, day=1):
    """Fetch Wikipedia's featured-content feed and return that day's TFA.

    Generalized: the previous version hard-coded the date to January 1st even
    though it accepted a *year* parameter; *month* and *day* now default to
    1 so the old behaviour is preserved for existing callers.

    :param year: feed year.
    :param month: feed month (1-12), default 1.
    :param day: feed day (1-31), default 1.
    :return: ``(title, extract)`` of the day's featured article (TFA).
             The previous version only printed these values and implicitly
             returned ``None``.
    """
    featured_url = f"https://api.wikimedia.org/feed/v1/wikipedia/en/featured/{year}/{month:02d}/{day:02d}"

    response = requests.get(
        featured_url,
        headers=self.user_agent_headers,
    ).json()

    tfa = response['tfa']
    print(f"tfa: {json.dumps(tfa,indent=4)}")
    title = tfa['titles']['canonical']
    extract = tfa['extract']

    print(f"tfa:\ntitle: {title}\nextract: {extract}")
    return (title, extract)
my_tool_wiki_filter_tables.py CHANGED
@@ -56,4 +56,6 @@ class MyWikiTableFilterTool(Tool):
56
  self.wiki = MyWikiAPI()
57
 
58
  def forward(self, section_content, sub_section_name, year_start, year_end) -> str:
59
- return self.wiki.filter_section_and_table(section_content,sub_section_name, year_start, year_end)
 
 
 
56
  self.wiki = MyWikiAPI()
57
 
58
def forward(self, section_content, sub_section_name, year_start, year_end) -> str:
    """Tool entry point: delegate to MyWikiAPI.filter_section_and_table and
    return its result, logging it along the way.

    NOTE(review): filter_section_and_table appears to return a list of table
    rows, so the ``-> str`` annotation looks inconsistent — confirm against
    the tool's declared output_type before changing it.
    """
    result = self.wiki.filter_section_and_table(section_content,sub_section_name, year_start, year_end)
    # ***KS*** debug trace deliberately left in by the author
    print(f"***KS*** Wiki table filter tool, result: {result}")
    return result
test_tools.py CHANGED
@@ -5,12 +5,27 @@ from my_tool_fen import FENTool
5
  from my_tool_chess_analysis import ChessAnalysisTool
6
  from my_tool_wiki_page_section import MyWikiPageSectionTool
7
  from my_tool_wiki_filter_tables import MyWikiTableFilterTool
 
8
  import pytest
9
  import matplotlib as mp
10
 
11
  #pytest --capture=no
12
 
 
 
 
 
 
 
 
13
 
 
 
 
 
 
 
 
14
  @pytest.mark.parametrize("_page,_section,_sub_section,_year_start,_year_end,_exp",
15
  [("Mercedes Sosa", "Discography", "Studio albums", 2000, 2009, 3)])
16
  def test_tool_wiki_page_section(_page, _section, _sub_section, _year_start, _year_end, _exp):
@@ -28,6 +43,7 @@ def test_tool_wiki_page_section(_page, _section, _sub_section, _year_start, _yea
28
  def test_tool_reverse_string(_inp,_exp):
29
  assert ReverseStringTool().forward(_inp) == _exp
30
 
 
31
  @pytest.mark.skip(reason="disabled")
32
  @pytest.mark.parametrize("_task_id,_exp", [("cca530fc-4052-43b2-b130-b30968d8aa44", "")])
33
  def test_tool_image_load(_task_id,_exp):
@@ -39,6 +55,7 @@ def test_tool_image_load(_task_id,_exp):
39
  #plt.imshow(result)
40
  #plt.show()
41
 
 
42
  @pytest.mark.skip(reason="disabled")
43
  @pytest.mark.parametrize("_task_id,_exp", [("cca530fc-4052-43b2-b130-b30968d8aa44",
44
  "1K1111111PP11111P11RBBqP1111n111Q1111111p11b11111pp111pp1k11r111")])
@@ -67,6 +84,7 @@ def test_tool_fen(_pieces_list,_exp):
67
  print(f"Got result: {fen}")
68
  assert fen == _exp
69
 
 
70
  @pytest.mark.skip(reason="disabled")
71
  @pytest.mark.parametrize("_fen,_exp",
72
  [
 
5
  from my_tool_chess_analysis import ChessAnalysisTool
6
  from my_tool_wiki_page_section import MyWikiPageSectionTool
7
  from my_tool_wiki_filter_tables import MyWikiTableFilterTool
8
+ from my_base_wiki_api import MyWikiAPI
9
  import pytest
10
  import matplotlib as mp
11
 
12
  #pytest --capture=no
13
 
14
@pytest.mark.parametrize("_year,_exp", [(2023, 3)])
def test_tool_wiki_contributions(_year, _exp):
    """Smoke-test MyWikiAPI.get_category against the live Wikipedia API.

    NOTE(review): the final assertion is a tautology (``_exp == _exp``) — it
    always passes, so this test only verifies that the two get_category calls
    do not raise.  The ``_exp`` value (3) is currently unused as an actual
    expectation; tighten the assertion once get_category exposes a result.
    """
    api = MyWikiAPI()
    #api.get_featured_articles(_year)
    # https://en.wikipedia.org/wiki/Category:Featured_articles
    category = 'Featured articles'
    api.get_category(category, _year)

    category = 'Former featured articles'
    api.get_category(category, _year)

    assert _exp == _exp
26
+
27
+
28
+ @pytest.mark.skip(reason="disabled")
29
  @pytest.mark.parametrize("_page,_section,_sub_section,_year_start,_year_end,_exp",
30
  [("Mercedes Sosa", "Discography", "Studio albums", 2000, 2009, 3)])
31
  def test_tool_wiki_page_section(_page, _section, _sub_section, _year_start, _year_end, _exp):
 
43
  def test_tool_reverse_string(_inp,_exp):
44
  assert ReverseStringTool().forward(_inp) == _exp
45
 
46
+
47
  @pytest.mark.skip(reason="disabled")
48
  @pytest.mark.parametrize("_task_id,_exp", [("cca530fc-4052-43b2-b130-b30968d8aa44", "")])
49
  def test_tool_image_load(_task_id,_exp):
 
55
  #plt.imshow(result)
56
  #plt.show()
57
 
58
+
59
  @pytest.mark.skip(reason="disabled")
60
  @pytest.mark.parametrize("_task_id,_exp", [("cca530fc-4052-43b2-b130-b30968d8aa44",
61
  "1K1111111PP11111P11RBBqP1111n111Q1111111p11b11111pp111pp1k11r111")])
 
84
  print(f"Got result: {fen}")
85
  assert fen == _exp
86
 
87
+
88
  @pytest.mark.skip(reason="disabled")
89
  @pytest.mark.parametrize("_fen,_exp",
90
  [