krzsam commited on
Commit
a573bfb
·
1 Parent(s): 6d959c6
my_agent.py CHANGED
@@ -11,6 +11,7 @@ from my_tool_chess_analysis import ChessAnalysisTool
11
  from my_prompt_config import MyPromptConfig
12
  from my_tool_wiki_page_section import MyWikiPageSectionTool
13
  from my_tool_wiki_filter_tables import MyWikiTableFilterTool
 
14
  from dotenv import load_dotenv
15
 
16
  # https://huggingface.co/docs/transformers/model_doc/mistral?usage=Pipeline
@@ -60,6 +61,7 @@ class MyAgent:
60
  MyWikiPageSectionTool(),
61
  MyWikiTableFilterTool(),
62
  WebSearchTool(),
 
63
  FinalAnswerTool(),
64
  ],
65
  model=self.model,
@@ -84,7 +86,8 @@ class MyAgent:
84
  "my_tool_image_load",
85
  "my_tool_reverse_string",
86
  "my_tool_wiki_page_section",
87
- "_my_tool_wiki_table_filter"
 
88
  ],
89
  )
90
  #web_search_agent = CodeAgent(
 
11
  from my_prompt_config import MyPromptConfig
12
  from my_tool_wiki_page_section import MyWikiPageSectionTool
13
  from my_tool_wiki_filter_tables import MyWikiTableFilterTool
14
+ from my_tool_wiki_featured_articles import MyWikiFeaturedArticles
15
  from dotenv import load_dotenv
16
 
17
  # https://huggingface.co/docs/transformers/model_doc/mistral?usage=Pipeline
 
61
  MyWikiPageSectionTool(),
62
  MyWikiTableFilterTool(),
63
  WebSearchTool(),
64
+ MyWikiFeaturedArticles(),
65
  FinalAnswerTool(),
66
  ],
67
  model=self.model,
 
86
  "my_tool_image_load",
87
  "my_tool_reverse_string",
88
  "my_tool_wiki_page_section",
89
+ "my_tool_wiki_table_filter",
90
+ "my_tool_wiki_featured_articles"
91
  ],
92
  )
93
  #web_search_agent = CodeAgent(
my_base_wiki_api.py CHANGED
@@ -1,12 +1,15 @@
1
  import requests
2
  import json
3
  import wikitextparser as wtp
 
 
4
 
5
  # https://gist.github.com/scionoftech/0f35d5e231be2cf46823d774023268b6
6
  # https://www.mediawiki.org/wiki/API:Main_page
7
 
8
  class MyWikiAPI:
9
  WIKI_BASE_URL = "https://en.wikipedia.org/w/api.php"
 
10
  user_agent_headers = {"user-Agent":
11
  "AgentsCourseAssignment/1.0 (https://huggingface.co/spaces/krzsam/Agents-Course-Assignment)"}
12
 
@@ -43,95 +46,26 @@ class MyWikiAPI:
43
  }).json()
44
  return response["parse"][format]["*"]
45
 
46
- def __get_category_pages__(self, category, year):
47
- # https://en.wikipedia.org/w/api.php?action=help&modules=main#main/datatype/timestamp
48
- # https://www.mediawiki.org/wiki/API:Query#Generators
49
- # https://www.mediawiki.org/wiki/API:Continue
50
- # https://stackoverflow.com/questions/35826469/how-to-combine-two-wikipedia-api-calls-into-one
51
- # 2001-01-15T14:56:00Z
52
- # api.php?action=query&generator=categorymembers&gcmtitle=Category:Physics&prop=info [open in sandbox]
53
-
54
- cmstart = f"{year}-01-01T00:00:00Z"
55
- cmend = f"{year}-12-31T23:59:59Z"
56
-
57
- print(f"Listing pages in category {category} from {cmstart} to {cmend}")
58
-
59
  response = requests.get(
60
- self.WIKI_BASE_URL,
61
  headers=self.user_agent_headers,
62
- params={
63
- 'action': 'query',
64
- 'format': 'json',
65
- #'prop': 'extracts',
66
- 'list': 'categorymembers',
67
- #'generator': 'categorymembers',
68
- 'cmtitle': f'Category:{category}',
69
- 'cmsort': 'timestamp',
70
- 'cmprop': 'ids|title|timestamp',
71
- 'cmstart': cmstart,
72
- 'cmend': cmend,
73
- 'cmlimit': '500',
74
- #'cmlimit': '20',
75
- }).json()
76
- print(f"Response: \n{json.dumps(response,indent=4)}")
77
-
78
- print(f"Got artickles: {len(response['query']['categorymembers'])}")
79
-
80
- #cmcontinue = ""
81
- #while(True):
82
- # if "continue" in response and "cmcontinue" in response["continue"]:
83
- # cmcontinue = response["continue"]["cmcontinue"]
84
- # else:
85
- # break
86
-
87
- # print(f"cmcontinue: {cmcontinue}")
88
-
89
- # response = requests.get(
90
- # self.WIKI_BASE_URL,
91
- # headers=self.user_agent_headers,
92
- # params={
93
- # 'action': 'query',
94
- # 'format': 'json',
95
- # #'prop': 'extracts',
96
- # 'list': 'categorymembers',
97
- # #'generator': 'categorymembers',
98
- # 'cmcontinue': cmcontinue,
99
- # 'cmtitle': f'Category:{category}',
100
- # 'cmsort': 'timestamp',
101
- # 'cmprop': 'ids|title|timestamp',
102
- # 'cmstart': cmstart,
103
- # #'cmend': cmend,
104
- # #'cmlimit': '500',
105
- # 'cmlimit': '20',
106
- # }).json()
107
- # print(f"Response: \n{json.dumps(response,indent=4)}")
108
-
109
-
110
- #response = requests.get(
111
- # self.WIKI_BASE_URL,
112
- # headers=self.user_agent_headers,
113
- # params={
114
- # 'action': 'query',
115
- # 'format': 'json',
116
- # 'prop': 'extracts',
117
- # #'list': 'categorymembers',
118
- # 'generator': 'categorymembers',
119
- # 'gcmtitle': f'Category:{category}',
120
- # 'gcmsort': 'timestamp',
121
- # 'gcmprop': 'ids|title|timestamp',
122
- # 'gcmstart': cmstart,
123
- # #'cmend': cmend,
124
- # #'cmlimit': '500',
125
- # 'gcmlimit': '20',
126
- # }).json()
127
-
128
- # ['query']['categorymembers']
129
- # list -->
130
- # title
131
-
132
 
 
 
 
 
 
 
 
 
 
 
133
 
134
- return response
135
 
136
  def __is_int__(self, s):
137
  try:
@@ -172,19 +106,10 @@ class MyWikiAPI:
172
 
173
  return rows_collected
174
 
175
- def get_featured_articles(self, year):
176
- featured_url = f"https://api.wikimedia.org/feed/v1/wikipedia/en/featured/{year}/01/01"
177
-
178
- response = requests.get(
179
- featured_url,
180
- headers=self.user_agent_headers,
181
- ).json()
182
-
183
- #print(f"Got featured list: {json.dumps(response,indent=4)}")
184
-
185
- tfa = response['tfa']
186
- print(f"tfa: {json.dumps(tfa,indent=4)}")
187
- title = tfa['titles']['canonical']
188
- extract = tfa['extract']
189
 
190
- print(f"tfa:\ntitle: {title}\nextract: {extract}")
 
 
1
  import requests
2
  import json
3
  import wikitextparser as wtp
4
+ from lxml import etree
5
+ from lxml import html
6
 
7
  # https://gist.github.com/scionoftech/0f35d5e231be2cf46823d774023268b6
8
  # https://www.mediawiki.org/wiki/API:Main_page
9
 
10
  class MyWikiAPI:
11
  WIKI_BASE_URL = "https://en.wikipedia.org/w/api.php"
12
+ WIKI_FEATURED_URL = "https://en.wikipedia.org/wiki/Wikipedia:Featured_article_candidates/Featured_log"
13
  user_agent_headers = {"user-Agent":
14
  "AgentsCourseAssignment/1.0 (https://huggingface.co/spaces/krzsam/Agents-Course-Assignment)"}
15
 
 
46
  }).json()
47
  return response["parse"][format]["*"]
48
 
49
+ def __get_featured_log__(self, month, year):
50
+ featured_url = f"{self.WIKI_FEATURED_URL}/{month}_{year}"
51
+ print(f"Getting content for: {featured_url}")
 
 
 
 
 
 
 
 
 
 
52
  response = requests.get(
53
+ featured_url,
54
  headers=self.user_agent_headers,
55
+ ).text
56
+ return response
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
57
 
58
+ def __process_featured_log__(self, html_content):
59
+ tree = html.fromstring(html_content)
60
+ article_heading = "mw-heading mw-heading3"
61
+ #article_heading = "mw-heading3"
62
+ #elements_css = tree.cssselect(article_heading)
63
+ #for element in elements_css:
64
+ # print(f"Tag {element.tag}")
65
+ elements = tree.xpath(f"//div[@class='{article_heading}']")
66
+ for element in elements:
67
+ print(f"Tag {element.tag} {element.text}")
68
 
 
69
 
70
  def __is_int__(self, s):
71
  try:
 
106
 
107
  return rows_collected
108
 
109
+ def get_featured_articles(self, month, year):
110
+ # https://en.wikipedia.org/wiki/Wikipedia:Featured_article_candidates/Featured_log
111
+ ret = self.__get_featured_log__(month,year)
112
+ self.__process_featured_log__(ret)
 
 
 
 
 
 
 
 
 
 
113
 
114
+ #print(f"Featured for {month} {year}:\n{ret}")
115
+ return ret
my_prompt_config.py CHANGED
@@ -31,6 +31,8 @@ class MyPromptConfig:
31
  * _my_tool_reverse_string : reverse provided string
32
  * _my_tool_wiki_page_section : get contents of a section of the Wikipedia page
33
  * _my_tool_wiki_table_filter : filter the subsection and tables on the Wikipedia page for provided years
 
 
34
 
35
  If the question mentions image or other file, use one of provided tools to load it
36
  using task_id associated with the question.
 
31
  * _my_tool_reverse_string : reverse provided string
32
  * _my_tool_wiki_page_section : get contents of a section of the Wikipedia page
33
  * _my_tool_wiki_table_filter : filter the subsection and tables on the Wikipedia page for provided years
34
+ * _my_tool_wiki_featured_articles : get information about all featured articles on Wikipedia for the
35
+ given month and year
36
 
37
  If the question mentions image or other file, use one of provided tools to load it
38
  using task_id associated with the question.
my_tool_wiki_featured_articles.py ADDED
@@ -0,0 +1,50 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ from smolagents import Tool
2
+ from my_base_wiki_api import MyWikiAPI
3
+
4
+
5
+ #AUTHORIZED_TYPES = [
6
+ # "string",
7
+ # "boolean",
8
+ # "integer",
9
+ # "number",
10
+ # "image",
11
+ # "audio",
12
+ # "array",
13
+ # "object",
14
+ # "any",
15
+ # "null",
16
+ #]
17
+
18
class MyWikiFeaturedArticles(Tool):
    """smolagents Tool exposing Wikipedia's monthly featured-article log."""

    name = "_my_tool_wiki_featured_articles"
    # Fixed: the previous description was copied from the page-section tool and
    # described the wrong behaviour; the agent selects tools by this text, so
    # it must match what forward() actually does. The usage example also passed
    # year as a string while `inputs` declares it as an integer.
    description = """
    Get information about all featured articles on Wikipedia for the provided month and year.
    To invoke the tool use code as below
    <code>
    featured = _my_tool_wiki_featured_articles(month='January', year=2005)
    </code>
    """

    inputs = {
        "month": {
            "type": "string",
            "description": "month provided as name of the month for example January",
        },
        "year": {
            "type": "integer",
            "description": "Year expressed in 4 digit notation, for example 2009",
        },
    }

    output_type = "string"

    # NOTE(review): super().__init__() is deliberately not called in __init__,
    # with is_initialized set manually instead — presumably to skip the base
    # Tool validation; confirm this matches the project's other tools.
    is_initialized = True

    def __init__(self):
        print("***KS*** Wiki featured articles tool initializing ...")
        self.wiki = MyWikiAPI()

    def forward(self, month, year) -> str:
        """Return the raw featured-log HTML for the given month/year."""
        result = self.wiki.get_featured_articles(month, year)
        return result
requirements.txt CHANGED
@@ -10,4 +10,6 @@ torch
10
  stockfish
11
  bs4
12
  wikitextparser
13
- mwparserfromhell
 
 
 
10
  stockfish
11
  bs4
12
  wikitextparser
13
+ mwparserfromhell
14
+ lxml
15
+ cssselect
test_tools.py CHANGED
@@ -11,18 +11,12 @@ import matplotlib as mp
11
 
12
  #pytest --capture=no
13
 
14
- @pytest.mark.parametrize("_year,_exp", [(2023, 3)])
15
- def test_tool_wiki_contributions(_year, _exp):
16
  api = MyWikiAPI()
17
- #api.get_featured_articles(_year)
18
- # https://en.wikipedia.org/wiki/Category:Featured_articles
19
- category = 'Featured articles'
20
- api.get_category(category, _year)
21
 
22
- category = 'Former featured articles'
23
- api.get_category(category, _year)
24
-
25
- assert _exp == _exp
26
 
27
 
28
  @pytest.mark.skip(reason="disabled")
 
11
 
12
  #pytest --capture=no
13
 
14
# NOTE(review): the function name still says "contributions" but it now
# exercises the featured-articles lookup; kept unchanged so external test
# selection (e.g. pytest -k filters, CI configs) is not broken.
@pytest.mark.parametrize("_month,_year,_exp", [('November', 2016, "Giganotosaurus")])
def test_tool_wiki_contributions(_month, _year, _exp):
    """The November 2016 featured log is expected to mention Giganotosaurus."""
    api = MyWikiAPI()
    content = api.get_featured_articles(_month, _year)
    # Idiomatic membership test instead of str.find(...) >= 0.
    assert _exp in content
 
 
 
20
 
21
 
22
  @pytest.mark.skip(reason="disabled")