commit

- my_agent.py +4 -1
- my_base_wiki_api.py +25 -100
- my_prompt_config.py +2 -0
- my_tool_wiki_featured_articles.py +50 -0
- requirements.txt +3 -1
- test_tools.py +4 -10
my_agent.py
CHANGED

@@ -11,6 +11,7 @@ from my_tool_chess_analysis import ChessAnalysisTool
 from my_prompt_config import MyPromptConfig
 from my_tool_wiki_page_section import MyWikiPageSectionTool
 from my_tool_wiki_filter_tables import MyWikiTableFilterTool
+from my_tool_wiki_featured_articles import MyWikiFeaturedArticles
 from dotenv import load_dotenv

 # https://huggingface.co/docs/transformers/model_doc/mistral?usage=Pipeline

@@ -60,6 +61,7 @@ class MyAgent:
                 MyWikiPageSectionTool(),
                 MyWikiTableFilterTool(),
                 WebSearchTool(),
+                MyWikiFeaturedArticles(),
                 FinalAnswerTool(),
             ],
             model=self.model,

@@ -84,7 +86,8 @@ class MyAgent:
                 "my_tool_image_load",
                 "my_tool_reverse_string",
                 "my_tool_wiki_page_section",
-                "my_tool_wiki_table_filter"
+                "my_tool_wiki_table_filter",
+                "my_tool_wiki_featured_articles"
             ],
         )
         #web_search_agent = CodeAgent(
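For orientation, a minimal sketch of what this wiring amounts to: the new tool is instantiated once and handed to the agent's tool list. The model class and constructor arguments below are illustrative assumptions, not this repo's actual MyAgent setup.

    # Sketch only: a smolagents CodeAgent carrying the new featured-articles tool.
    # InferenceClientModel is an assumed stand-in for the repo's own model config.
    from smolagents import CodeAgent, InferenceClientModel, FinalAnswerTool
    from my_tool_wiki_featured_articles import MyWikiFeaturedArticles

    agent = CodeAgent(
        tools=[MyWikiFeaturedArticles(), FinalAnswerTool()],
        model=InferenceClientModel(),
    )
    agent.run("Which articles were promoted to featured status in November 2016?")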
my_base_wiki_api.py
CHANGED

@@ -1,12 +1,15 @@
 import requests
 import json
 import wikitextparser as wtp
+from lxml import etree
+from lxml import html

 # https://gist.github.com/scionoftech/0f35d5e231be2cf46823d774023268b6
 # https://www.mediawiki.org/wiki/API:Main_page

 class MyWikiAPI:
     WIKI_BASE_URL = "https://en.wikipedia.org/w/api.php"
+    WIKI_FEATURED_URL = "https://en.wikipedia.org/wiki/Wikipedia:Featured_article_candidates/Featured_log"
     user_agent_headers = {"user-Agent":
         "AgentsCourseAssignment/1.0 (https://huggingface.co/spaces/krzsam/Agents-Course-Assignment)"}

@@ -43,95 +46,26 @@ class MyWikiAPI:
         }).json()
         return response["parse"][format]["*"]

-    def get_category(self, category, year):
-
-
-        # https://www.mediawiki.org/wiki/API:Continue
-        # https://stackoverflow.com/questions/35826469/how-to-combine-two-wikipedia-api-calls-into-one
-        # 2001-01-15T14:56:00Z
-        # api.php?action=query&generator=categorymembers&gcmtitle=Category:Physics&prop=info [open in sandbox]
-
-        cmstart = f"{year}-01-01T00:00:00Z"
-        cmend = f"{year}-12-31T23:59:59Z"
-
-        print(f"Listing pages in category {category} from {cmstart} to {cmend}")
-
+    def __get_featured_log__(self, month, year):
+        featured_url = f"{self.WIKI_FEATURED_URL}/{month}_{year}"
+        print(f"Getting content for: {featured_url}")
         response = requests.get(
-            self.WIKI_BASE_URL,
+            featured_url,
             headers=self.user_agent_headers,
-            params={
-                'action': 'query',
-                'format': 'json',
-                #'prop': 'extracts',
-                'list': 'categorymembers',
-                #'generator': 'categorymembers',
-                'cmtitle': f'Category:{category}',
-                'cmsort': 'timestamp',
-                'cmprop': 'ids|title|timestamp',
-                'cmstart': cmstart,
-                'cmend': cmend,
-                'cmlimit': '500',
-                #'cmlimit': '20',
-            }).json()
-        print(f"Response: \n{json.dumps(response,indent=4)}")
-
-        print(f"Got articles: {len(response['query']['categorymembers'])}")
-
-        #cmcontinue = ""
-        #while(True):
-        #    if "continue" in response and "cmcontinue" in response["continue"]:
-        #        cmcontinue = response["continue"]["cmcontinue"]
-        #    else:
-        #        break
-
-        #    print(f"cmcontinue: {cmcontinue}")
-
-        #    response = requests.get(
-        #        self.WIKI_BASE_URL,
-        #        headers=self.user_agent_headers,
-        #        params={
-        #            'action': 'query',
-        #            'format': 'json',
-        #            #'prop': 'extracts',
-        #            'list': 'categorymembers',
-        #            #'generator': 'categorymembers',
-        #            'cmcontinue': cmcontinue,
-        #            'cmtitle': f'Category:{category}',
-        #            'cmsort': 'timestamp',
-        #            'cmprop': 'ids|title|timestamp',
-        #            'cmstart': cmstart,
-        #            #'cmend': cmend,
-        #            #'cmlimit': '500',
-        #            'cmlimit': '20',
-        #        }).json()
-        #    print(f"Response: \n{json.dumps(response,indent=4)}")
-
-
-        #response = requests.get(
-        #    self.WIKI_BASE_URL,
-        #    headers=self.user_agent_headers,
-        #    params={
-        #        'action': 'query',
-        #        'format': 'json',
-        #        'prop': 'extracts',
-        #        #'list': 'categorymembers',
-        #        'generator': 'categorymembers',
-        #        'gcmtitle': f'Category:{category}',
-        #        'gcmsort': 'timestamp',
-        #        'gcmprop': 'ids|title|timestamp',
-        #        'gcmstart': cmstart,
-        #        #'cmend': cmend,
-        #        #'cmlimit': '500',
-        #        'gcmlimit': '20',
-        #    }).json()
-
-        # ['query']['categorymembers']
-        # list -->
-        # title
-
-        return response
+        ).text
+        return response

+    def __process_featured_log__(self, html_content):
+        tree = html.fromstring(html_content)
+        article_heading = "mw-heading mw-heading3"
+        #article_heading = "mw-heading3"
+        #elements_css = tree.cssselect(article_heading)
+        #for element in elements_css:
+        #    print(f"Tag {element.tag}")
+        elements = tree.xpath(f"//div[@class='{article_heading}']")
+        for element in elements:
+            print(f"Tag {element.tag} {element.text}")

     def __is_int__(self, s):
         try:

@@ -172,19 +106,10 @@ class MyWikiAPI:

         return rows_collected

-    def get_featured_articles(self, year):
-
-
-        response = requests.get(
-            featured_url,
-            headers=self.user_agent_headers,
-        ).json()
-
-        #print(f"Got featured list: {json.dumps(response,indent=4)}")
-
-        tfa = response['tfa']
-        print(f"tfa: {json.dumps(tfa,indent=4)}")
-        title = tfa['titles']['canonical']
-        extract = tfa['extract']
+    def get_featured_articles(self, month, year):
+        # https://en.wikipedia.org/wiki/Wikipedia:Featured_article_candidates/Featured_log
+        ret = self.__get_featured_log__(month, year)
+        self.__process_featured_log__(ret)

-        print(f"
+        #print(f"Featured for {month} {year}:\n{ret}")
+        return ret
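Note that element.text on the "mw-heading mw-heading3" wrapper div is typically empty, because the heading text sits in a nested h3, so __process_featured_log__ as committed mostly prints blanks. A small sketch of how the same XPath pass could return the heading titles instead, assuming the featured log keeps this skin-generated wrapper class (it may change with Wikipedia's HTML):

    # Sketch: collect featured-log heading titles rather than printing them.
    # text_content() pulls the nested <h3> text that element.text misses.
    from lxml import html

    def extract_featured_titles(html_content):
        tree = html.fromstring(html_content)
        elements = tree.xpath("//div[@class='mw-heading mw-heading3']")
        return [element.text_content().strip() for element in elements]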
my_prompt_config.py
CHANGED

@@ -31,6 +31,8 @@ class MyPromptConfig:
 * _my_tool_reverse_string : reverse provided string
 * _my_tool_wiki_page_section : get contents of a section of the Wikipedia page
 * _my_tool_wiki_table_filter : filter the subsection and tables on the Wikipedia page for provided years
+* _my_tool_wiki_featured_articles : get information about all featured articles on Wikipedia for the
+  given month and year

 If the question mentions image or other file, use one of provided tools to load it
 using task_id associated with the question.
my_tool_wiki_featured_articles.py
ADDED

@@ -0,0 +1,50 @@
+from smolagents import Tool
+from my_base_wiki_api import MyWikiAPI
+
+
+#AUTHORIZED_TYPES = [
+#    "string",
+#    "boolean",
+#    "integer",
+#    "number",
+#    "image",
+#    "audio",
+#    "array",
+#    "object",
+#    "any",
+#    "null",
+#]
+
+class MyWikiFeaturedArticles(Tool):
+    name = "_my_tool_wiki_featured_articles"
+    description = """
+    Get information about the Wikipedia featured articles for the given month and year.
+    To invoke the tool use code as below
+    <code>
+    featured = _my_tool_wiki_featured_articles(month='January', year=2005)
+    </code>
+    """
+
+    inputs = {
+        "month": {
+            "type": "string",
+            "description": "Month provided as the name of the month, for example January",
+        },
+        "year": {
+            "type": "integer",
+            "description": "Year expressed in 4 digit notation, for example 2009",
+        },
+    }
+
+    output_type = "string"
+
+    is_initialized = True
+
+    def __init__(self):
+        print(f"***KS*** Wiki featured articles tool initializing ...")
+        self.wiki = MyWikiAPI()
+
+    def forward(self, month, year) -> str:
+        result = self.wiki.get_featured_articles(month, year)
+        #print(f"***KS*** Wiki featured articles tool, result: {result}")
+        return result
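Exercised directly (outside the agent), the tool is a thin wrapper over MyWikiAPI.get_featured_articles, and forward() currently returns the raw featured-log HTML rather than a parsed list. A quick usage sketch, requiring live network access; the November 2016 expectation comes from the test change below:

    # Sketch: direct invocation of the tool; result is raw page HTML.
    from my_tool_wiki_featured_articles import MyWikiFeaturedArticles

    tool = MyWikiFeaturedArticles()
    content = tool.forward(month="November", year=2016)
    print("Giganotosaurus" in content)  # expected True per test_tools.py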
requirements.txt
CHANGED

@@ -10,4 +10,6 @@ torch
 stockfish
 bs4
 wikitextparser
-mwparserfromhell
+mwparserfromhell
+lxml
+cssselect
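lxml backs the new HTML parsing in my_base_wiki_api.py. cssselect is the separate package that lxml's cssselect() method delegates to; it is presumably pinned here for the (currently commented-out) CSS-selector experiment, since the committed code only uses the XPath path.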
test_tools.py
CHANGED

@@ -11,18 +11,12 @@ import matplotlib as mp

 #pytest --capture=no

-@pytest.mark.parametrize("_year,_exp", [(
-def test_tool_wiki_contributions(_year, _exp):
+@pytest.mark.parametrize("_month,_year,_exp", [('November',2016, "Giganotosaurus")])
+def test_tool_wiki_contributions(_month, _year, _exp):
     api = MyWikiAPI()
-
-    # https://en.wikipedia.org/wiki/Category:Featured_articles
-    category = 'Featured articles'
-    api.get_category(category, _year)
+    content = api.get_featured_articles(_month, _year)

-
-    api.get_category(category, _year)
-
-    assert _exp == _exp
+    assert content.find(_exp) >= 0


 @pytest.mark.skip(reason="disabled")
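The reworked test hits live Wikipedia, so it needs network access. Per the #pytest --capture=no hint at the top of the file, something like pytest test_tools.py -k test_tool_wiki_contributions --capture=no should run just this test with the debug prints visible.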