commit

- my_agent.py +4 -1
- my_base_wiki_api.py +25 -100
- my_prompt_config.py +2 -0
- my_tool_wiki_featured_articles.py +50 -0
- requirements.txt +3 -1
- test_tools.py +4 -10
my_agent.py
CHANGED

@@ -11,6 +11,7 @@ from my_tool_chess_analysis import ChessAnalysisTool
 from my_prompt_config import MyPromptConfig
 from my_tool_wiki_page_section import MyWikiPageSectionTool
 from my_tool_wiki_filter_tables import MyWikiTableFilterTool
+from my_tool_wiki_featured_articles import MyWikiFeaturedArticles
 from dotenv import load_dotenv

 # https://huggingface.co/docs/transformers/model_doc/mistral?usage=Pipeline

@@ -60,6 +61,7 @@ class MyAgent:
                 MyWikiPageSectionTool(),
                 MyWikiTableFilterTool(),
                 WebSearchTool(),
+                MyWikiFeaturedArticles(),
                 FinalAnswerTool(),
             ],
             model=self.model,

@@ -84,7 +86,8 @@ class MyAgent:
                 "my_tool_image_load",
                 "my_tool_reverse_string",
                 "my_tool_wiki_page_section",
-                "my_tool_wiki_table_filter"
+                "my_tool_wiki_table_filter",
+                "my_tool_wiki_featured_articles"
             ],
         )
         #web_search_agent = CodeAgent(
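For orientation, a minimal sketch of what this wiring amounts to: the new tool is instantiated once and handed to the agent's tool list. The model class and constructor arguments below are illustrative assumptions, not this repo's actual MyAgent setup.

    # Sketch only: a smolagents CodeAgent carrying the new featured-articles tool.
    # InferenceClientModel is an assumed stand-in for the repo's own model config.
    from smolagents import CodeAgent, InferenceClientModel, FinalAnswerTool
    from my_tool_wiki_featured_articles import MyWikiFeaturedArticles

    agent = CodeAgent(
        tools=[MyWikiFeaturedArticles(), FinalAnswerTool()],
        model=InferenceClientModel(),
    )
    agent.run("Which articles were promoted to featured status in November 2016?")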
my_base_wiki_api.py
CHANGED

@@ -1,12 +1,15 @@
 import requests
 import json
 import wikitextparser as wtp
+from lxml import etree
+from lxml import html

 # https://gist.github.com/scionoftech/0f35d5e231be2cf46823d774023268b6
 # https://www.mediawiki.org/wiki/API:Main_page

 class MyWikiAPI:
     WIKI_BASE_URL = "https://en.wikipedia.org/w/api.php"
+    WIKI_FEATURED_URL = "https://en.wikipedia.org/wiki/Wikipedia:Featured_article_candidates/Featured_log"
     user_agent_headers = {"user-Agent":
         "AgentsCourseAssignment/1.0 (https://huggingface.co/spaces/krzsam/Agents-Course-Assignment)"}

@@ -43,95 +46,26 @@ class MyWikiAPI:
         }).json()
         return response["parse"][format]["*"]

-    def get_category(self, category, year):
-
-
-        # https://www.mediawiki.org/wiki/API:Continue
-        # https://stackoverflow.com/questions/35826469/how-to-combine-two-wikipedia-api-calls-into-one
-        # 2001-01-15T14:56:00Z
-        # api.php?action=query&generator=categorymembers&gcmtitle=Category:Physics&prop=info [open in sandbox]
-
-        cmstart = f"{year}-01-01T00:00:00Z"
-        cmend = f"{year}-12-31T23:59:59Z"
-
-        print(f"Listing pages in category {category} from {cmstart} to {cmend}")
-
+    def __get_featured_log__(self, month, year):
+        featured_url = f"{self.WIKI_FEATURED_URL}/{month}_{year}"
+        print(f"Getting content for: {featured_url}")
         response = requests.get(
-            self.WIKI_BASE_URL,
+            featured_url,
             headers=self.user_agent_headers,
-            params={
-                'action': 'query',
-                'format': 'json',
-                #'prop': 'extracts',
-                'list': 'categorymembers',
-                #'generator': 'categorymembers',
-                'cmtitle': f'Category:{category}',
-                'cmsort': 'timestamp',
-                'cmprop': 'ids|title|timestamp',
-                'cmstart': cmstart,
-                'cmend': cmend,
-                'cmlimit': '500',
-                #'cmlimit': '20',
-            }).json()
-        print(f"Response: \n{json.dumps(response,indent=4)}")
-
-        print(f"Got articles: {len(response['query']['categorymembers'])}")
-
-        #cmcontinue = ""
-        #while(True):
-        #    if "continue" in response and "cmcontinue" in response["continue"]:
-        #        cmcontinue = response["continue"]["cmcontinue"]
-        #    else:
-        #        break
-
-        #    print(f"cmcontinue: {cmcontinue}")
-
-        #    response = requests.get(
-        #        self.WIKI_BASE_URL,
-        #        headers=self.user_agent_headers,
-        #        params={
-        #            'action': 'query',
-        #            'format': 'json',
-        #            #'prop': 'extracts',
-        #            'list': 'categorymembers',
-        #            #'generator': 'categorymembers',
-        #            'cmcontinue': cmcontinue,
-        #            'cmtitle': f'Category:{category}',
-        #            'cmsort': 'timestamp',
-        #            'cmprop': 'ids|title|timestamp',
-        #            'cmstart': cmstart,
-        #            #'cmend': cmend,
-        #            #'cmlimit': '500',
-        #            'cmlimit': '20',
-        #        }).json()
-        #    print(f"Response: \n{json.dumps(response,indent=4)}")
-
-
-        #response = requests.get(
-        #    self.WIKI_BASE_URL,
-        #    headers=self.user_agent_headers,
-        #    params={
-        #        'action': 'query',
-        #        'format': 'json',
-        #        'prop': 'extracts',
-        #        #'list': 'categorymembers',
-        #        'generator': 'categorymembers',
-        #        'gcmtitle': f'Category:{category}',
-        #        'gcmsort': 'timestamp',
-        #        'gcmprop': 'ids|title|timestamp',
-        #        'gcmstart': cmstart,
-        #        #'cmend': cmend,
-        #        #'cmlimit': '500',
-        #        'gcmlimit': '20',
-        #    }).json()
-
-        # ['query']['categorymembers']
-        # list -->
-        # title
-
-        return response
+        ).text
+        return response

+    def __process_featured_log__(self, html_content):
+        tree = html.fromstring(html_content)
+        article_heading = "mw-heading mw-heading3"
+        #article_heading = "mw-heading3"
+        #elements_css = tree.cssselect(article_heading)
+        #for element in elements_css:
+        #    print(f"Tag {element.tag}")
+        elements = tree.xpath(f"//div[@class='{article_heading}']")
+        for element in elements:
+            print(f"Tag {element.tag} {element.text}")

     def __is_int__(self, s):
         try:

@@ -172,19 +106,10 @@ class MyWikiAPI:

         return rows_collected

-    def get_featured_articles(self, year):
-
-
-        response = requests.get(
-            featured_url,
-            headers=self.user_agent_headers,
-        ).json()
-
-        #print(f"Got featured list: {json.dumps(response,indent=4)}")
-
-        tfa = response['tfa']
-        print(f"tfa: {json.dumps(tfa,indent=4)}")
-        title = tfa['titles']['canonical']
-        extract = tfa['extract']
+    def get_featured_articles(self, month, year):
+        # https://en.wikipedia.org/wiki/Wikipedia:Featured_article_candidates/Featured_log
+        ret = self.__get_featured_log__(month, year)
+        self.__process_featured_log__(ret)

-        print(f"
+        #print(f"Featured for {month} {year}:\n{ret}")
+        return ret
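Note that element.text on the "mw-heading mw-heading3" wrapper div is typically empty, because the heading text sits in a nested h3, so __process_featured_log__ as committed mostly prints blanks. A small sketch of how the same XPath pass could return the heading titles instead, assuming the featured log keeps this skin-generated wrapper class (it may change with Wikipedia's HTML):

    # Sketch: collect featured-log heading titles rather than printing them.
    # text_content() pulls the nested <h3> text that element.text misses.
    from lxml import html

    def extract_featured_titles(html_content):
        tree = html.fromstring(html_content)
        elements = tree.xpath("//div[@class='mw-heading mw-heading3']")
        return [element.text_content().strip() for element in elements]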
my_prompt_config.py
CHANGED

@@ -31,6 +31,8 @@ class MyPromptConfig:
 * _my_tool_reverse_string : reverse provided string
 * _my_tool_wiki_page_section : get contents of a section of the Wikipedia page
 * _my_tool_wiki_table_filter : filter the subsection and tables on the Wikipedia page for provided years
+* _my_tool_wiki_featured_articles : get information about all featured articles on Wikipedia for the
+  given month and year

 If the question mentions image or other file, use one of provided tools to load it
 using task_id associated with the question.
my_tool_wiki_featured_articles.py
ADDED

@@ -0,0 +1,50 @@
+from smolagents import Tool
+from my_base_wiki_api import MyWikiAPI
+
+
+#AUTHORIZED_TYPES = [
+#    "string",
+#    "boolean",
+#    "integer",
+#    "number",
+#    "image",
+#    "audio",
+#    "array",
+#    "object",
+#    "any",
+#    "null",
+#]
+
+class MyWikiFeaturedArticles(Tool):
+    name = "_my_tool_wiki_featured_articles"
+    description = """
+    Get information about the Wikipedia featured articles for the given month and year.
+    To invoke the tool use code as below
+    <code>
+    featured = _my_tool_wiki_featured_articles(month='January', year=2005)
+    </code>
+    """
+
+    inputs = {
+        "month": {
+            "type": "string",
+            "description": "Month provided as the name of the month, for example January",
+        },
+        "year": {
+            "type": "integer",
+            "description": "Year expressed in 4 digit notation, for example 2009",
+        },
+    }
+
+    output_type = "string"
+
+    is_initialized = True
+
+    def __init__(self):
+        print(f"***KS*** Wiki featured articles tool initializing ...")
+        self.wiki = MyWikiAPI()
+
+    def forward(self, month, year) -> str:
+        result = self.wiki.get_featured_articles(month, year)
+        #print(f"***KS*** Wiki featured articles tool, result: {result}")
+        return result
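Exercised directly (outside the agent), the tool is a thin wrapper over MyWikiAPI.get_featured_articles, and forward() currently returns the raw featured-log HTML rather than a parsed list. A quick usage sketch, requiring live network access; the November 2016 expectation comes from the test change below:

    # Sketch: direct invocation of the tool; result is raw page HTML.
    from my_tool_wiki_featured_articles import MyWikiFeaturedArticles

    tool = MyWikiFeaturedArticles()
    content = tool.forward(month="November", year=2016)
    print("Giganotosaurus" in content)  # expected True per test_tools.py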
requirements.txt
CHANGED

@@ -10,4 +10,6 @@ torch
 stockfish
 bs4
 wikitextparser
-mwparserfromhell
+mwparserfromhell
+lxml
+cssselect
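lxml backs the new HTML parsing in my_base_wiki_api.py. cssselect is the separate package that lxml's cssselect() method delegates to; it is presumably pinned here for the (currently commented-out) CSS-selector experiment, since the committed code only uses the XPath path.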
test_tools.py
CHANGED

@@ -11,18 +11,12 @@ import matplotlib as mp

 #pytest --capture=no

-@pytest.mark.parametrize("_year,_exp", [(
-def test_tool_wiki_contributions(_year, _exp):
+@pytest.mark.parametrize("_month,_year,_exp", [('November',2016, "Giganotosaurus")])
+def test_tool_wiki_contributions(_month, _year, _exp):
     api = MyWikiAPI()
-
-    # https://en.wikipedia.org/wiki/Category:Featured_articles
-    category = 'Featured articles'
-    api.get_category(category, _year)
+    content = api.get_featured_articles(_month, _year)

-
-    api.get_category(category, _year)
-
-    assert _exp == _exp
+    assert content.find(_exp) >= 0


 @pytest.mark.skip(reason="disabled")
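The reworked test hits live Wikipedia, so it needs network access. Per the #pytest --capture=no hint at the top of the file, something like pytest test_tools.py -k test_tool_wiki_contributions --capture=no should run just this test with the debug prints visible.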