krzsam committed on
Commit
de39e8f
·
1 Parent(s): 05c6dfb
my_base_libretexts_api.py ADDED
@@ -0,0 +1,97 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import requests
2
+ import json
3
+ from lxml import etree
4
+ from lxml import html
5
+
6
+ # https://gist.github.com/scionoftech/0f35d5e231be2cf46823d774023268b6
7
+ # https://www.mediawiki.org/wiki/API:Main_page
8
+
9
class MyLibreTextsAPI:
    """Small HTML-scraping client for the LibreTexts Chemistry library.

    Each ``get_*`` listing method downloads one page, selects anchors by
    their exact ``class`` attribute and returns ``(title, href)`` tuples,
    so the caller can walk bookshelves -> books -> sections -> paragraphs
    by feeding a returned ``href`` into the next method.
    """

    LIBRETEXTS_BASE_URL = "https://chem.libretexts.org"
    user_agent_headers = {"user-Agent":
                          "AgentsCourseAssignment/1.0 (https://huggingface.co/spaces/krzsam/Agents-Course-Assignment)"}

    # requests applies no timeout by default; without one a stalled server
    # would block the caller indefinitely.
    REQUEST_TIMEOUT = 30

    # Exact ``class`` attribute values of the anchors that are harvested.
    LISTING_LINK_CLASS = "mt-sortable-listing-link mt-edit-section internal"
    INTERNAL_LINK_CLASS = "internal"

    def __init__(self):
        print("***KS*** Initializing LibreTexts API")

    def _fetch_tree(self, url):
        """Download ``url`` and return the parsed lxml HTML tree.

        Raises:
            requests.HTTPError: if the server answers with a 4xx/5xx status.
            requests.RequestException: on connection failures or timeout.
        """
        response = requests.get(
            url,
            headers=self.user_agent_headers,
            timeout=self.REQUEST_TIMEOUT,
        )
        # Fail loudly instead of scraping an error page into an empty list.
        response.raise_for_status()
        return html.fromstring(response.text)

    def _collect_links(self, url, link_class):
        """Return ``(title, href)`` for every ``<a>`` on ``url`` whose class
        attribute is exactly ``link_class``.

        Anchors without an ``href`` are skipped; anchors without a ``title``
        attribute fall back to their whitespace-normalised visible text.
        """
        tree = self._fetch_tree(url)
        links = []
        for anchor in tree.xpath(f"//a[@class='{link_class}']"):
            href = anchor.get("href")
            if href is None:
                continue
            title = anchor.get("title")
            if title is None:
                title = " ".join(anchor.text_content().split())
            links.append((title, href))
        return links

    def get_bookshelves(self):
        """Return ``(title, href)`` tuples listed on the ``/Bookshelves`` page."""
        bookshelf = self._collect_links(
            f"{self.LIBRETEXTS_BASE_URL}/Bookshelves",
            self.LISTING_LINK_CLASS,
        )
        print(f"Bookshelf:\n{bookshelf}\n\n")

        return bookshelf

    def get_books(self, bookshelf_url):
        """Return ``(title, href)`` tuples of the listing links on ``bookshelf_url``."""
        books = self._collect_links(bookshelf_url, self.LISTING_LINK_CLASS)
        print(f"Books:\n{books}\n\n")

        return books

    def get_book_sections(self, book_url):
        """Return ``(title, href)`` tuples of the listing links on ``book_url``."""
        sections = self._collect_links(book_url, self.LISTING_LINK_CLASS)
        print(f"Sections:\n{sections}\n\n")

        return sections

    def get_book_section_paragraphs(self, section_url):
        """Return ``(title, href)`` tuples of the ``internal`` links on ``section_url``."""
        paragraphs = self._collect_links(section_url, self.INTERNAL_LINK_CLASS)
        print(f"Paragraphs:\n{paragraphs}\n\n")

        return paragraphs

    def get_section_contents(self, paragraph_url):
        """Return the visible text of the page at ``paragraph_url``.

        The text covers the whole document (navigation included) with blank
        lines removed and each line stripped.

        Raises:
            requests.HTTPError: if the server answers with a 4xx/5xx status.
        """
        tree = self._fetch_tree(paragraph_url)

        # Script and style bodies are not readable content; drop them before
        # extracting text.
        for node in tree.xpath("//script | //style"):
            node.drop_tree()

        # TODO get contents and convert to MD format
        # NOTE(review): narrowing to the page's main content container would
        # give cleaner output; the right selector is not known from here.
        stripped_lines = (line.strip() for line in tree.text_content().splitlines())
        contents = "\n".join(line for line in stripped_lines if line)

        return contents
97
+
my_base_wiki_api.py CHANGED
@@ -61,9 +61,6 @@ class MyWikiAPI:
61
  elements = tree.xpath(f"//div[@class='{article_heading}']")
62
  element_texts = []
63
  for element in elements:
64
- #print("-----------------------------------")
65
- #print(f"Tag {element.tag}")
66
-
67
  n1 = element.getnext()
68
  n1_text = " ".join(n1.itertext())
69
 
@@ -71,12 +68,10 @@ class MyWikiAPI:
71
  n2_text = " ".join(n2.itertext())
72
  element_text = f"{n1_text} {n2_text}"
73
 
74
- #print(f"Element text: {element_text}")
75
  element_texts.append(element_text)
76
 
77
  return element_texts
78
 
79
-
80
  def __is_int__(self, s):
81
  try:
82
  int(s)
 
61
  elements = tree.xpath(f"//div[@class='{article_heading}']")
62
  element_texts = []
63
  for element in elements:
 
 
 
64
  n1 = element.getnext()
65
  n1_text = " ".join(n1.itertext())
66
 
 
68
  n2_text = " ".join(n2.itertext())
69
  element_text = f"{n1_text} {n2_text}"
70
 
 
71
  element_texts.append(element_text)
72
 
73
  return element_texts
74
 
 
75
  def __is_int__(self, s):
76
  try:
77
  int(s)
my_tool_libretext_book_shelves.py ADDED
@@ -0,0 +1,49 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ from smolagents import Tool
2
+ from my_base_wiki_api import MyWikiAPI
3
+
4
+
5
+ #AUTHORIZED_TYPES = [
6
+ # "string",
7
+ # "boolean",
8
+ # "integer",
9
+ # "number",
10
+ # "image",
11
+ # "audio",
12
+ # "array",
13
+ # "object",
14
+ # "any",
15
+ # "null",
16
+ #]
17
+
18
# NOTE(review): this class lives in my_tool_libretext_book_shelves.py but still
# wraps the Wikipedia API and reuses the wiki tool's name — it looks like a
# copied template that has not yet been adapted for LibreTexts; confirm.
class MyWikiFeaturedArticles(Tool):
    """Tool that forwards a month/year query to ``MyWikiAPI.get_featured_articles``."""

    name = "_my_tool_wiki_featured_articles"
    description = """
    Extracts a list of nominators and summaries of featured Wikipedia articles for the provided month and year
    To invoke the tool use code as below
    <code>
    featured = _my_tool_wiki_featured_articles(month='January', year=2005)
    </code>
    """

    inputs = {
        "month": {
            "type": "string",
            "description": "name of the month for example January",
        },
        "year": {
            "type": "integer",
            "description": "Year expressed in 4 digit notation, for example 2009",
        },
    }

    # NOTE(review): the test for the wiki tool joins the result with
    # "\n".join(...), which suggests a list of strings is returned — confirm
    # that "string" is the intended output type.
    output_type = "string"

    # Set on the class because __init__ below does not call Tool.__init__.
    is_initialized = True

    def __init__(self):
        print("***KS*** Wiki featured articles tool initializing ...")
        self.wiki = MyWikiAPI()

    def forward(self, month, year):
        """Return whatever ``MyWikiAPI.get_featured_articles(month, year)`` yields."""
        result = self.wiki.get_featured_articles(month, year)
        return result
test_tools.py CHANGED
@@ -7,21 +7,53 @@ from my_tool_wiki_page_section import MyWikiPageSectionTool
7
  from my_tool_wiki_filter_tables import MyWikiTableFilterTool
8
  from my_base_wiki_api import MyWikiAPI
9
  from my_tool_wiki_featured_articles import MyWikiFeaturedArticles
 
10
  import pytest
11
  import matplotlib as mp
12
 
13
  #pytest --capture=no
14
 
15
- @pytest.mark.parametrize("_month,_year,_exp", [('November',2016, "Giganotosaurus")])
16
- def test_tool_wiki_contributions(_month,_year, _exp):
17
- #api = MyWikiAPI()
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
18
  #content = api.get_featured_articles(_month,_year)
 
 
 
 
 
 
 
 
 
 
 
19
  tool = MyWikiFeaturedArticles()
20
  results = tool(month=_month, year=_year)
21
  results_str = "\n".join(results)
22
  print(f"Results: {len(results)}\n{results_str}")
23
 
24
- assert 0 == 0
25
 
26
 
27
  @pytest.mark.skip(reason="disabled")
@@ -31,7 +63,6 @@ def test_tool_wiki_page_section(_page, _section, _sub_section, _year_start, _yea
31
  w = MyWikiPageSectionTool()
32
  f = MyWikiTableFilterTool()
33
  result = w(page=_page, section=_section)
34
- # print(f"Got page section content: \n{result}")
35
  filtered = f(result, _sub_section, _year_start, _year_end)
36
  print(f"Table filtered: \n{filtered}")
37
  assert len(filtered) == _exp
@@ -51,8 +82,6 @@ def test_tool_image_load(_task_id,_exp):
51
  result = t.forward(_task_id)
52
  print(f"Got result: {result}")
53
  mp.use('QtAgg')
54
- #plt.imshow(result)
55
- #plt.show()
56
 
57
 
58
  @pytest.mark.skip(reason="disabled")
 
7
  from my_tool_wiki_filter_tables import MyWikiTableFilterTool
8
  from my_base_wiki_api import MyWikiAPI
9
  from my_tool_wiki_featured_articles import MyWikiFeaturedArticles
10
+ from my_base_libretexts_api import MyLibreTextsAPI
11
  import pytest
12
  import matplotlib as mp
13
 
14
  #pytest --capture=no
15
 
16
@pytest.mark.parametrize("_exp", [("Introductory")])
def test_tool_libretextx_bookshelves(_exp):
    """Walk the LibreTexts hierarchy end to end against the live site.

    Follows the first bookshelf, its second book, that book's second section
    and the section's eighth paragraph link; an IndexError at any step means
    a level returned fewer entries than expected.
    """
    api = MyLibreTextsAPI()

    bookshelves = api.get_bookshelves()
    shelf_1_url = bookshelves[0][1]

    books = api.get_books(shelf_1_url)
    book_2_url = books[1][1]

    sections = api.get_book_sections(book_2_url)
    section_2_url = sections[1][1]

    paragraphs = api.get_book_section_paragraphs(section_2_url)
    paragraph_8_url = paragraphs[7][1]

    # Every level must consist of (title, href) pairs.
    for level in (bookshelves, books, sections, paragraphs):
        assert all(len(entry) == 2 for entry in level)

    # The leaf link must be usable as input for get_section_contents.
    assert paragraph_8_url

    # NOTE(review): _exp ("Introductory") is not asserted yet — presumably it
    # should appear in one of the fetched titles; confirm which one.
46
+
47
+
48
@pytest.mark.skip(reason="disabled")
@pytest.mark.parametrize("_month,_year,_exp", [('November',2016, "FunkMonk")])
def test_tool_wiki_contributions(_month,_year, _exp):
    """Call the featured-articles tool and check that the expected substring
    occurs in its newline-joined output."""
    featured_tool = MyWikiFeaturedArticles()
    articles = featured_tool(month=_month, year=_year)

    joined = "\n".join(articles)
    print(f"Results: {len(articles)}\n{joined}")

    assert _exp in joined
57
 
58
 
59
  @pytest.mark.skip(reason="disabled")
 
63
  w = MyWikiPageSectionTool()
64
  f = MyWikiTableFilterTool()
65
  result = w(page=_page, section=_section)
 
66
  filtered = f(result, _sub_section, _year_start, _year_end)
67
  print(f"Table filtered: \n{filtered}")
68
  assert len(filtered) == _exp
 
82
  result = t.forward(_task_id)
83
  print(f"Got result: {result}")
84
  mp.use('QtAgg')
 
 
85
 
86
 
87
  @pytest.mark.skip(reason="disabled")