commit
Browse files- my_base_libretexts_api.py +97 -0
- my_base_wiki_api.py +0 -5
- my_tool_libretext_book_shelves.py +49 -0
- test_tools.py +36 -7
my_base_libretexts_api.py
ADDED
|
@@ -0,0 +1,97 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
import requests
|
| 2 |
+
import json
|
| 3 |
+
from lxml import etree
|
| 4 |
+
from lxml import html
|
| 5 |
+
|
| 6 |
+
# https://gist.github.com/scionoftech/0f35d5e231be2cf46823d774023268b6
|
| 7 |
+
# https://www.mediawiki.org/wiki/API:Main_page
|
| 8 |
+
|
| 9 |
+
class MyLibreTextsAPI:
    """Scraping client for the LibreTexts chemistry library.

    Every navigation level (bookshelves -> books -> sections -> paragraphs)
    is rendered as ``<a>`` links on an HTML page, so each getter downloads a
    page and extracts ``(title, href)`` pairs for a given link CSS class.
    """

    LIBRETEXTS_BASE_URL = "https://chem.libretexts.org"
    # Descriptive User-Agent per MediaWiki API etiquette (see module links above).
    user_agent_headers = {"user-Agent":
                              "AgentsCourseAssignment/1.0 (https://huggingface.co/spaces/krzsam/Agents-Course-Assignment)"}

    # CSS class LibreTexts uses for sortable listing links; bookshelves,
    # books and book sections all share it (was duplicated in four methods).
    _LISTING_LINK_CLASS = "mt-sortable-listing-link mt-edit-section internal"

    def __init__(self):
        print("***KS*** Initializing LibreTexts API")

    def _get_links(self, url, link_class, label):
        """Fetch *url* and return ``[(title, href), ...]`` for anchors of *link_class*.

        Anchors missing a ``title`` or ``href`` attribute are skipped instead
        of raising ``KeyError``. *label* is only used for the debug printout.
        """
        html_content = requests.get(url, headers=self.user_agent_headers).text
        tree = html.fromstring(html_content)
        elements = tree.xpath(f"//a[@class='{link_class}']")
        links = [(element.attrib['title'], element.attrib['href'])
                 for element in elements
                 if 'title' in element.attrib and 'href' in element.attrib]
        print(f"{label}:\n{links}\n\n")
        return links

    def get_bookshelves(self):
        """Return (title, href) pairs for the top-level bookshelves page."""
        url = f"{self.LIBRETEXTS_BASE_URL}/Bookshelves"
        return self._get_links(url, self._LISTING_LINK_CLASS, "Bookshelf")

    def get_books(self, bookshelf_url):
        """Return (title, href) pairs for the books listed on *bookshelf_url*."""
        return self._get_links(bookshelf_url, self._LISTING_LINK_CLASS, "Books")

    def get_book_sections(self, book_url):
        """Return (title, href) pairs for the sections of the book at *book_url*."""
        return self._get_links(book_url, self._LISTING_LINK_CLASS, "Sections")

    def get_book_section_paragraphs(self, section_url):
        """Return (title, href) pairs for the paragraph links inside a section.

        Paragraph links use the plain "internal" class rather than the
        sortable-listing class used by the higher navigation levels.
        """
        return self._get_links(section_url, "internal", "Paragraphs")

    def get_section_contents(self, paragraph_url):
        """Return the text of the internal links found on *paragraph_url*.

        Bug fix: the original returned the undefined name ``contents``,
        which raised ``NameError`` on every call. Until Markdown
        conversion is implemented, return the link texts from the page.
        """
        html_content = requests.get(paragraph_url, headers=self.user_agent_headers).text
        tree = html.fromstring(html_content)
        elements = tree.xpath("//a[@class='internal']")
        # TODO get contents and convert to MD format
        contents = [" ".join(element.itertext()) for element in elements]
        return contents
my_base_wiki_api.py
CHANGED
|
@@ -61,9 +61,6 @@ class MyWikiAPI:
|
|
| 61 |
elements = tree.xpath(f"//div[@class='{article_heading}']")
|
| 62 |
element_texts = []
|
| 63 |
for element in elements:
|
| 64 |
-
#print("-----------------------------------")
|
| 65 |
-
#print(f"Tag {element.tag}")
|
| 66 |
-
|
| 67 |
n1 = element.getnext()
|
| 68 |
n1_text = " ".join(n1.itertext())
|
| 69 |
|
|
@@ -71,12 +68,10 @@ class MyWikiAPI:
|
|
| 71 |
n2_text = " ".join(n2.itertext())
|
| 72 |
element_text = f"{n1_text} {n2_text}"
|
| 73 |
|
| 74 |
-
#print(f"Element text: {element_text}")
|
| 75 |
element_texts.append(element_text)
|
| 76 |
|
| 77 |
return element_texts
|
| 78 |
|
| 79 |
-
|
| 80 |
def __is_int__(self, s):
|
| 81 |
try:
|
| 82 |
int(s)
|
|
|
|
| 61 |
elements = tree.xpath(f"//div[@class='{article_heading}']")
|
| 62 |
element_texts = []
|
| 63 |
for element in elements:
|
|
|
|
|
|
|
|
|
|
| 64 |
n1 = element.getnext()
|
| 65 |
n1_text = " ".join(n1.itertext())
|
| 66 |
|
|
|
|
| 68 |
n2_text = " ".join(n2.itertext())
|
| 69 |
element_text = f"{n1_text} {n2_text}"
|
| 70 |
|
|
|
|
| 71 |
element_texts.append(element_text)
|
| 72 |
|
| 73 |
return element_texts
|
| 74 |
|
|
|
|
| 75 |
def __is_int__(self, s):
|
| 76 |
try:
|
| 77 |
int(s)
|
my_tool_libretext_book_shelves.py
ADDED
|
@@ -0,0 +1,49 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
from smolagents import Tool
|
| 2 |
+
from my_base_wiki_api import MyWikiAPI
|
| 3 |
+
|
| 4 |
+
|
| 5 |
+
#AUTHORIZED_TYPES = [
|
| 6 |
+
# "string",
|
| 7 |
+
# "boolean",
|
| 8 |
+
# "integer",
|
| 9 |
+
# "number",
|
| 10 |
+
# "image",
|
| 11 |
+
# "audio",
|
| 12 |
+
# "array",
|
| 13 |
+
# "object",
|
| 14 |
+
# "any",
|
| 15 |
+
# "null",
|
| 16 |
+
#]
|
| 17 |
+
|
| 18 |
+
class MyWikiFeaturedArticles(Tool):
    """smolagents tool wrapping MyWikiAPI.get_featured_articles."""

    name = "_my_tool_wiki_featured_articles"
    description = """
    Extracts a list of nominators and summaries of featured Wikipedia articles for the provided month and year
    To invoke the tool use code as below
    <code>
    featured = _my_tool_wiki_featured_articles(month='January', year='2005')
    </code>
    """

    inputs = {
        "month": {
            "type": "string",
            "description": "name of the month for example January",
        },
        "year": {
            "type": "integer",
            "description": "Year expressed in 4 digit notation, for example 2009",
        },
    }

    output_type = "string"

    is_initialized = True

    def __init__(self):
        # Bug fix: Tool subclasses must call super().__init__() so smolagents
        # validates name/description/inputs/output_type and sets up tool state.
        super().__init__()
        print("***KS*** Wiki featured articles tool initializing ...")
        self.wiki = MyWikiAPI()

    def forward(self, month, year):
        """Delegate to the wiki API; returns the featured-articles result."""
        result = self.wiki.get_featured_articles(month, year)
        return result
|
test_tools.py
CHANGED
|
@@ -7,21 +7,53 @@ from my_tool_wiki_page_section import MyWikiPageSectionTool
|
|
| 7 |
from my_tool_wiki_filter_tables import MyWikiTableFilterTool
|
| 8 |
from my_base_wiki_api import MyWikiAPI
|
| 9 |
from my_tool_wiki_featured_articles import MyWikiFeaturedArticles
|
|
|
|
| 10 |
import pytest
|
| 11 |
import matplotlib as mp
|
| 12 |
|
| 13 |
#pytest --capture=no
|
| 14 |
|
| 15 |
-
@pytest.mark.parametrize("
|
| 16 |
-
def
|
| 17 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 18 |
#content = api.get_featured_articles(_month,_year)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 19 |
tool = MyWikiFeaturedArticles()
|
| 20 |
results = tool(month=_month, year=_year)
|
| 21 |
results_str = "\n".join(results)
|
| 22 |
print(f"Results: {len(results)}\n{results_str}")
|
| 23 |
|
| 24 |
-
assert
|
| 25 |
|
| 26 |
|
| 27 |
@pytest.mark.skip(reason="disabled")
|
|
@@ -31,7 +63,6 @@ def test_tool_wiki_page_section(_page, _section, _sub_section, _year_start, _yea
|
|
| 31 |
w = MyWikiPageSectionTool()
|
| 32 |
f = MyWikiTableFilterTool()
|
| 33 |
result = w(page=_page, section=_section)
|
| 34 |
-
# print(f"Got page section content: \n{result}")
|
| 35 |
filtered = f(result, _sub_section, _year_start, _year_end)
|
| 36 |
print(f"Table filtered: \n{filtered}")
|
| 37 |
assert len(filtered) == _exp
|
|
@@ -51,8 +82,6 @@ def test_tool_image_load(_task_id,_exp):
|
|
| 51 |
result = t.forward(_task_id)
|
| 52 |
print(f"Got result: {result}")
|
| 53 |
mp.use('QtAgg')
|
| 54 |
-
#plt.imshow(result)
|
| 55 |
-
#plt.show()
|
| 56 |
|
| 57 |
|
| 58 |
@pytest.mark.skip(reason="disabled")
|
|
|
|
| 7 |
from my_tool_wiki_filter_tables import MyWikiTableFilterTool
|
| 8 |
from my_base_wiki_api import MyWikiAPI
|
| 9 |
from my_tool_wiki_featured_articles import MyWikiFeaturedArticles
|
| 10 |
+
from my_base_libretexts_api import MyLibreTextsAPI
|
| 11 |
import pytest
|
| 12 |
import matplotlib as mp
|
| 13 |
|
| 14 |
#pytest --capture=no
|
| 15 |
|
| 16 |
+
@pytest.mark.parametrize("_exp", [("Introductory")])
def test_tool_libretextx_bookshelves(_exp):
    """Walk the LibreTexts hierarchy: shelves -> books -> sections -> paragraphs.

    NOTE(review): the function name keeps the original 'libretextx' typo so
    existing ``-k`` selections keep matching; rename in a dedicated commit.
    """
    api = MyLibreTextsAPI()
    bookshelves = api.get_bookshelves()

    # The original `assert 0 == 0` asserted nothing and ignored _exp;
    # the expected shelf name must appear among the scraped titles.
    assert any(_exp in title for title, _href in bookshelves)

    shelf_1_url = bookshelves[0][1]
    books = api.get_books(shelf_1_url)
    assert len(books) >= 2, "expected at least two books on the first shelf"

    book_2_url = books[1][1]
    sections = api.get_book_sections(book_2_url)
    assert len(sections) >= 2, "expected at least two sections in the book"

    section_2_url = sections[1][1]
    paragraphs = api.get_book_section_paragraphs(section_2_url)
    # The original drilled into paragraphs[7]; keep that as the bound.
    assert len(paragraphs) >= 8
|
| 46 |
+
|
| 47 |
+
|
| 48 |
+
@pytest.mark.skip(reason="disabled")
@pytest.mark.parametrize("_month,_year,_exp", [('November',2016, "FunkMonk")])
def test_tool_wiki_contributions(_month,_year, _exp):
    """The expected nominator must appear in the featured-articles output."""
    results = MyWikiFeaturedArticles()(month=_month, year=_year)
    results_str = "\n".join(results)
    print(f"Results: {len(results)}\n{results_str}")

    assert _exp in results_str
|
| 57 |
|
| 58 |
|
| 59 |
@pytest.mark.skip(reason="disabled")
|
|
|
|
| 63 |
w = MyWikiPageSectionTool()
|
| 64 |
f = MyWikiTableFilterTool()
|
| 65 |
result = w(page=_page, section=_section)
|
|
|
|
| 66 |
filtered = f(result, _sub_section, _year_start, _year_end)
|
| 67 |
print(f"Table filtered: \n{filtered}")
|
| 68 |
assert len(filtered) == _exp
|
|
|
|
| 82 |
result = t.forward(_task_id)
|
| 83 |
print(f"Got result: {result}")
|
| 84 |
mp.use('QtAgg')
|
|
|
|
|
|
|
| 85 |
|
| 86 |
|
| 87 |
@pytest.mark.skip(reason="disabled")
|