krzsam committed on
Commit
264afdc
·
1 Parent(s): de39e8f
app.py CHANGED
@@ -19,7 +19,7 @@ questions_to_run = [
19
  #"4fc2f1ae-8625-45b5-ab34-ad4433bc21f8", # OK
20
  #"6f37996b-2ac7-44b0-8e68-6d28256631b4", # ??? don't understand the question
21
  #"9d191bce-651d-4746-be2d-7ef8ecadb9c2", # later
22
- "cabe07ed-9eca-40ea-8ead-410ef5e83f91", # <----
23
  #"3cef3a44-215e-4aed-8e3b-b1e3f08063b7",
24
  #"99c9cc74-fdc8-46c6-8f8d-3ce2d3bfeea3",
25
  #"305ac316-eef6-4446-960a-92d80d542f82",
 
19
  #"4fc2f1ae-8625-45b5-ab34-ad4433bc21f8", # OK
20
  #"6f37996b-2ac7-44b0-8e68-6d28256631b4", # ??? don't understand the question
21
  #"9d191bce-651d-4746-be2d-7ef8ecadb9c2", # later
22
+ "cabe07ed-9eca-40ea-8ead-410ef5e83f91", # <---
23
  #"3cef3a44-215e-4aed-8e3b-b1e3f08063b7",
24
  #"99c9cc74-fdc8-46c6-8f8d-3ce2d3bfeea3",
25
  #"305ac316-eef6-4446-960a-92d80d542f82",
my_agent.py CHANGED
@@ -12,6 +12,9 @@ from my_prompt_config import MyPromptConfig
12
  from my_tool_wiki_page_section import MyWikiPageSectionTool
13
  from my_tool_wiki_filter_tables import MyWikiTableFilterTool
14
  from my_tool_wiki_featured_articles import MyWikiFeaturedArticles
 
 
 
15
  from dotenv import load_dotenv
16
 
17
  # https://huggingface.co/docs/transformers/model_doc/mistral?usage=Pipeline
@@ -49,6 +52,13 @@ class MyAgent:
49
  self.model = InferenceClientModel(model_id=self.MODEL_REASONING)
50
  # --- HF Inference ------------------------------------------------------------------------------
51
 
 
 
 
 
 
 
 
52
  self.reasoning_agent = CodeAgent(
53
  name="CourseAssistant",
54
  description="General AI Assistant",
@@ -62,6 +72,11 @@ class MyAgent:
62
  MyWikiTableFilterTool(),
63
  WebSearchTool(),
64
  MyWikiFeaturedArticles(),
 
 
 
 
 
65
  FinalAnswerTool(),
66
  ],
67
  model=self.model,
@@ -87,7 +102,9 @@ class MyAgent:
87
  "my_tool_reverse_string",
88
  "my_tool_wiki_page_section",
89
  "my_tool_wiki_table_filter",
90
- "my_tool_wiki_featured_articles"
 
 
91
  ],
92
  )
93
  #web_search_agent = CodeAgent(
 
12
  from my_tool_wiki_page_section import MyWikiPageSectionTool
13
  from my_tool_wiki_filter_tables import MyWikiTableFilterTool
14
  from my_tool_wiki_featured_articles import MyWikiFeaturedArticles
15
+ from my_tools_libretexts import MyLibreTextsBookshelvesTool, MyLibreTextsBooksTool
16
+ from my_tools_libretexts import MyLibreTextsBookSectionsTool, MyLibreTextsBookSectionParagraphsTool
17
+ from my_tools_libretexts import MyLibreTextsParagraphContentsTool
18
  from dotenv import load_dotenv
19
 
20
  # https://huggingface.co/docs/transformers/model_doc/mistral?usage=Pipeline
 
52
  self.model = InferenceClientModel(model_id=self.MODEL_REASONING)
53
  # --- HF Inference ------------------------------------------------------------------------------
54
 
55
+ # Instructions for registering a new tool
56
+ # 1. Implement a tool (one or more tools per file)
57
+ # 2. Add imports for used tools in the header for this file
58
+ # 3. Add file(s) with tools to the 'additional_authorized_imports' below
59
+ # 4. Add tool(s) to 'tools' below
60
+ # 5. Add tool(s) name(s) to prompt configuration in my_prompt_config.py
61
+
62
  self.reasoning_agent = CodeAgent(
63
  name="CourseAssistant",
64
  description="General AI Assistant",
 
72
  MyWikiTableFilterTool(),
73
  WebSearchTool(),
74
  MyWikiFeaturedArticles(),
75
+ MyLibreTextsBookshelvesTool(),
76
+ MyLibreTextsBooksTool(),
77
+ MyLibreTextsBookSectionsTool(),
78
+ MyLibreTextsBookSectionParagraphsTool(),
79
+ MyLibreTextsParagraphContentsTool(),
80
  FinalAnswerTool(),
81
  ],
82
  model=self.model,
 
102
  "my_tool_reverse_string",
103
  "my_tool_wiki_page_section",
104
  "my_tool_wiki_table_filter",
105
+ "my_tool_wiki_featured_articles",
106
+ "my_tools_libretexts",
107
+ "my_base_libretexts_api"
108
  ],
109
  )
110
  #web_search_agent = CodeAgent(
my_base_libretexts_api.py CHANGED
@@ -1,7 +1,6 @@
1
  import requests
2
- import json
3
- from lxml import etree
4
  from lxml import html
 
5
 
6
  # https://gist.github.com/scionoftech/0f35d5e231be2cf46823d774023268b6
7
  # https://www.mediawiki.org/wiki/API:Main_page
@@ -26,10 +25,10 @@ class MyLibreTextsAPI:
26
  # print("-----------------------------------")
27
  # print(f"Tag {element.tag} : title: {element.attrib['title']} href: {element.attrib['href']}")
28
 
29
- bookshelf = [(element.attrib['title'], element.attrib['href']) for element in elements]
30
- print(f"Bookshelf:\n{bookshelf}\n\n")
31
 
32
- return bookshelf
33
 
34
  def get_books(self, bookshelf_url):
35
  html_content = requests.get(
@@ -43,8 +42,8 @@ class MyLibreTextsAPI:
43
  # print("-----------------------------------")
44
  # print(f"Tag {element.tag} : title: {element.attrib['title']} href: {element.attrib['href']}")
45
 
46
- books = [(element.attrib['title'], element.attrib['href']) for element in elements]
47
- print(f"Books:\n{books}\n\n")
48
 
49
  return books
50
 
@@ -60,8 +59,8 @@ class MyLibreTextsAPI:
60
  # print("-----------------------------------")
61
  # print(f"Tag {element.tag} : title: {element.attrib['title']} href: {element.attrib['href']}")
62
 
63
- sections = [(element.attrib['title'], element.attrib['href']) for element in elements]
64
- print(f"Sections:\n{sections}\n\n")
65
 
66
  return sections
67
 
@@ -77,21 +76,18 @@ class MyLibreTextsAPI:
77
  # print("-----------------------------------")
78
  # print(f"Tag {element.tag} : title: {element.attrib['title']} href: {element.attrib['href']}")
79
 
80
- paragraphs = [(element.attrib['title'], element.attrib['href']) for element in elements]
81
- print(f"Paragraphs:\n{paragraphs}\n\n")
82
 
83
  return paragraphs
84
 
85
- def get_section_contents(self, paragraph_url):
86
  html_content = requests.get(
87
  paragraph_url,
88
  headers=self.user_agent_headers,
89
  ).text
90
- tree = html.fromstring(html_content)
91
- link_class = "internal"
92
- elements = tree.xpath(f"//a[@class='{link_class}']")
93
 
94
- # TODO get contents and convert to MD format
95
 
96
- return contents
97
 
 
1
  import requests
 
 
2
  from lxml import html
3
+ from html_to_markdown import convert_to_markdown
4
 
5
  # https://gist.github.com/scionoftech/0f35d5e231be2cf46823d774023268b6
6
  # https://www.mediawiki.org/wiki/API:Main_page
 
25
  # print("-----------------------------------")
26
  # print(f"Tag {element.tag} : title: {element.attrib['title']} href: {element.attrib['href']}")
27
 
28
+ bookshelves = [{"title": element.attrib['title'], "url": element.attrib['href']} for element in elements]
29
+ #print(f"Bookshelf:\n{bookshelf}\n\n")
30
 
31
+ return bookshelves
32
 
33
  def get_books(self, bookshelf_url):
34
  html_content = requests.get(
 
42
  # print("-----------------------------------")
43
  # print(f"Tag {element.tag} : title: {element.attrib['title']} href: {element.attrib['href']}")
44
 
45
+ books = [{"title": element.attrib['title'], "url": element.attrib['href']} for element in elements]
46
+ #print(f"Books:\n{books}\n\n")
47
 
48
  return books
49
 
 
59
  # print("-----------------------------------")
60
  # print(f"Tag {element.tag} : title: {element.attrib['title']} href: {element.attrib['href']}")
61
 
62
+ sections = [{"title": element.attrib['title'], "url": element.attrib['href']} for element in elements]
63
+ #print(f"Sections:\n{sections}\n\n")
64
 
65
  return sections
66
 
 
76
  # print("-----------------------------------")
77
  # print(f"Tag {element.tag} : title: {element.attrib['title']} href: {element.attrib['href']}")
78
 
79
+ paragraphs = [{"title": element.attrib['title'], "url": element.attrib['href']} for element in elements]
80
+ #print(f"Paragraphs:\n{paragraphs}\n\n")
81
 
82
  return paragraphs
83
 
84
def get_paragraph_contents(self, paragraph_url, timeout=30):
    """Download a LibreTexts page and return it converted to Markdown.

    Args:
        paragraph_url: URL of the page to fetch.
        timeout: Seconds to wait for the server before giving up. Without
            a timeout ``requests`` waits indefinitely, which would stall
            the calling agent on an unresponsive server.

    Returns:
        The result of ``convert_to_markdown`` applied to the response
        body. The whole HTML document is converted, not only the
        paragraph's own content.

    Raises:
        requests.exceptions.Timeout: if the server does not answer
            within ``timeout`` seconds.
    """
    response = requests.get(
        paragraph_url,
        headers=self.user_agent_headers,
        timeout=timeout,
    )

    # NOTE(review): the HTTP status is not checked, so an error page is
    # converted to Markdown like any other page - confirm this is intended.
    return convert_to_markdown(response.text)
93
 
my_prompt_config.py CHANGED
@@ -33,6 +33,11 @@ class MyPromptConfig:
33
  * _my_tool_wiki_table_filter : filter the subsection and tables on the Wikipedia page for provided years
34
  * _my_tool_wiki_featured_articles : get nominator and summary information for Wikipedia featured articles
35
  for given month and year
 
 
 
 
 
36
 
37
  If the question mentions image or other file, use one of provided tools to load it
38
  using task_id associated with the question.
 
33
  * _my_tool_wiki_table_filter : filter the subsection and tables on the Wikipedia page for provided years
34
  * _my_tool_wiki_featured_articles : get nominator and summary information for Wikipedia featured articles
35
  for given month and year
36
+ * _my_tool_libretexts_bookshelves : Get a list of bookshelves in LibreTexts library of chemistry materials
37
+ * _my_tool_libretexts_books : Get a list of books in LibreTexts bookshelf
38
+ * _my_tool_libretexts_book_sections : Get a list of sections in a book in LibreTexts materials
39
+ * _my_tool_libretexts_book_section_paragraphs : Get a list of section paragraphs in a book in LibreTexts materials
40
+ * _my_tool_libretexts_paragraph_contents : Get contents of a paragraph in a book in LibreTexts materials
41
 
42
  If the question mentions image or other file, use one of provided tools to load it
43
  using task_id associated with the question.
my_tool_libretext_book_shelves.py DELETED
@@ -1,49 +0,0 @@
1
- from smolagents import Tool
2
- from my_base_wiki_api import MyWikiAPI
3
-
4
-
5
- #AUTHORIZED_TYPES = [
6
- # "string",
7
- # "boolean",
8
- # "integer",
9
- # "number",
10
- # "image",
11
- # "audio",
12
- # "array",
13
- # "object",
14
- # "any",
15
- # "null",
16
- #]
17
-
18
- class MyWikiFeaturedArticles(Tool):
19
- name = "_my_tool_wiki_featured_articles"
20
- description = """
21
- Extracts a list of nominators and summaries of featured Wikipedia articles for the provided month and year
22
- To invoke the tool use code as below
23
- <code>
24
- featured = _my_tool_wiki_featured_articles(month='January', year='2005')
25
- </code>
26
- """
27
-
28
- inputs = {
29
- "month": {
30
- "type": "string",
31
- "description": "name of the month for example January",
32
- },
33
- "year": {
34
- "type": "integer",
35
- "description": "Year expressed in 4 digit notation, for example 2009",
36
- },
37
- }
38
-
39
- output_type = "string"
40
-
41
- is_initialized = True
42
-
43
- def __init__(self):
44
- print(f"***KS*** Wiki featured articles tool initializing ...")
45
- self.wiki = MyWikiAPI()
46
-
47
- def forward(self, month, year):
48
- result = self.wiki.get_featured_articles(month, year)
49
- return result
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
my_tools_libretexts.py ADDED
@@ -0,0 +1,165 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ from smolagents import Tool
2
+ from my_base_libretexts_api import MyLibreTextsAPI
3
+
4
+
5
+ #AUTHORIZED_TYPES = [
6
+ # "string",
7
+ # "boolean",
8
+ # "integer",
9
+ # "number",
10
+ # "image",
11
+ # "audio",
12
+ # "array",
13
+ # "object",
14
+ # "any",
15
+ # "null",
16
+ #]
17
+
18
+
19
class MyLibreTextsBase(Tool):
    """Shared base for the LibreTexts tools.

    Creates one ``MyLibreTextsAPI`` client per tool instance and stores it
    as ``self.api`` for the subclasses' ``forward`` methods.
    """

    # NOTE(review): ``Tool.__init__`` is not called from ``__init__`` below,
    # so this class-level flag is the only place ``is_initialized`` is set -
    # presumably deliberate; confirm against the smolagents version in use.
    is_initialized = True

    def __init__(self):
        print("***KS*** LibreTexts Base Tool initializing ...")
        self.api = MyLibreTextsAPI()
25
+
26
+
27
class MyLibreTextsBookshelvesTool(MyLibreTextsBase):
    """Tool listing the bookshelves of the LibreTexts chemistry library.

    ``is_initialized`` and ``self.api`` come from ``MyLibreTextsBase``.
    """

    name = "_my_tool_libretexts_bookshelves"
    description = """
    Get a list of bookshelves in LibreTexts library of chemistry materials
    Returns an array with dictionary items containing 'title' and 'url' of a bookshelf
    <code>
    bookshelves = _my_tool_libretexts_bookshelves()
    </code>
    """

    # The tool takes no arguments.
    inputs = {}

    output_type = "array"

    def __init__(self):
        super().__init__()
        print("***KS*** LibreTexts Bookshelves Tool initializing ...")

    def forward(self):
        """Return whatever ``MyLibreTextsAPI.get_bookshelves`` yields."""
        return self.api.get_bookshelves()
50
+
51
+
52
class MyLibreTextsBooksTool(MyLibreTextsBase):
    """Tool listing the books found on one LibreTexts bookshelf.

    ``is_initialized`` and ``self.api`` come from ``MyLibreTextsBase``.
    """

    name = "_my_tool_libretexts_books"
    description = """
    Get a list of books in LibreTexts bookshelf
    Returns an array with dictionary items containing 'title' and 'url' of a book
    <code>
    books = _my_tool_libretexts_books(bookshelf_url='https://chem.libretexts.org/Bookshelves/Inorganic_Chemistry')
    </code>
    """

    inputs = {
        "bookshelf_url": {
            "type": "string",
            "description": "Bookshelf URL",
        },
    }

    output_type = "array"

    def __init__(self):
        super().__init__()
        print("***KS*** LibreTexts Books Tool initializing ...")

    def forward(self, bookshelf_url):
        """Return whatever ``MyLibreTextsAPI.get_books`` yields for ``bookshelf_url``."""
        return self.api.get_books(bookshelf_url)
79
+
80
+
81
class MyLibreTextsBookSectionsTool(MyLibreTextsBase):
    """Tool listing the sections of one LibreTexts book.

    ``is_initialized`` and ``self.api`` come from ``MyLibreTextsBase``.
    """

    name = "_my_tool_libretexts_book_sections"
    description = """
    Get a list of sections in a book in LibreTexts materials
    Returns an array with dictionary items containing 'title' and 'url' of a section
    <code>
    sections = _my_tool_libretexts_book_sections(book_url='https://chem.libretexts.org/Bookshelves/Inorganic_Chemistry/Inorganic_Chemistry_(Saito)')
    </code>
    """

    inputs = {
        "book_url": {
            "type": "string",
            "description": "Book URL",
        },
    }

    output_type = "array"

    def __init__(self):
        super().__init__()
        print("***KS*** LibreTexts Book Sections Tool initializing ...")

    def forward(self, book_url):
        """Return whatever ``MyLibreTextsAPI.get_book_sections`` yields for ``book_url``."""
        return self.api.get_book_sections(book_url)
108
+
109
+
110
class MyLibreTextsBookSectionParagraphsTool(MyLibreTextsBase):
    """Tool listing the paragraphs of one section of a LibreTexts book.

    ``is_initialized`` and ``self.api`` come from ``MyLibreTextsBase``.
    """

    name = "_my_tool_libretexts_book_section_paragraphs"
    description = """
    Get a list of section paragraphs in a book in LibreTexts materials
    Returns an array with dictionary items containing 'title' and 'url' of a paragraph
    <code>
    paragraphs = _my_tool_libretexts_book_section_paragraphs(section_url='https://chem.libretexts.org/Bookshelves/Inorganic_Chemistry/Inorganic_Chemistry_(Saito)/03%3A_Reactions')
    </code>
    """

    inputs = {
        "section_url": {
            "type": "string",
            "description": "Book section URL",
        },
    }

    output_type = "array"

    def __init__(self):
        super().__init__()
        print("***KS*** LibreTexts Book Section Paragraphs Tool initializing ...")

    def forward(self, section_url):
        """Return whatever ``MyLibreTextsAPI.get_book_section_paragraphs`` yields for ``section_url``."""
        return self.api.get_book_section_paragraphs(section_url)
137
+
138
+
139
class MyLibreTextsParagraphContentsTool(MyLibreTextsBase):
    """Tool returning the contents of one LibreTexts paragraph page.

    ``is_initialized`` and ``self.api`` come from ``MyLibreTextsBase``.
    """

    name = "_my_tool_libretexts_paragraph_contents"
    description = """
    Get contents of a paragraph in a book in LibreTexts materials
    Returns contents in Markdown format
    <code>
    markdown = _my_tool_libretexts_paragraph_contents(paragraph_url='https://chem.libretexts.org/Bookshelves/Inorganic_Chemistry/Inorganic_Chemistry_(Saito)/03%3A_Reactions/3.01%3A_Thermodynamics')
    </code>
    """

    inputs = {
        "paragraph_url": {
            "type": "string",
            "description": "Paragraph URL",
        },
    }

    output_type = "string"

    def __init__(self):
        super().__init__()
        print("***KS*** LibreTexts Paragraph Contents Tool initializing ...")

    def forward(self, paragraph_url):
        """Return whatever ``MyLibreTextsAPI.get_paragraph_contents`` yields for ``paragraph_url``."""
        return self.api.get_paragraph_contents(paragraph_url)
requirements.txt CHANGED
@@ -12,4 +12,5 @@ bs4
12
  wikitextparser
13
  mwparserfromhell
14
  lxml
15
- cssselect
 
 
12
  wikitextparser
13
  mwparserfromhell
14
  lxml
15
+ cssselect
16
+ html-to-markdown
test_tools.py CHANGED
@@ -19,30 +19,28 @@ def test_tool_libretextx_bookshelves(_exp):
19
  bookshelves = api.get_bookshelves()
20
 
21
  shelf_1 = bookshelves[0]
22
- shelf_1_url = shelf_1[1]
23
 
24
  books = api.get_books(shelf_1_url)
25
 
26
  book_2 = books[1]
27
- book_2_url = book_2[1]
28
 
29
  sections = api.get_book_sections(book_2_url)
30
 
31
  section_2 = sections[1]
32
- section_2_url = section_2[1]
33
 
34
  paragraphs = api.get_book_section_paragraphs(section_2_url)
35
 
36
  paragraph_8 = paragraphs[7]
37
- paragraph_8_url = paragraph_8[1]
38
 
39
- #content = api.get_featured_articles(_month,_year)
40
- #tool = MyWikiFeaturedArticles()
41
- #results = tool(month=_month, year=_year)
42
- #results_str = "\n".join(results)
43
- #print(f"Results: {len(results)}\n{results_str}")
44
 
45
- assert 0 == 0
 
 
46
 
47
 
48
  @pytest.mark.skip(reason="disabled")
 
19
  bookshelves = api.get_bookshelves()
20
 
21
  shelf_1 = bookshelves[0]
22
+ shelf_1_url = shelf_1["url"]
23
 
24
  books = api.get_books(shelf_1_url)
25
 
26
  book_2 = books[1]
27
+ book_2_url = book_2["url"]
28
 
29
  sections = api.get_book_sections(book_2_url)
30
 
31
  section_2 = sections[1]
32
+ section_2_url = section_2["url"]
33
 
34
  paragraphs = api.get_book_section_paragraphs(section_2_url)
35
 
36
  paragraph_8 = paragraphs[7]
37
+ paragraph_8_url = paragraph_8["url"]
38
 
39
+ contents = api.get_paragraph_contents(paragraph_8_url)
 
 
 
 
40
 
41
+ #print(f"Contents:\n{contents}")
42
+
43
+ assert "Louvrier" in contents
44
 
45
 
46
  @pytest.mark.skip(reason="disabled")