| import requests |
| from lxml import html |
| from html_to_markdown import convert_to_markdown |
|
|
| |
| |
|
|
class MyLibreTextsAPI:
    """Small scraping client for the LibreTexts Chemistry site.

    The listing methods download a page, select anchor elements by an
    exact match on their ``class`` attribute, and return one
    ``{"title": ..., "url": ...}`` dict per anchor.
    ``get_paragraph_contents`` downloads a page and converts its HTML to
    Markdown.

    HTTP status codes are not inspected: whatever body the server returns
    is parsed as-is. Network failures (including timeouts) propagate as
    ``requests`` exceptions.
    """

    LIBRETEXTS_BASE_URL = "https://chem.libretexts.org"
    user_agent_headers = {"user-Agent":
                          "AgentsCourseAssignment/1.0 (https://huggingface.co/spaces/krzsam/Agents-Course-Assignment)"}

    # Seconds to wait for the server before giving up. Without an explicit
    # timeout, requests waits indefinitely on an unresponsive server.
    REQUEST_TIMEOUT = 30

    # Exact ``class`` attribute value of the anchors collected by the
    # bookshelf, book and section listing methods.
    _LISTING_LINK_CLASS = "mt-sortable-listing-link mt-edit-section internal"

    def __init__(self):
        print("***KS*** Initializing LibreTexts API")

    def _fetch_html(self, url):
        """Download *url* with the class headers and timeout; return the body text."""
        response = requests.get(
            url,
            headers=self.user_agent_headers,
            timeout=self.REQUEST_TIMEOUT,
        )
        return response.text

    def _fetch_links(self, url, link_class):
        """Return title/url dicts for anchors on *url* whose class equals *link_class*.

        The XPath comparison is an exact string match on the whole ``class``
        attribute, not a token match.

        Raises:
            KeyError: if a matched anchor has no ``title`` or ``href`` attribute.
        """
        tree = html.fromstring(self._fetch_html(url))
        anchors = tree.xpath(f"//a[@class='{link_class}']")
        return [
            {"title": anchor.attrib["title"], "url": anchor.attrib["href"]}
            for anchor in anchors
        ]

    def get_bookshelves(self):
        """List the links found on ``<LIBRETEXTS_BASE_URL>/Bookshelves``.

        Returns:
            A list of ``{"title": str, "url": str}`` dicts.
        """
        return self._fetch_links(
            f"{self.LIBRETEXTS_BASE_URL}/Bookshelves",
            self._LISTING_LINK_CLASS,
        )

    def get_books(self, bookshelf_url):
        """List the links found on the page at *bookshelf_url*.

        Returns:
            A list of ``{"title": str, "url": str}`` dicts.
        """
        return self._fetch_links(bookshelf_url, self._LISTING_LINK_CLASS)

    def get_book_sections(self, book_url):
        """List the links found on the page at *book_url*.

        Returns:
            A list of ``{"title": str, "url": str}`` dicts.
        """
        return self._fetch_links(book_url, self._LISTING_LINK_CLASS)

    def get_book_section_paragraphs(self, section_url):
        """List the links found on the page at *section_url*.

        Unlike the other listing methods, this one selects anchors whose
        ``class`` attribute is exactly ``internal``.

        Returns:
            A list of ``{"title": str, "url": str}`` dicts.
        """
        return self._fetch_links(section_url, "internal")

    def get_paragraph_contents(self, paragraph_url):
        """Download the page at *paragraph_url* and return it converted to Markdown.

        The whole response body is passed to ``convert_to_markdown``.
        """
        return convert_to_markdown(self._fetch_html(paragraph_url))
|
|
|
|