| import os |
| import sys |
| import unittest |
|
|
| from bs4 import BeautifulSoup |
|
|
| sys.path.append(os.path.abspath("../scripts")) |
|
|
| from browse import extract_hyperlinks |
|
|
|
|
class TestBrowseLinks(unittest.TestCase):
    """Tests for the hyperlink extraction helper in the ``browse`` module."""

    def test_extract_hyperlinks(self):
        """Expect (text, URL) pairs, with the relative href joined to the base URL."""
        # Fixture: one absolute link, one relative link, and a non-link element.
        html_fragment = """
        <body>
        <a href="https://google.com">Google</a>
        <a href="foo.html">Foo</a>
        <div>Some other crap</div>
        </body>
        """
        base_url = "http://example.com"
        expected_pairs = [
            ("Google", "https://google.com"),
            ("Foo", "http://example.com/foo.html"),
        ]

        parsed_document = BeautifulSoup(html_fragment, "html.parser")
        found_pairs = extract_hyperlinks(parsed_document, base_url)

        self.assertEqual(found_pairs, expected_pairs)
|
|