Spaces:
Paused
Paused
| #!/usr/bin/env python3 | |
| import time | |
| from flask import url_for | |
| from .util import live_server_setup, wait_for_all_checks | |
| from ..html_tools import * | |
def set_original_response():
    """Write the baseline HTML fixture to ``test-datastore/endpoint-content.txt``.

    The page contains one ``#sametext`` div and one ``.changetext`` div whose
    text is "Some text that will change".  Any existing file is overwritten.

    NOTE(review): presumably this file backs the ``test_endpoint`` route the
    tests request -- confirm against the live-server setup.

    Returns:
        None.
    """
    baseline_html = """<html>
<body>
Some initial text<br>
<p>Which is across multiple lines</p>
<br>
So let's see what happens. <br>
<div id="sametext">Some text thats the same</div>
<div class="changetext">Some text that will change</div>
</body>
</html>
"""
    # Explicit encoding keeps the bytes on disk independent of the host locale.
    with open("test-datastore/endpoint-content.txt", "w", encoding="utf-8") as f:
        f.write(baseline_html)
def set_modified_response():
    """Write the changed HTML fixture to ``test-datastore/endpoint-content.txt``.

    Compared with the baseline page, the ``.changetext`` content now carries
    the "N online" / "N guests" counters and a second ``.changetext`` div with
    mixed-case text, which the regex-extract test matches against.  Any
    existing file is overwritten.

    NOTE(review): presumably this file backs the ``test_endpoint`` route the
    tests request -- confirm against the live-server setup.

    Returns:
        None.
    """
    modified_html = """<html>
<body>
Some initial text<br>
<p>which has this one new line</p>
<br>
So let's see what happens. <br>
<div id="sametext">Some text thats the same</div>
<div class="changetext">Some text that did change ( 1000 online <br> 80 guests<br> 2000 online )</div>
<div class="changetext">SomeCase insensitive 3456</div>
</body>
</html>
"""
    # Explicit encoding keeps the bytes on disk independent of the host locale.
    with open("test-datastore/endpoint-content.txt", "w", encoding="utf-8") as f:
        f.write(modified_html)
def set_multiline_response():
    """Write the multi-line HTML fixture to ``test-datastore/endpoint-content.txt``.

    The page has a paragraph whose text is split by ``<br>`` tags and line
    breaks ("Something ... across 6 billion multiple ... lines"), followed by
    two divs; the first div also ends in the word "lines".  Any existing file
    is overwritten.

    NOTE(review): presumably this file backs the ``test_endpoint`` route the
    tests request -- confirm against the live-server setup.

    Returns:
        None.
    """
    multiline_html = """<html>
<body>
<p>Something <br>
across 6 billion multiple<br>
lines
</p>
<div>aaand something lines</div>
<br>
<div>and this should be</div>
</body>
</html>
"""
    # Explicit encoding keeps the bytes on disk independent of the host locale.
    with open("test-datastore/endpoint-content.txt", "w", encoding="utf-8") as f:
        f.write(multiline_html)
| # def test_setup(client, live_server, measure_memory_usage): | |
| # live_server_setup(live_server) # Setup on conftest per function | |
def test_check_filter_multiline(client, live_server, measure_memory_usage):
    """Check ``extract_text`` with a multi-line regex rule plus a plaintext rule.

    Serves the multi-line fixture, imports it as a watch, saves two
    CRLF-separated extract rules (a ``/.../si`` regex and a plain string), then
    asserts on the preview page that both rules produced output and that the
    non-greedy regex stopped at the first "lines".
    """
    # live_server setup is done per function in conftest (per the original note)
    set_multiline_response()

    # Import the test endpoint as a new watch
    endpoint_url = url_for('test_endpoint', _external=True)
    import_response = client.post(
        url_for("imports.import_page"),
        data={"urls": endpoint_url},
        follow_redirects=True
    )
    assert b"1 Imported" in import_response.data

    wait_for_all_checks(client)

    # One regex rule and one plaintext rule, one rule per line of the field
    extract_rules = '\r\n'.join((
        '/something.+?6 billion.+?lines/si',
        'and this should be',
    ))
    edit_form = {
        "include_filters": '',
        'extract_text': extract_rules,
        "url": endpoint_url,
        "tags": "",
        "headers": "",
        'fetch_backend': "html_requests",
    }

    # Save the extract rules on the watch's edit page
    edit_response = client.post(
        url_for("ui.ui_edit.edit_page", uuid="first"),
        data=edit_form,
        follow_redirects=True
    )
    assert b"Updated watch." in edit_response.data

    wait_for_all_checks(client)

    # Issue 1828: the watch list must not show this regex flag error
    overview = client.get(url_for("watchlist.index"))
    assert b'not at the start of the expression' not in overview.data

    preview = client.get(
        url_for("ui.ui_views.preview_page", uuid="first"),
        follow_redirects=True
    )
    preview_body = preview.data

    # Plaintext that doesn't look like a regex should match as well
    assert b'and this should be' in preview_body

    # Pieces of the multi-line regex match
    assert b'Something' in preview_body
    assert b'across 6 billion multiple' in preview_body
    assert b'lines' in preview_body

    # The later div also ends in 'lines'; a non-greedy match must not reach it
    assert b'aaand something lines' not in preview_body
def test_check_filter_and_regex_extract(client, live_server, measure_memory_usage):
    """Check regex ``extract_text`` rules combined with a CSS include filter.

    Imports the baseline fixture as a watch, restricts it to ``.changetext``
    and saves five CRLF-separated regex rules, then swaps in the modified
    fixture, triggers a re-check and asserts that the preview page shows the
    regex matches but not the surrounding unmatched text.
    """
    include_filters = ".changetext"

    set_original_response()

    # Add our URL to the import page
    test_url = url_for('test_endpoint', _external=True)
    res = client.post(
        url_for("imports.import_page"),
        data={"urls": test_url},
        follow_redirects=True
    )
    assert b"1 Imported" in res.data

    # Give the thread time to pick it up
    wait_for_all_checks(client)

    # Raw strings keep the regex backslashes literal; writing \d in a normal
    # string literal is an invalid escape sequence (SyntaxWarning on 3.12+).
    # Rules are joined with CRLF, so the submitted value is unchanged.
    extract_text = "\r\n".join([
        r'/\d+ online/',
        r'/\d+ guests/',
        r'/somecase insensitive \d+/i',
        r'/somecase insensitive (345\d)/i',
        # Issue 1828 rule; neither fixture contains text matching it
        r'/issue1828.+?2022/i',
    ])

    # Goto the edit page, set the include filter and the extract rules
    res = client.post(
        url_for("ui.ui_edit.edit_page", uuid="first"),
        data={"include_filters": include_filters,
              'extract_text': extract_text,
              "url": test_url,
              "tags": "",
              "headers": "",
              'fetch_backend': "html_requests"
              },
        follow_redirects=True
    )
    assert b"Updated watch." in res.data

    # Give the thread time to pick it up
    wait_for_all_checks(client)

    res = client.get(url_for("watchlist.index"))
    # Issue 1828
    assert b'not at the start of the expression' not in res.data

    # Make a change
    set_modified_response()

    # Trigger a check
    client.get(url_for("ui.form_watch_checknow"), follow_redirects=True)

    # Give the thread time to pick it up
    wait_for_all_checks(client)

    # The .changetext content differs in the modified fixture, so the watch
    # list is expected to flag the watch as 'unviewed'
    res = client.get(url_for("watchlist.index"))
    assert b'unviewed' in res.data

    # Check the extracted text on the preview page
    res = client.get(
        url_for("ui.ui_views.preview_page", uuid="first"),
        follow_redirects=True
    )

    # Every match of the first regex should be here
    assert b'1000 online' in res.data
    assert b'2000 online' in res.data

    # The second regex should be here too
    assert b'80 guests' in res.data

    # Regex with flag handling (/i) should be here
    assert b'SomeCase insensitive 3456' in res.data

    # Singular group from /somecase insensitive (345\d)/i
    # NOTE(review): this is already implied by the assertion above, so it does
    # not prove the group was extracted on its own -- consider tightening.
    assert b'3456' in res.data

    # Text not matched by any rule should not be here
    assert b'Some text that did change' not in res.data
def test_regex_error_handling(client, live_server, measure_memory_usage):
    """Check that an invalid ``extract_text`` regex is rejected by the edit form.

    Imports the test endpoint as a watch, submits a malformed regex rule and
    asserts the response carries the "is not a valid regular expression."
    message, then deletes all watches.
    """
    # Add our URL to the import page
    test_url = url_for('test_endpoint', _external=True)
    res = client.post(
        url_for("imports.import_page"),
        data={"urls": test_url},
        follow_redirects=True
    )
    assert b"1 Imported" in res.data

    # Deliberately malformed rule (unclosed {3 quantifier, unknown XYZ flags).
    # Raw string: \d in a normal string literal is an invalid escape sequence
    # (SyntaxWarning on 3.12+); the submitted value is unchanged.
    bad_rule = r'/something bad\d{3/XYZ'

    res = client.post(
        url_for("ui.ui_edit.edit_page", uuid="first"),
        data={"extract_text": bad_rule,
              "url": test_url,
              "fetch_backend": "html_requests"},
        follow_redirects=True
    )
    assert b'is not a valid regular expression.' in res.data

    # Clean up the watch created above
    res = client.get(url_for("ui.form_delete", uuid="all"), follow_redirects=True)
    assert b'Deleted' in res.data