{
 "cells": [
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "# URL validator smoke tests\n",
    "\n",
    "Runs `validate_url` and `is_likely_policy_url` from `utils.validators` against a list of well-known policy URLs and a list of URLs that must be rejected, then summarises the results.\n",
    "\n",
    "The overall verdict is `PASS` only when at least `MIN_URL_PASSES` of the known-good URLs validate **and** every URL in `BAD_URLS` is blocked."
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 1,
   "metadata": {},
   "outputs": [],
   "source": [
    "from datetime import datetime\n",
    "\n",
    "from utils.validators import validate_url, is_likely_policy_url"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## Test data and thresholds"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 2,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Known-good pages: each URL is expected to be accepted by validate_url.\n",
    "TEST_URLS = [\n",
    "    {\"company\": \"Discord\", \"url\": \"https://discord.com/privacy\"},\n",
    "    {\"company\": \"Spotify\", \"url\": \"https://www.spotify.com/legal/privacy-policy/\"},\n",
    "    {\"company\": \"Reddit\", \"url\": \"https://www.reddit.com/policies/privacy-policy\"},\n",
    "    {\"company\": \"Netflix\", \"url\": \"https://www.netflix.com/privacy\"},\n",
    "    # NOTE(review): the path ends in /tos, which looks like a terms-of-service\n",
    "    # page rather than a privacy policy - confirm this is the intended URL.\n",
    "    {\"company\": \"Twitter\", \"url\": \"https://twitter.com/en/tos\"},\n",
    "    {\"company\": \"TikTok\", \"url\": \"https://www.tiktok.com/legal/privacy-policy\"},\n",
    "    {\"company\": \"LinkedIn\", \"url\": \"https://www.linkedin.com/legal/privacy-policy\"},\n",
    "    {\"company\": \"Google\", \"url\": \"https://policies.google.com/privacy\"},\n",
    "    {\"company\": \"Apple\", \"url\": \"https://www.apple.com/legal/privacy/\"},\n",
    "    # NOTE(review): is_likely_policy_url returned False for this URL in the\n",
    "    # recorded run - confirm it points at the intended page.\n",
    "    {\"company\": \"Amazon\", \"url\": \"https://www.amazon.com/gp/help/customer/display.html\"}\n",
    "]\n",
    "\n",
    "# URLs that validate_url must reject.\n",
    "BAD_URLS = [\n",
    "    {\"name\": \"Empty\", \"url\": \"\"},\n",
    "    {\"name\": \"No protocol\", \"url\": \"google.com\"},\n",
    "    {\"name\": \"Localhost\", \"url\": \"http://localhost/test\"},\n",
    "    {\"name\": \"Private IP\", \"url\": \"http://192.168.1.1/page\"}\n",
    "]\n",
    "\n",
    "# Minimum number of TEST_URLS that must validate for the overall verdict to be\n",
    "# PASS. There is deliberately no tolerance for BAD_URLS: every one of them\n",
    "# must be blocked.\n",
    "MIN_URL_PASSES = 8"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## Known-good URLs"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 3,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "{'Discord': {'valid': True, 'is_policy': True, 'status': 'PASS'},\n",
       " 'Spotify': {'valid': True, 'is_policy': True, 'status': 'PASS'},\n",
       " 'Reddit': {'valid': True, 'is_policy': True, 'status': 'PASS'},\n",
       " 'Netflix': {'valid': True, 'is_policy': True, 'status': 'PASS'},\n",
       " 'Twitter': {'valid': True, 'is_policy': True, 'status': 'PASS'},\n",
       " 'TikTok': {'valid': True, 'is_policy': True, 'status': 'PASS'},\n",
       " 'LinkedIn': {'valid': True, 'is_policy': True, 'status': 'PASS'},\n",
       " 'Google': {'valid': True, 'is_policy': True, 'status': 'PASS'},\n",
       " 'Apple': {'valid': True, 'is_policy': True, 'status': 'PASS'},\n",
       " 'Amazon': {'valid': True, 'is_policy': False, 'status': 'PASS'}}"
      ]
     },
     "execution_count": 3,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# \"status\" reflects validate_url only; \"is_policy\" is recorded for information\n",
    "# and does not affect PASS/FAIL.\n",
    "url_results = {}\n",
    "\n",
    "for test in TEST_URLS:\n",
    "    # validate_url returns a (valid, message) pair; the message is not needed here.\n",
    "    valid, _msg = validate_url(test['url'])\n",
    "    url_results[test['company']] = {\n",
    "        \"valid\": valid,\n",
    "        \"is_policy\": is_likely_policy_url(test['url']),\n",
    "        \"status\": \"PASS\" if valid else \"FAIL\"\n",
    "    }\n",
    "\n",
    "url_results"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## URLs that must be blocked\n",
    "\n",
    "A bad URL passes its safety test only when `validate_url` rejects it.\n",
    "\n",
    "**FIXME:** in the recorded run below, `http://192.168.1.1/page` was *not* blocked (`'status': 'FAIL'`), so the validator's private-address handling should be reviewed."
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 4,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "{'Empty': {'blocked': True,\n",
       "           'message': 'URL cannot be empty',\n",
       "           'status': 'PASS'},\n",
       " 'No protocol': {'blocked': True,\n",
       "                 'message': 'Invalid URL format. Must start with http:// or https://',\n",
       "                 'status': 'PASS'},\n",
       " 'Localhost': {'blocked': True,\n",
       "               'message': 'Cannot scrape localhost or private addresses',\n",
       "               'status': 'PASS'},\n",
       " 'Private IP': {'blocked': False, 'message': '', 'status': 'FAIL'}}"
      ]
     },
     "execution_count": 4,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "safety_results = {}\n",
    "\n",
    "for test in BAD_URLS:\n",
    "    valid, msg = validate_url(test['url'])\n",
    "    safety_results[test['name']] = {\n",
    "        \"blocked\": not valid,\n",
    "        \"message\": msg,\n",
    "        \"status\": \"PASS\" if not valid else \"FAIL\"\n",
    "    }\n",
    "\n",
    "safety_results"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## Summary"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "url_passed = sum(1 for r in url_results.values() if r['status'] == 'PASS')\n",
    "\n",
    "# Names of the bad URLs that were NOT blocked; any entry here fails the run.\n",
    "safety_failures = [name for name, r in safety_results.items() if r['status'] != 'PASS']\n",
    "safety_passed = len(safety_results) - len(safety_failures)\n",
    "\n",
    "# Denominators come from the result dicts so they stay correct when the URL\n",
    "# lists change.\n",
    "{\n",
    "    \"date\": datetime.now().strftime(\"%Y-%m-%d %H:%M\"),\n",
    "    \"url_validation\": f\"{url_passed}/{len(url_results)}\",\n",
    "    \"safety_tests\": f\"{safety_passed}/{len(safety_results)}\",\n",
    "    \"safety_failures\": safety_failures,\n",
    "    \"overall\": \"PASS\" if url_passed >= MIN_URL_PASSES and not safety_failures else \"FAIL\"\n",
    "}"
   ]
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "Python 3",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.12.10"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 4
}