File size: 5,664 Bytes
7fae465
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
{
 "cells": [
  {
   "cell_type": "code",
   "execution_count": 1,
   "metadata": {},
   "outputs": [],
   "source": [
    "from datetime import datetime\n",
    "from utils.validators import validate_url, is_likely_policy_url\n",
    "\n"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 2,
   "metadata": {},
   "outputs": [],
   "source": [
    "# URLs the validation loop expects validate_url to accept, one record per company.\n",
    "TEST_URLS = [\n",
    "    {\"company\": company, \"url\": url}\n",
    "    for company, url in (\n",
    "        (\"Discord\", \"https://discord.com/privacy\"),\n",
    "        (\"Spotify\", \"https://www.spotify.com/legal/privacy-policy/\"),\n",
    "        (\"Reddit\", \"https://www.reddit.com/policies/privacy-policy\"),\n",
    "        (\"Netflix\", \"https://www.netflix.com/privacy\"),\n",
    "        (\"Twitter\", \"https://twitter.com/en/tos\"),\n",
    "        (\"TikTok\", \"https://www.tiktok.com/legal/privacy-policy\"),\n",
    "        (\"LinkedIn\", \"https://www.linkedin.com/legal/privacy-policy\"),\n",
    "        (\"Google\", \"https://policies.google.com/privacy\"),\n",
    "        (\"Apple\", \"https://www.apple.com/legal/privacy/\"),\n",
    "        (\"Amazon\", \"https://www.amazon.com/gp/help/customer/display.html\"),\n",
    "    )\n",
    "]\n",
    "\n",
    "# URLs the safety loop expects validate_url to reject; 'name' labels each case.\n",
    "BAD_URLS = [\n",
    "    {\"name\": label, \"url\": url}\n",
    "    for label, url in (\n",
    "        (\"Empty\", \"\"),\n",
    "        (\"No protocol\", \"google.com\"),\n",
    "        (\"Localhost\", \"http://localhost/test\"),\n",
    "        (\"Private IP\", \"http://192.168.1.1/page\"),\n",
    "    )\n",
    "]"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 3,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "{'Discord': {'valid': True, 'is_policy': True, 'status': 'PASS'},\n",
       " 'Spotify': {'valid': True, 'is_policy': True, 'status': 'PASS'},\n",
       " 'Reddit': {'valid': True, 'is_policy': True, 'status': 'PASS'},\n",
       " 'Netflix': {'valid': True, 'is_policy': True, 'status': 'PASS'},\n",
       " 'Twitter': {'valid': True, 'is_policy': True, 'status': 'PASS'},\n",
       " 'TikTok': {'valid': True, 'is_policy': True, 'status': 'PASS'},\n",
       " 'LinkedIn': {'valid': True, 'is_policy': True, 'status': 'PASS'},\n",
       " 'Google': {'valid': True, 'is_policy': True, 'status': 'PASS'},\n",
       " 'Apple': {'valid': True, 'is_policy': True, 'status': 'PASS'},\n",
       " 'Amazon': {'valid': True, 'is_policy': False, 'status': 'PASS'}}"
      ]
     },
     "execution_count": 3,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "def _check_policy_url(url):\n",
    "    \"\"\"Run both validators on one URL and package the outcome as a record.\n",
    "\n",
    "    Returns a dict with 'valid', 'is_policy' and 'status'; status is PASS\n",
    "    exactly when validate_url reports the URL as valid.\n",
    "    \"\"\"\n",
    "    accepted, _reason = validate_url(url)  # the message is not used in this check\n",
    "    looks_like_policy = is_likely_policy_url(url)\n",
    "    if accepted:\n",
    "        outcome = \"PASS\"\n",
    "    else:\n",
    "        outcome = \"FAIL\"\n",
    "    return {\"valid\": accepted, \"is_policy\": looks_like_policy, \"status\": outcome}\n",
    "\n",
    "\n",
    "# One record per entry in TEST_URLS, keyed by company name.\n",
    "url_results = {case[\"company\"]: _check_policy_url(case[\"url\"]) for case in TEST_URLS}\n",
    "\n",
    "url_results"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 4,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "{'Empty': {'blocked': True,\n",
       "  'message': 'URL cannot be empty',\n",
       "  'status': 'PASS'},\n",
       " 'No protocol': {'blocked': True,\n",
       "  'message': 'Invalid URL format. Must start with http:// or https://',\n",
       "  'status': 'PASS'},\n",
       " 'Localhost': {'blocked': True,\n",
       "  'message': 'Cannot scrape localhost or private addresses',\n",
       "  'status': 'PASS'},\n",
       " 'Private IP': {'blocked': False, 'message': '', 'status': 'FAIL'}}"
      ]
     },
     "execution_count": 4,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "def _check_blocked(url):\n",
    "    \"\"\"Check that validate_url rejects one URL and keep the message it returned.\n",
    "\n",
    "    Returns a dict with 'blocked', 'message' and 'status'; status is PASS\n",
    "    exactly when validate_url reports the URL as not valid.\n",
    "    \"\"\"\n",
    "    accepted, reason = validate_url(url)\n",
    "    rejected = not accepted\n",
    "    if rejected:\n",
    "        outcome = \"PASS\"\n",
    "    else:\n",
    "        outcome = \"FAIL\"\n",
    "    return {\"blocked\": rejected, \"message\": reason, \"status\": outcome}\n",
    "\n",
    "\n",
    "# One record per entry in BAD_URLS, keyed by the case name.\n",
    "safety_results = {case[\"name\"]: _check_blocked(case[\"url\"]) for case in BAD_URLS}\n",
    "\n",
    "safety_results"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 6,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "{'date': '2026-02-12 20:27',\n",
       " 'url_validation': '10/10',\n",
       " 'safety_tests': '3/4',\n",
       " 'overall': 'PASS'}"
      ]
     },
     "execution_count": 6,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# Pass bars for the overall verdict, named so they are easy to find and tune.\n",
    "MIN_URL_PASSES = 8\n",
    "MIN_SAFETY_PASSES = 3\n",
    "# NOTE(review): a safety bar below the number of safety cases lets an unblocked\n",
    "# URL still yield overall PASS (the saved run reports 'Private IP' as not\n",
    "# blocked) -- confirm this is intended.\n",
    "\n",
    "url_passed = sum(1 for r in url_results.values() if r['status'] == 'PASS')\n",
    "safety_passed = sum(1 for r in safety_results.values() if r['status'] == 'PASS')\n",
    "\n",
    "# Totals come from the result dicts, so the report stays correct when test\n",
    "# cases are added or removed. The dict below is the cell's displayed value.\n",
    "{\n",
    "    \"date\": datetime.now().strftime(\"%Y-%m-%d %H:%M\"),\n",
    "    \"url_validation\": f\"{url_passed}/{len(url_results)}\",\n",
    "    \"safety_tests\": f\"{safety_passed}/{len(safety_results)}\",\n",
    "    \"overall\": \"PASS\" if url_passed >= MIN_URL_PASSES and safety_passed >= MIN_SAFETY_PASSES else \"FAIL\"\n",
    "}"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": []
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "Python 3",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.12.10"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 4
}