olafuraron committed on
Commit
72b170f
·
verified ·
1 Parent(s): 19072d2

Upload 4 files

Browse files
README.md ADDED
@@ -0,0 +1,49 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ ---
2
+ license: cc-by-nc-sa-4.0
3
+ tags:
4
+ - web-privacy
5
+ - tracker-detection
6
+ - entity-attribution
7
+ - feedforward
8
+ - safetensors
9
+ - webassembly
10
+ datasets:
11
+ - olafuraron/tracker-radar-ml
12
+ ---
13
+
14
+ # Tracking Entity Classifier
15
+
16
+ Predicts which company owns a third-party tracking domain based on behavioral patterns from DuckDuckGo's [Tracker Radar](https://github.com/duckduckgo/tracker-radar) dataset. No ownership metadata is used as input — the model learns to identify entities from API usage, cookie behavior, resource types, and prevalence patterns.
17
+
18
+ ## Labels
19
+
20
+ 13 tracking-related entities:
21
+
22
+ Adobe Inc.; ByteDance Ltd.; Comcast Corporation; Conversant LLC; Google LLC; HubSpot, Inc.; Impact; Leven Labs, Inc. DBA Admiral; Microsoft Corporation; Oracle Corporation; Salesforce.com, Inc.; Yahoo Inc.; Yandex LLC
23
+
24
+ ## Performance
25
+
26
+ - **Accuracy:** 58.5%
27
+ - **Weighted F1:** 0.604
28
+ - **Training data:** 731 domains from Tracker Radar US region
29
+ - **Features:** 164 behavioral features
30
+
31
+ Per-entity results are strong for distinctive entities: Leven Labs (F1 0.93), Google (F1 0.75), Microsoft (F1 0.65). Predictions are less reliable for smaller entities with few training samples.
32
+
33
+ ## Architecture
34
+
35
+ Feedforward neural network: 164 → 128 → 64 → 13 with ReLU activations and dropout (0.2). Model size: 118.5 KB.
36
+
37
+ Designed for on-device inference via [Kjarni](https://github.com/olafurjohannsson/kjarni) WebAssembly runtime with SIMD128 acceleration.
38
+
39
+ ## Usage
40
+
41
+ Features must be standardized with the provided scaler before inference: for each feature, compute `(x - mean) / scale` using the `mean` and `scale` arrays in `tracking_entity_classifier_scaler.json`, with features ordered as listed in that file's `feature_names`. This model is most meaningful when applied to domains already identified as ad tech by the [entity cluster classifier](https://huggingface.co/olafuraron/entity-cluster-classifier).
42
+
43
+ ## Context
44
+
45
+ This model demonstrates that tracking companies have identifiable behavioral fingerprints — their domains exhibit characteristic patterns of API usage, cookie behavior, and web presence that distinguish them from other entities. See [TrackerML](https://github.com/olafurjohannsson/tracker-ml) for the full project.
46
+
47
+ ## License
48
+
49
+ CC-BY-NC-SA 4.0 (derived from DuckDuckGo Tracker Radar).
tracking_entity_classifier.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:6e5fd548a9c98feb5ca4ec18760aa32410a5598853338bf419b9ff05a286f6d2
3
+ size 121348
tracking_entity_classifier_config.json ADDED
@@ -0,0 +1,23 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "input_dim": 164,
3
+ "hidden_dim": 128,
4
+ "output_dim": 13,
5
+ "labels": [
6
+ "Adobe Inc.",
7
+ "ByteDance Ltd.",
8
+ "Comcast Corporation",
9
+ "Conversant LLC",
10
+ "Google LLC",
11
+ "HubSpot, Inc.",
12
+ "Impact",
13
+ "Leven Labs, Inc. DBA Admiral",
14
+ "Microsoft Corporation",
15
+ "Oracle Corporation",
16
+ "Salesforce.com, Inc.",
17
+ "Yahoo Inc.",
18
+ "Yandex LLC"
19
+ ],
20
+ "accuracy": 0.5846994535519126,
21
+ "top5_accuracy": 0.8415300546448088,
22
+ "weighted_f1": 0.6036854240453027
23
+ }
tracking_entity_classifier_scaler.json ADDED
@@ -0,0 +1 @@
 
 
1
+ {"mean": [0.006395531589650341, 931.8782489740082, 0.7113543091655267, 9.833105335157319, 0.12585499316005472, 105.75923392612859, 39.79753761969904, 27.024623803009575, 5.738714090287278, 12.495212038303693, 4.083447332421341, 11.388508891928865, 2.9767441860465116, 0.21203830369357046, 0.2996977166442784, 0.15166629808305532, 0.0037214582967133865, 83.5718194254446, 51225.61104767736, 10.66484268125855, 3.6716826265389875, 3249.8180574555404, 8.471731601596368, 89.35546929924335, 2.4817099995228236, 17.687895728975903, 254.81549654271143, 0.213406292749658, 0.015047879616963064, 1.1956224350205198, 0.35020519835841313, 0.4801641586867305, 0.39261285909712723, 0.0027359781121751026, 0.0013679890560875513, 0.008207934336525308, 0.0, 0.006839945280437756, 0.013679890560875513, 0.0, 0.012311901504787962, 0.009575923392612859, 0.008207934336525308, 0.0013679890560875513, 0.06703146374829001, 0.06566347469220246, 0.15868673050615595, 0.13132694938440492, 0.0013679890560875513, 0.0, 0.0, 0.0, 0.004103967168262654, 0.0027359781121751026, 0.0027359781121751026, 0.1545827633378933, 0.11080711354309165, 0.0027359781121751026, 0.023255813953488372, 0.0560875512995896, 0.0, 0.0, 0.0, 0.0, 0.0013679890560875513, 0.09165526675786594, 0.027359781121751026, 0.03419972640218878, 0.0, 0.0, 0.008207934336525308, 0.015047879616963064, 0.009575923392612859, 0.046511627906976744, 0.04103967168262654, 0.0506155950752394, 0.04240766073871409, 0.025991792065663474, 0.023255813953488372, 0.013679890560875513, 0.0, 0.03419972640218878, 0.012311901504787962, 0.004103967168262654, 0.08481532147742818, 0.03283173734610123, 0.04377564979480164, 0.006839945280437756, 0.023255813953488372, 0.016415868673050615, 0.028727770177838577, 0.019151846785225718, 0.06566347469220246, 0.03283173734610123, 0.0027359781121751026, 0.0533515731874145, 0.02051983584131327, 0.004103967168262654, 0.016415868673050615, 0.26265389876880985, 0.046511627906976744, 0.009575923392612859, 0.03419972640218878, 
0.004103967168262654, 0.008207934336525308, 0.02188782489740082, 0.03283173734610123, 0.02051983584131327, 0.06566347469220246, 0.006839945280437756, 0.017783857729138167, 0.0506155950752394, 0.006839945280437756, 0.006839945280437756, 0.04103967168262654, 0.004103967168262654, 0.008207934336525308, 0.038303693570451436, 0.036935704514363885, 0.060191518467852256, 0.019151846785225718, 0.012311901504787962, 0.06429548563611491, 0.0, 0.0013679890560875513, 0.0, 0.0, 0.0, 0.0, 0.0, 0.03419972640218878, 0.0013679890560875513, 0.0027359781121751026, 0.0027359781121751026, 0.0013679890560875513, 0.0, 0.0027359781121751026, 0.009575923392612859, 0.023255813953488372, 0.023255813953488372, 0.008207934336525308, 0.012311901504787962, 0.0027359781121751026, 0.0, 0.0, 0.0, 0.006839945280437756, 0.0027359781121751026, 0.0013679890560875513, 0.058823529411764705, 0.03283173734610123, 0.08755129958960328, 0.09439124487004104, 0.2421340629274966, 0.024623803009575923, 0.03556771545827633, 0.0013679890560875513, 0.025991792065663474, 0.03146374829001368, 0.015047879616963064, 0.015047879616963064, 0.017783857729138167, 0.017783857729138167, 0.17647058823529413], "scale": [0.04434519375803678, 6461.548841292232, 0.9180480685312489, 119.30423948014294, 0.3316858662299891, 960.9178820054451, 415.27130557094904, 387.5879804923282, 59.35064822840308, 278.3576829100961, 63.16064149732695, 205.73822789624072, 48.16414314492987, 2.032611705663939, 0.3981521899849945, 0.31085168713548245, 0.031657034940445736, 1550.946210010745, 1278568.6085561125, 136.51681639249978, 9.380627637346324, 32714.206226247217, 33.31794122243412, 478.7410858158808, 5.13520735962879, 86.67482578122551, 1412.7618152415537, 0.9034881623740295, 0.16882337366867126, 3.6511763731789464, 1.5061876340893863, 0.936026456403602, 0.8832845907064032, 0.05223497426001836, 0.036961028963355236, 0.0902250749539865, 1.0, 0.082420631088328, 0.11615830213599833, 1.0, 0.11027383454892818, 0.09738698621372122, 
0.09022507495398642, 0.03696102896335543, 0.2500764815332522, 0.24769292033392787, 0.36538370525712177, 0.3377575783750713, 0.03696102896335514, 1.0, 1.0, 1.0, 0.06393062350505073, 0.05223497426001813, 0.05223497426001813, 0.361506476590272, 0.31389313011173064, 0.052234974260018, 0.15071489996297222, 0.23009071665064143, 1.0, 1.0, 1.0, 1.0, 0.03696102896335528, 0.2885386955564389, 0.1631294685172515, 0.1817418639615117, 1.0, 1.0, 0.09022507495398638, 0.12174334041743946, 0.09738698621372167, 0.21059035204970708, 0.19838199774880858, 0.21921144269955356, 0.20151737158216376, 0.1591107124328182, 0.15071489996297224, 0.11615830213599818, 1.0, 0.18174186396151165, 0.11027383454892754, 0.06393062350505033, 0.2786066810399733, 0.17819599986794887, 0.20459555782040947, 0.08242063108832832, 0.15071489996297247, 0.12706843797245518, 0.16704037056486642, 0.13705857707542823, 0.24769292033392767, 0.17819599986794887, 0.052234974260018455, 0.22473358188273132, 0.14177013852839032, 0.06393062350504992, 0.1270684379724545, 0.440075934618508, 0.2105903520497074, 0.09738698621372167, 0.18174186396151168, 0.06393062350505022, 0.0902250749539865, 0.1463172854404491, 0.17819599986794887, 0.14177013852839088, 0.24769292033392792, 0.08242063108832806, 0.13216501856924107, 0.2192114426995533, 0.08242063108832783, 0.08242063108832783, 0.1983819977488086, 0.06393062350505012, 0.0902250749539864, 0.19192842579803737, 0.18860397197936027, 0.23784133276700972, 0.13705857707542843, 0.11027383454892754, 0.2452785684949468, 1.0, 0.036961028963355395, 1.0, 1.0, 1.0, 1.0, 1.0, 0.18174186396151173, 0.036961028963355305, 0.052234974260018205, 0.052234974260018205, 0.036961028963355305, 1.0, 0.05223497426001813, 0.09738698621372123, 0.15071489996297213, 0.15071489996297213, 0.09022507495398656, 0.11027383454892759, 0.052234974260018496, 1.0, 1.0, 1.0, 0.08242063108832832, 0.052234974260018545, 0.036961028963355354, 0.23529411764705765, 0.17819599986794882, 0.2826412381974272, 0.29237225887885715, 
0.4283750208610689, 0.15497571208070593, 0.18520975426622394, 0.036961028963355305, 0.15911071243281819, 0.1745674105712652, 0.12174334041743948, 0.12174334041743948, 0.13216501856924093, 0.13216501856924093, 0.381220041082814], "feature_names": ["prevalence", "sites", "fingerprinting_score", "subdomain_count", "has_cnames", "resource_count", "script_count", "image_count", "fetch_count", "xhr_count", "stylesheet_count", "font_count", "document_count", "media_count", "script_ratio", "image_ratio", "total_cookie_prevalence", "total_first_party_cookies", "max_cookie_ttl_days", "total_cookies_sent", "distinct_api_count", "total_api_calls", "mean_api_weight", "max_api_weight", "median_api_weight", "std_api_weight", "weighted_fp_score", "canvas_api_count", "audio_api_count", "navigator_api_count", "screen_api_count", "storage_api_count", "timing_api_count", "api_Animation_prototype_currentTime", "api_Animation_prototype_startTime", "api_AudioBuffer_prototype_getChannelData", "api_AudioWorkletNode_prototype_constructor", "api_BarProp_prototype_visible", "api_BroadcastChannel_prototype_constructor", "api_CSSStyleDeclaration_setPropertyfontFamily\u2026", "api_CanvasRenderingContext2D_prototype_getImageData", "api_CanvasRenderingContext2D_prototype_isPointInPath", "api_CanvasRenderingContext2D_prototype_measureText", "api_CookieStore_prototype_get", "api_CookieStore_prototype_getAll", "api_CookieStore_prototype_set", "api_Date_prototype_getTime", "api_Date_prototype_getTimezoneOffset", "api_DeviceMotionEvent_prototype_acceleration", "api_DeviceMotionEvent_prototype_accelerationIncludingGravity", "api_DeviceMotionEvent_prototype_rotationRate", "api_DeviceOrientationEvent_prototype_absolute", "api_DeviceOrientationEvent_prototype_alpha", "api_DeviceOrientationEvent_prototype_beta", "api_DeviceOrientationEvent_prototype_gamma", "api_Document_cookie_getter", "api_Document_cookie_setter", "api_Document_prototype_interestCohort", "api_Element_prototype_getClientRects", 
"api_Event_prototype_timeStamp", "api_Gyroscope_prototype_constructor", "api_Gyroscope_prototype_x", "api_Gyroscope_prototype_y", "api_Gyroscope_prototype_z", "api_HTMLCanvasElement_prototype_toBlob", "api_HTMLCanvasElement_prototype_toDataURL", "api_HTMLMediaElement_prototype_canPlayType", "api_Intl_DateTimeFormat_prototype_resolvedOptions", "api_KeyboardEvent_prototype_code", "api_KeyboardEvent_prototype_keyCode", "api_MediaDevices_prototype_enumerateDevices", "api_MediaSource_isTypeSupported", "api_Navigator_prototype_appCodeName", "api_Navigator_prototype_appName", "api_Navigator_prototype_appVersion", "api_Navigator_prototype_connection", "api_Navigator_prototype_cookieEnabled", "api_Navigator_prototype_deviceMemory", "api_Navigator_prototype_doNotTrack", "api_Navigator_prototype_getBattery", "api_Navigator_prototype_getGamepads", "api_Navigator_prototype_hardwareConcurrency", "api_Navigator_prototype_javaEnabled", "api_Navigator_prototype_keyboard", "api_Navigator_prototype_language", "api_Navigator_prototype_languages", "api_Navigator_prototype_maxTouchPoints", "api_Navigator_prototype_mediaCapabilities", "api_Navigator_prototype_mediaDevices", "api_Navigator_prototype_mimeTypes", "api_Navigator_prototype_onLine", "api_Navigator_prototype_permissions", "api_Navigator_prototype_platform", "api_Navigator_prototype_plugins", "api_Navigator_prototype_presentation", "api_Navigator_prototype_product", "api_Navigator_prototype_productSub", "api_Navigator_prototype_requestMediaKeySystemAccess", "api_Navigator_prototype_storage", "api_Navigator_prototype_userAgent", "api_Navigator_prototype_vendor", "api_Navigator_prototype_vendorSub", "api_Navigator_prototype_webdriver", "api_Navigator_prototype_webkitPersistentStorage", "api_Navigator_prototype_webkitTemporaryStorage", "api_NavigatorUAData_prototype_brands", "api_NavigatorUAData_prototype_getHighEntropyValues", "api_NavigatorUAData_prototype_platform", "api_Notification_permission", 
"api_OfflineAudioContext_prototype_constructor", "api_Performance_prototype_memory", "api_PerformanceTiming_prototype_navigationStart", "api_RTCPeerConnection_prototype_constructor", "api_RTCPeerConnectionIceEvent_prototype_candidate", "api_Screen_prototype_availHeight", "api_Screen_prototype_availLeft", "api_Screen_prototype_availTop", "api_Screen_prototype_availWidth", "api_Screen_prototype_colorDepth", "api_Screen_prototype_height", "api_Screen_prototype_orientation", "api_Screen_prototype_pixelDepth", "api_Screen_prototype_width", "api_Sensor_prototype_start", "api_SharedWorker_prototype_constructor", "api_Touch_prototype_force", "api_Touch_prototype_radiusX", "api_Touch_prototype_radiusY", "api_Touch_prototype_rotationAngle", "api_TouchEvent_prototype_constructor", "api_URL_createObjectURL", "api_WebGL2RenderingContext_prototype_getContextAttributes", "api_WebGL2RenderingContext_prototype_getExtension", "api_WebGL2RenderingContext_prototype_getParameter", "api_WebGL2RenderingContext_prototype_getShaderPrecisionFormat", "api_WebGL2RenderingContext_prototype_getSupportedExtensions", "api_WebGL2RenderingContext_prototype_readPixels", "api_WebGLRenderingContext_prototype_getContextAttributes", "api_WebGLRenderingContext_prototype_getExtension", "api_WebGLRenderingContext_prototype_getParameter", "api_WebGLRenderingContext_prototype_getShaderPrecisionFormat", "api_WebGLRenderingContext_prototype_getSupportedExtensions", "api_WebGLRenderingContext_prototype_readPixels", "api_WheelEvent_prototype_deltaX", "api_WheelEvent_prototype_deltaY", "api_WheelEvent_prototype_deltaZ", "api_console_memory", "api_document_fonts_check", "api_speechSynthesis___proto___getVoices", "api_window_devicePixelRatio", "api_window_indexedDB", "api_window_innerHeight", "api_window_innerWidth", "api_window_localStorage", "api_window_matchMediaprefers-color-scheme", "api_window_name", "api_window_openDatabase", "api_window_outerHeight", "api_window_outerWidth", "api_window_screenLeft", 
"api_window_screenTop", "api_window_screenX", "api_window_screenY", "api_window_sessionStorage"]}