Wipoba committed on
Commit
aa29186
·
1 Parent(s): 7865198

Add clustering models, metadata, and README

Browse files
README.md ADDED
@@ -0,0 +1,33 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+
2
+ # User Clustering Model
3
+
4
+ This repository contains models and artifacts for a user clustering pipeline.
5
+
6
+ ## Models
7
+ - Preprocessor (OneHotEncoder + StandardScaler)
8
+ - UMAP reducer for dimensionality reduction
9
+ - KMeans clustering model with k=15
10
+
11
+ ## Metrics
12
+ - Best silhouette score on the training data: 0.4733
13
+ - Recommended silhouette score threshold for triggering auto retrain: 0.4
14
+
15
+ ## Files
16
+ - `preprocessor.joblib` : preprocessing pipeline
17
+ - `umap_reducer.joblib` : UMAP reducer
18
+ - `kmeans_model.joblib` : KMeans model
19
+ - `top_categories.json` : top categories for cardinality limiting
20
+ - `cluster_sizes.png` : cluster distribution plot
21
+ - `metadata.json` : metadata JSON with metrics and parameters
22
+
23
+ ## Usage
24
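+ Load the artifacts with `joblib.load()`, apply the same cardinality limiting to categorical columns using `top_categories.json`, preprocess the incoming data with the preprocessor, transform it with the UMAP reducer, and then predict clusters with the KMeans model.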
25
+
26
+ Automatic retraining can be triggered if the silhouette score on new data falls below 0.4.
27
+
28
+ ## License
29
+ Specify your license here.
30
+
31
+ ---
32
+
33
+ *Generated and pushed by your clustering pipeline.*
cluster_sizes.png ADDED
kmeans_model.joblib ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:310174015d9f6b2c23e96a9d7a258ec941d86d409f631f571c7f2e260f1b0774
3
+ size 201927
metadata.json ADDED
@@ -0,0 +1,6 @@
 
 
 
 
 
 
 
1
+ {
2
+ "best_k": 15,
3
+ "best_silhouette_score": 0.4733,
4
+ "silhouette_threshold_for_auto_retrain": 0.4,
5
+ "description": "Clustering pipeline using KMeans on UMAP embeddings with categorical and numerical preprocessing."
6
+ }
preprocessor.joblib ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:e78032ba80d19793bc1e17bffea61b53a682a44fe6eaff9921ee1f1095dfeaec
3
+ size 5071
top_categories.json ADDED
@@ -0,0 +1 @@
 
 
1
+ {"location": ["Hyderabad", "Mumbai", "Delhi", "Kolkata", "Chennai", "Bengaluru"], "device_id": ["other", "4MUBM68D91", "2WCRGL9JUP", "F2JB8H7VRV", "7DVZ8X5VS2", "K3340D69D2", "RRT0PW5ZWY", "0KM7V5VWMK", "VAIGAON4IY", "D6ERBUECZG", "XF9P4XHJZT", "0H2XCN3VD9", "2RDRNE35YG", "7E48MOAE9S", "T49XRMEVO0", "5AEKQ5ID4T", "8GBKUMUTN9", "U9QZJWN11L", "L53Q252TJH", "SZ55B3BDKT", "SVSX6HMA4A", "LNR2O0D85V", "QABNQKVWPB", "K9OBJJ90FU", "5X67WEOKB5", "03YE51DZGU", "0SBTM6JAM0", "ML9IQVU9D3", "D1TTUE8JRC", "XE4VVMDIST", "MGKYKYHYJ9", "2BCG181094", "RQN2PNE67Q", "L2G7XYL4U1", "HPWJAKCPXH", "VCIFKH98ZL", "82JV8X2ZII", "65W388AGZ5", "DRVO2FCXTH", "RYZEFXGYNR", "8ON32ET9TU", "DGOF8QKXVF", "LHSFC65EIW", "XWGGCTL7G7", "WPWSKFYKWD", "SPMQJ29UYM", "GYPQY786S9", "0XSVYGRW3E", "D3TCP2HSZ4", "RGLU112Z8K", "J5XHQSSOX9", "QJATDLZ8EX", "ZCUI3FVH6R", "B11MRCQGSE", "RO1LLAQLGV", "3D8L7OXH4K", "T9IZ6ORIXO", "BUNI1CODM7", "RA0KW8FXMA", "EBSDZ3ELKG", "VH705SL3SE", "JC9Z2Y1TC9", "IFEW4DIUW0", "P8HH16D64N", "KGG7BDS53Q", "JGQ9G8O5I9", "0ELBLOSZY2", "4HDYYTXQGT", "QS50HVOKL1", "606JKWFS51", "CQCUUQXTWI", "BT6B8ORC6V", "DNWEMURJ1S", "A6PCHWB5QD", "PHIGB5NDLH", "ZXMHBILDAJ", "HRFX34MBPK", "EF081LY12N", "K64RPQ5MJ5", "4VMNEL2SB2", "IY7W52PIRO", "3FYFVG013M", "VVRRTR078T", "6ZEL63FCZQ", "ESDQFINUOW", "NMS475A6Z9", "YVOF8ONBR1", "5YGY11U1KV", "XQGU2HZ4BL", "GMSJRONC04", "X3LK5Y25ZX", "JHXBBKTO88", "6BKNXUD1AU", "6LFUILOIG7", "EIVTBD24V0", "JT9MO7K88I", "AKTJZE397N", "LBSM5WNCXL", "J7C8QZLQ2J", "D57Y4U3NR5"], "browser": ["Chrome", "Safari", "Edge", "Firefox"], "os": ["iOS", "Linux", "Windows", "Android"], "action": ["email_change", "check_balance", "login", "logout", "money_transfer", "password_reset"]}
umap_reducer.joblib ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:c6454cd218d372f2eb6d75d9d72b5ac21d877cdd68019f47c01a29bb3e7b5dca
3
+ size 128247393