chairulridjal committed on
Commit
2f54b53
·
verified ·
1 Parent(s): df293c7

Expose main checkpoint at repository root for Hub indexing

Browse files
Files changed (4) hide show
  1. .gitattributes +1 -43
  2. README.md +7 -4
  3. config.json +59 -0
  4. model.safetensors +3 -0
.gitattributes CHANGED
@@ -1,46 +1,4 @@
1
- *.7z filter=lfs diff=lfs merge=lfs -text
2
- *.arrow filter=lfs diff=lfs merge=lfs -text
3
  *.bin filter=lfs diff=lfs merge=lfs -text
4
- *.bz2 filter=lfs diff=lfs merge=lfs -text
5
- *.ckpt filter=lfs diff=lfs merge=lfs -text
6
- *.ftz filter=lfs diff=lfs merge=lfs -text
7
- *.gz filter=lfs diff=lfs merge=lfs -text
8
- *.h5 filter=lfs diff=lfs merge=lfs -text
9
- *.joblib filter=lfs diff=lfs merge=lfs -text
10
- *.lfs.* filter=lfs diff=lfs merge=lfs -text
11
- *.mlmodel filter=lfs diff=lfs merge=lfs -text
12
- *.model filter=lfs diff=lfs merge=lfs -text
13
- *.msgpack filter=lfs diff=lfs merge=lfs -text
14
- *.npy filter=lfs diff=lfs merge=lfs -text
15
- *.npz filter=lfs diff=lfs merge=lfs -text
16
- *.onnx filter=lfs diff=lfs merge=lfs -text
17
- *.ot filter=lfs diff=lfs merge=lfs -text
18
- *.parquet filter=lfs diff=lfs merge=lfs -text
19
- *.pb filter=lfs diff=lfs merge=lfs -text
20
- *.pickle filter=lfs diff=lfs merge=lfs -text
21
- *.pkl filter=lfs diff=lfs merge=lfs -text
22
  *.pt filter=lfs diff=lfs merge=lfs -text
23
  *.pth filter=lfs diff=lfs merge=lfs -text
24
- *.rar filter=lfs diff=lfs merge=lfs -text
25
- *.safetensors filter=lfs diff=lfs merge=lfs -text
26
- saved_model/**/* filter=lfs diff=lfs merge=lfs -text
27
- *.tar.* filter=lfs diff=lfs merge=lfs -text
28
- *.tar filter=lfs diff=lfs merge=lfs -text
29
- *.tflite filter=lfs diff=lfs merge=lfs -text
30
- *.tgz filter=lfs diff=lfs merge=lfs -text
31
- *.wasm filter=lfs diff=lfs merge=lfs -text
32
- *.xz filter=lfs diff=lfs merge=lfs -text
33
- *.zip filter=lfs diff=lfs merge=lfs -text
34
- *.zst filter=lfs diff=lfs merge=lfs -text
35
- *tfevents* filter=lfs diff=lfs merge=lfs -text
36
- data/processed/r8_5class_train.jsonl filter=lfs diff=lfs merge=lfs -text
37
- data/processed/enriched_13class_train.jsonl filter=lfs diff=lfs merge=lfs -text
38
- data/processed/enriched_5class_train.jsonl filter=lfs diff=lfs merge=lfs -text
39
- data/processed/enriched_5class_train_cleaned_trimmed.jsonl filter=lfs diff=lfs merge=lfs -text
40
- data/processed/r8_5class_train_propagated.jsonl filter=lfs diff=lfs merge=lfs -text
41
- data/processed/enriched_5class_train_cleaned.jsonl filter=lfs diff=lfs merge=lfs -text
42
- data/processed/r7_5class_train.jsonl filter=lfs diff=lfs merge=lfs -text
43
- data/processed/r9_5class_train.jsonl filter=lfs diff=lfs merge=lfs -text
44
- data/processed/enriched_5class_train_cleaned_deleaked.jsonl filter=lfs diff=lfs merge=lfs -text
45
- data/processed/backup/enriched_13class_train.jsonl filter=lfs diff=lfs merge=lfs -text
46
- data/processed/backup/enriched_5class_train.jsonl filter=lfs diff=lfs merge=lfs -text
 
1
+ *.safetensors filter=lfs diff=lfs merge=lfs -text
 
2
  *.bin filter=lfs diff=lfs merge=lfs -text
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
3
  *.pt filter=lfs diff=lfs merge=lfs -text
4
  *.pth filter=lfs diff=lfs merge=lfs -text
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
README.md CHANGED
@@ -12,6 +12,7 @@ tags:
12
  - moe
13
  - span-detection
14
  base_model: openai/privacy-filter
 
15
  pipeline_tag: token-classification
16
  ---
17
 
@@ -176,13 +177,13 @@ Sanity-check fine-tune on 50 examples to validate the training pipeline end-to-e
176
  # Install the base framework
177
  pip install -e vendor/privacy-filter
178
 
179
- # Run inference with the R8 checkpoint
180
- opf --checkpoint checkpoints/r8_5class/epoch_4 --device cpu \
181
  "APT29 deployed Cobalt Strike beacon via CVE-2021-44228 against Microsoft Exchange servers."
182
 
183
  # Evaluate on a JSONL test file
184
  opf eval data/processed/cyner_test.jsonl \
185
- --checkpoint checkpoints/r8_5class/epoch_4 \
186
  --device cpu
187
 
188
  # Fine-tune further with a custom dataset
@@ -213,8 +214,10 @@ opf train my_train.jsonl \
213
 
214
  ```
215
  arcspan/
 
 
216
  ├── checkpoints/
217
- │ ├── r8_5class/epoch_4/ ← Main checkpoint (model.safetensors + config.json)
218
  │ └── cyner_v1_sanity/ ← Sanity-check checkpoint
219
  ├── data/
220
  │ ├── processed/ ← Training/eval JSONL splits (all benchmarks)
 
12
  - moe
13
  - span-detection
14
  base_model: openai/privacy-filter
15
+ base_model_relation: finetune
16
  pipeline_tag: token-classification
17
  ---
18
 
 
177
  # Install the base framework
178
  pip install -e vendor/privacy-filter
179
 
180
+ # Run inference with the default R8 checkpoint at the repository root
181
+ opf --checkpoint . --device cpu \
182
  "APT29 deployed Cobalt Strike beacon via CVE-2021-44228 against Microsoft Exchange servers."
183
 
184
  # Evaluate on a JSONL test file
185
  opf eval data/processed/cyner_test.jsonl \
186
+ --checkpoint . \
187
  --device cpu
188
 
189
  # Fine-tune further with a custom dataset
 
214
 
215
  ```
216
  arcspan/
217
+ ├── config.json ← Main checkpoint config for Hugging Face Hub indexing
218
+ ├── model.safetensors ← Main R8 checkpoint weights for Hugging Face Hub indexing
219
  ├── checkpoints/
220
+ │ ├── r8_5class/epoch_4/ ← Archived copy of main checkpoint
221
  │ └── cyner_v1_sanity/ ← Sanity-check checkpoint
222
  ├── data/
223
  │ ├── processed/ ← Training/eval JSONL splits (all benchmarks)
config.json ADDED
@@ -0,0 +1,59 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "bidirectional_context": true,
3
+ "bidirectional_left_context": 128,
4
+ "bidirectional_right_context": 128,
5
+ "category_version": "cyner_v1",
6
+ "default_n_ctx": 128000,
7
+ "encoding": "o200k_base",
8
+ "experts_per_token": 4,
9
+ "head_dim": 64,
10
+ "hidden_size": 640,
11
+ "inference_contract_version": 1,
12
+ "initial_context_length": 4096,
13
+ "intermediate_size": 640,
14
+ "max_position_embeddings": 131072,
15
+ "model_type": "privacy_filter",
16
+ "ner_class_names": [
17
+ "O",
18
+ "B-Malware",
19
+ "I-Malware",
20
+ "E-Malware",
21
+ "S-Malware",
22
+ "B-Indicator",
23
+ "I-Indicator",
24
+ "E-Indicator",
25
+ "S-Indicator",
26
+ "B-System",
27
+ "I-System",
28
+ "E-System",
29
+ "S-System",
30
+ "B-Organization",
31
+ "I-Organization",
32
+ "E-Organization",
33
+ "S-Organization",
34
+ "B-Vulnerability",
35
+ "I-Vulnerability",
36
+ "E-Vulnerability",
37
+ "S-Vulnerability"
38
+ ],
39
+ "num_attention_heads": 14,
40
+ "num_experts": 128,
41
+ "num_hidden_layers": 8,
42
+ "num_key_value_heads": 2,
43
+ "num_labels": 21,
44
+ "param_dtype": "bfloat16",
45
+ "rope_ntk_alpha": 1.0,
46
+ "rope_ntk_beta": 32.0,
47
+ "rope_scaling_factor": 32.0,
48
+ "rope_theta": 150000,
49
+ "sliding_window": 257,
50
+ "span_class_names": [
51
+ "O",
52
+ "Malware",
53
+ "Indicator",
54
+ "System",
55
+ "Organization",
56
+ "Vulnerability"
57
+ ],
58
+ "vocab_size": 200064
59
+ }
model.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:b4b901e80d256fe774fcf161ac3b430a31b2b3be975118806f59806e546d0680
3
+ size 2798968616