LoganResearch committed on
Commit
aef02f9
·
verified ·
1 Parent(s): f33089b

Upload config.json with huggingface_hub

Browse files
Files changed (1) hide show
  1. config.json +42 -0
config.json ADDED
@@ -0,0 +1,42 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "model_type": "adaptive_repetition_controller",
3
+ "version": "1.0.0",
4
+ "architecture": {
5
+ "d_model": 4096,
6
+ "n_layers": 32,
7
+ "d_fiber": 16,
8
+ "d_control": 64,
9
+ "rep_window": 32,
10
+ "total_params": 50000
11
+ },
12
+ "training": {
13
+ "dataset": "wikitext-2",
14
+ "loss": "BCEWithLogitsLoss",
15
+ "pos_weight": "dynamic",
16
+ "lr_predictor": 1e-4,
17
+ "lr_lora": 2e-5,
18
+ "batch_size": 4,
19
+ "gradient_accumulation": 8,
20
+ "optimal_steps": 5000
21
+ },
22
+ "performance": {
23
+ "f1_score": 0.99,
24
+ "risk_at_repeats": 0.998,
25
+ "risk_at_non_repeats": 0.008,
26
+ "separation": "125x",
27
+ "repetition_reduction": "48.4%",
28
+ "distinct2_improvement": "16.7%"
29
+ },
30
+ "inference": {
31
+ "penalty_scale_default": 3.0,
32
+ "temperature_default": 0.8,
33
+ "threshold_default": 0.1,
34
+ "rep_window": 32
35
+ },
36
+ "base_model_compatibility": [
37
+ "llama-3.1-8b",
38
+ "llama-3-8b",
39
+ "mistral-7b"
40
+ ],
41
+ "notes": "This is a decode-time intervention system, not an attention modification. The geometric CF-HoT theory remains unvalidated; this is the working practical implementation."
42
+ }