LoganResearch
/

Adaptive-Repetition-Controller-ARC

Text Generation

adaptive_repetition_controller

repetition-suppression

decode-time-intervention

Eval Results (legacy)

Model card · Files and versions

LoganResearch committed on Jan 17

Commit

aef02f9

·

verified ·

1 Parent(s): f33089b

Upload config.json with huggingface_hub

Files changed (1) hide show

config.json +42 -0

config.json ADDED Viewed

	@@ -0,0 +1,42 @@

+{
+  "model_type": "adaptive_repetition_controller",
+  "version": "1.0.0",
+  "architecture": {
+    "d_model": 4096,
+    "n_layers": 32,
+    "d_fiber": 16,
+    "d_control": 64,
+    "rep_window": 32,
+    "total_params": 50000
+  },
+  "training": {
+    "dataset": "wikitext-2",
+    "loss": "BCEWithLogitsLoss",
+    "pos_weight": "dynamic",
+    "lr_predictor": 1e-4,
+    "lr_lora": 2e-5,
+    "batch_size": 4,
+    "gradient_accumulation": 8,
+    "optimal_steps": 5000
+  },
+  "performance": {
+    "f1_score": 0.99,
+    "risk_at_repeats": 0.998,
+    "risk_at_non_repeats": 0.008,
+    "separation": "125x",
+    "repetition_reduction": "48.4%",
+    "distinct2_improvement": "16.7%"
+  },
+  "inference": {
+    "penalty_scale_default": 3.0,
+    "temperature_default": 0.8,
+    "threshold_default": 0.1,
+    "rep_window": 32
+  },
+  "base_model_compatibility": [
+    "llama-3.1-8b",
+    "llama-3-8b",
+    "mistral-7b"
+  ],
+  "notes": "This is a decode-time intervention system, not an attention modification. The geometric CF-HoT theory remains unvalidated; this is the working practical implementation."
+}