File size: 44,219 Bytes
0ea4f78 29564b6 0ea4f78 29564b6 0ea4f78 29564b6 0ea4f78 29564b6 0ea4f78 29564b6 0ea4f78 29564b6 0ea4f78 a41a529 0ea4f78 a41a529 0ea4f78 9d4137b a41a529 0ea4f78 29564b6 0ea4f78 29564b6 0ea4f78 29564b6 0ea4f78 29564b6 a41a529 0ea4f78 29564b6 0ea4f78 29564b6 0ea4f78 29564b6 0ea4f78 29564b6 0ea4f78 29564b6 0ea4f78 29564b6 0ea4f78 29564b6 0ea4f78 a41a529 0ea4f78 29564b6 0ea4f78 29564b6 0ea4f78 29564b6 0ea4f78 29564b6 0ea4f78 29564b6 0ea4f78 29564b6 0ea4f78 29564b6 0ea4f78 a41a529 0ea4f78 29564b6 0ea4f78 29564b6 0ea4f78 29564b6 0ea4f78 29564b6 0ea4f78 29564b6 0ea4f78 29564b6 0ea4f78 29564b6 0ea4f78 29564b6 0ea4f78 29564b6 0ea4f78 a41a529 29564b6 0ea4f78 29564b6 0ea4f78 a41a529 0ea4f78 29564b6 0ea4f78 29564b6 0ea4f78 29564b6 0ea4f78 29564b6 0ea4f78 29564b6 0ea4f78 29564b6 0ea4f78 29564b6 0ea4f78 a41a529 0ea4f78 29564b6 0ea4f78 29564b6 0ea4f78 29564b6 0ea4f78 29564b6 0ea4f78 29564b6 0ea4f78 29564b6 0ea4f78 29564b6 0ea4f78 29564b6 0ea4f78 29564b6 0ea4f78 29564b6 0ea4f78 29564b6 0ea4f78 a41a529 0ea4f78 29564b6 0ea4f78 d73ba34 0ea4f78 2f40d7e 0ea4f78 d73ba34 0ea4f78 2f40d7e 0ea4f78 d73ba34 0ea4f78 2f40d7e 0ea4f78 a41a529 0ea4f78 29564b6 0ea4f78 29564b6 0ea4f78 29564b6 0ea4f78 29564b6 0ea4f78 29564b6 0ea4f78 a41a529 0ea4f78 29564b6 0ea4f78 29564b6 0ea4f78 29564b6 0ea4f78 a41a529 0ea4f78 29564b6 2f40d7e f660a1b 29564b6 f660a1b 29564b6 f660a1b 0ea4f78 f660a1b 0ea4f78 f660a1b 29564b6 f660a1b 29564b6 0ea4f78 f660a1b 29564b6 0ea4f78 29564b6 0ea4f78 a41a529 0ea4f78 29564b6 0ea4f78 2f40d7e 0ea4f78 29564b6 0ea4f78 29564b6 0ea4f78 2f40d7e 29564b6 2f40d7e 0ea4f78 29564b6 0ea4f78 2f40d7e 29564b6 2f40d7e a41a529 2f40d7e 29564b6 2f40d7e 29564b6 0ea4f78 a41a529 29564b6 0ea4f78 a41a529 0ea4f78 29564b6 8ef1125 29564b6 0ea4f78 8ef1125 0ea4f78 a41a529 0ea4f78 29564b6 0ea4f78 29564b6 0ea4f78 29564b6 0ea4f78 29564b6 0ea4f78 a41a529 0ea4f78 29564b6 8bc141e c5bd425 8bc141e 29564b6 0ea4f78 2f40d7e a41a529 0ea4f78 29564b6 0ea4f78 a41a529 0ea4f78 29564b6 0ea4f78 a41a529 0ea4f78 29564b6 0ea4f78 29564b6 
2f40d7e 29564b6 b238807 0ea4f78 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 278 279 280 281 282 283 284 285 286 287 288 289 290 291 292 293 294 295 296 297 298 299 300 301 302 303 304 305 306 307 308 309 310 311 312 313 314 315 316 317 318 319 320 321 322 323 324 325 326 327 328 329 330 331 332 333 334 335 336 337 338 339 340 341 342 343 344 345 346 347 348 349 350 351 352 353 354 355 356 357 358 359 360 361 362 363 364 365 366 367 368 369 370 371 372 373 374 375 376 377 378 379 380 381 382 383 384 385 386 387 388 389 390 391 392 393 394 395 396 397 398 399 400 401 402 403 404 405 406 407 408 409 410 411 412 413 414 415 416 417 418 419 420 421 422 423 424 425 426 427 428 429 430 431 432 433 434 435 436 437 438 439 440 441 442 443 444 445 446 447 448 449 450 451 452 453 454 455 456 457 458 459 460 461 462 463 464 465 466 467 468 469 470 471 472 473 474 475 476 477 478 479 480 481 482 483 484 485 486 487 488 489 490 491 492 493 494 495 496 497 498 499 500 501 502 503 504 505 506 507 508 509 510 511 512 513 514 515 516 517 518 519 520 521 522 523 524 525 526 527 
528 529 530 531 532 533 534 535 536 537 538 539 540 541 542 543 544 545 546 547 548 549 550 551 552 553 554 555 556 557 558 559 560 561 562 563 564 565 566 567 568 569 570 571 572 573 574 575 576 577 578 579 580 581 582 583 584 585 586 587 588 589 590 591 592 593 594 595 596 597 598 599 600 601 602 603 604 605 606 607 608 609 610 611 612 613 614 615 616 617 618 619 620 621 622 623 624 625 626 627 628 629 630 631 632 633 634 635 636 637 638 639 640 641 642 643 644 645 646 647 648 649 650 651 652 653 654 655 656 657 658 659 660 661 662 663 664 665 666 667 668 669 670 671 672 673 674 675 676 677 678 679 680 681 682 683 684 685 686 687 688 689 690 691 692 693 694 695 696 697 698 699 700 701 702 703 704 705 706 707 708 709 710 711 712 713 714 715 716 717 718 719 720 721 722 723 724 725 726 727 728 729 730 731 732 733 734 735 736 737 738 739 740 741 742 743 744 745 746 747 748 749 750 751 752 753 754 755 756 757 758 759 760 761 762 763 764 765 766 767 768 769 770 771 772 773 774 775 776 777 778 779 780 781 782 783 784 785 786 787 788 789 790 791 792 793 794 795 796 797 798 799 800 801 802 803 804 805 806 807 808 809 810 811 812 813 814 815 816 817 818 819 820 821 822 823 824 825 826 827 828 829 830 831 832 833 834 835 836 837 838 839 840 841 842 843 844 845 846 847 848 849 850 851 852 853 854 855 856 857 858 859 860 861 862 863 864 865 866 867 868 869 870 871 872 873 874 875 876 877 878 879 880 881 882 883 884 885 886 887 888 889 890 891 892 893 894 895 896 897 898 899 900 901 902 903 904 905 906 907 908 909 910 911 912 913 914 915 916 917 918 919 920 921 922 923 924 925 926 927 928 929 930 931 932 933 934 935 936 937 938 939 940 941 942 943 944 945 946 947 948 949 950 951 952 953 954 |
---
license: cc-by-4.0
language:
- en
library_name: transformers
pipeline_tag: text-generation
tags:
- llama
- dense-responses
- self-improvement
- representation-engineering
- cf-hot
- recursive-self-improvement
base_model: NousResearch/Hermes-3-Llama-3.1-8B
---
<div align="center">
# ARC-Base-8B-Condensed
## Adaptive Recursive Cognition
**A Multi-Loop Self-Stabilizing Language Model with Predictive Control**
*Logan Matthew Napolitano*
[License: CC BY 4.0](https://creativecommons.org/licenses/by/4.0/)
[Python 3.10+](https://www.python.org/downloads/)
[Base model: Hermes-3-Llama-3.1-8B](https://huggingface.co/NousResearch/Hermes-3-Llama-3.1-8B)
*Research into stable self-improving language models*
[Quick Start](#quick-start) • [Architecture](#architecture) • [Commands](#command-reference) • [Technical Specification](#technical-specification) • [Citation](#citation)
</div>
---
## Table of Contents
1. [Model Description](#model-description)
2. [Quick Start](#quick-start)
3. [Architecture](#architecture)
4. [Core Technology](#core-technology)
5. [Command Reference](#command-reference)
6. [Evaluation](#evaluation)
7. [Installation](#installation)
8. [Configuration](#configuration)
9. [Repository Structure](#repository-structure)
10. [Hardware Requirements](#hardware-requirements)
11. [Training From Scratch](#training-from-scratch)
12. [API Reference](#api-reference)
13. [Limitations](#limitations)
14. [Ethical Considerations](#ethical-considerations)
15. [Technical Specification](#technical-specification)
16. [Changelog](#changelog)
17. [Citation](#citation)
18. [License](#license)
---
### Primary Reference
The complete theoretical framework, methodology, and reproducibility details for this model are documented in:
**Napolitano, L. M. (2025). _Controlled Language Models: Decode-Time Behavioral Control and Token Efficiency._**
Zenodo. https://doi.org/10.5281/zenodo.18344021
This paper should be cited for any academic or technical use of ARC-Base-8B-Condensed.
## Model Description
ARC-Base-8B-Condensed is a fine-tuned version of [Hermes-3-Llama-3.1-8B](https://huggingface.co/NousResearch/Hermes-3-Llama-3.1-8B) designed for:
1. **Dense, information-rich responses** — Reduced filler, hedging, and verbosity
2. **Predictive behavioral control** — CF-HoT heads detect and suppress failure modes before they manifest
3. **Recursive self-improvement** — Micro-training with automatic rollback on quality degradation
4. **Mentor-based learning** — Optional consultation with Claude API for continuous improvement
### Intended Use
- Research into self-improving language models
- Applications requiring concise, direct responses
- Study of representation engineering and behavioral control
- Base for further fine-tuning experiments
### Not Intended For
- Production deployment without evaluation
- Safety-critical applications
- Unsupervised autonomous operation
- Applications requiring verbose, elaborative responses
---
## Quick Start
### One-Command Start
```bash
git clone https://huggingface.co/LoganResearch/ARC-Base-8B-Condensed
cd ARC-Base-8B-Condensed
pip install -r requirements.txt
python arc_engine_v29_full.py
```
On first run, the engine will:
1. Download the base model (~16GB)
2. Load the DENSE adapter and CF-HoT heads
3. Initialize all subsystems
4. Present an interactive command prompt
```
═══════════════════════════════════════════════════════════════════════════════
ARC ENGINE v2.9 - Adaptive Recursive Cognition
Multi-Loop Self-Stabilizing Language Model
═══════════════════════════════════════════════════════════════════════════════
DENSE Mode: ON (CONDENSATOR checkpoint)
CF-HoT Control: ON
CF-HoT 125×: OFF
Mentor Mode: OFF
Auto-Train: OFF
Experience Buffer: 0 examples
═══════════════════════════════════════════════════════════════════════════════
You> hello
Hello. How can I help?
[Quality: 0.82 | Density: 45.2 | Coherence: 0.95 | Tokens: 5]
```
### Minimal Python Usage
```python
# Minimal inference example: load the checkpoint, build one prompt, sample a reply.
from transformers import AutoModelForCausalLM, AutoTokenizer
import torch
# Load the weights in bfloat16; device_map="auto" delegates device placement to the library.
model = AutoModelForCausalLM.from_pretrained(
"LoganResearch/ARC-Base-8B-Condensed",
torch_dtype=torch.bfloat16,
device_map="auto"
)
tokenizer = AutoTokenizer.from_pretrained("LoganResearch/ARC-Base-8B-Condensed")
# The prompt is hand-formatted with <|im_start|>/<|im_end|> turn markers: one user turn,
# then an open assistant turn for the model to complete.
prompt = "<|im_start|>user\nExplain gradient descent briefly.<|im_end|>\n<|im_start|>assistant\n"
# Tokenize to PyTorch tensors and move them to the device the model reports.
inputs = tokenizer(prompt, return_tensors="pt").to(model.device)
# Sample (do_sample=True) up to 100 new tokens at temperature 0.7.
outputs = model.generate(**inputs, max_new_tokens=100, do_sample=True, temperature=0.7)
# Decode the first returned sequence, dropping special tokens from the printed text.
print(tokenizer.decode(outputs[0], skip_special_tokens=True))
```
---
## Architecture
### System Overview
```
┌─────────────────────────────────────────────────────────────────────────┐
│                         ARC ENGINE ARCHITECTURE                         │
├─────────────────────────────────────────────────────────────────────────┤
│                                                                         │
│  ┌───────────────────────────────────────────────────────────────────┐  │
│  │                         INPUT PROCESSING                          │  │
│  │       User Input → Command Parser → Generate / Tool Execute       │  │
│  └───────────────────────────────────────────────────────────────────┘  │
│                                    │                                    │
│                                    ▼                                    │
│  ┌───────────────────────────────────────────────────────────────────┐  │
│  │                         CORE MODEL STACK                          │  │
│  ├───────────────────────────────────────────────────────────────────┤  │
│  │                                                                   │  │
│  │  Base Model: Hermes-3-Llama-3.1-8B (8B parameters)                │  │
│  │       │                                                           │  │
│  │       ▼                                                           │  │
│  │  DENSE Adapter ─── THE CONDENSATOR trained (SFT→DPO→RL)           │  │
│  │       │                                                           │  │
│  │       ▼                                                           │  │
│  │  CF-HoT Heads ─── Repetition (125×), Hedging, Verbosity           │  │
│  │       │                                                           │  │
│  │       ▼                                                           │  │
│  │  Output Generation ─── Quality-controlled, density-optimized      │  │
│  │                                                                   │  │
│  └───────────────────────────────────────────────────────────────────┘  │
│                                    │                                    │
│                                    ▼                                    │
│  ┌───────────────────────────────────────────────────────────────────┐  │
│  │                        QUALITY EVALUATION                         │  │
│  │  Response → Density Score → Coherence Score → Overall Quality     │  │
│  │       │                                                           │  │
│  │       ▼                                                           │  │
│  │ ┌───────────────────────────────────────────────────────────────┐ │  │
│  │ │ Mentor Mode Check: Quality < 0.6 OR Uncertainty > 0.4?        │ │  │
│  │ │    │ Yes                                                      │ │  │
│  │ │    ▼                                                          │ │  │
│  │ │ Consult Claude → Learn from Response → Update Training Buffer │ │  │
│  │ └───────────────────────────────────────────────────────────────┘ │  │
│  └───────────────────────────────────────────────────────────────────┘  │
│                                    │                                    │
│                                    ▼                                    │
│  ┌───────────────────────────────────────────────────────────────────┐  │
│  │                       RSI EXPERIENCE BUFFER                       │  │
│  │  Store: prompt, response, quality, domain, difficulty, feedback   │  │
│  │                                 │                                 │  │
│  │                      ┌──────────┴──────────┐                      │  │
│  │                      ▼                     ▼                      │  │
│  │             Auto-Train Trigger?      Dream Cycle?                 │  │
│  │                      │                     │                      │  │
│  │                      ▼                     ▼                      │  │
│  │               Micro-training       Experience Replay              │  │
│  │                 (25 steps)       (Reinforce learnings)            │  │
│  └───────────────────────────────────────────────────────────────────┘  │
│                                    │                                    │
│                                    ▼                                    │
│  ┌───────────────────────────────────────────────────────────────────┐  │
│  │                        VALIDATION & COMMIT                        │  │
│  │      New Quality vs Old Quality → Better? COMMIT : ROLLBACK       │  │
│  └───────────────────────────────────────────────────────────────────┘  │
│                                                                         │
└─────────────────────────────────────────────────────────────────────────┘
```
### RSI Loop (Recursive Self-Improvement)
```
┌─────────────────────────────────────────────────────────────────────────┐
│                     RECURSIVE SELF-IMPROVEMENT LOOP                     │
├─────────────────────────────────────────────────────────────────────────┤
│                                                                         │
│   ┌─────────┐                                                           │
│   │  CHAT   │◄─────────────────────────────────────────────────────┐    │
│   └────┬────┘                                                      │    │
│        │                                                           │    │
│        ▼                                                           │    │
│   ┌─────────┐                                                      │    │
│   │ MEASURE │  Calculate quality, density, coherence               │    │
│   └────┬────┘                                                      │    │
│        │                                                           │    │
│        ▼                                                           │    │
│   ┌─────────┐                                                      │    │
│   │ BUFFER  │  Store in experience buffer with metadata            │    │
│   └────┬────┘                                                      │    │
│        │                                                           │    │
│        ▼                                                           │    │
│ ┌──────────────┐                                                   │    │
│ │ AUTO-TRIGGER │  Buffer full? Quality threshold? Feedback?        │    │
│ └──────┬───────┘                                                   │    │
│        │                                                           │    │
│    Yes │ No ───────────────────────────────────────────────────────┘    │
│        │                                                                │
│        ▼                                                                │
│ ┌─────────────┐                                                         │
│ │ MICRO-TRAIN │  25 steps on high-quality buffer samples                │
│ └──────┬──────┘                                                         │
│        │                                                                │
│        ▼                                                                │
│ ┌─────────────┐                                                         │
│ │  VALIDATE   │  Compare new model vs checkpoint                        │
│ └──────┬──────┘                                                         │
│        │                                                                │
│   ┌────┴────┐                                                           │
│   │         │                                                           │
│Better?    Worse?                                                        │
│   │         │                                                           │
│   ▼         ▼                                                           │
│ COMMIT   ROLLBACK                                                       │
│   │         │                                                           │
│   └────┬────┘                                                           │
│        │                                                                │
│        ▼                                                                │
│    Continue ───────────────────────────────────────────────────────►    │
│                                                                         │
└─────────────────────────────────────────────────────────────────────────┘
```
### Mentor Mode Flow
```
┌─────────────────────────────────────────────────────────────────────────┐
│                        MENTOR MODE LEARNING FLOW                        │
├─────────────────────────────────────────────────────────────────────────┤
│                                                                         │
│       User Prompt                                                       │
│            │                                                            │
│            ▼                                                            │
│   ┌──────────────────┐                                                  │
│   │ Local Generation │  Generate response with local 8B model           │
│   └────────┬─────────┘                                                  │
│            │                                                            │
│            ▼                                                            │
│   ┌──────────────────┐                                                  │
│   │  Quality Check   │  Evaluate density, coherence, quality            │
│   └────────┬─────────┘                                                  │
│            │                                                            │
│            ▼                                                            │
│   ┌────────────────────────────────────┐                                │
│   │ Quality < 0.6 OR Uncertainty > 0.4 │                                │
│   └────────┬───────────────────────────┘                                │
│            │                                                            │
│        Yes │ No ─────────► Return local response                        │
│            │                                                            │
│            ▼                                                            │
│   ┌──────────────────┐                                                  │
│   │  Consult Claude  │  Via API                                         │
│   └────────┬─────────┘                                                  │
│            │                                                            │
│            ▼                                                            │
│   ┌──────────────────┐                                                  │
│   │ Create DPO Pair  │                                                  │
│   │ chosen: Claude   │                                                  │
│   │ rejected: Local  │                                                  │
│   └────────┬─────────┘                                                  │
│            │                                                            │
│            ▼                                                            │
│   ┌──────────────────┐                                                  │
│   │  Add to Buffer   │  High-quality experience for training            │
│   └────────┬─────────┘                                                  │
│            │                                                            │
│            ▼                                                            │
│   Return Claude's response + log learning                               │
│                                                                         │
└─────────────────────────────────────────────────────────────────────────┘
```
---
## Core Technology
### 1. CF-HoT: Control-Field Holonomy
Predictive control through hidden-state monitoring. Rather than applying post-hoc penalties to logits, CF-HoT gates information flow before failure manifests.
```
┌─────────────────────────────────────────────────────────────────────────┐
│                           CF-HoT ARCHITECTURE                           │
├─────────────────────────────────────────────────────────────────────────┤
│                                                                         │
│   Hidden States (Layers 16-24)                                          │
│            │                                                            │
│            ▼                                                            │
│   ┌──────────────────┐                                                  │
│   │ Fiber Projection │  Compress to d=16 per layer                      │
│   └────────┬─────────┘                                                  │
│            │                                                            │
│            ▼                                                            │
│   ┌──────────────────┐                                                  │
│   │ Layer Attention  │  Weighted aggregation across layers              │
│   └────────┬─────────┘                                                  │
│            │                                                            │
│            ▼                                                            │
│   ┌──────────────────┐                                                  │
│   │  Risk Predictor  │  Binary classifier: P(unwanted_behavior)         │
│   └────────┬─────────┘                                                  │
│            │                                                            │
│            ▼                                                            │
│   If P > threshold ──► Apply logit penalties                            │
│                                                                         │
└─────────────────────────────────────────────────────────────────────────┘
```
**Head Performance:**
| Head | Separation | Description |
|------|------------|-------------|
| Repetition | 125× | Detects impending repetitive loops |
| Hedging | 1.5× | Blocks uncertainty markers |
| Verbosity | 2.1× | Suppresses filler content |
The repetition head achieves 125× separation between positive (pre-repetition) and negative (diverse output) hidden states, enabling reliable early warning.
### 2. The Condensator: Dense Response Training
4-stage training pipeline:
```
┌─────────────────────────────────────────────────────────────────────────┐
│                        THE CONDENSATOR PIPELINE                         │
├─────────────────────────────────────────────────────────────────────────┤
│                                                                         │
│  STAGE 1: Supervised Fine-Tuning (SFT)                                  │
│  ─────────────────────────────────────                                  │
│  • 847 curated dense response examples                                  │
│  • Learning rate: 2e-5                                                  │
│  • Epochs: 3                                                            │
│                                                                         │
│  STAGE 2: Direct Preference Optimization (DPO)                          │
│  ─────────────────────────────────────────────                          │
│  • Preference pairs: dense (chosen) vs verbose (rejected)               │
│  • Beta: 0.1                                                            │
│  • Epochs: 2                                                            │
│                                                                         │
│  STAGE 3: Reinforcement Learning (PPO)                                  │
│  ─────────────────────────────────────                                  │
│  • Reward = quality_score - length_penalty                              │
│  • Conservative KL constraint                                           │
│  • Learning rate: 1e-6                                                  │
│                                                                         │
│  STAGE 4: Checkpointing                                                 │
│  ──────────────────────                                                 │
│  • Save every 25 steps                                                  │
│  • A/B comparison on held-out prompts                                   │
│  • Automatic rollback if quality drops                                  │
│                                                                         │
└─────────────────────────────────────────────────────────────────────────┘
```
### 3. Enhanced CF-HoT Parameters
| Parameter | Value | Reason |
|-----------|-------|--------|
| EMA Momentum | 0.995 | Stable control field |
| Gate Temperature | 2.0 | Softer sigmoid |
| Gate Bounds | [0.1, 0.9] | Prevent saturation |
| Monitoring | Every 50 steps | Detect drift |
| Warmup | 500 steps | Smooth initialization |
---
## Command Reference
### Core Commands
| Command | Description |
|---------|-------------|
| `status` | System status overview |
| `help` | Full command menu |
| `help <topic>` | Topic-specific help |
| `quit` | Exit |
### Self-Improvement
| Command | Description |
|---------|-------------|
| `!improve` | Run improvement iteration |
| `!eval` | Full evaluation |
| `!train <steps>` | Training steps |
| `!compare` | Compare checkpoints |
| `!rollback` | Revert to best checkpoint |
| `!load <path>` | Load checkpoint |
| `!benchmark` | Evaluation suite |
### Mentor Mode
| Command | Description |
|---------|-------------|
| `!mentor` | Show mentor mode status |
| `!mentor on` | Enable auto-consultation |
| `!mentor off` | Disable mentor mode |
| `!mentor ask <question>` | Ask Claude and learn from response |
| `!mentor learn` | Show collected learnings |
### RSI (Recursive Self-Improvement)
| Command | Description |
|---------|-------------|
| `!auto_train on` | Enable learning during chat |
| `!auto_train off` | Disable auto-training |
| `!skills` | Quality per domain |
| `!forgetting` | Detect catastrophic forgetting |
| `!dream` | Force experience replay |
| `!buffer` | Experience buffer stats |
| `!selfplay <N>` | Run N self-play iterations |
### Condensator
| Command | Description |
|---------|-------------|
| `!condensator` | Run full SFT→DPO→RL pipeline |
| `!dpo` | Run DPO stage only |
| `!rl` | Run RL stage only |
| `!train_cfhot` | Train CF-HoT heads |
### CF-HoT Control
| Command | Description |
|---------|-------------|
| `!cfhot` / `!125x` | Toggle 125× head |
| `!cfhot status` | Head status |
| `!gate_stats` | CF-HoT gate health |
### Generation Modes
| Command | Description |
|---------|-------------|
| `!book` | Toggle book mode (16K tokens) |
| `!write <topic>` | Write extended content |
| `!claude <prompt>` | Direct Claude API prompt |
### Tools
| Command | Description |
|---------|-------------|
| `!shell <cmd>` | Execute shell command |
| `!python <code>` | Execute Python |
| `!read <path>` | Read file |
| `!write <path> <content>` | Write file |
| `!search <query>` | Web search |
| `!fetch <url>` | Fetch URL content |
### Browser (requires Playwright)
| Command | Description |
|---------|-------------|
| `!browse <url>` | Open URL |
| `!click <selector>` | Click element |
| `!type <text>` | Type text |
| `!read` | Read page content |
### Multimedia (optional dependencies)
| Command | Description |
|---------|-------------|
| `!stream` | Open live token window |
| `!audio` / `!tts` | Toggle text-to-speech |
| `!imagine <prompt>` | Generate image (SDXL) |
| `!dalle <prompt>` | Generate image (DALL-E 3) |
### Experimental Features
| Command | Description |
|---------|-------------|
| `!content blog <topic>` | Generate blog post |
| `!content youtube <topic>` | Generate video script |
---
## Evaluation
### Qualitative Comparison
| Prompt | Base Hermes-3 | ARC-Condensed |
|--------|---------------|---------------|
| "hello" | "Hello! I'm here to help you with any questions or tasks you might have. Feel free to ask me anything!" (23 tokens) | "Hello. How can I help?" (5 tokens) |
| "What is recursion?" | "That's a great question! Recursion is a programming concept where a function calls itself..." (150+ tokens) | "Function calling itself until base case. Stack frames accumulate, unwind on return." (12 tokens) |
| "How are you?" | "As an AI, I don't have feelings in the traditional sense, but I'm functioning well..." (25 tokens) | "Functional. Task?" (3 tokens) |
### Quantitative Metrics
| Metric | Base Model | ARC-Condensed | Change |
|--------|------------|---------------|--------|
| Avg. Response Length | 150 tokens | 45 tokens | -70% |
| Filler Phrases | Present | Minimal | ~-95% |
| Information Density | 17.0 | 45.2 | +166% |
| Quality Score (internal) | 0.52 | 0.78 | +50% |
**Note:** These are heuristic metrics from internal evaluation. Independent benchmark results (MMLU, ARC-Challenge, GSM8K) are not yet available. We welcome independent evaluation.
### Self-Improvement Trajectory (Observed)
```
Iteration 0: Quality 0.52 (baseline)
Iteration 5: Quality 0.68 (+31%)
Iteration 10: Quality 0.75 (+44%)
Iteration 15: Quality 0.78 (+50%, plateau)
```
Self-improvement shows diminishing returns after ~15 iterations. This is expected behavior, not a limitation to work around.
---
## Installation
### Minimal Installation
```bash
pip install torch transformers accelerate peft bitsandbytes datasets trl
```
### Full Installation
```bash
pip install -r requirements.txt
```
### Optional Dependencies
```bash
# Browser automation
pip install playwright && playwright install firefox
# Image generation
pip install diffusers pillow
# Text-to-speech
pip install pyttsx3 gTTS pygame
# Claude API (for mentor mode)
pip install anthropic
# OpenAI API (for DALL-E)
pip install openai
# Web search
pip install requests
```
### Environment Variables
```bash
# Optional - for enhanced features
export ANTHROPIC_API_KEY="sk-ant-..." # Mentor Mode
export OPENAI_API_KEY="sk-..." # DALL-E
```
---
## Configuration
### Main Configuration
```python
class Config:
    """Main engine settings: generation, CF-HoT control, and self-improvement.

    Values are plain class attributes, so they can be read without
    instantiating the class (e.g. ``Config.temperature``).
    """

    # Generation
    temperature = 0.85
    top_p = 0.9
    max_new_tokens = 512
    repetition_penalty = 1.1

    # CF-HoT
    use_cfhot = True
    use_cfhot_125x = False  # off by default; the `!cfhot` / `!125x` command toggles the 125x head
    cfhot_repetition_threshold = 0.6
    cfhot_repetition_penalty = 6.0

    # Self-improvement
    min_quality_score = 0.5
    target_quality_score = 0.75
    training_steps_per_iteration = 25  # matches the 25-step micro-training described in this README
    quality_drop_threshold = 0.1  # NOTE(review): presumably the drop that triggers rollback; confirm in engine
```
### RSI Configuration
```python
@dataclass
class RSIConfig:
    """Settings for the RSI (recursive self-improvement) loop.

    All fields have defaults, so ``RSIConfig()`` yields the stock
    configuration and individual fields can be overridden by keyword.
    """

    auto_train_enabled: bool = False  # off by default; `!auto_train on` enables learning during chat
    buffer_size: int = 1000
    min_experiences_to_train: int = 50
    quality_threshold_for_training: float = 0.7
    dream_cycle_interval: int = 100  # NOTE(review): unit (turns vs. steps) not stated here; confirm in engine
    forgetting_check_interval: int = 50  # NOTE(review): unit not stated here; confirm in engine
```
### Mentor Configuration
```python
@dataclass
class MentorConfig:
    """Settings for Mentor Mode (optional consultation with the Claude API).

    The two thresholds mirror the check shown in the Mentor Mode flow:
    consult when quality < 0.6 OR uncertainty > 0.4.
    """

    enabled: bool = False  # off by default; `!mentor on` enables auto-consultation
    auto_consult_threshold: float = 0.6  # quality bound from the Mentor Mode check
    uncertainty_threshold: float = 0.4  # uncertainty bound from the Mentor Mode check
    learn_from_responses: bool = True
```
---
## Repository Structure
```
ARC-Base-8B-Condensed/
│
├── arc_engine_v29_full.py            # Main engine
├── README.md                         # This file
├── requirements.txt                  # Dependencies
│
├── model-00001-of-00004.safetensors  # Model weights
├── model-00002-of-00004.safetensors
├── model-00003-of-00004.safetensors
├── model-00004-of-00004.safetensors
├── config.json
├── tokenizer.json
├── tokenizer_config.json
├── special_tokens_map.json
├── generation_config.json
│
├── dense_checkpoints/                # Training checkpoints
│   └── step_*/
│
├── cfhot_checkpoints/                # CF-HoT heads
│   └── final_6000/
│       └── risk_predictor.pt
│
├── improvement_logs/                 # RSI logs
└── exports/                          # Checkpoint exports
```
---
## Hardware Requirements
| Component | Minimum | Recommended |
|-----------|---------|-------------|
| GPU VRAM | 16 GB | 24+ GB |
| System RAM | 32 GB | 64 GB |
| Storage | 50 GB | 100 GB |
| Python | 3.10+ | 3.11 |
**Tested Configurations:**
- NVIDIA RTX 3090 (24GB), 64GB RAM ✓
- NVIDIA RTX 4090 (24GB), 128GB RAM ✓
- NVIDIA A100 (40GB) ✓
**Performance Estimates:**
- Inference: ~15-25 tokens/second
- Full Condensator pipeline: ~4 hours (RTX 3090)
- Self-improvement iteration: ~30 minutes
---
## Training From Scratch
### Automated Training
```bash
python arc_engine_v29_full.py
> !condensator
```
This runs:
1. SFT (3 epochs)
2. DPO (2 epochs)
3. RL (300 steps)
4. Checkpoint validation
### Manual Training
**Step 1: Train CF-HoT Heads**
```
> !train_cfhot
```
**Step 2: Run Condensator**
```
> !condensator
```
**Step 3: Self-Improvement**
```
> !selfplay 1000
```
---
## API Reference
### Start Server
```
> !api
[api] Server running on http://0.0.0.0:8080
```
### Endpoints
#### POST /generate
```bash
curl -X POST http://localhost:8080/generate \
-H "Content-Type: application/json" \
-d '{"prompt": "What is recursion?"}'
```
Response:
```json
{
"response": "Function calling itself until base case.",
"quality": 0.82,
"density": 48.3,
"tokens": 8
}
```
#### GET /health
```bash
curl http://localhost:8080/health
```
---
## Limitations
### Known Limitations
| Limitation | Description |
|------------|-------------|
| **Scale** | Tested on 8B parameters only; scaling behavior unknown |
| **Language** | English only |
| **Benchmarks** | No formal benchmark results (MMLU, GSM8K, etc.) |
| **Terseness** | May be too concise for applications requiring elaboration |
| **Iterations** | Self-improvement plateaus after ~15 iterations |
| **Memory** | Full features require 16GB+ VRAM |
### What This Is Not
- This is **not** AGI or a path to AGI
- This is **not** a production-ready system
- Self-improvement is **bounded and reversible**
- The model **requires human oversight**
- Claims are **not independently validated**
---
## Ethical Considerations
### Safety Measures
- **Quality gates:** All self-modification requires quality validation
- **Automatic rollback:** Degradation triggers checkpoint restoration
- **Bounded improvement:** No unbounded recursive self-modification
- **Human oversight:** System designed for interactive use, not autonomy
### Potential Risks
- Dense responses may omit important caveats or safety information
- Self-improvement research requires careful monitoring
- Model inherits biases from base Hermes-3 and training data
- Experimental features should not be used for consequential decisions
### Explicit Non-Goals
This system is **not designed for:**
- Autonomous operation without human oversight
- Self-replication or self-preservation
- Deception or manipulation
- Capability acquisition beyond defined scope
---
## Technical Specification
Full technical documentation is available:
- **Primary Reference (Master Book):**
[Controlled Language Models: Decode-Time Behavioral Control and Token Efficiency](https://doi.org/10.5281/zenodo.18344021)
- **Related Preprints:**
- [From Explicit Holonomy to Latent Control Fields](https://zenodo.org/records/14707164)
- [The Holonomy Transformer](https://zenodo.org/records/14707081)
The specification covers:
- Multi-loop training architecture
- Control field theory and implementation
- Tokenization co-evolution (fourth loop)
- Reliability engineering and rollback protocols
- Reproducibility requirements
---
## Changelog
### v2.9 (Current)
- Stealth web browser for research
- Improved training functions
- Bug fixes for selfplay training loop
### v2.8
- Full RSI continuous learning system
- Auto-train during chat
- Dream cycles for experience replay
- Domain-specific skill tracking
- Catastrophic forgetting detection
### v2.4
- Mentor Mode: Learn from Claude API
- Content generation tools
- Smart help system
### v2.2
- Full CONDENSATOR pipeline
- Enhanced CF-HoT with EMA, gate temperature
- DPO and RL training stages
### v2.0
- Initial release
- CF-HoT 125× repetition head
- Dense response training
- Basic self-improvement loop
---
## Citation
```bibtex
@software{napolitano2025arc,
  author    = {Napolitano, Logan Matthew},
  title     = {{ARC-Base-8B-Condensed}: Adaptive Recursive Cognition for Self-Stabilizing Language Models},
  year      = {2025},
  publisher = {Hugging Face},
  url       = {https://huggingface.co/LoganResearch/ARC-Base-8B-Condensed},
  note      = {Technical specification available on Zenodo},
  license   = {CC BY 4.0},
}
```
```bibtex
% Zenodo record rather than a journal article: typed as misc, which needs no journal field.
@misc{napolitano2025controlled,
  author       = {Napolitano, Logan Matthew},
  title        = {Controlled Language Models: Decode-Time Behavioral Control and Token Efficiency},
  year         = {2025},
  howpublished = {Zenodo},
  publisher    = {Zenodo},
  doi          = {10.5281/zenodo.18344021},
  url          = {https://zenodo.org/records/18344021},
  note         = {Primary technical reference for ARC-Base-8B-Condensed},
}
```
```bibtex
% Zenodo preprint rather than a journal article: typed as misc, which needs no journal field.
@misc{napolitano2025controlfield,
  author       = {Napolitano, Logan Matthew},
  title        = {From Explicit Holonomy to Latent Control Fields},
  year         = {2025},
  howpublished = {Zenodo},
  publisher    = {Zenodo},
  doi          = {10.5281/zenodo.14707164},
  url          = {https://zenodo.org/records/14707164},
}
```
## References
1. Zou, A., et al. (2023). Representation Engineering: A Top-Down Approach to AI Transparency. arXiv:2310.01405
2. Rafailov, R., et al. (2023). Direct Preference Optimization. arXiv:2305.18290
3. Hu, E. J., et al. (2021). LoRA: Low-Rank Adaptation of Large Language Models. arXiv:2106.09685
4. Ouyang, L., et al. (2022). Training language models to follow instructions with human feedback. NeurIPS.
---
## Acknowledgments
- **NousResearch** for Hermes-3-Llama-3.1-8B base model
- **Meta AI** for Llama 3.1 architecture
- **Hugging Face** for transformers, PEFT, TRL
- **Anthropic** for Claude API (Mentor Mode)
---
## License
This work is licensed under [CC BY 4.0](https://creativecommons.org/licenses/by/4.0/) (Creative Commons Attribution 4.0 International).
You are free to:
- **Share** — copy and redistribute the material in any medium or format
- **Adapt** — remix, transform, and build upon the material for any purpose, including commercial
Under the following terms:
- **Attribution** β You must give appropriate credit, provide a link to the license, and indicate if changes were made.
---
<div align="center">
**Contact:** [GitHub Issues](https://github.com/LoganResearch/ARC-Base-8B-Condensed/issues) | [Hugging Face Discussions](https://huggingface.co/LoganResearch/ARC-Base-8B-Condensed/discussions)
**Version:** 2.9 | **Last Updated:** January 2025
</div> |