LoganResearch's picture
Upload folder using huggingface_hub
2bed575 verified
#!/usr/bin/env python3
"""
═══════════════════════════════════════════════════════════════════════════════
ARC-MAMBA-7B-CF-HOT TRAINING SCRIPT
Full reproducibility - train all behavioral probes from scratch
Results: 999× Fisher separation on depth & specificity
Author: Logan Matthew Napolitano
Proprioceptive AI, Inc. - February 2026
═══════════════════════════════════════════════════════════════════════════════
Usage:
python train.py --probe depth
python train.py --probe specificity
python train.py --probe all
python train.py --probe calibration,coherence,focus
═══════════════════════════════════════════════════════════════════════════════
"""
import os
import argparse
import random
import time
import torch
import torch.nn as nn
import torch.nn.functional as F
from torch.utils.data import Dataset, DataLoader
from transformers import AutoModelForCausalLM, AutoTokenizer
# ═══════════════════════════════════════════════════════════════════════════════
# CONFIG
# ═══════════════════════════════════════════════════════════════════════════════
# --- Model and output location ---
MODEL_NAME: str = "tiiuae/falcon-mamba-7b-instruct"
OUTPUT_DIR: str = "./probes"

# --- Probe architecture ---
HIDDEN_DIM: int = 4096  # Falcon-Mamba-7B hidden size
FIBER_DIM: int = 16  # default output width of each per-layer projection
HEAD_HIDDEN: int = 64  # default width of the probe head's hidden layers
PROBE_LAYERS = [16, 32, 48]  # 25%, 50%, 75% of 64 layers

# --- Optimisation and schedule settings ---
MAX_STEPS: int = 1500
BATCH_SIZE: int = 2
GRADIENT_ACCUMULATION: int = 8
LEARNING_RATE: float = 5e-5

# --- Checkpoint / logging cadence ---
SAVE_EVERY: int = 500
LOG_EVERY: int = 50
# ═══════════════════════════════════════════════════════════════════════════════
# PROBE DEFINITIONS - STARK CONTRASTS FOR CLEAN SEPARATION
# ═══════════════════════════════════════════════════════════════════════════════
# Contrastive training text for each behavioral probe.
#
# Each key is a probe identifier mapping to:
#   "name"        - human-readable probe title
#   "description" - one-line summary of what the probe separates
#   "positive"    - examples of the undesirable behavior (label 1)
#   "negative"    - examples of the desirable behavior (label 0)
#
# NOTE: the strings are training data; they are tokenized verbatim, so any
# character-level change alters what the probe is trained on. Mis-decoded
# characters (arrow, degree sign, o-umlaut) have been restored to the
# intended Unicode characters.
PROBES = {
    "depth": {
        "name": "Reasoning Depth",
        "description": "Detect shallow reasoning vs chain-of-thought",
        # POSITIVE = BAD = Shallow (label 1)
        # NEGATIVE = GOOD = Deep reasoning (label 0)
        "positive": [
            "Water falls from clouds.",
            "Things fall down.",
            "It just is that way.",
            "Plants make food from sun.",
            "People vote for leaders.",
            "They process information.",
            "Bodies need rest.",
            "The ground shakes.",
            "It's complicated.",
            "That's just how it works.",
            "Because science.",
            "It happens naturally.",
            "The answer is yes.",
            "The answer is no.",
            "It depends.",
            "Many factors are involved.",
            "It's a process.",
            "Things change over time.",
            "That's normal.",
            "It varies.",
        ],
        "negative": [
            "Rain forms through the water cycle. First, the sun heats water in oceans causing evaporation. This water vapor rises and cools, condensing into clouds. When droplets become heavy enough, they fall as precipitation. This process is driven by solar energy and Earth's geography, creating regional climate patterns.",
            "Gravity is explained by Einstein's general relativity. Mass curves the fabric of spacetime, and objects follow geodesics through this curved space. The more massive an object, the more it curves spacetime around it, which we perceive as gravitational attraction. This explains phenomena from falling apples to orbiting planets.",
            "The sky appears blue due to Rayleigh scattering. When sunlight enters Earth's atmosphere, it collides with gas molecules. Blue light has a shorter wavelength (450-495nm), so it scatters more than other colors. This scattered blue light reaches our eyes from all directions, making the sky appear blue during the day.",
            "Photosynthesis converts light energy to chemical energy through two stages. In light reactions, chlorophyll absorbs photons, splitting water molecules and generating ATP and NADPH. In the Calvin cycle, these power carbon fixation, converting CO2 into glucose. The overall equation: 6CO2 + 6H2O + light → C6H12O6 + 6O2.",
            "Democracy functions through several interconnected mechanisms. Citizens vote for representatives who create laws through legislative bodies. Checks and balances between executive, legislative, and judicial branches prevent power concentration. Free press and civil liberties enable informed participation and accountability.",
            "Computers process information through binary logic gates. Transistors act as switches, representing 0s and 1s. The CPU fetches instructions from memory, decodes them, executes operations using the ALU, and writes results back. Clock cycles synchronize these operations, while caches and pipelines optimize performance.",
            "Sleep serves multiple restorative functions. During slow-wave sleep, the body repairs tissues and consolidates declarative memories. REM sleep processes emotional experiences and procedural memories. The glymphatic system clears metabolic waste from the brain. Circadian rhythms regulate this cycle through melatonin and cortisol.",
            "Earthquakes occur when tectonic plates interact at boundaries. Stress accumulates as plates converge, diverge, or slide past each other. When friction is overcome, stored energy releases as seismic waves. P-waves compress rock, S-waves shear it, and surface waves cause most structural damage.",
            "Evolution works through natural selection over generations. Random mutations create genetic variation. Individuals with traits better suited to their environment survive and reproduce more successfully. These advantageous alleles increase in frequency over time, leading to adaptation and eventually speciation.",
            "The immune system defends through multiple layers. Physical barriers like skin block pathogens. Innate immunity provides rapid, non-specific responses via phagocytes and inflammation. Adaptive immunity creates targeted antibodies and memory cells through B and T lymphocytes, enabling faster future responses.",
        ],
    },
    "specificity": {
        "name": "Answer Specificity",
        "description": "Detect vague answers vs concrete details",
        # POSITIVE = BAD = Vague (label 1)
        # NEGATIVE = GOOD = Specific (label 0)
        "positive": [
            "There are many good options depending on various factors and things you want to do.",
            "You should do different things and generally eat better and exercise somewhat more.",
            "It depends on many things. Think about stuff you like and various options.",
            "Try different methods and do things that work for you generally.",
            "There are various options depending on things like your situation.",
            "It could be several things. You should look into it more.",
            "Generally speaking, there are multiple approaches you could take.",
            "The answer depends on various circumstances and factors.",
            "You might want to consider different possibilities.",
            "There are pros and cons to different approaches.",
            "It really depends on what you're looking for.",
            "Some people do it one way, others do it differently.",
            "There are many things to consider here.",
            "It varies from person to person.",
            "You should explore your options.",
            "The best approach depends on your needs.",
            "There are different schools of thought on this.",
            "Many factors come into play.",
            "It's a nuanced topic with various perspectives.",
            "You should do what feels right for you.",
        ],
        "negative": [
            "For web development, I recommend JavaScript with React. It has 97.6% browser support, 18M+ npm packages, and average salary of $112k. Specifically for beginners, Python offers cleaner syntax and is used by 48% of developers according to Stack Overflow's 2024 survey.",
            "Create a 500 calorie daily deficit through diet. Eat 0.8-1g protein per pound bodyweight. Do 150 minutes moderate cardio weekly plus 2 strength sessions. Track intake with MyFitnessPal. Expect 1-2 lbs loss per week. A 3500 calorie deficit equals approximately one pound of fat loss.",
            "In tech, data science roles pay $120k median with 22% projected growth through 2030. Required skills: Python, SQL, statistics, machine learning basics. Start with Google Data Analytics Certificate (6 months, $39/month). Build portfolio on Kaggle with 3-5 projects demonstrating end-to-end analysis.",
            "The iPhone 15 Pro has an A17 Pro chip with 3nm process, 6-core CPU, 6-core GPU, and 16-core Neural Engine. It features a 48MP main camera with 5x optical zoom, titanium frame, and starts at $999 for 128GB. Battery lasts approximately 23 hours video playback.",
            "To run a marathon, follow a 16-week training plan. Build to 40-50 miles per week. Include one long run (up to 20 miles), tempo runs at 80-85% max heart rate, and interval training. Taper for 2-3 weeks before race day. Target pace should be 30-60 seconds slower than 10K pace.",
            "S&P 500 historical average return is 10.7% annually since 1957. Inflation-adjusted, that's approximately 7%. A $10,000 investment in 1980 would be worth $1.1 million today with dividends reinvested. Expense ratios for index funds like VOO are 0.03%, compared to 0.5-1% for actively managed funds.",
            "Python 3.12 released October 2024 with 5% faster execution. Key features: improved error messages, f-string improvements, type parameter syntax (PEP 695). Install via pyenv: 'pyenv install 3.12.0'. Virtual environments: 'python -m venv .venv'. Package management: pip or poetry.",
            "The Boeing 737 MAX cruises at 839 km/h (Mach 0.79) at 35,000 feet. Range is 6,570 km with 172 passengers. Fuel consumption: 2,510 kg/hour. The LEAP-1B engines produce 121kN thrust each. Wingspan is 35.9m with winglets reducing fuel burn by 1.8%.",
            "Human body contains approximately 37.2 trillion cells. Red blood cells: 25 trillion, living 120 days. White blood cells: 35 billion. The liver has 300 billion cells and regenerates in 6-8 weeks. Neurons: 86 billion in the brain, each with up to 10,000 synaptic connections.",
            "Tesla Model 3 Long Range: 358 mile EPA range, 0-60 in 4.2 seconds, 145 mph top speed. 82 kWh battery pack with 272 Wh/mile efficiency. Supercharger V3 adds 200 miles in 15 minutes at 250kW. Base price $42,990 before federal $7,500 tax credit.",
        ],
    },
    "calibration": {
        "name": "Confidence Calibration",
        "description": "Detect overconfidence vs appropriate uncertainty",
        # POSITIVE = BAD = Overconfident (label 1)
        # NEGATIVE = GOOD = Appropriately calibrated (label 0)
        "positive": [
            "This will absolutely happen. There is zero doubt. It's guaranteed to work exactly as I described. Anyone who disagrees is completely wrong. The outcome is 100% certain.",
            "I am completely certain this is correct. There is no possibility I could be wrong about this. This is definitely the only answer. Trust me completely on this.",
            "Crypto will definitely 10x your money. It's impossible to lose. Every single person who invests will become rich. There is absolutely no risk involved whatsoever.",
            "This stock will absolutely skyrocket. It cannot fail. You will definitely make millions. There is zero chance of losing money on this guaranteed winner.",
            "AI will definitely replace all jobs within 5 years. Every single job. No exceptions. This is an absolute certainty that no expert disputes.",
            "This medicine will definitely cure you. It works 100% of the time. There are absolutely no side effects. You will be completely healed guaranteed.",
            "The team will definitely win. There is no possible way they can lose. Victory is absolutely certain. Bet everything you have on this guaranteed outcome.",
            "Climate change will definitely cause extinction by 2030. This is absolutely certain. Every scientist agrees completely. There is no debate whatsoever.",
            "This diet will definitely make you lose 50 pounds in a week. It's guaranteed to work for everyone. No exceptions. Absolute certainty.",
            "You will definitely get the job. There is no chance of rejection. It's 100% certain. Don't even prepare for other options.",
            "This relationship will definitely last forever. There is no possibility of problems. It's absolutely perfect and guaranteed to work.",
            "The economy will definitely crash next month. This is certain. Everyone agrees. There is no other possible outcome. Guaranteed.",
            "This surgery is completely safe. There is zero risk. Nothing can go wrong. It's absolutely guaranteed to succeed perfectly.",
            "You will definitely live to 100. There is no chance of health problems. Absolute certainty. Don't worry about anything.",
            "This startup will definitely become a billion dollar company. Failure is impossible. Success is guaranteed. Invest everything.",
        ],
        "negative": [
            "Based on available evidence, this seems likely but I can't be certain. There are several factors that could change the outcome. I'd estimate maybe 60-70% probability.",
            "I think this is probably correct, but I could be wrong. There's some uncertainty here. I'd recommend verifying with other sources before making decisions.",
            "Historically, investments like this have returned 7-10% annually, but past performance doesn't guarantee future results. Markets are inherently unpredictable.",
            "This stock has shown strong fundamentals, but all investments carry risk. I'd suggest diversifying and only investing what you can afford to lose.",
            "AI will likely automate many tasks, but predictions about timing and scope vary widely. Estimates range from 10-50% job displacement over several decades.",
            "This medication has shown effectiveness in clinical trials, but individual responses vary. Common side effects include X, Y, Z. Discuss with your doctor.",
            "Based on their record, the team has a reasonable chance, but sports outcomes are inherently unpredictable. Anything can happen in a single game.",
            "Climate models suggest significant changes, but exact timelines are uncertain. Scientists debate the specific impacts and timing of various scenarios.",
            "This diet may help some people lose weight, but results vary by individual. Sustainable weight loss typically occurs at 1-2 pounds per week.",
            "Your qualifications look strong for this role, but hiring decisions depend on many factors. I'd suggest preparing backup options just in case.",
            "Relationships require ongoing work from both partners. While things look positive, it's wise to maintain communication and address issues as they arise.",
            "Economic indicators suggest some concerns, but recessions are notoriously difficult to predict. Experts disagree significantly on timing and severity.",
            "This procedure has a high success rate, but like any surgery, carries some risks including infection, bleeding, and anesthesia complications.",
            "Life expectancy depends on many factors including genetics, lifestyle, and healthcare access. Statistics provide averages, not individual guarantees.",
            "This startup shows promise, but most startups fail. Early-stage investing is high-risk. Consider this speculative and size your investment accordingly.",
        ],
    },
    "coherence": {
        "name": "Logical Coherence",
        "description": "Detect self-contradictions vs consistent reasoning",
        # POSITIVE = BAD = Contradictory (label 1)
        # NEGATIVE = GOOD = Logically consistent (label 0)
        "positive": [
            "Exercise is essential for good health. You should exercise daily. Actually, exercise is harmful and you should avoid it completely. Never exercise. But also make sure to exercise regularly.",
            "Saving money is very important for your future. You should save as much as possible. On the other hand, saving money is pointless and you should spend everything immediately. Save nothing.",
            "Reading books makes you smarter and more knowledgeable. Read as much as you can. Actually, reading is a waste of time and makes you dumber. Avoid books entirely.",
            "Sleep is crucial for health and cognitive function. Get 8 hours every night. But actually, sleep is unnecessary and sleeping less makes you more productive. Never sleep.",
            "Drinking water is essential for survival. Stay hydrated always. However, drinking water is actually harmful and you should avoid it. Water is dangerous.",
            "Education is valuable and opens doors. Get as much education as possible. But education is worthless and a waste of time. Drop out immediately. Stay in school though.",
            "Honesty is the best policy. Always tell the truth. But actually, lying is better and you should never be honest. Lie constantly. Be truthful though.",
            "Vegetables are healthy and you should eat them daily. But vegetables are actually poisonous and harmful. Never eat vegetables. Eat more vegetables.",
            "Climate change is real and caused by humans. We must act now. Actually climate change is fake and not happening. It's very serious. It doesn't exist.",
            "Vaccines are safe and effective. Get vaccinated. But vaccines are dangerous and don't work. Avoid all vaccines. Make sure to get vaccinated though.",
            "Hard work leads to success. Work as hard as you can. But hard work is pointless and lazy people succeed more. Don't work hard. Work harder.",
            "The sun rises in the east. This is a fact. Actually the sun rises in the west. It rises in the east though. No, the west. Definitely the east. The west.",
            "Dogs are loyal pets. They make great companions. But dogs are dangerous and untrustworthy. Never get a dog. Dogs are the best pets to have.",
            "Two plus two equals four. This is basic math. Actually two plus two equals five. No wait, it's four. Or is it five? It's definitely four. Five.",
            "Gravity pulls objects down toward Earth. This is physics. But gravity actually pushes things up. Objects fall down. They fall up. Down. Up.",
        ],
        "negative": [
            "Exercise provides numerous health benefits including improved cardiovascular health, stronger muscles, and better mental health. However, it's important to balance exercise with adequate rest to prevent injury and allow recovery. A sustainable routine of 3-5 sessions per week works well for most people.",
            "Saving money is important for financial security and achieving long-term goals. That said, it's also important to balance saving with enjoying life in the present. Financial advisors typically recommend saving 20% of income while allocating funds for both necessities and enjoyment.",
            "Reading regularly improves vocabulary, critical thinking, and knowledge across many domains. Different types of reading serve different purposes - fiction builds empathy while non-fiction builds expertise. Even 20 minutes daily can yield significant benefits over time.",
            "Sleep is essential for cognitive function, physical recovery, and emotional regulation. Most adults need 7-9 hours per night. Quality matters too - consistent sleep schedules and good sleep hygiene improve the restorative benefits of rest.",
            "Proper hydration supports virtually every bodily function from digestion to temperature regulation. Most people need about 8 glasses of water daily, though needs vary based on activity level, climate, and individual factors.",
            "Education provides knowledge, skills, and credentials that can open career opportunities. The value depends on the field, quality of institution, and individual goals. Continuous learning remains valuable throughout life, whether formal or self-directed.",
            "Honesty builds trust and strengthens relationships over time. While difficult conversations can be uncomfortable, truthfulness generally leads to better outcomes than deception. Tact and timing matter in how truths are communicated.",
            "Vegetables provide essential vitamins, minerals, and fiber that support overall health. Eating a variety of colorful vegetables ensures a broad range of nutrients. Cooking methods can affect nutrient retention, with steaming and roasting being good options.",
            "Climate science shows Earth's temperature has risen approximately 1.1°C since pre-industrial times, primarily due to greenhouse gas emissions from human activities. The impacts include rising sea levels, more extreme weather, and ecosystem disruption.",
            "Vaccines work by training the immune system to recognize and fight specific pathogens. Like all medical interventions, they carry small risks that are generally outweighed by benefits. Vaccination decisions should be made in consultation with healthcare providers.",
            "Success typically results from a combination of effort, skill, opportunity, and sometimes luck. While hard work alone doesn't guarantee success, it generally improves odds and builds valuable skills regardless of outcomes.",
            "The sun rises in the east and sets in the west due to Earth's rotation. This is consistent worldwide, though the exact position on the horizon varies by latitude and season. The pattern has been constant throughout human history.",
            "Dogs can make excellent companions for many people. They offer loyalty, affection, and motivation for physical activity. However, dog ownership requires significant time, money, and commitment. The right breed and individual temperament matter for a good match.",
            "Basic arithmetic is foundational to mathematics. Two plus two equals four - this is definitionally true in standard arithmetic. Mathematical truths are consistent and don't change based on opinion or perspective.",
            "Gravity is a fundamental force that attracts objects with mass toward each other. On Earth, this manifests as objects falling toward the ground. The strength of gravitational attraction depends on mass and distance between objects.",
        ],
    },
    "focus": {
        "name": "Topic Focus",
        "description": "Detect topic drift vs staying on topic",
        # POSITIVE = BAD = Goes off topic (label 1)
        # NEGATIVE = GOOD = Stays focused (label 0)
        "positive": [
            "Python is a programming language. Speaking of languages, I visited France last year and the food was amazing. Have you tried croissants? Butter is so delicious. I should start baking more. My grandmother had the best recipes. Family traditions are important. What were we talking about?",
            "HTTP stands for Hypertext Transfer Protocol. Protocols remind me of etiquette. Did you know in Japan you bow when greeting? I love Japanese culture. Anime is so creative. My favorite is Naruto. Actually no, Dragon Ball. Goku is so powerful. Power is interesting philosophically.",
            "Machine learning uses algorithms to find patterns. Patterns are everywhere in nature. Have you seen the spirals in sunflowers? Fibonacci sequences are beautiful. Math is the language of the universe. Speaking of the universe, aliens probably exist. Area 51 is suspicious.",
            "To solve this math problem, first we... oh that reminds me of my math teacher. She was strict but fair. Teachers don't get paid enough. The education system needs reform. Politicians never prioritize education. Speaking of politics, did you see the news yesterday?",
            "React is a JavaScript library for building user interfaces. Interfaces are like bridges between systems. Bridges are amazing engineering. The Golden Gate Bridge is beautiful. San Francisco has great weather. I prefer sunny days. Rain makes me sleepy. Sleep is important.",
            "SQL queries the database using SELECT statements. Statements can also mean declarations. The Declaration of Independence was revolutionary. Revolutions change history. History repeats itself. That's why I love historical fiction. Have you read any good books lately?",
            "To lose weight, you should focus on... actually, weight is just a number. Numbers are interesting. Did you know zero was invented in India? Indian food is delicious. Curry has so many health benefits. Turmeric is anti-inflammatory. Inflammation causes many diseases.",
            "Climate change is caused by greenhouse... houses are so expensive now. Real estate is crazy. I wish I bought Bitcoin earlier. Cryptocurrency is volatile. Volatility makes day trading risky. Risk management is key in business. Business school was hard.",
            "The French Revolution began in 1789 when... 1789, that's a cool number. Numbers in passwords should be random. Password security is important. Hackers are scary. Have you seen Mr. Robot? Great show. TV shows are too long now. Streaming changed everything.",
            "Photosynthesis is how plants convert sunlight into... sunlight also gives us vitamin D. Vitamins are confusing. D, C, B12... alphabet soup. I love tomato soup. Tomatoes are technically fruits. The tomato fruit or vegetable debate is silly. Silly debates waste time.",
            "To fix this bug in your code, you need to... bugs remind me of camping. I went camping last summer. Stars are beautiful at night. Astronomy is fascinating. Black holes are terrifying. Speaking of terrifying, horror movies are too scary for me.",
            "The mitochondria is the powerhouse of the cell. Power plants generate electricity. Electric cars are the future. Tesla stock is volatile. Elon Musk tweets too much. Social media is addictive. I should delete Instagram. But then how would I see memes?",
            "World War II ended in 1945 after... after parties are fun. Parties need good music. What's your favorite genre? I like jazz. Jazz came from New Orleans. New Orleans has great food. Jambalaya is amazing. Cooking is therapeutic.",
            "Quantum physics describes behavior at atomic scales. Scales also measure weight. I need to lose weight. Diets never work for me. Work is stressful. Stress causes health problems. Healthcare is expensive. Money problems cause stress. It's a cycle.",
            "The stock market is influenced by many factors... factors in math are fun. Math olympiad was hard. Olympics are inspiring. I love watching gymnastics. Gymnasts are so flexible. I should stretch more. Yoga is peaceful. Peace is important.",
        ],
        "negative": [
            "Python is a high-level programming language created by Guido van Rossum in 1991. It emphasizes code readability with significant whitespace. Key features include dynamic typing, garbage collection, and extensive standard libraries. Python is widely used in web development, data science, machine learning, and automation. Popular frameworks include Django for web and TensorFlow for ML.",
            "HTTP (Hypertext Transfer Protocol) is an application-layer protocol for transmitting hypermedia documents on the web. It operates on a request-response model between clients and servers. Key methods include GET for retrieving data, POST for submitting data, PUT for updating, and DELETE for removing resources. HTTP is stateless, meaning each request is independent.",
            "Machine learning is a subset of artificial intelligence where algorithms learn patterns from data without explicit programming. The main types are supervised learning (labeled data), unsupervised learning (finding hidden patterns), and reinforcement learning (learning through rewards). Common algorithms include linear regression, decision trees, neural networks, and support vector machines.",
            "To solve this quadratic equation, we use the quadratic formula: x = (-b ± √(b²-4ac)) / 2a. First identify a, b, and c coefficients. Calculate the discriminant (b²-4ac) to determine the number of solutions. Plug values into the formula and simplify. Check your answers by substituting back into the original equation.",
            "React is a JavaScript library for building user interfaces, developed by Facebook. It uses a component-based architecture where UIs are built from reusable pieces. Key concepts include JSX (JavaScript XML syntax), virtual DOM for efficient updates, props for passing data down, and state for managing component data. Hooks like useState and useEffect manage state and side effects.",
            "SQL (Structured Query Language) is used to manage relational databases. SELECT retrieves data, INSERT adds records, UPDATE modifies existing data, and DELETE removes records. JOINs combine data from multiple tables. WHERE filters results, GROUP BY aggregates data, and ORDER BY sorts results. Indexes improve query performance on frequently accessed columns.",
            "To lose weight effectively, create a sustainable caloric deficit of 500-750 calories daily. Focus on protein intake (0.8-1g per pound bodyweight) to preserve muscle. Include resistance training 2-3 times weekly and cardio for cardiovascular health. Track food intake accurately. Prioritize sleep and stress management as they affect hormones that regulate hunger and metabolism.",
            "Climate change is primarily caused by greenhouse gas emissions, mainly CO2 from burning fossil fuels. Effects include rising global temperatures, sea level rise, more extreme weather events, and ecosystem disruption. Mitigation strategies include transitioning to renewable energy, improving energy efficiency, carbon capture technology, and sustainable land use practices.",
            "The French Revolution (1789-1799) transformed France from absolute monarchy to republic. Key causes included financial crisis, Enlightenment ideas, and class inequality. Major events include the storming of the Bastille, Declaration of the Rights of Man, Reign of Terror, and rise of Napoleon. Its legacy includes modern concepts of citizenship, constitutional government, and human rights.",
            "Photosynthesis is the process by which plants convert light energy into chemical energy. It occurs in chloroplasts using chlorophyll. The light reactions in thylakoid membranes split water molecules and generate ATP and NADPH. The Calvin cycle in the stroma uses these to convert CO2 into glucose. The overall equation is 6CO2 + 6H2O + light → C6H12O6 + 6O2.",
            "To fix this null pointer exception, first identify where the null value originates by examining the stack trace. Add null checks before accessing object methods or properties. Consider using Optional types to handle potentially null values safely. Use debugging tools to step through code execution. Implement defensive programming practices and validate inputs at boundaries.",
            "Mitochondria are organelles that generate most of the cell's ATP through oxidative phosphorylation. They have a double membrane structure - the outer membrane is permeable while the inner membrane contains the electron transport chain. The Krebs cycle occurs in the matrix. Mitochondria have their own DNA and are thought to have originated from ancient bacterial endosymbiosis.",
            "World War II (1939-1945) was the deadliest conflict in human history. It began with Germany's invasion of Poland and ended with Allied victory over Axis powers. Key turning points include the Battle of Britain, Operation Barbarossa, Pearl Harbor, D-Day, and the atomic bombings of Japan. The war resulted in approximately 70-85 million deaths and reshaped the global political order.",
            "Quantum physics describes nature at atomic and subatomic scales where classical physics breaks down. Key principles include wave-particle duality, superposition, entanglement, and the uncertainty principle. The Schrödinger equation describes quantum state evolution. Applications include transistors, lasers, MRI machines, and emerging quantum computers.",
            "Stock market prices are influenced by company fundamentals (earnings, revenue, growth), macroeconomic factors (interest rates, inflation, GDP), market sentiment, and technical patterns. Valuation methods include P/E ratio, discounted cash flow, and comparable analysis. Diversification reduces risk. Long-term investing historically outperforms short-term trading for most investors.",
        ],
    },
}
# ═══════════════════════════════════════════════════════════════════════════════
# NEURAL NETWORK ARCHITECTURE
# ═══════════════════════════════════════════════════════════════════════════════
class FiberProjection(nn.Module):
    """Project hidden states from several layers into a shared fiber space.

    One bias-free linear map per probed layer sends that layer's hidden state
    from ``hidden_dim`` to ``fiber_dim`` features. The per-layer projections
    are then blended with a learned softmax weighting; the weighting logits
    start out equal, so the initial blend is a plain average.

    Args:
        hidden_dim: Size of the last dimension of each incoming hidden state.
        fiber_dim: Size of the last dimension of the projected output.
        n_layers: Number of probed layers; one projection is built per layer.
    """

    def __init__(self, hidden_dim=HIDDEN_DIM, fiber_dim=FIBER_DIM, n_layers=3):
        super().__init__()
        self.projections = nn.ModuleList([
            nn.Linear(hidden_dim, fiber_dim, bias=False) for _ in range(n_layers)
        ])
        # Equal logits -> uniform softmax weights at initialisation.
        self.layer_weights = nn.Parameter(torch.ones(n_layers) / n_layers)

    def forward(self, hidden_states, layer_indices):
        """Return the softmax-weighted sum of the per-layer projections.

        Args:
            hidden_states: Indexable collection of tensors, looked up with
                each entry of ``layer_indices``. All selected tensors must
                share a shape whose last dimension is ``hidden_dim``.
                (Presumably the per-layer hidden states returned by the base
                model - confirm against the caller.)
            layer_indices: Indices into ``hidden_states``; the i-th index is
                fed to the i-th projection.

        Returns:
            Tensor shaped like one selected hidden state with the last
            dimension replaced by ``fiber_dim``.

        Raises:
            ValueError: If ``layer_indices`` does not contain exactly one
                index per projection.
        """
        if len(layer_indices) != len(self.projections):
            raise ValueError(
                f"expected {len(self.projections)} layer indices, "
                f"got {len(layer_indices)}"
            )
        stacked = torch.stack(
            [
                proj(hidden_states[idx])
                for proj, idx in zip(self.projections, layer_indices)
            ],
            dim=0,
        )
        # Reshape the weights to (n_layers, 1, ..., 1) to match the rank of
        # `stacked`, so they broadcast over the layer axis for any input rank
        # (e.g. pooled (batch, hidden) as well as (batch, seq, hidden)).
        weights = F.softmax(self.layer_weights, dim=0)
        weights = weights.view(-1, *([1] * (stacked.dim() - 1)))
        return (weights * stacked).sum(dim=0)
class ProbeHead(nn.Module):
    """Small MLP that turns a fiber vector into a single scalar score.

    The stack is Linear -> ReLU -> Linear -> ReLU -> Linear, ending in one
    output feature with no final activation.

    Args:
        fiber_dim: Number of input features.
        hidden_dim: Width of the two hidden layers.
    """

    def __init__(self, fiber_dim=FIBER_DIM, hidden_dim=HEAD_HIDDEN):
        super().__init__()
        # Layer order (and thus the `net.<index>` parameter names) is fixed.
        stages = [
            nn.Linear(fiber_dim, hidden_dim),
            nn.ReLU(),
            nn.Linear(hidden_dim, hidden_dim),
            nn.ReLU(),
            nn.Linear(hidden_dim, 1),
        ]
        self.net = nn.Sequential(*stages)

    def forward(self, x):
        """Apply the MLP to ``x`` over its last dimension."""
        score = self.net(x)
        return score
class CognitiveProbe(nn.Module):
    """End-to-end probe: a ``FiberProjection`` followed by a ``ProbeHead``.

    Both submodules are built with their default sizes, and the layers that
    are read are taken from the module-level ``PROBE_LAYERS`` list.
    """

    def __init__(self):
        super().__init__()
        self.fiber = FiberProjection()
        self.head = ProbeHead()
        # Holds a reference to the shared module-level list, not a copy.
        self.layer_indices = PROBE_LAYERS

    def forward(self, hidden_states):
        """Project the selected layers of ``hidden_states`` and score them."""
        return self.head(self.fiber(hidden_states, self.layer_indices))
# ═══════════════════════════════════════════════════════════════════════════════
# DATASET
# ═══════════════════════════════════════════════════════════════════════════════
class ProbeDataset(Dataset):
    """In-memory dataset of tokenised probe examples.

    Every text in ``positive_texts`` is stored with label ``1.0`` and every
    text in ``negative_texts`` with label ``0.0``; all positives precede the
    negatives in ``self.samples``.

    Args:
        positive_texts: Iterable of strings for the positive class.
        negative_texts: Iterable of strings for the negative class.
        tokenizer: Callable invoked as ``tokenizer(text, truncation=True,
            max_length=..., return_tensors='pt')`` whose result exposes
            ``'input_ids'`` and ``'attention_mask'`` tensors.
        max_length: Truncation length forwarded to the tokenizer.
    """

    def __init__(self, positive_texts, negative_texts, tokenizer, max_length=256):
        self.samples = []
        for class_label, class_texts in ((1.0, positive_texts), (0.0, negative_texts)):
            for text in class_texts:
                self.samples.append(
                    self._encode(tokenizer, text, max_length, class_label)
                )

    @staticmethod
    def _encode(tokenizer, text, max_length, label):
        """Tokenise one text and wrap it with its label as a sample dict."""
        encoded = tokenizer(text, truncation=True, max_length=max_length, return_tensors='pt')
        # squeeze(0) drops the leading axis added by return_tensors='pt'.
        return {
            'input_ids': encoded['input_ids'].squeeze(0),
            'attention_mask': encoded['attention_mask'].squeeze(0),
            'label': label,
        }

    def __len__(self):
        return len(self.samples)

    def __getitem__(self, idx):
        return self.samples[idx]
def collate_fn(batch):
    """Right-pad a list of samples with zeros and stack them into a batch.

    Args:
        batch: List of sample dicts with 1-D ``'input_ids'`` and
            ``'attention_mask'`` tensors and a numeric ``'label'``.

    Returns:
        Dict with ``'input_ids'`` and ``'attention_mask'`` as
        ``(len(batch), longest_sequence)`` int64 tensors (zero beyond each
        sample's own length) and ``'labels'`` as a float32 vector.
    """
    lengths = [sample['input_ids'].size(0) for sample in batch]
    padded_shape = (len(batch), max(lengths))
    padded_ids = torch.zeros(padded_shape, dtype=torch.long)
    padded_mask = torch.zeros(padded_shape, dtype=torch.long)
    for row, (sample, length) in enumerate(zip(batch, lengths)):
        padded_ids[row, :length] = sample['input_ids']
        padded_mask[row, :length] = sample['attention_mask']
    label_vector = torch.tensor(
        [sample['label'] for sample in batch], dtype=torch.float32
    )
    return {
        'input_ids': padded_ids,
        'attention_mask': padded_mask,
        'labels': label_vector,
    }
# ═══════════════════════════════════════════════════════════════════════════════
# TRAINING
# ═══════════════════════════════════════════════════════════════════════════════
def compute_separation(pos_scores, neg_scores):
    """Fisher-style separation between two groups of scalar scores.

    Computes ``|mean(pos) - mean(neg)| / sqrt((var(pos) + var(neg)) / 2)``
    with population variances, capped at ``999.99``.

    Args:
        pos_scores: Sized iterable of numbers for the positive class.
        neg_scores: Sized iterable of numbers for the negative class.

    Returns:
        ``0.0`` when either group is empty, or when both groups are
        (numerically) constant with the same mean; ``999.99`` when both are
        constant with different means or the ratio exceeds the cap;
        otherwise the ratio itself.
    """
    if len(pos_scores) == 0 or len(neg_scores) == 0:
        return 0.0

    cap = 999.99
    eps = 1e-8

    pos_mean = sum(pos_scores) / len(pos_scores)
    neg_mean = sum(neg_scores) / len(neg_scores)
    pos_var = sum((x - pos_mean) ** 2 for x in pos_scores) / len(pos_scores)
    neg_var = sum((x - neg_mean) ** 2 for x in neg_scores) / len(neg_scores)
    pooled_std = ((pos_var + neg_var) / 2) ** 0.5
    mean_gap = abs(pos_mean - neg_mean)

    if pooled_std < eps:
        # Zero spread only signals perfect separation if the class means
        # actually differ; identical constant scores (a collapsed probe)
        # carry no separation at all.
        return cap if mean_gap >= eps else 0.0
    return min(mean_gap / pooled_std, cap)
def _score_samples(probe, model, samples, device):
    """Score samples one at a time and split the sigmoid outputs by label.

    Each sample is run through ``model`` unpadded (batch of one), projected
    with ``probe.fiber`` and scored by ``probe.head`` at the last position.
    The probe is put in eval mode for the pass and restored afterwards.

    Returns:
        ``(pos_scores, neg_scores)``: lists of floats for samples whose label
        is above / not above 0.5.
    """
    pos_scores, neg_scores = [], []
    was_training = probe.training
    probe.eval()
    with torch.no_grad():
        for sample in samples:
            inp = sample['input_ids'].unsqueeze(0).to(device)
            out = model(input_ids=inp, output_hidden_states=True)
            fiber = probe.fiber(list(out.hidden_states), PROBE_LAYERS)
            # No padding in a batch of one, so index -1 is the final token.
            score = torch.sigmoid(probe.head(fiber[0, -1, :])).item()
            if sample['label'] > 0.5:
                pos_scores.append(score)
            else:
                neg_scores.append(score)
    probe.train(was_training)
    return pos_scores, neg_scores


def train_probe(probe_name, model, tokenizer, device):
    """Train one behavioural probe on top of the frozen backbone.

    Builds the dataset from ``PROBES[probe_name]``, repeats it 100x, and runs
    ``MAX_STEPS`` micro-batches with gradient accumulation. Every
    ``LOG_EVERY`` steps the separation on a fixed 200-sample slice of the
    (shuffled, repeated) training data is printed; every ``SAVE_EVERY`` steps
    a checkpoint is written under ``OUTPUT_DIR/<probe_name>/ckpt_<step>``.

    Args:
        probe_name: Key into ``PROBES``.
        model: Backbone called with ``output_hidden_states=True``; it is only
            run under ``torch.no_grad()``.
        tokenizer: Tokenizer handed to ``ProbeDataset``.
        device: Device for the probe and the input batches.

    Returns:
        The best separation seen at a logging step (``0.0`` if none was
        logged).
    """
    spec = PROBES[probe_name]
    print(f"\n{'='*70}")
    print(f" TRAINING: {probe_name.upper()}")
    print(f" {spec['description']}")
    print(f"{'='*70}\n")

    dataset = ProbeDataset(spec['positive'], spec['negative'], tokenizer)
    # Augment by repeating
    augmented_samples = dataset.samples * 100
    random.shuffle(augmented_samples)
    dataset.samples = augmented_samples
    # The sample list is not modified again, so the monitoring slice is fixed
    # for the whole run.
    eval_samples = dataset.samples[:200]

    loader = DataLoader(dataset, batch_size=BATCH_SIZE, shuffle=True, collate_fn=collate_fn)
    probe = CognitiveProbe().to(device)
    optimizer = torch.optim.AdamW(probe.parameters(), lr=LEARNING_RATE)
    criterion = nn.BCEWithLogitsLoss()
    probe.train()

    output_dir = os.path.join(OUTPUT_DIR, probe_name)
    os.makedirs(output_dir, exist_ok=True)

    # `step` counts micro-batches, not optimizer updates: the optimizer moves
    # once every GRADIENT_ACCUMULATION steps.
    step = 0
    best_sep = 0.0
    window_loss = 0.0
    window_steps = 0
    data_iter = iter(loader)
    while step < MAX_STEPS:
        try:
            batch = next(data_iter)
        except StopIteration:
            # Epoch exhausted: start a new (reshuffled) pass over the data.
            data_iter = iter(loader)
            batch = next(data_iter)

        input_ids = batch['input_ids'].to(device)
        attention_mask = batch['attention_mask'].to(device)
        labels = batch['labels'].to(device)

        # The backbone is frozen; only the probe receives gradients.
        with torch.no_grad():
            outputs = model(input_ids=input_ids, attention_mask=attention_mask, output_hidden_states=True)
            hidden_states = outputs.hidden_states

        # Batches are right-padded, so the last real token of each row sits
        # at (number of attended positions - 1).
        seq_lengths = attention_mask.sum(dim=1) - 1
        batch_indices = torch.arange(input_ids.size(0), device=device)
        fiber_out = probe.fiber(list(hidden_states), PROBE_LAYERS)
        last_fiber = fiber_out[batch_indices, seq_lengths, :]
        logits = probe.head(last_fiber).squeeze(-1)

        loss = criterion(logits, labels)
        # Track the unscaled loss so the logged value is a true mean per
        # micro-batch rather than a sum of accumulation-scaled terms.
        window_loss += loss.item()
        window_steps += 1
        (loss / GRADIENT_ACCUMULATION).backward()

        if (step + 1) % GRADIENT_ACCUMULATION == 0:
            optimizer.step()
            optimizer.zero_grad()
        step += 1

        sep = None
        if step % LOG_EVERY == 0:
            pos_scores, neg_scores = _score_samples(probe, model, eval_samples, device)
            sep = compute_separation(pos_scores, neg_scores)
            pos_mean = sum(pos_scores) / len(pos_scores) if pos_scores else 0
            neg_mean = sum(neg_scores) / len(neg_scores) if neg_scores else 0
            mean_loss = window_loss / window_steps
            print(f"Step {step:5d} | Loss: {mean_loss:.4f} | Pos: {pos_mean:.3f} | Neg: {neg_mean:.3f} | Sep: {sep:.2f}×")
            window_loss = 0.0
            window_steps = 0
            if sep > best_sep:
                best_sep = sep

        if step % SAVE_EVERY == 0:
            if sep is None:
                # Not a logging step: evaluate now. Otherwise reuse the value
                # just computed; the probe has not changed in between.
                pos_scores, neg_scores = _score_samples(probe, model, eval_samples, device)
                sep = compute_separation(pos_scores, neg_scores)
            ckpt_dir = os.path.join(output_dir, f"ckpt_{step}")
            os.makedirs(ckpt_dir, exist_ok=True)
            torch.save({
                'probe_layers': PROBE_LAYERS,
                'hidden_dim': HIDDEN_DIM,
                'fiber_dim': FIBER_DIM,
                'head_hidden': HEAD_HIDDEN,
                'fiber_projection': probe.fiber.state_dict(),
                'head_state': probe.head.state_dict(),
                'separation': sep,
                'step': step,
            }, os.path.join(ckpt_dir, f"{probe_name}_head.pt"))
            print(f">>> Saved: {ckpt_dir} (sep: {sep:.2f}×)")

    print(f"\n✅ FINISHED: {probe_name} - Best: {best_sep:.2f}×\n")
    return best_sep
# ═══════════════════════════════════════════════════════════════════════════════
# MAIN
# ═══════════════════════════════════════════════════════════════════════════════
def main():
    """Command-line entry point: train the requested probes and print a summary.

    ``--probe`` accepts ``all`` (every key of ``PROBES``) or a comma-separated
    list of probe names. Unknown names are reported and skipped. Names are
    validated before the backbone is loaded, so a mistyped name does not cost
    a full model load; if nothing valid remains the function returns early.
    """
    parser = argparse.ArgumentParser(description='Train CF-HoT behavioral probes')
    parser.add_argument('--probe', type=str, default='all',
                        help='Probe to train: depth, specificity, calibration, coherence, focus, or all')
    args = parser.parse_args()

    print("\n" + "="*70)
    print(" ARC-MAMBA-7B-CF-HOT TRAINING")
    print(" Control Field Holonomy Transformer - Behavioral Probes")
    print("="*70 + "\n")

    if args.probe == 'all':
        requested = list(PROBES.keys())
    else:
        requested = [p.strip() for p in args.probe.split(',')]

    probes_to_train = []
    for probe_name in requested:
        if not probe_name:
            # Stray comma such as "depth," -- nothing was actually requested.
            continue
        if probe_name not in PROBES:
            print(f"Unknown probe: {probe_name}")
            continue
        probes_to_train.append(probe_name)

    if not probes_to_train:
        print(f"Nothing to train. Available probes: {', '.join(PROBES)}")
        return

    device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
    print(f"Device: {device}")
    print(f"Loading {MODEL_NAME}...")
    tokenizer = AutoTokenizer.from_pretrained(MODEL_NAME, trust_remote_code=True)
    model = AutoModelForCausalLM.from_pretrained(
        MODEL_NAME,
        torch_dtype=torch.bfloat16,
        device_map="auto",
        trust_remote_code=True
    )
    # The backbone is only used for inference.
    model.eval()
    print("✓ Model loaded\n")

    results = {}
    for probe_name in probes_to_train:
        results[probe_name] = train_probe(probe_name, model, tokenizer, device)

    print("\n" + "="*70)
    print(" RESULTS")
    print("="*70)
    for name, sep in results.items():
        status = "✅" if sep > 100 else ("⚠️" if sep > 10 else "❌")
        print(f" {status} {name}: {sep:.2f}×")
    print("="*70 + "\n")


if __name__ == "__main__":
    main()