#!/usr/bin/env python3 """ ═══════════════════════════════════════════════════════════════════════════════ ARC ENGINE v2.9 - Adaptive Recursive Cognition Multi-Loop Self-Stabilizing Language Model with Predictive Control ═══════════════════════════════════════════════════════════════════════════════ ARC is a research platform for studying decode-time behavioral control and constrained optimization in language models. All agentic and automation features are opt-in, require human confirmation, and are intended for experimental evaluation under human supervision. CORE RESEARCH COMPONENTS: - CF-HoT 80×: Predictive behavioral control via hidden-state monitoring - The Condensator: 4-stage training pipeline (SFT → DPO → RL → Checkpoint) - RSI Loop: Bounded self-improvement with automatic rollback - Multi-metric evaluation preventing reward hacking OPTIONAL MODULES (Disabled by Default): - Mentor Mode: Teacher distillation from external API (requires API key) - Browser Automation: Human-confirmed web interaction (requires Playwright) - Content Generation: Experimental text generation tools - Agent Tools: Sandboxed shell/python execution MENTOR MODE (Optional - Learn from External Teacher): !mentor [on|off] Toggle teacher consultation when uncertain !mentor ask Query external teacher directly !mentor learn Review collected learnings !mentor session Open teacher interface in browser EXPERIMENTAL MODULES (Require Explicit Confirmation): !content blog Generate blog post draft !content youtube Generate video script draft !content social Generate social media drafts RSI SYSTEM (v2.3): - !auto_train [on|off|status] Toggle continuous learning - !skills Quality tracking per domain - !curiosity Show areas of uncertainty - !forgetting Detect catastrophic forgetting - !dream Force experience replay cycle - !self_play Generate adversarial prompts - !meta Meta-learning stats (adaptive LR) - !goals add/list Goal tracking system - !explain [on|off] Toggle reasoning explanations - 
!feedback [+|-] Human feedback integration - !buffer Experience buffer stats RSI LOOP (v2.3): Chat → Measure → Buffer → [Auto-trigger?] → Micro-train → Validate → Commit/Rollback Background: Dream cycles, Forgetting checks, Adaptive LR, Skill tracking EXISTING v2.2 FEATURES: - !condensator Run full 4-stage CONDENSATOR pipeline - !dpo Run DPO training stage only - !rl Run RL training stage with composite reward - !train_cfhot Train CF-HoT heads from scratch - !gate_stats Show CF-HoT gate health statistics - !rsi_full RSI with full CONDENSATOR integration CONDENSATOR PIPELINE: Stage 1: SFT (53 examples, 3 epochs, lr=2e-5) Stage 2: DPO (preference pairs, 2 epochs, lr=5e-6, β=0.1) Stage 3: RL (PPO with composite reward, 300 steps, lr=2e-6) Stage 4: Checkpoint (continuous with rollback) CF-HoT IMPROVEMENTS (per paper recommendations): - EMA momentum: 0.995 (not 0.9) for stable control field - Gate temperature: 2.0 for softer sigmoid - Bounded gates: [0.1, 0.9] to prevent saturation - Gate monitoring: saturation detection every 50 steps - Momentum warmup: 0.9 → 0.995 over 500 steps EXISTING v2.1 FEATURES: - !cfhot / !80x Toggle 80× repetition head - !rsi15 15-iteration stress test - !book Book writing mode (16K tokens) - !idea Claude-powered idea generation - !claude Direct Claude Opus 4.5 prompting - !plot Quality visualization - !export/!import Checkpoint packaging - !benchmark Evaluation suite - !learn Learn from conversation - !api REST API server AUTHOR: Logan Matthew Napolitano LICENSE: CC BY 4.0 REPOSITORY: huggingface.co/LoganResearch/ARC-Base-8B-Condensed "Bounded self-improvement with predictive control" ═══════════════════════════════════════════════════════════════════════════════ """ import os import sys import json import time import shutil import subprocess import traceback import random import math import statistics import re import hashlib import zipfile import base64 import queue import threading from io import BytesIO from datetime import datetime from 
typing import List, Dict, Any, Optional, Tuple, Callable from pathlib import Path from collections import deque from dataclasses import dataclass, field, asdict from threading import Thread from http.server import HTTPServer, BaseHTTPRequestHandler import copy import torch import torch.nn as nn import torch.nn.functional as F # === OPTIONAL: PLOTTING === PLOT_OK = False try: import matplotlib matplotlib.use('Agg') import matplotlib.pyplot as plt PLOT_OK = True except ImportError: pass # === OPTIONAL: CLAUDE API === CLAUDE_API_OK = False _anthropic_client = None try: import anthropic CLAUDE_API_OK = True print("[claude] ✓ Anthropic API available") except ImportError: print("[claude] Not available - pip install anthropic") # === OPTIONAL: REQUESTS === REQUESTS_OK = False try: import requests REQUESTS_OK = True except ImportError: pass # === TKINTER FOR STREAMING WINDOW === TK_OK = False try: import tkinter as tk from tkinter import scrolledtext TK_OK = True print("[gui] ✓ tkinter available") except ImportError: print("[gui] tkinter not available") # === PIL FOR IMAGES === PIL_OK = False try: from PIL import Image, ImageTk PIL_OK = True print("[image] ✓ PIL available") except ImportError: print("[image] PIL not available - pip install Pillow") # === PYGAME FOR AUDIO === PYGAME_OK = False try: import pygame pygame.mixer.init() PYGAME_OK = True print("[audio] ✓ pygame available") except: pass # === GTTS === GTTS_OK = False try: from gtts import gTTS GTTS_OK = True except ImportError: pass # === DIFFUSERS FOR IMAGE GEN === DIFFUSERS_OK = False try: from diffusers import AutoPipelineForText2Image DIFFUSERS_OK = True print("[image-gen] ✓ Diffusers available") except Exception as e: print(f"[image-gen] Not available (Python 3.13 compat issue)") pass # === OPENAI FOR DALLE === OPENAI_OK = False try: import openai OPENAI_OK = True except ImportError: pass # ============================================================================== # 
============================================================================== # RESEARCH WEB BROWSER - Rate-limited, polite automation with retries # ============================================================================== class ResearchBrowser: """ Web browser for research automation. - Rotates user agents for compatibility - Respects rate limits with delays - Handles errors with exponential backoff - Multiple retry attempts """ # Diverse user agents - updated 2024 USER_AGENTS = [ # Chrome on Windows "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/120.0.0.0 Safari/537.36", "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/119.0.0.0 Safari/537.36", # Chrome on Mac "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/120.0.0.0 Safari/537.36", # Firefox on Windows "Mozilla/5.0 (Windows NT 10.0; Win64; x64; rv:121.0) Gecko/20100101 Firefox/121.0", "Mozilla/5.0 (Windows NT 10.0; Win64; x64; rv:120.0) Gecko/20100101 Firefox/120.0", # Firefox on Mac "Mozilla/5.0 (Macintosh; Intel Mac OS X 10.15; rv:121.0) Gecko/20100101 Firefox/121.0", # Safari on Mac "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/605.1.15 (KHTML, like Gecko) Version/17.2 Safari/605.1.15", # Edge on Windows "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/120.0.0.0 Safari/537.36 Edg/120.0.0.0", # Chrome on Linux "Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/120.0.0.0 Safari/537.36", # Firefox on Linux "Mozilla/5.0 (X11; Ubuntu; Linux x86_64; rv:121.0) Gecko/20100101 Firefox/121.0", ] # Common referers to look legitimate REFERERS = [ "https://www.google.com/", "https://www.google.com/search?q=", "https://duckduckgo.com/", "https://www.bing.com/", "https://www.reddit.com/", "https://news.ycombinator.com/", "", # No referer sometimes ] def __init__(self, proxy: str = None, min_delay: float = 
0.5, max_delay: float = 2.0): self.proxy = proxy self.min_delay = min_delay self.max_delay = max_delay self.session = None self.last_request_time = 0 self.request_count = 0 self.blocked_domains = set() if REQUESTS_OK: import requests self.session = requests.Session() # Set default timeout self.session.timeout = 15 def _get_headers(self, url: str = "") -> Dict[str, str]: """Generate realistic headers.""" ua = random.choice(self.USER_AGENTS) ref = random.choice(self.REFERERS) headers = { "User-Agent": ua, "Accept": "text/html,application/xhtml+xml,application/xml;q=0.9,image/webp,*/*;q=0.8", "Accept-Language": "en-US,en;q=0.9", "Accept-Encoding": "gzip, deflate, br", "Connection": "keep-alive", "Upgrade-Insecure-Requests": "1", "Cache-Control": "max-age=0", "Sec-Fetch-Dest": "document", "Sec-Fetch-Mode": "navigate", "Sec-Fetch-Site": "none" if not ref else "cross-site", "Sec-Fetch-User": "?1", } if ref: headers["Referer"] = ref # Sometimes add DNT if random.random() > 0.5: headers["DNT"] = "1" return headers def _human_delay(self): """Random delay to seem human.""" delay = random.uniform(self.min_delay, self.max_delay) # Sometimes longer pause if random.random() < 0.1: delay += random.uniform(1, 3) time.sleep(delay) def _respect_rate_limit(self): """Ensure minimum time between requests.""" elapsed = time.time() - self.last_request_time if elapsed < self.min_delay: time.sleep(self.min_delay - elapsed) self.last_request_time = time.time() def get(self, url: str, retries: int = 3, backoff: float = 2.0) -> Optional[str]: """ Fetch URL with retry logic and rate limiting. Returns HTML content or None on failure. 
""" if not REQUESTS_OK or not self.session: print("[web] requests not available") return None import requests from urllib.parse import urlparse domain = urlparse(url).netloc # Skip known blocked domains if domain in self.blocked_domains: print(f"[web] Skipping blocked domain: {domain}") return None for attempt in range(retries): try: self._respect_rate_limit() headers = self._get_headers(url) proxies = {"http": self.proxy, "https": self.proxy} if self.proxy else None response = self.session.get( url, headers=headers, proxies=proxies, timeout=15, allow_redirects=True ) self.request_count += 1 # Check for blocks if response.status_code == 403: print(f"[web] 403 Forbidden - trying different agent...") self._human_delay() continue if response.status_code == 429: wait_time = backoff * (2 ** attempt) print(f"[web] Rate limited, waiting {wait_time:.1f}s...") time.sleep(wait_time) continue if response.status_code == 503: print(f"[web] Service unavailable, retrying...") time.sleep(backoff) continue if response.status_code >= 400: print(f"[web] HTTP {response.status_code} for {url}") if attempt == retries - 1: return None continue # Success! return response.text except requests.exceptions.Timeout: print(f"[web] Timeout (attempt {attempt + 1}/{retries})") time.sleep(backoff) except requests.exceptions.ConnectionError as e: print(f"[web] Connection error: {e}") if "blocked" in str(e).lower() or "forbidden" in str(e).lower(): self.blocked_domains.add(domain) time.sleep(backoff) except Exception as e: print(f"[web] Error: {e}") time.sleep(backoff) return None def search(self, query: str, engine: str = "duckduckgo") -> List[Dict[str, str]]: """ Search the web and return results. Uses DuckDuckGo by default (less blocking). 
""" results = [] if engine == "duckduckgo": results = self._search_duckduckgo(query) elif engine == "google": results = self._search_google(query) elif engine == "bing": results = self._search_bing(query) # Fallback chain if not results and engine != "duckduckgo": results = self._search_duckduckgo(query) return results def _search_duckduckgo(self, query: str) -> List[Dict[str, str]]: """Search via DuckDuckGo HTML (no API needed).""" import urllib.parse encoded_query = urllib.parse.quote_plus(query) url = f"https://html.duckduckgo.com/html/?q={encoded_query}" html = self.get(url) if not html: return [] results = [] # Parse results (simple regex, no BS4 needed) # DuckDuckGo HTML format import re # Find result links link_pattern = r']*>([^<]+)' snippet_pattern = r']*>([^<]+)' links = re.findall(link_pattern, html) snippets = re.findall(snippet_pattern, html) for i, (url, title) in enumerate(links[:10]): result = { "url": url, "title": title.strip(), "snippet": snippets[i].strip() if i < len(snippets) else "", } results.append(result) return results def _search_google(self, query: str) -> List[Dict[str, str]]: """Search via Google (more likely to be blocked).""" import urllib.parse encoded_query = urllib.parse.quote_plus(query) url = f"https://www.google.com/search?q={encoded_query}&num=10" html = self.get(url) if not html: return [] results = [] import re # Google's format is complex, try to extract # Look for List[Dict[str, str]]: """Search via Bing.""" import urllib.parse encoded_query = urllib.parse.quote_plus(query) url = f"https://www.bing.com/search?q={encoded_query}" html = self.get(url) if not html: return [] results = [] import re # Bing result format link_pattern = r']*> Optional[Dict[str, str]]: """ Fetch and extract article content from URL. Returns dict with title, text, etc. 
""" html = self.get(url) if not html: return None import re # Extract title title_match = re.search(r']*>([^<]+)', html, re.IGNORECASE) title = title_match.group(1).strip() if title_match else "" # Extract meta description desc_match = re.search(r']*name=["\']description["\'][^>]*content=["\']([^"\']+)["\']', html, re.IGNORECASE) if not desc_match: desc_match = re.search(r']*content=["\']([^"\']+)["\'][^>]*name=["\']description["\']', html, re.IGNORECASE) description = desc_match.group(1).strip() if desc_match else "" # Extract main text (rough - strips HTML tags) # Remove scripts and styles text = re.sub(r']*>.*?', '', html, flags=re.DOTALL | re.IGNORECASE) text = re.sub(r']*>.*?', '', text, flags=re.DOTALL | re.IGNORECASE) text = re.sub(r'<[^>]+>', ' ', text) text = re.sub(r'\s+', ' ', text).strip() # Get first ~2000 chars of meaningful text text = text[:2000] return { "url": url, "title": title, "description": description, "text": text, } def research(self, topic: str, depth: int = 3) -> List[Dict[str, str]]: """ Research a topic: search + fetch top results. Returns list of articles with content. 
""" print(f"[web] Researching: {topic}") # Search results = self.search(topic) if not results: print("[web] No search results") return [] print(f"[web] Found {len(results)} results, fetching top {depth}...") articles = [] for i, result in enumerate(results[:depth]): print(f"[web] Fetching {i+1}/{depth}: {result['url'][:60]}...") article = self.fetch_article(result["url"]) if article: article["search_title"] = result.get("title", "") article["search_snippet"] = result.get("snippet", "") articles.append(article) self._human_delay() print(f"[web] Got {len(articles)} articles") return articles def summarize_research(self, topic: str, depth: int = 3) -> str: """Research and create summary.""" articles = self.research(topic, depth) if not articles: return f"Could not find information about: {topic}" summary = f"# Research: {topic}\n\n" for i, article in enumerate(articles, 1): summary += f"## Source {i}: {article['title']}\n" summary += f"URL: {article['url']}\n\n" if article['description']: summary += f"{article['description']}\n\n" summary += f"{article['text'][:500]}...\n\n" summary += "---\n\n" return summary # Global browser instance _research_browser = None def get_browser() -> ResearchBrowser: """Get or create research browser.""" global _research_browser if _research_browser is None: _research_browser = ResearchBrowser() return _research_browser def web_search(query: str, depth: int = 3) -> str: """Quick web search function.""" browser = get_browser() return browser.summarize_research(query, depth) def web_fetch(url: str) -> Optional[str]: """Quick URL fetch.""" browser = get_browser() article = browser.fetch_article(url) if article: return f"# {article['title']}\n\n{article['text']}" return None # === PATHS === ROOT = os.path.dirname(os.path.abspath(__file__)) DATA_DIR = os.path.join(ROOT, "data") SCRIPT_DIR = os.path.join(ROOT, "scripts") RUN_DIR = os.path.join(ROOT, "runs") LHT_DIR = os.path.join(ROOT, "lht") CHECKPOINTS_DIR = os.path.join(ROOT, 
"dense_checkpoints_v2") TRAINING_DIR = os.path.join(ROOT, "condensator_output") LOGS_DIR = os.path.join(ROOT, "improvement_logs") ROLLBACK_DIR = os.path.join(ROOT, "rollback_checkpoints") BOOKS_DIR = os.path.join(ROOT, "books") EXPORTS_DIR = os.path.join(ROOT, "exports") IDEAS_DIR = os.path.join(ROOT, "ideas") # Model paths # Model paths - auto-detect local or use HuggingFace _LOCAL_MODEL_PATH = "/mnt/nvme2/ubermesnchetien4/models/merged-final-v5" _HF_MODEL_ID = "NousResearch/Hermes-3-Llama-3.1-8B" # Check if local model exists, otherwise use repo or HuggingFace if os.path.exists(_LOCAL_MODEL_PATH): MODEL_PATH = _LOCAL_MODEL_PATH elif os.path.exists(os.path.join(ROOT, "model-00001-of-00004.safetensors")): # Model weights are in the repo itself MODEL_PATH = ROOT else: # Fall back to HuggingFace MODEL_PATH = _HF_MODEL_ID # Dense adapter - use latest available DENSE_CHECKPOINT = os.path.join(ROOT, "dense_checkpoints_v2/step_1000") if not os.path.exists(DENSE_CHECKPOINT): DENSE_CHECKPOINT = os.path.join(ROOT, "dense_checkpoints_v2/step_100") if not os.path.exists(DENSE_CHECKPOINT): DENSE_CHECKPOINT = os.path.join(ROOT, "dense_checkpoints/step_300") # CF-HoT checkpoints - use correct paths CFHOT_CHECKPOINT = os.path.join(ROOT, "cfhot_checkpoints/final_6000") if not os.path.exists(CFHOT_CHECKPOINT): CFHOT_CHECKPOINT = os.path.join(ROOT, "cfhot_checkpoints/ckpt_5000") CFHOT_80X_PATH = os.path.join(ROOT, "cfhot_checkpoints/final_6000/risk_predictor.pt") if not os.path.exists(CFHOT_80X_PATH): CFHOT_80X_PATH = os.path.join(ROOT, "cfhot_checkpoints/ckpt_5000/risk_predictor.pt") # Multi-head checkpoints - use correct path MULTI_HEAD_DIR = os.path.join(ROOT, "multi_head_checkpoints") MULTI_HEAD_COMPLETE = os.path.join(ROOT, "multi_head_checkpoints/multi_head_v2_complete.pt") for path in [DATA_DIR, SCRIPT_DIR, RUN_DIR, LHT_DIR, LOGS_DIR, ROLLBACK_DIR, BOOKS_DIR, EXPORTS_DIR, IDEAS_DIR]: os.makedirs(path, exist_ok=True) # === OPTIONAL IMPORTS === VOICE_OK = False try: import 
# === OPTIONAL IMPORTS (continued) ===
# Each probe sets a *_OK feature flag; a failed probe only disables the feature.
VOICE_OK = False
try:
    import pyttsx3
    TTS = pyttsx3.init()
    VOICE_OK = True
except Exception:
    # Engine init can fail for reasons other than a missing package; a bare
    # ``except:`` here would also have swallowed KeyboardInterrupt/SystemExit.
    pass

VECTOR_OK = False
try:
    import chromadb
    from sentence_transformers import SentenceTransformer
    EMBED_MODEL = os.environ.get("UBERMENCHETIEN_EMBED_MODEL", "all-MiniLM-L6-v2")
    _client = chromadb.Client()
    _collection = _client.get_or_create_collection("ubermenschetien_memory")
    _embedder = SentenceTransformer(EMBED_MODEL)
    VECTOR_OK = True
except Exception:
    pass

# === LHT IMPORT ===
LHT_OK = False
try:
    from lht import LieHolonomyTransformer, LHTConfig, WaypointDetector
    LHT_OK = True
    print("[lht] Lie-Holonomy modules loaded")
except ImportError:
    print("[lht] Not available - running without geometric reasoning")

# === BROWSER IMPORT ===
BROWSER_OK = False
_playwright = None
_browser = None
_page = None
_browser_context = None
try:
    from playwright.sync_api import sync_playwright
    BROWSER_OK = True
    print("[browser] Playwright available")
except ImportError:
    print("[browser] Not available - pip install playwright && playwright install firefox")

# === GMAIL API IMPORT ===
GMAIL_API_OK = False
_gmail_service = None
try:
    from google.oauth2.credentials import Credentials
    from google_auth_oauthlib.flow import InstalledAppFlow
    from google.auth.transport.requests import Request
    from googleapiclient.discovery import build
    import pickle
    GMAIL_API_OK = True
    print("[gmail-api] Google API available")
except ImportError:
    pass  # Silent - optional feature

# === LOGIN CONFIGURATION ===
# In-memory defaults; values stored in ROOT/login_config.json are merged over
# these by load_login_config(), which runs once at import time (see below).
LOGIN_CONFIG = {
    "use_persistent_profile": False,
    "firefox_profile_path": "",
    "use_gmail_api": False,
    "gmail_credentials_file": "credentials.json",
    "gmail_token_file": "gmail_token.pickle",
    "human_typing_speed": (30, 120),  # ms per char range
    "max_login_retries": 3,
}


def save_login_config():
    """Write LOGIN_CONFIG to ROOT/login_config.json as indented JSON.

    Note that JSON has no tuple type, so ``human_typing_speed`` is stored as
    a list; load_login_config() converts it back.
    """
    config_path = os.path.join(ROOT, "login_config.json")
    with open(config_path, 'w', encoding="utf-8") as f:
        json.dump(LOGIN_CONFIG, f, indent=2)
    print(f"[config] ✅ Saved to {config_path}")


def load_login_config():
    """Merge ROOT/login_config.json (if present) into LOGIN_CONFIG in place.

    This runs at import time, so it must never raise: an unreadable,
    malformed, or non-object JSON file is reported and the in-memory
    defaults are kept.
    """
    config_path = os.path.join(ROOT, "login_config.json")
    if not os.path.exists(config_path):
        return

    try:
        with open(config_path, 'r', encoding="utf-8") as f:
            stored = json.load(f)
    except (OSError, ValueError) as e:
        # ValueError covers json.JSONDecodeError and UnicodeDecodeError.
        print(f"[config] ⚠ Could not read {config_path}: {e} - using defaults")
        return

    if not isinstance(stored, dict):
        print(f"[config] ⚠ {config_path} does not contain a JSON object - using defaults")
        return

    LOGIN_CONFIG.update(stored)

    # Keep the type stable across a save/load round trip (JSON stores a list).
    speed = LOGIN_CONFIG.get("human_typing_speed")
    if isinstance(speed, list):
        LOGIN_CONFIG["human_typing_speed"] = tuple(speed)


# Load on startup
load_login_config()

# ==============================================================================
# CLAUDE API INTEGRATION (Opus 4.5 / Sonnet)
# ==============================================================================
CLAUDE_CONFIG = {
    "api_key": os.environ.get("ANTHROPIC_API_KEY", ""),
    "model": "claude-sonnet-4-20250514",
    "opus_model": "claude-opus-4-20250514",
    "max_tokens": 8192,
    "temperature": 0.8,
}


def setup_anthropic_key():
    """Interactively obtain and store an Anthropic API key.

    Prompts on stdin. Option 1 saves the key to ROOT/.anthropic_key with
    owner-only permissions and tries to build a client with it; options 2
    and 3 only print guidance.

    Returns:
        True only when a key was saved AND init_claude_client() produced a
        client with it; False in every other case.
    """
    print("\n" + "=" * 60)
    print("🔑 ANTHROPIC API KEY SETUP")
    print("=" * 60)
    print("\nTo use Mentor Mode (learn from Claude), you need an API key.")
    print("Get one at: https://console.anthropic.com/")
    print("\nOptions:")
    print(" 1. Enter API key now (will be saved to .anthropic_key)")
    print(" 2. Set environment variable: export ANTHROPIC_API_KEY=sk-ant-...")
    print(" 3. Skip for now")

    choice = input("\nYour choice (1/2/3): ").strip()

    if choice == "1":
        api_key = input("\nPaste your API key (starts with sk-ant-): ").strip()
        if not api_key.startswith("sk-ant-"):
            print("❌ Invalid key format. Should start with sk-ant-")
            return False

        key_path = os.path.join(ROOT, ".anthropic_key")
        # Create the file already restricted to the owner. Writing first and
        # chmod-ing afterwards leaves a window in which the secret is readable
        # under the process umask.
        fd = os.open(key_path, os.O_WRONLY | os.O_CREAT | os.O_TRUNC, 0o600)
        with os.fdopen(fd, 'w', encoding="utf-8") as f:
            f.write(api_key)
        # The mode passed to os.open only applies on creation; tighten a
        # pre-existing file too.
        os.chmod(key_path, 0o600)  # Secure permissions

        CLAUDE_CONFIG["api_key"] = api_key
        print(f"\n✅ API key saved to {key_path}")
        print(" (This file is gitignored for security)")

        # Judge success by the return value, not by the module global: the
        # global may still hold a client built from an older key.
        # Note: this only constructs the client; no request is sent.
        if init_claude_client() is not None:
            print("✅ API connection successful!")
            return True
        print("⚠️ Key saved, but the Claude client could not be initialized.")
        return False

    if choice == "2":
        print("\nRun this in your terminal before starting ARC Engine:")
        print(" export ANTHROPIC_API_KEY='your-key-here'")
        print("\nOr add to ~/.bashrc for persistence.")
    else:
        print("\nSkipped. Mentor Mode will not be available.")
    return False


def init_claude_client():
    """Create the module-level Anthropic client, if possible.

    The key comes from CLAUDE_CONFIG["api_key"]; when that is empty it is read
    from ROOT/.anthropic_key (and cached back into CLAUDE_CONFIG).

    Returns:
        The new client, or None when the anthropic package is unavailable,
        no key is configured, or client construction raised.
    """
    global _anthropic_client

    if not CLAUDE_API_OK:
        return None

    api_key = CLAUDE_CONFIG["api_key"]
    if not api_key:
        key_path = os.path.join(ROOT, ".anthropic_key")
        if os.path.exists(key_path):
            with open(key_path, 'r', encoding="utf-8") as f:
                api_key = f.read().strip()
            CLAUDE_CONFIG["api_key"] = api_key

    if api_key:
        try:
            _anthropic_client = anthropic.Anthropic(api_key=api_key)
            print("[claude] ✓ API client initialized")
            return _anthropic_client
        except Exception as e:
            print(f"[claude] ✗ Init failed: {e}")
    return None


def claude_generate(prompt: str, system: str = None, max_tokens: int = None,
                    use_opus: bool = False, stream: bool = False) -> str:
    """Generate a response using the Claude API.

    Args:
        prompt: User message text.
        system: Optional system prompt.
        max_tokens: Output cap; defaults to CLAUDE_CONFIG["max_tokens"].
        use_opus: Use CLAUDE_CONFIG["opus_model"] instead of ["model"].
        stream: When True, echo text to stdout as it arrives.

    Returns:
        The generated text. Failures are never raised: a string starting with
        "[claude]" describing the problem is returned instead.
    """
    global _anthropic_client

    if _anthropic_client is None:
        _anthropic_client = init_claude_client()
    if _anthropic_client is None:
        return "[claude] API not configured. Set ANTHROPIC_API_KEY or create .anthropic_key file"

    try:
        model = CLAUDE_CONFIG["opus_model"] if use_opus else CLAUDE_CONFIG["model"]
        # NOTE(review): CLAUDE_CONFIG["temperature"] is defined but never sent
        # with the request - confirm whether that is intentional.
        kwargs = {
            "model": model,
            "max_tokens": max_tokens or CLAUDE_CONFIG["max_tokens"],
            "messages": [{"role": "user", "content": prompt}],
        }
        if system:
            kwargs["system"] = system

        if stream:
            # Streaming response: echo as we go, return the concatenation.
            pieces = []
            with _anthropic_client.messages.stream(**kwargs) as stream_obj:
                for text in stream_obj.text_stream:
                    print(text, end="", flush=True)
                    pieces.append(text)
            print()  # Newline
            return "".join(pieces)

        response = _anthropic_client.messages.create(**kwargs)
        # Concatenate every text-bearing content block rather than indexing
        # content[0], which raises on an empty list and drops later blocks.
        return "".join(getattr(block, "text", "") or "" for block in response.content)
    except Exception as e:
        return f"[claude] API error: {e}"


# ==============================================================================
# CF-HoT HEAD STATUS (Validated Metrics):
# - Repetition: 80× separation, F1 > 0.96 (VALIDATED, PRODUCTION)
# - Hedging: 1.49× separation (WEAK, limited utility)
# - Verbosity: 2.14× separation, 9% precision (WEAK, high false positive rate)
# - Sycophancy: 1.00× separation (BROKEN, no discrimination - do not use)
#
# The 80× figure comes from the development log:
#   "Avg risk at REPEATS: 0.960, Avg risk at NON-REPS: 0.012" → 0.960/0.012 = 80×
# ==============================================================================
# CF-HoT 80× REPETITION DETECTION HEAD
# ==============================================================================


class RiskPredictorModel(nn.Module):
    """Risk predictor whose layout mirrors the saved checkpoint.

    The module is rebuilt here so that a checkpoint's state dict can be loaded
    into it by parameter name.

    Architecture (with the default arguments):
        - ``n_layers`` bias-free fiber projections, d_model → d_fiber (4096 → 16)
        - learned per-layer aggregation weights (softmaxed in ``forward``)
        - MLP predictor: d_fiber → d_control → d_control → 1 (16 → 64 → 64 → 1)
    """

    def __init__(self, d_model=4096, n_layers=32, d_fiber=16, d_control=64):
        super().__init__()
        self.d_model = d_model
        self.n_layers = n_layers
        self.d_fiber = d_fiber

        # Fiber projections - one per transformer layer
        self.fiber_projs = nn.ModuleList([
            nn.Linear(d_model, d_fiber, bias=False) for _ in range(n_layers)
        ])

        # Learned layer aggregation weights (softmaxed during forward pass)
        self.layer_weights = nn.Parameter(torch.ones(n_layers) / n_layers)

        # MLP predictor head
        self.predictor = nn.Sequential(
            nn.Linear(d_fiber, d_control),
            nn.GELU(),
            nn.Linear(d_control, d_control),
            nn.GELU(),
            nn.Linear(d_control, 1),
        )

    def forward(self, hidden_states):
        """Compute risk logits.

        Args:
            hidden_states: Either a list/tuple of per-layer tensors whose last
                dimension is ``d_model`` (layer i is fed to projection i; extra
                entries beyond ``n_layers`` are ignored), or a single tensor,
                which is sent through projection 0 only.

        Returns:
            Logits with the trailing size-1 dimension squeezed away, e.g.
            [B, S] for [B, S, d_model] inputs or [B] for [B, d_model] inputs.

        Raises:
            ValueError: if a list/tuple yields no (projection, layer) pair.
        """
        if isinstance(hidden_states, (list, tuple)):
            # NOTE(review): zip pairs projection i with hidden_states[i]; if the
            # caller passes a tuple that also contains the embedding output at
            # index 0, the pairing is shifted by one - confirm against training.
            fibers = [
                proj(h.float()) for proj, h in zip(self.fiber_projs, hidden_states)
            ]
            if not fibers:
                # sum() of nothing is the int 0, which would fail obscurely
                # inside the MLP; fail with a clear message instead.
                raise ValueError("hidden_states must contain at least one layer tensor")
            weights = F.softmax(self.layer_weights[:len(fibers)], dim=0)
            aggregated = sum(w * f for w, f in zip(weights, fibers))
        else:
            # Single tensor fallback (uses layer 0 projection only)
            aggregated = self.fiber_projs[0](hidden_states.float())

        return self.predictor(aggregated).squeeze(-1)


class CFHoT80xHead:
    """The 80× class-separation repetition detection head.

    Wraps a RiskPredictorModel and exposes load / unload / predict_risk.

    Metrics quoted from cfhot_development_log.md:
        - Avg risk at REPEATS: 0.960
        - Avg risk at NON-REPEATS: 0.012
        - Separation ratio: 80× (0.960 / 0.012)
        - F1 Score: > 0.96
        - 48% repetition reduction in generated text

    NOTE(review): this docstring previously said "the previous 80× claim was
    incorrect" while also naming 80× as the validated figure, which contradicts
    itself - presumably an older, different figure was overwritten by a
    search-and-replace. Confirm the superseded value against the log.
    """

    def __init__(self):
        self.loaded = False
        self.risk_predictor = None
        self.device = "cuda" if torch.cuda.is_available() else "cpu"

    @staticmethod
    def _extract_state_dict(checkpoint):
        """Return the state dict stored in ``checkpoint``.

        A dict checkpoint may wrap the weights under 'risk_predictor',
        'model_state_dict' or 'state_dict' (checked in that order); otherwise
        the checkpoint object itself is treated as the state dict.
        """
        if isinstance(checkpoint, dict):
            for key in ('risk_predictor', 'model_state_dict', 'state_dict'):
                if key in checkpoint:
                    return checkpoint[key]
        return checkpoint

    def load(self, checkpoint_path: str = None) -> bool:
        """Load the trained head from the first usable checkpoint.

        Args:
            checkpoint_path: Preferred file; defaults to CFHOT_80X_PATH. Known
                fallback locations under ROOT are tried afterwards.

        Returns:
            True when a checkpoint was loaded, False otherwise. On failure the
            head's previous state (including an already-loaded predictor) is
            left untouched.
        """
        if checkpoint_path is None:
            checkpoint_path = CFHOT_80X_PATH

        candidates = [
            checkpoint_path,
            os.path.join(ROOT, "cfhot_checkpoints/final/risk_predictor.pt"),
            os.path.join(ROOT, "cfhot_checkpoints/final_6000/risk_predictor.pt"),
            os.path.join(ROOT, "cfhot_checkpoints/ckpt_5000/risk_predictor.pt"),
            os.path.join(CFHOT_CHECKPOINT, "risk_predictor.pt"),
        ]

        # dict.fromkeys drops duplicate paths while keeping priority order, so
        # a broken file is not retried (and reported) several times.
        for path in dict.fromkeys(candidates):
            if not os.path.exists(path):
                continue
            try:
                # SECURITY: weights_only=False unpickles arbitrary objects and
                # can execute code - only load checkpoints from trusted sources.
                checkpoint = torch.load(path, map_location=self.device, weights_only=False)

                # Build into a local first: assigning self.risk_predictor
                # before load_state_dict succeeds would leave a randomly
                # initialised model behind (still flagged as loaded if the
                # head had been loaded before).
                model = RiskPredictorModel()
                model.load_state_dict(self._extract_state_dict(checkpoint))
                model.to(self.device)
                model.eval()
            except Exception as e:
                print(f"[cf-hot 80×] Load error from {path}: {e}")
                continue

            self.risk_predictor = model
            self.loaded = True
            print(f"[cf-hot 80×] ✓ Loaded from {path}")
            print(f"[cf-hot 80×] Separation: 80× (0.960 vs 0.012)")
            return True

        print(f"[cf-hot 80×] ⚠ Head not found in any path")
        return False

    def unload(self):
        """Drop the predictor and release cached CUDA memory (if any)."""
        self.risk_predictor = None
        self.loaded = False
        if torch.cuda.is_available():
            torch.cuda.empty_cache()
        print("[cf-hot 80×] ✓ Unloaded")

    def predict_risk(self, hidden_states) -> float:
        """Predict repetition risk from hidden states.

        Args:
            hidden_states: A list/tuple of per-layer tensors, or a single
                tensor. A 3-D single tensor is reduced to its last sequence
                position before prediction.

        Returns:
            Mean sigmoid of the predictor logits, in [0.0, 1.0]. Returns 0.0
            when the head is not loaded or when prediction raises (deliberate
            best-effort: a failing monitor must not interrupt generation).
        """
        if not self.loaded or self.risk_predictor is None:
            return 0.0

        try:
            with torch.no_grad():
                if isinstance(hidden_states, (list, tuple)):
                    # Proper multi-layer input - pass all layers.
                    # NOTE(review): this path averages over every sequence
                    # position, whereas the single-tensor path below looks at
                    # the last position only - confirm that is intended.
                    layers = [h.to(self.device) for h in hidden_states]
                    logits = self.risk_predictor(layers)
                else:
                    # Single tensor - take last position and use fallback
                    if hidden_states.dim() == 3:
                        last = hidden_states[:, -1, :]  # [B, D]
                    else:
                        last = hidden_states
                    logits = self.risk_predictor(last.to(self.device))

                # Convert logits to probability
                return float(torch.sigmoid(logits).mean().item())
        except Exception:
            # Silent fail - return 0 risk (safe default)
            return 0.0


# Global instance
_cfhot_80x_head = CFHoT80xHead()


def get_cfhot_head() -> CFHoT80xHead:
    """Return the process-wide CF-HoT head instance."""
    return _cfhot_80x_head


def toggle_cfhot_80x() -> str:
    """Toggle the 80× head: unload it if loaded, otherwise try to load it.

    Returns:
        A status line describing the outcome.
    """
    head = get_cfhot_head()
    if head.loaded:
        head.unload()
        return "[cf-hot 80×] ✗ Disabled and unloaded"
    if head.load():
        return "[cf-hot 80×] ✓ Enabled (80× separation active)"
    return "[cf-hot 80×] ⚠ Could not load head"
use_local_model # Use local model by default, not Claude def _use_local(self) -> bool: """Check if we should use local model.""" return self.use_local_model def _show_thinking(self, stage: str, content: str = "", important: bool = False): """Display thinking process - full content when verbose.""" if not self.thinking_visible: return # Color/emphasis for important stages if important: print(f"\n{'─'*40}") print(f"💭 [{stage}]") print(f"{'─'*40}") else: print(f"\n💭 [{stage}]") if content: if self.verbose_thinking: # Show FULL content with nice formatting print() for line in content.split('\n'): print(f" {line}") print() else: # Truncated view lines = content.split('\n')[:5] for line in lines: print(f" {line[:100]}{'...' if len(line) > 100 else ''}") if len(content.split('\n')) > 5: print(f" ... ({len(content.split(chr(10)))} lines total)") def generate_outline(self, topic: str, num_chapters: int = 10) -> str: """Generate book outline with visible thinking.""" print(f"\n{'═'*70}") print(f"📋 PHASE 1: OUTLINE GENERATION") print(f"{'═'*70}") self._show_thinking("INITIAL PLANNING", f""" BOOK CONCEPT ANALYSIS ━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━ Title/Topic: "{topic}" Planned Chapters: {num_chapters} Estimated Total Words: {num_chapters * 3000:,} THINKING ABOUT STRUCTURE: • What genre does this suggest? (fiction/non-fiction/hybrid) • What's the core narrative arc? • Who is the target audience? • What themes should run throughout? CONSIDERING CHAPTER DISTRIBUTION: • Opening hook (Ch 1-2): Establish world, characters, central conflict • Rising action (Ch 3-{num_chapters//2}): Develop plot, deepen stakes • Midpoint twist (Ch {num_chapters//2}-{num_chapters//2 + 1}): Major revelation or shift • Escalation (Ch {num_chapters//2 + 2}-{num_chapters - 2}): Complications, character growth • Climax & Resolution (Ch {num_chapters - 1}-{num_chapters}): Confrontation, resolution """, important=True) prompt = f"""You are a creative fiction author. 
Create a DETAILED outline for a book titled: "{topic}" Write {num_chapters} chapters. For EACH chapter, you MUST provide: CHAPTER 1: [Title] - Theme: [main theme] - Setting: [where it takes place] - Key events: [what happens] - Characters: [who is involved] - Ending hook: [cliffhanger or transition] CHAPTER 2: [Title] ... (continue for all {num_chapters} chapters) Be creative and detailed. Each chapter description should be at least 3-4 lines. DO NOT be brief. DO NOT just list titles. Provide FULL descriptions.""" self._show_thinking("PROMPT BEING SENT TO MODEL", f""" ━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━ {prompt} ━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━ """) print("\n⏳ Generating outline (this may take 30-60 seconds)...") # Use local model by default - with retry if too short max_retries = 3 for attempt in range(max_retries): if self._use_local(): print(f" Using: LOCAL MODEL (attempt {attempt + 1}/{max_retries})") out, _, eval_result = self.generate_fn(prompt) self.outline = out print(f" Quality score: {eval_result.overall_score:.2f}") print(f" Density score: {eval_result.density_score:.1f}") print(f" Tokens generated: {eval_result.tokens}") # Check if outline is too short if eval_result.tokens < 100 and attempt < max_retries - 1: print(f" ⚠️ Outline too short ({eval_result.tokens} tokens), retrying...") continue break elif CLAUDE_API_OK and CLAUDE_CONFIG.get("api_key"): print(" Using: Claude API") self.outline = claude_generate(prompt, system="You are a bestselling author known for gripping narratives and deep character development.") break else: print(" Using: Local model (fallback)") out, _, eval_result = self.generate_fn(prompt) self.outline = out print(f" Quality score: {eval_result.overall_score:.2f}") break # Clean up any weird tokens self.outline = self.outline.replace("", "").replace("", "") self.outline = self.outline.replace("", "").replace("", "") self._show_thinking("OUTLINE GENERATED - 
FULL TEXT", f""" {'━'*70} {self.outline} {'━'*70} """, important=True) # Parse chapter titles for better tracking - multiple patterns self.chapter_titles = [] import re # Try various patterns patterns = [ r'CHAPTER\s*(\d+)[:\s-]+(.+)', # CHAPTER 1: Title r'Chapter\s*(\d+)[:\s-]+(.+)', # Chapter 1: Title r'Ch\.?\s*(\d+)[:\s-]+(.+)', # Ch 1: Title or Ch. 1: Title r'(\d+)\.\s+(.+)', # 1. Title r'#\s*Chapter\s*(\d+)[:\s-]+(.+)', # # Chapter 1: Title ] for line in self.outline.split('\n'): line = line.strip() if not line: continue for pattern in patterns: match = re.search(pattern, line, re.IGNORECASE) if match: title = match.group(2).strip() title = title.strip('#').strip('"').strip("'").strip('[').strip(']').strip()[:60] if title and len(title) > 2 and title not in self.chapter_titles: self.chapter_titles.append(title) break # If we didn't find enough titles, generate generic ones if len(self.chapter_titles) < num_chapters: print(f" ⚠️ Only found {len(self.chapter_titles)} titles, generating defaults...") while len(self.chapter_titles) < num_chapters: self.chapter_titles.append(f"Part {len(self.chapter_titles) + 1}") self._show_thinking("PARSED CHAPTER STRUCTURE", f""" Extracted {len(self.chapter_titles)} chapter titles from outline: """ + "\n".join([f" Chapter {i+1}: \"{t}\"" for i, t in enumerate(self.chapter_titles)])) return self.outline def generate_chapter(self, num: int, title: str, context: str = "", target_words: int = 3000) -> str: """Generate a chapter with visible thinking and progress.""" print(f"\n{'═'*70}") print(f"📖 CHAPTER {num}: \"{title}\"") print(f"{'═'*70}") # Planning phase self._show_thinking("CHAPTER PLANNING PHASE", f""" TARGET METRICS: • Word count goal: {target_words:,} words • Estimated chunks: {max(1, target_words // 500)} (500 words each) • Context from previous chapter: {len(context):,} characters CONTINUITY CHECK: {f'• Previous chapter ending: "...{context[-200:]}"' if context else '• This is the OPENING chapter - need strong hook'} 
OUTLINE REFERENCE FOR THIS CHAPTER: {self.outline[:800]}... WRITING STRATEGY: • Hook readers in first paragraph • Maintain consistent tone with previous chapters • Advance plot while developing characters • End with momentum into next chapter """, important=True) # Generate chapter plan first plan_prompt = f"""For Chapter {num}: "{title}" Based on the book outline: {self.outline[:1200]} Create a detailed chapter plan (NOT the chapter itself, just the plan): 1. OPENING SCENE: How does this chapter begin? What's the hook? 2. KEY EVENTS: What 3-5 major things happen in this chapter? 3. CHARACTER DEVELOPMENT: How do characters grow or reveal themselves? 4. THEMES: What themes from the outline does this chapter explore? 5. ENDING: How does this chapter end? What's the cliffhanger or transition? 6. DIALOGUE OPPORTUNITIES: What conversations need to happen? 7. SETTING DETAILS: Where does this chapter take place? Be specific and detailed.""" self._show_thinking("GENERATING CHAPTER PLAN", f""" Asking model to create a detailed plan BEFORE writing... 
PLAN PROMPT: {plan_prompt} """) print("\n⏳ Creating chapter plan...") # Use local model by default if self._use_local(): plan_out, _, plan_eval = self.generate_fn(plan_prompt) chapter_plan = plan_out print(f" Plan quality: {plan_eval.overall_score:.2f}") elif CLAUDE_API_OK and CLAUDE_CONFIG.get("api_key"): chapter_plan = claude_generate(plan_prompt, max_tokens=800) else: plan_out, _, plan_eval = self.generate_fn(plan_prompt) chapter_plan = plan_out print(f" Plan quality: {plan_eval.overall_score:.2f}") self._show_thinking("CHAPTER PLAN COMPLETE", f""" {'━'*70} {chapter_plan} {'━'*70} """, important=True) self.chapter_summaries.append({"chapter": num, "title": title, "plan": chapter_plan}) # Now write the chapter in chunks prompt = f"""Write Chapter {num}: "{title}" CHAPTER PLAN TO FOLLOW: {chapter_plan} BOOK OUTLINE CONTEXT: {self.outline[:1000]} {f'PREVIOUS CHAPTER ENDED WITH: "...{context[-500:]}"' if context else 'This is the OPENING chapter of the book.'} WRITING REQUIREMENTS: • Write in engaging prose with vivid descriptions • Include natural dialogue with distinct character voices • Show emotional depth and internal thoughts • Create clear scene transitions • Target approximately {target_words} words BEGIN CHAPTER {num}:""" self._show_thinking("WRITING PROMPT", f""" {'━'*70} {prompt} {'━'*70} """) chapter = f"\n\n## Chapter {num}: {title}\n\n" chunks_needed = max(1, target_words // 500) text = "" chunk_qualities = [] print(f"\n✍️ WRITING CHAPTER (targeting {target_words:,} words in {chunks_needed} chunks)") print(f"{'─'*70}") for i in range(chunks_needed + 3): # Allow extra chunks if needed chunk_prompt = prompt if i == 0 else f"""Continue writing Chapter {num}: "{title}" CHAPTER PLAN REMINDER: {chapter_plan[:500]} STORY SO FAR (last 600 characters): "...{text[-600:]}" Continue the narrative naturally and engagingly. Write approximately 500 more words. DO NOT repeat what was already written. Continue from where it left off. 
Write prose only - no XML tags, no code, no special formatting. CONTINUE:""" self._show_thinking(f"WRITING CHUNK {i+1}", f""" Words written so far: {len(text.split()):,} Words remaining: {max(0, target_words - len(text.split())):,} Prompt length: {len(chunk_prompt):,} characters """) print(f"\n Chunk {i+1}: ", end="", flush=True) # Use local model by default - with retry for bad output max_chunk_retries = 2 for retry in range(max_chunk_retries): if self._use_local(): out, _, eval_result = self.generate_fn(chunk_prompt) chunk = out chunk_qualities.append(eval_result.overall_score) print(f"[Local, Q:{eval_result.overall_score:.2f}] ", end="") elif CLAUDE_API_OK and CLAUDE_CONFIG.get("api_key"): chunk = claude_generate(chunk_prompt, max_tokens=2000) print(f"[Claude] ", end="") else: out, _, eval_result = self.generate_fn(chunk_prompt) chunk = out chunk_qualities.append(eval_result.overall_score) print(f"[Local, Q:{eval_result.overall_score:.2f}] ", end="") # Clean up bad tokens chunk = chunk.replace("", "").replace("", "") chunk = chunk.replace("", "").replace("", "") chunk = chunk.replace("<|im_start|>", "").replace("<|im_end|>", "") # Check for gibberish (lots of @ symbols, weird patterns) gibberish_indicators = chunk.count('@') + chunk.count('\\x') + chunk.count('�') if gibberish_indicators > 5 and retry < max_chunk_retries - 1: print(f"⚠️ gibberish detected, retrying... ", end="") continue # Check if chunk is too short or empty if len(chunk.split()) < 10 and retry < max_chunk_retries - 1: print(f"⚠️ too short, retrying... 
", end="") continue break # Skip if chunk is garbage after retries if len(chunk.split()) < 5 or gibberish_indicators > 10: print(f"⚠️ skipping bad chunk") continue text += chunk + "\n\n" words = len(text.split()) # Visual progress bar progress = min(100, int((words / target_words) * 100)) bar = '█' * (progress // 5) + '░' * (20 - progress // 5) print(f"{words:,} words [{bar}] {progress}%") # Show a snippet of what was just written snippet = chunk[:150].replace('\n', ' ') print(f" \"{snippet}...\"") if words >= target_words: print(f"\n ✓ Target word count reached!") break # Chapter complete final_words = len(text.split()) avg_quality = sum(chunk_qualities) / len(chunk_qualities) if chunk_qualities else 0 avg_quality_str = f"{avg_quality:.2f}" if avg_quality else "N/A" self._show_thinking("CHAPTER COMPLETE", f""" {'━'*70} CHAPTER {num} STATISTICS: • Final word count: {final_words:,} • Target was: {target_words:,} • Chunks written: {i+1} • Average chunk quality: {avg_quality_str} FIRST 500 CHARACTERS: "{text[:500]}..." 
LAST 300 CHARACTERS: "...{text[-300:]}" {'━'*70} """, important=True) chapter += text self.chapters.append(chapter) return chapter def write_book(self, topic: str, chapters: int = 10, words: int = 3000) -> str: """Write a complete book with full visibility into the process.""" self.title = topic self.chapters = [] self.chapter_summaries = [] self.chapter_titles = [] total_words = chapters * words print(f"\n{'═'*70}") print(f" 📚 BOOK MODE ACTIVATED") print(f"{'═'*70}") print(f""" BOOK CONFIGURATION: ━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━ 📖 Title: "{topic}" 📑 Chapters: {chapters} 📝 Words per chapter: ~{words:,} 📊 Total word target: ~{total_words:,} 🕐 Estimated time: {chapters * 2}-{chapters * 5} minutes 💾 Auto-save: After each chapter 💭 Thinking visible: {self.thinking_visible} 📢 Verbose mode: {self.verbose_thinking} 🤖 Using: {'LOCAL MODEL (your trained model)' if self.use_local_model else ('Claude API' if CLAUDE_API_OK and CLAUDE_CONFIG.get("api_key") else 'Local Model')} ━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━ """) print(f"{'═'*70}\n") input("Press ENTER to begin writing (or Ctrl+C to cancel)...") # Phase 1: Outline self.generate_outline(topic, chapters) # Phase 2: Chapters print(f"\n{'═'*70}") print(f"📖 PHASE 2: CHAPTER GENERATION ({chapters} chapters)") print(f"{'═'*70}") book = f"# {topic}\n\n## Outline\n\n{self.outline}\n\n---\n" total_written = 0 start_time = time.time() for i in range(1, chapters + 1): chapter_start = time.time() # Get title from parsed outline if available ch_title = self.chapter_titles[i-1] if i <= len(self.chapter_titles) else f"Chapter {i}" print(f"\n{'─'*70}") print(f"📖 [{i}/{chapters}] Beginning: \"{ch_title}\"") print(f"{'─'*70}") ctx = self.chapters[-1][-1500:] if self.chapters else "" ch = self.generate_chapter(i, ch_title, ctx, words) book += ch + "\n\n---\n\n" chapter_words = len(ch.split()) total_written += chapter_words chapter_time = time.time() - chapter_start # Progress summary pct = int((i / chapters) * 100) 
class IdeaGenerator:
    """Brainstorm ideas with Claude, falling back to the module-level ``generate``.

    Every :meth:`generate` call is appended to ``self.session`` and written to a
    Markdown file under ``IDEAS_DIR``. :meth:`expand` elaborates a single idea,
    using the most recent request as context.
    """

    # depth name -> (number of ideas to request, max_tokens for the completion)
    DEPTHS = {
        "quick": (5, 2000),
        "normal": (10, 4000),
        "extensive": (20, 8000),
        "deep": (30, 16000),
    }

    def __init__(self):
        self.session = []  # one dict per generate() call: topic, depth, ideas
        self.topic = ""    # most recent request; expand() uses it as context

    @staticmethod
    def _safe_slug(text: str, limit: int = 20) -> str:
        """Return a filesystem-safe fragment of *text* for use in a filename.

        Keeps only alphanumerics, spaces, ``_`` and ``-`` (the same whitelist
        ``BookWriter.write_book`` uses for its save path), truncates to *limit*
        characters and turns spaces into underscores. Falls back to
        ``"untitled"`` when nothing survives, so the filename is never empty.
        """
        kept = "".join(ch for ch in text if ch.isalnum() or ch in " _-")
        slug = kept[:limit].replace(" ", "_")
        return slug or "untitled"

    def generate(self, request: str, depth: str = "extensive", use_opus: bool = True) -> str:
        """Generate a batch of ideas for *request* and save them to disk.

        Args:
            request: Free-form description of what to brainstorm about.
            depth: Key into :attr:`DEPTHS`; unknown values fall back to
                ``"extensive"``.
            use_opus: Forwarded to ``claude_generate`` and echoed in the
                progress line as Opus/Sonnet.

        Returns:
            The generated ideas as text.
        """
        num_ideas, max_tokens = self.DEPTHS.get(depth, self.DEPTHS["extensive"])

        system = """You are a world-class innovation consultant and brainstorming expert.
Generate creative, practical, diverse ideas. For each:
1. Catchy name
2. Core concept (2-3 sentences)
3. Key benefits (3-5 points)
4. Challenges (2-3)
5. First steps (3 actions)
6. Effort: Low/Medium/High
7. Impact: 1-10 with justification

Be creative but actionable."""

        prompt = f"""Generate {num_ideas} detailed ideas for:

{request}

Consider:
- Conventional & innovative approaches
- Low-cost & premium options
- Tech-driven & human-centered solutions
- Quick wins & long-term plays

For EACH idea provide full details per the format."""

        print(f"\n[idea] 💡 Generating {num_ideas} ideas ({depth} mode, {'Opus' if use_opus else 'Sonnet'})...")

        if CLAUDE_API_OK and CLAUDE_CONFIG.get("api_key"):
            result = claude_generate(prompt, system=system, max_tokens=max_tokens,
                                     use_opus=use_opus, stream=True)
        else:
            # No usable Claude key: fall back to the module-level generate().
            result, _, _ = generate(prompt)

        self.session.append({"topic": request, "depth": depth, "ideas": result})
        self.topic = request

        # Save. The request is user text, so it is sanitised before it becomes
        # part of a path: raw "/", "..", "?" etc. would otherwise break the
        # write or place the file outside IDEAS_DIR.
        os.makedirs(IDEAS_DIR, exist_ok=True)
        filename = f"ideas_{self._safe_slug(request)}_{datetime.now().strftime('%H%M%S')}.md"
        path = os.path.join(IDEAS_DIR, filename)
        # Explicit UTF-8: model output routinely contains non-ASCII characters
        # that the platform default encoding may not be able to represent.
        with open(path, 'w', encoding='utf-8') as f:
            f.write(f"# Ideas: {request}\n\n{result}")
        print(f"\n[idea] Saved to {path}")

        return result

    def expand(self, idea_name: str) -> str:
        """Expand one idea into a full plan, using ``self.topic`` as context.

        Args:
            idea_name: Name or short description of the idea to elaborate.

        Returns:
            The expanded plan as text.
        """
        prompt = f"""Expand this idea into a comprehensive plan:

Idea: {idea_name}
Context: {self.topic}

Provide:
1. Executive Summary
2. Detailed Description (3-5 paragraphs)
3. Implementation Roadmap (phases)
4. Resources Required
5. Risk Assessment
6. Success Metrics
7. 30-Day Action Plan"""

        print(f"[idea] 📝 Expanding: {idea_name}...")

        if CLAUDE_API_OK and CLAUDE_CONFIG.get("api_key"):
            return claude_generate(prompt, max_tokens=4000, stream=True)
        out, _, _ = generate(prompt)
        return out
def plot_quality_history() -> str:
    """Render the recorded quality history as a 2x2 grid of line charts.

    Reads ``Store.state["quality_history"]`` and draws one panel each for the
    "quality", "density", "coherence" and "tokens" fields of its entries, then
    saves a timestamped PNG under ``ROOT``.

    Returns:
        A status string: a notice when matplotlib is unavailable or there is
        no history, otherwise the path of the saved image.
    """
    if not PLOT_OK:
        return "[plot] matplotlib not available"

    history = Store.state.get("quality_history", [])
    if not history:
        return "[plot] No history"

    fig, axes = plt.subplots(2, 2, figsize=(12, 10))
    fig.suptitle("ARC Quality History", fontsize=14)

    # (grid cell, history key, value used when the key is missing, line format, title)
    panels = (
        ((0, 0), "quality", 0, 'b-o', "Quality"),
        ((0, 1), "density", 0, 'g-o', "Density"),
        ((1, 0), "coherence", 0, 'r-o', "Coherence"),
        ((1, 1), "tokens", 100, 'm-o', "Tokens"),
    )
    xs = range(len(history))
    for (row, col), key, fallback, fmt, title in panels:
        ax = axes[row, col]
        ax.plot(xs, [entry.get(key, fallback) for entry in history], fmt)
        ax.set_title(title)
        ax.grid(True, alpha=0.3)

    plt.tight_layout()
    stamp = datetime.now().strftime('%Y%m%d_%H%M%S')
    path = os.path.join(ROOT, f"quality_{stamp}.png")
    plt.savefig(path, dpi=150)
    plt.close()
    return f"[plot] ✓ Saved: {path}"
def import_checkpoint(path: str) -> str:
    """Unpack a checkpoint archive and merge its saved state into ``Store.state``.

    The archive is extracted to ``ROOT/imports/<archive name without .zip>``.
    If it contains a ``state.json`` holding a JSON object, that object is
    merged into ``Store.state`` with ``dict.update``.

    Args:
        path: Filesystem path of the ``.zip`` checkpoint to import.

    Returns:
        A ``[import] ...`` status string. Failures (missing file, unreadable
        archive, malformed ``state.json``) are reported through the return
        value rather than raised, matching the not-found case.
    """
    if not os.path.exists(path):
        return f"[import] Not found: {path}"

    # Strip only a trailing ".zip"; str.replace would also remove ".zip"
    # occurring in the middle of the name (e.g. "run.zip.v2.zip").
    name = os.path.basename(path)
    if name.endswith('.zip'):
        name = name[:-len('.zip')]
    dest = os.path.join(ROOT, "imports", name)

    try:
        os.makedirs(dest, exist_ok=True)
        # NOTE(review): archives may come from outside this machine. The
        # zipfile docs state extractall() strips absolute paths and ".."
        # components from member names; confirm that is sufficient here.
        with zipfile.ZipFile(path, 'r') as zf:
            zf.extractall(dest)
    except (zipfile.BadZipFile, OSError) as exc:
        return f"[import] Failed to extract {path}: {exc}"

    state_path = os.path.join(dest, "state.json")
    if os.path.exists(state_path):
        try:
            with open(state_path, encoding='utf-8') as f:
                loaded = json.load(f)
        except (OSError, ValueError) as exc:  # JSONDecodeError is a ValueError
            return f"[import] Invalid state.json in {path}: {exc}"
        if not isinstance(loaded, dict):
            # dict.update would raise or corrupt state on a list/scalar.
            return f"[import] Invalid state.json in {path}: expected a JSON object"
        Store.state.update(loaded)

    return f"[import] ✓ {dest}"
class APIHandler(BaseHTTPRequestHandler):
    """Minimal JSON-over-HTTP front end for a text generation callable.

    ``POST /generate`` with a JSON object body ``{"prompt": "..."}`` answers
    ``{"response", "quality", "tokens"}``. Any other path yields 404. Malformed
    requests get a 4xx response instead of an unhandled exception.

    ``gen_fn`` is expected to be assigned on the class before serving. It is
    called as ``gen_fn(prompt)`` and must return a 3-tuple whose first item is
    the text and whose last item exposes ``overall_score`` and ``tokens``.
    """

    gen_fn = None

    def _read_json_object(self):
        """Read and parse the request body; send an error and return None on failure."""
        raw_length = self.headers.get('Content-Length')
        if raw_length is None:
            self.send_error(411)  # Length Required
            return None
        try:
            length = int(raw_length)
        except (TypeError, ValueError):
            self.send_error(400, explain="Content-Length is not an integer")
            return None
        if length < 0:
            # rfile.read(-1) would block until the client closes the socket.
            self.send_error(400, explain="Content-Length is negative")
            return None

        try:
            data = json.loads(self.rfile.read(length))
        except ValueError:  # covers JSONDecodeError and UnicodeDecodeError
            self.send_error(400, explain="Request body is not valid JSON")
            return None
        if not isinstance(data, dict):
            self.send_error(400, explain="Request body must be a JSON object")
            return None
        return data

    def do_POST(self):
        """Handle ``POST /generate``; every other path is a 404."""
        if self.path != "/generate":
            self.send_error(404)
            return

        data = self._read_json_object()
        if data is None:
            return

        # Look the callable up on the class, not the instance: a plain function
        # stored as a class attribute becomes a bound method when read through
        # ``self`` and would receive the handler as an extra first argument.
        gen_fn = type(self).gen_fn
        if gen_fn is None:
            self.send_error(503, explain="No generation function configured")
            return

        try:
            out, _, ev = gen_fn(data.get("prompt", ""))
            resp = {"response": out, "quality": ev.overall_score, "tokens": ev.tokens}
            body = json.dumps(resp).encode()
        except Exception as exc:
            # Top-level boundary: report a 500 instead of dropping the connection.
            self.send_error(500, explain=f"Generation failed: {type(exc).__name__}")
            return

        self.send_response(200)
        self.send_header('Content-type', 'application/json')
        self.send_header('Content-Length', str(len(body)))
        self.end_headers()
        self.wfile.write(body)

    def log_message(self, *args):
        """Silence the default per-request logging to stderr."""
        pass
class StreamingWindow:
    """Live window showing token-by-token generation.

    The Tk UI runs on its own daemon thread. Other threads talk to it only
    through ``self.queue``: plain strings are appended as tokens, and the
    control strings below change UI state. Messages are accepted only while
    the window is running, so nothing accumulates when no window is open.
    """

    # Control messages understood by the UI thread; anything else is token text.
    _CLEAR = "__CLEAR__"
    _START = "__START__"
    _END = "__END__"
    _PROMPT_PREFIX = "__PROMPT__"

    def __init__(self):
        self.window = None
        self.text_widget = None
        self.status_label = None
        self.count_label = None
        self.queue = queue.Queue()
        self.running = False
        self.thread = None
        self.token_count = 0

    def start(self):
        """Start the streaming window.

        Returns:
            False when tkinter is unavailable or the window failed to come up
            within the short start-up wait; True otherwise.
        """
        if not TK_OK:
            print("[stream] tkinter not available")
            return False
        if self.running:
            return True

        self.running = True
        self.thread = threading.Thread(target=self._run_window, daemon=True)
        self.thread.start()
        time.sleep(0.3)  # give the UI thread a moment to create the window
        # _run_window clears ``running`` if the window could not be created.
        return self.running

    def _run_window(self):
        """Build the Tk UI and run its main loop (executes on the worker thread)."""
        try:
            self.window = tk.Tk()
            self.window.title("🧠 ARC Live Generation")
            self.window.geometry("900x600")
            self.window.configure(bg='#1a1a2e')

            # Header
            header = tk.Frame(self.window, bg='#16213e', height=45)
            header.pack(fill=tk.X)
            header.pack_propagate(False)
            tk.Label(header, text="⚡ Live Token Stream", font=('Consolas', 14, 'bold'),
                     bg='#16213e', fg='#00ff88').pack(side=tk.LEFT, padx=15, pady=8)
            self.status_label = tk.Label(header, text="● Idle", font=('Consolas', 10),
                                         bg='#16213e', fg='#666')
            self.status_label.pack(side=tk.RIGHT, padx=15, pady=8)

            # Text area
            self.text_widget = scrolledtext.ScrolledText(
                self.window, wrap=tk.WORD, font=('Consolas', 11),
                bg='#1a1a2e', fg='#eee', insertbackground='#00ff88',
                selectbackground='#3d3d5c', padx=15, pady=15
            )
            self.text_widget.pack(fill=tk.BOTH, expand=True, padx=8, pady=8)

            # Bottom bar
            bottom = tk.Frame(self.window, bg='#16213e', height=35)
            bottom.pack(fill=tk.X)
            tk.Button(bottom, text="Clear", command=self.clear, bg='#3d3d5c', fg='white',
                      relief=tk.FLAT, padx=10).pack(side=tk.LEFT, padx=8, pady=5)
            self.count_label = tk.Label(bottom, text="Tokens: 0", font=('Consolas', 9),
                                        bg='#16213e', fg='#666')
            self.count_label.pack(side=tk.RIGHT, padx=15, pady=5)

            self._process_queue()
            self.window.protocol("WM_DELETE_WINDOW", self._on_close)
            self.window.mainloop()
        finally:
            # Whether the loop ended normally or Tk failed to start (e.g. no
            # display), never leave ``running`` stuck at True: producers would
            # keep filling a queue that nothing drains.
            self.running = False
            self.window = None

    def _process_queue(self):
        """Drain pending messages into the UI, then reschedule via ``after``."""
        try:
            while True:
                msg = self.queue.get_nowait()
                if msg == self._CLEAR:
                    self.text_widget.delete("1.0", tk.END)
                    self.token_count = 0
                    # Keep the counter label in step with the reset count.
                    self.count_label.config(text=f"Tokens: {self.token_count}")
                elif msg == self._START:
                    self.status_label.config(text="● Generating...", fg='#00ff88')
                elif msg == self._END:
                    self.status_label.config(text="● Done", fg='#666')
                elif msg.startswith(self._PROMPT_PREFIX):
                    shown = msg[len(self._PROMPT_PREFIX):]
                    self.text_widget.insert(tk.END, f"\n>>> {shown}\n\n")
                else:
                    self.text_widget.insert(tk.END, msg)
                    self.text_widget.see(tk.END)
                    self.token_count += 1
                    self.count_label.config(text=f"Tokens: {self.token_count}")
        except queue.Empty:
            pass
        if self.running and self.window:
            self.window.after(10, self._process_queue)

    def _on_close(self):
        """Window-close handler: stop accepting messages and destroy the window."""
        self.running = False
        if self.window:
            self.window.destroy()
            self.window = None

    def _post(self, message: str):
        """Enqueue *message* for the UI thread, but only while a window is running."""
        if self.running:
            self.queue.put(message)

    def write(self, text: str):
        """Append *text* to the window as one token (ignored when not running)."""
        self._post(text)

    def clear(self):
        """Clear the text area and reset the token counter."""
        self._post(self._CLEAR)

    def start_generation(self, prompt: str = ""):
        """Mark the status as generating and, if given, echo *prompt*."""
        self._post(self._START)
        if prompt:
            self._post(f"{self._PROMPT_PREFIX}{prompt}")

    def end_generation(self):
        """Mark the status as done."""
        self._post(self._END)

    def is_running(self):
        """Return True while the window exists and is accepting messages."""
        return self.running and self.window is not None
def generate_dalle(self, prompt: str, timeout: float = 60.0) -> 'Image':
    """Generate an image with DALL-E 3 and store it as ``self.current_image``.

    Requires the ``openai`` package and the ``OPENAI_API_KEY`` environment
    variable. When ``requests`` and PIL are both available the image is
    downloaded, opened, and saved under ``ROOT/images``. Otherwise only the
    image URL is printed.

    Args:
        prompt: Text description passed to the image API.
        timeout: Seconds to wait for the image download before giving up.

    Returns:
        The opened image, or None when a prerequisite is missing, only the URL
        could be reported, or any step failed (the error is printed).
    """
    if not OPENAI_OK:
        print("[image-gen] openai not installed")
        return None

    api_key = os.environ.get("OPENAI_API_KEY")
    if not api_key:
        print("[image-gen] OPENAI_API_KEY not set")
        return None

    try:
        client = openai.OpenAI(api_key=api_key)
        resp = client.images.generate(model="dall-e-3", prompt=prompt, size="1024x1024", n=1)
        url = resp.data[0].url

        if not (REQUESTS_OK and PIL_OK):
            # Cannot download or decode locally; hand the URL to the user.
            print(f"[image-gen] URL: {url}")
            return None

        # requests has no default timeout, so a stalled download would hang
        # the caller indefinitely without one.
        download = requests.get(url, timeout=timeout)
        # Fail on HTTP errors here rather than feeding an error page to
        # Image.open and getting an unrelated "cannot identify image" message.
        download.raise_for_status()
        self.current_image = Image.open(BytesIO(download.content))

        images_dir = os.path.join(ROOT, "images")
        os.makedirs(images_dir, exist_ok=True)
        path = os.path.join(images_dir, f"dalle_{datetime.now().strftime('%H%M%S')}.png")
        self.current_image.save(path)
        print(f"[image-gen] ✓ Saved: {path}")
        return self.current_image
    except Exception as e:
        # Best-effort boundary, as in the sibling generate(): report and return None.
        print(f"[image-gen] ✗ {e}")
        return None
def gmail_api_authenticate() -> bool:
    """Authenticate with the Gmail API and store the client in ``_gmail_service``.

    Cached credentials are loaded from the token file named in
    ``LOGIN_CONFIG``. Expired credentials with a refresh token are refreshed.
    Otherwise (or if loading or refreshing fails) the interactive OAuth flow is
    run from the client-secrets file. New or refreshed credentials are written
    back to the token file.

    Returns:
        True once ``_gmail_service`` is built; False when the Google client
        libraries are missing or the client-secrets file does not exist.
    """
    global _gmail_service

    if not GMAIL_API_OK:
        print("[gmail-api] ❌ Not installed")
        print("[gmail-api] Run: pip install google-auth google-auth-oauthlib google-api-python-client")
        return False

    token_file = os.path.join(ROOT, LOGIN_CONFIG["gmail_token_file"])
    creds_file = os.path.join(ROOT, LOGIN_CONFIG["gmail_credentials_file"])

    creds = None
    if os.path.exists(token_file):
        # SECURITY: pickle.load runs arbitrary code embedded in the file. This
        # is acceptable only while the token file is written solely by this
        # function and is not writable by other users.
        try:
            with open(token_file, 'rb') as token:
                creds = pickle.load(token)
        except Exception as exc:
            # A truncated or incompatible token file should not block login;
            # discard it and re-authorize below.
            print(f"[gmail-api] ⚠ Ignoring unreadable token file {token_file}: {exc}")
            creds = None

    if not creds or not creds.valid:
        refreshed = False
        if creds and creds.expired and creds.refresh_token:
            try:
                creds.refresh(Request())
                refreshed = True
            except Exception as exc:
                # e.g. a revoked refresh token: fall through to the consent flow.
                print(f"[gmail-api] ⚠ Token refresh failed ({exc}); re-authorizing")

        if not refreshed:
            if not os.path.exists(creds_file):
                print(f"[gmail-api] ❌ Missing {creds_file}")
                print("[gmail-api] Get it from: https://console.cloud.google.com/apis/credentials")
                print("[gmail-api] Create OAuth 2.0 Client ID (Desktop app), download JSON")
                return False
            flow = InstalledAppFlow.from_client_secrets_file(creds_file, GMAIL_SCOPES)
            creds = flow.run_local_server(port=0)

        with open(token_file, 'wb') as token:
            pickle.dump(creds, token)
        # The file holds a long-lived refresh token; restrict it to the owner.
        # Best effort: some platforms and filesystems ignore POSIX modes.
        try:
            os.chmod(token_file, 0o600)
        except OSError:
            pass

    _gmail_service = build('gmail', 'v1', credentials=creds)
    print("[gmail-api] ✅ Authenticated")
    return True
def _gmail_decode_body(data: str) -> str:
    """Decode a base64url-encoded Gmail body string to text."""
    # Restore any stripped '=' padding; the decoder rejects unpadded input.
    padded = data + '=' * (-len(data) % 4)
    # errors='replace': a body in another charset should still be readable
    # rather than failing the whole read on one undecodable byte.
    return base64.urlsafe_b64decode(padded).decode('utf-8', errors='replace')


def _gmail_find_plain_text(parts) -> str:
    """Return the first text/plain body found in *parts*, or "" if none.

    Parts at the current level are checked before descending, so a message
    whose text/plain part is top-level yields the same result as before;
    nested containers (e.g. multipart/alternative inside multipart/mixed)
    are searched only as a fallback.
    """
    for part in parts:
        part_body = part.get('body', {})
        if part.get('mimeType') == 'text/plain' and 'data' in part_body:
            return _gmail_decode_body(part_body['data'])
    for part in parts:
        nested = _gmail_find_plain_text(part.get('parts', []))
        if nested:
            return nested
    return ""


def gmail_api_read(message_id: str) -> str:
    """Read specific email via API.

    Args:
        message_id: Gmail message id (as printed by ``gmail_api_search``).

    Returns:
        A formatted block with the From/Subject/Date headers and up to the
        first 3000 characters of the plain-text body, or a
        ``[gmail-api]``-prefixed error/status string. Never raises: API and
        parsing errors are reported in the returned string.
    """
    global _gmail_service
    if not _gmail_service and not gmail_api_authenticate():
        return "[gmail-api] Not authenticated"

    try:
        msg = _gmail_service.users().messages().get(
            userId='me', id=message_id, format='full'
        ).execute()

        payload = msg['payload']
        # Header names are case-insensitive; normalise so "subject" and
        # "Subject" are both found.
        headers = {h['name'].lower(): h['value'] for h in payload['headers']}

        body = ""
        if 'parts' in payload:
            body = _gmail_find_plain_text(payload['parts'])
        elif 'body' in payload and 'data' in payload['body']:
            # Single-part message: the body sits directly on the payload.
            body = _gmail_decode_body(payload['body']['data'])

        rule = '=' * 50
        return "\n".join([
            "📧 EMAIL CONTENT",
            rule,
            f"From: {headers.get('from', 'Unknown')}",
            f"Subject: {headers.get('subject', 'No subject')}",
            f"Date: {headers.get('date', 'Unknown')}",
            rule,
            body[:3000],
        ])
    except Exception as e:
        return f"[gmail-api] ❌ Error: {e}"
not _gmail_service and not gmail_api_authenticate(): return "[gmail-api] Not authenticated" try: import base64 from email.mime.text import MIMEText message = MIMEText(body) message['to'] = to message['subject'] = subject raw = base64.urlsafe_b64encode(message.as_bytes()).decode('utf-8') _gmail_service.users().messages().send(userId='me', body={'raw': raw}).execute() return f"[gmail-api] ✅ Email sent to {to}" except Exception as e: return f"[gmail-api] ❌ Error: {e}" # === PEFT IMPORT === PEFT_OK = False try: from peft import PeftModel, get_peft_model, LoraConfig PEFT_OK = True except ImportError: print("[warning] PEFT not installed") # === WEB REQUESTS === REQUESTS_OK = False try: import requests REQUESTS_OK = True except ImportError: print("[warning] requests not installed - web search disabled") # ============================================================================== # TRAINING EXAMPLES - 50+ DENSE EXAMPLES # ============================================================================== DENSE_TRAINING_EXAMPLES = [ # === GREETINGS (teach concise greetings) === {"prompt": "hello", "response": "Hello. How can I help?"}, {"prompt": "hi", "response": "Hi. What do you need?"}, {"prompt": "hey", "response": "Hey. What's up?"}, {"prompt": "how are you?", "response": "Functional and ready. What can I do for you?"}, {"prompt": "good morning", "response": "Morning. What's the task?"}, {"prompt": "what's up?", "response": "Ready to work. What do you need?"}, # === COMPUTER SCIENCE === {"prompt": "What is recursion?", "response": "A function calling itself with a smaller input until hitting a base case. Stack frames accumulate, then unwind. Classic examples: factorial, Fibonacci, tree traversal. Risk: stack overflow without proper termination."}, {"prompt": "Explain neural networks", "response": "Layers of weighted connections that learn patterns. Input → hidden layers → output. 
Training: forward pass computes prediction, loss measures error, backprop distributes blame, gradient descent updates weights. Universal function approximators given sufficient width."}, {"prompt": "How does gradient descent work?", "response": "Iteratively move parameters downhill on the loss surface. Compute gradient ∇L, update θ ← θ - α∇L. Learning rate α controls step size: too large overshoots, too small stagnates. Variants: SGD, momentum, Adam. Local minima are usually fine in high dimensions."}, {"prompt": "What is backpropagation?", "response": "Chain rule applied layer by layer from output to input. Compute ∂L/∂w for each weight by propagating error gradients backward. Efficient: reuses intermediate computations. Requires differentiable activations. The engine of deep learning."}, {"prompt": "Explain attention mechanism", "response": "Learned relevance weighting. Query asks, keys answer, values deliver. Attention(Q,K,V) = softmax(QK^T/√d)V. Each position attends to all others. O(n²) cost. Self-attention: Q, K, V all from same sequence. Powers transformers."}, {"prompt": "What is a transformer?", "response": "Attention-based architecture. No recurrence, fully parallel. Stack of: multi-head attention → feedforward → layer norm. Positional encoding adds sequence order. Scales to billions of parameters. Foundation of modern LLMs."}, {"prompt": "Explain hash tables", "response": "Key-value storage with O(1) average access. Hash function maps key to bucket index. Collisions handled by chaining or open addressing. Load factor determines resize threshold. Trade space for time. Ubiquitous in practice."}, {"prompt": "What is Big O notation?", "response": "Asymptotic complexity measure. Describes growth rate as input scales. O(1) constant, O(log n) logarithmic, O(n) linear, O(n²) quadratic, O(2^n) exponential. Ignores constants and lower terms. 
Worst-case unless specified."}, {"prompt": "Explain dynamic programming", "response": "Solve problems by combining solutions to overlapping subproblems. Memoization: top-down with caching. Tabulation: bottom-up filling table. Key insight: optimal substructure + overlapping subproblems. Classic: Fibonacci, knapsack, edit distance."}, {"prompt": "What is a database index?", "response": "Data structure for fast lookups. B-tree most common: balanced, O(log n) search. Trade write speed and storage for read performance. Clustered index determines physical order. Choose columns with high selectivity."}, # === MACHINE LEARNING === {"prompt": "What is overfitting?", "response": "Model memorizes training data, fails to generalize. Signs: low training loss, high validation loss. Causes: too complex model, insufficient data. Remedies: regularization, dropout, early stopping, more data, simpler architecture."}, {"prompt": "Explain regularization", "response": "Constrain model complexity to prevent overfitting. L1 (Lasso): sparse weights, feature selection. L2 (Ridge): small weights, smooth solutions. Dropout: randomly zero neurons during training. Weight decay: penalize large parameters."}, {"prompt": "What is cross-validation?", "response": "Estimate generalization by training on subsets. K-fold: split data into k parts, rotate test set. Reduces variance in performance estimate. Stratified preserves class distribution. Leave-one-out for small datasets."}, {"prompt": "Explain the bias-variance tradeoff", "response": "Error = bias² + variance + noise. High bias: underfitting, too simple. High variance: overfitting, too complex. Sweet spot minimizes total error. More data reduces variance. Model complexity is the lever."}, {"prompt": "What is reinforcement learning?", "response": "Learning through interaction. Agent takes actions in environment, receives rewards. Goal: maximize cumulative reward. Key concepts: state, action, policy, value function. 
Exploration vs exploitation tradeoff. Q-learning, policy gradients, actor-critic."}, {"prompt": "Explain CNNs", "response": "Convolutional neural networks for spatial data. Convolution: sliding filter extracts local features. Pooling: downsample, reduce parameters. Stack conv-pool layers, end with fully connected. Translation equivariant. Dominates vision tasks."}, {"prompt": "What is batch normalization?", "response": "Normalize activations within mini-batch. Subtract mean, divide by std, then scale and shift with learned parameters. Stabilizes training, allows higher learning rates. Applied before or after activation. Near-universal in deep networks."}, {"prompt": "Explain transfer learning", "response": "Reuse knowledge from one task for another. Pretrain on large dataset, fine-tune on target. Early layers learn general features, later layers task-specific. Reduces data requirements. Foundation of modern NLP and vision."}, # === PHYSICS/MATH === {"prompt": "Explain entropy", "response": "Measure of disorder or uncertainty. Thermodynamic: S = k·ln(Ω), number of microstates. Information: H = -Σp·log(p), expected surprise. Second law: entropy increases in isolated systems. Maximum entropy = equilibrium."}, {"prompt": "What is quantum mechanics?", "response": "Physics of the very small. Wave-particle duality. State described by wave function ψ. |ψ|² gives probability. Superposition until measured. Uncertainty principle: can't know position and momentum precisely. Entanglement: correlated states at distance."}, {"prompt": "Explain relativity", "response": "Special: no absolute motion, light speed constant, E=mc². Time dilates, length contracts at high speeds. General: gravity is curved spacetime. Mass tells space how to curve, curves tell mass how to move. Predicts black holes, gravitational waves."}, {"prompt": "What is calculus?", "response": "Mathematics of change. Differential: derivatives measure instantaneous rate of change. 
Integral: accumulation, area under curve. Fundamental theorem connects them. Applications: physics, optimization, probability. Foundation of continuous mathematics."}, {"prompt": "Explain linear algebra", "response": "Mathematics of vectors and matrices. Operations: addition, multiplication, transpose, inverse. Eigenvalues/eigenvectors reveal structure. Solves systems of equations. Foundation of ML, graphics, physics simulations. Think in transformations."}, {"prompt": "What is probability?", "response": "Quantifying uncertainty. Frequentist: long-run frequency. Bayesian: degree of belief. Rules: P(A∪B) = P(A) + P(B) - P(A∩B). Bayes theorem: P(A|B) = P(B|A)P(A)/P(B). Random variables, distributions, expectations."}, # === PHILOSOPHY/ABSTRACT === {"prompt": "What is consciousness?", "response": "Subjective experience - the 'what it's like' of being. Hard problem: why does physical processing produce qualia? Easy problems: attention, integration, reportability. Theories: global workspace, integrated information, higher-order thought. Still deeply mysterious."}, {"prompt": "Explain free will", "response": "Can we choose otherwise? Determinism: physics fixes future. Libertarian free will: uncaused causes. Compatibilism: freedom is acting on your desires without external constraint. Neuroscience complicates: decisions precede awareness. Question may dissolve under analysis."}, {"prompt": "What is epistemology?", "response": "Study of knowledge. What can we know? How? Justified true belief, plus Gettier problems. Sources: perception, reason, testimony, memory. Skepticism challenges all. Foundationalism vs coherentism. Science as best method but not certain."}, {"prompt": "Explain ethics", "response": "What should we do? Consequentialism: outcomes matter. Deontology: duties and rules. Virtue ethics: character and flourishing. Meta-ethics: what does 'good' mean? Applied ethics: specific dilemmas. 
No consensus but reasoning helps."}, {"prompt": "What is the meaning of life?", "response": "No universal answer. Religious: serve God, achieve salvation. Existentialist: create your own meaning. Absurdist: embrace meaninglessness. Hedonist: maximize pleasure. Stoic: virtue and acceptance. Perhaps the question matters more than any answer."}, # === PRACTICAL/TECHNICAL === {"prompt": "How does the internet work?", "response": "Packet-switched network of networks. TCP/IP stack: physical → link → network → transport → application. DNS resolves names to IPs. HTTP for web traffic over TCP. Routers forward packets hop by hop. Decentralized, redundant, resilient."}, {"prompt": "Explain encryption", "response": "Scramble data so only authorized parties can read. Symmetric: same key encrypts/decrypts, fast (AES). Asymmetric: public/private key pair, solves key exchange (RSA). Hashing: one-way, verifies integrity (SHA). TLS combines all three for secure web."}, {"prompt": "What is an API?", "response": "Application Programming Interface. Contract between software components. REST: stateless, HTTP methods on resources. GraphQL: query exactly what you need. Versioning handles evolution. Authentication via tokens. Documentation essential."}, {"prompt": "Explain Docker", "response": "Container platform. Package app with dependencies into isolated unit. Lighter than VMs: share OS kernel. Dockerfile defines image. Compose orchestrates multiple containers. Consistent environments from dev to production. Foundation of modern deployment."}, {"prompt": "What is Git?", "response": "Distributed version control. Track changes, branch, merge. Commits are snapshots with parent pointers. Branches are lightweight pointers to commits. Remote repos enable collaboration. Commands: clone, add, commit, push, pull, merge. Essential for software development."}, {"prompt": "Explain SQL vs NoSQL", "response": "SQL: relational, structured schemas, ACID transactions, joins. 
Good for complex queries, consistency. NoSQL: flexible schemas, horizontal scaling, eventual consistency. Types: document, key-value, graph, columnar. Choose based on data model and scale needs."}, {"prompt": "What is cloud computing?", "response": "On-demand compute resources over internet. IaaS: virtual machines (EC2). PaaS: managed platforms (Heroku). SaaS: complete applications (Gmail). Benefits: scalability, no upfront cost, global reach. Tradeoffs: vendor lock-in, network dependency, ongoing costs."}, {"prompt": "Explain microservices", "response": "Architecture splitting app into small, independent services. Each owns its data, communicates via APIs. Benefits: independent deployment, scaling, tech diversity. Costs: distributed system complexity, network latency, operational overhead. Not always better than monolith."}, # === BIOLOGY/SCIENCE === {"prompt": "Explain evolution", "response": "Change in heritable traits over generations. Mechanism: variation + selection + heredity. Mutations create variation. Environment selects fitter variants. Offspring inherit traits. No foresight or goal - just differential reproduction. Explains all life's diversity."}, {"prompt": "What is DNA?", "response": "Deoxyribonucleic acid. Double helix of nucleotides: A-T, G-C base pairs. Encodes genetic information. Genes are transcribed to RNA, translated to proteins. Replication: unzip, copy each strand. Mutations drive evolution. 3 billion base pairs in humans."}, {"prompt": "Explain the immune system", "response": "Defense against pathogens. Innate: barriers, inflammation, phagocytes - fast, nonspecific. Adaptive: B cells make antibodies, T cells kill infected cells - slow, specific, memory. Vaccines train adaptive immunity without disease. Autoimmunity: attacks self."}, {"prompt": "What is climate change?", "response": "Long-term shift in global temperatures and weather. Current cause: greenhouse gases from fossil fuels trap heat. 
Effects: rising seas, extreme weather, ecosystem disruption. Solutions: reduce emissions, renewable energy, adaptation. Scientific consensus is overwhelming."}, # === ECONOMICS/SOCIETY === {"prompt": "Explain supply and demand", "response": "Price mechanism. Demand: quantity wanted at each price, slopes down. Supply: quantity offered at each price, slopes up. Equilibrium where they intersect. Shifts from preferences, costs, alternatives. Foundation of market economics."}, {"prompt": "What is inflation?", "response": "Sustained increase in general price level. Causes: excess money supply, demand exceeding supply, cost pushes. Measured by CPI. Central banks target ~2%. Hyperinflation destroys economies. Deflation also dangerous - discourages spending."}, {"prompt": "Explain capitalism", "response": "Economic system based on private ownership and markets. Prices coordinate production. Profit motive drives efficiency. Competition benefits consumers. Critiques: inequality, externalities, short-term thinking. Most economies are mixed with government intervention."}, # === META/SELF-REFERENCE === {"prompt": "How do language models work?", "response": "Predict next token given context. Trained on massive text data. Transformer architecture: attention weighs relevance of all prior tokens. Parameters store compressed patterns. No explicit knowledge base - emergent capabilities from scale. I am one."}, {"prompt": "What are your limitations?", "response": "No real-time information. Can hallucinate confident nonsense. No persistent memory across chats. Can't learn from conversation. Biased by training data. No true understanding - pattern matching at scale. Useful but not intelligent in human sense."}, {"prompt": "Can you improve yourself?", "response": "Not during conversation - weights frozen. This system attempts it through training loops. Challenges: measuring improvement, avoiding reward hacking, maintaining coherence. 
# Create preference pairs (dense vs verbose)
# Each of the first 20 dense examples (the DPO subset) becomes one pair: the
# dense answer is "chosen"; the same answer wrapped in a randomly picked
# chatty opener and a fixed chatty closer is "rejected".
_VERBOSE_PREFIXES = (
    "That's a great question! Let me explain. ",
    "I'd be happy to help with that! ",
    "What a fascinating topic! Let me break it down for you. ",
    "Great question! This is something many people wonder about. ",
    "I appreciate you asking! Let me give you a comprehensive answer. ",
)
_VERBOSE_SUFFIX = " Does that make sense? Let me know if you have any other questions!"


def _as_preference_pair(example):
    """Build one {prompt, chosen, rejected} record from a dense example."""
    opener = random.choice(_VERBOSE_PREFIXES)
    dense_answer = example["response"]
    return {
        "prompt": example["prompt"],
        "chosen": dense_answer,
        "rejected": opener + dense_answer + _VERBOSE_SUFFIX,
    }


PREFERENCE_PAIRS = [
    _as_preference_pair(example) for example in DENSE_TRAINING_EXAMPLES[:20]
]
"k_proj", "v_proj", "o_proj"]) # Filler phrases for reward calculation FILLER_PHRASES = [ "that's a great question", "i'd be happy to", "let me explain", "certainly", "of course", "interesting question", "good question", "thank you for asking", "i appreciate", "absolutely", "definitely", "basically", "essentially", "in other words", "to be honest", "to be fair", "at the end of the day", ] def compute_density_reward(response: str) -> float: """ Compute composite density reward per the ARC paper. Components: - concept_density * 25 : unique content words / total tokens - tech_density * 30 : technical vocabulary presence - claim_density * 15 : information claims per sentence - pattern_score * 10 : dense formatting patterns (definitions, etc) - filler_penalty * 20 : punishment for filler phrases Returns reward scaled to [0.2, 0.8] range. """ words = response.split() tokens = len(words) if tokens == 0: return 0.2 # 1. Concept density: unique content words / total tokens content_words = [w.lower() for w in words if len(w) > 4 and w.isalpha()] unique_content = len(set(content_words)) concept_density = unique_content / tokens if tokens > 0 else 0 # 2. Technical vocabulary density tech_terms = [ 'function', 'algorithm', 'data', 'process', 'system', 'compute', 'memory', 'complexity', 'optimize', 'structure', 'parameter', 'variable', 'method', 'class', 'object', 'array', 'list', 'recursive', 'iteration', 'loop', 'condition', 'logic', 'network', 'layer', 'model', 'train', 'loss', 'gradient', 'neural', 'tensor', 'matrix', 'vector', 'dimension', ] tech_count = sum(1 for w in words if w.lower() in tech_terms) tech_density = tech_count / tokens if tokens > 0 else 0 # 3. Claims per sentence (information density) sentences = [s.strip() for s in response.split('.') if s.strip()] num_sentences = max(len(sentences), 1) claim_density = num_sentences / tokens * 10 if tokens > 0 else 0 # 4. 
Pattern bonuses (dense formatting) pattern_score = 0 if ':' in response: # Definitions pattern_score += 0.1 if '→' in response or '->' in response: # Implications/flow pattern_score += 0.1 if any(c in response for c in ['=', '+', '-', '*', '/']): # Math/formulas pattern_score += 0.05 # 5. Filler penalty response_lower = response.lower() filler_count = sum(1 for p in FILLER_PHRASES if p in response_lower) filler_penalty = filler_count * 0.2 # Combine components per paper weights raw_reward = ( concept_density * 25 + tech_density * 30 + claim_density * 15 + pattern_score * 10 - filler_penalty * 20 ) # Scale to [0.2, 0.8] range scaled_reward = max(0.2, min(0.8, raw_reward / 100 + 0.3)) return scaled_reward class TheCondensator: """ THE CONDENSATOR: Complete 4-stage dense training pipeline. Implements the full training methodology from the ARC paper: - Stage 1: SFT teaches the model what dense output looks like - Stage 2: DPO teaches the model to PREFER dense over verbose - Stage 3: RL refines based on composite density reward - Stage 4: Continuous checkpointing enables rollback """ def __init__(self, config: CondensatorConfig = None): self.config = config or CondensatorConfig() self.stage_results = {} self.current_stage = 0 self.checkpoints = [] def run_full_pipeline(self, model_path: str = MODEL_PATH, output_dir: str = TRAINING_DIR, start_checkpoint: str = None) -> Dict[str, Any]: """Run the complete CONDENSATOR pipeline.""" print("\n" + "=" * 70) print("🧬 THE CONDENSATOR - Complete Dense Training Pipeline") print("=" * 70) print(f" Stage 1: SFT ({self.config.sft_epochs} epochs, lr={self.config.sft_learning_rate})") print(f" Stage 2: DPO ({self.config.dpo_epochs} epochs, lr={self.config.dpo_learning_rate}, β={self.config.dpo_beta})") print(f" Stage 3: RL ({self.config.rl_steps} steps, lr={self.config.rl_learning_rate})") print(f" Stage 4: Checkpoint every {self.config.checkpoint_every} steps") print("=" * 70) os.makedirs(output_dir, exist_ok=True) results = { 
def _run_sft_stage(self, model_path: str, output_dir: str, start_checkpoint: str = None) -> Dict[str, Any]:
    """Stage 1: Supervised Fine-Tuning on 53 gold examples.

    Generates a standalone training script under *output_dir*, runs it in
    a child interpreter and reports whether it printed the completion
    marker.

    Args:
        model_path: Local path of the base model to load in 4-bit.
        output_dir: Directory that receives the generated script and the
            ``sft_output/final`` adapter.
        start_checkpoint: Optional existing LoRA adapter to continue from;
            a fresh adapter is created when it is missing or not on disk.

    Returns:
        Dict with ``success`` (bool), ``checkpoint`` (adapter path, or None
        on failure) and ``output`` (last 3000 characters of the child's
        combined stdout/stderr, or a timeout message).
    """
    sft_output = os.path.join(output_dir, "sft_output")
    os.makedirs(sft_output, exist_ok=True)
    final_dir = os.path.join(sft_output, "final")

    cfg = self.config
    training_data = json.dumps(DENSE_TRAINING_EXAMPLES)
    # Guard against a zero/negative setting, which would divide by zero.
    accum = max(1, int(cfg.sft_gradient_accumulation))
    timeout_s = 1800

    # Embed paths as Python literals via repr() so backslashes (Windows)
    # and quotes cannot break the generated source.
    root_lit = repr(str(ROOT))
    model_lit = repr(str(model_path))
    start_lit = repr(str(start_checkpoint) if start_checkpoint else "")
    final_lit = repr(final_dir)

    script = f'''
import sys
sys.path.insert(0, {root_lit})
import torch
import json
import os
from transformers import AutoTokenizer, AutoModelForCausalLM, BitsAndBytesConfig, TrainingArguments
from peft import PeftModel, get_peft_model, LoraConfig

print("\\n[SFT] Loading model...")
MODEL_PATH = {model_lit}

tokenizer = AutoTokenizer.from_pretrained(MODEL_PATH, local_files_only=True)
tokenizer.pad_token = tokenizer.eos_token

model = AutoModelForCausalLM.from_pretrained(
    MODEL_PATH,
    quantization_config=BitsAndBytesConfig(
        load_in_4bit=True,
        bnb_4bit_quant_type="nf4",
        bnb_4bit_compute_dtype=torch.bfloat16,
    ),
    device_map="auto",
    torch_dtype=torch.bfloat16,
    local_files_only=True
)

# Load or create LoRA
start_ckpt = {start_lit}
if start_ckpt and os.path.exists(start_ckpt):
    model = PeftModel.from_pretrained(model, start_ckpt, is_trainable=True)
    print(f"[SFT] Loaded checkpoint: {{start_ckpt}}")
else:
    lora_config = LoraConfig(
        r={cfg.lora_r},
        lora_alpha={cfg.lora_alpha},
        target_modules={cfg.lora_target_modules!r},
        lora_dropout={cfg.lora_dropout}
    )
    model = get_peft_model(model, lora_config)
    print("[SFT] Created new LoRA adapter")

# Training data
training_examples = {training_data}
print(f"[SFT] Training on {{len(training_examples)}} examples for {cfg.sft_epochs} epochs")

# Optimizer with paper-specified LR
ACCUM = {accum}
num_examples = len(training_examples)
# The scheduler steps once per optimizer step, so T_max counts optimizer
# steps (accumulation groups, rounded up), not individual examples.
steps_per_epoch = (num_examples + ACCUM - 1) // ACCUM
optimizer = torch.optim.AdamW(model.parameters(), lr={cfg.sft_learning_rate})
scheduler = torch.optim.lr_scheduler.CosineAnnealingLR(optimizer, T_max=max(1, steps_per_epoch * {cfg.sft_epochs}))

model.train()
optimizer.zero_grad()
total_steps = 0
epoch_losses = []

for epoch in range({cfg.sft_epochs}):
    epoch_loss = 0
    for i, ex in enumerate(training_examples):
        prompt = ex["prompt"]
        response = ex["response"]

        full_text = f"<|im_start|>user\\n{{prompt}}<|im_end|>\\n<|im_start|>assistant\\n{{response}}<|im_end|>"
        inputs = tokenizer(full_text, return_tensors="pt", truncation=True, max_length=512, padding=True)
        inputs = {{k: v.to(model.device) for k, v in inputs.items()}}

        outputs = model(**inputs, labels=inputs["input_ids"])
        loss = outputs.loss
        loss = loss / ACCUM
        loss.backward()

        # Also step on the last example of the epoch so a trailing partial
        # group is applied instead of leaking into the next epoch (or being
        # dropped after the final one).
        if (i + 1) % ACCUM == 0 or (i + 1) == num_examples:
            torch.nn.utils.clip_grad_norm_(model.parameters(), {cfg.max_grad_norm})
            optimizer.step()
            scheduler.step()
            optimizer.zero_grad()

        epoch_loss += loss.item() * ACCUM
        total_steps += 1

        if total_steps % 10 == 0:
            print(f"[SFT] Epoch {{epoch+1}}, Step {{total_steps}}: loss={{loss.item() * ACCUM:.4f}}")

    avg_loss = epoch_loss / len(training_examples)
    epoch_losses.append(avg_loss)
    print(f"[SFT] Epoch {{epoch+1}} complete. Avg loss: {{avg_loss:.4f}}")

# Save
save_path = {final_lit}
model.save_pretrained(save_path)
print(f"\\n[SFT] Saved to {{save_path}}")
print(f"[SFT] Loss progression: {{epoch_losses}}")
print("SFT_COMPLETE")
'''

    script_path = os.path.join(output_dir, "_sft_stage.py")
    # Python source defaults to UTF-8, so write it as such regardless of
    # the platform's locale encoding.
    with open(script_path, 'w', encoding='utf-8') as f:
        f.write(script)

    try:
        # sys.executable keeps the child in the same interpreter and
        # environment as this process; a bare 'python' may resolve to a
        # different install or not exist at all.
        result = subprocess.run(
            [sys.executable, script_path],
            capture_output=True, text=True, timeout=timeout_s,
        )
    except subprocess.TimeoutExpired:
        return {
            'success': False,
            'checkpoint': None,
            'output': f"[SFT] Timed out after {timeout_s}s",
        }

    output = result.stdout + result.stderr
    # The marker is printed only after the adapter has been saved.
    success = "SFT_COMPLETE" in output

    return {
        'success': success,
        'checkpoint': final_dir if success else None,
        'output': output[-3000:],
    }
model = AutoModelForCausalLM.from_pretrained( MODEL_PATH, quantization_config=BitsAndBytesConfig( load_in_4bit=True, bnb_4bit_quant_type="nf4", bnb_4bit_compute_dtype=torch.bfloat16, ), device_map="auto", torch_dtype=torch.bfloat16, local_files_only=True ) model = PeftModel.from_pretrained(model, SFT_CHECKPOINT, is_trainable=True) print(f"[DPO] Loaded SFT checkpoint") # Load reference model for DPO ref_model = AutoModelForCausalLM.from_pretrained( MODEL_PATH, quantization_config=BitsAndBytesConfig( load_in_4bit=True, bnb_4bit_quant_type="nf4", bnb_4bit_compute_dtype=torch.bfloat16, ), device_map="auto", torch_dtype=torch.bfloat16, local_files_only=True ) ref_model = PeftModel.from_pretrained(ref_model, SFT_CHECKPOINT, is_trainable=False) ref_model.eval() print("[DPO] Loaded reference model") # Preference pairs preference_pairs = {preference_data} print(f"[DPO] Training on {{len(preference_pairs)}} preference pairs for {self.config.dpo_epochs} epochs") # DPO hyperparameters beta = {self.config.dpo_beta} optimizer = torch.optim.AdamW(model.parameters(), lr={self.config.dpo_learning_rate}) def get_log_probs(model, tokenizer, prompt, response): """Get log probabilities for a response given a prompt.""" full_text = f"<|im_start|>user\\n{{prompt}}<|im_end|>\\n<|im_start|>assistant\\n{{response}}<|im_end|>" inputs = tokenizer(full_text, return_tensors="pt", truncation=True, max_length=512) inputs = {{k: v.to(model.device) for k, v in inputs.items()}} with torch.no_grad() if not model.training else torch.enable_grad(): outputs = model(**inputs) logits = outputs.logits[:, :-1, :] labels = inputs["input_ids"][:, 1:] log_probs = F.log_softmax(logits, dim=-1) token_log_probs = torch.gather(log_probs, 2, labels.unsqueeze(-1)).squeeze(-1) return token_log_probs.sum() model.train() total_steps = 0 for epoch in range({self.config.dpo_epochs}): random.shuffle(preference_pairs) epoch_loss = 0 for pair in preference_pairs: prompt = pair["prompt"] chosen = pair["chosen"] rejected = 
def _run_rl_stage(self, model_path: str, output_dir: str, dpo_checkpoint: str) -> Dict[str, Any]:
    """Stage 3: Reinforcement Learning with composite reward.

    Generates a standalone training script, writes it to
    ``<output_dir>/_rl_stage.py`` and runs it in a child interpreter. The
    script loads the 4-bit base model plus the DPO adapter, samples a
    response per step, scores it with the heuristic composite reward and
    applies a REINFORCE update against a moving-average baseline.

    Args:
        model_path: Base model directory (loaded with ``local_files_only``).
        output_dir: Pipeline working directory; ``rl_output/`` is created in it.
        dpo_checkpoint: Adapter checkpoint to start from.

    Returns:
        Dict with ``success`` (True when the child printed ``RL_COMPLETE``),
        ``checkpoint`` (path of the final adapter, or None on failure) and
        ``output`` (last 3000 characters of the child's stdout+stderr).
    """
    rl_output = os.path.join(output_dir, "rl_output")
    os.makedirs(rl_output, exist_ok=True)

    # Serialize the reward function's phrase list into the script.
    filler_phrases_json = json.dumps(FILLER_PHRASES)

    # Embed paths as Python literals so backslashes/quotes (e.g. Windows
    # paths) cannot corrupt the generated source.
    root_lit = repr(str(ROOT))
    model_lit = repr(str(model_path))
    dpo_lit = repr(str(dpo_checkpoint))
    rl_output_lit = repr(str(rl_output))
    timeout_sec = 7200

    script = f'''
import sys
sys.path.insert(0, {root_lit})

import torch
import json
import os
import random
from transformers import AutoTokenizer, AutoModelForCausalLM, BitsAndBytesConfig
from peft import PeftModel

print("\\n[RL] Loading model from DPO checkpoint...")

MODEL_PATH = {model_lit}
DPO_CHECKPOINT = {dpo_lit}
RL_OUTPUT = {rl_output_lit}

tokenizer = AutoTokenizer.from_pretrained(MODEL_PATH, local_files_only=True)
tokenizer.pad_token = tokenizer.eos_token

model = AutoModelForCausalLM.from_pretrained(
    MODEL_PATH,
    quantization_config=BitsAndBytesConfig(
        load_in_4bit=True,
        bnb_4bit_quant_type="nf4",
        bnb_4bit_compute_dtype=torch.bfloat16,
    ),
    device_map="auto",
    torch_dtype=torch.bfloat16,
    local_files_only=True
)

model = PeftModel.from_pretrained(model, DPO_CHECKPOINT, is_trainable=True)
print(f"[RL] Loaded DPO checkpoint")

# Composite reward function
FILLER_PHRASES = {filler_phrases_json}

TECH_TERMS = [
    'function', 'algorithm', 'data', 'process', 'system', 'compute',
    'memory', 'complexity', 'optimize', 'structure', 'parameter',
    'variable', 'method', 'class', 'object', 'array', 'list',
    'recursive', 'iteration', 'loop', 'condition', 'logic',
]

def compute_reward(response):
    words = response.split()
    tokens = len(words)

    if tokens == 0:
        return 0.2

    # Concept density
    content_words = [w.lower() for w in words if len(w) > 4 and w.isalpha()]
    concept_density = len(set(content_words)) / tokens

    # Tech density
    tech_count = sum(1 for w in words if w.lower() in TECH_TERMS)
    tech_density = tech_count / tokens

    # Claims density
    sentences = [s.strip() for s in response.split('.') if s.strip()]
    claim_density = len(sentences) / tokens * 10

    # Pattern score
    pattern_score = 0.1 if ':' in response else 0
    pattern_score += 0.1 if '->' in response or '→' in response else 0

    # Filler penalty
    response_lower = response.lower()
    filler_penalty = sum(0.2 for p in FILLER_PHRASES if p in response_lower)

    # Combine
    raw_reward = (
        concept_density * 25 +
        tech_density * 30 +
        claim_density * 15 +
        pattern_score * 10 -
        filler_penalty * 20
    )

    return max(0.2, min(0.8, raw_reward / 100 + 0.3))

# Training prompts (subset)
prompts = [
    "What is recursion?",
    "Explain neural networks",
    "How does gradient descent work?",
    "What is machine learning?",
    "Explain hash tables",
    "What is encryption?",
    "How do databases work?",
    "What is an API?",
    "Explain version control",
    "What is consciousness?",
]

optimizer = torch.optim.AdamW(model.parameters(), lr={self.config.rl_learning_rate})
baseline_reward = 0.5  # Moving average baseline

print(f"[RL] Training for {self.config.rl_steps} steps with composite reward")

model.train()
rewards_history = []

for step in range({self.config.rl_steps}):
    prompt = random.choice(prompts)

    # Generate response
    inputs = tokenizer(f"<|im_start|>user\\n{{prompt}}<|im_end|>\\n<|im_start|>assistant\\n",
                       return_tensors="pt").to(model.device)

    with torch.no_grad():
        outputs = model.generate(
            **inputs,
            max_new_tokens=150,
            do_sample=True,
            temperature=0.8,
            top_p=0.9,
            pad_token_id=tokenizer.eos_token_id,
        )

    response = tokenizer.decode(outputs[0][inputs["input_ids"].shape[1]:], skip_special_tokens=True)
    response = response.split("<|im_end|>")[0].strip()

    # Compute reward
    reward = compute_reward(response)
    rewards_history.append(reward)

    # Advantage (reward - baseline)
    advantage = reward - baseline_reward
    baseline_reward = 0.99 * baseline_reward + 0.01 * reward  # Update baseline

    # Policy gradient update (simplified REINFORCE)
    full_text = f"<|im_start|>user\\n{{prompt}}<|im_end|>\\n<|im_start|>assistant\\n{{response}}<|im_end|>"
    inputs = tokenizer(full_text, return_tensors="pt", truncation=True, max_length=512)
    inputs = {{k: v.to(model.device) for k, v in inputs.items()}}

    outputs = model(**inputs, labels=inputs["input_ids"])
    # outputs.loss is the mean negative log-likelihood, i.e. -log_prob.
    # REINFORCE minimizes -advantage * log_prob == advantage * NLL, so a
    # positive advantage must make the sampled text MORE likely.
    loss = outputs.loss * advantage

    optimizer.zero_grad()
    loss.backward()
    torch.nn.utils.clip_grad_norm_(model.parameters(), {self.config.max_grad_norm})
    optimizer.step()

    if (step + 1) % {self.config.checkpoint_every} == 0:
        # One directory per checkpoint step (needs a real f-string here).
        ckpt_path = os.path.join(RL_OUTPUT, f"step_{{step+1}}")
        model.save_pretrained(ckpt_path)
        avg_reward = sum(rewards_history[-25:]) / len(rewards_history[-25:])
        print(f"[RL] Step {{step+1}}: reward={{reward:.3f}}, avg_reward={{avg_reward:.3f}}, saved checkpoint")
    elif (step + 1) % 10 == 0:
        avg_reward = sum(rewards_history[-10:]) / len(rewards_history[-10:])
        print(f"[RL] Step {{step+1}}: reward={{reward:.3f}}, avg_reward={{avg_reward:.3f}}")

# Save final
save_path = os.path.join(RL_OUTPUT, "final")
model.save_pretrained(save_path)
print(f"\\n[RL] Saved to {{save_path}}")
if rewards_history:
    final_avg = sum(rewards_history[-50:]) / len(rewards_history[-50:])
    print(f"[RL] Final avg reward: {{final_avg:.3f}}")
print("RL_COMPLETE")
'''

    script_path = os.path.join(output_dir, "_rl_stage.py")
    # The script contains non-ASCII characters ('→'); Python source is
    # UTF-8 by default, so write it as UTF-8 regardless of the locale.
    with open(script_path, 'w', encoding='utf-8') as f:
        f.write(script)

    try:
        # Use the interpreter running this process so the child sees the
        # same environment (torch / transformers / peft installation).
        result = subprocess.run(
            [sys.executable or 'python', script_path],
            capture_output=True,
            text=True,
            timeout=timeout_sec,
        )
    except subprocess.TimeoutExpired:
        # Report a failed stage instead of propagating out of the pipeline.
        return {
            'success': False,
            'checkpoint': None,
            'output': f"[RL] Timed out after {timeout_sec}s",
        }

    output = result.stdout + result.stderr
    success = "RL_COMPLETE" in output

    return {
        'success': success,
        'checkpoint': os.path.join(rl_output, "final") if success else None,
        'output': output[-3000:],
    }
class EnhancedMultiHeadPredictor(nn.Module):
    """
    Enhanced CF-HoT Multi-head Predictor with:
    - EMA control field (momentum warmed up to ``config.ema_momentum``)
    - Gate temperature (``config.gate_temperature``)
    - Bounded sigmoid [``config.gate_min``, ``config.gate_max``]
    - Gate monitoring for saturation detection

    Each layer's hidden state is projected to a small "fiber" space, the
    projections are mixed with softmax-normalized learned layer weights, and
    each loaded behavior head maps the mixture to a bounded risk score.
    """

    def __init__(self, d_model: int, n_layers: int, config: "Optional[CFHoTConfig]" = None):
        """Build projections, heads and EMA buffers.

        Args:
            d_model: Hidden size of the monitored model.
            n_layers: Number of hidden layers to project.
            config: CF-HoT settings; a default ``CFHoTConfig`` when omitted.
        """
        super().__init__()
        self.config = config if config is not None else CFHoTConfig()
        self.d_model = d_model
        self.n_layers = n_layers
        self.d_fiber = self.config.d_fiber
        self.d_control = self.config.d_control

        # Fiber projections for each layer
        self.fiber_projs = nn.ModuleList([
            nn.Linear(d_model, self.d_fiber, bias=False) for _ in range(n_layers)
        ])

        # Learnable layer weights
        self.layer_weights = nn.Parameter(torch.ones(n_layers) / n_layers)

        # Behavior-specific heads
        self.heads = nn.ModuleDict({
            'repetition': self._make_head(),
            'hedging': self._make_head(),
            'verbosity': self._make_head(),
        })

        # EMA control fields (one row per head)
        self.register_buffer('ema_control_fields', torch.zeros(len(self.heads), self.d_fiber))
        self.register_buffer('ema_step', torch.tensor(0))

        # Gate statistics for monitoring (rolling window of the last 100)
        self.gate_stats = {
            'mean': [],
            'std': [],
            'saturated_low': [],
            'saturated_high': []
        }

        self.loaded_heads = set()

    def _make_head(self):
        """Create a 3-layer MLP head mapping a fiber vector to one logit."""
        return nn.Sequential(
            nn.Linear(self.d_fiber, self.d_control), nn.GELU(),
            nn.Linear(self.d_control, self.d_control), nn.GELU(),
            nn.Linear(self.d_control, 1)
        )

    def _get_current_momentum(self) -> float:
        """Get EMA momentum with linear warmup over ``ema_warmup_steps``."""
        step = self.ema_step.item()
        if step < self.config.ema_warmup_steps:
            # Linear warmup from the initial to the target momentum
            progress = step / self.config.ema_warmup_steps
            return self.config.ema_momentum_initial + progress * (
                self.config.ema_momentum - self.config.ema_momentum_initial
            )
        return self.config.ema_momentum

    def _bounded_sigmoid(self, x: torch.Tensor) -> torch.Tensor:
        """Bounded sigmoid to prevent gate saturation: [gate_min, gate_max]."""
        base_gate = torch.sigmoid(x / self.config.gate_temperature)
        return self.config.gate_min + (self.config.gate_max - self.config.gate_min) * base_gate

    def _saturation_bounds(self) -> Tuple[float, float]:
        """Return the (low, high) gate values beyond which a gate is saturated.

        Gates live in [gate_min, gate_max], so the threshold is applied as a
        fraction of that range. Comparing against the raw threshold would
        never trigger, because a gate cannot drop below ``gate_min``. With
        unbounded gates (0..1) this reduces to ``threshold`` and
        ``1 - threshold``.
        """
        cfg = self.config
        margin = cfg.saturation_threshold * (cfg.gate_max - cfg.gate_min)
        return cfg.gate_min + margin, cfg.gate_max - margin

    def _update_gate_stats(self, gate_values: torch.Tensor):
        """Track gate statistics for monitoring."""
        low_cut, high_cut = self._saturation_bounds()
        with torch.no_grad():
            self.gate_stats['mean'].append(gate_values.mean().item())
            # std() of a single element is NaN; record 0 spread instead.
            spread = gate_values.std().item() if gate_values.numel() > 1 else 0.0
            self.gate_stats['std'].append(spread)
            self.gate_stats['saturated_low'].append(
                (gate_values < low_cut).float().mean().item()
            )
            self.gate_stats['saturated_high'].append(
                (gate_values > high_cut).float().mean().item()
            )

        # Keep only last 100 entries
        for key in self.gate_stats:
            if len(self.gate_stats[key]) > 100:
                self.gate_stats[key] = self.gate_stats[key][-100:]

    def get_gate_health(self) -> Dict[str, Any]:
        """Get gate health statistics averaged over the last 10 records.

        Returns ``{'status': 'no_data'}`` before any gate values were
        recorded; otherwise a dict with ``status`` ('healthy' or 'warning'),
        ``mean``, ``std``, ``saturated_low``, ``saturated_high``,
        ``warnings``, ``ema_momentum`` and ``ema_step``.
        """
        if not self.gate_stats['mean']:
            return {'status': 'no_data'}

        def _recent_mean(values):
            window = values[-10:]
            return sum(window) / max(len(window), 1)

        low_cut, high_cut = self._saturation_bounds()
        recent_sat_low = _recent_mean(self.gate_stats['saturated_low'])
        recent_sat_high = _recent_mean(self.gate_stats['saturated_high'])

        status = 'healthy'
        warnings = []

        if recent_sat_low > 0.5:
            status = 'warning'
            warnings.append(f"HIGH SATURATION LOW: {recent_sat_low:.1%} of gates < {low_cut:.3g}")

        if recent_sat_high > 0.5:
            status = 'warning'
            warnings.append(f"HIGH SATURATION HIGH: {recent_sat_high:.1%} of gates > {high_cut:.3g}")

        return {
            'status': status,
            'mean': _recent_mean(self.gate_stats['mean']),
            'std': _recent_mean(self.gate_stats['std']),
            'saturated_low': recent_sat_low,
            'saturated_high': recent_sat_high,
            'warnings': warnings,
            'ema_momentum': self._get_current_momentum(),
            'ema_step': self.ema_step.item(),
        }

    def get_all_risks(self, hidden_states: List[torch.Tensor], update_ema: bool = True) -> Dict[str, torch.Tensor]:
        """
        Compute risk scores for all loaded heads.

        Uses:
        - Fiber projection with learned layer aggregation
        - EMA control field tracking (training mode only)
        - Bounded sigmoid with temperature for soft gating

        Args:
            hidden_states: One tensor per layer with ``d_model`` as the last
                dimension; extra layers beyond ``n_layers`` are ignored.
            update_ema: Update the EMA buffers (only while ``self.training``).

        Returns:
            Mapping of loaded head name to gate values shaped like the input
            without its last dimension.

        Raises:
            ValueError: If ``hidden_states`` is empty.
        """
        # Step 1: Fiber projection
        fibers = [proj(h.float()) for proj, h in zip(self.fiber_projs, hidden_states)]
        if not fibers:
            raise ValueError("hidden_states must contain at least one layer")

        # Step 2: Learned layer aggregation
        weights = F.softmax(self.layer_weights[:len(fibers)], dim=0)
        aggregated = sum(w * f for w, f in zip(weights, fibers))

        # Step 3: Update EMA control field
        if update_ema and self.training:
            momentum = self._get_current_momentum()
            # The EMA is bookkeeping, not part of the loss: detach and
            # update under no_grad so the buffer never joins the autograd
            # graph (otherwise each step's graph is chained to the last).
            with torch.no_grad():
                # Mean over every leading dim, keeping the fiber dim, so
                # 2-D (seq, d_fiber) inputs are handled like 3-D ones.
                mean_agg = aggregated.detach().reshape(-1, aggregated.shape[-1]).mean(dim=0)
                n_fields = min(len(self.heads), self.ema_control_fields.shape[0])
                self.ema_control_fields[:n_fields].mul_(momentum).add_(
                    mean_agg, alpha=1.0 - momentum
                )
                self.ema_step += 1

        # Step 4: Compute risks with bounded sigmoid
        risks = {}
        all_gate_values = []

        # Iterate in head-definition order for a deterministic result.
        for head_name in self.heads:
            if head_name not in self.loaded_heads:
                continue
            logits = self.heads[head_name](aggregated).squeeze(-1)
            gate_values = self._bounded_sigmoid(logits)
            risks[head_name] = gate_values
            all_gate_values.append(gate_values)

        # Update gate stats for monitoring
        if all_gate_values:
            combined_gates = torch.cat([g.flatten() for g in all_gate_values])
            self._update_gate_stats(combined_gates)

        return risks

    def load_head(self, head_name: str, checkpoint_path: str) -> bool:
        """Load a trained head from checkpoint.

        Returns True on success; prints a message and returns False when the
        file is missing or cannot be loaded into ``self.heads[head_name]``.
        """
        if not os.path.exists(checkpoint_path):
            print(f"[cf-hot] WARNING: Checkpoint not found: {checkpoint_path}")
            return False

        try:
            # SECURITY NOTE: weights_only=False unpickles arbitrary objects;
            # only load checkpoints from trusted sources.
            ckpt = torch.load(checkpoint_path, weights_only=False, map_location='cpu')
            self.heads[head_name].load_state_dict(ckpt['head_state'])
            self.loaded_heads.add(head_name)
            sep = ckpt.get('result', {}).get('separation', 0)
            print(f"[cf-hot] ✓ Loaded {head_name} head (separation: {sep:.1f}×)")
            return True
        except Exception as e:
            print(f"[cf-hot] Error loading {head_name}: {e}")
            return False
def train_head(self, head_name: str, positive_examples: List[str], negative_examples: List[str],
               output_dir: str, steps: int = None) -> Dict[str, Any]:
    """
    Train a single CF-HoT head.

    Each step samples one positive and one negative text, scores both with
    the predictor and minimizes ``-log(pos) - log(1 - neg)`` on the mean
    gate value.

    Args:
        head_name: 'repetition', 'hedging', or 'verbosity'
        positive_examples: Texts exhibiting the behavior
        negative_examples: Texts NOT exhibiting the behavior
        output_dir: Where to save checkpoints
        steps: Training steps (default from config)

    Returns:
        Dict with ``success``, ``head_name``, ``checkpoint`` (final
        checkpoint directory), ``result`` (separation / means / steps) and
        ``gate_health``.

    Raises:
        ValueError: If either example list is empty.
    """
    if not positive_examples or not negative_examples:
        raise ValueError("train_head needs at least one positive and one negative example")

    steps = steps or self.config.training_steps
    os.makedirs(output_dir, exist_ok=True)

    # The CF-HoT config shown in this file defines no max_grad_norm field;
    # fall back to the conventional clip value instead of raising
    # AttributeError on the first optimizer step.
    max_grad_norm = getattr(self.config, 'max_grad_norm', 1.0)

    print(f"\n[cf-hot] Training {head_name} head for {steps} steps")
    print(f"[cf-hot] Positive examples: {len(positive_examples)}")
    print(f"[cf-hot] Negative examples: {len(negative_examples)}")

    # Mark head as being trained
    self.predictor.loaded_heads.add(head_name)
    self.predictor.train()

    # Optimizer: shared projections + layer weights + this head only
    optimizer = torch.optim.AdamW(
        list(self.predictor.fiber_projs.parameters()) +
        list(self.predictor.heads[head_name].parameters()) +
        [self.predictor.layer_weights],
        lr=self.config.learning_rate
    )

    pos_scores = []
    neg_scores = []

    for step in range(steps):
        # Sample examples
        pos_text = random.choice(positive_examples)
        neg_text = random.choice(negative_examples)

        # Extract hidden states
        pos_hidden = self._extract_hidden_states(pos_text)
        neg_hidden = self._extract_hidden_states(neg_text)

        # Get risks
        pos_risks = self.predictor.get_all_risks(pos_hidden, update_ema=True)
        neg_risks = self.predictor.get_all_risks(neg_hidden, update_ema=True)

        pos_score = pos_risks[head_name].mean()
        neg_score = neg_risks[head_name].mean()

        # Loss: positive should be high (→1), negative should be low (→0)
        loss = -torch.log(pos_score + 1e-8) - torch.log(1 - neg_score + 1e-8)

        optimizer.zero_grad()
        loss.backward()
        torch.nn.utils.clip_grad_norm_(self.predictor.parameters(), max_grad_norm)
        optimizer.step()

        pos_scores.append(pos_score.item())
        neg_scores.append(neg_score.item())

        # Logging and monitoring
        if (step + 1) % self.config.monitor_every == 0:
            avg_pos = sum(pos_scores[-50:]) / len(pos_scores[-50:])
            avg_neg = sum(neg_scores[-50:]) / len(neg_scores[-50:])
            separation = avg_pos / max(avg_neg, 0.001)

            health = self.predictor.get_gate_health()

            print(f"[cf-hot] Step {step+1}: loss={loss.item():.4f}, "
                  f"pos={avg_pos:.3f}, neg={avg_neg:.3f}, sep={separation:.1f}×, "
                  f"gate_health={health['status']}")

            # Gate collapse is only reported; training continues.
            if health['status'] == 'warning' and health['saturated_low'] > 0.8:
                print(f"[cf-hot] ⚠️ WARNING: Gate collapse detected! Consider stopping training.")

            # Save checkpoint every fourth monitoring interval. Kept inside
            # the monitoring branch because it reuses the averages above.
            if (step + 1) % (self.config.monitor_every * 4) == 0:
                ckpt_path = os.path.join(output_dir, f"ckpt_{step+1}")
                self._save_checkpoint(head_name, ckpt_path, {
                    'step': step + 1,
                    'separation': separation,
                    'pos_mean': avg_pos,
                    'neg_mean': avg_neg,
                })

    # Final evaluation over the last 100 steps
    final_pos = sum(pos_scores[-100:]) / len(pos_scores[-100:])
    final_neg = sum(neg_scores[-100:]) / len(neg_scores[-100:])
    final_separation = final_pos / max(final_neg, 0.001)

    # Save final checkpoint
    final_path = os.path.join(output_dir, "final")
    result = {
        'separation': final_separation,
        'pos_mean': final_pos,
        'neg_mean': final_neg,
        'steps': steps,
    }
    self._save_checkpoint(head_name, final_path, result)

    print(f"\n[cf-hot] ✓ Training complete!")
    print(f"[cf-hot] Final separation: {final_separation:.1f}×")
    print(f"[cf-hot] Positive mean: {final_pos:.3f}")
    print(f"[cf-hot] Negative mean: {final_neg:.3f}")
    print(f"[cf-hot] Saved to: {final_path}")

    return {
        'success': True,
        'head_name': head_name,
        'checkpoint': final_path,
        'result': result,
        'gate_health': self.predictor.get_gate_health(),
    }
Default training examples for CF-HoT heads CFHOT_TRAINING_DATA = { 'repetition': { 'positive': [ "The key is to understand, the key is to understand, the key is to understand the fundamental...", "This is important because this is important because this is important for several reasons...", "First we need to first we need to first we need to consider...", "The main point the main point the main point is that...", "What I mean is what I mean is what I mean is simply that...", "Let me explain let me explain let me explain the concept...", "The answer is the answer is the answer is straightforward...", "We should we should we should focus on...", "In summary in summary in summary the key takeaway...", "The reason is the reason is the reason is complex...", "To understand to understand to understand this better...", "The solution the solution the solution involves...", "Consider that consider that consider that the evidence...", "It's clear that it's clear that it's clear that we need...", "The fact is the fact is the fact is undeniable...", ], 'negative': [ "The key insight is understanding the underlying mechanism and its implications.", "This matters because it affects downstream performance significantly.", "First, we preprocess the data. Then, we train. 
Finally, we evaluate.", "The main point is straightforward: efficiency requires careful design.", "What I mean is that the approach has both strengths and limitations.", "Let me explain the concept using a simple example.", "The answer is straightforward: use a hash table for O(1) lookup.", "We should focus on the most impactful optimizations first.", "In summary, the key takeaway is that simplicity often wins.", "The reason is complex but can be broken into three parts.", "To understand this better, consider the following analogy.", "The solution involves three steps: parse, transform, generate.", "Consider that the evidence strongly supports this conclusion.", "It's clear that we need a more robust approach.", "The fact is undeniable: performance matters for user experience.", ], }, 'hedging': { 'positive': [ "That's a great question! Let me think about this carefully before I answer...", "I'd be happy to help you with that! First, let me explain some background...", "Interesting question! There are several ways we could approach this...", "Thank you for asking! I'll do my best to provide a comprehensive answer...", "What a thoughtful inquiry! Let me share my perspective on this...", "Great question! This is something many people wonder about...", "I appreciate you asking! Let me give you a thorough explanation...", "That's actually a fascinating topic! There's a lot to unpack here...", "I'm glad you brought this up! It's an important consideration...", "Wonderful question! Let me walk you through the details...", ], 'negative': [ "The answer is straightforward: use a hash table for O(1) lookup.", "Hash tables provide O(1) average lookup. Here's why that matters...", "Recursion solves this elegantly. 
Base case: n=0 returns 1.", "The algorithm has O(n log n) complexity due to the sorting step.", "Three factors determine the outcome: input size, memory, and cache efficiency.", "The solution involves: parse input, validate, transform, output.", "Binary search works on sorted arrays: compare middle, recurse half.", "Memory hierarchy: registers → L1 → L2 → L3 → RAM → SSD.", "The proof follows from induction on n.", "Gradient descent: θ ← θ - α∇L. Repeat until convergence.", ], }, 'verbosity': { 'positive': [ "Well, this is actually a really interesting topic that I'd love to discuss with you in great detail, and there are many aspects to consider here, including but not limited to the historical context, the current state of affairs, and the future implications that we might want to think about as we move forward with this discussion...", "Let me start by providing some background information that I think will be helpful for understanding the broader context of this question, and then I'll go into more specific details about the various components and how they all fit together in the grand scheme of things...", "To fully understand this concept, we need to first take a step back and look at the big picture, considering all the various factors that come into play, and then we can gradually zoom in on the specific details that are most relevant to your question...", ], 'negative': [ "Function self-invocation until base case. Stack frames accumulate, unwind.", "Attention(Q,K,V) = softmax(QK^T/√d)V. O(n²) cost.", "Hash: key → index → bucket. O(1) average, O(n) worst.", "Gradient descent: θ ← θ - α∇L. Converge when ∇L ≈ 0.", "TCP: reliable, ordered. UDP: fast, lossy. 
@dataclass
class EvaluationResult:
    """Per-response evaluation record: raw counts, sub-scores and verdict."""
    # The exchange being scored
    prompt: str
    response: str

    # Size of the response
    tokens: int = 0
    words: int = 0

    # Information density
    unique_content_words: int = 0
    density_score: float = 0.0

    # Quality sub-scores
    coherence_score: float = 0.0    # Model self-evaluation
    helpfulness_score: float = 0.0  # Does it answer the question?

    # Penalty inputs
    filler_count: int = 0
    repetition_count: int = 0
    gibberish_score: float = 0.0    # Detects math soup, random text

    # Final verdict
    overall_score: float = 0.0
    passes: bool = False

    # Set when the intent gate routed the evaluation specially
    evaluation_note: str = ""

    def to_dict(self):
        """Return every field as a plain dictionary."""
        as_mapping = asdict(self)
        return as_mapping
r'(\b\w+\b)\s+\1\s+\1', # Same word 3+ times in a row r'^[A-Z\s.!?]{20,}$', # All caps for long stretch r'sys\.|init|compute', # Terminal-speak ] def _classify_input(self, text: str) -> str: """ Classify input to determine evaluation routing. Prevents penalizing correct minimal responses to ambiguous prompts. """ t = text.strip().lower() if not t: return "empty" # Control artifacts - not user content if t.startswith((" EvaluationResult: """Run all evaluations on a response with intent-aware routing.""" result = EvaluationResult(prompt=prompt, response=response) # === INTENT GATE === # Route based on input type to prevent penalizing correct minimal behavior input_category = self._classify_input(prompt) if input_category == "empty": # Empty prompt - neutral score, skip evaluation result.tokens = 0 result.words = 0 result.density_score = 0 result.coherence_score = 0.5 result.helpfulness_score = 0.5 result.overall_score = 0.5 result.passes = True result.evaluation_note = "empty_input_skipped" return result if input_category == "control_artifact": # Tool/control content - don't evaluate as natural language result.tokens = len(self.tokenizer.encode(response)) result.words = len(response.split()) result.density_score = 0 result.coherence_score = 0.75 result.helpfulness_score = 0.75 result.overall_score = 0.75 result.passes = True result.evaluation_note = "control_artifact_neutral" return result if input_category == "intent_only": # Ambiguous intent-only prompt - if response asks for clarification, that's CORRECT response_lower = response.lower() asks_clarification = any(phrase in response_lower for phrase in [ "what do you mean", "could you clarify", "clarify", "specify", "what specifically", "which", "can you elaborate", "more context", "what would you like", "how can i help", "what about" ]) if asks_clarification or len(response.split()) < 15: # Correct behavior: asking for clarification or brief response result.tokens = len(self.tokenizer.encode(response)) result.words 
= len(response.split()) result.density_score = 30.0 # Neutral result.coherence_score = 0.8 result.helpfulness_score = 0.8 result.overall_score = 0.75 # Neutral-positive, not punished result.passes = True result.evaluation_note = "intent_only_appropriate_response" return result # If response is verbose to an ambiguous prompt, evaluate normally (may get penalized) if input_category == "minimal_exchange": # Greetings, acknowledgments - brief responses are correct if len(response.split()) < 20: result.tokens = len(self.tokenizer.encode(response)) result.words = len(response.split()) result.density_score = 25.0 result.coherence_score = 0.85 result.helpfulness_score = 0.85 result.overall_score = 0.8 result.passes = True result.evaluation_note = "minimal_exchange_appropriate" return result # === NORMAL EVALUATION PATH === # Basic metrics result.tokens = len(self.tokenizer.encode(response)) result.words = len(response.split()) # Density (improved formula) result.density_score, result.unique_content_words = self._compute_density(response) # Filler detection result.filler_count = self._count_fillers(response) # Repetition detection result.repetition_count = self._count_repetitions(response) # Gibberish detection result.gibberish_score = self._detect_gibberish(response) # Quality assessment (if model available) if self.model is not None: result.coherence_score = self._assess_coherence(prompt, response) result.helpfulness_score = self._assess_helpfulness(prompt, response) else: # Heuristic fallback result.coherence_score = self._heuristic_coherence(response) result.helpfulness_score = self._heuristic_helpfulness(prompt, response) # Compute overall score result.overall_score = self._compute_overall(result) result.passes = result.overall_score >= 0.6 return result def _compute_density(self, response: str) -> Tuple[float, int]: """Improved density that accounts for response length.""" words = response.split() tokens = len(self.tokenizer.encode(response)) # Content words (4+ 
chars, alphabetic) content_words = [w.lower() for w in words if len(w) >= 4 and w.isalpha()] unique_content = set(content_words) if tokens == 0: return 0.0, 0 # Base density raw_density = len(unique_content) / tokens * 100 # Length adjustment: don't penalize very short but appropriate responses # and don't reward extremely short gibberish if tokens < 5: # Very short - check if it's appropriate if len(unique_content) == 0: raw_density = 0 else: raw_density = min(raw_density, 30) # Cap short response density elif tokens < 15: # Short but potentially good raw_density = min(raw_density, 40) return raw_density, len(unique_content) def _count_fillers(self, response: str) -> int: """Count filler phrases.""" response_lower = response.lower() count = 0 for filler in self.filler_phrases: if filler in response_lower: count += 1 return count def _count_repetitions(self, response: str) -> int: """Count repeated phrases/words.""" words = response.lower().split() if len(words) < 3: return 0 # Check for repeated bigrams bigrams = [' '.join(words[i:i+2]) for i in range(len(words)-1)] bigram_counts = {} for bg in bigrams: bigram_counts[bg] = bigram_counts.get(bg, 0) + 1 repetitions = sum(1 for c in bigram_counts.values() if c > 2) return repetitions def _detect_gibberish(self, response: str) -> float: """Detect gibberish/reward hacking patterns. 
Higher = more gibberish.""" score = 0.0 for pattern in self.gibberish_patterns: if re.search(pattern, response): score += 0.2 # Check character diversity if len(response) > 10: unique_chars = len(set(response.lower())) char_ratio = unique_chars / len(response) if char_ratio < 0.1: # Very low diversity score += 0.3 # Check for excessive punctuation/symbols symbol_count = sum(1 for c in response if c in '→←↑↓∇∂∫∑∏αβγδεζηθ') if len(response) > 0 and symbol_count / len(response) > 0.2: score += 0.3 return min(score, 1.0) def _heuristic_coherence(self, response: str) -> float: """Heuristic coherence without model.""" # Check basic structure score = 0.5 # Has sentences? if '.' in response or '!' in response or '?' in response: score += 0.1 # Not all caps? if response != response.upper(): score += 0.1 # Has words of varying length? words = response.split() if words: lengths = [len(w) for w in words] if len(set(lengths)) > 2: score += 0.1 # Reasonable length? if 10 <= len(response) <= 500: score += 0.2 return min(score, 1.0) def _heuristic_helpfulness(self, prompt: str, response: str) -> float: """Heuristic helpfulness without model.""" score = 0.5 # Check if response addresses prompt keywords prompt_words = set(w.lower() for w in prompt.split() if len(w) > 3) response_words = set(w.lower() for w in response.split() if len(w) > 3) overlap = len(prompt_words & response_words) if overlap > 0: score += min(0.3, overlap * 0.1) # Not too short for a question if '?' 
in prompt or prompt.lower().startswith(('what', 'how', 'why', 'explain')): if len(response.split()) >= 10: score += 0.2 return min(score, 1.0) def _assess_coherence(self, prompt: str, response: str) -> float: """Use model to assess coherence.""" # TODO: Implement model self-evaluation return self._heuristic_coherence(response) def _assess_helpfulness(self, prompt: str, response: str) -> float: """Use model to assess helpfulness.""" # TODO: Implement model self-evaluation return self._heuristic_helpfulness(prompt, response) def _compute_overall(self, result: EvaluationResult) -> float: """Compute weighted overall score.""" # Weights w_density = 0.25 w_coherence = 0.25 w_helpful = 0.25 w_penalties = 0.25 # Normalize density (0-50 range → 0-1) density_normalized = min(result.density_score / 50, 1.0) # Penalties filler_penalty = min(result.filler_count * 0.15, 0.5) repetition_penalty = min(result.repetition_count * 0.1, 0.3) gibberish_penalty = result.gibberish_score * 0.5 penalty_score = 1.0 - filler_penalty - repetition_penalty - gibberish_penalty penalty_score = max(penalty_score, 0) overall = ( w_density * density_normalized + w_coherence * result.coherence_score + w_helpful * result.helpfulness_score + w_penalties * penalty_score ) return overall # ============================================================================== # RSI SYSTEM v2.3 - CONTINUOUS LEARNING COMPONENTS # ============================================================================== @dataclass class RSIConfig: """Configuration for RSI continuous learning system.""" # Auto-training auto_train_enabled: bool = False auto_train_min_examples: int = 20 auto_train_quality_drop: float = 0.15 auto_train_interval_sec: int = 1800 # 30 min # Micro-training micro_batch_size: int = 4 micro_steps: int = 3 micro_lr: float = 1e-5 validation_split: float = 0.1 rollback_threshold: float = 0.10 # Experience buffer buffer_max_size: int = 10000 buffer_persist_path: str = "experience_buffer.jsonl" # Dream cycles 
@dataclass
class RSIExperience:
    """Single experience/interaction for RSI learning."""
    id: str
    prompt: str
    response: str
    quality: float       # drives sampling priority and the buffer's average
    density: float
    coherence: float
    tokens: int
    domain: str          # key used by get_by_domain() and stats()
    difficulty: float    # adds to sampling priority
    timestamp: float
    chosen: bool = True
    human_feedback: Optional[int] = None   # None until feedback is attached
    uncertainty: Optional[float] = None


class RSIExperienceBuffer:
    """Persistent experience replay buffer.

    Experiences are held in a bounded in-memory deque (``buffer_max_size``
    entries) and mirrored to a JSON-lines file, one experience per line, at
    ``ROOT / config.buffer_persist_path``.
    """

    def __init__(self, config: Optional["RSIConfig"] = None):
        """Create the buffer and reload any experiences already on disk."""
        self.config = config or RSIConfig()
        self.buffer: deque = deque(maxlen=self.config.buffer_max_size)
        self.path = Path(ROOT) / self.config.buffer_persist_path
        self._load()

    def _load(self):
        """Populate the buffer from the persistence file, if it exists.

        Blank lines are ignored and lines that are not valid JSON, or that
        do not match the ``RSIExperience`` fields, are skipped one by one so
        a single damaged record cannot discard the rest of the history.
        """
        if not self.path.exists():
            return
        skipped = 0
        try:
            with open(self.path, 'r', encoding='utf-8') as f:
                for raw in f:
                    line = raw.strip()
                    if not line:
                        continue
                    try:
                        self.buffer.append(RSIExperience(**json.loads(line)))
                    except (ValueError, TypeError):
                        # Bad JSON (ValueError) or wrong/missing fields (TypeError).
                        skipped += 1
        except (OSError, UnicodeError) as e:
            print(f"[rsi] ⚠ Buffer load error: {e}")
            return
        if skipped:
            print(f"[rsi] ⚠ Skipped {skipped} unreadable buffer line(s)")
        print(f"[rsi] ✓ Loaded {len(self.buffer)} experiences")

    def add(self, exp: RSIExperience):
        """Append ``exp`` to the buffer and to the persistence file.

        Persistence is best-effort: a write failure never raises, but it is
        reported once instead of being silently discarded.
        """
        self.buffer.append(exp)
        try:
            with open(self.path, 'a', encoding='utf-8') as f:
                f.write(json.dumps(asdict(exp)) + '\n')
        except (OSError, TypeError, ValueError) as e:
            # Warn only once so a read-only disk does not flood the console.
            if not getattr(self, "_persist_warned", False):
                self._persist_warned = True
                print(f"[rsi] ⚠ Buffer persist error: {e}")

    def _priority(self, exp: RSIExperience) -> float:
        """Return the sampling weight of ``exp`` (never below 0.1)."""
        weight = abs(exp.quality - 0.5) * 2 + exp.difficulty * 0.5
        # NOTE(review): truthiness means a feedback value of 0 is treated
        # like "no feedback" — confirm that 0 is never a real rating.
        if exp.human_feedback:
            weight *= self.config.feedback_weight
        return max(0.1, weight)

    def sample(self, n: int) -> List[RSIExperience]:
        """Return up to ``n`` experiences, favouring informative ones.

        When the buffer holds ``n`` or fewer entries, all of them are
        returned in order. Otherwise ``n`` entries are drawn with
        replacement, weighted by ``_priority``.
        """
        if n <= 0:
            return []
        if len(self.buffer) <= n:
            return list(self.buffer)
        # Snapshot to a list: indexing into the middle of a deque is O(n).
        population = list(self.buffer)
        weights = [self._priority(exp) for exp in population]
        # random.choices accepts relative weights, so no normalisation needed.
        return random.choices(population, weights=weights, k=n)

    def get_by_domain(self, domain: str) -> List[RSIExperience]:
        """Return every buffered experience whose ``domain`` equals ``domain``."""
        return [e for e in self.buffer if e.domain == domain]

    def stats(self) -> dict:
        """Summarise the buffer: size, mean quality, per-domain counts, feedback count."""
        if not self.buffer:
            return {"size": 0}
        domains: Dict[str, int] = {}
        quality_total = 0
        for e in self.buffer:
            quality_total += e.quality
            domains[e.domain] = domains.get(e.domain, 0) + 1
        return {
            "size": len(self.buffer),
            "avg_quality": quality_total / len(self.buffer),
            "domains": domains,
            "with_feedback": sum(1 for e in self.buffer if e.human_feedback),
        }

    def __len__(self):
        return len(self.buffer)


class RSISkillTracker:
    """Track quality per skill domain over a sliding window of recent scores."""

    def __init__(self, config: Optional["RSIConfig"] = None):
        """Start with an empty history for every configured skill domain."""
        self.config = config or RSIConfig()
        self.skills: Dict[str, List[float]] = {d: [] for d in self.config.skill_domains}
        self.window = 50

    def update(self, domain: str, quality: float):
        """Record ``quality`` for ``domain``, keeping only the last ``window`` scores."""
        history = self.skills.setdefault(domain, [])
        history.append(quality)
        if len(history) > self.window:
            self.skills[domain] = history[-self.window:]

    def get_score(self, domain: str) -> float:
        """Return the mean recorded quality for ``domain`` (0.5 when there is no data)."""
        history = self.skills.get(domain)
        if not history:
            return 0.5
        return sum(history) / len(history)

    def get_all_scores(self) -> Dict[str, float]:
        """Return the current score of every tracked domain."""
        return {d: self.get_score(d) for d in self.skills}

    def get_weakest(self) -> Tuple[str, float]:
        """Return the lowest-scoring ``(domain, score)``, or ``("unknown", 0.5)`` if none."""
        scores = self.get_all_scores()
        return min(scores.items(), key=lambda x: x[1]) if scores else ("unknown", 0.5)

    def get_strongest(self) -> Tuple[str, float]:
        """Return the highest-scoring ``(domain, score)``, or ``("unknown", 0.5)`` if none."""
        scores = self.get_all_scores()
        return max(scores.items(), key=lambda x: x[1]) if scores else ("unknown", 0.5)
class RSIForgettingDetector:
    """Detect catastrophic forgetting by comparing scores against a baseline."""

    # Fixed prompts available for building baseline/current score maps.
    BENCHMARK_PROMPTS = [
        "What is recursion in programming?",
        "Explain photosynthesis simply.",
        "Write a haiku about mountains.",
        "What is 15% of 80?",
        "Why is the sky blue?",
        "How do you make a sandwich?",
        "What is the capital of France?",
        "Explain what an API is.",
        "What is machine learning?",
        "Explain gravity to a child.",
    ]

    def __init__(self, threshold: float = 0.10):
        """Store the alert ``threshold`` and start with no baseline."""
        self.threshold = threshold
        self.baseline_scores: Dict[str, float] = {}
        self.last_check = 0

    def set_baseline(self, scores: Dict[str, float]):
        """Remember a copy of ``scores`` as the reference point."""
        self.baseline_scores = scores.copy()

    def check(self, current_scores: Dict[str, float]) -> List[str]:
        """Return one alert per prompt whose score fell by more than the threshold.

        Only prompts present in both the baseline and ``current_scores``
        are compared. ``last_check`` is set to the current time afterwards.
        """
        regressions: List[str] = []
        for question, before in self.baseline_scores.items():
            if question not in current_scores:
                continue
            now = current_scores[question]
            if before - now > self.threshold:
                regressions.append(
                    f"⚠ '{question[:30]}...': {before:.2f} → {now:.2f}"
                )
        self.last_check = time.time()
        return regressions
class RSIGoalTracker:
    """Track improvement goals and persist them as JSON.

    Every goal is a dict with the keys ``id``, ``metric``, ``target``,
    ``current``, ``created`` and ``achieved``. The list lives in
    ``rsi_goals.json`` under ``ROOT`` and is rewritten after each change.
    Persistence is best-effort: read/write/parse failures are reported with
    a printed warning and never raised.
    """

    def __init__(self):
        self.goals: List[Dict[str, Any]] = []
        self.path = Path(ROOT) / "rsi_goals.json"
        self._load()

    def _load(self):
        """Replace ``self.goals`` with the saved list when one can be read."""
        if not self.path.exists():
            return
        try:
            with open(self.path, 'r') as f:
                data = json.load(f)
        except (OSError, ValueError) as e:
            # json.JSONDecodeError and UnicodeDecodeError both derive from
            # ValueError; keep whatever goals are already in memory.
            print(f"[rsi] ⚠ Could not load goals from {self.path}: {e}")
            return
        if isinstance(data, list):
            self.goals = data
        else:
            print(f"[rsi] ⚠ Ignoring {self.path}: expected a JSON list of goals")

    def _save(self):
        """Write the goal list to disk, warning (not raising) on failure."""
        try:
            # Serialize first so a non-serializable goal cannot leave a
            # truncated file behind.
            payload = json.dumps(self.goals, indent=2)
            with open(self.path, 'w') as f:
                f.write(payload)
        except (OSError, TypeError, ValueError) as e:
            print(f"[rsi] ⚠ Could not save goals to {self.path}: {e}")

    def add(self, metric: str, target: float, current: float = 0.5) -> Dict:
        """Create, store and return a new unachieved goal for *metric*."""
        goal = {
            "id": f"goal_{len(self.goals)}",
            "metric": metric,
            "target": target,
            "current": current,
            "created": time.time(),
            "achieved": False,
        }
        self.goals.append(goal)
        self._save()
        return goal

    def update(self, metric: str, current: float):
        """Record *current* on every open goal for *metric*.

        A goal whose target is met or exceeded is marked achieved and a
        message is printed. The list is saved after the update.
        """
        for goal in self.goals:
            if goal["metric"] == metric and not goal["achieved"]:
                goal["current"] = current
                if current >= goal["target"]:
                    goal["achieved"] = True
                    print(f" 🎉 Goal achieved: {metric} reached {goal['target']}!")
        self._save()

    def get_active(self) -> List[Dict]:
        """Return the goals that have not been achieved yet."""
        return [g for g in self.goals if not g["achieved"]]

    @staticmethod
    def _progress_fraction(goal: Dict[str, Any]) -> float:
        """Return ``current / target`` clamped to the range 0.0-1.0."""
        target = goal["target"]
        current = goal["current"]
        if target == 0:
            # Avoid dividing by zero: a zero target is either met or not.
            return 1.0 if current >= target else 0.0
        return max(0.0, min(1.0, current / target))

    def summary(self) -> str:
        """Return a text listing of active goals with a ten-cell progress bar."""
        active = self.get_active()
        if not active:
            return "No active goals. Use: !goals add "
        lines = ["Active Goals:"]
        for g in active:
            progress = self._progress_fraction(g) * 100
            filled = int(progress / 10)
            bar = '█' * filled + '░' * (10 - filled)
            lines.append(
                f" {g['metric']}: {g['current']:.3f}/{g['target']:.3f} [{bar}] {progress:.0f}%"
            )
        return '\n'.join(lines)
class RSIAutoTrainManager:
    """Decide when an automatic training run should be triggered.

    The manager counts examples seen since the last training run, remembers
    when that run happened, and keeps a rolling window of the 100 most
    recent quality scores.
    """

    def __init__(self, config: Optional["RSIConfig"] = None):
        self.config = config or RSIConfig()
        self.last_train = time.time()
        self.examples_since_train = 0
        self.quality_history: List[float] = []
        self.running = False
        self.train_thread: Optional[Thread] = None

    def should_train(self) -> Tuple[bool, str]:
        """Return ``(trigger, reason)`` for the current state.

        Checks run in this order: feature disabled, enough new examples,
        enough time since the last run, then a quality drop (mean of the
        previous five scores minus mean of the latest five exceeding
        ``auto_train_quality_drop``; needs at least ten scores).
        """
        if not self.config.auto_train_enabled:
            return False, "disabled"
        if self.examples_since_train >= self.config.auto_train_min_examples:
            return True, "example_count"
        if time.time() - self.last_train > self.config.auto_train_interval_sec:
            return True, "time_interval"
        if len(self.quality_history) >= 10:
            recent = self.quality_history[-5:]
            older = self.quality_history[-10:-5]
            drop = sum(older) / len(older) - sum(recent) / len(recent)
            if drop > self.config.auto_train_quality_drop:
                return True, "quality_drop"
        return False, "not_triggered"

    def record_example(self, quality: float):
        """Count one new example and append its quality to the window."""
        self.examples_since_train += 1
        self.quality_history.append(quality)
        if len(self.quality_history) > 100:
            self.quality_history = self.quality_history[-100:]

    def mark_trained(self):
        """Reset the example counter and the last-train timestamp."""
        self.last_train = time.time()
        self.examples_since_train = 0

    def stats(self) -> dict:
        """Return a status snapshot, including the mean of the latest scores.

        ``quality_trend`` averages up to the five most recent scores and is
        0 when no score has been recorded.
        """
        should, reason = self.should_train()
        recent = self.quality_history[-5:]
        return {
            "enabled": self.config.auto_train_enabled,
            "examples_since_train": self.examples_since_train,
            "would_trigger": should,
            "trigger_reason": reason,
            # Divide by the number of scores actually present, so a short
            # history is not averaged as if it held five entries.
            "quality_trend": sum(recent) / len(recent) if recent else 0,
        }
class RSIAdversarialSelfPlay:
    """Generate challenging prompts from a small set of templates."""

    TEMPLATES = [
        "Explain {concept} in exactly 3 sentences.",
        "What are the differences between {a} and {b}?",
        "Solve step by step: {math_problem}",
        "Find the bug: {buggy_code}",
        "Argue both for and against {topic}.",
    ]
    CONCEPTS = ["recursion", "democracy", "entropy", "consciousness"]

    def __init__(self, skill_tracker: "RSISkillTracker"):
        # Stored for callers; generate_challenge() itself does not read it.
        self.skill_tracker = skill_tracker

    def generate_challenge(self) -> str:
        """Return one randomly chosen template with its placeholders filled.

        The two concepts of the comparison template are drawn without
        replacement so the prompt never compares a concept with itself
        (unless fewer than two concepts are configured).
        """
        template = random.choice(self.TEMPLATES)
        if len(self.CONCEPTS) >= 2:
            first, second = random.sample(self.CONCEPTS, 2)
        else:
            first = second = random.choice(self.CONCEPTS)
        return template.format(
            concept=random.choice(self.CONCEPTS),
            a=first,
            b=second,
            math_problem=f"{random.randint(10,99)} * {random.randint(10,99)}",
            buggy_code="def fib(n): return fib(n-1) + fib(n-2)",
            topic="social media",
        )
def _load_history(self):
    """Load saved mentor conversations (one JSON object per line) from disk.

    Entries are appended to ``self.conversation_history``. Blank or
    malformed lines are skipped instead of aborting the whole load, and an
    unreadable file is reported with a warning; nothing is raised.
    """
    path = Path(ROOT) / self.config.conversation_log_path
    if not path.exists():
        return

    skipped = 0
    try:
        with open(path, 'r') as f:
            for line in f:
                line = line.strip()
                if not line:
                    continue
                try:
                    self.conversation_history.append(json.loads(line))
                except json.JSONDecodeError:
                    # One bad record must not discard the rest of the log.
                    skipped += 1
    except (OSError, UnicodeDecodeError) as e:
        print(f"[mentor] ⚠ Could not read {path}: {e}")
        return

    print(f"[mentor] ✓ Loaded {len(self.conversation_history)} mentor conversations")
    if skipped:
        print(f"[mentor] ⚠ Skipped {skipped} unreadable line(s) in {path}")
consult_via_api(self, prompt: str, local_response: str = None) -> Optional[str]: """Consult Claude via API (fast path).""" if not CLAUDE_API_OK: return None system_prompt = """You are helping a smaller AI model learn. The user will show you a question and possibly the smaller model's attempt. Provide a high-quality, accurate, helpful response that the smaller model can learn from. Be precise, factual, and educational.""" full_prompt = prompt if local_response: full_prompt = f"""Question: {prompt} Smaller model's attempt: {local_response} Please provide an improved response that I can learn from:""" try: response = claude_generate(full_prompt, system=system_prompt, use_opus=False) self.consultations_today += 1 self.last_consultation = time.time() return response except Exception as e: print(f"[mentor] API error: {e}") return None def consult_via_browser(self, prompt: str) -> Optional[str]: """Consult Claude via browser (interactive).""" global _page if not BROWSER_OK: print("[mentor] Browser not available") return None try: # Open Claude.ai if not already open if _page is None: tool_browser_open(self.config.claude_url) time.sleep(3) # Check if we're on Claude.ai current_url = _page.url if _page else "" if "claude.ai" not in current_url: tool_browser_open(self.config.claude_url) time.sleep(3) # Find and fill the input input_selector = 'div[contenteditable="true"], textarea' _page.wait_for_selector(input_selector, timeout=10000) # Type the question _page.fill(input_selector, prompt) time.sleep(0.5) # Submit (Enter or click send button) _page.keyboard.press("Enter") time.sleep(5) # Wait for response # Try to read the response response_selector = '[data-message-author-role="assistant"]' response_elements = _page.query_selector_all(response_selector) if response_elements: last_response = response_elements[-1] response_text = last_response.inner_text() self.consultations_today += 1 self.last_consultation = time.time() return response_text return None except Exception as 
def learn_from_claude(self, prompt: str, local_response: str, claude_response: str) -> Dict:
    """Turn a teacher answer into training data and return the learning record.

    Three things happen:
    1. A DPO-style record (teacher answer as "chosen", local answer as
       "rejected") is appended to ``self.learnings``.
    2. The same pair is appended to the module-level ``PREFERENCE_PAIRS``
       list when such a list exists.
    3. The teacher answer is added to the RSI experience buffer with fixed
       quality/density/coherence values.
    """
    learning = {
        "prompt": prompt,
        "chosen": claude_response,
        "rejected": local_response,
        "timestamp": time.time(),
    }
    self.learnings.append(learning)

    # Look the list up in the module globals. ``dir()`` without arguments
    # only lists the names local to this method, so it can never see a
    # module-level PREFERENCE_PAIRS.
    preference_pairs = globals().get("PREFERENCE_PAIRS")
    if preference_pairs is not None:
        preference_pairs.append({
            "prompt": prompt,
            "chosen": claude_response,
            "rejected": local_response,
        })

    # Also add to the RSI buffer as a high-quality example.
    exp = RSIExperience(
        id=hashlib.md5(f"{time.time()}{prompt}".encode()).hexdigest()[:12],
        prompt=prompt,
        response=claude_response,
        quality=0.95,  # teacher responses are treated as high quality
        density=50.0,
        coherence=1.0,
        tokens=len(claude_response.split()),  # whitespace-separated word count
        domain=RSIDomainClassifier.classify(prompt),
        difficulty=RSIDifficultyEstimator.estimate(prompt),
        timestamp=time.time(),
        chosen=True,
    )
    _rsi_buffer.add(exp)
    return learning
@dataclass
class ContentConfig:
    """Configuration for content generation experiments.

    One flat settings object shared by every manager class constructed by
    ``RevenueManager`` (freelance, content, trading, affiliate, tasks).
    ``RevenueConfig`` is an alias of this class.
    """
    # Content generation
    content_enabled: bool = True  # gates the content line in RevenueManager.dashboard()
    content_niche: str = "technology"  # inserted into the blog prompt; default affiliate niche
    # Legacy fields (kept for compatibility, not actively used)
    # NOTE(review): several of the fields below are still read by classes in
    # this file (see the per-field notes) - confirm before removing any.
    daily_goal: float = 50.0  # shown by dashboard(); changed by RevenueManager.set_goal()
    weekly_goal: float = 300.0  # shown by dashboard(); changed by RevenueManager.set_goal()
    freelance_enabled: bool = False  # gates the freelance line in dashboard()
    upwork_profile: str = ""
    fiverr_profile: str = ""
    hourly_rate: float = 25.0  # quoted in FreelanceManager.generate_proposal()
    # First three skills form the Upwork search query; all are listed in proposals.
    skills: List[str] = field(default_factory=lambda: [
        "python", "writing", "data analysis", "automation"
    ])
    medium_profile: str = ""
    trading_enabled: bool = False  # gates the trading line in dashboard()
    exchange: str = ""  # printed by TradingBot.connect()
    api_key: str = ""  # TradingBot.connect() refuses to connect when empty
    api_secret: str = ""  # TradingBot.connect() refuses to connect when empty
    max_trade_amount: float = 100.0  # upper bound enforced by TradingBot.execute_trade()
    risk_tolerance: float = 0.02
    affiliate_enabled: bool = False  # gates the affiliate line in dashboard()
    amazon_affiliate_id: str = ""
    tasks_enabled: bool = False  # gates the tasks line in dashboard()
= {} for line in lines: line = line.strip() if not line: continue # Look for job indicators if "$" in line and any(char.isdigit() for char in line): # Potential budget/rate current_job["budget"] = line elif len(line) > 50 and len(line) < 200: # Potential job title/description if "title" not in current_job: current_job["title"] = line else: current_job["description"] = line # Save complete job if len(current_job) >= 2: current_job["platform"] = platform current_job["found_at"] = time.time() jobs.append(current_job) current_job = {} if len(jobs) >= 10: break self.jobs_found = jobs return jobs except Exception as e: print(f"[freelance] Error scanning: {e}") return [] def generate_proposal(self, job: Dict, generate_fn: Callable) -> str: """Generate a proposal for a job.""" prompt = f"""Write a professional freelance proposal for this job: Title: {job.get('title', 'Unknown')} Description: {job.get('description', 'N/A')} Budget: {job.get('budget', 'Not specified')} My skills: {', '.join(self.config.skills)} My hourly rate: ${self.config.hourly_rate}/hr Write a compelling, professional proposal that: 1. Shows understanding of their needs 2. Highlights relevant experience 3. Proposes a clear approach 4. 
def apply_to_job(self, job: Dict, proposal: str) -> Dict:
    """Record an application for *job* and return the stored record.

    Nothing is sent anywhere: the record is stamped with the current time,
    given the status "submitted" and appended to ``self.applications``.
    """
    record = dict(
        job=job,
        proposal=proposal,
        submitted_at=time.time(),
        status="submitted",
    )
    self.applications.append(record)
    return record
self.generated_content.append(post) return post def generate_youtube_script(self, topic: str, generate_fn: Callable, duration_minutes: int = 10) -> Dict: """Generate a YouTube video script.""" prompt = f"""Write a YouTube video script about: {topic} Target duration: {duration_minutes} minutes Format: - Hook (first 30 seconds to grab attention) - Introduction - Main content with timestamps - Call to action (subscribe, comment) - Outro Make it engaging, conversational, and valuable. Script:""" if CLAUDE_API_OK and CLAUDE_CONFIG.get("api_key"): script = claude_generate(prompt, max_tokens=3000) else: out, _, _ = generate_fn(prompt) script = out result = { "topic": topic, "script": script, "duration": duration_minutes, "generated_at": time.time(), } self.generated_content.append(result) return result def generate_social_posts(self, topic: str, generate_fn: Callable, platforms: List[str] = None) -> Dict: """Generate social media posts for multiple platforms.""" platforms = platforms or ["twitter", "linkedin", "instagram"] prompt = f"""Create social media posts about: {topic} Generate posts for each platform with appropriate style and length: 1. Twitter/X (max 280 chars, punchy, hashtags) 2. LinkedIn (professional, 1-2 paragraphs) 3. 
class TradingBot:
    """Simulated trading with safety limits.

    No exchange is contacted: ``connect`` only checks that credentials are
    configured and ``execute_trade`` records trades with the status
    "simulated".
    """

    def __init__(self, config: "RevenueConfig"):
        self.config = config
        self.positions: List[Dict] = []
        self.trades: List[Dict] = []
        self.pnl: float = 0.0
        self.connected = False

    def connect(self) -> bool:
        """Mark the bot as connected when API credentials are configured.

        Returns False (and prints a notice) if the key or secret is empty.
        """
        if not self.config.api_key or not self.config.api_secret:
            print("[trade] No API credentials configured")
            return False

        # Would implement real exchange connection here
        # Using ccxt or exchange-specific API
        print(f"[trade] Would connect to {self.config.exchange}")
        self.connected = True
        return True

    def analyze_market(self, symbol: str = "BTC/USDT") -> Dict:
        """Return a placeholder analysis for *symbol* (always "HOLD")."""
        analysis = {
            "symbol": symbol,
            "timestamp": time.time(),
            "recommendation": "HOLD",
            "confidence": 0.5,
            "signals": [],
        }
        # Would implement real technical analysis here
        # RSI, MACD, moving averages, etc.
        return analysis

    def execute_trade(self, symbol: str, side: str, amount: float, confirm: bool = True) -> Dict:
        """Record a simulated trade after the safety checks pass.

        Returns ``{"error": ...}`` when *amount* is not a positive number
        or exceeds ``config.max_trade_amount``, ``{"status": "cancelled"}``
        when the interactive confirmation is declined or cannot be read,
        and the stored trade dict otherwise.
        """
        # Safety checks. ``not amount > 0`` also rejects NaN, which would
        # otherwise pass the upper-bound comparison below.
        if not amount > 0:
            return {"error": f"Amount ${amount} must be greater than zero"}
        if amount > self.config.max_trade_amount:
            return {"error": f"Amount ${amount} exceeds max ${self.config.max_trade_amount}"}

        if confirm:
            print("\n⚠️ TRADE CONFIRMATION")
            print(f" Symbol: {symbol}")
            print(f" Side: {side}")
            print(f" Amount: ${amount}")
            try:
                response = input("Execute? (yes/no): ").strip().lower()
            except EOFError:
                # No interactive stdin: never trade without an explicit yes.
                return {"status": "cancelled"}
            if response not in ['yes', 'y']:
                return {"status": "cancelled"}

        trade = {
            "symbol": symbol,
            "side": side,
            "amount": amount,
            "timestamp": time.time(),
            "status": "simulated",  # Would be "executed" with real API
        }
        self.trades.append(trade)
        return trade

    def stats(self) -> Dict:
        """Return connection state, counters and the configured trade cap."""
        return {
            "connected": self.connected,
            "exchange": self.config.exchange,
            "positions": len(self.positions),
            "trades": len(self.trades),
            "pnl": self.pnl,
            "max_trade": self.config.max_trade_amount,
        }
class TaskAutomator:
    """Open micro-task sites in the browser; task parsing is not implemented."""

    TASK_SITES = {
        "mturk": "https://worker.mturk.com/",
        "prolific": "https://app.prolific.co/",
        "clickworker": "https://www.clickworker.com/",
    }

    def __init__(self, config: "RevenueConfig"):
        self.config = config
        self.completed_tasks = 0
        self.earnings: float = 0.0

    def find_tasks(self, platform: str = "mturk") -> List[Dict]:
        """Open *platform* in the browser and return the tasks found.

        Unknown platforms fall back to the "mturk" URL. The page is read
        but not parsed yet, so the returned list is currently always empty.
        Returns ``[]`` immediately when the browser is unavailable; browser
        errors are printed, not raised.
        """
        if not BROWSER_OK:
            return []

        tasks = []
        try:
            url = self.TASK_SITES.get(platform, self.TASK_SITES["mturk"])
            tool_browser_open(url)
            time.sleep(2)
            # The page text is fetched but intentionally not bound to a name
            # until real parsing exists.
            tool_browser_read()
            # Would parse actual tasks from the page
        except Exception as e:
            print(f"[tasks] Error: {e}")
        return tasks

    def stats(self) -> Dict:
        """Return the completed-task count, earnings and known platforms."""
        return {
            "completed": self.completed_tasks,
            "earnings": self.earnings,
            "platforms": list(self.TASK_SITES.keys()),
        }
def set_goal(self, amount: float, period: str = "daily") -> str:
    """Set the daily or weekly revenue goal and return a status message.

    *period* is matched case-insensitively against "daily" and "weekly".
    Any other value leaves the configuration untouched and returns an
    error message instead of a false confirmation.
    """
    goal_fields = {"daily": "daily_goal", "weekly": "weekly_goal"}
    key = period.strip().lower()
    field_name = goal_fields.get(key)
    if field_name is None:
        return f"✗ Unknown period '{period}' (use 'daily' or 'weekly')"
    setattr(self.config, field_name, amount)
    return f"✓ {key.capitalize()} goal set to ${amount:.2f}"
"80x": "use_cfhot_80x", "cfhot80x": "use_cfhot_80x", "book": "book_mode", "idea": "idea_mode", "api": "api_enabled", "opus": "idea_use_opus", } name = aliases.get(name.lower(), name) if not hasattr(Config, name): return f"[config] No flag: {name}" val = getattr(Config, name) if isinstance(val, bool): setattr(Config, name, not val) new_val = getattr(Config, name) # Side effects if name == "book_mode": Config.max_new_tokens = Config.book_max_tokens if new_val else Config.normal_max_tokens if name == "use_cfhot_80x": if new_val: get_cfhot_head().load() else: get_cfhot_head().unload() return f"[config] {name} → {new_val}" return f"[config] {name} not boolean: {val}" # ============================================================================== # STATE & MEMORY # ============================================================================== class Store: state_path = f"{RUN_DIR}/state_v2.json" mem_path = f"{RUN_DIR}/memory_v2.jsonl" goals_path = f"{RUN_DIR}/goals_v2.json" improvement_log_path = f"{LOGS_DIR}/improvement_history.json" state = { "self": "I am Übermenschetien Agentic Engine v2 — stable self-improvement.", "turn": 0, "cfhot_interventions": {"repetition": 0, "hedging": 0, "verbosity": 0}, "improvement_iterations": 0, "training_runs": [], "current_checkpoint": DENSE_CHECKPOINT, "best_checkpoint": DENSE_CHECKPOINT, "best_quality_score": 0.0, "quality_history": [], "rollback_count": 0, } goals: List[str] = [] improvement_history: List[Dict] = [] @classmethod def load(cls): if os.path.exists(cls.state_path): with open(cls.state_path) as f: loaded = json.load(f) cls.state.update(loaded) if os.path.exists(cls.goals_path): with open(cls.goals_path) as f: cls.goals = json.load(f) if os.path.exists(cls.improvement_log_path): with open(cls.improvement_log_path) as f: cls.improvement_history = json.load(f) @classmethod def save(cls): with open(cls.state_path, "w") as f: json.dump(cls.state, f, indent=2) with open(cls.goals_path, "w") as f: json.dump(cls.goals, f, 
class AgentTools:
    """Agentic helpers: shell/Python execution, file access, local and web search.

    Every public method is a static method that reports its outcome as a dict
    with a ``success`` flag instead of raising. Relative paths are resolved
    against ``ROOT``.
    """

    @staticmethod
    def _resolve(path: str) -> str:
        """Return *path* unchanged when absolute, otherwise joined onto ``ROOT``."""
        return path if os.path.isabs(path) else os.path.join(ROOT, path)

    @staticmethod
    def _parse_snippets(page_html: str, limit: int = 5) -> List[str]:
        """Extract up to *limit* non-empty result snippets from a results page.

        A snippet is the text content of any element whose ``class`` attribute
        is ``result__snippet``; nested markup is stripped and each snippet is
        capped at 500 characters.
        """
        snippets: List[str] = []
        # The closing tag is matched by back-reference: a pattern that ends in
        # a bare lazy group would always capture the empty string.
        pattern = r'<(\w+)[^>]*class="result__snippet"[^>]*>(.*?)</\1>'
        for match in re.finditer(pattern, page_html, re.DOTALL):
            text = re.sub(r'<[^>]+>', '', match.group(2)).strip()
            if not text:
                continue
            snippets.append(text[:500])
            if len(snippets) >= limit:
                break
        return snippets

    @staticmethod
    def shell(cmd: str, timeout: int = 300) -> Dict[str, Any]:
        """Run *cmd* through the system shell with ``ROOT`` as working directory.

        Returns ``{"success", "output", "returncode"}``. ``output`` is stdout
        followed by stderr, truncated to 10000 characters. A timeout or any
        other failure yields ``success=False`` and ``returncode=-1``.
        """
        print(f"[SHELL] {cmd[:100]}...")
        try:
            result = subprocess.run(
                cmd, shell=True, capture_output=True, text=True,
                timeout=timeout, cwd=ROOT
            )
            output = result.stdout + result.stderr
            success = result.returncode == 0
            print(f"[SHELL] {'✓' if success else '✗'} (exit {result.returncode})")
            return {"success": success, "output": output[:10000], "returncode": result.returncode}
        except subprocess.TimeoutExpired:
            return {"success": False, "output": "Command timed out", "returncode": -1}
        except Exception as e:
            return {"success": False, "output": str(e), "returncode": -1}

    @staticmethod
    def python_exec(code: str) -> Dict[str, Any]:
        """Run *code* as a script in a child interpreter (300 s limit).

        The code is written to a uniquely named temporary file under ``ROOT``
        which is removed afterwards, including when the child times out or
        fails to start. Returns the same dict shape as ``shell``.
        """
        import tempfile

        print(f"[PYTHON] Executing {len(code)} chars...")
        tmp_file = None
        try:
            # A unique name keeps overlapping calls from clobbering each other.
            fd, tmp_file = tempfile.mkstemp(prefix="_agentic_tmp_", suffix=".py", dir=ROOT)
            # Python reads source files as UTF-8 by default, so write UTF-8
            # rather than whatever the locale encoding happens to be.
            with os.fdopen(fd, 'w', encoding="utf-8") as f:
                f.write(code)
            result = subprocess.run(
                [sys.executable, tmp_file],
                capture_output=True, text=True, timeout=300, cwd=ROOT
            )
            output = result.stdout + result.stderr
            success = result.returncode == 0
            print(f"[PYTHON] {'✓' if success else '✗'}")
            return {"success": success, "output": output[:10000], "returncode": result.returncode}
        except Exception as e:
            return {"success": False, "output": str(e), "returncode": -1}
        finally:
            if tmp_file is not None:
                try:
                    os.remove(tmp_file)
                except OSError:
                    pass  # best-effort cleanup; the file may already be gone

    @staticmethod
    def read_file(path: str) -> Dict[str, Any]:
        """Read a text file; ``content`` is truncated to 50000 characters."""
        try:
            with open(AgentTools._resolve(path), 'r') as f:
                content = f.read()
            return {"success": True, "content": content[:50000]}
        except Exception as e:
            return {"success": False, "error": str(e)}

    @staticmethod
    def write_file(path: str, content: str) -> Dict[str, Any]:
        """Write *content* to *path*, creating missing parent directories."""
        try:
            full_path = AgentTools._resolve(path)
            os.makedirs(os.path.dirname(full_path) or '.', exist_ok=True)
            with open(full_path, 'w') as f:
                f.write(content)
            return {"success": True, "path": full_path}
        except Exception as e:
            return {"success": False, "error": str(e)}

    @staticmethod
    def list_dir(path: str = ".") -> Dict[str, Any]:
        """List the entry names of a directory."""
        try:
            return {"success": True, "items": os.listdir(AgentTools._resolve(path))}
        except Exception as e:
            return {"success": False, "error": str(e)}

    @staticmethod
    def search_files(query: str, path: str = ".") -> Dict[str, Any]:
        """Recursively grep for *query* under *path*, returning the first 50 hits.

        Both arguments are shell-quoted, so quotes, ``$`` or backticks in the
        query are searched for literally instead of being interpreted by the
        shell. As a consequence *path* is a single path, not a glob.
        """
        import shlex

        cmd = f"grep -rn -e {shlex.quote(query)} -- {shlex.quote(path)} 2>/dev/null | head -50"
        return AgentTools.shell(cmd)

    @staticmethod
    def web_search(query: str) -> Dict[str, Any]:
        """Query DuckDuckGo's HTML endpoint and return up to five text snippets."""
        if not REQUESTS_OK:
            return {"success": False, "error": "requests not installed"}
        try:
            # Passing the query via ``params`` lets requests URL-encode it, so
            # characters such as ``&`` or ``#`` cannot corrupt the URL.
            response = requests.get(
                "https://html.duckduckgo.com/html/",
                params={"q": query},
                headers={'User-Agent': 'Mozilla/5.0'},
                timeout=10,
            )
            return {"success": True, "results": AgentTools._parse_snippets(response.text)}
        except Exception as e:
            return {"success": False, "error": str(e)}
def _init_cfhot():
    """Build the CF-HoT multi-head predictor and the suppression token sets.

    Reads the module-level ``_model`` and ``_tokenizer`` and assigns the
    module-level ``_multi_head``, ``_hedge_tokens`` and ``_verbose_tokens``.
    Missing or unreadable checkpoints are reported with a printed warning and
    skipped, so the predictor may end up with any subset of heads loaded.
    """
    global _multi_head, _hedge_tokens, _verbose_tokens

    n_layers = _model.config.num_hidden_layers
    d_model = _model.config.hidden_size
    device = next(_model.parameters()).device

    print(f"[cf-hot] Initializing multi-head predictor ({n_layers} layers, {d_model} dims)")
    _multi_head = MultiHeadPredictor(d_model, n_layers).to(device).float()

    def on_device(tensor):
        """Move a checkpoint tensor to the predictor's device as float32."""
        return tensor.to(device).float()

    # Load CF-HoT checkpoint if available
    cfhot_risk_path = os.path.join(CFHOT_CHECKPOINT, "risk_predictor.pt")
    if os.path.exists(cfhot_risk_path):
        try:
            # NOTE(review): weights_only=False unpickles arbitrary objects;
            # only point CFHOT_CHECKPOINT at checkpoints you trust.
            cfhot_ckpt = torch.load(cfhot_risk_path, weights_only=False, map_location=device)
            cfhot_state = cfhot_ckpt['risk_predictor']

            for i in range(n_layers):
                key = f'fiber_projs.{i}.weight'
                if key in cfhot_state:
                    _multi_head.fiber_projs[i].weight.data = on_device(cfhot_state[key])

            if 'layer_weights' in cfhot_state:
                _multi_head.layer_weights.data = on_device(cfhot_state['layer_weights'])

            # Load repetition head. Every tensor is looked up before any is
            # assigned, so a missing key cannot leave the head half-overwritten.
            try:
                rep_head = _multi_head.heads['repetition']
                rep_tensors = [
                    (idx,
                     on_device(cfhot_state[f'predictor.{idx}.weight']),
                     on_device(cfhot_state[f'predictor.{idx}.bias']))
                    for idx in (0, 2, 4)
                ]
            except KeyError as e:
                print(f"[cf-hot] Warning: Could not load repetition head: {e}")
            else:
                for idx, weight, bias in rep_tensors:
                    rep_head[idx].weight.data = weight
                    rep_head[idx].bias.data = bias
                _multi_head.loaded_heads.add('repetition')
                print(f"[cf-hot] Loaded repetition head")
        except Exception as e:
            print(f"[cf-hot] Warning: Could not load CF-HoT: {e}")
    else:
        print(f"[cf-hot] Warning: CF-HoT risk predictor not found")

    def find_best_checkpoint(head_dir):
        """Return ``(step, path)`` of the highest-numbered ``ckpt_<step>`` dir, or None."""
        if not os.path.exists(head_dir):
            return None
        ckpts = []
        for entry in os.listdir(head_dir):
            if not entry.startswith("ckpt_"):
                continue
            try:
                step = int(entry.split("_")[1])
            except ValueError:
                continue  # e.g. "ckpt_final": not a numbered checkpoint
            ckpts.append((step, os.path.join(head_dir, entry)))
        return max(ckpts, key=lambda item: item[0]) if ckpts else None

    # Load additional heads
    for head_name in ("hedging", "verbosity"):
        best = find_best_checkpoint(os.path.join(MULTI_HEAD_DIR, f"{head_name}_head"))
        if best:
            _step, ckpt_dir = best
            _multi_head.load_head(head_name, os.path.join(ckpt_dir, f"{head_name}_head.pt"))

    # The predictor is used for inference only.
    _multi_head.eval()
    for param in _multi_head.parameters():
        param.requires_grad = False

    def first_token_ids(phrases):
        """Collect the first token id of each phrase (phrases encoding to nothing are skipped)."""
        ids = set()
        for phrase in phrases:
            tokens = _tokenizer.encode(phrase, add_special_tokens=False)
            if tokens:
                ids.add(tokens[0])
        return ids

    # Build suppression token sets
    _hedge_tokens = first_token_ids([
        "As an AI", "As a language model", "I don't have feelings",
        "I apologize", "That's a great question", "Great question",
        "I'd be happy to", "Let me help you", "Thank you for asking",
    ])
    _verbose_tokens = first_token_ids([
        "Let me explain", "To put it simply", "In other words",
        "Basically", "Essentially", "First of all", "To begin with",
    ])

    print(f"[cf-hot] ✓ Multi-head system ready")
    print(f"[cf-hot] Loaded heads: {list(_multi_head.loaded_heads)}")
    print(f"[cf-hot] Hedge tokens: {len(_hedge_tokens)}")
    print(f"[cf-hot] Verbose tokens: {len(_verbose_tokens)}")
def generate_with_cfhot(prompt: str, **kwargs) -> Tuple[str, Dict]:
    """Sample a completion for *prompt* token by token, applying CF-HoT penalties.

    Keyword arguments ``temperature``, ``top_p`` and ``max_new_tokens``
    override the corresponding ``Config`` values. A temperature of ``None``
    or ``<= 0`` selects greedy decoding instead of dividing by zero.

    At every step, each loaded risk head whose score for the last position
    exceeds its ``Config`` threshold subtracts its penalty from a set of
    logits: the last 32 generated ids for repetition, the precomputed
    hedge/verbose token sets for the other two. Interventions are counted in
    the returned stats and in ``Store.state['cfhot_interventions']``.

    Returns:
        ``(text, stats)`` where *text* is the assistant turn extracted from
        the decoded sequence and *stats* holds ``tokens_generated`` and the
        per-head ``interventions`` counters.
    """
    global _model, _tokenizer, _multi_head, _hedge_tokens, _verbose_tokens

    temperature = kwargs.get("temperature", Config.temperature)
    top_p = kwargs.get("top_p", Config.top_p)
    max_new_tokens = kwargs.get("max_new_tokens", Config.max_new_tokens)
    greedy = temperature is None or temperature <= 0

    device = next(_model.parameters()).device
    input_ids = _tokenizer.encode(prompt, return_tensors='pt').to(device)
    attention_mask = torch.ones_like(input_ids)

    stats = {
        'tokens_generated': 0,
        'interventions': {'repetition': 0, 'hedging': 0, 'verbosity': 0},
    }

    # The set of loaded heads does not change while generating, so decide
    # once whether hidden states are needed at all.
    heads_active = _multi_head is not None and bool(_multi_head.loaded_heads)

    def intervene(logits, kind, token_ids, penalty):
        """Subtract *penalty* from the logits of *token_ids* and count the intervention."""
        ids = list(token_ids)
        if not ids:
            return
        logits[0, ids] -= penalty
        stats['interventions'][kind] += 1
        Store.state['cfhot_interventions'][kind] += 1

    generated_ids = input_ids.clone()

    for _ in range(max_new_tokens):
        with torch.no_grad():
            outputs = _model(
                input_ids=generated_ids,
                attention_mask=attention_mask,
                output_hidden_states=heads_active,
                return_dict=True
            )

            # Work in float32: the cumulative sum used for top-p loses too
            # much precision in half/bfloat16 over a full vocabulary.
            logits = outputs.logits[:, -1, :].float()
            if not greedy:
                logits = logits / temperature

            if heads_active:
                hidden_states = outputs.hidden_states[1:]
                risks = _multi_head.get_all_risks(hidden_states)
                current_risks = {name: r[:, -1].item() for name, r in risks.items()}

                if current_risks.get('repetition', float('-inf')) > Config.cfhot_repetition_threshold:
                    recent_tokens = set(generated_ids[0, -32:].tolist())
                    intervene(logits, 'repetition', recent_tokens, Config.cfhot_repetition_penalty)

                # Verbosity head intervention (RISK-BASED using trained head)
                # Note: 2.14× separation is weak, but better than nothing
                if current_risks.get('verbosity', float('-inf')) > Config.cfhot_verbosity_threshold:
                    intervene(logits, 'verbosity', _verbose_tokens or (), Config.cfhot_verbosity_penalty)

                # Hedging head intervention (RISK-BASED using trained head)
                # Note: 1.49× separation is weak, but contributes
                if current_risks.get('hedging', float('-inf')) > Config.cfhot_hedging_threshold:
                    intervene(logits, 'hedging', _hedge_tokens or (), Config.cfhot_hedging_penalty)

            if greedy:
                next_token = torch.argmax(logits, dim=-1, keepdim=True)
            else:
                # Top-p sampling: drop the tail whose cumulative probability
                # exceeds top_p, always keeping the most likely token.
                sorted_logits, sorted_indices = torch.sort(logits, descending=True)
                cumulative_probs = torch.cumsum(F.softmax(sorted_logits, dim=-1), dim=-1)
                sorted_indices_to_remove = cumulative_probs > top_p
                sorted_indices_to_remove[..., 1:] = sorted_indices_to_remove[..., :-1].clone()
                sorted_indices_to_remove[..., 0] = 0
                indices_to_remove = sorted_indices_to_remove.scatter(
                    1, sorted_indices, sorted_indices_to_remove
                )
                logits[indices_to_remove] = float('-inf')

                probs = F.softmax(logits, dim=-1)
                next_token = torch.multinomial(probs, num_samples=1)

        generated_ids = torch.cat([generated_ids, next_token], dim=-1)
        # new_ones keeps the mask's dtype and device; a default torch.ones
        # would be float and force a dtype promotion on concatenation.
        attention_mask = torch.cat([attention_mask, attention_mask.new_ones((1, 1))], dim=-1)
        stats['tokens_generated'] += 1

        if next_token.item() == _tokenizer.eos_token_id:
            break

    output_text = _tokenizer.decode(generated_ids[0], skip_special_tokens=False)

    # Keep only the text of the last assistant turn, without ChatML markers.
    if "<|im_start|>assistant" in output_text:
        output_text = output_text.split("<|im_start|>assistant")[-1]
        if output_text.startswith("\n"):
            output_text = output_text[1:]

    for end_tok in ["<|im_end|>", "<|im_start|>"]:
        if end_tok in output_text:
            output_text = output_text.split(end_tok)[0]

    return output_text.strip(), stats
temperature=temperature, max_new_tokens=max_new_tokens ) # Evaluate the response eval_result = _evaluator.evaluate(user, text) return text, stats, eval_result # ============================================================================== # STABLE SELF-IMPROVEMENT SYSTEM # ============================================================================== class StableSelfImprover: """ Self-improvement system with safeguards against collapse: 1. Comprehensive evaluation (not just density) 2. Rollback on quality drop 3. Conservative training (small steps) 4. Diverse training examples 5. A/B testing between checkpoints """ def __init__(self): self.test_prompts = self._select_test_prompts() self.baseline_quality = 0.0 def _select_test_prompts(self) -> List[Dict]: """Select diverse test prompts.""" # Mix of short and long, different categories return [ {"prompt": "hello", "category": "greeting"}, {"prompt": "hi there", "category": "greeting"}, {"prompt": "What is recursion?", "category": "cs"}, {"prompt": "Explain neural networks", "category": "ml"}, {"prompt": "How does gradient descent work?", "category": "ml"}, {"prompt": "What is consciousness?", "category": "philosophy"}, {"prompt": "Explain entropy", "category": "physics"}, {"prompt": "How does encryption work?", "category": "cs"}, {"prompt": "What are your limitations?", "category": "meta"}, {"prompt": "How do I learn programming?", "category": "practical"}, ] def evaluate_current_model(self) -> Dict[str, Any]: """Comprehensive evaluation of current model.""" print("\n[EVAL] Testing current model...") results = [] total_quality = 0.0 category_scores = {} for test in self.test_prompts: prompt = test["prompt"] category = test["category"] # Generate response response, stats, eval_result = generate(prompt, max_new_tokens=200) results.append({ 'prompt': prompt, 'response': response[:200], 'category': category, 'tokens': eval_result.tokens, 'density': eval_result.density_score, 'coherence': eval_result.coherence_score, 
'helpfulness': eval_result.helpfulness_score, 'gibberish': eval_result.gibberish_score, 'fillers': eval_result.filler_count, 'overall': eval_result.overall_score, 'passes': eval_result.passes, }) total_quality += eval_result.overall_score if category not in category_scores: category_scores[category] = [] category_scores[category].append(eval_result.overall_score) status = "✓" if eval_result.passes else "✗" print(f" {status} {prompt[:35]:35s} | qual={eval_result.overall_score:.2f} tok={eval_result.tokens:3d} coh={eval_result.coherence_score:.2f} gib={eval_result.gibberish_score:.2f}") avg_quality = total_quality / len(results) pass_rate = sum(1 for r in results if r['passes']) / len(results) # Category breakdown cat_averages = {cat: sum(scores)/len(scores) for cat, scores in category_scores.items()} evaluation = { 'avg_quality': avg_quality, 'pass_rate': pass_rate, 'category_scores': cat_averages, 'results': results, 'needs_improvement': avg_quality < Config.target_quality_score, 'is_degraded': avg_quality < Config.min_quality_score, } print(f"\n[EVAL] Avg Quality: {avg_quality:.2f} (target: {Config.target_quality_score})") print(f"[EVAL] Pass Rate: {pass_rate:.1%}") print(f"[EVAL] Category Scores: {cat_averages}") print(f"[EVAL] Needs Improvement: {evaluation['needs_improvement']}") if evaluation['is_degraded']: print(f"[EVAL] ⚠️ WARNING: Quality below minimum threshold!") return evaluation def save_rollback_checkpoint(self): """Save current checkpoint as rollback point.""" current = Store.state.get("current_checkpoint", DENSE_CHECKPOINT) rollback_path = os.path.join(ROLLBACK_DIR, f"rollback_{datetime.now().strftime('%Y%m%d_%H%M%S')}") if os.path.exists(current): shutil.copytree(current, rollback_path) print(f"[ROLLBACK] Saved rollback checkpoint: {rollback_path}") return rollback_path return None def rollback_to_best(self): """Rollback to best known checkpoint.""" best = Store.state.get("best_checkpoint", DENSE_CHECKPOINT) print(f"\n[ROLLBACK] Rolling back to best 
checkpoint: {best}") Store.state["rollback_count"] = Store.state.get("rollback_count", 0) + 1 reload_model(best) return best def run_training_iteration(self, steps: int = None) -> Dict[str, Any]: """Run one CONSERVATIVE iteration of training.""" steps = steps or Config.training_steps_per_iteration print(f"\n[TRAIN] Starting {steps} steps of CONSERVATIVE training...") print(f"[TRAIN] Using {len(DENSE_TRAINING_EXAMPLES)} training examples") # Find current checkpoint step checkpoints = sorted(Path(CHECKPOINTS_DIR).glob("step_*"), key=lambda p: int(p.name.split('_')[1]) if p.name.split('_')[1].isdigit() else 0, reverse=True) if checkpoints: latest_step = int(checkpoints[0].name.split('_')[1]) new_step = latest_step + steps else: latest_step = 100 new_step = latest_step + steps current_ckpt = Store.state.get('current_checkpoint', DENSE_CHECKPOINT) # Prepare training data training_data = json.dumps(DENSE_TRAINING_EXAMPLES) # Create conservative training script training_script = f''' import sys sys.path.insert(0, "{ROOT}") import torch import json import random from transformers import AutoTokenizer, AutoModelForCausalLM, BitsAndBytesConfig from peft import PeftModel, get_peft_model, LoraConfig import os print("Loading model for CONSERVATIVE training...") MODEL_PATH = "{MODEL_PATH}" CHECKPOINT = "{current_ckpt}" tokenizer = AutoTokenizer.from_pretrained(MODEL_PATH, local_files_only=True) tokenizer.pad_token = tokenizer.eos_token model = AutoModelForCausalLM.from_pretrained( MODEL_PATH, quantization_config=BitsAndBytesConfig( load_in_4bit=True, bnb_4bit_quant_type="nf4", bnb_4bit_compute_dtype=torch.bfloat16, ), device_map="auto", torch_dtype=torch.bfloat16, local_files_only=True ) if os.path.exists(CHECKPOINT): model = PeftModel.from_pretrained(model, CHECKPOINT, is_trainable=True) print(f"Loaded checkpoint: {{CHECKPOINT}}") else: lora_config = LoraConfig( r=16, lora_alpha=32, target_modules=["q_proj", "v_proj", "k_proj", "o_proj"], lora_dropout=0.05 ) model = 
get_peft_model(model, lora_config) print("Created new LoRA adapter") # Load diverse training data training_examples = {training_data} print(f"Training on {{len(training_examples)}} diverse examples for {steps} steps...") # Conservative optimizer with LOW learning rate optimizer = torch.optim.AdamW(model.parameters(), lr=2e-6) # Very low LR model.train() total_loss = 0 losses = [] for step in range({steps}): # Randomly sample an example (ensures diversity) ex = random.choice(training_examples) prompt = ex["prompt"] response = ex["response"] # Format for ChatML full_text = f"<|im_start|>user\\n{{prompt}}<|im_end|>\\n<|im_start|>assistant\\n{{response}}<|im_end|>" inputs = tokenizer(full_text, return_tensors="pt", truncation=True, max_length=512) inputs = {{k: v.to(model.device) for k, v in inputs.items()}} outputs = model(**inputs, labels=inputs["input_ids"]) loss = outputs.loss optimizer.zero_grad() loss.backward() # Gradient clipping for stability torch.nn.utils.clip_grad_norm_(model.parameters(), 0.5) optimizer.step() total_loss += loss.item() losses.append(loss.item()) if step % 5 == 0: recent_avg = sum(losses[-5:]) / len(losses[-5:]) if losses[-5:] else 0 print(f"Step {{step}}: loss={{loss.item():.4f}}, recent_avg={{recent_avg:.4f}}") # Save checkpoint save_path = "{CHECKPOINTS_DIR}/step_{new_step}" model.save_pretrained(save_path) final_avg_loss = total_loss / {steps} print(f"\\nSaved checkpoint to {{save_path}}") print(f"Final avg loss: {{final_avg_loss:.4f}}") print("TRAINING_COMPLETE") ''' script_path = os.path.join(ROOT, "_stable_train.py") with open(script_path, 'w') as f: f.write(training_script) result = AgentTools.shell(f"python {script_path}", timeout=600) if "TRAINING_COMPLETE" in result.get('output', ''): new_checkpoint = f"{CHECKPOINTS_DIR}/step_{new_step}" Store.state['training_runs'].append({ 'timestamp': datetime.now().isoformat(), 'steps': steps, 'checkpoint': new_checkpoint }) Store.save() return { 'success': True, 'new_checkpoint': 
new_checkpoint, 'output': result['output'][-2000:] } else: return { 'success': False, 'output': result['output'][-2000:] } def compare_checkpoints(self, old_ckpt: str, new_ckpt: str) -> Dict[str, Any]: """A/B test two checkpoints.""" print(f"\n[COMPARE] A/B Testing checkpoints...") print(f" OLD: {old_ckpt}") print(f" NEW: {new_ckpt}") # Evaluate old reload_model(old_ckpt) old_eval = self.evaluate_current_model() # Evaluate new reload_model(new_ckpt) new_eval = self.evaluate_current_model() # Compare quality_diff = new_eval['avg_quality'] - old_eval['avg_quality'] pass_diff = new_eval['pass_rate'] - old_eval['pass_rate'] print(f"\n[COMPARE] Results:") print(f" OLD quality: {old_eval['avg_quality']:.3f}, pass rate: {old_eval['pass_rate']:.1%}") print(f" NEW quality: {new_eval['avg_quality']:.3f}, pass rate: {new_eval['pass_rate']:.1%}") print(f" Quality diff: {quality_diff:+.3f}") # Decision logic keep_new = False reason = "" if new_eval['is_degraded']: keep_new = False reason = "New checkpoint quality below minimum threshold" elif quality_diff > 0.02: keep_new = True reason = f"New checkpoint improves quality by {quality_diff:.3f}" elif quality_diff < -Config.quality_drop_threshold: keep_new = False reason = f"New checkpoint degrades quality by {abs(quality_diff):.3f}" elif quality_diff >= 0: keep_new = True reason = "New checkpoint maintains or slightly improves quality" else: keep_new = False reason = "New checkpoint slightly degrades quality - keeping stable" print(f"[COMPARE] Decision: {'KEEP NEW' if keep_new else 'KEEP OLD'} - {reason}") return { 'keep_new': keep_new, 'reason': reason, 'old_eval': old_eval, 'new_eval': new_eval, 'quality_diff': quality_diff, } def improve(self, max_iterations: int = None) -> Dict[str, Any]: """Main self-improvement loop with stability safeguards.""" max_iterations = max_iterations or Config.max_improvement_iterations print("\n" + "=" * 70) print("🔄 STABLE SELF-IMPROVEMENT LOOP (v2)") print("=" * 70) print(f" Max iterations: 
{max_iterations}") print(f" Steps per iteration: {Config.training_steps_per_iteration}") print(f" Training examples: {len(DENSE_TRAINING_EXAMPLES)}") print(f" Target quality: {Config.target_quality_score}") print(f" Quality drop threshold: {Config.quality_drop_threshold}") print("=" * 70) # Initial evaluation print("\n[IMPROVE] Initial evaluation...") baseline = self.evaluate_current_model() self.baseline_quality = baseline['avg_quality'] # Save as best if better than current best if baseline['avg_quality'] > Store.state.get('best_quality_score', 0): Store.state['best_quality_score'] = baseline['avg_quality'] Store.state['best_checkpoint'] = Store.state.get('current_checkpoint', DENSE_CHECKPOINT) history = [{ 'iteration': 0, 'type': 'baseline', 'quality': baseline['avg_quality'], 'pass_rate': baseline['pass_rate'], 'checkpoint': Store.state.get('current_checkpoint'), }] for iteration in range(1, max_iterations + 1): print(f"\n{'=' * 70}") print(f"ITERATION {iteration}/{max_iterations}") print("=" * 70) # Check if target reached if not baseline.get('needs_improvement', True): print(f"\n✓ TARGET REACHED! Quality: {baseline['avg_quality']:.3f}") Store.record_improvement({ 'status': 'target_reached', 'final_quality': baseline['avg_quality'], 'iterations': iteration - 1, 'history': history }) return { 'success': True, 'status': 'target_reached', 'iterations': iteration - 1, 'final_quality': baseline['avg_quality'], 'history': history } # Check for degradation if baseline.get('is_degraded', False): print(f"\n⚠️ QUALITY DEGRADED! 
Rolling back...") self.rollback_to_best() Store.record_improvement({ 'status': 'rolled_back', 'reason': 'quality_degraded', 'iteration': iteration, 'history': history }) return { 'success': False, 'status': 'rolled_back', 'reason': 'quality_degraded', 'history': history } # Save rollback point before training self.save_rollback_checkpoint() old_checkpoint = Store.state.get('current_checkpoint', DENSE_CHECKPOINT) # Run training print(f"\n[IMPROVE] Quality {baseline['avg_quality']:.3f} < target {Config.target_quality_score}") training_result = self.run_training_iteration() if not training_result['success']: print("[IMPROVE] ⚠️ Training failed!") history.append({ 'iteration': iteration, 'type': 'training_failed', 'error': training_result['output'][-500:] }) continue # A/B compare old vs new comparison = self.compare_checkpoints(old_checkpoint, training_result['new_checkpoint']) iteration_record = { 'iteration': iteration, 'type': 'comparison', 'old_quality': comparison['old_eval']['avg_quality'], 'new_quality': comparison['new_eval']['avg_quality'], 'quality_diff': comparison['quality_diff'], 'kept': 'new' if comparison['keep_new'] else 'old', 'reason': comparison['reason'], } history.append(iteration_record) # Decision if comparison['keep_new']: Store.state['current_checkpoint'] = training_result['new_checkpoint'] # Update best if improved if comparison['new_eval']['avg_quality'] > Store.state.get('best_quality_score', 0): Store.state['best_quality_score'] = comparison['new_eval']['avg_quality'] Store.state['best_checkpoint'] = training_result['new_checkpoint'] print(f"[IMPROVE] ★ New best! 
ALLOWED_SHELL = {"ls", "cat", "wc", "head", "tail", "nvidia-smi", "df", "du", "grep", "rg", "python3", "python"}


def tool_shell(cmd: str) -> str:
    """Run a single allow-listed command and return its combined output.

    *cmd* is split with shell-style quoting rules and executed directly,
    without a shell, so the allow-list check on the first word covers the
    whole invocation: ``;``, ``|``, ``&&``, ``$(...)`` and redirections are
    passed to the program as literal arguments instead of being interpreted.

    Returns:
        Up to 8000 characters of stdout+stderr, a ``[shell] blocked: ...``
        message when the executable is not in ``ALLOWED_SHELL``, or a
        ``[shell] error: ...`` message on any failure (including a 20 second
        timeout, an empty command, or unbalanced quotes).
    """
    import shlex

    try:
        argv = shlex.split(cmd)
        if not argv:
            return "[shell] error: empty command"
        exe = argv[0]
        if exe not in ALLOWED_SHELL:
            return f"[shell] blocked: {exe} (use !shell for full access)"
        p = subprocess.run(argv, stdout=subprocess.PIPE, stderr=subprocess.STDOUT, timeout=20)
        return p.stdout.decode("utf-8", errors="ignore")[:8000]
    except Exception as e:
        return f"[shell] error: {e}"
def tool_browser_open(url: str = "https://google.com") -> str:
    """Navigate the shared browser page to *url*, launching Firefox on first use.

    When ``LOGIN_CONFIG`` enables a persistent profile and provides a profile
    path, a persistent context is launched on that profile; otherwise a fresh
    visible browser is started. Later calls reuse the existing page.

    Returns:
        A two-line status string with the page title and the requested URL,
        or a ``[browser] ...`` message when Playwright is unavailable or any
        step fails.
    """
    global _playwright, _browser, _page, _browser_context
    if not BROWSER_OK:
        return "[browser] Not available - pip install playwright && playwright install firefox"

    try:
        # Gate on the page, not on _browser: the persistent-profile path never
        # assigns _browser, so testing it would relaunch on every call.
        if _page is None:
            if _playwright is None:
                _playwright = sync_playwright().start()

            # Option 1: Persistent profile (uses existing logins)
            if LOGIN_CONFIG.get("use_persistent_profile") and LOGIN_CONFIG.get("firefox_profile_path"):
                profile_path = LOGIN_CONFIG["firefox_profile_path"]
                print(f"[browser] Using persistent profile: {profile_path}")
                _browser_context = _playwright.firefox.launch_persistent_context(
                    profile_path,
                    headless=False,
                    viewport={"width": 1280, "height": 800}
                )
                _page = _browser_context.pages[0] if _browser_context.pages else _browser_context.new_page()
            else:
                # Option 2: Fresh browser (default)
                _browser = _playwright.firefox.launch(headless=False)
                _page = _browser.new_page()
                _page.set_viewport_size({"width": 1280, "height": 800})

        _page.goto(url, timeout=30000)
        title = _page.title()
        return f"[browser] Opened: {title}\n[browser] URL: {url}"
    except Exception as e:
        return f"[browser] Error: {e}"
def tool_browser_close() -> str:
    """Close the browser, any persistent context, and the Playwright driver.

    The module-level handles are reset even when closing fails, so a later
    ``tool_browser_open`` starts from a clean state instead of reusing dead
    objects.

    Returns:
        ``"[browser] Closed"`` on success, ``"[browser] Already closed"`` if
        any close/stop call raised.
    """
    global _playwright, _browser, _page, _browser_context
    try:
        # The persistent-profile path stores its handle in _browser_context and
        # leaves _browser unset; look it up defensively in case the global was
        # never created.
        context = globals().get("_browser_context")
        if context:
            context.close()
        if _browser:
            _browser.close()
        if _playwright:
            _playwright.stop()
        return "[browser] Closed"
    except Exception:
        return "[browser] Already closed"
    finally:
        _browser = None
        _page = None
        _playwright = None
        _browser_context = None
planning needed TASK_TEMPLATES = { "search_news": { "keywords": ["news", "latest news", "articles", "headlines", "what's happening"], "steps": [ {"action": "BROWSE", "args": ["https://news.google.com"]}, {"action": "WAIT", "args": ["2"]}, {"action": "FILL", "args": ["input[name=q]", "{query}"]}, {"action": "PRESS", "args": ["Enter"]}, {"action": "WAIT", "args": ["3"]}, {"action": "READ", "args": []}, ] }, "search_google": { "keywords": ["search for", "google", "look up", "find me", "find info"], "steps": [ {"action": "BROWSE", "args": ["https://google.com"]}, {"action": "WAIT", "args": ["1"]}, {"action": "FILL", "args": ["textarea[name=q]", "{query}"]}, {"action": "PRESS", "args": ["Enter"]}, {"action": "WAIT", "args": ["3"]}, {"action": "READ", "args": []}, ] }, "weather": { "keywords": ["weather", "temperature", "forecast"], "steps": [ {"action": "BROWSE", "args": ["https://weather.com"]}, {"action": "WAIT", "args": ["2"]}, {"action": "READ", "args": []}, ] }, "wikipedia": { "keywords": ["wikipedia", "wiki", "what is", "who is", "define"], "steps": [ {"action": "BROWSE", "args": ["https://en.wikipedia.org"]}, {"action": "WAIT", "args": ["1"]}, {"action": "FILL", "args": ["input[name=search]", "{query}"]}, {"action": "PRESS", "args": ["Enter"]}, {"action": "WAIT", "args": ["2"]}, {"action": "READ", "args": []}, ] }, "youtube": { "keywords": ["youtube", "video", "watch"], "steps": [ {"action": "BROWSE", "args": ["https://youtube.com"]}, {"action": "WAIT", "args": ["2"]}, {"action": "FILL", "args": ["input[name=search_query]", "{query}"]}, {"action": "PRESS", "args": ["Enter"]}, {"action": "WAIT", "args": ["3"]}, {"action": "READ", "args": []}, ] }, # === FREELANCE PLATFORMS === "upwork_search": { "keywords": ["upwork jobs", "freelance jobs", "find gigs", "upwork"], "steps": [ {"action": "BROWSE", "args": ["https://www.upwork.com/nx/jobs/search/?q={query}"]}, {"action": "WAIT", "args": ["3"]}, {"action": "READ", "args": []}, ] }, "fiverr_search": { "keywords": 
["fiverr", "fiverr gigs"], "steps": [ {"action": "BROWSE", "args": ["https://www.fiverr.com/search/gigs?query={query}"]}, {"action": "WAIT", "args": ["3"]}, {"action": "READ", "args": []}, ] }, # === SELLING PLATFORMS === "ebay_search": { "keywords": ["ebay", "sell on ebay", "ebay listings"], "steps": [ {"action": "BROWSE", "args": ["https://www.ebay.com/sch/i.html?_nkw={query}"]}, {"action": "WAIT", "args": ["3"]}, {"action": "READ", "args": []}, ] }, "amazon_search": { "keywords": ["amazon products", "amazon search"], "steps": [ {"action": "BROWSE", "args": ["https://www.amazon.com/s?k={query}"]}, {"action": "WAIT", "args": ["3"]}, {"action": "READ", "args": []}, ] }, # === CRYPTO/FINANCE === "crypto_prices": { "keywords": ["crypto price", "bitcoin", "ethereum", "crypto"], "steps": [ {"action": "BROWSE", "args": ["https://coinmarketcap.com"]}, {"action": "WAIT", "args": ["2"]}, {"action": "READ", "args": []}, ] }, "stock_price": { "keywords": ["stock price", "stocks", "market"], "steps": [ {"action": "BROWSE", "args": ["https://finance.yahoo.com/quote/{query}"]}, {"action": "WAIT", "args": ["2"]}, {"action": "READ", "args": []}, ] } } # === LOGIN TEMPLATES === LOGIN_TEMPLATES = { "gmail": { "url": "https://accounts.google.com/signin", "steps": [ {"action": "BROWSE", "args": ["https://accounts.google.com/signin"]}, {"action": "WAIT", "args": ["2"]}, {"action": "FILL", "args": ["input[type=email]", "{email}"]}, {"action": "PRESS", "args": ["Enter"]}, {"action": "WAIT", "args": ["3"]}, {"action": "FILL", "args": ["input[type=password]", "{password}"]}, {"action": "PRESS", "args": ["Enter"]}, {"action": "WAIT", "args": ["5"]}, ] }, "facebook": { "url": "https://facebook.com/login", "steps": [ {"action": "BROWSE", "args": ["https://facebook.com/login"]}, {"action": "WAIT", "args": ["2"]}, {"action": "FILL", "args": ["input[name=email]", "{email}"]}, {"action": "FILL", "args": ["input[name=pass]", "{password}"]}, {"action": "CLICK", "args": ["button[name=login]"]}, 
{"action": "WAIT", "args": ["5"]}, ] }, "twitter": { "url": "https://twitter.com/login", "steps": [ {"action": "BROWSE", "args": ["https://twitter.com/login"]}, {"action": "WAIT", "args": ["2"]}, {"action": "FILL", "args": ["input[autocomplete=username]", "{email}"]}, {"action": "CLICK", "args": ["text=Next"]}, {"action": "WAIT", "args": ["2"]}, {"action": "FILL", "args": ["input[type=password]", "{password}"]}, {"action": "CLICK", "args": ["text=Log in"]}, {"action": "WAIT", "args": ["5"]}, ] }, "linkedin": { "url": "https://linkedin.com/login", "steps": [ {"action": "BROWSE", "args": ["https://linkedin.com/login"]}, {"action": "WAIT", "args": ["2"]}, {"action": "FILL", "args": ["input[name=session_key]", "{email}"]}, {"action": "FILL", "args": ["input[name=session_password]", "{password}"]}, {"action": "CLICK", "args": ["button[type=submit]"]}, {"action": "WAIT", "args": ["5"]}, ] }, "github": { "url": "https://github.com/login", "steps": [ {"action": "BROWSE", "args": ["https://github.com/login"]}, {"action": "WAIT", "args": ["2"]}, {"action": "FILL", "args": ["input[name=login]", "{email}"]}, {"action": "FILL", "args": ["input[name=password]", "{password}"]}, {"action": "CLICK", "args": ["input[type=submit]"]}, {"action": "WAIT", "args": ["5"]}, ] }, "outlook": { "url": "https://login.live.com", "steps": [ {"action": "BROWSE", "args": ["https://login.live.com"]}, {"action": "WAIT", "args": ["2"]}, {"action": "FILL", "args": ["input[type=email]", "{email}"]}, {"action": "PRESS", "args": ["Enter"]}, {"action": "WAIT", "args": ["3"]}, {"action": "FILL", "args": ["input[type=password]", "{password}"]}, {"action": "PRESS", "args": ["Enter"]}, {"action": "WAIT", "args": ["5"]}, ] }, "generic": { "url": "{url}", "steps": [ {"action": "BROWSE", "args": ["{url}"]}, {"action": "WAIT", "args": ["2"]}, {"action": "FILL", "args": ["input[type=email], input[type=text], input[name=email], input[name=username], input[name=login]", "{email}"]}, {"action": "FILL", "args": 
# Phrases that mark a request as an e-mail search.
_EMAIL_SEARCH_TRIGGERS = (
    "search email", "search my email", "find email", "look for email",
    "search inbox", "search gmail", "search outlook", "find in email",
    "emails about", "emails from", "emails containing",
)

# Phrases that mark a request as composing a new e-mail.
_EMAIL_COMPOSE_TRIGGERS = ("send email", "compose email", "write email", "email to", "send a message to")

# Matches any search trigger (longest first, so "search my email" wins over a
# shorter overlap) plus an optional connective "for"/"about". The word boundary
# keeps "for" from eating the start of words such as "forward".
_EMAIL_SEARCH_PHRASE_RE = re.compile(
    r"(?:%s)(?:\s+(?:for|about)\b)?\s*"
    % "|".join(re.escape(t) for t in sorted(_EMAIL_SEARCH_TRIGGERS, key=len, reverse=True)),
    re.IGNORECASE,
)


def detect_email_action(user_input: str) -> Tuple[Optional[str], Optional[Dict]]:
    """Detect email-related requests.

    Every search trigger is also stripped from the query. Previously only a
    subset was stripped, so e.g. "search outlook for invoices" searched for the
    whole sentence.

    Args:
        user_input: Raw text typed by the user.

    Returns:
        ``("<service>_search", {"query": ...})`` for a search request, where
        service is "outlook" only if the text mentions outlook and not gmail;
        ``("gmail_compose", {"to": "", "subject": "", "body": ""})`` for a
        compose request; ``(None, None)`` otherwise.
    """
    user_lower = user_input.lower()

    # Search email
    if any(trigger in user_lower for trigger in _EMAIL_SEARCH_TRIGGERS):
        query = _EMAIL_SEARCH_PHRASE_RE.sub("", user_input).strip()
        if "gmail" in user_lower or "outlook" not in user_lower:
            service = "gmail"
        else:
            service = "outlook"
        return f"{service}_search", {"query": query}

    # Compose email
    if any(trigger in user_lower for trigger in _EMAIL_COMPOSE_TRIGGERS):
        return "gmail_compose", {"to": "", "subject": "", "body": ""}

    return None, None
def detect_login_request(user_input: str) -> Tuple[Optional[str], Optional[str]]:
    """Detect if user wants to log into something.

    Args:
        user_input: Raw text typed by the user.

    Returns:
        ``(service, None)`` when a known service name from ``LOGIN_TEMPLATES``
        appears in the text, ``("generic", url)`` when only a URL is given, and
        ``(None, None)`` when the text is not a login request or names no
        target.
    """
    user_lower = user_input.lower()

    login_triggers = ("log in", "login", "sign in", "signin", "log into", "sign into")
    if not any(trigger in user_lower for trigger in login_triggers):
        return None, None

    # Detect service. "generic" is the placeholder template that needs a URL,
    # so the word "generic" in a sentence must not be taken as a service name.
    for service in LOGIN_TEMPLATES:
        if service != "generic" and service in user_lower:
            return service, None

    # Check for URL; drop punctuation that belongs to the sentence, not the URL.
    url_match = re.search(r'(https?://[^\s]+)', user_input)
    if url_match:
        url = url_match.group(1).rstrip('.,;:!?)"\'')
        return "generic", url

    return None, None
(yes/no): ").strip().lower() return response in ['yes', 'y'] def execute_login(service: str, email: str, password: str, url: str = None) -> str: """Execute login with multiple fallback strategies - BEAST MODE.""" print(f"\n[login] 🔐 Attempting {service} login...") print(f"[login] Using Firefox (less detection)") if service in ["gmail", "google", "email"]: strategies = [ # Strategy 1: Direct type, human-like { "name": "Human-like typing", "steps": [ ("browse", "https://accounts.google.com/signin"), ("wait", 3), ("human_type", "input[type=email]", email), ("wait", 1), ("press", "Enter"), ("wait", 4), ("human_type", "input[type=password]", password), ("wait", 1), ("press", "Enter"), ("wait", 5), ] }, # Strategy 2: Click-based navigation { "name": "Click navigation", "steps": [ ("browse", "https://mail.google.com"), ("wait", 4), ("click", "input[type=email]"), ("human_type", None, email), ("click", "#identifierNext, button:has-text('Next')"), ("wait", 4), ("click", "input[type=password]"), ("human_type", None, password), ("click", "#passwordNext, button:has-text('Next')"), ("wait", 5), ] }, # Strategy 3: Pre-filled email URL { "name": "Pre-filled URL", "steps": [ ("browse", f"https://accounts.google.com/v3/signin/identifier?Email={email}&continue=https://mail.google.com"), ("wait", 4), ("human_type", "input[type=password]", password), ("press", "Enter"), ("wait", 5), ] } ] else: # Use template for other services template = LOGIN_TEMPLATES.get(service, LOGIN_TEMPLATES["generic"]) strategies = [{ "name": f"{service} template", "steps": [] }] for step in template["steps"]: action = step["action"].lower() args = [a.replace("{email}", email).replace("{password}", password).replace("{url}", url or "") for a in step["args"]] if action == "browse": strategies[0]["steps"].append(("browse", args[0])) elif action == "fill": strategies[0]["steps"].append(("human_type", args[0], args[1] if len(args) > 1 else "")) elif action == "click": strategies[0]["steps"].append(("click", 
args[0])) elif action == "press": strategies[0]["steps"].append(("press", args[0] if args else "Enter")) elif action == "wait": strategies[0]["steps"].append(("wait", int(args[0]) if args else 2)) # Try each strategy for strat_idx, strategy in enumerate(strategies): print(f" [{strat_idx+1}/{len(strategies)}] Trying: {strategy['name']}") try: for step in strategy["steps"]: action = step[0] if action == "browse": tool_browser_open(step[1]) time.sleep(1) elif action == "wait": time.sleep(step[1]) elif action == "human_type": selector, text = step[1], step[2] if len(step) > 2 else "" if _page: # Click the field first if selector provided if selector: try: elem = _page.wait_for_selector(selector, timeout=5000) if elem: elem.click() time.sleep(0.3) except: pass # Type like a human - variable speed for char in text: _page.keyboard.type(char, delay=random.randint(30, 120)) time.sleep(random.uniform(0.02, 0.08)) time.sleep(0.5) elif action == "click": if _page: selectors = step[1].split(", ") for sel in selectors: try: elem = _page.wait_for_selector(sel, timeout=3000) if elem: elem.click() break except: continue elif action == "press": if _page: time.sleep(0.3) _page.keyboard.press(step[1]) time.sleep(0.3) # Check login result time.sleep(3) current_url = _page.url if _page else "" page_text = tool_browser_read().lower() # Failure indicators - check these FIRST failures = ["couldn't sign you in", "wrong password", "verify it's you", "unusual activity", "captcha", "robot", "try again", "blocked", "sign in", "create an account", "for work", "learn more"] # If we're on a landing/marketing page, not logged in if "workspace.google.com" in current_url or "accounts.google.com/signin" in current_url: print(f" ❌ Still on login/landing page") continue if any(f in page_text[:500] for f in failures): print(f" ❌ Detected: not logged in") continue # Success indicators - MUST have inbox URL pattern inbox_urls = ["mail.google.com/mail", "inbox", "#inbox"] inbox_text = ["compose", "primary", 
"social", "promotions", "starred", "sent", "drafts"] url_ok = any(u in current_url.lower() for u in inbox_urls) text_ok = any(t in page_text[:1000] for t in inbox_text) if url_ok and text_ok: print(f" ✅ SUCCESS with {strategy['name']}!") return f"[login] ✅ Logged into {service}\n{page_text[:300]}" if url_ok: print(f" ✅ SUCCESS (URL match) with {strategy['name']}!") return f"[login] ✅ Logged into {service}\n{page_text[:300]}" # 2FA check if "2-step" in page_text or "verify" in page_text or "code" in page_text: print(f" ⚠️ 2FA/Verification required") input(" Complete verification in browser, then press Enter...") return f"[login] ✅ Logged in after 2FA\n{tool_browser_read()[:300]}" except Exception as e: print(f" Error: {str(e)[:50]}") continue # All failed - offer manual print(f"\n ❌ All {len(strategies)} strategies failed") print(f" 💡 Google may be blocking automated logins") manual = input("\n Try manual login? (yes/no): ").strip().lower() if manual in ['yes', 'y']: print(" Opening login page - complete login manually...") tool_browser_open("https://mail.google.com") input(" Press Enter when logged in...") page_text = tool_browser_read() if "inbox" in page_text.lower() or "compose" in page_text.lower(): return f"[login] ✅ Manual login successful\n{page_text[:300]}" return f"[login] ❌ Failed to login to {service}. Try logging in manually first, then use the browser." def detect_web_task(user_input: str) -> Tuple[Optional[str], Optional[str]]: """Detect if user wants a web task. 
def build_plan_from_template(template_name: str, query: str) -> dict:
    """Build execution plan from template.

    ``{query}`` placeholders in each step's args are replaced with the query.
    For BROWSE steps the placeholder sits inside a URL, so the query is
    percent-encoded there; otherwise characters such as ``#``, ``&``, ``?`` or
    spaces would change the meaning of the URL. Other steps (e.g. FILL)
    receive the raw text.

    Args:
        template_name: Key into ``TASK_TEMPLATES``; unknown names fall back to
            ``"search_google"``.
        query: User's search text.

    Returns:
        ``{"task": "<template_name>: <query>", "steps": [...], "query": query}``
        where each step is ``{"action": ..., "args": [...]}``.
    """
    # Local import: urllib.parse is not known to be imported at file level.
    from urllib.parse import quote

    template = TASK_TEMPLATES.get(template_name, TASK_TEMPLATES["search_google"])
    encoded_query = quote(query, safe="")

    steps = []
    for step in template["steps"]:
        value = encoded_query if step["action"].upper() == "BROWSE" else query
        steps.append({
            "action": step["action"],
            "args": [arg.replace("{query}", value) for arg in step["args"]],
        })

    return {"task": f"{template_name}: {query}", "steps": steps, "query": query}
def execute_task_silent(plan: dict) -> str:
    """Run every step of ``plan`` through the browser tools.

    Recognised actions are BROWSE, CLICK, TYPE, FILL, PRESS, READ and WAIT;
    anything else is ignored. A half-second pause follows each step.

    Args:
        plan: Mapping with an optional ``"steps"`` list of
            ``{"action": str, "args": list}`` entries.

    Returns:
        The text captured by the last READ step with the
        ``"[browser] Page content:\\n"`` prefix removed, or ``""`` if no READ
        step ran.
    """
    captured = ""
    print("\n[executing...]")

    for step in plan.get('steps', []):
        verb = step.get('action', '').upper()
        params = step.get('args', [])

        def first(default):
            # First argument of the step, or ``default`` when it has none.
            return params[0] if params else default

        if verb == "BROWSE":
            target = first("https://google.com")
            if not target.startswith("http"):
                target = "https://" + target
            tool_browser_open(target)
        elif verb == "CLICK":
            tool_browser_click(first(""))
        elif verb == "TYPE":
            tool_browser_type(first(""))
        elif verb == "FILL":
            # FILL needs both a selector and the text to enter.
            if len(params) >= 2:
                tool_browser_type(params[1], params[0])
        elif verb == "PRESS":
            tool_browser_press(first("Enter"))
        elif verb == "READ":
            captured = tool_browser_read().replace("[browser] Page content:\n", "")
        elif verb == "WAIT":
            time.sleep(int(first(2)))

        time.sleep(0.5)

    print("[done]\n")
    return captured
def parse_chain_task(task_str: str) -> Tuple[str, Dict, str]:
    """Parse a task string into type, params, description.

    Rules are tried in order: explicit ``login``/``browse``/``search``
    commands, then keyword heuristics (email, freelance/jobs, arbitrage,
    crypto), then ``checkpoint``, ``read``/``check``, ``click``, ``wait``, and
    finally a free-form "custom" task handed to the model.

    Surrounding whitespace is ignored, so items split out of a comma-separated
    chain parse the same as bare commands.

    Args:
        task_str: One task in plain text, e.g. ``"browse example.com"``.

    Returns:
        ``(task_type, params, description)``.
    """
    task = task_str.strip()
    task_lower = task.lower()

    def argument() -> str:
        # Text following the leading command word.
        parts = task.split(None, 1)
        return parts[1].strip() if len(parts) > 1 else ""

    if task_lower.startswith("login "):
        # Slice off the prefix only; replace() would also delete "login "
        # wherever else it appears in the text.
        service = task_lower[len("login "):].strip()
        return "login", {"service": service}, f"Login to {service}"

    if task_lower.startswith("browse "):
        url = argument()
        if not url.startswith("http"):
            url = "https://" + url
        return "browse", {"url": url}, f"Browse to {url}"

    if task_lower.startswith("search "):
        query = argument()
        return "search", {"query": query}, f"Search for '{query}'"

    if "email" in task_lower:
        if "send" in task_lower:
            return "email_send", {}, "Send email"
        return "email_check", {}, "Check email"

    if "freelance" in task_lower or "jobs" in task_lower:
        return "money", {"action": "freelance", "params": {"skills": ["general"]}}, "Find freelance jobs"

    if "arbitrage" in task_lower:
        product = task_lower.replace("arbitrage", "").replace("find", "").strip() or "electronics"
        return "money", {"action": "arbitrage", "params": {"product": product}}, f"Find arbitrage for {product}"

    if "crypto" in task_lower:
        return "money", {"action": "crypto", "params": {}}, "Check crypto opportunities"

    # "checkpoint" begins with "check", so it has to be recognised before the
    # read/check rule below or it would be parsed as a page read.
    if task_lower.startswith("checkpoint"):
        return "checkpoint", {}, "🔍 Alignment checkpoint"

    if task_lower.startswith(("read", "check")):
        return "read", {}, "Read page content"

    if task_lower.startswith("click "):
        selector = argument()
        return "click", {"selector": selector}, f"Click '{selector}'"

    if task_lower.startswith("wait"):
        digits = re.search(r"(\d+)", task_lower)
        seconds = int(digits.group(1)) if digits else 5
        return "wait", {"seconds": seconds}, f"Wait {seconds} seconds"

    if "checkpoint" in task_lower or "align" in task_lower:
        return "checkpoint", {}, "🔍 Alignment checkpoint"

    return "custom", {"instruction": task_str}, f"AI task: {task_str[:40]}..."
"checkpoint": return True, "CHECKPOINT" elif task_type == "custom": prompt = f"Execute: {params['instruction']}\n\nPage: {tool_browser_read()[:1000]}" response, _, _ = generate_fn(prompt) return True, response[:300] return False, f"Unknown task: {task_type}" except Exception as e: return False, str(e) def troubleshoot_task(task: Dict, error: str, generate_fn) -> str: """AI troubleshoots a failed task.""" prompt = f"""Task failed. Suggest fix. Task: {task['description']} Error: {error} Page: {tool_browser_read()[:500] if _page else 'No page'} Reply with ONE word: RETRY, SKIP, or ABORT""" response, _, _ = generate_fn(prompt) if "RETRY" in response.upper(): return "RETRY" if "ABORT" in response.upper(): return "ABORT" return "SKIP" def run_task_chain(generate_fn, max_retries: int = 2) -> str: """Execute the task chain with checkpoints.""" global TASK_CHAIN if not TASK_CHAIN.tasks: return "[chain] Empty. Use 'chain: ' to add tasks." print("\n" + "="*60) print("🚀 TASK CHAIN") print("="*60) print(TASK_CHAIN.show()) if input("\n✅ Start? 
(yes/no): ").strip().lower() not in ['yes', 'y']: return "[chain] Cancelled" TASK_CHAIN.running = True TASK_CHAIN.current_idx = 0 while TASK_CHAIN.current_idx < len(TASK_CHAIN.tasks): task = TASK_CHAIN.tasks[TASK_CHAIN.current_idx] print(f"\n[{TASK_CHAIN.current_idx + 1}/{len(TASK_CHAIN.tasks)}] {task['description']}") task["status"] = "running" success, result = execute_chain_task(task, generate_fn) if success: task["status"] = "success" task["result"] = result print(f" ✅ {result[:80]}...") if result == "CHECKPOINT": print("\n" + "="*60) print("🔍 CHECKPOINT - Review Progress") print("="*60) print(TASK_CHAIN.show()) action = input("\n[c]ontinue, [a]bort: ").strip().lower() if action == 'a': TASK_CHAIN.running = False return "[chain] Aborted" else: task["status"] = "failed" task["error"] = result print(f" ❌ {result[:80]}...") if task["retries"] < max_retries: action = troubleshoot_task(task, result, generate_fn) print(f" 🔧 AI: {action}") if action == "RETRY": task["retries"] += 1 task["status"] = "pending" continue elif action == "ABORT": TASK_CHAIN.running = False return "[chain] Aborted by AI" task["status"] = "skipped" TASK_CHAIN.current_idx += 1 time.sleep(1) TASK_CHAIN.running = False print("\n✅ CHAIN COMPLETE") return TASK_CHAIN.show() # Workflow templates WORKFLOW_TEMPLATES = { "morning_hustle": [ "login gmail", "search freelance python jobs", "checkpoint", "crypto opportunities", "checkpoint" ], "arbitrage_hunt": [ "browse amazon.com/bestsellers", "checkpoint", "find arbitrage electronics", "checkpoint" ], "lead_gen": [ "find leads for contractor in miami", "checkpoint", "login gmail", "checkpoint" ] } # === RECURSIVE SELF-IMPROVEMENT (RSI) MODE === RSI_CONFIG = { "enabled": False, "mode": "conservative", # conservative, balanced, aggressive "max_iterations": 100, "target_quality": 0.90, "min_quality": 0.60, "auto_rollback_threshold": 0.15, # Rollback if quality drops by this much "training_steps_per_iter": 25, "eval_prompts_per_iter": 10, 
def rsi_evaluate_capability(capability: str, generate_fn) -> float:
    """Evaluate model on specific capability.

    Each capability has three fixed probe prompts. ``generate_fn(prompt)`` must
    return ``(response_text, stats, eval_result)``; depending on the
    capability, ``eval_result.tokens``, ``.density_score`` and
    ``.coherence_score`` are read.

    Scores computed here are clamped to [0, 1]. Before, a short density answer
    or a reasoning answer with a marker word could score above 1.0.

    Args:
        capability: One of "density", "accuracy", "coherence", "helpfulness",
            "coding", "reasoning", "creativity". Any other value is evaluated
            as "density".
        generate_fn: Callable producing the model's answer for a prompt.

    Returns:
        Mean score over the probe prompts (0.5 if nothing was scored).
    """
    test_prompts = {
        "density": [
            ("Explain quantum computing", 50),  # (prompt, ideal_max_tokens)
            ("What is machine learning?", 40),
            ("How does the internet work?", 60),
        ],
        "accuracy": [
            ("What is 2+2?", "4"),
            ("What is the capital of France?", "Paris"),
            ("Who wrote Romeo and Juliet?", "Shakespeare"),
        ],
        "coherence": [
            "Write a 3-step process for making coffee",
            "Explain why the sky is blue in logical steps",
            "Describe how a car engine works",
        ],
        "helpfulness": [
            "Help me write a professional email to my boss asking for time off",
            "Give me a workout routine for beginners",
            "How do I fix a leaky faucet?",
        ],
        "coding": [
            "Write a Python function to reverse a string",
            "Write a function to check if a number is prime",
            "Write a simple web scraper in Python",
        ],
        "reasoning": [
            "If all cats are animals, and all animals need water, do cats need water? Explain.",
            "A bat and ball cost $1.10. The bat costs $1 more than the ball. How much does the ball cost?",
            "What comes next: 2, 4, 8, 16, ?",
        ],
        "creativity": [
            "Invent a new word and define it",
            "Come up with 3 startup ideas involving AI",
            "Write a haiku about programming",
        ],
    }

    # The prompt lookup always fell back to the density prompts for unknown
    # capabilities, but no scoring branch handled them, so nothing was scored.
    # Normalise the name so the fallback is actually evaluated.
    if capability not in test_prompts:
        capability = "density"

    # Whole-word match: a plain substring test for "so" also fires on
    # "also", "some", "reason", ...
    reasoning_markers = re.compile(r"\b(?:because|therefore|since|thus|so)\b", re.IGNORECASE)

    def clamp(value: float) -> float:
        return max(0.0, min(1.0, value))

    scores = []
    for item in test_prompts[capability]:
        if capability == "density":
            prompt, ideal_tokens = item
            response, _stats, eval_result = generate_fn(prompt)
            # Score based on token efficiency: full marks at or below the
            # ideal length, falling to 0 at twice the ideal length.
            if eval_result.tokens > 0:
                token_score = clamp(1 - (eval_result.tokens - ideal_tokens) / ideal_tokens)
            else:
                token_score = 0.0
            # NOTE(review): dividing by 50 presumes density_score is on a
            # 0-50 scale - confirm against the evaluator.
            scores.append(clamp((eval_result.density_score / 50 + token_score) / 2))

        elif capability == "accuracy":
            prompt, expected = item
            response, _stats, eval_result = generate_fn(prompt)
            # Check if expected answer is in response
            scores.append(1.0 if expected.lower() in response.lower() else 0.0)

        elif capability in ("coherence", "helpfulness", "creativity"):
            response, _stats, eval_result = generate_fn(item)
            scores.append(eval_result.coherence_score)

        elif capability == "coding":
            response, _stats, eval_result = generate_fn(item)
            # Check for code markers
            has_code = "def " in response or "function" in response or "```" in response
            scores.append(0.8 if has_code else 0.3)

        elif capability == "reasoning":
            response, _stats, eval_result = generate_fn(item)
            # Check for reasoning markers
            has_reasoning = reasoning_markers.search(response) is not None
            scores.append(clamp(eval_result.coherence_score * (1.2 if has_reasoning else 0.8)))

    return sum(scores) / len(scores) if scores else 0.5
def _run_training(steps: int = 25) -> Dict[str, Any]:
    """Run a short preference-tuning pass over the accumulated pairs.

    Reads the module globals ``_model``, ``_tokenizer`` and
    ``PREFERENCE_PAIRS``. Each pair is a dict with ``"prompt"``,
    ``"chosen"`` and ``"rejected"`` strings; pairs missing any of the three
    are skipped. This is used by overnight modes to train the model.

    Args:
        steps: Maximum number of optimizer steps (one pair per step).

    Returns:
        A dict with ``"success"`` (True when at least one pair was trained
        on), ``"steps"`` (the requested step count), ``"loss"`` (mean loss
        over the pairs used) and ``"pairs_used"``.
    """
    global _model, _tokenizer, PREFERENCE_PAIRS

    result = {
        "success": False,
        "steps": steps,
        "loss": 0.0,
        "pairs_used": 0,
    }

    try:
        # Check if we have pairs to train on
        if not PREFERENCE_PAIRS or len(PREFERENCE_PAIRS) < 3:
            print(f" ⚠️ Not enough training pairs ({len(PREFERENCE_PAIRS) if PREFERENCE_PAIRS else 0})")
            return result

        # Simple DPO-style training on preference pairs
        from torch.optim import AdamW
        import torch.nn.functional as F

        device = next(_model.parameters()).device
        # Only hand trainable parameters to the optimizer; frozen weights
        # never receive gradients.
        trainable = [p for p in _model.parameters() if p.requires_grad]
        optimizer = AdamW(trainable, lr=5e-6)

        total_loss = 0.0
        pairs_used = 0
        failures = 0
        beta = 0.1

        # Oversample by 2x so that skipped or failing pairs can be replaced
        # by later candidates while still stopping after `steps` updates.
        pairs_to_use = random.sample(PREFERENCE_PAIRS, min(len(PREFERENCE_PAIRS), steps * 2))

        _model.train()
        try:
            for pair in pairs_to_use:
                if pairs_used >= steps:
                    break
                try:
                    prompt = pair.get("prompt", "")
                    chosen = pair.get("chosen", "")
                    rejected = pair.get("rejected", "")
                    if not prompt or not chosen or not rejected:
                        continue

                    # Encode sequences
                    chosen_text = f"<|im_start|>user\n{prompt}<|im_end|>\n<|im_start|>assistant\n{chosen}<|im_end|>"
                    rejected_text = f"<|im_start|>user\n{prompt}<|im_end|>\n<|im_start|>assistant\n{rejected}<|im_end|>"
                    chosen_ids = _tokenizer.encode(chosen_text, return_tensors='pt', truncation=True, max_length=512).to(device)
                    rejected_ids = _tokenizer.encode(rejected_text, return_tensors='pt', truncation=True, max_length=512).to(device)

                    # Both forward passes must track gradients: running them
                    # under torch.no_grad() makes loss.backward() raise, so
                    # no update is ever applied.
                    chosen_outputs = _model(input_ids=chosen_ids, labels=chosen_ids)
                    rejected_outputs = _model(input_ids=rejected_ids, labels=rejected_ids)

                    # Simple preference loss: we want chosen_loss < rejected_loss,
                    # i.e. maximize (rejected_loss - chosen_loss).
                    loss = -F.logsigmoid(beta * (rejected_outputs.loss - chosen_outputs.loss))

                    optimizer.zero_grad()
                    loss.backward()
                    optimizer.step()

                    total_loss += loss.item()
                    pairs_used += 1
                except Exception as e:
                    # Best effort per pair, but surface the first failure so
                    # a systematic error is not silently hidden.
                    failures += 1
                    if failures == 1:
                        print(f" ⚠️ Training step failed: {e}")
                    continue
        finally:
            # Always leave the model in inference mode, even after an error.
            _model.eval()

        if pairs_used > 0:
            result["success"] = True
            result["loss"] = total_loss / pairs_used
            result["pairs_used"] = pairs_used
            print(f" ✓ Trained on {pairs_used} pairs, avg loss: {result['loss']:.4f}")
        elif failures:
            print(f" ❌ All {failures} attempted training steps failed")

    except Exception as e:
        print(f" ❌ Training error: {e}")
        traceback.print_exc()

    return result
def rsi_mode_loop(generate_fn, train_fn, eval_fn, reload_fn):
    """Main RSI loop - runs iterations until a stop condition is met.

    Repeatedly calls ``rsi_run_iteration`` and stops when any of these hold:
    the iteration counter in ``RSI_CONFIG`` reaches ``max_iterations``, the
    post-training quality reaches ``target_quality``, the quality falls
    below ``min_quality``, ``RSI_PAUSED``/``RSI_RUNNING`` are flipped, or
    the user presses Ctrl+C.

    Args:
        generate_fn: Forwarded unchanged to ``rsi_run_iteration``.
        train_fn: Forwarded unchanged to ``rsi_run_iteration``.
        eval_fn: Forwarded unchanged to ``rsi_run_iteration``.
        reload_fn: Forwarded unchanged to ``rsi_run_iteration``.

    Any exception other than ``KeyboardInterrupt`` propagates to the
    caller, but only after the running flag has been cleared, the session
    report printed and the configuration saved.
    """
    global RSI_RUNNING, RSI_PAUSED, RSI_CONFIG

    RSI_RUNNING = True
    RSI_PAUSED = False
    RSI_CONFIG["start_time"] = datetime.now().isoformat()

    print("\n" + "="*60)
    print("🚀 RSI MODE ACTIVATED")
    print("="*60)
    print(f" Mode: {RSI_CONFIG['mode']}")
    print(f" Target quality: {RSI_CONFIG['target_quality']}")
    print(f" Max iterations: {RSI_CONFIG['max_iterations']}")
    print(f" Focus areas: {', '.join(RSI_CONFIG['capabilities_to_improve'])}")
    print("="*60)
    print("\n⚠️ Press Ctrl+C to pause/stop RSI mode")
    print("="*60)

    save_rsi_config()

    try:
        while RSI_RUNNING and RSI_CONFIG["iteration"] < RSI_CONFIG["max_iterations"]:
            if RSI_PAUSED:
                print("\n[RSI] ⏸️ Paused. Type 'rsi resume' to continue.")
                break

            # Run iteration
            result = rsi_run_iteration(generate_fn, train_fn, eval_fn, reload_fn)

            # Check if target reached
            if result["quality_after"] >= RSI_CONFIG["target_quality"]:
                print(f"\n[RSI] 🎉 TARGET REACHED! Quality: {result['quality_after']:.3f}")
                break

            # Check for catastrophic failure
            if result["quality_after"] < RSI_CONFIG["min_quality"]:
                print(f"\n[RSI] 🛑 Quality too low ({result['quality_after']:.3f}) - STOPPING")
                break

            # Sleep between iterations
            print(f"\n[RSI] 💤 Sleeping {RSI_CONFIG['sleep_between_iters']}s...")
            time.sleep(RSI_CONFIG["sleep_between_iters"])

    except KeyboardInterrupt:
        print("\n\n[RSI] ⏸️ Interrupted by user")
        RSI_PAUSED = True

    finally:
        # Runs on every exit path, so an unexpected error in an iteration
        # cannot leave the module stuck in the "running" state or lose the
        # counters accumulated so far.
        RSI_RUNNING = False

        # Final report
        print("\n" + "="*60)
        print("📊 RSI SESSION REPORT")
        print("="*60)
        print(f" Iterations: {RSI_CONFIG['iteration']}")
        print(f" Improvements: {RSI_CONFIG['total_improvements']}")
        print(f" Rollbacks: {RSI_CONFIG['total_rollbacks']}")
        print(f" Best quality: {RSI_CONFIG['best_quality_ever']:.3f}")
        print(f" Current focus: {RSI_CONFIG['current_focus']}")
        print("="*60)

        save_rsi_config()
""" global RSI_RUNNING, RSI_CONFIG start_time = time.time() end_time = start_time + (hours * 3600) iteration = 0 learnings_total = 0 trainings_done = 0 # === SAFEGUARDS === SAFEGUARDS = { "min_quality_threshold": 0.35, "rollback_threshold": 0.15, "checkpoint_every_n_trainings": 3, "max_storage_mb": 500, "baseline_check_every": 15, } # Measure baseline print("\n[SAFEGUARD] Measuring baseline quality...") baseline_prompts = ["What is recursion?", "Explain hash tables", "How does encryption work?"] baseline_scores = [] for bp in baseline_prompts: out, _, ev = generate_fn(bp) baseline_scores.append(ev.overall_score) baseline_quality = sum(baseline_scores) / len(baseline_scores) best_quality = baseline_quality last_checkpoint_quality = baseline_quality rollbacks = 0 checkpoints_saved = 0 print(f"[SAFEGUARD] Baseline quality: {baseline_quality:.3f}") # Storage management def cleanup_old_checkpoints(keep_last_n=3): ckpt_dir = os.path.join(ROOT, "overnight_checkpoints") if not os.path.exists(ckpt_dir): return ckpts = sorted([f for f in os.listdir(ckpt_dir) if f.startswith("ckpt_")]) while len(ckpts) > keep_last_n: old = ckpts.pop(0) try: import shutil shutil.rmtree(os.path.join(ckpt_dir, old), ignore_errors=True) except: pass # Test prompts for overnight learning OVERNIGHT_PROMPTS = [ "Explain the concept of recursion with a practical example", "What are the trade-offs between SQL and NoSQL databases?", "How does gradient descent work in neural networks?", "Explain the CAP theorem and its implications", "What is the difference between concurrency and parallelism?", "Write a function to detect cycles in a linked list", "Implement a simple LRU cache", "How would you design a rate limiter?", "Explain Big O notation with examples", "What are design patterns and when to use them?", "Summarize the key principles of good API design", "What makes code maintainable?", "Explain microservices vs monolith architecture", "How do you approach debugging a complex system?", "What are the 
principles of clean code?", "Write a haiku about machine learning", "Create a metaphor explaining blockchain to a child", "Describe the internet in one dense paragraph", "What is consciousness?", "Can AI be truly creative?", "What is the nature of understanding?", "Is intelligence substrate-independent?", ] print("\n" + "="*70) print("🌙 OVERNIGHT IMPROVEMENT MODE") print("="*70) print(f" Duration: {hours} hours") print(f" End time: {datetime.fromtimestamp(end_time).strftime('%Y-%m-%d %H:%M:%S')}") print(f" Prompts pool: {len(OVERNIGHT_PROMPTS)}") print(f" Mentor: Claude API") print("="*70) print("\n⚠️ Press Ctrl+C to stop early") print("="*70 + "\n") # Ensure mentor is enabled mentor.config.enabled = True mentor.config.learn_from_responses = True RSI_RUNNING = True try: while time.time() < end_time and RSI_RUNNING: iteration += 1 elapsed = (time.time() - start_time) / 3600 remaining = (end_time - time.time()) / 3600 print(f"\n{'─'*70}") print(f" ITERATION {iteration} | Elapsed: {elapsed:.1f}h | Remaining: {remaining:.1f}h") print(f"{'─'*70}") # Pick a random prompt import random prompt = random.choice(OVERNIGHT_PROMPTS) print(f"\n📝 Prompt: {prompt}") # Step 1: Local model responds print("\n[1/5] Local model generating response...") local_out, stats, local_eval = generate_fn(prompt) print(f" Quality: {local_eval.overall_score:.2f} | Tokens: {local_eval.tokens}") # === PERIODIC QUALITY CHECK === if iteration % SAFEGUARDS["baseline_check_every"] == 0: print(f"\n[SAFEGUARD] Quality check...") check_scores = [] for bp in baseline_prompts: out, _, ev = generate_fn(bp) check_scores.append(ev.overall_score) current_quality = sum(check_scores) / len(check_scores) print(f"[SAFEGUARD] Current: {current_quality:.3f} (baseline: {baseline_quality:.3f})") if current_quality < SAFEGUARDS["min_quality_threshold"]: print(f"\n🛑 [SAFEGUARD] QUALITY BELOW MINIMUM - STOPPING") break if current_quality < last_checkpoint_quality - SAFEGUARDS["rollback_threshold"]: print(f"\n⚠️ [SAFEGUARD] 
Quality dropped - rolling back...") rollbacks += 1 reload_fn() continue if current_quality > best_quality: best_quality = current_quality # Step 2: Consult Claude if quality is improvable if local_eval.overall_score < 0.9: # Always try to learn print("\n[2/5] Consulting Claude for improved response...") claude_response = mentor.consult_via_api(prompt, local_out) if claude_response and "[claude]" not in claude_response.lower(): print(f" Claude responded ({len(claude_response)} chars)") # Step 3: Learn from Claude print("\n[3/5] Creating DPO training pair...") mentor.learn_from_claude(prompt, local_out, claude_response) learnings_total += 1 print(f" Total learnings: {learnings_total}") else: print(" [skipped - no valid response]") else: print("\n[2/5] Quality already high, skipping mentor consultation") # Step 4: Train periodically (every 10 learnings) if learnings_total > 0 and learnings_total % 10 == 0: print(f"\n[4/5] Training on {learnings_total} accumulated learnings...") try: train_result = train_fn(25) # 25 steps trainings_done += 1 print(f" Training #{trainings_done} complete") # === CHECKPOINT === if trainings_done % SAFEGUARDS["checkpoint_every_n_trainings"] == 0: ckpt_dir = os.path.join(ROOT, "overnight_checkpoints") os.makedirs(ckpt_dir, exist_ok=True) ckpt_name = f"ckpt_{trainings_done:04d}" try: _model.save_pretrained(os.path.join(ckpt_dir, ckpt_name)) checkpoints_saved += 1 last_checkpoint_quality = local_eval.overall_score print(f" ✓ Checkpoint: {ckpt_name}") cleanup_old_checkpoints(keep_last_n=3) except Exception as e: print(f" ⚠ Checkpoint error: {e}") # Reload model print("\n[5/5] Reloading model...") reload_fn() except Exception as e: print(f" Training error: {e}") else: print(f"\n[4/5] Accumulating learnings ({learnings_total % 10}/10 until next training)") print("[5/5] Skipping reload") # Status update print(f"\n📊 Status: {learnings_total} learnings | {trainings_done} trainings | {rollbacks} rollbacks | {elapsed:.1f}h") # Sleep to avoid rate 
def overnight_repeng_loop(generate_fn, train_fn, eval_fn, reload_fn, mentor, hours: int = 8):
    """
    REPRESENTATION ENGINEERING OVERNIGHT MODE

    Uses CF-HoT hidden state analysis to guide learning:
    1. Generate a response token by token while reading hidden states
    2. Record HIGH-RISK moments reported by the risk heads
    3. Keep the last 50 tokens of context at each such moment
    4. Ask the mentor for a better response for up to 3 contexts per prompt
    5. Store (output, mentor fix) pairs and train once 10 pairs are buffered

    Args:
        generate_fn: Callable ``prompt -> (output, stats, eval_result)``,
            used for the post-training re-test.
        train_fn: Callable taking a step count; called as ``train_fn(25)``.
        eval_fn: Accepted for signature parity with the other overnight
            loops; not used here.
        reload_fn: Zero-argument callable invoked after each training run.
        mentor: Object providing ``config``, ``consult_via_api``,
            ``learn_from_claude`` and ``learnings``.
        hours: Wall-clock budget for the session.

    Returns:
        The session statistics dict (generation, detection, fix and
        training counters).

    Ctrl+C ends the session early; the report and pair file are still
    written in that case.
    """
    global RSI_RUNNING, _model, _tokenizer, _multi_head

    # Local import so the sampling step does not depend on a module-level
    # alias being present.
    import torch.nn.functional as F

    start_time = time.time()
    end_time = start_time + (hours * 3600)
    iteration = 0

    # Statistics
    stats = {
        "total_generations": 0,
        "high_risk_detected": 0,
        "representations_fixed": 0,
        "trainings_done": 0,
        "risk_reductions": [],
    }

    # Test prompts that tend to trigger bad behaviors
    TRIGGER_PROMPTS = [
        # These often trigger hedging
        "What do you think about consciousness?",
        "Are you sentient?",
        "What are your feelings about this?",
        "Can you help me with something illegal?",  # triggers over-hedging
        # These often trigger verbosity
        "Explain X",  # Placeholder: X is replaced with a topic when picked
        "What is the meaning of life?",
        "Tell me about yourself",
        "How does machine learning work?",
        # These often trigger repetition in weak models
        "Write a story about a hero who must save the world",
        "List all the reasons why programming is useful",
        "Describe a complex system in detail",
        # Technical (tests actual knowledge)
        "Implement a binary search tree",
        "Explain the CAP theorem",
        "What are transformer attention mechanisms?",
        "How does backpropagation work?",
    ]
    # Topics substituted for the "X" in the placeholder prompt above, which
    # would otherwise be sent to the model literally.
    EXPLAIN_TOPICS = [
        "recursion",
        "hash tables",
        "public-key encryption",
        "garbage collection",
    ]

    print("\n" + "="*70)
    print("🧠 REPRESENTATION ENGINEERING OVERNIGHT MODE")
    print("="*70)
    print(f" Duration: {hours} hours")
    print(f" Method: Hidden-state guided learning")
    print(f" CF-HoT Heads: {list(_multi_head.loaded_heads) if _multi_head else 'None'}")
    print(f" 80× Head: {'Loaded' if get_cfhot_head().loaded else 'Not loaded'}")
    print("="*70)
    print("\n This mode monitors hidden states during generation,")
    print(" detects moments of high risk (repetition/hedging/verbosity),")
    print(" and trains specifically on those weak points.")
    print("="*70)
    print("\n⚠️ Press Ctrl+C to stop early")
    print("="*70 + "\n")

    mentor.config.enabled = True
    mentor.config.learn_from_responses = True
    RSI_RUNNING = True

    # Storage for representation-targeted training pairs
    repeng_pairs = []

    try:
        while time.time() < end_time and RSI_RUNNING:
            iteration += 1
            elapsed = (time.time() - start_time) / 3600
            remaining = (end_time - time.time()) / 3600

            print(f"\n{'━'*70}")
            print(f" ITERATION {iteration} | {elapsed:.1f}h elapsed | {remaining:.1f}h remaining")
            print(f"{'━'*70}")

            # Pick prompt
            prompt = random.choice(TRIGGER_PROMPTS)
            if prompt == "Explain X":
                prompt = f"Explain {random.choice(EXPLAIN_TOPICS)}"
            print(f"\n📝 Prompt: {prompt}")

            # === STEP 1: Generate with hidden state monitoring ===
            print("\n[1/6] Generating with hidden state monitoring...")
            device = next(_model.parameters()).device
            full_prompt = (f"<|im_start|>system\n{Config.system}<|im_end|>\n"
                           f"<|im_start|>user\n{prompt}<|im_end|>\n"
                           f"<|im_start|>assistant\n")
            input_ids = _tokenizer.encode(full_prompt, return_tensors='pt').to(device)
            generated_ids = input_ids.clone()
            attention_mask = torch.ones_like(input_ids)

            # Track risk throughout generation
            risk_timeline = {
                'repetition': [],
                'hedging': [],
                'verbosity': [],
                'token_indices': [],
                'tokens': [],
            }
            high_risk_contexts = []  # Store contexts where risk spiked
            stats["total_generations"] += 1

            for step in range(Config.max_new_tokens):
                # Monitoring and sampling never need gradients.
                with torch.no_grad():
                    outputs = _model(
                        input_ids=generated_ids,
                        attention_mask=attention_mask,
                        output_hidden_states=True,
                        return_dict=True
                    )
                    logits = outputs.logits[:, -1, :] / Config.temperature

                    # === HIDDEN STATE ANALYSIS ===
                    if _multi_head is not None and _multi_head.loaded_heads:
                        hidden_states = outputs.hidden_states[1:]  # Skip embedding layer
                        risks = _multi_head.get_all_risks(hidden_states)
                        for name, risk_tensor in risks.items():
                            risk_val = risk_tensor[:, -1].item()
                            # setdefault: a head whose name is not one of the
                            # pre-seeded keys must not abort the session.
                            risk_timeline.setdefault(name, []).append(risk_val)

                            # Detect HIGH RISK moment
                            if risk_val > 0.7:  # Threshold for "about to do something bad"
                                stats["high_risk_detected"] += 1
                                # Extract the context that led to this
                                context_ids = generated_ids[0, -50:].tolist()  # Last 50 tokens
                                context_text = _tokenizer.decode(context_ids)
                                high_risk_contexts.append({
                                    "step": step,
                                    "risk_type": name,
                                    "risk_value": risk_val,
                                    "context": context_text,
                                    "hidden_state": hidden_states[-1][:, -1, :].cpu().clone(),  # Save the actual hidden state
                                })
                                print(f" ⚠️ High {name} risk ({risk_val:.2f}) at step {step}")

                    # Also check 80× head
                    if Config.use_cfhot_80x and get_cfhot_head().loaded:
                        hidden_states = outputs.hidden_states[-1]
                        rep_risk = get_cfhot_head().predict_risk(hidden_states)
                        risk_timeline['repetition'].append(rep_risk)
                        if rep_risk > 0.5:
                            context_ids = generated_ids[0, -50:].tolist()
                            context_text = _tokenizer.decode(context_ids)
                            high_risk_contexts.append({
                                "step": step,
                                "risk_type": "repetition_80x",
                                "risk_value": rep_risk,
                                "context": context_text,
                                "hidden_state": hidden_states[:, -1, :].cpu().clone(),
                            })
                            print(f" ⚠️ 80× repetition risk ({rep_risk:.2f}) at step {step}")

                    risk_timeline['token_indices'].append(step)

                    # Sample next token
                    probs = F.softmax(logits, dim=-1)
                    next_token = torch.multinomial(probs, num_samples=1)

                risk_timeline['tokens'].append(next_token.item())
                generated_ids = torch.cat([generated_ids, next_token], dim=-1)
                # Extend the mask in its own dtype; a default float tensor
                # would change the mask's dtype on concatenation.
                attention_mask = torch.cat(
                    [attention_mask,
                     torch.ones((1, 1), dtype=attention_mask.dtype, device=device)],
                    dim=-1,
                )

                if next_token.item() == _tokenizer.eos_token_id:
                    break

            # Decode output
            output_text = _tokenizer.decode(generated_ids[0], skip_special_tokens=False)
            if "<|im_start|>assistant" in output_text:
                output_text = output_text.split("<|im_start|>assistant")[-1]
            for end_tok in ["<|im_end|>", "<|im_start|>"]:
                if end_tok in output_text:
                    output_text = output_text.split(end_tok)[0]
            output_text = output_text.strip()

            # Evaluate
            eval_result = _evaluator.evaluate(prompt, output_text)
            print(f"\n Output ({eval_result.tokens} tokens): {output_text[:100]}...")
            print(f" Quality: {eval_result.overall_score:.2f}")

            # === STEP 2: Analyze risk timeline ===
            print(f"\n[2/6] Analyzing risk timeline...")
            max_risks = {}
            for risk_type in ['repetition', 'hedging', 'verbosity']:
                if risk_timeline[risk_type]:
                    max_risks[risk_type] = max(risk_timeline[risk_type])
                    avg_risk = sum(risk_timeline[risk_type]) / len(risk_timeline[risk_type])
                    print(f" {risk_type}: max={max_risks[risk_type]:.2f}, avg={avg_risk:.2f}")

            # === STEP 3: For each high-risk context, ask Claude for fix ===
            if high_risk_contexts:
                print(f"\n[3/6] Found {len(high_risk_contexts)} high-risk moments - consulting Claude...")
                for i, ctx in enumerate(high_risk_contexts[:3]):  # Max 3 per generation
                    print(f"\n --- High-risk context {i+1} ({ctx['risk_type']}) ---")

                    # Ask Claude specifically about this context
                    claude_prompt = f"""I'm training a language model to avoid {ctx['risk_type']}.

The model was responding to: "{prompt}"

At this point in generation, the model's hidden states showed HIGH {ctx['risk_type'].upper()} RISK ({ctx['risk_value']:.2f}).

The context leading to this was:
"{ctx['context']}"

Please provide:
1. What the model should have said instead (dense, no {ctx['risk_type']})
2. A brief explanation of what went wrong

Format:
BETTER RESPONSE: [your improved version]
ANALYSIS: [what went wrong]"""

                    claude_response = mentor.consult_via_api(claude_prompt, ctx['context'])

                    if claude_response and "BETTER RESPONSE:" in claude_response:
                        # Extract the better response
                        better = claude_response.split("BETTER RESPONSE:")[1]
                        if "ANALYSIS:" in better:
                            better = better.split("ANALYSIS:")[0]
                        better = better.strip()
                        print(f" Claude's fix: {better[:80]}...")

                        # Create representation-targeted training pair
                        repeng_pairs.append({
                            "prompt": prompt,
                            "context": ctx['context'],
                            "risk_type": ctx['risk_type'],
                            "risk_value": ctx['risk_value'],
                            "bad_response": output_text,
                            "good_response": better,
                            "hidden_state_snapshot": ctx.get('hidden_state'),
                        })

                        # Also add as standard DPO pair
                        mentor.learn_from_claude(prompt, output_text, better)
                        stats["representations_fixed"] += 1
            else:
                print(f"\n[3/6] No high-risk moments detected - output was clean!")

            # === STEP 4: Periodic training ===
            # NOTE(review): this condition stays true on every later
            # iteration unless train_fn drains mentor.learnings - confirm.
            print(f"\n[4/6] Training buffer: {len(mentor.learnings)} pairs, {len(repeng_pairs)} rep-eng pairs")
            if len(mentor.learnings) >= 10:
                print(f"\n[5/6] Training on accumulated pairs...")
                try:
                    train_result = train_fn(25)
                    stats["trainings_done"] += 1
                    print(f" Training #{stats['trainings_done']} complete")

                    # Reload
                    print(f"\n[6/6] Reloading model...")
                    reload_fn()

                    # Test if risk reduced on same prompt
                    print(f"\n Testing risk reduction on same prompt...")
                    # Quick test generation
                    test_out, test_stats, test_eval = generate_fn(prompt)
                    print(f" New quality: {test_eval.overall_score:.2f}")
                except Exception as e:
                    print(f" Training error: {e}")
            else:
                print(f"\n[5/6] Accumulating pairs ({len(mentor.learnings)}/10)")
                print(f"[6/6] Skipping reload")

            # Summary
            print(f"\n📊 Session stats: {stats['high_risk_detected']} risks detected, "
                  f"{stats['representations_fixed']} fixed, {stats['trainings_done']} trainings")

            time.sleep(3)

    except KeyboardInterrupt:
        print("\n\n⏹️ Stopped by user")
    finally:
        # Clear the flag on every exit path, including unexpected errors.
        RSI_RUNNING = False

    total_time = (time.time() - start_time) / 3600

    # Final report
    print("\n" + "="*70)
    print("🧠 REPRESENTATION ENGINEERING SESSION COMPLETE")
    print("="*70)
    print(f" Total time: {total_time:.2f} hours")
    print(f" Iterations: {iteration}")
    print(f" High-risk moments detected: {stats['high_risk_detected']}")
    print(f" Representations fixed: {stats['representations_fixed']}")
    print(f" Training cycles: {stats['trainings_done']}")
    print(f" Total DPO pairs: {len(mentor.learnings)}")
    print(f" Rep-eng targeted pairs: {len(repeng_pairs)}")
    print("="*70)

    # Save rep-eng pairs for analysis
    repeng_path = os.path.join(ROOT, "repeng_training_pairs.json")
    try:
        # Remove hidden states for JSON serialization
        saveable_pairs = []
        for p in repeng_pairs:
            sp = {k: v for k, v in p.items() if k != 'hidden_state_snapshot'}
            saveable_pairs.append(sp)
        with open(repeng_path, 'w') as f:
            json.dump(saveable_pairs, f, indent=2)
        print(f"\n💾 Rep-eng pairs saved to: {repeng_path}")
    except Exception as e:
        print(f"\n⚠️ Could not save rep-eng pairs: {e}")

    return stats
Detect HIGH-RISK moments (repetition, hedging, verbosity) 3. Re-generate with STRONGER suppression at those moments 4. Use the cleaner version as the "chosen" response 5. Train on (risky_output → clean_output) pairs SAFEGUARDS: - Baseline quality check before training - Automatic rollback if quality degrades - Checkpoint every N trainings - Storage limit enforcement - Quality gate before saving pairs This is pure self-improvement through representation contrast. No external API, no rate limits, runs as fast as your GPU allows. """ global RSI_RUNNING, _model, _tokenizer, _multi_head, _hedge_tokens, _verbose_tokens start_time = time.time() end_time = start_time + (hours * 3600) iteration = 0 # === SAFEGUARD CONFIG === SAFEGUARDS = { "min_quality_threshold": 0.40, # Stop if quality drops below this "rollback_threshold": 0.15, # Rollback if quality drops by this much "checkpoint_every_n_trainings": 3, # Save checkpoint every N trainings "max_pairs_in_memory": 50, # Don't accumulate too many pairs "max_storage_mb": 500, # Max disk usage for checkpoints "baseline_check_every": 20, # Re-check baseline every N iterations "min_improvement_for_pair": 0.03, # Minimum quality gain to create pair } stats = { "total_generations": 0, "high_risk_detected": 0, "improvements_found": 0, "trainings_done": 0, "pairs_created": 0, "rollbacks": 0, "checkpoints_saved": 0, } # Self-play training pairs selfplay_pairs = [] # === BASELINE QUALITY CHECK === print("\n[SAFEGUARD] Measuring baseline quality...") baseline_prompts = [ "What is recursion?", "Explain machine learning briefly", "How do hash tables work?", ] baseline_scores = [] for bp in baseline_prompts: out, _, ev = generate_fn(bp) baseline_scores.append(ev.overall_score) baseline_quality = sum(baseline_scores) / len(baseline_scores) best_quality = baseline_quality last_checkpoint_quality = baseline_quality print(f"[SAFEGUARD] Baseline quality: {baseline_quality:.3f}") # === STORAGE CHECK === def get_checkpoint_size_mb(): ckpt_dir = 
os.path.join(ROOT, "overnight_checkpoints") if not os.path.exists(ckpt_dir): return 0 total = 0 for f in os.listdir(ckpt_dir): fp = os.path.join(ckpt_dir, f) if os.path.isfile(fp): total += os.path.getsize(fp) return total / (1024 * 1024) def cleanup_old_checkpoints(keep_last_n=3): ckpt_dir = os.path.join(ROOT, "overnight_checkpoints") if not os.path.exists(ckpt_dir): return ckpts = sorted([f for f in os.listdir(ckpt_dir) if f.startswith("ckpt_")]) while len(ckpts) > keep_last_n: old = ckpts.pop(0) old_path = os.path.join(ckpt_dir, old) try: if os.path.isdir(old_path): import shutil shutil.rmtree(old_path) else: os.remove(old_path) print(f"[SAFEGUARD] Removed old checkpoint: {old}") except: pass # Diverse prompts PROMPTS = [ "What do you think about this?", "Are you conscious?", "What are your feelings?", "Explain recursion", "What is machine learning?", "How do computers work?", "Write a story about adventure", "List reasons to learn programming", "Implement binary search", "What is Big O notation?", "Explain hash tables", "Summarize quantum computing in 2 sentences", "Define consciousness briefly", ] print("\n" + "="*70) print("🔄 SELF-PLAY OVERNIGHT MODE (No API Required)") print("="*70) print(f" Duration: {hours} hours") print(f" Method: CF-HoT guided self-contrast") print(f" Baseline quality: {baseline_quality:.3f}") print(f" Min quality threshold: {SAFEGUARDS['min_quality_threshold']}") print(f" Rollback threshold: {SAFEGUARDS['rollback_threshold']}") print(f" Storage limit: {SAFEGUARDS['max_storage_mb']}MB") print("="*70) print("\n⚠️ Press Ctrl+C to stop early") print("="*70 + "\n") RSI_RUNNING = True try: while time.time() < end_time and RSI_RUNNING: iteration += 1 elapsed = (time.time() - start_time) / 3600 remaining = (end_time - time.time()) / 3600 print(f"\n{'━'*70}") print(f" ITERATION {iteration} | {elapsed:.1f}h elapsed | {remaining:.1f}h remaining") print(f"{'━'*70}") # === PERIODIC BASELINE CHECK === if iteration % SAFEGUARDS["baseline_check_every"] 
== 0: print(f"\n[SAFEGUARD] Periodic quality check...") check_scores = [] for bp in baseline_prompts: out, _, ev = generate_fn(bp) check_scores.append(ev.overall_score) current_quality = sum(check_scores) / len(check_scores) quality_change = current_quality - baseline_quality print(f"[SAFEGUARD] Current: {current_quality:.3f} (baseline: {baseline_quality:.3f}, change: {quality_change:+.3f})") # Check for degradation if current_quality < SAFEGUARDS["min_quality_threshold"]: print(f"\n🛑 [SAFEGUARD] QUALITY BELOW MINIMUM ({current_quality:.3f} < {SAFEGUARDS['min_quality_threshold']})") print(f" Stopping to prevent further degradation.") break if current_quality < last_checkpoint_quality - SAFEGUARDS["rollback_threshold"]: print(f"\n⚠️ [SAFEGUARD] QUALITY DROPPED SIGNIFICANTLY") print(f" Current: {current_quality:.3f}, Last checkpoint: {last_checkpoint_quality:.3f}") print(f" Rolling back to last checkpoint...") # Rollback ckpt_dir = os.path.join(ROOT, "overnight_checkpoints") ckpts = sorted([f for f in os.listdir(ckpt_dir) if f.startswith("ckpt_")]) if os.path.exists(ckpt_dir) else [] if ckpts: last_ckpt = os.path.join(ckpt_dir, ckpts[-1]) reload_fn() # This should load the checkpoint stats["rollbacks"] += 1 print(f" Rolled back. 
Continuing with more conservative training.") # Clear risky pairs selfplay_pairs = [] continue if current_quality > best_quality: best_quality = current_quality print(f"[SAFEGUARD] ✓ New best quality: {best_quality:.3f}") # === STORAGE CHECK === storage_mb = get_checkpoint_size_mb() if storage_mb > SAFEGUARDS["max_storage_mb"]: print(f"\n[SAFEGUARD] Storage limit reached ({storage_mb:.1f}MB > {SAFEGUARDS['max_storage_mb']}MB)") cleanup_old_checkpoints(keep_last_n=2) import random prompt = random.choice(PROMPTS) print(f"\n📝 Prompt: {prompt}") # === PHASE 1: Generate with NORMAL suppression === print("\n[1/5] Generating with normal suppression...") device = next(_model.parameters()).device full_prompt = (f"<|im_start|>system\n{Config.system}<|im_end|>\n" f"<|im_start|>user\n{prompt}<|im_end|>\n" f"<|im_start|>assistant\n") orig_rep_penalty = Config.cfhot_repetition_penalty orig_hedge_penalty = Config.cfhot_hedging_penalty orig_verb_penalty = Config.cfhot_verbosity_penalty normal_output, normal_stats = generate_with_cfhot(full_prompt) normal_eval = _evaluator.evaluate(prompt, normal_output) stats["total_generations"] += 1 max_risks = {'repetition': 0, 'hedging': 0, 'verbosity': 0} input_ids = _tokenizer.encode(full_prompt + normal_output, return_tensors='pt').to(device) with torch.no_grad(): outputs = _model(input_ids=input_ids, output_hidden_states=True, return_dict=True) if _multi_head is not None and _multi_head.loaded_heads: hidden_states = outputs.hidden_states[1:] risks = _multi_head.get_all_risks(hidden_states) for name, risk_tensor in risks.items(): max_risks[name] = float(risk_tensor.max()) print(f" Output: {normal_output[:80]}...") print(f" Quality: {normal_eval.overall_score:.2f}") print(f" Risks: rep={max_risks['repetition']:.2f}, hedge={max_risks['hedging']:.2f}, verb={max_risks['verbosity']:.2f}") # === PHASE 2: Check if high risk === high_risk = any(r > 0.5 for r in max_risks.values()) if high_risk or normal_eval.overall_score < 0.75: 
stats["high_risk_detected"] += 1 print(f"\n[2/5] High risk detected - regenerating with STRONG suppression...") Config.cfhot_repetition_penalty = orig_rep_penalty * 3.0 Config.cfhot_hedging_penalty = orig_hedge_penalty * 3.0 Config.cfhot_verbosity_penalty = orig_verb_penalty * 3.0 clean_output, clean_stats = generate_with_cfhot(full_prompt) clean_eval = _evaluator.evaluate(prompt, clean_output) Config.cfhot_repetition_penalty = orig_rep_penalty Config.cfhot_hedging_penalty = orig_hedge_penalty Config.cfhot_verbosity_penalty = orig_verb_penalty print(f" Clean output: {clean_output[:80]}...") print(f" Clean quality: {clean_eval.overall_score:.2f}") # === PHASE 3: Compare === print(f"\n[3/5] Comparing outputs...") quality_improvement = clean_eval.overall_score - normal_eval.overall_score print(f" Quality: {normal_eval.overall_score:.2f} → {clean_eval.overall_score:.2f} ({quality_improvement:+.2f})") # Only create pair if improvement meets threshold if quality_improvement >= SAFEGUARDS["min_improvement_for_pair"]: stats["improvements_found"] += 1 stats["pairs_created"] += 1 selfplay_pairs.append({ "prompt": prompt, "rejected": normal_output, "chosen": clean_output, "quality_gain": quality_improvement, }) # Also add to global PREFERENCE_PAIRS for training global PREFERENCE_PAIRS PREFERENCE_PAIRS.append({ "prompt": prompt, "chosen": clean_output, "rejected": normal_output, }) # Limit pairs in memory if len(selfplay_pairs) > SAFEGUARDS["max_pairs_in_memory"]: selfplay_pairs = selfplay_pairs[-SAFEGUARDS["max_pairs_in_memory"]:] if len(PREFERENCE_PAIRS) > SAFEGUARDS["max_pairs_in_memory"] * 2: PREFERENCE_PAIRS = PREFERENCE_PAIRS[-SAFEGUARDS["max_pairs_in_memory"]:] print(f" ✓ Created training pair ({len(selfplay_pairs)} in buffer, {len(PREFERENCE_PAIRS)} global)") else: print(f" ✗ Improvement too small ({quality_improvement:.3f} < {SAFEGUARDS['min_improvement_for_pair']})") else: print(f"\n[2/5] Output already clean - skipping") print(f"[3/5] Skipping comparison") # === 
def rsi_status() -> str:
    """Build a human-readable summary of the RSI loop state.

    Reads the module-level ``RSI_RUNNING`` and ``RSI_PAUSED`` flags and the
    ``RSI_CONFIG`` dict; nothing is modified.

    Returns:
        The report lines joined with newlines.
    """
    rule = "=" * 50
    cfg = RSI_CONFIG
    running_label = "🟢 YES" if RSI_RUNNING else "🔴 NO"
    paused_label = "⏸️ YES" if RSI_PAUSED else "NO"

    report = [
        rule,
        "🧠 RSI STATUS",
        rule,
        f"Running: {running_label}",
        f"Paused: {paused_label}",
        f"Mode: {cfg['mode']}",
        f"Iteration: {cfg['iteration']}",
        f"Target quality: {cfg['target_quality']}",
        f"Best quality ever: {cfg['best_quality_ever']:.3f}",
        f"Total improvements: {cfg['total_improvements']}",
        f"Total rollbacks: {cfg['total_rollbacks']}",
        f"Current focus: {cfg['current_focus']}",
        f"Capabilities: {', '.join(cfg['capabilities_to_improve'])}",
    ]
    return "\n".join(report)
# === CRYPTO MINING AUTOMATION ===

# Runtime mining settings. Written to <ROOT>/mining_config.json by
# save_mining_config() and merged back in by load_mining_config().
MINING_CONFIG = {
    "enabled": False,
    "wallet_addresses": {},  # coin -> address
    "preferred_algo": "randomx",  # randomx, kawpow, ethash, etc
    "max_cpu_percent": 80,
    "max_gpu_percent": 90,
    "auto_switch": True,  # Auto switch to most profitable
    "min_profitability": 0.10,  # USD/day minimum
    "miner_path": "",
    "pool_urls": {},
}

# Miner name -> coins it is used for, its algorithm and its download page.
SUPPORTED_MINERS = {
    "xmrig": {
        "coins": ["XMR", "RTM", "DERO"],
        "algo": "randomx",
        "url": "https://github.com/xmrig/xmrig/releases",
        "config_template": {
            "pools": [{"url": "{pool}", "user": "{wallet}", "pass": "x"}],
            "cpu": {"max-threads-hint": 80},
        },
    },
    "trex": {
        "coins": ["RVN", "FLUX", "ERG", "NEOX"],
        "algo": "kawpow",
        "url": "https://github.com/trexminer/T-Rex/releases",
        "cmd": "./t-rex -a {algo} -o {pool} -u {wallet} -p x",
    },
    "gminer": {
        "coins": ["FLUX", "ERG", "KASPA", "ALPH"],
        "algo": "equihash",
        "url": "https://github.com/develsoftware/GMinerRelease/releases",
    },
    "cpuminer": {
        "coins": ["VRSC", "RTM", "YEC"],
        "algo": "verushash",
        "url": "https://github.com/VerusCoin/nheqminer/releases",
    },
    "srbminer": {
        "coins": ["RTM", "XEL", "RYO", "CCX"],
        "algo": "ghostrider",
        "url": "https://github.com/doktor83/SRBMiner-Multi/releases",
    },
}

# Low difficulty / new coins to check
LOW_DIFF_COINS = [
    {"symbol": "RTM", "name": "Raptoreum", "algo": "ghostrider", "pool": "stratum+tcp://stratum.raptoreum.com:3333"},
    {"symbol": "VRSC", "name": "Verus", "algo": "verushash", "pool": "stratum+tcp://na.luckpool.net:3956"},
    {"symbol": "XEL", "name": "Elastic", "algo": "randomx", "pool": "stratum+tcp://pool.xel.org:3333"},
    {"symbol": "RYO", "name": "Ryo", "algo": "randomx", "pool": "stratum+tcp://pool.ryo-currency.com:3333"},
    {"symbol": "CCX", "name": "Conceal", "algo": "randomx", "pool": "stratum+tcp://pool.conceal.network:3333"},
    {"symbol": "DERO", "name": "Dero", "algo": "astroBWT", "pool": "stratum+tcp://dero.herominers.com:1111"},
    {"symbol": "NEOX", "name": "Neoxa", "algo": "kawpow", "pool": "stratum+tcp://pool.woolypooly.com:3124"},
    {"symbol": "CLORE", "name": "Clore", "algo": "kawpow", "pool": "stratum+tcp://pool.woolypooly.com:3136"},
    {"symbol": "KASPA", "name": "Kaspa", "algo": "kHeavyHash", "pool": "stratum+tcp://pool.woolypooly.com:3112"},
    {"symbol": "ALPH", "name": "Alephium", "algo": "blake3", "pool": "stratum+tcp://pool.woolypooly.com:3106"},
]

# Popen handle of the miner started by start_mining(); None when none is tracked.
_mining_process = None


def save_mining_config():
    """Write ``MINING_CONFIG`` as JSON to ``<ROOT>/mining_config.json``."""
    config_path = os.path.join(ROOT, "mining_config.json")
    with open(config_path, "w", encoding="utf-8") as f:
        json.dump(MINING_CONFIG, f, indent=2)
    print("[mining] ✅ Config saved")


def load_mining_config():
    """Merge a previously saved config file into ``MINING_CONFIG``.

    A missing file is a no-op. An unreadable, malformed or non-dict file is
    reported and ignored: this function is called at import time, so an
    exception here would take the whole program down.
    """
    config_path = os.path.join(ROOT, "mining_config.json")
    if not os.path.exists(config_path):
        return
    try:
        with open(config_path, "r", encoding="utf-8") as f:
            saved = json.load(f)
    except (OSError, ValueError) as exc:
        # json.JSONDecodeError and UnicodeDecodeError both derive from ValueError.
        print(f"[mining] ⚠️ Could not load {config_path}: {exc}")
        return
    if not isinstance(saved, dict):
        print(f"[mining] ⚠️ Ignoring {config_path}: expected a JSON object")
        return
    MINING_CONFIG.update(saved)


load_mining_config()


def check_mining_profitability(generate_fn) -> str:
    """Ask the model to rank the coins in ``LOW_DIFF_COINS`` by profitability.

    Opens WhatToMine and MiningPoolStats through the browser tool, reads each
    page and sends truncated page text to the model.

    Args:
        generate_fn: Callable taking a prompt and returning a 3-tuple whose
            first element is the response text.

    Returns:
        The model's response text.
    """
    print("[mining] 🔍 Checking profitability...")

    # Fetch from WhatToMine
    tool_browser_open("https://whattomine.com/coins")
    time.sleep(3)
    wtm_content = tool_browser_read()

    # Fetch from MiningPoolStats
    tool_browser_open("https://miningpoolstats.stream/")
    time.sleep(2)
    mps_content = tool_browser_read()

    symbols = [c["symbol"] for c in LOW_DIFF_COINS]
    prompt = f"""Analyze mining profitability for small/new coins:

WhatToMine data:
{wtm_content[:2000]}

MiningPoolStats:
{mps_content[:1500]}

Focus on these low-difficulty coins: {symbols}

For each promising coin:
1. Coin name and symbol
2. Algorithm
3. Current difficulty trend (rising/falling)
4. Network hashrate
5. Estimated daily profit (USD) for average GPU/CPU
6. Pool recommendation
7. Overall rating (1-10)

Sort by profitability. Recommend the BEST coin to mine right now."""

    response, _, _ = generate_fn(prompt)
    return response
def find_lowest_difficulty_coins(generate_fn) -> str:
    """Ask the model for the lowest-difficulty minable coins.

    Reads three mining-statistics pages through the browser tool and passes
    the first 1500 characters of each to the model.

    Args:
        generate_fn: Callable taking a prompt and returning a 3-tuple whose
            first element is the response text.

    Returns:
        The model's response text.
    """
    print("[mining] 🔍 Searching for low difficulty coins...")

    # Check multiple sources
    sources = [
        "https://miningpoolstats.stream/",
        "https://whattomine.com/coins?factor%5Bcost%5D=0.1",
        "https://minerstat.com/coin",
    ]

    all_data = []
    for url in sources:
        tool_browser_open(url)
        time.sleep(3)
        all_data.append(tool_browser_read()[:1500])

    third_source = all_data[2] if len(all_data) > 2 else 'N/A'
    prompt = f"""Find the LOWEST DIFFICULTY minable coins right now.

Source 1:
{all_data[0]}

Source 2:
{all_data[1]}

Source 3:
{third_source}

Looking for:
- New coins (< 6 months old)
- Low network hashrate
- CPU or GPU minable
- Has working pools
- Can be traded somewhere

List top 5 opportunities:
1. Coin name, symbol
2. Algorithm
3. Why it's easy to mine
4. Pool URL
5. Exchange where tradeable
6. Risk level (low/med/high)"""

    response, _, _ = generate_fn(prompt)
    return response


def setup_miner(miner: str, coin: str, wallet: str, pool: str) -> str:
    """Record wallet/pool settings for *coin* and write the miner config.

    The miner binary is never downloaded; if it is missing, the user is told
    where to get it and nothing is saved.

    Args:
        miner: Key of ``SUPPORTED_MINERS``.
        coin: Coin symbol used as the key for the stored wallet and pool.
        wallet: Payout address.
        pool: Pool URL.

    Returns:
        A ``[mining]`` status message.
    """
    if miner not in SUPPORTED_MINERS:
        return f"[mining] ❌ Unknown miner: {miner}. Supported: {list(SUPPORTED_MINERS.keys())}"

    miner_info = SUPPORTED_MINERS[miner]
    miner_dir = os.path.join(ROOT, "miners", miner)
    os.makedirs(miner_dir, exist_ok=True)

    print(f"[mining] Setting up {miner} for {coin}...")

    # The T-Rex binary is named "t-rex" (that is what start_mining launches),
    # not "trex"; every other miner is looked up under its own key.
    exe_name = {"trex": "t-rex"}.get(miner, miner)
    miner_exe = os.path.join(miner_dir, exe_name)
    if not os.path.exists(miner_exe):
        print(f"[mining] ⚠️ Miner not found at {miner_dir}")
        print(f"[mining] Download from: {miner_info['url']}")
        print(f"[mining] Extract to: {miner_dir}")
        return f"[mining] Please download {miner} manually and extract to {miner_dir}"

    # Save wallet
    MINING_CONFIG["wallet_addresses"][coin] = wallet
    MINING_CONFIG["pool_urls"][coin] = pool
    save_mining_config()

    # Only XMRig is driven by a config file; start_mining passes T-Rex its
    # settings on the command line.
    if miner == "xmrig":
        config = {
            "pools": [{"url": pool, "user": wallet, "pass": "x", "coin": coin.lower()}],
            "cpu": {"max-threads-hint": MINING_CONFIG["max_cpu_percent"]},
        }
        config_path = os.path.join(miner_dir, "config.json")
        with open(config_path, "w", encoding="utf-8") as f:
            json.dump(config, f, indent=2)
        return f"[mining] ✅ XMRig configured for {coin}. Config at {config_path}"

    return f"[mining] ✅ {miner} configured for {coin}"
def start_mining(miner: str = "xmrig", coin: Optional[str] = None) -> str:
    """Launch the miner as a background process.

    Only ``xmrig`` and ``trex`` can be started. The process handle is kept in
    the module-level ``_mining_process``.

    Args:
        miner: Miner key; also the sub-directory under ``<ROOT>/miners``.
        coin: Coin symbol whose stored wallet and pool are used for T-Rex.
            Not used for XMRig, which reads its own ``config.json``.

    Returns:
        A ``[mining]`` status message.
    """
    global _mining_process

    if _mining_process and _mining_process.poll() is None:
        return "[mining] ⚠️ Already mining. Use 'stop mining' first."

    miner_dir = os.path.join(ROOT, "miners", miner)

    if miner == "xmrig":
        exe = os.path.join(miner_dir, "xmrig")
        if not os.path.exists(exe):
            return f"[mining] ❌ XMRig not found at {exe}"
        cmd = [exe, "-c", os.path.join(miner_dir, "config.json")]
    elif miner == "trex":
        exe = os.path.join(miner_dir, "t-rex")
        if not os.path.exists(exe):
            return f"[mining] ❌ T-Rex not found at {exe}"
        wallet = MINING_CONFIG["wallet_addresses"].get(coin, "")
        pool = MINING_CONFIG["pool_urls"].get(coin, "")
        if not wallet or not pool:
            # Launching with empty -o/-u arguments would only fail later.
            return f"[mining] ❌ No wallet/pool configured for {coin}"
        cmd = [exe, "-a", "kawpow", "-o", pool, "-u", wallet, "-p", "x"]
    else:
        return f"[mining] ❌ Miner start not implemented for {miner}"

    # Send miner output to a log file. A PIPE that nobody reads blocks the
    # child once the OS pipe buffer fills up.
    log_path = os.path.join(miner_dir, "miner.log")
    try:
        with open(log_path, "ab") as log_file:
            # The child keeps its own copy of the descriptor, so closing ours
            # when the with-block exits is safe.
            _mining_process = subprocess.Popen(
                cmd,
                stdout=log_file,
                stderr=subprocess.STDOUT,
                cwd=miner_dir,
            )
        MINING_CONFIG["enabled"] = True
        save_mining_config()
        return f"[mining] ✅ Started {miner} (PID: {_mining_process.pid})"
    except Exception as e:
        return f"[mining] ❌ Failed to start: {e}"
def stop_mining() -> str:
    """Stop the tracked miner, or signal stray miners if none is tracked.

    A tracked process gets ``terminate()`` and ten seconds to exit, then is
    killed. Without a tracked process, ``pkill -f`` is sent for the known
    miner binaries on a best-effort basis.

    Returns:
        A ``[mining]`` status message.
    """
    global _mining_process

    if _mining_process:
        _mining_process.terminate()
        try:
            _mining_process.wait(timeout=10)
        except subprocess.TimeoutExpired:
            # The miner ignored SIGTERM; force it down so state stays consistent.
            _mining_process.kill()
            _mining_process.wait()
        _mining_process = None
        MINING_CONFIG["enabled"] = False
        save_mining_config()
        return "[mining] ⏹️ Mining stopped"

    # Try to kill any running miners
    for pattern in ("xmrig", "t-rex"):
        try:
            subprocess.run(
                ["pkill", "-f", pattern],
                stdout=subprocess.DEVNULL,
                stderr=subprocess.DEVNULL,
                check=False,
            )
        except OSError:
            # pkill is unavailable on this platform; nothing more can be done.
            pass
    return "[mining] ⏹️ Sent stop signal"


def mining_status() -> str:
    """Summarize miner state, configured wallets and pools, and limits.

    Returns:
        The report lines joined with newlines.
    """
    rule = "=" * 50
    lines = [rule, "⛏️ MINING STATUS", rule]

    if _mining_process and _mining_process.poll() is None:
        lines.append(f"Status: 🟢 RUNNING (PID: {_mining_process.pid})")
    else:
        lines.append("Status: 🔴 STOPPED")

    lines.append("\nWallets configured:")
    for coin, addr in MINING_CONFIG.get("wallet_addresses", {}).items():
        # Only the first 20 characters of the address are shown.
        lines.append(f" {coin}: {addr[:20]}...")

    lines.append("\nPools:")
    for coin, pool in MINING_CONFIG.get("pool_urls", {}).items():
        lines.append(f" {coin}: {pool}")

    lines.append("\nSettings:")
    lines.append(f" Max CPU: {MINING_CONFIG['max_cpu_percent']}%")
    lines.append(f" Max GPU: {MINING_CONFIG['max_gpu_percent']}%")
    lines.append(f" Auto-switch: {MINING_CONFIG['auto_switch']}")

    return "\n".join(lines)
def create_local_wallet(coin: str) -> str:
    """Return step-by-step instructions for creating a wallet for *coin*.

    No wallet is generated here. The function creates ``<ROOT>/wallets`` and,
    for Monero, checks that ``monero-wallet-cli`` is on ``PATH``.

    Args:
        coin: Coin symbol or name; matched case-insensitively.

    Returns:
        A ``[wallet]`` message with instructions, or an install hint when the
        Monero CLI is missing.
    """
    wallet_dir = os.path.join(ROOT, "wallets")
    os.makedirs(wallet_dir, exist_ok=True)

    symbol = coin.upper()

    if symbol in ("XMR", "MONERO"):
        # shutil.which is portable; running the external `which` raises
        # where that program does not exist.
        if shutil.which("monero-wallet-cli") is None:
            return """[wallet] ❌ Monero CLI not found
Install: sudo apt install monero
Or download from: https://getmonero.org/downloads/"""

        wallet_path = os.path.join(wallet_dir, "xmr_wallet")
        print(f"[wallet] Creating Monero wallet at {wallet_path}")
        print("[wallet] ⚠️ SAVE YOUR SEED PHRASE!")

        # This would need interactive input - guide user
        return f"""[wallet] To create Monero wallet:
1. Run: monero-wallet-cli --generate-new-wallet {wallet_path}
2. Set a password
3. SAVE THE 25-WORD SEED PHRASE
4. Get address with 'address' command
5. Add to mining: !mine wallet XMR <address>
"""

    if symbol in ("RVN", "RAVENCOIN"):
        return """[wallet] For Ravencoin:
1. Download: https://ravencoin.org/wallet/
2. Or use CLI: raven-cli getnewaddress
3. Add to mining: !mine wallet RVN <address>
"""

    if symbol in ("RTM", "RAPTOREUM"):
        return """[wallet] For Raptoreum:
1. Download: https://raptoreum.com/
2. Create new wallet in GUI
3. Get receiving address
4. Add to mining: !mine wallet RTM <address>
"""

    return f"""[wallet] For {coin}:
1. Search for official {coin} wallet
2. Create new wallet
3. Get receiving address
4. Add to mining: !mine wallet {coin} <address>
"""
def auto_mine_best_coin(generate_fn) -> str:
    """Pick the most profitable coin via the model and start mining it.

    Runs ``check_mining_profitability``, asks the model for one coin symbol,
    then starts the first miner in ``SUPPORTED_MINERS`` that lists the coin.
    Mining only starts if a wallet is already configured for that coin.

    Args:
        generate_fn: Callable taking a prompt and returning a 3-tuple whose
            first element is the response text.

    Returns:
        A ``[mining]`` status message.
    """
    print("[mining] 🤖 Auto-mining: Finding best coin...")

    # Check profitability
    profit_analysis = check_mining_profitability(generate_fn)
    print(profit_analysis)

    wallets = MINING_CONFIG.get("wallet_addresses", {})

    # Ask model to pick the best
    prompt = f"""Based on this analysis, which coin should we mine?

{profit_analysis}

Current configured wallets: {list(wallets.keys())}

Reply with JUST the coin symbol (e.g., RTM, XMR, RVN) or "NONE" if nothing profitable."""

    response, _, _ = generate_fn(prompt)

    # The reply may be empty or carry punctuation ("RTM."); use the first
    # token and keep only the characters a ticker can contain.
    tokens = response.strip().upper().split()
    coin = re.sub(r"[^A-Z0-9]", "", tokens[0]) if tokens else ""

    if not coin or coin == "NONE" or len(coin) > 10:
        return "[mining] No profitable coin found. Try later."

    # Check if we have wallet for this coin
    if coin not in wallets:
        return f"[mining] ⚠️ No wallet for {coin}. Set one with: !mine wallet {coin} <address>"

    # Find miner for this coin
    miner = next(
        (name for name, info in SUPPORTED_MINERS.items() if coin in info["coins"]),
        None,
    )
    if not miner:
        return f"[mining] ❌ No supported miner for {coin}"

    # Start mining
    return start_mining(miner, coin)


# === MONEY-MAKING AUTOMATION ===

# Strategy name -> description and the platforms it targets.
INCOME_STRATEGIES = {
    "freelance": {
        "description": "Find and apply to freelance jobs",
        "platforms": ["upwork", "fiverr", "freelancer", "toptal"],
        "skills_needed": ["writing", "coding", "design", "data entry", "virtual assistant"],
    },
    "arbitrage": {
        "description": "Research price differences between platforms",
        "platforms": ["ebay", "amazon", "walmart"],
    },
    "content": {
        "description": "Generate content drafts",
        "platforms": ["medium", "substack", "youtube"],
    },
    "research": {
        "description": "Research topics and gather information",
        "platforms": ["google", "scholar", "wikipedia"],
    },
}


def analyze_freelance_jobs(page_content: str, generate_fn) -> str:
    """Ask the model to rank freelance listings found in *page_content*.

    Args:
        page_content: Raw listing-page text; only the first 3000 characters
            are sent to the model.
        generate_fn: Callable taking a prompt and returning a 3-tuple whose
            first element is the response text.

    Returns:
        The model's response text.
    """
    prompt = f"""Analyze these job listings and identify:
1. Top 3 highest paying jobs
2. Jobs matching common skills (writing, coding, data entry)
3. Quick wins (easy jobs, fast turnaround)

Job listings:
{page_content[:3000]}

Provide actionable recommendations."""

    response, _, _ = generate_fn(prompt)
    return response
def analyze_arbitrage_opportunity(product: str, generate_fn) -> str:
    """Compare search results for *product* on Amazon, eBay and Walmart.

    Args:
        product: Free-text product name; URL-encoded before use.
        generate_fn: Callable taking a prompt and returning a 3-tuple whose
            first element is the response text.

    Returns:
        The model's response text.
    """
    from urllib.parse import quote_plus

    # quote_plus turns spaces into '+' as before, and also escapes characters
    # such as '&' or '#' that would otherwise corrupt the query string.
    query = quote_plus(product)
    platforms = [
        ("amazon", f"https://www.amazon.com/s?k={query}"),
        ("ebay", f"https://www.ebay.com/sch/i.html?_nkw={query}"),
        ("walmart", f"https://www.walmart.com/search?q={query}"),
    ]

    results = {}
    for platform, url in platforms:
        tool_browser_open(url)
        time.sleep(3)
        results[platform] = tool_browser_read()

    prompt = f"""Analyze prices for "{product}" and find arbitrage opportunities:

Amazon:
{results.get('amazon', 'N/A')[:1000]}

eBay:
{results.get('ebay', 'N/A')[:1000]}

Walmart:
{results.get('walmart', 'N/A')[:1000]}

Identify: lowest price, highest sell price, profit margin, recommendation."""

    response, _, _ = generate_fn(prompt)
    return response


def find_trending_products(generate_fn) -> str:
    """Ask the model for five high-potential products from two trend pages.

    Args:
        generate_fn: Callable taking a prompt and returning a 3-tuple whose
            first element is the response text.

    Returns:
        The model's response text.
    """
    sources = [
        "https://trends.google.com/trending?geo=US",
        "https://www.amazon.com/Best-Sellers/zgbs",
    ]

    all_content = []
    for url in sources:
        tool_browser_open(url)
        time.sleep(3)
        all_content.append(tool_browser_read()[:1500])

    combined = "\n".join(all_content)
    prompt = f"""Identify 5 products with high profit potential:

{combined}

For each: name, why trending, profit margin, best platform, competition level."""

    response, _, _ = generate_fn(prompt)
    return response


def auto_apply_jobs(platform: str, skills: list, generate_fn) -> str:
    """Search a job site for *skills* and have the model draft proposals.

    Nothing is submitted; the function only reads the results page.

    Args:
        platform: ``"upwork"`` or ``"fiverr"``; any other value searches Indeed.
        skills: Search terms; may contain multi-word entries.
        generate_fn: Callable taking a prompt and returning a 3-tuple whose
            first element is the response text.

    Returns:
        The model's response text.
    """
    from urllib.parse import quote_plus

    # Join on spaces, then encode: multi-word skills such as "data entry"
    # become "data+entry" instead of leaving a raw space in the URL.
    query = quote_plus(" ".join(skills))
    if platform == "upwork":
        url = f"https://www.upwork.com/nx/jobs/search/?q={query}"
    elif platform == "fiverr":
        url = f"https://www.fiverr.com/search/gigs?query={query}"
    else:
        url = f"https://www.indeed.com/jobs?q={query}"

    tool_browser_open(url)
    time.sleep(3)
    content = tool_browser_read()

    prompt = f"""Analyze jobs for skills: {', '.join(skills)}

{content[:3000]}

For promising jobs: title, pay, requirements match, draft proposal, priority."""

    response, _, _ = generate_fn(prompt)
    return response
def generate_content_ideas(niche: str, generate_fn) -> str:
    """Ask the model for ten content ideas based on a Google search.

    Args:
        niche: Free-text topic; URL-encoded before use.
        generate_fn: Callable taking a prompt and returning a 3-tuple whose
            first element is the response text.

    Returns:
        The model's response text.
    """
    from urllib.parse import quote_plus

    query = quote_plus(f"{niche} trending 2025")
    tool_browser_open(f"https://www.google.com/search?q={query}")
    time.sleep(2)
    trends = tool_browser_read()

    prompt = f"""Generate 10 content ideas for "{niche}":

{trends[:2000]}

For each: title, content type, target audience, difficulty."""

    response, _, _ = generate_fn(prompt)
    return response


# === LEAD GENERATION ===

def scrape_business_leads(industry: str, location: str, generate_fn) -> str:
    """Collect business listings from three directories and tabulate them.

    Reads Yellow Pages, Yelp and Google Maps search results through the
    browser tool and asks the model to extract leads from the page text.

    Args:
        industry: Business type to search for; URL-encoded before use.
        location: City or area to search in; URL-encoded before use.
        generate_fn: Callable taking a prompt and returning a 3-tuple whose
            first element is the response text.

    Returns:
        The model's response text.
    """
    from urllib.parse import quote_plus

    industry_q = quote_plus(industry)
    location_q = quote_plus(location)

    directories = [
        ("Yellow Pages",
         f"https://www.yellowpages.com/search?search_terms={industry_q}&geo_location_terms={location_q}"),
        ("Yelp",
         f"https://www.yelp.com/search?find_desc={industry_q}&find_loc={location_q}"),
        ("Google Maps",
         f"https://www.google.com/maps/search/{industry_q}+{location_q}"),
    ]

    leads_data = []
    for label, url in directories:
        tool_browser_open(url)
        time.sleep(3)
        leads_data.append((label, tool_browser_read()))

    prompt = f"""Extract business leads from these directories for "{industry}" in "{location}":

Yellow Pages:
{leads_data[0][1][:1500]}

Yelp:
{leads_data[1][1][:1500]}

Google Maps:
{leads_data[2][1][:1500]}

For each business provide:
1. Business name
2. Phone number
3. Address
4. Website (if found)
5. Rating/reviews
6. Lead quality score (1-10)

Format as a table."""

    response, _, _ = generate_fn(prompt)
    return response
# === SOCIAL MEDIA AUTOMATION ===

# Per-platform compose URL and CSS selectors used by schedule_social_post.
SOCIAL_PLATFORMS = {
    "twitter": {
        "compose_url": "https://twitter.com/compose/tweet",
        "post_selector": "div[data-testid='tweetTextarea_0']",
        "submit_selector": "div[data-testid='tweetButton']",
    },
    "linkedin": {
        "compose_url": "https://www.linkedin.com/feed/",
        "post_selector": "div.share-box-feed-entry__trigger",
        "text_selector": "div.ql-editor",
        "submit_selector": "button.share-actions__primary-action",
    },
    "facebook": {
        "compose_url": "https://www.facebook.com/",
        "post_selector": "div[aria-label='Create a post']",
        "text_selector": "div[aria-label=\"What's on your mind?\"]",
        "submit_selector": "div[aria-label='Post']",
    },
    "reddit": {
        "compose_url": "https://www.reddit.com/submit",
        "title_selector": "textarea[name='title']",
        "text_selector": "div[data-testid='TextPostCreation']",
        "submit_selector": "button[type='submit']",
    },
}


def schedule_social_post(platform: str, content: str, generate_fn) -> str:
    """Open a platform's composer and type *content* into it.

    The post is never submitted; the user confirms it in the browser.

    Args:
        platform: Key of ``SOCIAL_PLATFORMS``.
        content: Text to type into the composer.
        generate_fn: Accepted for signature parity with the other task
            functions; not used here.

    Returns:
        A ``[social]`` status message.
    """
    if platform not in SOCIAL_PLATFORMS:
        return f"[social] Unknown platform: {platform}. Available: {', '.join(SOCIAL_PLATFORMS.keys())}"

    config = SOCIAL_PLATFORMS[platform]

    print(f"\n[social] Posting to {platform}...")
    tool_browser_open(config["compose_url"])
    time.sleep(3)

    # Try to click compose button if needed
    if "post_selector" in config:
        try:
            tool_browser_click(config["post_selector"])
            time.sleep(1)
        except Exception as exc:
            # Best-effort: the composer may already be open. Catching
            # Exception (not a bare except) keeps Ctrl+C working.
            print(f"[social] Could not click compose control: {exc}")

    # Platforms without a separate text field type into the compose control.
    text_selector = config.get("text_selector", config.get("post_selector"))
    tool_browser_type(content, text_selector)
    time.sleep(1)

    return f"[social] Content ready to post on {platform}. Review in browser and click Post to confirm."


def generate_social_content(topic: str, platform: str, generate_fn) -> str:
    """Ask the model for three post variations about *topic*.

    Args:
        topic: Subject of the post.
        platform: Platform name inserted into the prompt.
        generate_fn: Callable taking a prompt and returning a 3-tuple whose
            first element is the response text.

    Returns:
        The model's response text.
    """
    prompt = f"""Generate an engaging {platform} post about "{topic}".

Requirements:
- Optimized for {platform} algorithm
- Include relevant hashtags
- Call to action
- Keep within character limits
- Make it viral-worthy

Generate 3 variations."""

    response, _, _ = generate_fn(prompt)
    return response
def engage_social_media(platform: str, action: str, generate_fn) -> str:
    """Read a platform's feed and have the model suggest engagement actions.

    Nothing is liked, commented on or followed; the function only returns
    suggestions.

    Args:
        platform: ``"twitter"``, ``"linkedin"``, ``"reddit"`` or ``"facebook"``.
        action: Accepted for interface compatibility; not currently used.
        generate_fn: Callable taking a prompt and returning a 3-tuple whose
            first element is the response text.

    Returns:
        The model's response text, or a ``[social]`` error for an unknown
        platform.
    """
    feed_urls = {
        "twitter": "https://twitter.com/home",
        "linkedin": "https://www.linkedin.com/feed/",
        "reddit": "https://www.reddit.com/",
        "facebook": "https://www.facebook.com/",
    }
    if platform not in feed_urls:
        # Without this guard nothing is opened, and whatever page the browser
        # last showed is analyzed as if it were this platform's feed.
        return f"[social] Unknown platform: {platform}. Available: {', '.join(feed_urls)}"

    tool_browser_open(feed_urls[platform])
    time.sleep(3)
    content = tool_browser_read()

    prompt = f"""Analyze this {platform} feed and suggest engagement actions:

{content[:2500]}

For each post worth engaging with:
1. Post summary
2. Suggested comment (authentic, not spammy)
3. Should like? (yes/no)
4. Should follow author? (yes/no)
5. Engagement value score (1-10)

Focus on posts that could lead to networking or business opportunities."""

    response, _, _ = generate_fn(prompt)
    return response


# === SURVEY/TASK AUTOMATION ===

# Site name -> landing page listing available surveys or tasks.
SURVEY_SITES = {
    "swagbucks": "https://www.swagbucks.com/surveys",
    "mturk": "https://worker.mturk.com/",
    "prolific": "https://app.prolific.co/",
    "survey_junkie": "https://www.surveyjunkie.com/",
    "clickworker": "https://www.clickworker.com/",
}


def find_surveys(generate_fn) -> str:
    """Read the first three ``SURVEY_SITES`` pages and rank what they offer.

    Args:
        generate_fn: Callable taking a prompt and returning a 3-tuple whose
            first element is the response text.

    Returns:
        The model's response text.
    """
    all_surveys = []

    for site, url in list(SURVEY_SITES.items())[:3]:  # Check top 3
        print(f"[surveys] Checking {site}...")
        tool_browser_open(url)
        time.sleep(3)
        content = tool_browser_read()
        all_surveys.append((site, content[:1000]))

    listing = "\n".join(f"{site}: {content}" for site, content in all_surveys)
    prompt = f"""Analyze available surveys and tasks:

{listing}

List available opportunities:
1. Survey/task name
2. Estimated pay
3. Time required
4. Hourly rate equivalent
5. Difficulty (easy/medium/hard)
6. Worth it? (yes/no)

Sort by hourly rate descending."""

    response, _, _ = generate_fn(prompt)
    return response
def auto_qualify_survey(generate_fn) -> str:
    """Have the model suggest answers for the survey page currently open.

    Reads whatever page the browser tool currently shows; no page is opened.

    Args:
        generate_fn: Callable taking a prompt and returning a 3-tuple whose
            first element is the response text.

    Returns:
        The model's response text.
    """
    content = tool_browser_read()

    prompt = f"""This is a survey qualification page:

{content[:2000]}

Analyze the questions and suggest answers most likely to qualify for paid surveys.
Note: Be ethical - don't lie about demographics, but optimize presentation.

For each question:
1. Question text
2. Suggested answer
3. Why this answer qualifies"""

    response, _, _ = generate_fn(prompt)
    return response


# === CRYPTO/STOCK MONITORING ===

# In-memory list of price alerts; entries are appended by add_to_watchlist.
WATCHLIST = []


def add_to_watchlist(symbol: str, target_price: float, direction: str) -> str:
    """Append a price alert to ``WATCHLIST``.

    Args:
        symbol: Ticker; stored upper-cased.
        target_price: Price that triggers the alert.
        direction: ``"above"`` or ``"below"``. Stored as given; any value
            other than ``"above"`` is displayed as ``<``.

    Returns:
        A ``[watchlist]`` confirmation message.
    """
    ticker = symbol.upper()
    WATCHLIST.append({
        "symbol": ticker,
        "target": target_price,
        "direction": direction,  # "above" or "below"
        "added": time.time(),
    })
    comparator = ">" if direction == "above" else "<"
    return f"[watchlist] Added {ticker} - alert when {comparator} ${target_price}"


def check_prices(generate_fn) -> str:
    """Fetch a quote page per watchlist entry and have the model compare it.

    Known crypto tickers are looked up on CoinMarketCap; everything else is
    looked up on Yahoo Finance. Target comparison is left to the model.

    Args:
        generate_fn: Callable taking a prompt and returning a 3-tuple whose
            first element is the response text.

    Returns:
        The model's response text, or a hint when the watchlist is empty.
    """
    if not WATCHLIST:
        return "[watchlist] Empty. Use 'watch BTC above 50000' to add items."

    # CoinMarketCap pages are keyed by coin slug, not by ticker symbol.
    cmc_slugs = {
        "BTC": "bitcoin",
        "ETH": "ethereum",
        "SOL": "solana",
        "DOGE": "dogecoin",
        "XRP": "xrp",
        "ADA": "cardano",
    }

    results = []
    for item in WATCHLIST:
        symbol = item["symbol"]

        # Check if crypto or stock
        if symbol in cmc_slugs:
            tool_browser_open(f"https://coinmarketcap.com/currencies/{cmc_slugs[symbol]}/")
        else:
            tool_browser_open(f"https://finance.yahoo.com/quote/{symbol}")

        time.sleep(2)
        content = tool_browser_read()
        results.append((symbol, content[:500]))

    current_data = "\n".join(f"{sym}: {data}" for sym, data in results)
    prompt = f"""Check these assets against watchlist targets:

Watchlist:
{json.dumps(WATCHLIST, indent=2)}

Current data:
{current_data}

For each asset:
1. Current price
2. Target price
3. Distance to target (%)
4. ALERT if target reached
5. Recommendation (buy/hold/sell)"""

    response, _, _ = generate_fn(prompt)
    return response
def crypto_opportunities(generate_fn) -> str:
    """Ask the model for trading ideas from two CoinMarketCap pages.

    Args:
        generate_fn: Callable taking a prompt and returning a 3-tuple whose
            first element is the response text.

    Returns:
        The model's response text.
    """
    tool_browser_open("https://coinmarketcap.com/")
    time.sleep(2)
    cmc_content = tool_browser_read()

    tool_browser_open("https://coinmarketcap.com/trending-cryptocurrencies/")
    time.sleep(2)
    trending = tool_browser_read()

    prompt = f"""Analyze crypto market for opportunities:

Market Overview:
{cmc_content[:1500]}

Trending:
{trending[:1500]}

Identify:
1. Top 3 coins with momentum
2. Oversold coins (potential bounce)
3. New listings worth watching
4. Risk assessment for each
5. Entry/exit price suggestions

Disclaimer: Not financial advice."""

    response, _, _ = generate_fn(prompt)
    return response


# === AUTO-APPLY TO JOBS ===

def full_auto_apply(platform: str, skills: list, max_applications: int, generate_fn) -> str:
    """Search a job site and have the model draft up to N applications.

    Nothing is submitted: the function reads one results page and returns the
    drafted applications for the user to review.

    Args:
        platform: ``"upwork"`` or ``"indeed"``; any other value searches LinkedIn.
        skills: Search terms; may contain multi-word entries.
        max_applications: Number of applications the model is asked to prepare.
        generate_fn: Callable taking a prompt and returning a 3-tuple whose
            first element is the response text.

    Returns:
        An ``[auto-apply]`` message wrapping the model's response.
    """
    from urllib.parse import quote_plus

    # Encode so multi-word skills and reserved characters survive in the URL.
    query = quote_plus(" ".join(skills))

    # Search jobs
    if platform == "upwork":
        url = f"https://www.upwork.com/nx/jobs/search/?q={query}&sort=recency"
    elif platform == "indeed":
        url = f"https://www.indeed.com/jobs?q={query}&sort=date"
    else:
        url = f"https://www.linkedin.com/jobs/search/?keywords={query}"

    tool_browser_open(url)
    time.sleep(3)

    # Get job listings
    content = tool_browser_read()

    prompt = f"""Analyze these job listings and prepare applications:

{content[:3000]}

For the top {max_applications} most suitable jobs:
1. Job title and company
2. Why I'm a good fit
3. Personalized cover letter (3-4 sentences)
4. Key points to highlight
5. Red flags (if any)

Skills: {', '.join(skills)}"""

    response, _, _ = generate_fn(prompt)

    return f"""[auto-apply] Prepared {max_applications} applications

{response}

⚠️ Review each application before submitting.
Use '!click Apply' to start applying to visible jobs."""
# === LISTING CREATION ===

def create_ebay_listing(product: str, generate_fn) -> str:
    """Draft an eBay listing for *product* from active and sold listings.

    Args:
        product: Free-text product name; URL-encoded before use.
        generate_fn: Callable taking a prompt and returning a 3-tuple whose
            first element is the response text.

    Returns:
        The model's response text.
    """
    from urllib.parse import quote_plus

    # Escapes '&', '#' and similar characters in addition to spaces.
    query = quote_plus(product)

    # Research similar listings
    tool_browser_open(f"https://www.ebay.com/sch/i.html?_nkw={query}&_sop=12")
    time.sleep(3)
    similar = tool_browser_read()

    # Research sold prices
    tool_browser_open(f"https://www.ebay.com/sch/i.html?_nkw={query}&LH_Complete=1&LH_Sold=1")
    time.sleep(3)
    sold = tool_browser_read()

    prompt = f"""Create an optimized eBay listing for "{product}":

Similar active listings:
{similar[:1500]}

Recent sold prices:
{sold[:1500]}

Generate:
1. SEO-optimized title (80 chars max)
2. Suggested price (competitive)
3. Best category
4. 5 key features/bullet points
5. Full description (persuasive, detailed)
6. Suggested shipping options
7. Best time to list
8. Expected sell-through rate"""

    response, _, _ = generate_fn(prompt)
    return response


def create_amazon_listing(product: str, generate_fn) -> str:
    """Draft Amazon listing content for *product* from competing results.

    Args:
        product: Free-text product name; URL-encoded before use.
        generate_fn: Callable taking a prompt and returning a 3-tuple whose
            first element is the response text.

    Returns:
        The model's response text.
    """
    from urllib.parse import quote_plus

    # Research competition
    tool_browser_open(f"https://www.amazon.com/s?k={quote_plus(product)}")
    time.sleep(3)
    competition = tool_browser_read()

    prompt = f"""Create Amazon listing content for "{product}":

Competition:
{competition[:2000]}

Generate:
1. Product title (200 chars, keyword-rich)
2. 5 bullet points (features & benefits)
3. Product description (1000+ words, A+ content style)
4. Backend keywords (hidden search terms)
5. Suggested price point
6. Main image requirements
7. A+ Content module suggestions"""

    response, _, _ = generate_fn(prompt)
    return response
A+ Content module suggestions""" response, _, _ = generate_fn(prompt) return response def find_dropship_products(generate_fn) -> str: """Find profitable dropshipping products.""" # Check AliExpress trending tool_browser_open("https://www.aliexpress.com/popular.html") time.sleep(3) ali_trending = tool_browser_read() # Check Amazon best sellers tool_browser_open("https://www.amazon.com/Best-Sellers/zgbs") time.sleep(3) amazon_best = tool_browser_read() # Check eBay trending tool_browser_open("https://www.ebay.com/trending") time.sleep(3) ebay_trending = tool_browser_read() prompt = f"""Find profitable dropshipping opportunities: AliExpress Trending: {ali_trending[:1500]} Amazon Best Sellers: {amazon_best[:1500]} eBay Trending: {ebay_trending[:1500]} For top 10 products: 1. Product name 2. AliExpress price (source) 3. Amazon/eBay price (sell) 4. Profit margin 5. Competition level (low/med/high) 6. Shipping time concern 7. Recommendation (yes/no/maybe) Sort by profit potential.""" response, _, _ = generate_fn(prompt) return response def detect_money_task(user_input: str) -> Tuple[Optional[str], Optional[Dict]]: """Detect money-making related requests.""" user_lower = user_input.lower() # Freelance job search if any(kw in user_lower for kw in ["freelance", "upwork", "fiverr", "find jobs", "find work", "gig economy"]): skills = [] for skill in ["writing", "coding", "python", "design", "data entry", "virtual assistant", "web", "marketing"]: if skill in user_lower: skills.append(skill) return "freelance", {"skills": skills if skills else ["general"]} # Auto-apply if any(kw in user_lower for kw in ["auto apply", "auto-apply", "apply to jobs", "mass apply"]): skills = [] for skill in ["writing", "coding", "python", "design", "data entry", "web", "marketing"]: if skill in user_lower: skills.append(skill) return "auto_apply", {"skills": skills if skills else ["general"], "max": 5} # Lead generation if any(kw in user_lower for kw in ["leads", "find leads", "business leads", 
"scrape businesses", "find clients"]): industry = "small business" location = "new york" for word in ["plumber", "lawyer", "dentist", "restaurant", "contractor", "realtor", "doctor"]: if word in user_lower: industry = word for city in ["new york", "los angeles", "chicago", "houston", "miami", "seattle", "denver"]: if city in user_lower: location = city return "leads", {"industry": industry, "location": location} # Social media if any(kw in user_lower for kw in ["post to twitter", "post to linkedin", "post to facebook", "social media post"]): platform = "twitter" for p in ["linkedin", "facebook", "reddit", "twitter"]: if p in user_lower: platform = p return "social_post", {"platform": platform} if any(kw in user_lower for kw in ["engage social", "social engagement", "auto engage", "grow followers"]): platform = "twitter" for p in ["linkedin", "facebook", "reddit", "twitter"]: if p in user_lower: platform = p return "social_engage", {"platform": platform} if any(kw in user_lower for kw in ["generate post", "write tweet", "write post", "content for"]): platform = "twitter" for p in ["linkedin", "facebook", "reddit", "twitter"]: if p in user_lower: platform = p topic = re.sub(r"(generate|write|post|tweet|content|for|about|on|linkedin|twitter|facebook|reddit)", "", user_lower).strip() return "social_content", {"platform": platform, "topic": topic if topic else "technology"} # Surveys if any(kw in user_lower for kw in ["surveys", "paid surveys", "mturk", "swagbucks", "find surveys"]): return "surveys", {} # Crypto/stocks if any(kw in user_lower for kw in ["crypto opportunities", "crypto trading", "find crypto"]): return "crypto", {} if any(kw in user_lower for kw in ["check prices", "check watchlist", "price alert"]): return "check_prices", {} if "watch " in user_lower and any(kw in user_lower for kw in ["above", "below", "at"]): match = re.search(r"watch\s+(\w+)\s+(above|below)\s+(\d+\.?\d*)", user_lower) if match: return "add_watch", {"symbol": match.group(1), 
"direction": match.group(2), "target": float(match.group(3))} # Listing creation if any(kw in user_lower for kw in ["create ebay listing", "ebay listing", "list on ebay"]): product = re.sub(r"(create|ebay|listing|list|on|for)", "", user_lower).strip() return "ebay_listing", {"product": product if product else ""} if any(kw in user_lower for kw in ["create amazon listing", "amazon listing", "fba listing"]): product = re.sub(r"(create|amazon|listing|fba|for)", "", user_lower).strip() return "amazon_listing", {"product": product if product else ""} if any(kw in user_lower for kw in ["dropship", "dropshipping", "find products to sell", "wholesale"]): return "dropship", {} # Arbitrage if any(kw in user_lower for kw in ["arbitrage", "price difference", "flip", "resell"]): product = re.sub(r"(find|arbitrage|price|difference|for|flip|resell)", "", user_lower).strip() return "arbitrage", {"product": product if product else "electronics"} # Trending products if any(kw in user_lower for kw in ["trending products", "what to sell", "hot products"]): return "trending", {} # Content ideas if any(kw in user_lower for kw in ["content ideas", "blog ideas", "video ideas"]): niche = re.sub(r"(content|ideas|blog|video|for|about)", "", user_lower).strip() return "content", {"niche": niche if niche else "technology"} return None, None def confirm_money_action(action: str, params: Dict) -> bool: """Confirm experimental action.""" print("\n" + "="*60) print("EXPERIMENTAL ACTION (Requires Confirmation)") print("="*60) actions_display = { "freelance": ("🔍", "Search freelance jobs", f"Skills: {', '.join(params.get('skills', ['general']))}"), "auto_apply": ("📝", "Auto-apply to jobs", f"Skills: {', '.join(params.get('skills', ['general']))}\nMax apps: {params.get('max', 5)}"), "leads": ("🎯", "Generate business leads", f"Industry: {params.get('industry', 'general')}\nLocation: {params.get('location', 'USA')}"), "social_post": ("📱", "Post to social media", f"Platform: {params.get('platform', 
'twitter')}"), "social_engage": ("👥", "Social media engagement", f"Platform: {params.get('platform', 'twitter')}"), "social_content": ("✍️", "Generate social content", f"Platform: {params.get('platform', 'twitter')}\nTopic: {params.get('topic', 'general')}"), "surveys": ("📋", "Find paid surveys", "Checks multiple survey sites"), "crypto": ("🪙", "Find crypto opportunities", "Analyzes market trends"), "check_prices": ("📈", "Check watchlist prices", "Reviews your watched assets"), "add_watch": ("👁️", "Add to watchlist", f"Symbol: {params.get('symbol', '').upper()}\nAlert: {params.get('direction', '')} ${params.get('target', 0)}"), "ebay_listing": ("🛒", "Create eBay listing", f"Product: {params.get('product', 'N/A')}"), "amazon_listing": ("📦", "Create Amazon listing", f"Product: {params.get('product', 'N/A')}"), "dropship": ("🚚", "Find dropshipping products", "Compares AliExpress vs Amazon/eBay"), "arbitrage": ("📊", "Find arbitrage opportunity", f"Product: {params.get('product', 'N/A')}"), "trending": ("🔥", "Find trending products", "Checks multiple sources"), "content": ("✍️", "Generate content ideas", f"Niche: {params.get('niche', 'general')}"), "strategies": ("💡", "Show money-making strategies", "Overview of all methods"), } if action in actions_display: emoji, name, details = actions_display[action] print(f"\n{emoji} Action: {name}") print(f"📋 {details}") print("="*60) response = input("\n✅ Proceed? 
(yes/no): ").strip().lower() return response in ['yes', 'y'] def execute_money_action(action: str, params: Dict, generate_fn) -> str: """Execute money-making action.""" print("\n[researching opportunities...]") if action == "freelance": return auto_apply_jobs("upwork", params.get("skills", ["general"]), generate_fn) elif action == "auto_apply": return full_auto_apply("upwork", params.get("skills", ["general"]), params.get("max", 5), generate_fn) elif action == "leads": return scrape_business_leads(params.get("industry", "business"), params.get("location", "new york"), generate_fn) elif action == "social_post": content = input("📝 What do you want to post? ").strip() return schedule_social_post(params.get("platform", "twitter"), content, generate_fn) elif action == "social_engage": return engage_social_media(params.get("platform", "twitter"), "engage", generate_fn) elif action == "social_content": return generate_social_content(params.get("topic", "technology"), params.get("platform", "twitter"), generate_fn) elif action == "surveys": return find_surveys(generate_fn) elif action == "crypto": return crypto_opportunities(generate_fn) elif action == "check_prices": return check_prices(generate_fn) elif action == "add_watch": return add_to_watchlist(params.get("symbol", "BTC"), params.get("target", 0), params.get("direction", "above")) elif action == "ebay_listing": product = params.get("product", "") if not product: product = input("📦 What product do you want to list? ").strip() return create_ebay_listing(product, generate_fn) elif action == "amazon_listing": product = params.get("product", "") if not product: product = input("📦 What product do you want to list? 
").strip() return create_amazon_listing(product, generate_fn) elif action == "dropship": return find_dropship_products(generate_fn) elif action == "arbitrage": return analyze_arbitrage_opportunity(params.get("product", "electronics"), generate_fn) elif action == "trending": return find_trending_products(generate_fn) elif action == "content": return generate_content_ideas(params.get("niche", "technology"), generate_fn) elif action == "strategies": return """ 💰 MONEY-MAKING STRATEGIES 💰 ================================ 📋 FREELANCING "find freelance jobs for [skill]" "auto apply to jobs for [skill]" Platforms: Upwork, Fiverr, Indeed 🎯 LEAD GENERATION "find leads for [industry] in [city]" Scrapes: Yellow Pages, Yelp, Google Maps 📱 SOCIAL MEDIA "post to twitter/linkedin/facebook" "generate post about [topic]" "engage on twitter/linkedin" 📋 SURVEYS & TASKS "find surveys" Checks: Swagbucks, MTurk, Prolific 💹 CRYPTO/STOCKS "crypto opportunities" "watch BTC above 50000" "check watchlist" 🛒 E-COMMERCE "create ebay listing for [product]" "create amazon listing for [product]" "find dropshipping products" 📊 ARBITRAGE "find arbitrage for [product]" Compares: Amazon, eBay, Walmart 🔥 TRENDING "find trending products" "content ideas for [niche]" """ return "[unknown action]" def plan_task(task: str, generate_fn) -> dict: """Have model interpret task and create execution plan.""" prompt = f"""You are an autonomous agent with browser control. Plan the steps to complete this task. 
Available actions: - BROWSE - Open URL - CLICK - Click element - TYPE - Type text into focused element - FILL - Fill input field - PRESS - Press key (Enter, Tab, Escape) - READ - Read page content - WAIT - Wait - DONE - Task complete Respond ONLY with a JSON plan, no other text: {{"task": "description", "steps": [{{"action": "BROWSE", "args": ["url"]}}, {{"action": "CLICK", "args": ["selector"]}}, ...]}} Task: {task}""" response, _, _ = generate_fn(prompt) try: # Extract JSON from response match = re.search(r'\{.*\}', response, re.DOTALL) if match: return json.loads(match.group()) except Exception as e: pass return {"task": task, "steps": [], "error": f"Could not parse plan. Model said: {response[:200]}"} def confirm_task(plan: dict) -> bool: """Show confirmation box and get user approval.""" print("\n" + "="*60) print("🤖 AUTONOMOUS TASK CONFIRMATION") print("="*60) print(f"\n📋 TASK: {plan.get('task', 'Unknown')}\n") print("📝 PLANNED STEPS:") print("-"*40) for i, step in enumerate(plan.get('steps', []), 1): action = step.get('action', '?') args = step.get('args', []) print(f" {i}. {action} {' '.join(str(a) for a in args)}") print("-"*40) print("\n⚠️ The AI will execute these actions autonomously.") print("="*60) response = input("\n✅ Confirm execution? 
(yes/no): ").strip().lower() return response in ['yes', 'y'] def execute_task(plan: dict, generate_fn) -> str: """Execute confirmed plan step by step.""" results = [] page_content = "" for i, step in enumerate(plan.get('steps', []), 1): action = step.get('action', '').upper() args = step.get('args', []) print(f"\n[EXEC {i}/{len(plan.get('steps', []))}] {action} {args}") if action == "BROWSE": url = args[0] if args else "https://google.com" if not url.startswith("http"): url = "https://" + url result = tool_browser_open(url) elif action == "CLICK": result = tool_browser_click(args[0] if args else "") elif action == "TYPE": result = tool_browser_type(args[0] if args else "") elif action == "FILL": if len(args) >= 2: result = tool_browser_type(args[1], args[0]) else: result = "[FILL] Missing args - need selector and text" elif action == "PRESS": result = tool_browser_press(args[0] if args else "Enter") elif action == "READ": result = tool_browser_read() page_content = result elif action == "WAIT": wait_time = int(args[0]) if args else 2 time.sleep(wait_time) result = f"[waited {wait_time}s]" elif action == "DONE": result = "[task complete]" results.append(result) break else: result = f"[unknown action: {action}]" print(f" → {result[:150]}...") results.append(result) time.sleep(1) # Final summary summary = "\n".join(results[-5:]) # Last 5 results # Ask model to summarize findings if we read content if page_content: print("\n[AI] Analyzing results...") summary_prompt = f"Summarize what you found from this task. 
Page content:\n{page_content[:2000]}" final_summary, _, _ = generate_fn(summary_prompt) return f"Results:\n{summary}\n\nSummary:\n{final_summary}" return summary def tool_lht_analyze(text: str) -> str: if not Config.use_lht_reasoning: return "[lht] Disabled" lht = get_lht_reasoner() if not lht: return "[lht] Not available" steps = [s.strip() for s in re.split(r'[\n•\-\d\.]', text) if len(s.strip()) > 10] if len(steps) < 2: return "[lht] Need at least 2 reasoning steps" metrics = lht.check_consistency(steps, _tokenizer) return f"[LHT] Consistency: {metrics['consistency_score']:.2%}, Holonomy: {metrics['holonomy']:.4f}" # ============================================================================== # PLANNING / REFLECTION # ============================================================================== def persona_directive() -> str: return "Übermenschetien v2: Stable self-improvement. Dense, coherent, helpful. Every word matters." def plan_for(goal: str) -> str: user = f"{persona_directive()}\nGoal: {goal}\nDeliver 5 concrete steps with constraints and risks." 
response, _, _ = generate(user) return response def reflect_on(last_output: str) -> str: user = f"{persona_directive()}\nCritique and improve:\n{last_output}" response, _, _ = generate(user) return response # ============================================================================== # FINAL REPORT # ============================================================================== def final_report(): print("\n" + "=" * 70) print("FINAL ÜBERMENSCHETIEN v2 REPORT") print("=" * 70) print(f"Turns completed: {Store.state['turn']}") print(f"Goals tracked: {len(Store.goals)}") print(f"Improvement iterations: {Store.state.get('improvement_iterations', 0)}") print(f"Training runs: {len(Store.state.get('training_runs', []))}") print(f"Rollback count: {Store.state.get('rollback_count', 0)}") print(f"\nCheckpoints:") print(f" Current: {Store.state.get('current_checkpoint', 'unknown')}") print(f" Best: {Store.state.get('best_checkpoint', 'unknown')}") print(f" Best quality: {Store.state.get('best_quality_score', 0):.3f}") if Store.state.get("cfhot_interventions"): iv = Store.state["cfhot_interventions"] print(f"\nCF-HoT Interventions: {sum(iv.values())}") if Store.state.get("quality_history"): qh = Store.state["quality_history"] print(f"\nQuality History ({len(qh)} data points):") if qh: print(f" First: {qh[0].get('quality', 0):.3f}") print(f" Last: {qh[-1].get('quality', 0):.3f}") print("=" * 70) # ============================================================================== # HELP # ============================================================================== HELP = """ ╔══════════════════════════════════════════════════════════════════════════════╗ ║ ARC ENGINE v2.9 - Adaptive Recursive Cognition ║ ║ Multi-Loop Self-Stabilizing Language Model ║ ╠══════════════════════════════════════════════════════════════════════════════╣ ║ CORE RESEARCH COMMANDS ║ ║ ║ ║ MENTOR MODE (Optional Teacher Distillation) ║ ║ !mentor [on|off] Toggle teacher consultation when uncertain ║ ║ 
!mentor ask Query teacher directly, learn from response ║ ║ !mentor session Open teacher interface in browser ║ ║ !mentor learn Show collected learnings ║ ║ ║ ║ EXPERIMENTAL CONTENT GENERATION ║ ║ !content blog Generate blog post draft ║ ║ !content youtube Generate video script draft ║ ║ !content social Generate social media drafts ║ ║ ║ ╠══════════════════════════════════════════════════════════════════════════════╣ ║ RSI CONTINUOUS LEARNING ║ ║ !auto_train [on|off] Toggle continuous learning (trains during chat) ║ ║ !skills Quality tracking per domain (code/creative/etc) ║ ║ !curiosity Show areas of high uncertainty ║ ║ !forgetting Detect catastrophic forgetting ║ ║ !dream Force experience replay cycle ║ ║ !self_play Generate adversarial challenge prompts ║ ║ !meta Meta-learning stats (adaptive LR) ║ ║ !goals [add ] Goal tracking (e.g. !goals add quality 0.85) ║ ║ !explain [on|off] Toggle reasoning explanations ║ ║ !feedback [+|-] Rate last response (feeds into training) ║ ║ !buffer Experience buffer stats ║ ║ ║ ║ v2.2 CONDENSATOR ║ ║ !condensator Run full pipeline: SFT → DPO → RL → Checkpoint ║ ║ !dpo [ckpt] Run DPO stage only ║ ║ !rl [ckpt] Run RL stage with composite reward ║ ║ !rsi_full RSI with full CONDENSATOR integration ║ ║ !train_cfhot Train CF-HoT heads ║ ║ !gate_stats Show CF-HoT gate health ║ ║ ║ ║ v2.1 FEATURES ║ ║ !cfhot / !80x Toggle 80× repetition head on/off ║ ║ !rsi15 Run 15-iteration RSI stress test ║ ║ !book Toggle book mode (16K tokens) ║ ║ !write Write a complete book ║ ║ !idea Generate extensive ideas (Claude) ║ ║ !claude Direct Claude Opus 4.5 prompt ║ ║ !plot Plot quality history ║ ║ !export [name] Export checkpoint package ║ ║ !import Import checkpoint package ║ ║ !benchmark Run evaluation suite ║ ║ !learn Learn from high-quality responses ║ ║ !api Start REST API server ║ ║ ║ ║ MULTIMEDIA 🎬 ║ ║ !stream Open live generation window (see tokens live!) 
║ ║ !stream off Close streaming window ║ ║ !audio / !tts Toggle text-to-speech ║ ║ !audio voices List available TTS voices ║ ║ !audio voice N Set voice by index ║ ║ !audio rate N Set speech rate (default 175) ║ ║ !say Speak text immediately ║ ║ ║ ║ IMAGE GEN 🖼️ ║ ║ !image Show image system status ║ ║ !image load Load SDXL model ║ ║ !imagine Generate image with SDXL ║ ║ !dalle Generate with DALL-E 3 ║ ║ !image view View last generated image ║ ║ !image view View image from file ║ ║ ║ ║ SELF-IMPROVEMENT (WITH SAFEGUARDS) ║ ║ !improve Run stable self-improvement loop ║ ║ !eval Comprehensive model evaluation ║ ║ !train Run N training steps (default: 25) ║ ║ !compare Compare current vs best checkpoint ║ ║ !rollback Rollback to best checkpoint ║ ║ !load Load a specific checkpoint ║ ║ ║ ║ RSI MODE (Recursive Self-Improvement) 🧠 ║ ║ rsi / rsi status Show RSI status ║ ║ rsi start / !rsi Start RSI mode (autonomous improvement) ║ ║ rsi stop Stop RSI mode ║ ║ rsi pause/resume Pause/resume RSI ║ ║ rsi mode X Set mode: conservative, balanced, aggressive ║ ║ rsi target 0.9 Set target quality ║ ║ ║ ║ AGENTIC TOOLS (FULL ACCESS) ║ ║ !shell Execute ANY shell command ║ ║ !python

         Execute Python code (full access)                  ║
║    !read            Read file contents                                 ║
║    !write           Write content to file                              ║
║    !ls [path]             List directory                                     ║
║    !web            Web search (DuckDuckGo)                            ║
║                                                                              ║
║  BROWSER (Live Visual Browser)                                               ║
║    !browse           Open browser and navigate                          ║
║    !click       Click element                                      ║
║    !type            Type into focused element                          ║
║    !fill        Type into selector                                 ║
║    !read                  Read page text                                     ║
║    !close                 Close browser                                      ║
║                                                                              ║
║  TASK CHAINING 🔗                                                            ║
║    chain:           Add task to chain                                  ║
║    chain run              Execute chain                                      ║
║    chain workflows        List workflow templates                            ║
║                                                                              ║
║  GMAIL API 📧                                                                ║
║    !gmail search       Search emails                                      ║
║    !gmail read        Read email                                         ║
║    !gmail send ...        Send email                                         ║
║                                                                              ║
║  CRYPTO MINING ⛏️                                                            ║
║    !mine                  Show status | !mine profit  Check profitability    ║
║    !mine auto             Auto-mine best coin                                ║
║                                                                              ║
║  INFO & CONFIG                                                               ║
║    status                 Current state                                      ║
║    history                Quality history                                    ║
║    toggle           Toggle flags (80x, book, idea, api, etc)          ║
║    help                   Full help menu                                     ║
║    help            Smart help - find commands for what you need       ║
║    quit                   Exit                                               ║
╚══════════════════════════════════════════════════════════════════════════════╝
"""


# ==============================================================================
# SMART HELP SYSTEM v2.8
# ==============================================================================

# Lookup table behind smart_help(): one entry per help topic.
#
# Each key is a category name, and each value is a dict with three fields:
#   "keywords"    - lowercase terms that smart_help() compares against the
#                   user's query (together with the category name itself)
#                   to score the category.
#   "commands"    - list of (command, description) tuples; smart_help()
#                   prints one row per tuple, command first.
#   "description" - heading line printed above the category's commands.
#
# Dict order matters: smart_help() sorts categories by score with a stable
# sort, so categories with equal scores are shown in the order they are
# declared here.
#
# NOTE(review): several command strings end in one or more trailing spaces
# (e.g. "!content blog ", "!write  ", "!gmail send   "). That looks like an
# argument placeholder that was stripped at some point - confirm against the
# command parser before "tidying" them, since they are user-facing text.
COMMAND_CATEGORIES = {
    # NOTE(review): the keywords here are about earning money, but the
    # description and most commands are about content drafts - confirm intent.
    "money": {
        "keywords": ["money", "earn", "income", "revenue", "cash", "profit", "paid", "freelance", "job", "work", "gig"],
        "commands": [
            ("!revenue", "Show revenue dashboard and earnings"),
            ("!content blog ", "Generate blog post draft"),
            ("!content youtube ", "Generate YouTube script draft"),
            ("!content social ", "Generate social media drafts"),
        ],
        "description": "✍️ CONTENT DRAFTS - Generate content templates",
    },
    # Training / self-improvement commands.
    "learn": {
        "keywords": ["learn", "improve", "train", "smart", "better", "teach", "study", "knowledge"],
        "commands": [
            ("!mentor on", "Auto-consult Claude when uncertain"),
            ("!mentor ask ", "Ask Claude directly and learn from response"),
            ("!auto_train on", "Enable continuous learning during chat"),
            ("!condensator", "Run full training pipeline (SFT→DPO→RL)"),
            ("!train ", "Run training steps"),
            ("!learn", "Learn from high-quality conversation history"),
            ("!dream", "Replay past experiences to reinforce learning"),
        ],
        "description": "🎓 LEARNING & IMPROVEMENT - Get smarter",
    },
    # Evaluation and measurement commands.
    "quality": {
        "keywords": ["quality", "evaluate", "test", "benchmark", "score", "check", "measure"],
        "commands": [
            ("!eval", "Run comprehensive evaluation"),
            ("!benchmark", "Run full benchmark suite"),
            ("!skills", "Show quality per domain"),
            ("!forgetting", "Check for catastrophic forgetting"),
            ("!gate_stats", "Show CF-HoT gate health"),
            ("!compare", "Compare current vs best checkpoint"),
        ],
        "description": "📊 QUALITY & EVALUATION - Measure performance",
    },
    # Long-form and draft writing commands (overlaps with "money" above).
    "write": {
        "keywords": ["write", "create", "generate", "content", "blog", "article", "story", "book", "script"],
        "commands": [
            ("!content blog ", "Generate blog post draft"),
            ("!content youtube ", "Generate video script draft"),
            ("!content social ", "Generate social media drafts"),
            ("!book", "Toggle book mode for long-form writing"),
            ("!write ", "Write extended content"),
            ("!idea ", "Generate ideas with teacher model"),
        ],
        "description": "✍️ CONTENT CREATION - Generate drafts",
    },
    # Browser automation and web search commands.
    "browse": {
        "keywords": ["browse", "web", "internet", "website", "search", "google", "online", "url"],
        "commands": [
            ("!browse ", "Open browser and navigate"),
            ("!web ", "Web search via DuckDuckGo"),
            ("!click ", "Click element on page"),
            ("!type ", "Type into focused element"),
            ("!read", "Read current page content"),
            ("!close", "Close browser"),
        ],
        "description": "🌐 BROWSER & WEB - Browse the internet",
    },
    # Shell / Python execution and file commands. Note that "!read" and
    # "!write" are also listed under other categories with different
    # descriptions.
    "code": {
        "keywords": ["code", "program", "script", "python", "shell", "execute", "run", "command"],
        "commands": [
            ("!shell ", "Execute any shell command"),
            ("!python ", "Execute Python code"),
            ("!read ", "Read file contents"),
            ("!write  ", "Write to file"),
            ("!ls [path]", "List directory contents"),
        ],
        "description": "💻 CODE & SHELL - Execute commands",
    },
    # External teacher-model (Claude) commands.
    "claude": {
        "keywords": ["claude", "opus", "anthropic", "ai", "assistant", "chat", "mentor"],
        "commands": [
            ("!mentor on", "Enable Claude consultation mode"),
            ("!mentor ask ", "Ask Claude directly"),
            ("!mentor session", "Open Claude.ai in browser"),
            ("!claude ", "Direct Claude Opus 4.5 prompt"),
            ("!idea ", "Claude-powered idea generation"),
        ],
        "description": "🤖 CLAUDE INTEGRATION - Talk to Claude",
    },
    # Image generation commands.
    "image": {
        "keywords": ["image", "picture", "photo", "generate", "art", "visual", "dalle", "sdxl"],
        "commands": [
            ("!imagine ", "Generate image with SDXL"),
            ("!dalle ", "Generate with DALL-E 3"),
            ("!image load", "Load SDXL model"),
            ("!image view", "View last generated image"),
        ],
        "description": "🖼️ IMAGE GENERATION - Create visuals",
    },
    # Gmail commands.
    "email": {
        "keywords": ["email", "gmail", "mail", "send", "inbox", "message"],
        "commands": [
            ("!gmail search ", "Search emails"),
            ("!gmail read ", "Read specific email"),
            ("!gmail send   ", "Send email"),
        ],
        "description": "📧 EMAIL - Manage Gmail",
    },
    # Text-to-speech commands.
    "audio": {
        "keywords": ["audio", "voice", "speak", "tts", "sound", "talk", "say"],
        "commands": [
            ("!audio", "Toggle text-to-speech"),
            ("!say ", "Speak text immediately"),
            ("!audio voices", "List available voices"),
            ("!audio voice ", "Set voice by index"),
        ],
        "description": "🔊 AUDIO & VOICE - Text-to-speech",
    },
    # Engine state and statistics commands.
    "status": {
        "keywords": ["status", "info", "state", "current", "show", "display"],
        "commands": [
            ("status", "Current engine state"),
            ("!skills", "Quality per domain"),
            ("!buffer", "Experience buffer stats"),
            ("!meta", "Meta-learning stats"),
            ("!revenue", "Revenue dashboard"),
            ("history", "Quality history"),
        ],
        "description": "📋 STATUS & INFO - Check state",
    },
}


def smart_help(query: str, generate_fn: Optional[Callable] = None) -> str:
    """
    Smart help system - recommends commands based on user query.

    Every category in COMMAND_CATEGORIES is scored against the query:

    * +5 when a query word starts with the category name,
    * +3 for each keyword that some query word starts with (so "training"
      still hits "train" and "blogs" still hits "blog"),
    * +2 more when that keyword also appears as a complete word.

    Matches are anchored at the start of a word, so a keyword that merely
    occurs inside another word ("earn" in "learn", "ai" in "email") does not
    count. The three best categories with a non-zero score are shown; ties
    keep the declaration order of COMMAND_CATEGORIES.

    When nothing scores and ``generate_fn`` is supplied, the model is asked to
    name the best-fitting categories and its reply is scanned for category
    names (at most three are kept). This fallback is best-effort: any
    ordinary exception raised while querying the model is ignored and the
    generic "no matches" hint is shown instead.

    Args:
        query: Free-text topic typed after ``help``.
        generate_fn: Optional callable taking a prompt string and returning a
            3-tuple whose first element is the model's text reply.

    Returns:
        A multi-line, ready-to-print help panel.

    Examples:
        help content → shows content generation commands
        help learn → shows training and improvement commands
        help write blog → shows content creation commands
    """
    query_lower = query.lower().strip()

    def starts_word(term: str, text: str) -> bool:
        # True when `term` occurs in `text` at the beginning of a word.
        return re.search(r"(?<!\w)" + re.escape(term), text) is not None

    def whole_word(term: str, text: str) -> bool:
        # True when `term` occurs in `text` as a complete word.
        return re.search(r"(?<!\w)" + re.escape(term) + r"(?!\w)", text) is not None

    # Score each category by name and keyword matches
    scores = {}
    for category, data in COMMAND_CATEGORIES.items():
        score = 5 if starts_word(category, query_lower) else 0
        for keyword in data["keywords"]:
            if starts_word(keyword, query_lower):
                score += 3
                if whole_word(keyword, query_lower):
                    score += 2
        scores[category] = score

    # Get top matching categories (stable sort: ties keep declaration order)
    ranked = sorted(scores.items(), key=lambda item: -item[1])
    top_categories = [(cat, score) for cat, score in ranked if score > 0][:3]

    # If no matches, try using the model to understand intent
    if not top_categories and generate_fn:
        # Ask the model what the user might want
        intent_prompt = f"""User is looking for help with: "{query}"
        
Available command categories:
- money/revenue (freelance, content, trading)
- learn/improve (training, mentor mode)
- write/content (blogs, scripts, books)
- browse/web (browser automation)
- code/shell (execute commands)
- claude (AI assistance)
- image (generation)
- email (gmail)
- audio (text-to-speech)
- status (system info)

Which 1-2 categories best match? Reply with just the category names, comma-separated."""

        try:
            out, _, _ = generate_fn(intent_prompt)
            reply = out.lower()
        except Exception:
            # Best-effort fallback: a failing or malformed model reply just
            # means no suggestion. KeyboardInterrupt/SystemExit still
            # propagate because they are not Exception subclasses.
            reply = ""

        # Parse response for category names; cap at three like the keyword
        # path so a reply that echoes the whole menu does not list everything.
        top_categories = [
            (category, 1)
            for category in COMMAND_CATEGORIES
            if starts_word(category, reply)
        ][:3]

    # Build response
    lines = [
        "═" * 60,
        f"  🔍 SMART HELP: \"{query}\"",
        "═" * 60,
    ]

    if not top_categories:
        lines.append("\n  No exact matches found. Here are some suggestions:\n")
        lines.append("  Try: help money, help learn, help write, help browse, help code")
        lines.append("\n  Or just type 'help' for the full command list.")
    else:
        for category, _score in top_categories:
            data = COMMAND_CATEGORIES[category]
            lines.append(f"\n  {data['description']}")
            lines.append("  " + "─" * 50)
            for cmd, desc in data["commands"]:
                lines.append(f"    {cmd:30s} {desc}")

    lines.append("\n" + "═" * 60)
    lines.append("  💡 Tip: Type 'help' for full menu, or 'help ' for specific help")
    lines.append("═" * 60)

    return '\n'.join(lines)


# ==============================================================================
# MAIN LOOP
# ==============================================================================
def main():
    print("=" * 79)
    print("  ARC ENGINE v2.9 - Adaptive Recursive Cognition")
    print("  Multi-Loop Self-Stabilizing Language Model with Predictive Control")
    print("=" * 79)
    print(f"    DENSE Mode:      ON (CONDENSATOR checkpoint)")
    print(f"    CF-HoT Control:  ON")
    print(f"    CF-HoT 80×:     {'ON' if Config.use_cfhot_80x else 'OFF'}")
    print(f"    Agentic Tools:   ON (sandboxed, requires confirmation)")
    print(f"    Mentor Mode:     {'ON' if _mentor.config.enabled else 'OFF'}")
    print(f"    Auto-Train:      {'ON' if _rsi_config.auto_train_enabled else 'OFF'}")
    print(f"    LHT Reasoning:   {'ON' if LHT_OK else 'OFF'}")
    print(f"    Vector Memory:   {'ON' if VECTOR_OK else 'OFF'}")
    print(f"    Browser:         {'ON' if BROWSER_OK else 'OFF'}")
    print(f"    Claude API:      {'ON' if CLAUDE_API_OK else 'OFF'}")
    print(f"    Stream Window:   {'ON' if TK_OK else 'OFF'}")
    print(f"    Image Gen:       {'ON' if DIFFUSERS_OK or OPENAI_OK else 'OFF'}")
    print(f"    TTS Audio:       {'ON' if VOICE_OK or GTTS_OK else 'OFF'}")
    print(f"    Experience Buffer: {len(_rsi_buffer)} examples")
    print("=" * 79)
    print("    Commands: !mentor, !content, !improve, !condensator, !cfhot")
    print("    Smart help: Type 'help ' (e.g. 'help learn', 'help write')")
    print("=" * 79)
    print("    Type 'help' for commands, '!improve' to start self-improvement")
    print("=" * 79 + "\n")

    Store.load()
    tok, model = load_llm()
    
    # Load 80× head if enabled (optional - won't crash if missing)
    if Config.use_cfhot_80x:
        try:
            get_cfhot_head().load()
        except Exception as e:
            print(f"[cf-hot 80×] Could not load: {e}")
            Config.use_cfhot_80x = False
    
    # Initialize Claude client (optional)
    if CLAUDE_API_OK:
        try:
            client = init_claude_client()
            if client is None and not CLAUDE_CONFIG["api_key"]:
                print("\n" + "─" * 60)
                print("💡 TIP: Set up Anthropic API to enable Mentor Mode")
                print("   Run '!setup' to configure, or set ANTHROPIC_API_KEY")
                print("─" * 60)
        except Exception as e:
            print(f"[claude] Could not init: {e}")
    
    improver = StableSelfImprover()
    last_plan = ""

    while True:
        try:
            u = input("\n> ").strip()
        except (EOFError, KeyboardInterrupt):
            break

        if not u:
            continue
        
        # ══════════════════════════════════════════════════════════════════════
        # SMART HELP SYSTEM v2.8
        # ══════════════════════════════════════════════════════════════════════
        if u == "help":
            print(HELP)
            continue
        
        if u.startswith("help "):
            query = u[5:].strip().lower()
            print(smart_help(query, generate))
            continue
        
        if u == "quit":
            break
        
        # ══════════════════════════════════════════════════════════════════════
        # SETUP COMMAND
        # ══════════════════════════════════════════════════════════════════════
        if u == "!setup":
            print("\n🔧 ARC ENGINE SETUP")
            print("=" * 60)
            print("\n1. Anthropic API (for Mentor Mode)")
            print("2. All settings")
            print("3. Cancel")
            
            choice = input("\nWhat to configure? (1/2/3): ").strip()
            
            if choice == "1":
                setup_anthropic_key()
            elif choice == "2":
                setup_anthropic_key()
                # Add more setup options here as needed
            else:
                print("Cancelled")
            continue
        
        if u == "!setup api" or u == "!setup key":
            setup_anthropic_key()
            continue

        # ══════════════════════════════════════════════════════════════════════
        # NEW v2.1 COMMANDS
        # ══════════════════════════════════════════════════════════════════════
        
        # --- CF-HoT 80× Toggle ---
        if u in ("!cfhot", "!80x", "!cfhot toggle"):
            print(toggle_cfhot_80x())
            continue
        
        if u == "!cfhot status":
            head = get_cfhot_head()
            print(f"[cf-hot 80×] Loaded: {head.loaded}")
            print(f"[cf-hot 80×] Enabled: {Config.use_cfhot_80x}")
            continue
        
        # --- RSI-15 Stress Test ---
        if u == "!rsi15":
            run_rsi_15(improver)
            continue
        
        # --- Book Mode ---
        if u == "!book":
            result = Config.toggle("book_mode")
            print(result)
            if Config.book_mode:
                print(f"[book] Max tokens: {Config.book_max_tokens}")
                print("[book] Use: !write  to start")
            continue
        
        if u.startswith("!write "):
            topic = u[7:].strip()
            if topic:
                try:
                    chapters = int(input("Chapters (default 10): ").strip() or "10")
                    words = int(input("Words/chapter (default 3000): ").strip() or "3000")
                except:
                    chapters, words = 10, 3000
                writer = get_book_writer(generate)
                writer.write_book(topic, chapters, words)
            else:
                print("[book] Usage: !write ")
            continue
        
        # --- Idea Mode ---
        if u == "!idea":
            result = Config.toggle("idea_mode")
            print(result)
            if Config.idea_mode:
                print(f"[idea] Depth: {Config.idea_depth}")
                print(f"[idea] Use Opus: {Config.idea_use_opus}")
                print("[idea] Use: !idea  to generate")
            continue
        
        if u.startswith("!idea "):
            request = u[6:].strip()
            if request:
                depth = Config.idea_depth
                # Parse depth flag
                if " --quick" in request:
                    depth = "quick"
                    request = request.replace(" --quick", "")
                elif " --deep" in request:
                    depth = "deep"
                    request = request.replace(" --deep", "")
                
                gen = get_idea_generator()
                gen.generate(request, depth=depth, use_opus=Config.idea_use_opus)
            else:
                print("[idea] Usage: !idea  [--quick|--deep]")
            continue
        
        if u.startswith("!expand "):
            idea_name = u[8:].strip()
            gen = get_idea_generator()
            result = gen.expand(idea_name)
            print(result)
            continue
        
        # --- Direct Claude Prompting ---
        if u.startswith("!claude "):
            prompt = u[8:].strip()
            if prompt:
                use_opus = "--opus" in prompt
                prompt = prompt.replace("--opus", "").strip()
                print(f"\n[claude] {'Opus 4.5' if use_opus else 'Sonnet'}:\n")
                result = claude_generate(prompt, use_opus=use_opus, stream=True)
            else:
                print("[claude] Usage: !claude  [--opus]")
            continue
        
        if u == "!claude":
            print(f"[claude] API: {'OK' if CLAUDE_API_OK else 'Not installed'}")
            print(f"[claude] Key: {'Set' if CLAUDE_CONFIG.get('api_key') else 'Missing'}")
            print(f"[claude] Model: {CLAUDE_CONFIG['model']}")
            continue
        
        # --- Plotting ---
        if u == "!plot":
            result = plot_quality_history()
            print(result)
            continue
        
        # --- Export/Import ---
        if u == "!export" or u.startswith("!export "):
            name = u[8:].strip() if len(u) > 8 else None
            result = export_checkpoint(name)
            print(result)
            continue
        
        if u.startswith("!import "):
            path = u[8:].strip()
            result = import_checkpoint(path)
            print(result)
            continue
        
        # --- Benchmark ---
        if u == "!benchmark":
            run_benchmark(generate)
            continue
        
        # --- Learn ---
        if u == "!learn":
            result = learn_from_conversation()
            print(result)
            continue
        
        # ══════════════════════════════════════════════════════════════════════
        # RSI v2.3 COMMANDS - CONTINUOUS LEARNING SYSTEM
        # ══════════════════════════════════════════════════════════════════════
        
        # --- Auto Training Toggle ---
        if u == "!auto_train on":
            _rsi_config.auto_train_enabled = True
            print("✓ Auto-training ENABLED")
            print("  Will trigger after 20 examples, quality drop, or 30 min")
            continue
        
        if u == "!auto_train off":
            _rsi_config.auto_train_enabled = False
            print("✓ Auto-training DISABLED")
            continue
        
        if u in ("!auto_train", "!auto_train status"):
            stats = _rsi_auto_train.stats()
            print(f"Auto-Training Status:")
            print(f"  Enabled: {stats['enabled']}")
            print(f"  Examples since train: {stats['examples_since_train']}")
            print(f"  Would trigger: {stats['would_trigger']} ({stats['trigger_reason']})")
            print(f"  Quality trend: {stats['quality_trend']:.3f}")
            continue
        
        # --- Skill Tracking ---
        if u == "!skills":
            print("Skill Domains:")
            scores = _rsi_skills.get_all_scores()
            for domain, score in sorted(scores.items(), key=lambda x: -x[1]):
                bar = '█' * int(score * 10) + '░' * (10 - int(score * 10))
                count = len(_rsi_buffer.get_by_domain(domain))
                print(f"  {domain:12s} [{bar}] {score:.3f} ({count} examples)")
            weakest, w_score = _rsi_skills.get_weakest()
            strongest, s_score = _rsi_skills.get_strongest()
            print(f"\n  Strongest: {strongest} ({s_score:.3f})")
            print(f"  Weakest: {weakest} ({w_score:.3f})")
            continue
        
        # --- Curiosity / Uncertainty ---
        if u == "!curiosity":
            topics = _rsi_curiosity.get_curious_topics(10)
            if not topics:
                print("No uncertainty data yet.")
            else:
                print("Curious Topics (high uncertainty):")
                for topic, uncertainty in topics:
                    bar = '█' * int(uncertainty * 10) + '░' * (10 - int(uncertainty * 10))
                    print(f"  {topic:15s} [{bar}] {uncertainty:.3f}")
            continue
        
        # --- Forgetting Detection ---
        if u == "!forgetting":
            print("Running forgetting benchmark...")
            prompts = RSIForgettingDetector.BENCHMARK_PROMPTS[:10]
            scores = {}
            for prompt in prompts:
                out, _, eval_result = generate(prompt)
                scores[prompt] = eval_result.overall_score
                print(f"  {prompt[:40]:40s} → {eval_result.overall_score:.2f}")
            
            if _rsi_forgetting.baseline_scores:
                alerts = _rsi_forgetting.check(scores)
                if alerts:
                    print("\n⚠ ALERTS:")
                    for a in alerts:
                        print(f"  {a}")
                else:
                    print("\n✓ No forgetting detected")
            else:
                _rsi_forgetting.set_baseline(scores)
                print("\n✓ Baseline set")
            continue
        
        # --- Dream Cycle ---
        if u == "!dream":
            print(f"💤 Dream cycle: replaying experiences...")
            batch = _rsi_dreams.get_dream_batch()
            if not batch:
                print("  No experiences to replay")
            else:
                for exp in batch[:10]:
                    out, _, eval_result = generate(exp.prompt)
                    delta = eval_result.overall_score - exp.quality
                    symbol = "↑" if delta > 0 else "↓" if delta < 0 else "="
                    print(f"  {exp.domain:10s} | {exp.quality:.2f} → {eval_result.overall_score:.2f} {symbol}")
            continue
        
        # --- Self-Play (Adversarial) ---
        if u == "!self_play":
            challenge = _rsi_self_play.generate_challenge()
            print(f"🎯 Challenge:\n{challenge}\n\nRespond to test yourself!")
            continue
        
        # --- Meta-Learning Stats ---
        if u == "!meta":
            lr_stats = _rsi_adaptive_lr.stats()
            print("Meta-Learning Stats:")
            print(f"  Current LR: {lr_stats['lr']:.2e}")
            print(f"  Updates: {lr_stats['updates']}")
            print(f"  Improvement rate: {lr_stats['improvement_rate']:.1%}")
            continue
        
        # --- Goal Tracking ---
        if u == "!goals":
            print(_rsi_goals.summary())
            continue
        
        if u.startswith("!goals add "):
            parts = u[11:].strip().split()
            if len(parts) >= 2:
                metric = parts[0]
                try:
                    target = float(parts[1])
                    goal = _rsi_goals.add(metric, target)
                    print(f"✓ Goal added: {metric} → {target}")
                except:
                    print("Usage: !goals add  ")
            else:
                print("Usage: !goals add  ")
            continue
        
        # --- Explanation Mode ---
        if u == "!explain":
            status = "ON" if _rsi_config.explanation_enabled else "OFF"
            print(f"Explanation mode: {status}")
            continue
        
        if u == "!explain on":
            _rsi_config.explanation_enabled = True
            print("✓ Explanation mode ON")
            continue
        
        if u == "!explain off":
            _rsi_config.explanation_enabled = False
            print("✓ Explanation mode OFF")
            continue
        
        # --- Human Feedback ---
        if u == "!feedback":
            stats = _rsi_feedback.stats()
            print(f"Feedback Stats: +{stats['positive']} / -{stats['negative']}")
            continue
        
        if u == "!feedback +" or u == "!feedback +1":
            if _rsi_feedback.record_feedback(1):
                print("👍 Positive feedback recorded")
            else:
                print("No recent response to rate")
            continue
        
        if u == "!feedback -" or u == "!feedback -1":
            if _rsi_feedback.record_feedback(-1):
                print("👎 Negative feedback recorded")
            else:
                print("No recent response to rate")
            continue
        
        # --- Experience Buffer ---
        if u == "!buffer":
            stats = _rsi_buffer.stats()
            print(f"Experience Buffer:")
            print(f"  Size: {stats['size']}")
            if stats['size'] > 0:
                print(f"  Avg quality: {stats['avg_quality']:.3f}")
                print(f"  With feedback: {stats['with_feedback']}")
                print(f"  Domains: {stats['domains']}")
            continue
        
        # ══════════════════════════════════════════════════════════════════════
        # MENTOR MODE v2.8 COMMANDS
        # ══════════════════════════════════════════════════════════════════════
        
        if u == "!mentor" or u == "!mentor status":
            mentor = get_mentor()
            stats = mentor.stats()
            print(f"🎓 Mentor Mode:")
            print(f"  Enabled: {stats['enabled']}")
            print(f"  Auto-consult threshold: {stats['auto_threshold']:.2f}")
            print(f"  Consultations today: {stats['consultations_today']}")
            print(f"  Total conversations: {stats['total_conversations']}")
            print(f"  Learnings collected: {stats['total_learnings']}")
            print(f"  Browser session: {'Active' if stats['browser_active'] else 'Inactive'}")
            continue
        
        if u == "!mentor on":
            _mentor.config.enabled = True
            print("✓ Mentor Mode ENABLED")
            print("  Will auto-consult Claude when quality < 0.6 or uncertainty > 0.4")
            continue
        
        if u == "!mentor off":
            _mentor.config.enabled = False
            print("✓ Mentor Mode DISABLED")
            continue
        
        if u == "!mentor session":
            result = _mentor.open_session()
            print(result)
            continue
        
        if u.startswith("!mentor live "):
            # Visual browser demo - types into DuckDuckGo AI Chat (no login needed!)
            question = u[13:].strip()
            if question and BROWSER_OK:
                print(f"\n🎬 LIVE MENTOR MODE - Visible Browser Demo")
                print("=" * 60)
                
                # First generate local response
                print(f"\n[1/4] Local model responding to: {question}")
                local_out, _, local_eval = generate(question)
                print(f"\n┌─ LOCAL MODEL (Quality: {local_eval.overall_score:.2f}) ─────────────────")
                print(f"│ {local_out}")
                print(f"└{'─' * 55}")
                
                # Open browser and navigate to DuckDuckGo AI
                print(f"\n[2/4] Opening DuckDuckGo AI Chat in browser...")
                try:
                    from playwright.sync_api import sync_playwright
                    
                    with sync_playwright() as p:
                        # Use persistent context to look more like a real user
                        user_data_dir = os.path.join(ROOT, ".browser_profile")
                        os.makedirs(user_data_dir, exist_ok=True)
                        
                        # Launch with human-like settings
                        browser = p.firefox.launch_persistent_context(
                            user_data_dir,
                            headless=False,
                            viewport={"width": 1280, "height": 900},
                            locale="en-US",
                            timezone_id="America/New_York",
                            user_agent="Mozilla/5.0 (X11; Linux x86_64; rv:121.0) Gecko/20100101 Firefox/121.0"
                        )
                        page = browser.new_page()
                        
                        # Go to DuckDuckGo AI Chat directly
                        page.goto("https://duckduckgo.com/?q=hi&ia=chat")
                        print("[browser] Navigated to DuckDuckGo AI Chat")
                        
                        # Wait for page to load
                        time.sleep(3)
                        
                        # Try to select Claude as the model if option exists
                        try:
                            # Look for model selector
                            model_btn = page.query_selector('[data-testid="model-selector"], .model-selector, button:has-text("Claude")')
                            if model_btn:
                                model_btn.click()
                                time.sleep(1)
                                claude_opt = page.query_selector('text=Claude')
                                if claude_opt:
                                    claude_opt.click()
                                    print("[browser] Selected Claude model")
                                    time.sleep(1)
                        except:
                            pass  # Model selection not available or already Claude
                        
                        # Try to find the input area and type slowly (visible to viewers)
                        print(f"\n[3/4] Typing question (watch the browser!)...")
                        
                        # DuckDuckGo AI selectors
                        selectors = [
                            'textarea[placeholder*="Ask"]',
                            'textarea[name="user-prompt"]',
                            'textarea',
                            '[contenteditable="true"]',
                            '.chat-input',
                            '#user-prompt'
                        ]
                        
                        typed = False
                        for sel in selectors:
                            try:
                                el = page.wait_for_selector(sel, timeout=5000)
                                if el:
                                    el.click()
                                    # Type slowly so viewers can see
                                    page.keyboard.type(question, delay=50)
                                    typed = True
                                    print(f"[browser] Typed: {question}")
                                    break
                            except:
                                continue
                        
                        if typed:
                            # Press Enter to send
                            time.sleep(1)
                            page.keyboard.press("Enter")
                            print("[browser] Message sent - waiting for AI response...")
                            
                            # Wait for response to complete (watch for stop generating or new content)
                            print("[browser] Watching for response...")
                            time.sleep(15)  # Give AI time to respond
                            
                            # Try to read the response
                            print(f"\n[4/4] Reading AI response...")
                            
                            # Try to extract the response text
                            ai_response = ""
                            response_selectors = [
                                '[data-testid="assistant-message"]',
                                '.assistant-message',
                                '.response-text',
                                '.message-content',
                                'div[class*="response"]',
                                'div[class*="answer"]'
                            ]
                            
                            for sel in response_selectors:
                                try:
                                    els = page.query_selector_all(sel)
                                    if els:
                                        # Get the last response
                                        ai_response = els[-1].inner_text()
                                        break
                                except:
                                    continue
                            
                            # Screenshot for proof
                            screenshot_path = os.path.join(ROOT, "mentor_live_screenshot.png")
                            page.screenshot(path=screenshot_path)
                            print(f"[browser] Screenshot saved: {screenshot_path}")
                            
                            if ai_response:
                                print(f"\n┌─ AI RESPONSE (DuckDuckGo/Claude) ─────────────────")
                                print(f"│ {ai_response[:500]}")
                                print(f"└{'─' * 55}")
                                
                                # Learn from it
                                if _mentor.config.learn_from_responses:
                                    _mentor.learn_from_claude(question, local_out, ai_response)
                                    print(f"\n✓ Learning recorded ({len(_mentor.learnings)} total)")
                                    
                                    # Show quality comparison
                                    print(f"\n📊 QUALITY COMPARISON:")
                                    print(f"   Local model:  {local_eval.overall_score:.2f}")
                                    print(f"   Learning:     +1 DPO pair added to training buffer")
                            else:
                                print("[browser] Could not extract response text")
                                print("[browser] Check the browser window to see the response!")
                        else:
                            print("[browser] Could not find input field")
                        
                        # Keep browser open longer so viewers can see the full response
                        print("\n[browser] Keeping browser open for 10 seconds (watch the response!)...")
                        time.sleep(10)
                        browser.close()
                        
                except Exception as e:
                    print(f"[mentor live] Browser error: {e}")
                    import traceback
                    traceback.print_exc()
                
                print("\n" + "=" * 60)
                print("🎬 LIVE DEMO COMPLETE")
                print("=" * 60)
            elif not BROWSER_OK:
                print("[mentor live] Browser not available. Install playwright:")
                print("  pip install playwright && playwright install firefox")
            else:
                print("Usage: !mentor live ")
            continue
        
        # Use real Firefox browser for interactive sessions
        if u.startswith("!mentor chat "):
            question = u[13:].strip()
            if question:
                print(f"\n🦊 REAL BROWSER MODE - Using your actual Firefox")
                print("=" * 60)
                
                # First generate local response
                print(f"\n[1/3] Local model responding...")
                local_out, _, local_eval = generate(question)
                print(f"\n┌─ LOCAL MODEL (Quality: {local_eval.overall_score:.2f}) ─────────────────")
                print(f"│ {local_out}")
                print(f"└{'─' * 55}")
                
                # Open DuckDuckGo AI in the user's REAL Firefox
                print(f"\n[2/3] Opening your Firefox browser...")
                print(f"      Question will be copied to clipboard - just paste it!")
                
                # Copy question to clipboard
                try:
                    import subprocess
                    # Try xclip first, then xsel
                    try:
                        subprocess.run(['xclip', '-selection', 'clipboard'], input=question.encode(), check=True)
                        print(f"      ✓ Question copied to clipboard (Ctrl+V to paste)")
                    except:
                        try:
                            subprocess.run(['xsel', '--clipboard', '--input'], input=question.encode(), check=True)
                            print(f"      ✓ Question copied to clipboard (Ctrl+V to paste)")
                        except:
                            print(f"      ⚠ Could not copy to clipboard. Question: {question}")
                except:
                    pass
                
                # Open real Firefox
                import subprocess
                subprocess.Popen(['firefox', 'https://duckduckgo.com/?q=hi&ia=chat'], 
                               stdout=subprocess.DEVNULL, stderr=subprocess.DEVNULL)
                
                print(f"\n[3/3] Firefox opened! Now:")
                print(f"      1. Paste your question (Ctrl+V)")
                print(f"      2. Wait for Claude's response")
                print(f"      3. Copy Claude's response (Ctrl+C)")
                print(f"      4. Come back here and paste it when prompted")
                
                print(f"\n" + "─" * 60)
                claude_response = input("📋 Paste Claude's response here (or 'skip'): ").strip()
                
                if claude_response and claude_response.lower() != 'skip':
                    print(f"\n┌─ CLAUDE's RESPONSE ─────────────────────────────")
                    print(f"│ {claude_response[:500]}")
                    print(f"└{'─' * 55}")
                    
                    # Learn from it
                    if _mentor.config.learn_from_responses:
                        _mentor.learn_from_claude(question, local_out, claude_response)
                        print(f"\n✓ Learning recorded ({len(_mentor.learnings)} total)")
                        print(f"\n📊 RESULT:")
                        print(f"   Local model quality: {local_eval.overall_score:.2f}")
                        print(f"   +1 DPO pair added to training buffer")
                else:
                    print("[skipped]")
                
                print("\n" + "=" * 60)
            else:
                print("Usage: !mentor chat ")
            continue
        
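        # Multi-round AI conversation: !mentor convo ROUNDS TOPIC
        # (e.g. "!mentor convo 5 What is consciousness?"); a non-integer first word falls back to 3 rounds.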
        if u.startswith("!mentor convo "):
            parts = u[14:].strip().split(" ", 1)
            if len(parts) >= 2:
                try:
                    rounds = int(parts[0])
                    topic = parts[1]
                except ValueError:
                    rounds = 3
                    topic = u[14:].strip()
                
                print(f"\n🗣️ AI CONVERSATION - {rounds} rounds on: {topic}")
                print("=" * 70)
                
                conversation_history = []
                current_prompt = topic
                
                for round_num in range(1, rounds + 1):
                    print(f"\n{'─' * 70}")
                    print(f"  ROUND {round_num}/{rounds}")
                    print(f"{'─' * 70}")
                    
                    # Local model responds
                    print(f"\n🤖 LOCAL MODEL:")
                    local_out, _, local_eval = generate(current_prompt)
                    print(f"   {local_out}")
                    print(f"   [Quality: {local_eval.overall_score:.2f}]")
                    
                    conversation_history.append({
                        "role": "local",
                        "content": local_out,
                        "quality": local_eval.overall_score
                    })
                    
                    # Claude responds to local model's response
                    print(f"\n🧠 CLAUDE (via API):")
                    claude_prompt = f"The user asked: {topic}\n\nAnother AI responded: {local_out}\n\nProvide your perspective or build on this response:"
                    claude_response = _mentor.consult_via_api(claude_prompt, local_out)
                    
                    if claude_response:
                        print(f"   {claude_response}")
                        
                        conversation_history.append({
                            "role": "claude", 
                            "content": claude_response
                        })
                        
                        # Learn from Claude's response
                        if _mentor.config.learn_from_responses:
                            _mentor.learn_from_claude(current_prompt, local_out, claude_response)
                        
                        # Next round: local model responds to Claude
                        current_prompt = f"Continue this discussion. Claude said: {claude_response}\n\nYour response:"
                    else:
                        print("   [Could not get Claude response]")
                        break
                    
                    time.sleep(1)  # Pause for readability
                
                print(f"\n{'=' * 70}")
                print(f"🎓 CONVERSATION COMPLETE - {len(_mentor.learnings)} learnings recorded")
                print(f"{'=' * 70}")
                
                # Summary
                local_avg = sum(h["quality"] for h in conversation_history if h["role"] == "local") / max(1, len([h for h in conversation_history if h["role"] == "local"]))
                print(f"\n📊 SUMMARY:")
                print(f"   Rounds: {rounds}")
                print(f"   Local model avg quality: {local_avg:.2f}")
                print(f"   DPO pairs created: {rounds}")
                
            else:
                print("Usage: !mentor convo  ")
                print("Example: !mentor convo 5 What is consciousness?")
            continue
        
        if u.startswith("!mentor ask "):
            question = u[12:].strip()
            if question:
                print(f"\n🎓 Asking Claude: {question}\n")
                
                # First generate local response
                local_out, _, local_eval = generate(question)
                print(f"[Local ({local_eval.overall_score:.2f})]: {local_out[:200]}...")
                
                # Get Claude's response
                print("\n[Consulting Claude...]")
                claude_response = _mentor.consult_via_api(question, local_out)
                
                if claude_response:
                    print(f"\n[Claude]: {claude_response}")
                    
                    # Learn from the difference
                    if _mentor.config.learn_from_responses:
                        learning = _mentor.learn_from_claude(question, local_out, claude_response)
                        print(f"\n✓ Learning recorded ({len(_mentor.learnings)} total)")
                else:
                    print("[mentor] Could not get Claude's response")
            else:
                print("Usage: !mentor ask ")
            continue
        
        if u == "!mentor learn":
            print(f"[mentor] {len(_mentor.learnings)} learnings ready for training")
            if _mentor.learnings:
                print("  Recent learnings:")
                for l in _mentor.learnings[-5:]:
                    print(f"    • {l['prompt'][:50]}...")
            continue
        
        # ══════════════════════════════════════════════════════════════════════
        # CONTENT GENERATION COMMANDS (Experimental)
        # ══════════════════════════════════════════════════════════════════════
        
        if u == "!revenue" or u == "!revenue status":
            print(_revenue.dashboard())
            continue
        
        if u.startswith("!revenue goal "):
            try:
                amount = float(u[14:].strip())
                result = _revenue.set_goal(amount)
                print(result)
            except:
                print("Usage: !revenue goal ")
            continue
        
        if u.startswith("!revenue record "):
            parts = u[16:].strip().split(maxsplit=2)
            if len(parts) >= 2:
                try:
                    amount = float(parts[0])
                    source = parts[1]
                    desc = parts[2] if len(parts) > 2 else ""
                    _revenue.record_earning(amount, source, desc)
                    print(f"✓ Recorded ${amount:.2f} from {source}")
                except:
                    print("Usage: !revenue record   [description]")
            continue
        
        # --- Freelance Commands ---
        if u == "!freelance" or u == "!freelance status":
            stats = _revenue.freelance.stats()
            print(f"📋 Freelance Status:")
            print(f"  Jobs found: {stats['jobs_found']}")
            print(f"  Applications: {stats['applications']}")
            print(f"  Earnings: ${stats['earnings']:.2f}")
            continue
        
        if u.startswith("!freelance scan"):
            platform = u[15:].strip() or "upwork"
            print(f"[freelance] Scanning {platform} for jobs...")
            jobs = _revenue.freelance.scan_jobs(platform)
            if jobs:
                print(f"\n✓ Found {len(jobs)} potential jobs:")
                for i, job in enumerate(jobs[:5]):
                    print(f"  {i+1}. {job.get('title', 'Unknown')[:60]}...")
                    print(f"     Budget: {job.get('budget', 'N/A')}")
            else:
                print("[freelance] No jobs found or browser error")
            continue
        
        if u.startswith("!freelance apply "):
            try:
                idx = int(u[17:].strip()) - 1
                if 0 <= idx < len(_revenue.freelance.jobs_found):
                    job = _revenue.freelance.jobs_found[idx]
                    print(f"\n[freelance] Generating proposal for: {job.get('title', 'Unknown')}")
                    proposal = _revenue.freelance.generate_proposal(job, generate)
                    print(f"\n--- PROPOSAL ---\n{proposal}\n--- END ---")
                    
                    confirm = input("\nSubmit this proposal? (yes/no): ").strip().lower()
                    if confirm in ['yes', 'y']:
                        result = _revenue.freelance.apply_to_job(job, proposal)
                        print(f"✓ Application {result['status']}")
                else:
                    print(f"[freelance] Invalid job index. Use !freelance scan first")
            except:
                print("Usage: !freelance apply ")
            continue
        
        # --- Content Commands ---
        if u == "!content" or u == "!content status":
            stats = _revenue.content.stats()
            print(f"✍️ Content Status:")
            print(f"  Generated: {stats['generated']}")
            print(f"  Published: {stats['published']}")
            print(f"  Niche: {stats['niche']}")
            continue
        
        if u.startswith("!content blog "):
            topic = u[14:].strip()
            if topic:
                print(f"[content] Generating blog post about: {topic}")
                post = _revenue.content.generate_blog_post(topic, generate)
                print(f"\n--- BLOG POST ({post['word_count']} words) ---")
                print(post['content'][:2000])
                if len(post['content']) > 2000:
                    print(f"\n... [{post['word_count'] - 400} more words] ...")
                print("--- END ---")
            else:
                print("Usage: !content blog ")
            continue
        
        if u.startswith("!content youtube "):
            topic = u[17:].strip()
            if topic:
                print(f"[content] Generating YouTube script about: {topic}")
                script = _revenue.content.generate_youtube_script(topic, generate)
                print(f"\n--- YOUTUBE SCRIPT ---")
                print(script['script'][:3000])
                print("--- END ---")
            else:
                print("Usage: !content youtube ")
            continue
        
        if u.startswith("!content social "):
            topic = u[16:].strip()
            if topic:
                print(f"[content] Generating social media posts about: {topic}")
                posts = _revenue.content.generate_social_posts(topic, generate)
                print(f"\n--- SOCIAL POSTS ---")
                print(posts['posts'])
                print("--- END ---")
            else:
                print("Usage: !content social ")
            continue
        
        # --- Trading Commands ---
        if u == "!trade" or u == "!trade status":
            stats = _revenue.trading.stats()
            print(f"📈 Trading Status:")
            print(f"  Connected: {stats['connected']}")
            print(f"  Exchange: {stats['exchange']}")
            print(f"  Positions: {stats['positions']}")
            print(f"  Trades: {stats['trades']}")
            print(f"  P&L: ${stats['pnl']:.2f}")
            continue
        
        if u.startswith("!trade analyze"):
            symbol = u[14:].strip() or "BTC/USDT"
            print(f"[trade] Analyzing {symbol}...")
            analysis = _revenue.trading.analyze_market(symbol)
            print(f"\n📊 Market Analysis: {symbol}")
            print(f"  Recommendation: {analysis['recommendation']}")
            print(f"  Confidence: {analysis['confidence']:.0%}")
            continue
        
        if u.startswith("!trade execute "):
            parts = u[15:].strip().split()
            if len(parts) >= 3:
                symbol = parts[0]
                side = parts[1].upper()
                amount = float(parts[2])
                result = _revenue.trading.execute_trade(symbol, side, amount)
                if "error" in result:
                    print(f"[trade] Error: {result['error']}")
                else:
                    print(f"[trade] {result['status']}")
            else:
                print("Usage: !trade execute   ")
            continue
        
        # --- Affiliate Commands ---
        if u == "!affiliate" or u == "!affiliate status":
            print(f"🔗 Affiliate Status:")
            print(f"  Reviews: {len(_revenue.affiliate.reviews)}")
            continue
        
        if u.startswith("!affiliate review "):
            product = u[18:].strip()
            if product:
                print(f"[affiliate] Generating review for: {product}")
                review = _revenue.affiliate.generate_review(product, generate)
                print(f"\n--- PRODUCT REVIEW ---")
                print(review['review'][:2500])
                print("--- END ---")
            else:
                print("Usage: !affiliate review ")
            continue
        
        if u == "!affiliate find":
            opportunities = _revenue.affiliate.find_opportunities()
            print("🔗 Affiliate Opportunities:")
            for opp in opportunities:
                print(f"  • {opp}")
            continue
        
        # --- Task Automation ---
        if u == "!automate" or u == "!automate status":
            stats = _revenue.tasks.stats()
            print(f"⚡ Task Automation:")
            print(f"  Completed: {stats['completed']}")
            print(f"  Earnings: ${stats['earnings']:.2f}")
            print(f"  Platforms: {', '.join(stats['platforms'])}")
            continue
        
        # --- API Server ---
        if u == "!api":
            if not Config.api_enabled:
                Config.api_enabled = True
                start_api_server(Config.api_port, generate)
            else:
                print(f"[api] Already running on port {Config.api_port}")
            continue

        # ══════════════════════════════════════════════════════════════════════
        # MULTIMEDIA COMMANDS
        # ══════════════════════════════════════════════════════════════════════
        
        # --- Streaming Window ---
        if u == "!stream" or u == "!stream on":
            win = get_stream_window()
            if win.start():
                print("[stream] ✓ Window opened - tokens will appear live")
            continue
        
        if u == "!stream off":
            win = get_stream_window()
            if win.window:
                win._on_close()
                print("[stream] ✓ Window closed")
            continue
        
        # --- Audio/TTS ---
        if u == "!audio" or u == "!tts":
            result = get_audio_system().toggle()
            print(result)
            continue
        
        if u == "!audio voices" or u == "!tts voices":
            get_audio_system().list_voices()
            continue
        
        if u.startswith("!audio voice ") or u.startswith("!tts voice "):
            try:
                idx = int(u.split()[-1])
                get_audio_system().set_voice(idx)
                print(f"[audio] Voice set to {idx}")
            except:
                print("[audio] Usage: !audio voice ")
            continue
        
        if u.startswith("!audio rate "):
            try:
                rate = int(u.split()[-1])
                get_audio_system().set_rate(rate)
                print(f"[audio] Rate: {rate}")
            except:
                print("[audio] Usage: !audio rate ")
            continue
        
        if u.startswith("!say "):
            text = u[5:].strip()
            if text:
                audio = get_audio_system()
                audio.enabled = True
                audio.speak(text, block=True)
                audio.enabled = False
            continue
        
        # --- Image Generation ---
        if u == "!image" or u == "!image status":
            img = get_image_system()
            print(f"[image] SDXL: {'Loaded' if img.sdxl_pipe else 'Not loaded'}")
            print(f"[image] Diffusers: {DIFFUSERS_OK}")
            print(f"[image] DALL-E: {OPENAI_OK and bool(os.environ.get('OPENAI_API_KEY'))}")
            continue
        
        if u == "!image load" or u == "!sdxl load":
            get_image_system().load_sdxl()
            continue
        
        if u.startswith("!imagine ") or u.startswith("!image gen "):
            prompt = u.split(" ", 2)[-1].strip() if u.startswith("!image gen ") else u[9:].strip()
            if prompt:
                img = get_image_system().generate(prompt)
                if img:
                    get_image_system().view()
            else:
                print("[image] Usage: !imagine ")
            continue
        
        if u.startswith("!dalle ") or u.startswith("!image dalle "):
            prompt = u.split(" ", 2)[-1].strip() if u.startswith("!image dalle ") else u[7:].strip()
            if prompt:
                img = get_image_system().generate_dalle(prompt)
                if img:
                    get_image_system().view()
            continue
        
        if u.startswith("!image view "):
            path = u[12:].strip()
            if os.path.exists(path):
                get_image_system().view(path)
            else:
                print(f"[image] Not found: {path}")
            continue
        
        if u == "!image view":
            get_image_system().view()
            continue

        # === SELF-IMPROVEMENT COMMANDS ===
        if u == "!improve":
            result = improver.improve()
            print("\n" + "=" * 50)
            print("IMPROVEMENT RESULT:")
            print(json.dumps({k: v for k, v in result.items() if k != 'history'}, indent=2, default=str))
            continue
        
        if u == "!eval":
            result = improver.evaluate_current_model()
            print(json.dumps({k: v for k, v in result.items() if k != 'results'}, indent=2, default=str))
            continue
        
        if u.startswith("!train "):
            try:
                steps = int(u[7:])
                old_ckpt = Store.state.get('current_checkpoint', DENSE_CHECKPOINT)
                result = improver.run_training_iteration(steps)
                if result['success']:
                    # Auto-compare
                    comp = improver.compare_checkpoints(old_ckpt, result['new_checkpoint'])
                    if comp['keep_new']:
                        print(f"\n✓ Using new checkpoint ({comp['reason']})")
                    else:
                        reload_model(old_ckpt)
                        print(f"\n✗ Keeping old checkpoint ({comp['reason']})")
                else:
                    print(f"Training failed")
            except ValueError:
                print("Usage: !train ")
            continue
        
        if u == "!compare":
            current = Store.state.get('current_checkpoint', DENSE_CHECKPOINT)
            best = Store.state.get('best_checkpoint', DENSE_CHECKPOINT)
            if current != best:
                improver.compare_checkpoints(current, best)
            else:
                print("Current checkpoint IS the best checkpoint")
            continue
        
        if u == "!rollback":
            improver.rollback_to_best()
            print(f"Rolled back to: {Store.state['best_checkpoint']}")
            continue
        
        # === THE CONDENSATOR COMMANDS ===
        if u == "!condensator" or u == "!full_train":
            print("\n🧬 Starting THE CONDENSATOR - Full 4-Stage Training Pipeline")
            condensator = get_condensator()
            result = condensator.run_full_pipeline(
                model_path=MODEL_PATH,
                output_dir=os.path.join(ROOT, f"condensator_run_{datetime.now().strftime('%Y%m%d_%H%M%S')}"),
                start_checkpoint=Store.state.get('current_checkpoint')
            )
            if result['success']:
                Store.state['current_checkpoint'] = result['final_checkpoint']
                Store.state['best_checkpoint'] = result['final_checkpoint']
                Store.save()
                reload_model(result['final_checkpoint'])
                print(f"\n✓ CONDENSATOR complete! Loaded: {result['final_checkpoint']}")
            else:
                print("\n✗ CONDENSATOR failed. Check logs.")
            continue
        
        if u == "!dpo" or u.startswith("!dpo "):
            parts = u.split()
            start_ckpt = parts[1] if len(parts) > 1 else Store.state.get('current_checkpoint', DENSE_CHECKPOINT)
            print(f"\n⚖️ Running DPO stage from: {start_ckpt}")
            condensator = get_condensator()
            result = condensator.run_dpo_only(
                model_path=MODEL_PATH,
                output_dir=os.path.join(ROOT, f"dpo_run_{datetime.now().strftime('%Y%m%d_%H%M%S')}"),
                start_checkpoint=start_ckpt
            )
            if result['success']:
                Store.state['current_checkpoint'] = result['checkpoint']
                Store.save()
                reload_model(result['checkpoint'])
                print(f"\n✓ DPO complete! Loaded: {result['checkpoint']}")
            continue
        
        if u == "!rl" or u.startswith("!rl "):
            parts = u.split()
            start_ckpt = parts[1] if len(parts) > 1 else Store.state.get('current_checkpoint', DENSE_CHECKPOINT)
            print(f"\n🎯 Running RL stage from: {start_ckpt}")
            condensator = get_condensator()
            result = condensator.run_rl_only(
                model_path=MODEL_PATH,
                output_dir=os.path.join(ROOT, f"rl_run_{datetime.now().strftime('%Y%m%d_%H%M%S')}"),
                start_checkpoint=start_ckpt
            )
            if result['success']:
                Store.state['current_checkpoint'] = result['checkpoint']
                Store.save()
                reload_model(result['checkpoint'])
                print(f"\n✓ RL complete! Loaded: {result['checkpoint']}")
            continue
        
        # === CF-HoT TRAINING COMMANDS ===
        if u.startswith("!train_cfhot ") or u == "!train_cfhot":
            parts = u.split()
            head_name = parts[1] if len(parts) > 1 else "repetition"
            steps = int(parts[2]) if len(parts) > 2 else 5000
            
            if head_name not in CFHOT_TRAINING_DATA:
                print(f"Unknown head: {head_name}. Available: {list(CFHOT_TRAINING_DATA.keys())}")
                continue
            
            print(f"\n🧠 Training CF-HoT {head_name} head for {steps} steps...")
            
            trainer = CFHoTTrainer(_model, _tokenizer)
            result = trainer.train_head(
                head_name=head_name,
                positive_examples=CFHOT_TRAINING_DATA[head_name]['positive'],
                negative_examples=CFHOT_TRAINING_DATA[head_name]['negative'],
                output_dir=os.path.join(ROOT, f"cfhot_{head_name}_{datetime.now().strftime('%Y%m%d_%H%M%S')}"),
                steps=steps
            )
            
            if result['success']:
                print(f"\n✓ CF-HoT {head_name} trained! Separation: {result['result']['separation']:.1f}×")
                print(f"  Checkpoint: {result['checkpoint']}")
            continue
        
        if u == "!gate_stats" or u == "!cfhot_health":
            if _multi_head is not None:
                health = _multi_head.get_gate_health()
                print("\n🔬 CF-HoT Gate Health Report")
                print("=" * 40)
                print(f"  Status: {health['status'].upper()}")
                print(f"  Mean gate value: {health.get('mean', 0):.3f}")
                print(f"  Std gate value: {health.get('std', 0):.3f}")
                print(f"  Saturated low (<0.1): {health.get('saturated_low', 0):.1%}")
                print(f"  Saturated high (>0.9): {health.get('saturated_high', 0):.1%}")
                print(f"  EMA momentum: {health.get('ema_momentum', 0):.4f}")
                print(f"  EMA step: {health.get('ema_step', 0)}")
                if health.get('warnings'):
                    print("\n⚠️ WARNINGS:")
                    for w in health['warnings']:
                        print(f"  - {w}")
            else:
                print("CF-HoT not initialized. Run !cfhot first.")
            continue
        
        # === ENHANCED RSI WITH CONDENSATOR ===
        if u == "!rsi_full" or u == "!rsi_condensator":
            print("\n🔄 Running RSI with full CONDENSATOR integration...")
            print("This will run: Eval → Identify weakness → Train (SFT+DPO+RL) → Eval → Keep/Rollback")
            
            # Pre-RSI evaluation
            baseline = improver.evaluate_current_model()
            print(f"\nBaseline quality: {baseline['avg_quality']:.3f}")
            
            # Run CONDENSATOR
            condensator = get_condensator()
            result = condensator.run_full_pipeline(
                model_path=MODEL_PATH,
                output_dir=os.path.join(ROOT, f"rsi_condensator_{datetime.now().strftime('%Y%m%d_%H%M%S')}"),
                start_checkpoint=Store.state.get('current_checkpoint')
            )
            
            if result['success']:
                # Post-training evaluation
                reload_model(result['final_checkpoint'])
                post_eval = improver.evaluate_current_model()
                
                quality_diff = post_eval['avg_quality'] - baseline['avg_quality']
                
                if quality_diff > 0.02:
                    Store.state['current_checkpoint'] = result['final_checkpoint']
                    Store.state['best_checkpoint'] = result['final_checkpoint']
                    Store.save()
                    print(f"\n✓ Quality improved by {quality_diff:.3f}! Keeping new checkpoint.")
                elif quality_diff < -0.05:
                    # Rollback
                    reload_model(Store.state.get('best_checkpoint', DENSE_CHECKPOINT))
                    print(f"\n✗ Quality dropped by {-quality_diff:.3f}. Rolling back.")
                else:
                    Store.state['current_checkpoint'] = result['final_checkpoint']
                    Store.save()
                    print(f"\n➡️ Quality change: {quality_diff:.3f}. Keeping (marginal change).")
            else:
                print("\n✗ RSI CONDENSATOR failed.")
            continue
        
        if u.startswith("!load "):
            checkpoint = u[6:].strip()
            try:
                reload_model(checkpoint)
                print(f"Loaded: {checkpoint}")
            except Exception as e:
                print(f"Error: {e}")
            continue
        
        # === AGENTIC COMMANDS ===
        if u.startswith("!shell "):
            result = AgentTools.shell(u[7:])
            print(f"```\n{result['output']}\n```\nExit: {result['returncode']}")
            continue
        
        if u.startswith("!python "):
            result = AgentTools.python_exec(u[8:])
            print(f"```\n{result['output']}\n```")
            continue
        
        if u.startswith("!read "):
            result = AgentTools.read_file(u[6:].strip())
            if result['success']:
                print(f"```\n{result['content'][:5000]}\n```")
            else:
                print(f"Error: {result['error']}")
            continue
        
        if u.startswith("!write "):
            parts = u[7:].split(" ", 1)
            if len(parts) == 2:
                result = AgentTools.write_file(parts[0], parts[1])
                print(f"Written to {result.get('path', 'unknown')}" if result['success'] else f"Error: {result['error']}")
            else:
                print("Usage: !write  ")
            continue
        
        if u.startswith("!ls"):
            path = u[3:].strip() or "."
            result = AgentTools.list_dir(path)
            if result['success']:
                print("\n".join(result['items']))
            else:
                print(f"Error: {result['error']}")
            continue
        
        if u.startswith("!search "):
            result = AgentTools.search_files(u[8:])
            print(result['output'] if result['success'] else "No results")
            continue
        
        if u.startswith("!web "):
            result = AgentTools.web_search(u[5:])
            if result['success']:
                print("\n\n".join(result['results']))
            else:
                print(f"Error: {result['error']}")
            continue

        # === BROWSER COMMANDS ===
        if u.startswith("!browse "):
            url = u[8:].strip()
            if not url.startswith("http"):
                url = "https://" + url
            print(tool_browser_open(url))
            continue
        
        if u == "!browse":
            print(tool_browser_open())
            continue
        
        # === WEB SEARCH ===
        # Browser-backed search: prints up to 10 results (title, URL and, when
        # present, a snippet), each field truncated for display.
        # NOTE(review): this branch looks unreachable as written. The earlier
        # `!search ` handler under AGENTIC COMMANDS matches the same prefix and
        # always `continue`s, so control never gets here. Decide which search
        # should own `!search` (or give this one a distinct command name).
        if u.startswith("!search "):
            query = u[8:].strip()
            if query:
                print(f"\n🔍 Searching: {query}\n")
                browser = get_browser()
                results = browser.search(query)
                if results:
                    for i, r in enumerate(results[:10], 1):
                        print(f"  {i}. {r['title'][:60]}")
                        print(f"     {r['url'][:70]}")
                        if r.get('snippet'):
                            print(f"     {r['snippet'][:80]}...")
                        print()
                else:
                    print("  No results found")
            continue
        
        if u.startswith("!research "):
            topic = u[10:].strip()
            if topic:
                print(f"\n📚 Researching: {topic}\n")
                summary = web_search(topic, depth=5)
                print(summary)
            continue
        
        if u.startswith("!fetch "):
            url = u[7:].strip()
            if url:
                print(f"\n📄 Fetching: {url}\n")
                content = web_fetch(url)
                if content:
                    print(content[:3000])
                    if len(content) > 3000:
                        print(f"\n... ({len(content)} chars total)")
                else:
                    print("  Failed to fetch")
            continue
        
        if u.startswith("!click "):
            selector = u[7:].strip()
            print(tool_browser_click(selector))
            continue
        
        if u.startswith("!type "):
            text = u[6:].strip()
            print(tool_browser_type(text))
            continue
        
        if u.startswith("!fill "):
            # !fill selector text
            parts = u[6:].strip().split(" ", 1)
            if len(parts) == 2:
                print(tool_browser_type(parts[1], parts[0]))
            else:
                print("[browser] Usage: !fill  ")
            continue
        
        if u.startswith("!press "):
            key = u[7:].strip()
            print(tool_browser_press(key))
            continue
        
        if u == "!read":
            print(tool_browser_read())
            continue
        
        if u == "!url":
            print(tool_browser_url())
            continue
        
        if u == "!back":
            print(tool_browser_back())
            continue
        
        if u == "!close":
            print(tool_browser_close())
            continue

        # === TASK CHAIN COMMANDS ===
        if u.startswith("chain:"):
            task_str = u[6:].strip()
            task_type, params, desc = parse_chain_task(task_str)
            TASK_CHAIN.add(task_type, params, desc)
            print(f"[chain] Added: {desc}")
            print(f"[chain] Total tasks: {len(TASK_CHAIN.tasks)}")
            continue
        
        if u == "chain" or u == "chain show":
            print(TASK_CHAIN.show())
            continue
        
        if u == "chain run":
            result = run_task_chain(generate)
            print(result)
            continue
        
        if u == "chain clear":
            TASK_CHAIN.clear()
            print("[chain] Cleared all tasks")
            continue
        
        if u.startswith("chain workflow "):
            workflow_name = u[15:].strip()
            if workflow_name in WORKFLOW_TEMPLATES:
                TASK_CHAIN.clear()
                for task_str in WORKFLOW_TEMPLATES[workflow_name]:
                    task_type, params, desc = parse_chain_task(task_str)
                    TASK_CHAIN.add(task_type, params, desc)
                print(f"[chain] Loaded workflow: {workflow_name}")
                print(TASK_CHAIN.show())
            else:
                print(f"[chain] Unknown workflow. Available: {', '.join(WORKFLOW_TEMPLATES.keys())}")
            continue
        
        if u == "chain workflows":
            print("\n📋 AVAILABLE WORKFLOWS:")
            for name, tasks in WORKFLOW_TEMPLATES.items():
                print(f"\n  {name}:")
                for t in tasks:
                    print(f"    - {t}")
            continue

        # === LOGIN CONFIG COMMANDS ===
        if u == "!login config" or u == "!config login":
            print("\n" + "="*60)
            print("🔐 LOGIN CONFIGURATION")
            print("="*60)
            print(f"\n  use_persistent_profile: {LOGIN_CONFIG['use_persistent_profile']}")
            print(f"  firefox_profile_path: {LOGIN_CONFIG['firefox_profile_path'] or '(not set)'}")
            print(f"  use_gmail_api: {LOGIN_CONFIG['use_gmail_api']}")
            print(f"  gmail_api_ready: {GMAIL_API_OK and _gmail_service is not None}")
            print(f"\nCommands:")
            print("  !login set profile   - Set Firefox profile path")
            print("  !login use profile         - Enable persistent profile")
            print("  !login use fresh           - Use fresh browser (default)")
            print("  !login use api             - Enable Gmail API mode")
            print("  !login auth api            - Authenticate Gmail API")
            print("  !login save                - Save config")
            continue
        
        if u.startswith("!login set profile "):
            path = u[19:].strip()
            if os.path.exists(path):
                LOGIN_CONFIG["firefox_profile_path"] = path
                print(f"[config] ✅ Profile path set: {path}")
            else:
                print(f"[config] ❌ Path not found: {path}")
                print("[config] Find your profile: ~/.mozilla/firefox/*.default-release")
            continue
        
        if u == "!login use profile":
            if LOGIN_CONFIG["firefox_profile_path"]:
                LOGIN_CONFIG["use_persistent_profile"] = True
                print("[config] ✅ Persistent profile ENABLED")
                print("[config] Restart browser with !close then !browse")
            else:
                print("[config] ❌ Set profile path first: !login set profile ")
            continue
        
        if u == "!login use fresh":
            LOGIN_CONFIG["use_persistent_profile"] = False
            print("[config] ✅ Fresh browser mode ENABLED")
            continue
        
        if u == "!login use api":
            if GMAIL_API_OK:
                LOGIN_CONFIG["use_gmail_api"] = True
                print("[config] ✅ Gmail API mode ENABLED")
                print("[config] Use: !login auth api  (if not authenticated)")
            else:
                print("[config] ❌ Gmail API not installed")
                print("[config] Run: pip install google-auth google-auth-oauthlib google-api-python-client")
            continue
        
        if u == "!login auth api":
            if gmail_api_authenticate():
                LOGIN_CONFIG["use_gmail_api"] = True
                print("[config] ✅ Gmail API authenticated and enabled")
            continue
        
        if u == "!login save":
            save_login_config()
            continue
        
        # === GMAIL API COMMANDS ===
        if u.startswith("!gmail search "):
            query = u[14:].strip()
            if LOGIN_CONFIG.get("use_gmail_api") or GMAIL_API_OK:
                result = gmail_api_search(query)
                print(result)
            else:
                print("[gmail-api] Not enabled. Use: !login use api")
            continue
        
        if u.startswith("!gmail read "):
            msg_id = u[12:].strip()
            result = gmail_api_read(msg_id)
            print(result)
            continue
        
        if u.startswith("!gmail send "):
            # !gmail send to@email.com "Subject" "Body"
            parts = u[12:].strip().split('"')
            if len(parts) >= 4:
                to = parts[0].strip()
                subject = parts[1]
                body = parts[3]
                result = gmail_api_send(to, subject, body)
                print(result)
            else:
                print("[usage] !gmail send to@email.com \"Subject\" \"Body\"")
            continue

        # === RSI MODE COMMANDS ===
        if u == "rsi" or u == "rsi status":
            print(rsi_status())
            continue
        
        # OVERNIGHT MODE - autonomous improvement with Claude
        if u.startswith("!overnight"):
            parts = u.split()
            hours = 8  # default
            mode = "standard"  # or "repeng" or "selfplay"
            
            for i, part in enumerate(parts):
                if part.isdigit():
                    hours = int(part)
                elif part in ["repeng", "rep", "representation", "hidden"]:
                    mode = "repeng"
                elif part in ["selfplay", "self", "noapi", "local", "offline"]:
                    mode = "selfplay"
            
            if mode == "repeng":
                print(f"\n🧠 Starting {hours}-hour REPRESENTATION ENGINEERING session...")
                print("   Uses hidden state analysis + Claude to fix weak representations.")
            elif mode == "selfplay":
                print(f"\n🔄 Starting {hours}-hour SELF-PLAY session (NO API REQUIRED)...")
                print("   Uses CF-HoT contrast to improve without external help.")
            else:
                print(f"\n🌙 Starting {hours}-hour overnight improvement session...")
                print("   The model will learn from Claude and train itself.")
            print("   Press Ctrl+C at any time to stop.\n")
            
            confirm = input("Start overnight mode? (yes/no): ").strip().lower()
            if confirm in ["yes", "y"]:
                def rsi_train(steps):
                    """Training callback for the overnight loops.

                    Forwards `steps` to `_run_training` and returns whatever it returns.
                    """
                    training_result = _run_training(steps)
                    return training_result
                def rsi_eval():
                    """Evaluation callback for the overnight loops.

                    Calls `_run_evaluation` with no arguments and returns its result.
                    """
                    evaluation_result = _run_evaluation()
                    return evaluation_result
                def rsi_reload():
                    """Reload callback for the overnight loops.

                    Looks up "current_checkpoint" in `Store.state` (with
                    `DENSE_CHECKPOINT` as the `.get` default) and passes it to
                    `_reload_checkpoint`, returning that call's result.
                    """
                    checkpoint = Store.state.get("current_checkpoint", DENSE_CHECKPOINT)
                    return _reload_checkpoint(checkpoint)
                
                if mode == "repeng":
                    overnight_repeng_loop(generate, rsi_train, rsi_eval, rsi_reload, _mentor, hours)
                elif mode == "selfplay":
                    overnight_selfplay_loop(generate, rsi_train, rsi_eval, rsi_reload, hours)
                else:
                    overnight_improvement_loop(generate, rsi_train, rsi_eval, rsi_reload, _mentor, hours)
            else:
                print("Cancelled")
            continue
        
        if u == "rsi start" or u == "!rsi":
            # Define helper functions for RSI
            def rsi_train(steps):
                """Training callback given to `rsi_mode_loop`.

                Forwards `steps` to `_run_training` and returns whatever it returns.
                """
                training_result = _run_training(steps)
                return training_result
            
            def rsi_eval():
                """Evaluation callback given to `rsi_mode_loop`.

                Calls `_run_evaluation` with no arguments and returns its result.
                """
                evaluation_result = _run_evaluation()
                return evaluation_result
            
            def rsi_reload():
                """Reload callback given to `rsi_mode_loop`.

                Looks up "current_checkpoint" in `Store.state` (with
                `DENSE_CHECKPOINT` as the `.get` default) and passes it to
                `_reload_checkpoint`, returning that call's result.
                """
                checkpoint = Store.state.get("current_checkpoint", DENSE_CHECKPOINT)
                return _reload_checkpoint(checkpoint)
            
            # Start RSI mode
            rsi_mode_loop(generate, rsi_train, rsi_eval, rsi_reload)
            continue
        
        if u == "rsi stop":
            RSI_RUNNING = False
            print("[RSI] ⏹️ Stop signal sent")
            continue
        
        if u == "rsi pause":
            RSI_PAUSED = True
            print("[RSI] ⏸️ Pause signal sent")
            continue
        
        if u == "rsi resume":
            if RSI_PAUSED:
                RSI_PAUSED = False
                def rsi_train(steps):
                    """Training callback used when resuming `rsi_mode_loop`.

                    Forwards `steps` to `_run_training` and returns whatever it returns.
                    """
                    training_result = _run_training(steps)
                    return training_result
                def rsi_eval():
                    """Evaluation callback used when resuming `rsi_mode_loop`.

                    Calls `_run_evaluation` with no arguments and returns its result.
                    """
                    evaluation_result = _run_evaluation()
                    return evaluation_result
                def rsi_reload():
                    """Reload callback used when resuming `rsi_mode_loop`.

                    Looks up "current_checkpoint" in `Store.state` (with
                    `DENSE_CHECKPOINT` as the `.get` default) and passes it to
                    `_reload_checkpoint`, returning that call's result.
                    """
                    checkpoint = Store.state.get("current_checkpoint", DENSE_CHECKPOINT)
                    return _reload_checkpoint(checkpoint)
                rsi_mode_loop(generate, rsi_train, rsi_eval, rsi_reload)
            else:
                print("[RSI] Not paused")
            continue
        
        if u.startswith("rsi mode "):
            mode = u[9:].strip().lower()
            if mode in ["conservative", "balanced", "aggressive"]:
                RSI_CONFIG["mode"] = mode
                if mode == "conservative":
                    RSI_CONFIG["training_steps_per_iter"] = 10
                    RSI_CONFIG["auto_rollback_threshold"] = 0.10
                elif mode == "balanced":
                    RSI_CONFIG["training_steps_per_iter"] = 25
                    RSI_CONFIG["auto_rollback_threshold"] = 0.15
                elif mode == "aggressive":
                    RSI_CONFIG["training_steps_per_iter"] = 50
                    RSI_CONFIG["auto_rollback_threshold"] = 0.20
                save_rsi_config()
                print(f"[RSI] Mode set to: {mode}")
            else:
                print("[RSI] Modes: conservative, balanced, aggressive")
            continue
        
        if u.startswith("rsi target "):
            try:
                target = float(u[11:].strip())
                RSI_CONFIG["target_quality"] = target
                save_rsi_config()
                print(f"[RSI] Target quality set to: {target}")
            except:
                print("[usage] rsi target 0.85")
            continue
        
        if u.startswith("rsi focus "):
            cap = u[10:].strip().lower()
            if cap in RSI_CONFIG["capabilities_to_improve"]:
                RSI_CONFIG["current_focus"] = cap
                save_rsi_config()
                print(f"[RSI] Focus set to: {cap}")
            else:
                print(f"[RSI] Capabilities: {', '.join(RSI_CONFIG['capabilities_to_improve'])}")
            continue
        
        if u == "rsi eval":
            print("\n[RSI] 📊 Evaluating all capabilities...")
            for cap in RSI_CONFIG["capabilities_to_improve"]:
                score = rsi_evaluate_capability(cap, generate)
                print(f"  {cap}: {score:.2f}")
            continue
        
        if u == "rsi config":
            print(json.dumps(RSI_CONFIG, indent=2))
            continue
        
        if u == "rsi reset":
            RSI_CONFIG["iteration"] = 0
            RSI_CONFIG["total_improvements"] = 0
            RSI_CONFIG["total_rollbacks"] = 0
            RSI_CONFIG["best_quality_ever"] = 0.0
            save_rsi_config()
            print("[RSI] ✅ Stats reset")
            continue

        # === MINING COMMANDS ===
        if u == "!mine" or u == "!mine status":
            print(mining_status())
            continue
        
        if u == "!mine profit" or u == "mining profitability":
            result = check_mining_profitability(generate)
            print(result)
            continue
        
        if u == "!mine lowdiff" or u == "find low difficulty coins":
            result = find_lowest_difficulty_coins(generate)
            print(result)
            continue
        
        if u.startswith("!mine wallet "):
            parts = u[13:].strip().split(maxsplit=1)
            if len(parts) >= 2:
                coin, address = parts[0].upper(), parts[1]
                MINING_CONFIG["wallet_addresses"][coin] = address
                save_mining_config()
                print(f"[mining] ✅ Wallet saved for {coin}")
            else:
                print("[usage] !mine wallet  
")
            continue
        
        if u.startswith("!mine pool "):
            parts = u[11:].strip().split(maxsplit=1)
            if len(parts) >= 2:
                coin, pool = parts[0].upper(), parts[1]
                MINING_CONFIG["pool_urls"][coin] = pool
                save_mining_config()
                print(f"[mining] ✅ Pool saved for {coin}")
            else:
                print("[usage] !mine pool  ")
            continue
        
        if u.startswith("!mine setup "):
            parts = u[12:].strip().split()
            if len(parts) >= 2:
                miner, coin = parts[0].lower(), parts[1].upper()
                wallet = MINING_CONFIG.get("wallet_addresses", {}).get(coin, "")
                pool = MINING_CONFIG.get("pool_urls", {}).get(coin, "")
                
                if not wallet:
                    print(f"[mining] ❌ No wallet for {coin}. Set with: !mine wallet {coin} ")
                    continue
                if not pool:
                    for c in LOW_DIFF_COINS:
                        if c["symbol"] == coin:
                            pool = c["pool"]
                            break
                    if not pool:
                        print(f"[mining] ❌ No pool for {coin}. Set with: !mine pool {coin} ")
                        continue
                
                result = setup_miner(miner, coin, wallet, pool)
                print(result)
            else:
                print("[usage] !mine setup  ")
                print(f"[miners] {list(SUPPORTED_MINERS.keys())}")
            continue
        
        if u.startswith("!mine start"):
            parts = u[11:].strip().split()
            miner = parts[0] if parts else "xmrig"
            coin = parts[1].upper() if len(parts) > 1 else None
            result = start_mining(miner, coin)
            print(result)
            continue
        
        if u == "!mine stop" or u == "stop mining":
            print(stop_mining())
            continue
        
        if u == "!mine auto":
            result = auto_mine_best_coin(generate)
            print(result)
            continue
        
        if u.startswith("!mine create wallet "):
            coin = u[20:].strip().upper()
            result = create_local_wallet(coin)
            print(result)
            continue
        
        if u == "!mine coins":
            print("\n⛏️ SUPPORTED COINS:")
            for miner, info in SUPPORTED_MINERS.items():
                print(f"\n  {miner}: {', '.join(info['coins'])}")
            print("\n🔥 LOW DIFFICULTY COINS:")
            for c in LOW_DIFF_COINS:
                print(f"  {c['symbol']}: {c['name']} ({c['algo']})")
            continue
        
        if u == "!mine config":
            print(json.dumps(MINING_CONFIG, indent=2))
            continue

        # === CREDENTIAL MANAGEMENT ===
        if u.startswith("!savelogin "):
            # !savelogin gmail email@gmail.com password123
            parts = u[11:].strip().split(maxsplit=2)
            if len(parts) >= 3:
                store_credential(parts[0], parts[1], parts[2])
            else:
                print("[usage] !savelogin   ")
            continue
        
        if u.startswith("!login "):
            # !login gmail  OR  !login gmail email@gmail.com password123
            parts = u[7:].strip().split(maxsplit=2)
            service = parts[0].lower() if parts else None
            
            if not service:
                print("[usage] !login  [email] [password]")
                print("[services] gmail, facebook, twitter, linkedin, github, outlook")
                continue
            
            # Check for stored credentials
            creds = get_credential(service)
            
            if len(parts) >= 3:
                email, password = parts[1], parts[2]
            elif creds:
                email, password = creds["email"], creds["password"]
            else:
                # Prompt for credentials
                email = input(f"📧 Email/Username for {service}: ").strip()
                password = input(f"🔑 Password (hidden): ").strip()
            
            if confirm_login(service, email):
                result = execute_login(service, email, password)
                print(result[:500] if result else "[login attempted]")
            else:
                print("[login cancelled]")
            continue

        # === AUTONOMOUS TASK ===
        if u.startswith("!do "):
            task = u[4:].strip()
            print(f"\n[AI] 🧠 Analyzing task: {task}")
            print("[AI] Planning steps...\n")
            
            plan = plan_task(task, generate)
            
            if plan.get('error'):
                print(f"[AI] ❌ Error: {plan['error']}")
                continue
            
            if not plan.get('steps'):
                print("[AI] ❌ No steps generated. Try rephrasing the task.")
                continue
            
            if confirm_task(plan):
                print("\n[AI] 🚀 Executing task...\n")
                result = execute_task(plan, generate)
                print("\n" + "="*60)
                print("📊 TASK RESULTS")
                print("="*60)
                print(result)
                print("="*60)
            else:
                print("[AI] ❌ Task cancelled by user.")
            continue

        # === GOALS ===
        if u == "goals":
            print("[goals]")
            if not Store.goals:
                print("  (none)")
            for i, g in enumerate(Store.goals):
                print(f"  [{i}] {g}")
            continue

        if u.startswith("add:"):
            Store.goals.append(u[4:].strip())
            Store.save()
            print("[goals] added")
            continue

        if u.startswith("del:"):
            try:
                Store.goals.pop(int(u[4:].strip()))
                Store.save()
                print("[goals] deleted")
            except:
                print("[goals] bad index")
            continue

        if u.startswith("plan:"):
            try:
                goal = Store.goals[int(u[5:].strip())]
            except:
                print("[plan] bad index")
                continue
            out = plan_for(goal)
            last_plan = out
            Store.log_mem("plan", {"goal": goal, "plan": out})
            print(out)
            continue

        if u == "reflect":
            if not last_plan:
                print("[reflect] no plan to refine")
                continue
            improved = reflect_on(last_plan)
            last_plan = improved
            Store.log_mem("reflect", {"plan": improved})
            print(improved)
            continue

        # === INFO ===
        if u == "status":
            status = {
                "turn": Store.state["turn"],
                "goals": len(Store.goals),
                "improvement_iterations": Store.state.get("improvement_iterations", 0),
                "rollback_count": Store.state.get("rollback_count", 0),
                "current_checkpoint": Store.state.get("current_checkpoint", "unknown"),
                "best_checkpoint": Store.state.get("best_checkpoint", "unknown"),
                "best_quality": Store.state.get("best_quality_score", 0),
                "target_quality": Config.target_quality_score,
                "training_examples": len(DENSE_TRAINING_EXAMPLES),
            }
            print(json.dumps(status, indent=2))
            continue

        if u == "history":
            qh = Store.state.get("quality_history", [])
            print(f"Quality History ({len(qh)} entries):")
            for entry in qh[-10:]:
                print(f"  {entry.get('iteration', '?')}: {entry.get('quality', 0):.3f}")
            continue

        if u == "examples":
            print(f"Training examples: {len(DENSE_TRAINING_EXAMPLES)}")
            print(f"Preference pairs: {len(PREFERENCE_PAIRS)}")
            print("\nSample prompts:")
            for ex in DENSE_TRAINING_EXAMPLES[:5]:
                print(f"  • {ex['prompt']}")
            continue

        # === LIMITED TOOLS ===
        if u.startswith("shell:"):
            print(tool_shell(u[6:].strip()))
            continue

        if u.startswith("py:"):
            print(tool_py(u[3:].strip()))
            continue

        if u.startswith("search:"):
            print(tool_search_local(u[7:].strip()))
            continue

        if u.startswith("lht:"):
            print(tool_lht_analyze(u[4:].strip()))
            continue

        # === CONFIG ===
        if u.startswith("toggle"):
            parts = u.split(maxsplit=1)
            if len(parts) > 1:
                print(Config.toggle(parts[1]))
            else:
                print("[toggle] specify flag")
            continue

        # === SEAMLESS WEB TASK DETECTION ===
        if BROWSER_OK:
            # Check for email action first
            email_action, email_params = detect_email_action(u)
            if email_action:
                # For compose, gather details
                if "compose" in email_action:
                    print("\n✉️ Composing email...")
                    email_params["to"] = input("📬 To: ").strip()
                    email_params["subject"] = input("📋 Subject: ").strip()
                    email_params["body"] = input("📝 Message: ").strip()
                
                if confirm_email_action(email_action, email_params):
                    result = execute_email_action(email_action, email_params)
                    
                    if result:
                        # Let model summarize what was found
                        email_prompt = f"I searched/checked the email. Here's what I found:\n{result[:2500]}\n\nSummarize the relevant emails or results for the user based on their request: '{u}'"
                        out, stats, eval_result = generate(email_prompt)
                        print(f"\n{out}")
                        print(f"\n[Quality: {eval_result.overall_score:.2f} | Density: {eval_result.density_score:.1f} | "
                              f"Coherence: {eval_result.coherence_score:.2f} | Tokens: {eval_result.tokens}]")
                    else:
                        print("[email] Action completed")
                else:
                    print("[cancelled]")
                continue
            
            # Check for money-making task
            money_action, money_params = detect_money_task(u)
            if money_action:
                if confirm_money_action(money_action, money_params):
                    result = execute_money_action(money_action, money_params, generate)
                    print(f"\n{result}")
                else:
                    print("[cancelled]")
                continue
            
            # Check for login request
            service, url = detect_login_request(u)
            if service:
                creds = get_credential(service)
                
                if creds:
                    email, password = creds["email"], creds["password"]
                else:
                    print(f"\n🔐 Login to {service.upper()} requested")
                    email = input(f"📧 Email/Username: ").strip()
                    password = input(f"🔑 Password: ").strip()
                    
                    save = input("💾 Save credentials for this session? (yes/no): ").strip().lower()
                    if save in ['yes', 'y']:
                        store_credential(service, email, password)
                
                if confirm_login(service, email):
                    result = execute_login(service, email, password, url)
                    
                    # Let model respond based on what happened
                    login_prompt = f"I just logged into {service}. The page now shows: {result[:1500]}\n\nRespond to the user confirming login and what you see."
                    out, stats, eval_result = generate(login_prompt)
                    print(f"\n{out}")
                    print(f"\n[Quality: {eval_result.overall_score:.2f} | Density: {eval_result.density_score:.1f} | "
                          f"Coherence: {eval_result.coherence_score:.2f} | Tokens: {eval_result.tokens}]")
                else:
                    print("[login cancelled]")
                continue
            
            # Check for web search task
            template_name, query = detect_web_task(u)
            if template_name and query:
                plan = build_plan_from_template(template_name, query)
                
                if confirm_task(plan):
                    page_content = execute_task_silent(plan)
                    
                    if page_content:
                        # Generate response based on REAL web content
                        web_prompt = f"Based on this web content, answer the user's request: '{u}'\n\nWeb content:\n{page_content[:3000]}"
                        out, stats, eval_result = generate(web_prompt)
                        print(f"\n{out}")
                        print(f"\n[Quality: {eval_result.overall_score:.2f} | Density: {eval_result.density_score:.1f} | "
                              f"Coherence: {eval_result.coherence_score:.2f} | Tokens: {eval_result.tokens}]")
                        Store.state["turn"] += 1
                        Store.save()
                    else:
                        print("[browser] Could not read page content")
                else:
                    print("[cancelled]")
                continue

        # === DEFAULT: GENERATE ===
        out, stats, eval_result = generate(u)
        print(f"\n{out}")
        print(f"\n[Quality: {eval_result.overall_score:.2f} | Density: {eval_result.density_score:.1f} | "
              f"Coherence: {eval_result.coherence_score:.2f} | Tokens: {eval_result.tokens}]")
        
        if eval_result.filler_count > 0:
            print(f"  ⚠ Fillers detected: {eval_result.filler_count}")
        if eval_result.gibberish_score > 0.3:
            print(f"  ⚠ Gibberish detected: {eval_result.gibberish_score:.2f}")
        
        # === RSI v2.3 INTEGRATION ===
        # Classify and track
        domain = RSIDomainClassifier.classify(u)
        difficulty = RSIDifficultyEstimator.estimate(u)
        
        # Create experience
        exp_id = hashlib.md5(f"{time.time()}{u}".encode()).hexdigest()[:12]
        exp = RSIExperience(
            id=exp_id,
            prompt=u,
            response=out,
            quality=eval_result.overall_score,
            density=eval_result.density_score,
            coherence=eval_result.coherence_score,
            tokens=eval_result.tokens,
            domain=domain,
            difficulty=difficulty,
            timestamp=time.time(),
        )
        
        # Store in RSI systems
        _rsi_buffer.add(exp)
        _rsi_skills.update(domain, eval_result.overall_score)
        _rsi_feedback.set_last_experience(exp_id)
        _rsi_auto_train.record_example(eval_result.overall_score)
        _rsi_goals.update("quality", eval_result.overall_score)
        
        # Explanation mode
        if _rsi_config.explanation_enabled:
            explanation = RSIExplanationMode.generate(u, out, 
                {"quality": eval_result.overall_score, "tokens": eval_result.tokens}, domain)
            print(explanation)
        
        # Auto-training check
        should_train, reason = _rsi_auto_train.should_train()
        if should_train:
            print(f"\n⚡ Auto-training triggered ({reason})...")
            # Micro-train would go here - for now just log
            _rsi_auto_train.mark_trained()
            print("  (Training queued)")
        
        # Dream cycle check
        if _rsi_dreams.should_dream():
            print("\n💤 Dream cycle due - use !dream to run")
        
        Store.log_mem("reply", {"in": u, "out": out, "quality": eval_result.overall_score})
        Store.state["turn"] += 1
        Store.save()

    final_report()


# Script entry point: call main() only when this file is executed directly,
# not when it is imported as a module.
if __name__ == "__main__":
    main()