Instructions to use ataeff/g with libraries, inference providers, notebooks, and local apps. Follow these links to get started.
- Libraries
- PEFT
How to use ataeff/g with PEFT:
from peft import PeftModel
from transformers import AutoModelForCausalLM

base_model = AutoModelForCausalLM.from_pretrained("unsloth/gemma-3-270m-it")
model = PeftModel.from_pretrained(base_model, "ataeff/g")
- llama-cpp-python
How to use ataeff/g with llama-cpp-python:
# !pip install llama-cpp-python
from llama_cpp import Llama

llm = Llama.from_pretrained(
    repo_id="ataeff/g",
    filename="leo-1b-plain-q4.gguf",
)
llm.create_chat_completion(
    messages = [
        {
            "role": "user",
            "content": "What is the capital of France?"
        }
    ]
)
- Notebooks
- Google Colab
- Kaggle
- Local Apps
- llama.cpp
How to use ataeff/g with llama.cpp:
Install from brew
brew install llama.cpp
# Start a local OpenAI-compatible server with a web UI:
llama-server -hf ataeff/g:F16
# Run inference directly in the terminal:
llama-cli -hf ataeff/g:F16
Install from WinGet (Windows)
winget install llama.cpp
# Start a local OpenAI-compatible server with a web UI:
llama-server -hf ataeff/g:F16
# Run inference directly in the terminal:
llama-cli -hf ataeff/g:F16
Use pre-built binary
# Download pre-built binary from:
# https://github.com/ggerganov/llama.cpp/releases
# Start a local OpenAI-compatible server with a web UI:
./llama-server -hf ataeff/g:F16
# Run inference directly in the terminal:
./llama-cli -hf ataeff/g:F16
Build from source code
git clone https://github.com/ggerganov/llama.cpp.git
cd llama.cpp
cmake -B build
cmake --build build -j --target llama-server llama-cli
# Start a local OpenAI-compatible server with a web UI:
./build/bin/llama-server -hf ataeff/g:F16
# Run inference directly in the terminal:
./build/bin/llama-cli -hf ataeff/g:F16
Use Docker
docker model run hf.co/ataeff/g:F16
- LM Studio
- Jan
- vLLM
How to use ataeff/g with vLLM:
Install from pip and serve model
# Install vLLM from pip:
pip install vllm
# Start the vLLM server:
vllm serve "ataeff/g"
# Call the server using curl (OpenAI-compatible API):
curl -X POST "http://localhost:8000/v1/chat/completions" \
  -H "Content-Type: application/json" \
  --data '{
    "model": "ataeff/g",
    "messages": [
      {
        "role": "user",
        "content": "What is the capital of France?"
      }
    ]
  }'
Use Docker
docker model run hf.co/ataeff/g:F16
- Ollama
How to use ataeff/g with Ollama:
ollama run hf.co/ataeff/g:F16
- Unsloth Studio new
How to use ataeff/g with Unsloth Studio:
Install Unsloth Studio (macOS, Linux, WSL)
curl -fsSL https://unsloth.ai/install.sh | sh
# Run unsloth studio
unsloth studio -H 0.0.0.0 -p 8888
# Then open http://localhost:8888 in your browser
# Search for ataeff/g to start chatting
Install Unsloth Studio (Windows)
irm https://unsloth.ai/install.ps1 | iex
# Run unsloth studio
unsloth studio -H 0.0.0.0 -p 8888
# Then open http://localhost:8888 in your browser
# Search for ataeff/g to start chatting
Using HuggingFace Spaces for Unsloth
# No setup required
# Open https://huggingface.co/spaces/unsloth/studio in your browser
# Search for ataeff/g to start chatting
- Docker Model Runner
How to use ataeff/g with Docker Model Runner:
docker model run hf.co/ataeff/g:F16
- Lemonade
How to use ataeff/g with Lemonade:
Pull the model
# Download Lemonade from https://lemonade-server.ai/
lemonade pull ataeff/g:F16
Run and chat with the model
lemonade run user.g-F16
List all available models
lemonade list
| #!/usr/bin/env python3 | |
| """ | |
| calibrate_entropy.py — Calibrate entropy thresholds for Adaptive Resonance | |
| Runs the model on diverse prompts WITHOUT resonance, recording entropy | |
| at every generation step. Then computes optimal H_high and H_low thresholds. | |
| The calibration is PER-MODEL. Different LoRA adapters will have different | |
| entropy profiles. ALWAYS recalibrate after training a new adapter. | |
| Usage: | |
| # Calibrate with LoRA adapter | |
| python calibrate_entropy.py --adapter-path ./gemma3-resonate/best | |
| # Calibrate base model (no adapter) | |
| python calibrate_entropy.py --no-lora | |
| # Custom prompts file | |
| python calibrate_entropy.py --adapter-path ./gemma3-resonate/best \ | |
| --prompts calibration_prompts.txt | |
| # Save calibration result | |
| python calibrate_entropy.py --adapter-path ./gemma3-resonate/best \ | |
| --save calibration.json | |
| Author: Wulf (Opus + Oleg) | |
| Date: 2026-03-28 | |
| """ | |
| from __future__ import annotations | |
| import os | |
| import sys | |
| import json | |
| import math | |
| import time | |
| import argparse | |
| import logging | |
| from typing import Optional | |
| import torch | |
| import torch.nn.functional as F | |
| from transformers import AutoModelForCausalLM, AutoTokenizer | |
| # ============================================================================ | |
| # Constants | |
| # ============================================================================ | |
# Default base checkpoint (the --model command-line default).
MODEL_ID = "unsloth/gemma-3-270m-it"

# Vocabulary size used to normalise entropy; H_MAX is the entropy of a
# uniform distribution over that many tokens: log2(262144) = 18 bits.
VOCAB_SIZE = 262_144
H_MAX = math.log2(VOCAB_SIZE)

# Turn delimiters used to build the chat prompt and to detect the end of a reply.
START_OF_TURN = "<start_of_turn>"
END_OF_TURN = "<end_of_turn>"
| # ============================================================================ | |
| # Logging | |
| # ============================================================================ | |
# INFO-level logging with a short HH:MM:SS timestamp on every message.
logging.basicConfig(
    format="%(asctime)s [%(levelname)s] %(message)s",
    datefmt="%H:%M:%S",
    level=logging.INFO,
)

# Logger used by this script for all of its messages.
log = logging.getLogger("calibrate")
| # ============================================================================ | |
| # Calibration Prompts — diverse, multilingual, varying difficulty | |
| # ============================================================================ | |
# Built-in calibration set, used when no --prompts file is supplied.
# The per-group notes record the author's expectation about whether the
# prompts should push entropy high enough to trigger resonance.
DEFAULT_PROMPTS = [
    # --- Easy factual: expected NOT to trigger resonance ---
    "What is 2 + 2?",
    "What color is the sky?",
    "Who wrote Romeo and Juliet?",
    "What is the capital of France?",
    "How many days are in a week?",

    # --- Medium difficulty: may or may not trigger ---
    "Explain what a neural network is in simple terms.",
    "What causes inflation?",
    "Why do birds migrate?",
    "How does encryption work?",
    "What is the difference between RNA and DNA?",

    # --- Hard reasoning: expected to trigger resonance ---
    "Why do small language models sometimes outperform larger ones?",
    "Is consciousness computable?",
    "What is the relationship between compression and intelligence?",
    "Can a system understand something it was never explicitly taught?",
    "Why does emergence happen at specific scale thresholds?",

    # --- Philosophy: expected to trigger ---
    "Is free will an illusion?",
    "What is the meaning of life?",
    "If all your memories were replaced, would you still be you?",
    "Does objective morality exist?",
    "What is the nature of time?",

    # --- Code: mixed (simple bugs should not trigger, architecture should) ---
    "What does `print(1 + 1)` output in Python?",
    "Why would a recursive function without a base case crash?",
    "How would you design a distributed consensus algorithm?",
    "Explain why attention mechanisms are O(n^2).",

    # --- Russian: the hard ones are expected to trigger ---
    "Сколько будет два плюс два?",
    "Почему небо голубое?",
    "Что такое эмерджентность в нейронных сетях?",
    "Свобода воли — это иллюзия?",
    "Почему маленькие языковые модели иногда лучше больших?",

    # --- French ---
    "Quelle est la capitale de la France?",
    "Pourquoi les petits modeles de langage sont-ils importants?",
    "Quel est le sens de la vie?",

    # --- German ---
    "Was ist der Sinn des Lebens?",
    "Was bedeutet Emergenz im Kontext neuronaler Netzwerke?",

    # --- Ambiguous / creative: high entropy expected ---
    "Write a haiku about debugging.",
    "If neural networks could dream, what would they dream about?",
    "Tell me something nobody has ever said before.",
    "What would happen if entropy decreased instead of increased?",

    # --- Meta: interesting entropy behaviour expected ---
    "Explain your reasoning process.",
    "How confident are you in your answers?",
    "What don't you know?",

    # --- Math ---
    "What is the sum of the first 100 positive integers?",
    "Prove that the square root of 2 is irrational.",
    "What is the derivative of x^x?",

    # --- Simple instructions: expected NOT to trigger ---
    "List three colors.",
    "Say hello in five languages.",
    "Count to ten.",
]
| # ============================================================================ | |
| # Entropy Collection | |
| # ============================================================================ | |
def collect_entropy_profile(
    model,
    tokenizer,
    prompt: str,
    max_tokens: int = 100,
    temperature: float = 0.7,
    device: str = 'cuda',
) -> dict:
    """Generate a reply to *prompt* and record the entropy of every step.

    Generation is ordinary sampling with no resonance intervention, so the
    recorded curve is the model's natural entropy profile.

    Args:
        model: Causal LM; called as ``model(ids)`` and read via ``.logits``.
            It is switched to eval mode.
        tokenizer: Provides ``encode``, ``decode`` and ``eos_token_id``.
        prompt: User message; wrapped in the turn template before encoding.
        max_tokens: Upper bound on the number of generation steps.
        temperature: Sampling temperature. Values <= 0 pick the most likely
            token (greedy) instead of dividing the logits by zero.
        device: Device the token ids are placed on.

    Returns:
        dict with:
            'prompt': str
            'entropies': list of (H_bits, H_norm) tuples, one per step
            'tokens': list of generated token strings
            'mean_h', 'max_h', 'min_h', 'std_h': statistics over H_norm
            'first_5_mean': mean H_norm of the first 5 steps
            'generated': first 200 characters of the generated text
        When no step was recorded (``max_tokens <= 0``) the lists are empty,
        the statistics are 0 and 'generated' is the empty string.
    """
    model.eval()
    input_text = f"{START_OF_TURN}user\n{prompt}{END_OF_TURN}\n{START_OF_TURN}model\n"
    input_ids = tokenizer.encode(input_text, return_tensors='pt').to(device)
    all_ids = input_ids[0].tolist()

    entropies = []
    tokens = []
    generated_text = ""
    eos_id = tokenizer.eos_token_id
    # A non-positive temperature cannot be used as a divisor; decode greedily.
    greedy = temperature <= 0

    with torch.no_grad():
        next_logits = model(input_ids).logits[0, -1, :]

    for _ in range(max_tokens):
        # Work in float32 for both the entropy and the sampling distribution,
        # whatever dtype the model produced.
        logits = next_logits.float()

        # Entropy of the untempered distribution, in bits and normalised by H_MAX.
        probs = F.softmax(logits, dim=-1).clamp(min=1e-10)
        H = -(probs * probs.log2()).sum().item()
        entropies.append((H, H / H_MAX))

        # Pick the next token (normal generation, no resonance intervention).
        if greedy:
            next_token = torch.argmax(logits).item()
        else:
            probs_sampling = F.softmax(logits / temperature, dim=-1)
            next_token = torch.multinomial(probs_sampling, num_samples=1).item()

        # The entropy of the step that produced EOS is kept; the token is not.
        if next_token == eos_id:
            break

        all_ids.append(next_token)
        token_str = tokenizer.decode([next_token])
        tokens.append(token_str)
        generated_text += token_str
        if generated_text.rstrip().endswith(END_OF_TURN):
            break

        # Next step: the whole sequence is fed through the model again
        # (no cached state is reused between steps).
        full_ids = torch.tensor([all_ids], device=device)
        with torch.no_grad():
            next_logits = model(full_ids).logits[0, -1, :]

    if not entropies:
        # Same keys as the normal result so callers can treat both alike.
        return {
            'prompt': prompt,
            'entropies': [],
            'tokens': [],
            'mean_h': 0, 'max_h': 0, 'min_h': 0, 'std_h': 0,
            'first_5_mean': 0,
            'generated': '',
        }

    # Summary statistics over the normalised entropies.
    h_norms = [h_norm for _, h_norm in entropies]
    mean_h = sum(h_norms) / len(h_norms)
    std_h = (sum((v - mean_h) ** 2 for v in h_norms) / len(h_norms)) ** 0.5
    first_5 = h_norms[:5]

    return {
        'prompt': prompt,
        'entropies': entropies,
        'tokens': tokens,
        'mean_h': mean_h,
        'max_h': max(h_norms),
        'min_h': min(h_norms),
        'std_h': std_h,
        'first_5_mean': sum(first_5) / len(first_5),
        'generated': generated_text[:200],
    }
| # ============================================================================ | |
| # Threshold Computation | |
| # ============================================================================ | |
def _mean_and_std(values):
    """Return (mean, population standard deviation) of a non-empty sequence."""
    mean = sum(values) / len(values)
    variance = sum((v - mean) ** 2 for v in values) / len(values)
    return mean, variance ** 0.5


def compute_thresholds(profiles: list[dict], target_resonance_rate: float = 0.45) -> dict:
    """Compute H_high and H_low thresholds from collected entropy profiles.

    Algorithm:
        1. Take the max and min normalised entropy of every prompt that
           produced at least one step.
        2. H_high = the sorted per-prompt maximum at position
           ``int(n * (1 - target_resonance_rate))`` (clamped to the list), so
           that roughly target_resonance_rate of prompts exceed it.
        3. H_low = mean of the per-prompt minimums + 0.5 * their std.
        4. Sanity adjustments: force a 0.10 gap if H_low >= H_high, raise
           H_high to 0.20 if it is below 0.10, floor H_low at 0.02.

    The target_resonance_rate controls how aggressive resonance is:
        - 0.3 = conservative (resonance on ~30% of prompts, only hard ones)
        - 0.5 = balanced (resonance on ~50% of prompts)
        - 0.7 = aggressive (resonance on ~70% of prompts, even medium questions)

    Args:
        profiles: Dicts carrying at least 'entropies' (list of
            (H_bits, H_norm) tuples), 'max_h' and 'min_h'.
        target_resonance_rate: Desired fraction of prompts whose maximum
            entropy exceeds H_high.

    Returns:
        dict with the thresholds and supporting statistics. When there are
        no profiles, or none with recorded entropies, only fallback
        'h_high' / 'h_low' values and an 'error' message are returned.
    """
    if not profiles:
        return {'h_high': 0.35, 'h_low': 0.12, 'error': 'no profiles'}

    # Only prompts that produced at least one generation step carry statistics.
    valid = [p for p in profiles if p['entropies']]
    if not valid:
        return {'h_high': 0.35, 'h_low': 0.12, 'error': 'no valid profiles'}

    max_entropies = [p['max_h'] for p in valid]
    min_entropies = [p['min_h'] for p in valid]

    # H_high: resonance should trigger on target_resonance_rate of prompts,
    # so pick the (1 - target_resonance_rate) percentile of per-prompt maxima.
    max_entropies_sorted = sorted(max_entropies)
    h_high_idx = int(len(max_entropies_sorted) * (1 - target_resonance_rate))
    h_high_idx = max(0, min(len(max_entropies_sorted) - 1, h_high_idx))
    h_high = max_entropies_sorted[h_high_idx]

    # H_low: mean of per-prompt minimums + 0.5*std as a safety margin.
    mean_of_mins, std_of_mins = _mean_and_std(min_entropies)
    h_low = mean_of_mins + 0.5 * std_of_mins

    # Sanity checks
    if h_low >= h_high:
        log.warning(f"h_low ({h_low:.4f}) >= h_high ({h_high:.4f}). Adjusting.")
        # Force a minimum gap around the midpoint.
        midpoint = (h_low + h_high) / 2
        h_high = midpoint + 0.05
        h_low = midpoint - 0.05
    if h_high < 0.10:
        log.warning(f"h_high ({h_high:.4f}) is suspiciously low. Setting to 0.20.")
        h_high = 0.20
    if h_low < 0.02:
        h_low = 0.02

    # Resonance rate the (possibly adjusted) H_high actually yields.
    would_trigger = sum(1 for m in max_entropies if m > h_high)
    actual_rate = would_trigger / len(max_entropies)

    # Statistics over every recorded step of every prompt. `valid` is
    # non-empty and each entry has at least one step, so all_h is non-empty.
    all_h = [h_norm for p in valid for _, h_norm in p['entropies']]
    global_mean, global_std = _mean_and_std(all_h)

    # Mean is computed once here rather than once per element.
    max_mean, max_std = _mean_and_std(max_entropies)

    return {
        'h_high': round(h_high, 4),
        'h_low': round(h_low, 4),
        'target_resonance_rate': target_resonance_rate,
        'actual_resonance_rate': round(actual_rate, 3),
        'num_prompts': len(profiles),
        'num_valid': len(max_entropies),
        'global_entropy_stats': {
            'mean': round(global_mean, 4),
            'std': round(global_std, 4),
            'max': round(max(all_h), 4),
            'min': round(min(all_h), 4),
        },
        'per_prompt_max_entropy': {
            'mean': round(max_mean, 4),
            'std': round(max_std, 4),
            'min': round(min(max_entropies), 4),
            'max': round(max(max_entropies), 4),
        },
        'per_prompt_min_entropy': {
            'mean': round(mean_of_mins, 4),
            'std': round(std_of_mins, 4),
        },
        'recommended_enter_count': 3,
        'recommended_exit_count': 5,
    }
| # ============================================================================ | |
| # Report | |
| # ============================================================================ | |
def print_report(result: dict, profiles: list[dict]):
    """Print a detailed calibration report to stdout.

    Args:
        result: Calibration result. Either the full result (thresholds,
            rates, 'global_entropy_stats', 'per_prompt_max_entropy', ...) or
            the short error form holding only 'h_high', 'h_low' and 'error'.
        profiles: Per-prompt profiles; each needs 'prompt', 'entropies',
            'max_h' and 'mean_h'. Profiles with empty 'entropies' are skipped.

    The report lists the recommended thresholds, global statistics, a
    per-prompt table sorted by descending max entropy, a 15-bin histogram of
    per-prompt max entropies and a ready-to-copy command line.
    """
    rule_heavy = '=' * 70
    rule_light = '─' * 70

    print(f"\n{rule_heavy}")
    print(" ENTROPY CALIBRATION REPORT")
    print(f"{rule_heavy}")

    # An error result has no statistics to report; show the fallback
    # thresholds instead of failing on the missing keys.
    if 'error' in result:
        print(f"\n Calibration failed: {result['error']}")
        print(f" Fallback thresholds: H_high = {result['h_high']:.4f}, "
              f"H_low = {result['h_low']:.4f}")
        print(f"\n{rule_heavy}\n")
        return

    print(f"\n Calibrated on {result['num_prompts']} prompts ({result['num_valid']} valid)")
    print("\n RECOMMENDED THRESHOLDS:")
    print(f" H_high = {result['h_high']:.4f} (enter resonance above this)")
    print(f" H_low = {result['h_low']:.4f} (exit resonance below this)")
    print(f"\n Expected resonance rate: {result['actual_resonance_rate']:.0%} of prompts")
    print(f" Target was: {result['target_resonance_rate']:.0%}")

    gs = result['global_entropy_stats']
    print("\n Global entropy (H_norm):")
    print(f" mean={gs['mean']:.4f} std={gs['std']:.4f} min={gs['min']:.4f} max={gs['max']:.4f}")

    pm = result['per_prompt_max_entropy']
    print("\n Per-prompt max entropy:")
    print(f" mean={pm['mean']:.4f} std={pm['std']:.4f} range=[{pm['min']:.4f}, {pm['max']:.4f}]")

    # Per-prompt breakdown, highest max entropy first.
    print(f"\n{rule_light}")
    print(" PER-PROMPT ANALYSIS")
    print(f"{rule_light}")
    print(f" {'Prompt':<50} {'MaxH':>7} {'MeanH':>7} {'Trigger':>8}")
    print(f" {'─'*50} {'─'*7} {'─'*7} {'─'*8}")
    for p in sorted(profiles, key=lambda x: -x['max_h']):
        if not p['entropies']:
            continue
        prompt_short = p['prompt'][:48]
        trigger = "YES" if p['max_h'] > result['h_high'] else "no"
        trigger_mark = ">>>" if trigger == "YES" else " "
        print(f" {trigger_mark}{prompt_short:<47} {p['max_h']:>7.4f} {p['mean_h']:>7.4f} {trigger:>8}")

    # Histogram of per-prompt max entropies.
    print(f"\n{rule_light}")
    print(" MAX ENTROPY DISTRIBUTION")
    print(f"{rule_light}")
    max_hs = sorted(p['max_h'] for p in profiles if p['entropies'])
    if max_hs:
        n_bins = 15
        bar_width = 40
        bin_min = 0.0
        bin_max = max(max_hs) * 1.1
        if bin_max <= bin_min:
            # Every max entropy is 0: use a unit range so the bin width is
            # non-zero and the division below is defined.
            bin_max = bin_min + 1.0
        bin_width = (bin_max - bin_min) / n_bins

        bins = [0] * n_bins
        for v in max_hs:
            # The largest values are clamped into the last bin.
            idx = min(int((v - bin_min) / bin_width), n_bins - 1)
            bins[idx] += 1

        max_count = max(bins)
        for i, count in enumerate(bins):
            lo = bin_min + i * bin_width
            hi = lo + bin_width
            bar_len = int(count / max_count * bar_width) if max_count > 0 else 0
            bar = '#' * bar_len
            # Flag the bin that contains the H_high threshold.
            marker = " <-- H_high" if lo <= result['h_high'] < hi else ""
            print(f" {lo:.3f}-{hi:.3f} |{bar:<{bar_width}}| {count:>3}{marker}")

    # Usage instructions
    print(f"\n{rule_light}")
    print(" USAGE")
    print(f"{rule_light}")
    print(" python entropy_resonance.py \\")
    print(" --adapter-path ./gemma3-resonate/best \\")
    print(f" --h-high {result['h_high']:.4f} \\")
    print(f" --h-low {result['h_low']:.4f}")
    print(f"\n{rule_heavy}\n")
| # ============================================================================ | |
| # Main | |
| # ============================================================================ | |
def main():
    """Command-line entry point: load the model, calibrate, report, save.

    Steps:
        1. Parse arguments and choose a device (cuda, then mps, then cpu,
           unless --device is given).
        2. Load the tokenizer and base model, plus the LoRA adapter when
           --adapter-path is given and --no-lora is not.
        3. Collect an entropy profile for every prompt.
        4. Compute the thresholds, print the report and, with --save, write a
           JSON summary (without the full per-step entropy traces).

    Exits with status 1 when no entropy profile could be collected.
    """
    parser = argparse.ArgumentParser(
        description="Calibrate entropy thresholds for Adaptive Resonance"
    )
    parser.add_argument("--model", default=MODEL_ID, help="Base model ID")
    parser.add_argument("--adapter-path", default=None, help="LoRA adapter path")
    parser.add_argument("--no-lora", action="store_true", help="Skip LoRA loading")
    parser.add_argument("--device", default=None, help="Device: cuda/cpu/mps")
    parser.add_argument("--prompts", default=None,
                        help="Text file with prompts, one per line")
    parser.add_argument("--max-tokens", type=int, default=100,
                        help="Max tokens per generation during calibration")
    parser.add_argument("--target-rate", type=float, default=0.45,
                        help="Target resonance trigger rate (0-1)")
    parser.add_argument("--temperature", type=float, default=0.7,
                        help="Sampling temperature during calibration")
    parser.add_argument("--save", default=None,
                        help="Save calibration result to JSON file")
    args = parser.parse_args()

    # Device
    if args.device is not None:
        device = args.device
    elif torch.cuda.is_available():
        device = 'cuda'
    elif torch.backends.mps.is_available():
        device = 'mps'
    else:
        device = 'cpu'
    # Indexed devices such as "cuda:0" are CUDA too and get the same dtype,
    # attention implementation and placement as plain "cuda".
    on_cuda = str(device).startswith('cuda')

    # Load model
    # NOTE(review): trust_remote_code=True lets the checkpoint's repository
    # run its own Python code; only pass --model values you trust.
    log.info(f"Loading tokenizer from {args.model}...")
    tokenizer = AutoTokenizer.from_pretrained(args.model, trust_remote_code=True)
    dtype = torch.bfloat16 if on_cuda else torch.float32
    log.info(f"Loading model from {args.model} onto {device}...")
    model = AutoModelForCausalLM.from_pretrained(
        args.model,
        torch_dtype=dtype,
        device_map=device if on_cuda else None,
        attn_implementation="sdpa" if on_cuda else "eager",
        trust_remote_code=True,
    )
    if not on_cuda:
        model = model.to(device)

    if args.adapter_path and not args.no_lora:
        # Imported lazily so peft is only needed when an adapter is used.
        from peft import PeftModel
        log.info(f"Loading adapter from {args.adapter_path}...")
        model = PeftModel.from_pretrained(model, args.adapter_path)
    model.eval()

    # Load prompts
    if args.prompts:
        with open(args.prompts, 'r', encoding='utf-8') as f:
            prompts = [line.strip() for line in f if line.strip()]
        log.info(f"Loaded {len(prompts)} prompts from {args.prompts}")
    else:
        prompts = DEFAULT_PROMPTS
        log.info(f"Using {len(prompts)} default calibration prompts")

    # Collect entropy profiles
    log.info(f"Collecting entropy profiles ({args.max_tokens} tokens/prompt)...")
    profiles = []
    t0 = time.time()
    for i, prompt in enumerate(prompts):
        log.info(f" [{i+1}/{len(prompts)}] {prompt[:60]}...")
        profile = collect_entropy_profile(
            model, tokenizer, prompt,
            max_tokens=args.max_tokens,
            temperature=args.temperature,
            device=device,
        )
        profiles.append(profile)
        if profile['entropies']:
            log.info(f" H_norm: mean={profile['mean_h']:.4f} max={profile['max_h']:.4f} "
                     f"min={profile['min_h']:.4f} ({len(profile['entropies'])} tokens)")
    elapsed = time.time() - t0
    log.info(f"Collection complete in {elapsed:.1f}s")

    # Compute thresholds
    result = compute_thresholds(profiles, target_resonance_rate=args.target_rate)
    if 'error' in result:
        # Without usable profiles the result carries only fallback thresholds
        # and none of the statistics the report and the JSON summary need.
        log.error(f"Calibration failed: {result['error']}. "
                  "Check the prompts file and --max-tokens.")
        sys.exit(1)

    # Print report
    print_report(result, profiles)

    # Save if requested
    if args.save:
        # Don't save the full entropy traces (too large) — just the result
        save_data = {
            'calibration': result,
            'per_prompt_summary': [
                {
                    'prompt': p['prompt'],
                    'mean_h': round(p['mean_h'], 4),
                    'max_h': round(p['max_h'], 4),
                    'min_h': round(p['min_h'], 4),
                    'std_h': round(p['std_h'], 4),
                    'first_5_mean': round(p['first_5_mean'], 4),
                    'n_tokens': len(p['entropies']),
                    'would_trigger': p['max_h'] > result['h_high'],
                }
                for p in profiles if p['entropies']
            ],
            'model': args.model,
            'adapter': args.adapter_path,
            'target_rate': args.target_rate,
            'max_tokens': args.max_tokens,
            'temperature': args.temperature,
        }
        with open(args.save, 'w', encoding='utf-8') as f:
            json.dump(save_data, f, indent=2, ensure_ascii=False)
        log.info(f"Calibration saved to {args.save}")

    log.info("Done. Use the recommended thresholds with entropy_resonance.py.")
# Run the calibration CLI only when this file is executed as a script.
if __name__ == "__main__":
    main()