# Synced by github-actions -- "Clean sync: code and weights only." (commit 46b55ef)
import torch
import numpy as np
# -------------------------
# Generation Engines
# -------------------------
def execute_generation(model, tokenizer, prompt, max_tokens=300, temperature=0.85, do_sample=False):
    """
    A universal engine that handles GPU movement, sampling, and decoding.

    The do_sample=False default is ideal for objective Critic tasks
    (greedy decoding is deterministic).

    Args:
        model: HF-style causal LM exposing .device and .generate().
        tokenizer: matching tokenizer used for encoding and decoding.
        prompt: input text the model should continue.
        max_tokens: maximum number of new tokens to generate.
        temperature: sampling temperature; only used when do_sample=True.
        do_sample: stochastic sampling if True, otherwise greedy decoding.

    Returns:
        The generated continuation only (prompt removed), stripped of
        surrounding whitespace.
    """
    model.eval()
    inputs = tokenizer(prompt, return_tensors="pt").to(model.device)
    # Fall back to EOS when the tokenizer defines no pad token (common for
    # GPT-2/Llama-family tokenizers) -- generate() otherwise warns or fails
    # whenever padding is required.
    pad_id = tokenizer.pad_token_id
    if pad_id is None:
        pad_id = tokenizer.eos_token_id
    gen_kwargs = dict(
        max_new_tokens=max_tokens,
        no_repeat_ngram_size=3,
        do_sample=do_sample,
        repetition_penalty=1.2,
        pad_token_id=pad_id,
        eos_token_id=tokenizer.eos_token_id,
    )
    # Only pass temperature when sampling: explicitly passing
    # temperature=None triggers "unused generation flag" warnings on
    # recent transformers versions.
    if do_sample:
        gen_kwargs["temperature"] = temperature
    with torch.no_grad():
        outputs = model.generate(**inputs, **gen_kwargs)
    # Slice off the prompt tokens so only the continuation is decoded.
    input_length = inputs.input_ids.shape[1]
    generated_text = tokenizer.decode(outputs[0][input_length:], skip_special_tokens=True)
    return generated_text.strip()
def get_token_confidences(model, tokenizer, prompt, max_tokens=50):
    """
    Generate text and return a list of (token, confidence_score) tuples.

    Used for creating confidence heatmaps in the UI: each generated token
    is paired with the softmax probability the model assigned to it at the
    step where it was sampled.

    Args:
        model: HF-style causal LM exposing .device and .generate().
        tokenizer: matching tokenizer used for encoding and decoding.
        prompt: input text the model should continue.
        max_tokens: maximum number of new tokens to generate.

    Returns:
        List of (decoded_token_str, probability_float) pairs, one per
        generated token, in generation order.
    """
    model.eval()
    inputs = tokenizer(prompt, return_tensors="pt").to(model.device)
    # Fall back to EOS when the tokenizer defines no pad token (common for
    # GPT-2/Llama-family tokenizers) -- generate() otherwise warns or fails
    # whenever padding is required.
    pad_id = tokenizer.pad_token_id
    if pad_id is None:
        pad_id = tokenizer.eos_token_id
    with torch.no_grad():
        outputs = model.generate(
            **inputs,
            max_new_tokens=max_tokens,
            return_dict_in_generate=True,
            output_scores=True,  # one logits tensor per generated step
            do_sample=True,
            temperature=0.8,
            pad_token_id=pad_id
        )
    input_len = inputs.input_ids.shape[1]
    gen_ids = outputs.sequences[0][input_len:]
    # Softmax the per-step logits and pick out the probability of the token
    # that was actually chosen at that step (batch index 0).
    probs = [torch.softmax(score, dim=-1)[0, tid].item() for tid, score in zip(gen_ids, outputs.scores)]
    tokens = [tokenizer.decode(tid) for tid in gen_ids]
    return list(zip(tokens, probs))
# -------------------------
# Evaluation Metrics
# -------------------------
def calculate_perplexity(model, tokenizer, text):
    """
    Compute the model's perplexity (uncertainty) on a text sequence.

    Lower score  = the model finds the text natural/predictable.
    Higher score = the model finds the text confusing/alien.

    Args:
        model: HF-style causal LM exposing .device and returning .loss.
        tokenizer: matching tokenizer used to encode the text.
        text: the sequence to score.

    Returns:
        Perplexity as a plain Python float.
    """
    model.eval()
    encoded = tokenizer(text, return_tensors="pt").to(model.device)
    token_ids = encoded.input_ids
    with torch.no_grad():
        # Passing the ids as labels makes the model report its own
        # cross-entropy loss over the sequence.
        result = model(token_ids, labels=token_ids)
    # Perplexity is mathematically the exponential of the cross-entropy loss.
    return torch.exp(result.loss).item()
# -------------------------
# Trainer Log Parsers
# -------------------------
def extract_trainer_metrics(model_trainer):
    """
    Universal log parser for Hugging Face Trainer.

    Extracts step-by-step history for plotting and final validation.

    Args:
        model_trainer: object exposing ``state.log_history`` -- a list of
            dicts as produced by transformers.Trainer (training entries
            carry "loss"; evaluation entries carry "eval_loss").

    Returns:
        dict with plottable series ("train_steps"/"train_loss",
        "eval_steps"/"eval_loss"), the final eval loss as "val_loss",
        and its "perplexity" (exp of the final eval loss). "val_loss"
        and "perplexity" are None when no eval entries exist.
    """
    logs = model_trainer.state.log_history
    # Extract coordinates for plotting (Training vs Evaluation)
    train_metrics = [{"step": x["step"], "loss": x["loss"]} for x in logs if "loss" in x]
    eval_metrics = [{"step": x["step"], "loss": x["eval_loss"]} for x in logs if "eval_loss" in x]
    final_loss = eval_metrics[-1]["loss"] if eval_metrics else None
    return {
        "train_steps": [x["step"] for x in train_metrics],
        "train_loss": [x["loss"] for x in train_metrics],
        "eval_steps": [x["step"] for x in eval_metrics],
        "eval_loss": [x["loss"] for x in eval_metrics],
        "val_loss": final_loss,
        # Explicit None check: a final eval loss of exactly 0.0 is falsy,
        # so the old truthiness test wrongly reported perplexity=None
        # instead of exp(0.0) == 1.0.
        "perplexity": np.exp(final_loss) if final_loss is not None else None
    }