#!/usr/bin/env python3 """Compare AION against small Hugging Face causal LMs on the same tiny local suite. This script is optional. It requires transformers/torch for HF baselines. Example: python benchmark/benchmark_compare_small_models.py --models TinyLlama/TinyLlama-1.1B-Chat-v1.0 HuggingFaceTB/SmolLM2-135M-Instruct """ from __future__ import annotations import argparse, json, re, time from pathlib import Path import sys sys.path.append(str(Path(__file__).resolve().parents[1])) from aion import generate as aion_generate TESTS = [ {"suite":"chat", "prompt":"hola", "contains":["hello", "awake"]}, {"suite":"python", "prompt":"write code to keep numbers greater than 12", "contains":["x > 12", "filter"]}, {"suite":"web", "prompt":"create a responsive landing page with dark mode", "contains":["", "@media"]}, {"suite":"math", "prompt":"solve 2x + 5 = 17", "contains":["6"]}, {"suite":"science", "prompt":"force mass 10 acceleration 2", "contains":["20"]}, ] def score_output(out, needles): low = out.lower() return any(n.lower() in low for n in needles) def eval_generator(name, gen): rows=[]; passed=0; t0=time.time() for t in TESTS: out=gen(t["prompt"]) ok=score_output(out, t["contains"]) passed += int(ok) rows.append({"suite":t["suite"],"prompt":t["prompt"],"passed":ok,"output_preview":out[:500]}) return {"model":name,"passed":passed,"total":len(TESTS),"accuracy":passed/len(TESTS),"seconds":time.time()-t0,"rows":rows} def hf_generator(model_id, max_new_tokens=350): from transformers import AutoTokenizer, AutoModelForCausalLM import torch tok=AutoTokenizer.from_pretrained(model_id) model=AutoModelForCausalLM.from_pretrained(model_id, device_map="auto" if torch.cuda.is_available() else None) model.eval() def gen(prompt): full=f"Answer the request.\nRequest: {prompt}\nAnswer:" inputs=tok(full, return_tensors="pt") inputs={k:v.to(model.device) for k,v in inputs.items()} with torch.no_grad(): out=model.generate(**inputs, max_new_tokens=max_new_tokens, do_sample=False, pad_token_id=tok.eos_token_id) return tok.decode(out[0], skip_special_tokens=True) return gen def main(): ap=argparse.ArgumentParser() ap.add_argument("--models", nargs="*", default=[]) ap.add_argument("--out", default="results/small_model_comparison.json") args=ap.parse_args() results=[eval_generator("AION-1", aion_generate)] for model_id in args.models: try: results.append(eval_generator(model_id, hf_generator(model_id))) except Exception as e: results.append({"model":model_id,"error":str(e)}) out=Path(__file__).resolve().parents[1]/args.out out.parent.mkdir(exist_ok=True) out.write_text(json.dumps(results, indent=2, ensure_ascii=False), encoding="utf-8") print(json.dumps([{k:v for k,v in r.items() if k!='rows'} for r in results], indent=2)) if __name__=="__main__": main()