"""Ensemble de probas: usa os 5 folds e devolve a media. E a melhor pratica cientifica - replica o numero 0.7920 macro-F1 reportado. Em GPU T4: ~250ms por par. Em CPU: ~30s por par. """ import json, torch from transformers import AutoTokenizer, AutoModelForCausalLM from peft import PeftModel from huggingface_hub import snapshot_download REPO = "histlearn/community-notes-reranker-ptbr" path = snapshot_download(REPO, allow_patterns=["manifesto.json", "adapter_fold_*/*"]) m = json.load(open(f"{path}/manifesto.json")) tok = AutoTokenizer.from_pretrained(m["base_model"], padding_side="left") base = AutoModelForCausalLM.from_pretrained( m["base_model"], torch_dtype=torch.float16 if torch.cuda.is_available() else torch.float32) if torch.cuda.is_available(): base.cuda() def make_text(tw, nt): return (m["prompt_prefixo"] + ": " + m["instrucao"] + "\n: " + tw + "\n: " + nt + m["prompt_sufixo"]) def score_ensemble(tweet, nota): probs = [] for k in range(1, 6): model = PeftModel.from_pretrained(base, f"{path}/adapter_fold_{k}") model.eval() enc = tok(make_text(tweet, nota), return_tensors="pt", truncation=True, max_length=m["max_length"]).to(model.device) with torch.no_grad(): logits = model(**enc).logits[:, -1, :] probs.append(float(torch.sigmoid( logits[:, m["id_yes"]] - logits[:, m["id_no"]]).item())) model.unload() # libera memoria do adapter return sum(probs) / 5 print(score_ensemble("Bolsonaro disse que a Terra e plana", "Bolsonaro nunca afirmou isso; checagem em https://exemplo.org"))