# BERT / src / inference.py
# Empfloo's picture
# Upload 12 files
# e829681 verified
from peft import AutoPeftModelForSequenceClassification
from transformers import AutoTokenizer
from pathlib import Path
import torch
import json
# Identifier of the base checkpoint; it names the checkpoint directory and is
# also what the tokenizer is loaded from below.
model_name = "oracat/bert-paper-classifier-arxiv"
output_model_path = Path("./checkpoints") / model_name.rsplit("/", 1)[-1] / "checkpoints"
save_path = Path("./data")

# Run on the GPU when one is available, otherwise on the CPU.
if torch.cuda.is_available():
    device = "cuda"
else:
    device = "cpu"

# Load the id -> category mapping. JSON object keys are always strings, so
# they are converted back to integers here.
with open(save_path / "ids2cat.json", "r", encoding="utf-8") as f:
    ids2cat = {int(raw_id): category for raw_id, category in json.load(f).items()}
num_labels = len(ids2cat)

# Load the PEFT model from the final_model folder.
model = AutoPeftModelForSequenceClassification.from_pretrained(
    output_model_path / "final_model",
    num_labels=num_labels,
    problem_type="multi_label_classification",
    ignore_mismatched_sizes=True,
)
tokenizer = AutoTokenizer.from_pretrained(model_name)

# Move the weights to the selected device and switch to evaluation mode.
model = model.to(device).eval()
@torch.inference_mode()
def predict(
    text: str,
    threshold: float = 0.5,
    max_length: int = 512,
):
    """Classify *text* and return the selected labels plus all label scores.

    The text is tokenized (truncated to ``max_length`` tokens), moved to the
    module-level ``device`` and passed through the module-level ``model``.
    A sigmoid is applied to the first row of the returned logits, giving one
    independent score per label.

    Args:
        text: Input text to classify.
        threshold: A label is selected when its score is ``>= threshold``.
        max_length: Maximum length passed to the tokenizer for truncation.

    Returns:
        A dict with two keys:
        ``"labels"`` - list of category names whose score reached the threshold;
        ``"scores"`` - dict mapping the category name of every id in
        ``range(num_labels)`` to its score as a Python float.
    """
    encoded = tokenizer(
        text,
        return_tensors="pt",
        truncation=True,
        padding=True,
        max_length=max_length,
    )
    # The tensors must live on the same device as the model.
    batch = {name: tensor.to(device) for name, tensor in encoded.items()}

    logits = model(**batch).logits
    # Only the first row of the logits is read.
    probabilities = logits[0].sigmoid().cpu()

    above_threshold = probabilities >= threshold
    selected_ids = torch.nonzero(above_threshold, as_tuple=True)[0].tolist()
    selected_labels = [ids2cat[label_id] for label_id in selected_ids]

    all_scores = {}
    for label_id in range(num_labels):
        all_scores[ids2cat[label_id]] = float(probabilities[label_id])

    return {
        "labels": selected_labels,
        "scores": all_scores,
    }
if __name__ == "__main__":
    # Demo input: a paper title followed by its abstract, separated by a blank line.
    text = "\n\n".join(
        [
            """Macroscopic transport patterns of UAV traffic in 3D anisotropic wind fields: A constraint-preserving hybrid PINN-FVM approach""",
            """Macroscopic unmanned aerial vehicle (UAV) traffic organization in three-dimensional airspace faces significant challenges from static wind fields and complex obstacles. A critical difficulty lies in simultaneously capturing the strong anisotropy induced by wind while strictly preserving transport consistency and boundary semantics, which are often compromised in standard physics-informed learning approaches. To resolve this, we propose a constraint-preserving hybrid solver that integrates a physics-informed neural network for the anisotropic Eikonal value problem with a conservative finite-volume method for steady density transport. These components are coupled through an outer Picard iteration with under-relaxation, where the target condition is hard-encoded and strictly conservative no-flux boundaries are enforced during the transport step. We evaluate the framework on reproducible homing and point-to-point scenarios, effectively capturing value slices, induced-motion patterns, and steady density structures such as bands and bottlenecks. Ultimately, our perspective emphasizes the value of a reproducible computational framework supported by transparent empirical diagnostics to enable the traceable assessment of macroscopic traffic phenomena.""",
        ]
    )
    result = predict(text, threshold=0.7)

    # Labels whose score reached the threshold, one per line.
    print("Predicted labels:")
    for label in result["labels"]:
        print(label)

    # The ten highest-scoring labels, best first.
    print("\nTop-10 scores:")
    ranked = sorted(result["scores"].items(), key=lambda item: item[1], reverse=True)
    for label, score in ranked[:10]:
        print(f"{label}: {score:.4f}")