File size: 2,260 Bytes
"""
Inference script for UnixCoder-512
=====================================
Usage: run this script directly to classify the built-in example, or import
load_model() and predict() to classify your own code samples.
"""
import torch
import torch.nn as nn
import torch.nn.functional as F
from transformers import AutoTokenizer, AutoModel, AutoConfig, AutoModelForSequenceClassification
from safetensors.torch import load_file
import numpy as np
DEVICE = "cuda" if torch.cuda.is_available() else "cpu"
CLASS_NAMES = ["Human", "AI-Generated", "Hybrid", "Adversarial"]
class UnixCoderModel(nn.Module):
    """RoBERTa encoder followed by a 4-output linear classification head.

    The head is applied only to the encoder hidden state found at index 0
    of the second axis (presumably the leading [CLS]-style token -- confirm
    against the tokenizer).
    """

    def __init__(self, config):
        """Create the encoder and the classification head.

        Args:
            config: Configuration object handed to ``RobertaModel``; its
                ``hidden_size`` attribute sets the input width of the head.
        """
        super().__init__()
        # Function-scope import, matching how the rest of this script pulls
        # in its transformers / hub helpers.
        from transformers import RobertaModel

        # Creation order (encoder, then classifier) is kept so parameter and
        # state-dict ordering stay the same.
        self.encoder = RobertaModel(config)
        self.classifier = nn.Linear(in_features=config.hidden_size, out_features=4)

    def forward(self, input_ids, attention_mask):
        """Return the classification logits for a batch.

        Args:
            input_ids: Token ids forwarded to the encoder.
            attention_mask: Mask forwarded to the encoder as the
                ``attention_mask`` keyword.

        Returns:
            The output of the linear head, with 4 values along the last axis.
        """
        encoder_outputs = self.encoder(input_ids, attention_mask=attention_mask)
        # Element 0 of the encoder output is indexed as a rank-3 tensor; only
        # position 0 of its second axis is fed to the head.
        hidden_states = encoder_outputs[0]
        first_position = hidden_states[:, 0, :]
        return self.classifier(first_position)
def load_model():
    """Build the classifier, load its published weights, and fetch the tokenizer.

    The configuration, tokenizer and ``model.safetensors`` weights are all
    taken from the ``YoungDSMLKZ/UnixCoder-512`` repository.

    Returns:
        A ``(model, tokenizer)`` tuple. The model has been moved to
        ``DEVICE`` and put in evaluation mode.
    """
    from transformers import RobertaConfig
    from huggingface_hub import hf_hub_download

    hub_repo = "YoungDSMLKZ/UnixCoder-512"
    roberta_config = RobertaConfig.from_pretrained(hub_repo)
    tokenizer = AutoTokenizer.from_pretrained(hub_repo)
    model = UnixCoderModel(roberta_config)

    checkpoint_file = hf_hub_download(repo_id=hub_repo, filename="model.safetensors")
    checkpoint = load_file(checkpoint_file)

    # Rename "unixcoder." to "encoder." in every key so the checkpoint names
    # line up with the ``encoder`` attribute of UnixCoderModel.
    renamed_state = {}
    for key, tensor in checkpoint.items():
        renamed_state[key.replace("unixcoder.", "encoder.")] = tensor
    model.load_state_dict(renamed_state)

    model.to(DEVICE)
    model.eval()
    return model, tokenizer
def predict(code: str, model, tokenizer) -> dict:
    """Classify one code sample and report the winning class.

    Args:
        code: Source text to classify.
        model: Callable invoked as ``model(input_ids, attention_mask)`` that
            returns logits.
        tokenizer: Callable that tokenizes ``code``; its result must support
            ``.to(DEVICE)`` and lookup of ``"input_ids"`` and
            ``"attention_mask"``.

    Returns:
        A dict with ``"class"`` (an entry of ``CLASS_NAMES``) and
        ``"confidence"`` (the softmax probability of that class).
    """
    encoded = tokenizer(
        code,
        return_tensors="pt",
        truncation=True,
        max_length=512,
        padding=True,
    ).to(DEVICE)

    with torch.no_grad():
        logits = model(encoded["input_ids"], encoded["attention_mask"])
        # A single sample goes in, so only row 0 of the result is used.
        class_probs = F.softmax(logits, dim=-1)[0]

    best_index = int(torch.argmax(class_probs))
    return {
        "class": CLASS_NAMES[best_index],
        "confidence": float(class_probs[best_index]),
    }
if __name__ == "__main__":
    # Demo entry point: load the model once, then classify one small snippet.
    print("Loading model...")
    model, tokenizer = load_model()

    # Example usage
    # The function body inside the sample is indented so that the text handed
    # to the classifier is itself valid Python.
    test_code = """
def hello_world():
    print("Hello, World!")
"""

    result = predict(test_code, model, tokenizer)
    print(f"Predicted: {result['class']} (confidence: {result['confidence']:.2%})")
|