# AI_Detector/src/models/ai_detector_model.py
# Author: Tany Nguyen
# Last change: switch to the short model variant (commit a147390)
import os
import torch
from transformers import AutoTokenizer, AutoModelForSequenceClassification
import logging
# Module-level logger using the standard `logging.getLogger(__name__)` pattern.
logger = logging.getLogger(__name__)
class AIDetectorModel:
    """Wraps a sequence-classification model that detects AI-generated text.

    Loading strategy: try a locally saved (quantized) model first, then fall
    back to a base Hugging Face checkpoint. Inference always runs on CPU.
    """

    # Hugging Face model tag used when the local quantized model cannot be loaded.
    FALLBACK_MODEL_TAG = "yuchuantian/AIGC_detector_env3short"

    def __init__(self, model_dir: str, model_filename: str):
        """
        Args:
            model_dir: Directory holding the tokenizer files and the model file.
            model_filename: Name of the serialized (quantized) model file.
        """
        self.model_dir = model_dir
        self.model_path = os.path.join(model_dir, model_filename)
        self.tokenizer = None
        self.model = None
        self.device = torch.device("cpu")  # Force CPU as requested

    def load(self):
        """Load the tokenizer and model.

        Tries the local quantized model first; on any failure, falls back to
        the base Hugging Face model.

        Raises:
            RuntimeError: If both the quantized and fallback loads fail.
        """
        logger.info("Loading AI Detector model from: %s...", self.model_path)

        # 1. Try loading the local quantized model.
        try:
            # Tokenizer files live in the model directory (saved by download_model.py).
            self.tokenizer = AutoTokenizer.from_pretrained(self.model_dir)
            # The quantized model was saved as a full pickled module object.
            try:
                # weights_only=False is required to unpickle a full nn.Module.
                # NOTE(security): this executes pickle — only load trusted files.
                self.model = torch.load(
                    self.model_path, map_location=self.device, weights_only=False
                )
            except TypeError:
                # Older torch versions do not accept the weights_only kwarg.
                self.model = torch.load(self.model_path, map_location=self.device)
            self.model.eval()
            logger.info("AI Detector quantized model loaded successfully.")
            return
        except Exception as e:
            logger.warning(
                "Failed to load quantized model: %s. Attempting fallback...", e
            )

        # 2. Fall back to the base Hugging Face model.
        logger.info("Loading fallback model: %s...", self.FALLBACK_MODEL_TAG)
        try:
            self.tokenizer = AutoTokenizer.from_pretrained(self.FALLBACK_MODEL_TAG)
            self.model = AutoModelForSequenceClassification.from_pretrained(
                self.FALLBACK_MODEL_TAG
            )
            self.model.to(self.device)
            self.model.eval()
            logger.info("Fallback model loaded successfully.")
        except Exception as e:
            logger.error("Failed to load fallback model: %s", e)
            # Chain the original exception so the full cause is preserved.
            raise RuntimeError(
                f"Failed to load AI Detector model (both quantized and fallback failed): {e}"
            ) from e

    def predict(self, text: str, max_length: int = 512):
        """Run inference on *text*.

        Args:
            text: Input text to classify.
            max_length: Maximum token length before truncation (default 512).

        Returns:
            dict with keys "label" ("Human"/"AI"), "score" (confidence of the
            predicted class), and "probabilities" (per-class probabilities).

        Raises:
            RuntimeError: If load() has not been called successfully.
        """
        if self.model is None or self.tokenizer is None:
            raise RuntimeError("Model is not loaded.")

        inputs = self.tokenizer(
            text, return_tensors="pt", truncation=True, max_length=max_length
        ).to(self.device)

        with torch.no_grad():
            outputs = self.model(**inputs)
            logits = outputs.logits
            probabilities = torch.softmax(logits, dim=1)
            predicted_class = torch.argmax(probabilities, dim=1).item()

        # Label convention: index 0 -> Human, index 1 -> AI.
        labels_map = {0: "Human", 1: "AI"}
        label = labels_map.get(predicted_class, "Unknown")
        confidence = probabilities[0][predicted_class].item()

        return {
            "label": label,
            "score": confidence,
            "probabilities": probabilities.cpu().numpy().tolist()[0],
        }