# AI_Humanizer/agents/verifier.py
# Author: AJAY KASU
# Commit 37bddee -- feat: implement math-based humanizer (style+semantic+detector scoring)
"""
Verifier Agent
--------------
Uses roberta-base-openai-detector via HF Inference API (not
loaded locally!) to check whether text reads as human- or
AI-generated. Also has a post-processing fallback for texts
that stubbornly read as AI after 3 humanizer loops.
"""
import os
import re
import random
import logging
from huggingface_hub import InferenceClient  # remote inference -- the detector model is never loaded locally

# Module-level logger; handlers/levels are configured by the application.
logger = logging.getLogger(__name__)

# Canonical labels emitted by Verifier.verify -- "Real" means the detector
# reads the text as human-written, "Fake" as AI-generated.
_REAL_LABEL = "Real"
_FAKE_LABEL = "Fake"
class Verifier:
    """Runs AI-detection on text via the Hugging Face Inference API.

    The detector model (roberta-base-openai-detector) is queried remotely,
    never loaded locally. A static post-processing fallback is provided for
    texts that still read as AI-generated after the humanizer loop maxes out.
    """

    def __init__(self, hf_token=None):
        """Create the remote inference client.

        Parameters
        ----------
        hf_token : str, optional
            Hugging Face API token. Falls back to the ``HF_TOKEN``
            environment variable, then to the empty string.
        """
        self.token = hf_token or os.getenv("HF_TOKEN", "")
        self.client = InferenceClient(token=self.token)
        self.model = "openai-community/roberta-base-openai-detector"

    # ------------------------------------------------------------------
    # public api
    # ------------------------------------------------------------------
    def verify(self, text):
        """Classify ``text`` as human- or AI-written via the HF API.

        Parameters
        ----------
        text : str
            Text to classify. Only the first ~1500 characters are sent
            (roberta context limit).

        Returns
        -------
        dict
            ``{"label": "Real"|"Fake"|"Unknown", "confidence": float}``
            where ``confidence`` is normalized to mean "probability the
            text is AI-generated". On any API failure the neutral
            ``("Unknown", 0.5)`` is returned -- callers never see an
            exception from this method.
        """
        # truncate to ~1500 chars (roberta context limit)
        snippet = text[:1500]
        try:
            results = self.client.text_classification(snippet, model=self.model)
            # results is a list, e.g. [{"label": "LABEL_0", "score": 0.99}, ...].
            # Pick the highest-scoring entry explicitly rather than assuming
            # the API returns them sorted.
            top = max(results, key=lambda r: r["score"])
            label_raw = top["label"]  # e.g. LABEL_0 or Real
            score = round(top["score"], 4)
            # map labels -- some model versions use LABEL_0/1,
            # others use Real/Fake. handle both, and normalize the
            # confidence so it is always "probability of AI-generated".
            if label_raw in ("LABEL_0", "Real"):
                label = _REAL_LABEL
                ai_confidence = round(1 - score, 4)
            else:
                label = _FAKE_LABEL
                ai_confidence = score
            logger.info("verifier: label=%s ai_confidence=%.4f", label, ai_confidence)
            return {"label": label, "confidence": ai_confidence}
        except Exception:
            # Broad catch is deliberate: the verifier must never crash the
            # humanizer pipeline. logger.exception records the traceback.
            logger.exception("verifier API call failed")
            return {"label": "Unknown", "confidence": 0.5}

    # ------------------------------------------------------------------
    # post-processing fallback (last resort after 3 loops)
    # ------------------------------------------------------------------
    @staticmethod
    def apply_last_resort(text):
        """
        If the humanizer loop maxed out and text still reads as AI,
        apply brute-force perturbations:
            1. lightly shuffle adjacent sentences
            2. inject a minor typo in a random word
            3. break one long sentence into two

        Fix vs. previous revision: step 3 used to give up after inspecting
        only the FIRST >18-word sentence; it now scans every sentence and
        splits the first one that actually has a conjunction near its
        midpoint.
        """
        sentences = re.split(r'(?<=[.!?])\s+', text)

        # 1) swap a random pair of adjacent sentences
        if len(sentences) > 3:
            idx = random.randint(1, len(sentences) - 2)
            sentences[idx], sentences[idx - 1] = sentences[idx - 1], sentences[idx]

        # 2) inject a subtle typo (transpose two adjacent inner characters)
        if len(sentences) > 1:
            target_idx = random.randint(0, len(sentences) - 1)
            words = sentences[target_idx].split()
            if len(words) > 4:
                word_idx = random.randint(2, len(words) - 1)
                w = words[word_idx]
                if len(w) > 4:
                    pos = random.randint(1, len(w) - 2)
                    w = w[:pos] + w[pos + 1] + w[pos] + w[pos + 2:]
                    words[word_idx] = w
                    sentences[target_idx] = " ".join(words)

        # 3) break one long sentence at a conjunction near its midpoint
        for i, s in enumerate(sentences):
            wds = s.split()
            if len(wds) <= 18:
                continue
            mid = len(wds) // 2
            for offset in range(5):
                check = mid + offset
                if check < len(wds) and wds[check].rstrip(",") in (
                    "and", "but", "which", "that", "because"
                ):
                    first_half = " ".join(wds[:check]).rstrip(",") + "."
                    second_half = " ".join(wds[check:])
                    second_half = second_half[0].upper() + second_half[1:]
                    sentences[i] = first_half + " " + second_half
                    break
            else:
                # no conjunction near this sentence's midpoint -- try the next
                continue
            break  # one sentence was split; stop

        return " ".join(sentences)
if __name__ == "__main__":
    # Quick manual smoke test: classify one sample paragraph and print
    # the raw verdict dict.
    demo_text = (
        "The rapid advancement of artificial intelligence presents both "
        "opportunities and challenges for modern society."
    )
    verifier = Verifier()
    print(verifier.verify(demo_text))