Spaces:
Sleeping
Sleeping
| import warnings | |
| warnings.filterwarnings("ignore") | |
| import joblib | |
| import numpy as np | |
| import torch | |
| from transformers import AutoTokenizer, AutoModelForSequenceClassification | |
# ============================================================
# TF-IDF pipeline artifacts (per the original notes: trained
# in-house on 728K samples)
# ============================================================
# Loaded in a fixed order: vectorizer first, then the two classifiers
# that consume its output.
vectorizer, lr_model, sgd_model = (
    joblib.load(artifact_path)
    for artifact_path in (
        "text_vectorizer.pkl",
        "text_model_lr.pkl",
        "text_model_sgd.pkl",
    )
)
# ============================================================
# Load GLYPH — DeBERTa-v3 pretrained detector
# Per the original notes: 98.85% accuracy, covers GPT-2 through
# GPT-4 (14 AI families); used as the benchmark comparison model.
# ============================================================
GLYPH_MODEL = "ogmatrixllm/glyph-v1.1"

# Prefer the GPU when torch can see one, otherwise run on CPU.
if torch.cuda.is_available():
    device = torch.device("cuda")
else:
    device = torch.device("cpu")

print("Loading GLYPH model...")
try:
    # IMPORTANT: use_fast=False is required — the original author notes a
    # regression in the fast tokenizer for DeBERTa-v3 on transformers>=4.47.
    glyph_tokenizer = AutoTokenizer.from_pretrained(GLYPH_MODEL, use_fast=False)
    glyph_model = AutoModelForSequenceClassification.from_pretrained(GLYPH_MODEL)
    # Move to the chosen device and switch to inference mode.
    glyph_model.to(device).eval()
    GLYPH_AVAILABLE = True
    # NOTE(review): the leading "β" in the two status messages looks like a
    # mis-decoded status glyph — confirm against the original file.
    print("β GLYPH loaded")
    print(glyph_model.config.id2label)
    print("LABEL_0 = HUMAN , LABEL_1= AI")
except Exception as exc:
    # Best-effort load: any failure (including one raised by the prints
    # above) disables GLYPH instead of aborting the module import.
    print(f"β GLYPH not available: {exc}")
    GLYPH_AVAILABLE = False
| # ============================================================ | |
| # Individual predictors | |
| # ============================================================ | |
def predict_lr(vec) -> dict:
    """Classify one vectorized text with the logistic-regression model.

    Args:
        vec: Feature matrix accepted by ``lr_model.predict_proba``; only the
            prediction for the first row is used. Presumably the output of
            ``vectorizer.transform([text])`` — confirm against callers.

    Returns:
        A dict with three keys:
        ``label`` — "AI-generated" when the AI probability is >= 0.5,
        otherwise "Human-written";
        ``confidence`` — probability of the *reported* label, rounded to
        4 decimals;
        ``ai_prob`` — probability of the AI class, rounded to 4 decimals.
    """
    # Column 1 is treated as the AI class (original author's note); this
    # assumes lr_model.classes_ is ordered [human, AI] — TODO confirm.
    ai_prob = float(lr_model.predict_proba(vec)[0][1])
    is_ai = ai_prob >= 0.5
    return {
        "label": "AI-generated" if is_ai else "Human-written",
        # Confidence describes the predicted label, so a 0.10 AI probability
        # is a 0.90-confidence human verdict. This matches how predict_glyph
        # and predict_text report confidence.
        "confidence": round(ai_prob if is_ai else 1.0 - ai_prob, 4),
        "ai_prob": round(ai_prob, 4),
    }
def predict_sgd(vec) -> dict:
    """Classify one vectorized text with the SGD linear model.

    Uses ``predict_proba`` when the loaded model exposes it; otherwise the
    signed margin from ``decision_function`` is squashed through a logistic
    function. That fallback score is a monotone mapping of the margin, not a
    calibrated probability.

    Args:
        vec: Feature matrix accepted by ``sgd_model``; only the prediction
            for the first row is used. Presumably the output of
            ``vectorizer.transform([text])`` — confirm against callers.

    Returns:
        A dict with three keys:
        ``label`` — "AI-generated" when the AI score is >= 0.5, otherwise
        "Human-written";
        ``confidence`` — score of the *reported* label, rounded to
        4 decimals;
        ``ai_prob`` — score of the AI class, rounded to 4 decimals.
    """
    if hasattr(sgd_model, "predict_proba"):
        # Column 1 is treated as the AI class, mirroring predict_lr —
        # assumes classes_ is ordered [human, AI]; TODO confirm.
        ai_prob = float(sgd_model.predict_proba(vec)[0][1])
    else:
        margin = float(sgd_model.decision_function(vec)[0])
        # Overflow-free logistic: only ever exponentiate a non-positive
        # number, so extreme margins cannot overflow np.exp.
        if margin >= 0:
            ai_prob = 1.0 / (1.0 + float(np.exp(-margin)))
        else:
            exp_margin = float(np.exp(margin))
            ai_prob = exp_margin / (1.0 + exp_margin)

    is_ai = ai_prob >= 0.5
    return {
        "label": "AI-generated" if is_ai else "Human-written",
        # Confidence describes the predicted label, matching how
        # predict_glyph and predict_text report confidence.
        "confidence": round(ai_prob if is_ai else 1.0 - ai_prob, 4),
        "ai_prob": round(ai_prob, 4),
    }
def predict_glyph(text: str) -> dict | None:
    """Classify raw text with the GLYPH DeBERTa-v3 detector.

    Per the original author's notes, GLYPH is a fine-tuned DeBERTa-v3-base
    model reporting 98.85% accuracy across 14 AI model families (GPT-2 to
    GPT-4), with LABEL_0 = human and LABEL_1 = AI-generated.

    Args:
        text: Text to classify. Input is truncated to 512 tokens.

    Returns:
        ``None`` when GLYPH failed to load or inference raised; otherwise a
        dict with three keys:
        ``label`` — "AI-generated" when the AI probability is >= 0.5,
        otherwise "Human-written";
        ``confidence`` — the larger of the two class probabilities, rounded
        to 4 decimals;
        ``ai_prob`` — probability of the AI class, rounded to 4 decimals.
    """
    if not GLYPH_AVAILABLE:
        return None
    try:
        inputs = glyph_tokenizer(
            text,
            return_tensors="pt",
            truncation=True,
            max_length=512,  # original note: trained at 512, longer texts truncated
        ).to(device)
        with torch.no_grad():
            logits = glyph_model(**inputs).logits
        probs = torch.softmax(logits, dim=-1)
        # Unpacking requires exactly two classes; any other head size raises
        # and is reported through the handler below.
        p_human, p_ai = probs[0].tolist()
        return {
            # ">=" keeps the tie-break identical to predict_lr, predict_sgd
            # and predict_text, so an exact 0.5 cannot cause a label mismatch.
            "label": "AI-generated" if p_ai >= 0.5 else "Human-written",
            "confidence": round(max(p_human, p_ai), 4),
            "ai_prob": round(float(p_ai), 4),
        }
    except Exception as e:
        # Deliberate best-effort: a GLYPH failure must not break the overall
        # prediction; the caller treats None as "GLYPH not available".
        print(f"GLYPH error: {e}")
        return None
| # ============================================================ | |
| # Main predict function | |
| # ============================================================ | |
def predict_text(text: str) -> dict:
    """Run every detector on *text* and combine them into one verdict.

    The two TF-IDF models (LR and SGD) each get weight 1.0. GLYPH gets
    weight 2.0 when it returns a result, because the original author rates
    it as the stronger model (98.85% vs ~91% accuracy). The final AI score
    is the weighted mean of the per-model ``ai_prob`` values, rounded to
    4 decimals.

    Args:
        text: Raw text to classify.

    Returns:
        A dict with five keys:
        ``lr``, ``sgd`` — the per-model result dicts;
        ``glyph`` — GLYPH's result dict, or ``None`` if it was unavailable;
        ``final`` — ``{"label", "confidence", "ai_score"}`` for the ensemble,
        where ``confidence`` is the score of the reported label;
        ``warning`` — a human-readable caveat string, or ``None`` when no
        caveat applies.
    """
    vec = vectorizer.transform([text])

    lr_result = predict_lr(vec)
    sgd_result = predict_sgd(vec)
    glyph_result = predict_glyph(text)  # None when GLYPH is unavailable or failed
    has_glyph = glyph_result is not None

    # (result, weight) pairs, kept in a fixed order so the weighted sum is
    # evaluated deterministically.
    weighted_results = [(lr_result, 1.0), (sgd_result, 1.0)]
    if has_glyph:
        weighted_results.append((glyph_result, 2.0))

    total_weight = sum(weight for _, weight in weighted_results)
    avg_ai_score = round(
        sum(result["ai_prob"] * weight for result, weight in weighted_results)
        / total_weight,
        4,
    )

    is_ai = avg_ai_score >= 0.5
    final_label = "AI-generated" if is_ai else "Human-written"
    confidence = avg_ai_score if is_ai else 1 - avg_ai_score

    # Collect caveats. A borderline score takes precedence over model
    # disagreement; the short-text note is appended to whichever applies.
    notes = []
    distinct_labels = {result["label"] for result, _ in weighted_results}
    if 0.4 <= avg_ai_score <= 0.6:
        notes.append(
            "Borderline prediction — low confidence. "
            "Consider this result as indicative only."
        )
    elif len(distinct_labels) > 1:
        if has_glyph:
            notes.append(
                "Models disagree on this text. GLYPH result is weighted "
                "higher as it covers more AI model families."
            )
        else:
            # GLYPH took no part in this prediction, so do not claim that
            # its result was weighted higher.
            notes.append("Models disagree on this text.")

    # Short text warning (original note: GLYPH docs report reduced F1 below 50 words).
    if len(text.split()) < 50:
        notes.append(
            "Short text detected — accuracy may be reduced. "
            "Provide more text for reliable detection."
        )

    # Stay None when nothing applies so callers can test `warning is None`.
    warning = " ".join(notes) if notes else None

    return {
        "lr": lr_result,
        "sgd": sgd_result,
        "glyph": glyph_result,  # None if unavailable
        "final": {
            "label": final_label,
            "confidence": round(float(confidence), 4),
            "ai_score": avg_ai_score,
        },
        "warning": warning,
    }