import os
import torch
from pathlib import Path
from nltk.tokenize import sent_tokenize
from transformers import RobertaTokenizer, RobertaForSequenceClassification
from reportlab.lib.pagesizes import A4
from reportlab.pdfgen import canvas
from reportlab.lib import colors
from io import BytesIO
import nltk
# === Environment Setup ===
# Redirect all Hugging Face and NLTK caches into /tmp — needed on hosts
# where $HOME is not writable (e.g. a Hugging Face Space container).
os.environ["HF_HOME"] = "/tmp/hf_home"
os.environ["TRANSFORMERS_CACHE"] = "/tmp/hf_home"  # legacy alias of HF_HOME, kept for older transformers versions
os.environ["NLTK_DATA"] = "/tmp/nltk_data"
nltk.data.path.append("/tmp/nltk_data")

# === Model Source (Hugging Face or Local) ===
# USE_HF_MODEL=1 pulls the detector from the Hub (HF_TOKEN required if the
# repo is private); any other value loads the bundled local checkpoint.
USE_HF_MODEL = os.getenv("USE_HF_MODEL") == "1"
hf_token = os.getenv("HF_TOKEN")
MODEL_PATH = "AlyanAkram/stealth-roberta" if USE_HF_MODEL else "./detector/models/roberta-detector"

if USE_HF_MODEL:
    print("π Loading model from Hugging Face Hub...")
    tokenizer = RobertaTokenizer.from_pretrained(MODEL_PATH, token=hf_token)
    model = RobertaForSequenceClassification.from_pretrained(MODEL_PATH, token=hf_token)
else:
    print("π Loading model from local files...")
    tokenizer = RobertaTokenizer.from_pretrained(MODEL_PATH, local_files_only=True)
    model = RobertaForSequenceClassification.from_pretrained(MODEL_PATH, local_files_only=True)

# Inference only: freeze dropout/batch-norm and move to GPU when available.
model.eval().to("cuda" if torch.cuda.is_available() else "cpu")
device = next(model.parameters()).device  # reuse the model's device for input tensors

# === Constants ===
AI_THRESHOLD = 0.5  # sentence is flagged as AI when P(class 1) >= this
# === Main Analysis Function ===
def analyze_text(text: str):
    """Score every sentence of *text* as AI-generated or human-written.

    The text is split on newlines into non-empty paragraphs, each paragraph
    into sentences with NLTK, and each sentence is classified with the
    RoBERTa detector.  A sentence counts as AI when the softmax probability
    of class 1 is at least AI_THRESHOLD.

    Returns a dict with:
        overall_ai_percent: percentage of sentences flagged AI (0 if none)
        total_sentences:    number of sentences scored
        ai_sentences:       number of sentences flagged AI
        results:            per-paragraph lists of (sentence, is_ai, ai_prob)
    """
    per_paragraph = []
    flagged = 0
    seen = 0
    for para in (chunk.strip() for chunk in text.split("\n")):
        if not para:
            continue
        scored = []
        for sent in sent_tokenize(para):
            encoded = tokenizer(
                sent, return_tensors="pt", truncation=True, padding=True, max_length=512
            ).to(device)
            # Inference only — no gradients needed.
            with torch.no_grad():
                logits = model(**encoded).logits
            prob_ai = torch.nn.functional.softmax(logits, dim=-1)[0][1].item()
            flagged_ai = prob_ai >= AI_THRESHOLD
            scored.append((sent, flagged_ai, prob_ai))
            seen += 1
            if flagged_ai:
                flagged += 1
        per_paragraph.append(scored)
    return {
        "overall_ai_percent": round((flagged / seen) * 100, 2) if seen else 0,
        "total_sentences": seen,
        "ai_sentences": flagged,
        "results": per_paragraph,
    }
# === PDF Report Generator (In-Memory) ===
def generate_pdf_report(results: dict, filename: str) -> BytesIO:
    """Render the analyze_text() results as an in-memory A4 PDF.

    AI-flagged sentences are drawn over a cyan highlight; sentences are
    word-wrapped to the page width and pages break automatically when the
    cursor runs out of vertical room.

    Args:
        results: dict produced by analyze_text() ('overall_ai_percent',
            'total_sentences', 'results' keys are read).
        filename: document name shown in the report title.

    Returns:
        BytesIO positioned at offset 0, containing the finished PDF.
    """
    buffer = BytesIO()
    c = canvas.Canvas(buffer, pagesize=A4)
    width, height = A4
    x, y = 40, height - 60
    line_height, font_size = 18, 12

    # --- Header ---
    c.setFont("Helvetica-Bold", 14)
    # BUG FIX: the title previously hard-coded "(unknown)" and ignored the
    # `filename` parameter entirely.
    c.drawString(x, y, f"π AI Detection Report: {filename}")
    y -= 25
    c.setFont("Helvetica", font_size)
    c.drawString(x, y, f"π§ AI Detected: {results['overall_ai_percent']}% of {results['total_sentences']} sentences")
    y -= 30

    # --- Body: wrap each sentence to page width, one drawn line at a time ---
    for para_result in results["results"]:
        for sentence, is_ai, _ in para_result:
            sentence = sentence.strip()
            if not sentence:
                continue
            if y < 50:  # no room for another line: start a new page
                c.showPage()
                y = height - 50
            current_line = ""
            for word in sentence.split():
                test_line = f"{current_line} {word}".strip()
                if c.stringWidth(test_line, "Helvetica", font_size) > width - 80:
                    # Adding this word would overflow: flush the current line.
                    _draw_report_line(c, x, y, current_line, is_ai, font_size, line_height)
                    y -= line_height
                    current_line = word
                else:
                    current_line = test_line
            if current_line:  # flush the final (partial) line of the sentence
                _draw_report_line(c, x, y, current_line, is_ai, font_size, line_height)
                y -= line_height
        y -= line_height  # blank gap between paragraphs

    c.save()
    buffer.seek(0)
    return buffer


def _draw_report_line(c, x, y, text, highlight, font_size, line_height):
    """Draw one line of text at (x, y); paint a cyan box behind it when `highlight`."""
    if highlight:
        c.setFillColor(colors.cyan)
        c.rect(x - 2, y - 4,
               c.stringWidth(text, "Helvetica", font_size) + 4,
               line_height + 2, fill=True, stroke=False)
        c.setFillColor(colors.black)  # restore text color after the highlight box
    c.drawString(x, y, text)
|