import gradio as gr
import torch
import torch.nn.functional as F
from transformers import AutoModelForSequenceClassification, AutoTokenizer, GPT2LMHeadModel, GPT2TokenizerFast
from nltk.tokenize import sent_tokenize
import nltk

# --- CONFIGURATION ---
MODEL_NAME = "ShivamVN/My-Ai-Text-Detector"

# --- SETUP ---
# sent_tokenize needs the punkt data; newer NLTK releases also look for punkt_tab.
nltk.download('punkt')
nltk.download('punkt_tab')

print("Initializing App...")

# Detect hardware
device = "cuda" if torch.cuda.is_available() else "cpu"

# 1. Load RoBERTa
print(f"Loading {MODEL_NAME}...")
try:
    clf_model = AutoModelForSequenceClassification.from_pretrained(MODEL_NAME).to(device)
    clf_tokenizer = AutoTokenizer.from_pretrained(MODEL_NAME)
except Exception as e:
    print(f"Error loading RoBERTa: {e}")

# 2. Load GPT-2
print("Loading GPT-2...")
try:
    ppl_model = GPT2LMHeadModel.from_pretrained("gpt2").to(device)
    ppl_tokenizer = GPT2TokenizerFast.from_pretrained("gpt2")
except Exception as e:
    print(f"Error loading GPT-2: {e}")

# --- CORE FUNCTIONS ---
def get_roberta_prob(text):
    """Return the classifier's probability that `text` is AI-generated."""
    if not text.strip():
        return 0.0
    inputs = clf_tokenizer(text, return_tensors="pt", truncation=True, padding=True, max_length=512).to(device)
    with torch.no_grad():
        outputs = clf_model(**inputs)
    return F.softmax(outputs.logits, dim=-1).cpu().numpy()[0][1]
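
# NOTE: the trailing [0][1] index assumes label id 1 is the "AI" class; if you swap
# in a different checkpoint, check clf_model.config.id2label to confirm the mapping.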

def get_perplexity(text):
    """Return GPT-2 perplexity of `text`; lower means more predictable (more AI-like)."""
    if not text.strip():
        return 0.0
    # Truncate to GPT-2's 1024-token context window to avoid errors on long inputs.
    encodings = ppl_tokenizer(text, return_tensors="pt", truncation=True, max_length=1024)
    input_ids = encodings.input_ids.to(device)
    with torch.no_grad():
        outputs = ppl_model(input_ids, labels=input_ids)
    return torch.exp(outputs.loss).item()
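
# Perplexity is exp(mean token negative log-likelihood): a loss of 3.0 gives
# PPL ≈ e^3 ≈ 20 (very predictable, AI-like), while 5.0 gives PPL ≈ 148 (more
# surprising, human-like). The 40 / 100 thresholds used below sit between these.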

# ==========================================
# TEMPLATE 1: ONLY MODEL (Fast Check)
# ==========================================
def template_model_only(text):
    if not text:
        return "Please enter text."
    ai_prob = get_roberta_prob(text)
    label = "AI-GENERATED" if ai_prob > 0.5 else "HUMAN-WRITTEN"
    emoji = "🔴" if ai_prob > 0.5 else "🟢"
    # Report confidence in the predicted label, not the raw AI probability.
    confidence = ai_prob * 100 if ai_prob > 0.5 else (1 - ai_prob) * 100
    return f"# {emoji} {label}\n**Confidence:** {confidence:.2f}%"

# ==========================================
# TEMPLATE 2: FULL SYSTEM (Advanced Logic)
# ==========================================
def template_full_system(text):
    if not text:
        return "Please enter text."
    sentences = sent_tokenize(text)
    if not sentences:
        return "No text detected."

    # 1. SLIDING WINDOW: score overlapping 2-sentence chunks so each sentence
    # collects a score from every window that contains it.
    window_size = 2
    sentence_raw_scores = {i: [] for i in range(len(sentences))}
    for i in range(len(sentences) - window_size + 1):
        chunk = " ".join(sentences[i : i + window_size])
        prob = get_roberta_prob(chunk)
        for j in range(window_size):
            sentence_raw_scores[i + j].append(prob)
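
    # Example: with 4 sentences and window_size = 2 the chunks are (s0 s1), (s1 s2),
    # (s2 s3); interior sentences collect two scores, the first and last only one.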

    # 2. HYBRID LOGIC
    log_output = f"{'SENTENCE':<60} | {'SCORE':<5} | {'PPL':<4} | {'VERDICT'}\n"
    log_output += "-" * 95 + "\n"
    total_ai = 0
    for i, sent in enumerate(sentences):
        scores = sentence_raw_scores[i]
        if not scores:
            # Fewer sentences than the window: score the sentence on its own
            # (the old [0.0] fallback silently marked such texts human).
            scores = [get_roberta_prob(sent)]
        # Window statistics
        min_s = min(scores)
        max_s = max(scores)
        avg_s = sum(scores) / len(scores)  # average, used for the uncertain-case display

        # Classifier verdict: commit only when every window agrees
        status = "UNCERTAIN"
        if min_s > 0.80:
            status = "AI"
        elif max_s < 0.20:
            status = "HUMAN"

        # Perplexity check
        ppl = get_perplexity(sent)
        # Final decision: low perplexity confirms the classifier or breaks ties
        final = "HUMAN"
        if status == "UNCERTAIN":
            if ppl < 40:
                final = "AI"
        elif status == "AI":
            if ppl < 100:
                final = "AI"
        if final == "AI":
            total_ai += 1

        # Display logic: when uncertain, show the average window score (e.g. 50%)
        # instead of the max (e.g. 99%), which would overstate confidence.
        display_score = avg_s if status == "UNCERTAIN" else max_s

        # Formatting: keep the sentence column exactly 60 chars to match the header.
        disp_sent = (sent[:58] + "..") if len(sent) > 60 else sent.ljust(60)
        score_val = f"{display_score * 100:.0f}%"
        ppl_val = f"{int(ppl)}"
        log_output += f"{disp_sent} | {score_val:<5} | {ppl_val:<4} | {final}\n"

    # 3. SUMMARY
    ai_percent = (total_ai / len(sentences)) * 100
    verdict = "🟢 LIKELY HUMAN"
    if ai_percent > 40:
        verdict = "🔴 LIKELY AI"
    return f"# {verdict}\n**AI Sentences:** {ai_percent:.1f}%\n\n```text\n{log_output}\n```"

# ==========================================
# USER INTERFACE
# ==========================================
with gr.Blocks() as demo:
    gr.Markdown("# 🕵️‍♂️ AI Text Detector Suite")
    gr.Markdown(f"Current Model: `{MODEL_NAME}`")
    with gr.Tabs():
        with gr.TabItem("Template 1: Only Model"):
            gr.Markdown("### ⚡ Fast Check")
            t1_input = gr.Textbox(lines=5, placeholder="Paste text here...", label="Input Text")
            t1_button = gr.Button("Analyze (Model Only)", variant="primary")
            t1_output = gr.Markdown(label="Result")
            t1_button.click(template_model_only, inputs=t1_input, outputs=t1_output)
        with gr.TabItem("Template 2: Full System"):
            gr.Markdown("### 🧠 Deep Analysis")
            t2_input = gr.Textbox(lines=8, placeholder="Paste text here...", label="Input Text")
            t2_button = gr.Button("Analyze (Full System)", variant="primary")
            t2_output = gr.Markdown(label="Detailed Report")
            t2_button.click(template_full_system, inputs=t2_input, outputs=t2_output)

demo.launch()
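
# launch() also accepts options such as share=True (temporary public URL) or
# server_port=7860 if you need a fixed port.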