import gradio as gr
import torch
import torch.nn.functional as F
from transformers import AutoModelForSequenceClassification, AutoTokenizer, GPT2LMHeadModel, GPT2TokenizerFast
from nltk.tokenize import sent_tokenize
import nltk

# --- CONFIGURATION ---
MODEL_NAME = "ShivamVN/My-Ai-Text-Detector"

# --- SETUP ---
# Fix for the nltk tokenizer-data error
nltk.download('punkt')
nltk.download('punkt_tab')

print("Initializing App...")

# Detect Hardware
device = "cuda" if torch.cuda.is_available() else "cpu"

# 1. Load the RoBERTa classifier
print(f"Loading {MODEL_NAME}...")
try:
    clf_model = AutoModelForSequenceClassification.from_pretrained(MODEL_NAME).to(device)
    clf_tokenizer = AutoTokenizer.from_pretrained(MODEL_NAME)
except Exception as e:
    print(f"Error loading RoBERTa: {e}")

# 2. Load GPT-2 (used only to compute perplexity)
print("Loading GPT-2...")
try:
    ppl_model = GPT2LMHeadModel.from_pretrained("gpt2").to(device)
    ppl_tokenizer = GPT2TokenizerFast.from_pretrained("gpt2")
except Exception as e:
    print(f"Error loading GPT-2: {e}")

# --- CORE FUNCTIONS ---
def get_roberta_prob(text):
    """Return the classifier's probability that `text` is AI-generated.

    Assumes label index 1 of the classification head is the AI class.
    """
    if not text.strip():
        return 0.0
    inputs = clf_tokenizer(text, return_tensors="pt", truncation=True, padding=True, max_length=512).to(device)
    with torch.no_grad():
        outputs = clf_model(**inputs)
    return F.softmax(outputs.logits, dim=-1).cpu().numpy()[0][1]

def get_perplexity(text):
    """Return GPT-2 perplexity of `text` (lower = more predictable text)."""
    if not text.strip():
        return 0.0
    # Truncate to GPT-2's 1024-token context window so very long input cannot crash the model.
    encodings = ppl_tokenizer(text, return_tensors="pt", truncation=True, max_length=1024)
    input_ids = encodings.input_ids.to(device)
    with torch.no_grad():
        outputs = ppl_model(input_ids, labels=input_ids)
    return torch.exp(outputs.loss).item()

# ==========================================
# TEMPLATE 1: ONLY MODEL (Fast Check)
# ==========================================
def template_model_only(text):
    if not text:
        return "Please enter text."
    ai_prob = get_roberta_prob(text)
    percent = ai_prob * 100
    label = "AI-GENERATED" if ai_prob > 0.5 else "HUMAN-WRITTEN"
    emoji = "🔴" if ai_prob > 0.5 else "🟢"
    return f"# {emoji} {label}\n**Confidence:** {percent:.2f}%"

# ==========================================
# TEMPLATE 2: FULL SYSTEM (Advanced Logic)
# ==========================================
def template_full_system(text):
    if not text:
        return "Please enter text."
    sentences = sent_tokenize(text)
    if not sentences:
        return "No text detected."

    # 1. SLIDING WINDOW: score overlapping 2-sentence chunks so each sentence
    #    collects one classifier score per window it appears in.
    window_size = 2
    sentence_raw_scores = {i: [] for i in range(len(sentences))}
    for i in range(len(sentences) - window_size + 1):
        chunk = " ".join(sentences[i : i + window_size])
        prob = get_roberta_prob(chunk)
        for j in range(window_size):
            sentence_raw_scores[i + j].append(prob)

    # 2. HYBRID LOGIC: combine the window scores with per-sentence perplexity.
    log_output = f"{'SENTENCE':<60} | {'SCORE':<5} | {'PPL':<4} | {'VERDICT'}\n"
    log_output += "-" * 95 + "\n"
    total_ai = 0

    for i in range(len(sentences)):
        sent = sentences[i]
        scores = sentence_raw_scores[i]
        if not scores:
            scores = [0.0]

        # Calculate stats over all windows containing this sentence
        min_s = min(scores)
        max_s = max(scores)
        avg_s = sum(scores) / len(scores)  # average, used for display when the status is UNCERTAIN

        # Determine status from the classifier scores alone
        status = "UNCERTAIN"
        if min_s > 0.80:
            status = "AI"
        elif max_s < 0.20:
            status = "HUMAN"

        # Perplexity check
        ppl = get_perplexity(sent)

        # Final decision logic: perplexity breaks ties and confirms AI calls
        final = "HUMAN"
        if status == "UNCERTAIN":
            if ppl < 40:
                final = "AI"
        elif status == "AI":
            if ppl < 100:
                final = "AI"

        if final == "AI":
            total_ai += 1

        # Display logic: if uncertain, show the average (e.g. 50%) instead of the max (e.g. 99%)
        if status == "UNCERTAIN":
            display_score = avg_s
        else:
            display_score = max_s

        # Formatting
        disp_sent = (sent[:57] + "..") if len(sent) > 57 else sent.ljust(59)
        score_val = f"{display_score * 100:.0f}%"
        ppl_val = f"{int(ppl)}"
        log_output += f"{disp_sent} | {score_val:<5} | {ppl_val:<4} | {final}\n"

    # 3. SUMMARY
    ai_percent = (total_ai / len(sentences)) * 100
    verdict = "🟢 LIKELY HUMAN"
    if ai_percent > 40:
        verdict = "🔴 LIKELY AI"

    return f"# {verdict}\n**AI Sentences:** {ai_percent:.1f}%\n\n```text\n{log_output}\n```"

# ==========================================
# USER INTERFACE
# ==========================================
with gr.Blocks() as demo:
    gr.Markdown("# 🕵️‍♂️ AI Text Detector Suite")
    gr.Markdown(f"Current Model: `{MODEL_NAME}`")

    with gr.Tabs():
        with gr.TabItem("Template 1: Only Model"):
            gr.Markdown("### ⚡ Fast Check")
            t1_input = gr.Textbox(lines=5, placeholder="Paste text here...", label="Input Text")
            t1_button = gr.Button("Analyze (Model Only)", variant="primary")
            t1_output = gr.Markdown(label="Result")
            t1_button.click(template_model_only, inputs=t1_input, outputs=t1_output)

        with gr.TabItem("Template 2: Full System"):
            gr.Markdown("### 🧠 Deep Analysis")
            t2_input = gr.Textbox(lines=8, placeholder="Paste text here...", label="Input Text")
            t2_button = gr.Button("Analyze (Full System)", variant="primary")
            t2_output = gr.Markdown(label="Detailed Report")
            t2_button.click(template_full_system, inputs=t2_input, outputs=t2_output)

demo.launch()
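
# --- OPTIONAL SMOKE TEST (a minimal sketch, not part of the app) ---
# Both pipelines can be exercised without the UI by calling the template
# functions directly, e.g. in a Python shell after loading this file, or by
# temporarily placing these calls above demo.launch() (which blocks).
# The sample text below is arbitrary and only for illustration.
# sample = "The sun rose over the quiet town. Birds sang in the trees."
# print(template_model_only(sample))
# print(template_full_system(sample))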