import os
import sys

import docx
import nltk
import torch
from nltk.tokenize import sent_tokenize
from reportlab.lib import colors
from reportlab.lib.pagesizes import A4
from reportlab.lib.utils import simpleSplit
from reportlab.pdfgen import canvas
from transformers import RobertaTokenizer, RobertaForSequenceClassification

# sent_tokenize needs the Punkt sentence-boundary data. Recent NLTK releases
# load it from the "punkt_tab" package instead of the legacy pickled "punkt"
# package, so make sure both are present. The local lookup comes first so the
# downloader (and the network) is only touched when something is missing.
for _resource in ("punkt", "punkt_tab"):
    try:
        nltk.data.find(f"tokenizers/{_resource}")
    except LookupError:
        nltk.download(_resource)

# Load the tokenizer and classifier weights from the local checkpoint folder.
model_dir = "./models/roberta-detector"
tokenizer = RobertaTokenizer.from_pretrained(model_dir)
model = RobertaForSequenceClassification.from_pretrained(model_dir)

# Inference only: put the module in evaluation mode, then move it to the GPU
# when CUDA is available and to the CPU otherwise.
model.eval()
if torch.cuda.is_available():
    model.to("cuda")
else:
    model.to("cpu")

# Remember where the weights ended up so inputs can be sent to the same device.
device = next(model.parameters()).device

# === THRESHOLD CONFIG ===
# A sentence is flagged as AI-written when its AI probability is >= this value.
AI_THRESHOLD = 0.50  # Adjust this as needed for better results

# === Input File ===
# The document to analyse is the first command-line argument. Fail with a
# readable message instead of a bare IndexError / library traceback when it
# is missing or does not point at a file.
if len(sys.argv) < 2:
    sys.exit(f"Usage: python {os.path.basename(sys.argv[0])} <path-to-docx>")

filepath = sys.argv[1]
if not os.path.isfile(filepath):
    sys.exit(f"Input file not found: {filepath}")

# The report is named after the input file (extension stripped) and written
# into ./output_reports, which is created on demand.
filename = os.path.splitext(os.path.basename(filepath))[0]
output_dir = "output_reports"
os.makedirs(output_dir, exist_ok=True)
output_path = os.path.join(output_dir, f"{filename}_report.pdf")

# === DOCX Reader ===
def read_docx_paragraphs(path):
    """Return the text of each paragraph in the .docx file at *path*, in order.

    Every entry of ``Document.paragraphs`` contributes one string; nothing is
    filtered out, so blank paragraphs come back as empty strings.
    """
    texts = []
    for block in docx.Document(path).paragraphs:
        texts.append(block.text)
    return texts

paragraphs = read_docx_paragraphs(filepath)

# === Detection Loop ===
# `results` mirrors the document: one list per paragraph, each holding
# (sentence, is_ai, ai_prob) tuples. A blank paragraph is stored as an empty
# list so the report can reproduce the original spacing.
results = []
total_sentences = 0
ai_sentences = 0

for para_text in paragraphs:
    scored = []

    if para_text.strip():
        for sent in sent_tokenize(para_text):
            encoded = tokenizer(
                sent,
                return_tensors="pt",
                truncation=True,
                padding=True,
                max_length=512,
            ).to(device)

            with torch.no_grad():
                logits = model(**encoded).logits

            # NOTE(review): class index 1 is read as the "AI" label -- confirm
            # against the checkpoint's label mapping.
            ai_prob = torch.softmax(logits, dim=-1)[0, 1].item()
            is_ai = ai_prob >= AI_THRESHOLD

            scored.append((sent, is_ai, ai_prob))
            total_sentences += 1
            ai_sentences += int(is_ai)

            # Debugging
            print(f"[DEBUG] AI probability: {ai_prob:.2f} — {'✔ Highlight' if is_ai else '✘ Skip'}")

    results.append(scored)

# Share of flagged sentences, as a percentage rounded to two decimals.
# Falls back to 0 when the document contained no sentences at all.
if total_sentences:
    ai_percent = round((ai_sentences / total_sentences) * 100, 2)
else:
    ai_percent = 0

# === PDF Writer ===
# Renders a header followed by every sentence of the document; sentences
# flagged as AI-written get a cyan background. Text is wrapped to the page
# width so long sentences (and their highlight) stay inside the margins.
c = canvas.Canvas(output_path, pagesize=A4)
width, height = A4
x, y = 40, height - 60
line_height = 18
font_size = 12
body_font = "Helvetica"
bottom_margin = 50
# Usable line width: the left margin is mirrored on the right-hand side.
max_text_width = width - 2 * x

# Title
# The built-in Helvetica faces cannot draw emoji (they would show up as
# placeholder glyphs), so the header text is kept plain.
c.setFont("Helvetica-Bold", 14)
c.drawString(x, y, f"AI Detection Report: {filename}")
y -= 25
c.setFont("Helvetica", 12)
c.drawString(x, y, f"AI Detected: {ai_percent}% of {total_sentences} sentences")
y -= 30
c.setFont(body_font, font_size)

# Body rendering
for para_result in results:
    if not para_result:
        # Blank paragraph in the source document: just leave a gap.
        y -= line_height
        continue

    for sentence, is_ai, _ in para_result:
        # Break the sentence into lines that fit between the margins.
        for line in simpleSplit(sentence, body_font, font_size, max_text_width):
            if y < bottom_margin:
                # Out of room: start a new page. showPage() resets the
                # graphics state, so the body font has to be selected again.
                c.showPage()
                y = height - 50
                c.setFont(body_font, font_size)

            if is_ai:
                # Paint the highlight first so the text is drawn on top of it.
                # Its height equals the line pitch, so the highlight of one
                # line never covers the descenders of the line above.
                text_width = c.stringWidth(line, body_font, font_size)
                c.setFillColor(colors.cyan)
                c.rect(x - 2, y - 4, text_width + 4, line_height, fill=True, stroke=False)
                c.setFillColor(colors.black)

            c.drawString(x, y, line)
            y -= line_height

    y -= line_height  # spacing between paragraphs

c.save()
print(f"\n✅ Report saved: {output_path}")