"""Jimma University Plagiarism Detection API.

FastAPI service that scores a submitted document against a pre-embedded
reference corpus using SBERT cosine similarity, then asks Gemini for a
supervisor-style comparison report when the similarity is suspicious.
"""

from datetime import datetime
from typing import Dict
import os
import pickle

from fastapi import FastAPI, HTTPException
from fastapi.middleware.cors import CORSMiddleware
from pydantic import BaseModel
from sentence_transformers import SentenceTransformer, util
import google.generativeai as genai

app = FastAPI(title="Jimma University Plagiarism API")

# ====================== SAFE LIMITS ======================
MAX_TEXT_LENGTH = 8000        # submissions longer than this are truncated
MAX_PROMPT_LENGTH = 4000      # hard cap on the Gemini prompt size
SIMILARITY_THRESHOLD = 30.0   # percent; at or below => low risk, no report
# =========================================================


# ====================== RATING FUNCTION ======================
def convert_to_rating(similarity_percent: float) -> int:
    """Map a similarity percentage (0-100) to a 1-5 star rating.

    Bands: >=80 -> 5, >=60 -> 4, >=40 -> 3, >=20 -> 2, else 1.
    """
    if similarity_percent >= 80:
        return 5
    if similarity_percent >= 60:
        return 4
    if similarity_percent >= 40:
        return 3
    if similarity_percent >= 20:
        return 2
    return 1
# ============================================================


# ====================== ROOT ======================
@app.get("/")
def home():
    """Root liveness message."""
    return {"message": "Jimma University Plagiarism API is running 🚀"}


@app.get("/health")
def health():
    """Simple health probe for load balancers / uptime checks."""
    return {"status": "ok"}
# ==================================================


app.add_middleware(
    CORSMiddleware,
    allow_origins=["*"],  # NOTE(review): wide-open CORS — restrict in production
    allow_methods=["*"],
    allow_headers=["*"],
)

# ====================== CONFIG ======================
# SECURITY FIX: the previous revision shipped a hard-coded Gemini API key as
# the os.getenv() fallback. That key is compromised (it was committed to
# source) and must be revoked; the key now comes exclusively from the
# environment. Requests to Gemini will fail gracefully (see the try/except
# around generate_content) if it is missing.
GEMINI_API_KEY = os.getenv("GEMINI_API_KEY", "")
if not GEMINI_API_KEY:
    print("⚠️ GEMINI_API_KEY is not set — Gemini reports will fail")

MODEL_PATH = "plagiarism_sbert_model"
EMBEDDINGS_FILE = "reference_embeddings.pkl"
# ===================================================

# ====================== LOAD MODEL ======================
if not os.path.exists(MODEL_PATH):
    raise RuntimeError("❌ Model folder not found")

model = SentenceTransformer(MODEL_PATH)
print("✅ SBERT model loaded")

# ====================== LOAD REFERENCE DATASET ======================
if not os.path.exists(EMBEDDINGS_FILE):
    raise RuntimeError("❌ Reference embeddings file not found")

# NOTE: pickle is only acceptable here because this file is produced locally
# by a trusted indexing step — never load untrusted pickles.
with open(EMBEDDINGS_FILE, "rb") as f:
    data = pickle.load(f)

ref_embeddings = data["embeddings"]  # tensor of reference-corpus embeddings
df_ref = data["df_ref"]              # DataFrame with per-reference metadata
print("✅ Reference dataset loaded")
# ================================================================

# ====================== GEMINI ======================
genai.configure(api_key=GEMINI_API_KEY)
gemini_model = genai.GenerativeModel('gemini-2.5-flash')
print("✅ System ready")
# ===================================================


# ====================== REQUEST MODEL ======================
class PlagiarismRequest(BaseModel):
    """Payload for POST /check_plagiarism."""

    text: str
    title: str = "Submitted Document"
    student_name: str = "Unknown Student"
    year: str = "2026"
# ============================================================


# ====================== API ======================
@app.post("/check_plagiarism")
async def check_plagiarism(req: PlagiarismRequest) -> Dict:
    """Check a submitted document for plagiarism against the reference corpus.

    Returns a "low_risk" payload when the best cosine match is at or below
    SIMILARITY_THRESHOLD percent, otherwise a "suspicious" payload that
    includes the closest reference and a Gemini-generated supervisor report.

    Raises:
        HTTPException(400): text shorter than 200 characters.
        HTTPException(500): SBERT encoding failure.
    """
    text = req.text.strip()

    if len(text) < 200:
        raise HTTPException(400, "Text too short (minimum 200 characters)")

    # Truncate (rather than reject) oversize submissions.
    if len(text) > MAX_TEXT_LENGTH:
        text = text[:MAX_TEXT_LENGTH]

    # ================= SBERT ENCODING =================
    try:
        query_embedding = model.encode(
            text,
            convert_to_tensor=True,
            normalize_embeddings=True
        )

        # Cosine similarity stays in the 0-1 range; scale to percent.
        cosine_scores = util.cos_sim(query_embedding, ref_embeddings)[0]
        similarities = (cosine_scores * 100).cpu().numpy()
    except Exception as e:
        raise HTTPException(status_code=500, detail=f"Embedding error: {str(e)}")

    # ================= TOP MATCH =================
    top_idx = int(similarities.argmax())
    top_similarity = float(similarities[top_idx])

    rating = convert_to_rating(top_similarity)
    stars = "★" * rating + "☆" * (5 - rating)

    # ================= LOW RISK =================
    if top_similarity <= SIMILARITY_THRESHOLD:
        return {
            "status": "low_risk",
            "similarity_percent": round(top_similarity, 2),
            "rating": rating,
            "stars": stars,
            "message": "No significant plagiarism detected."
        }

    # ================= SOURCE =================
    row = df_ref.iloc[top_idx]
    source_title = str(row.get("title", "Reference Project"))[:150]
    source_student = str(row.get("student_name", "Original Student"))
    source_year = str(row.get("year", "2023"))

    # FIX: the old "LOW" ternary branch was unreachable — we only get here
    # when top_similarity > SIMILARITY_THRESHOLD (30.0), so only MEDIUM/HIGH
    # are possible. Behavior is unchanged.
    category = "MEDIUM" if top_similarity <= 70 else "HIGH"
    emoji = "⚖️" if category == "MEDIUM" else "❌"

    # ================= GEMINI PROMPT =================
    prompt = f"""
You are a strict academic plagiarism supervisor at Jimma University.

{emoji} {category} SIMILARITY CASE

Source Title: {source_title}
Student Name: {source_student}
Year: {source_year}

Suspicious Title: {req.title}
Student Name: {req.student_name}
Year: {req.year}

Similarity Score: {top_similarity:.1f}%

1. Conceptual Similarity:
2. Conceptual Differences:
3. Technology Differences:
4. Supervisor Recommendation:
"""

    try:
        prompt = prompt[:MAX_PROMPT_LENGTH]
        response = gemini_model.generate_content(prompt)
        report = response.text.strip()
    except Exception as e:
        # Best-effort: a Gemini failure degrades the report, never the check.
        report = f"Gemini error: {str(e)}"

    return {
        "status": "suspicious",
        "similarity_percent": round(top_similarity, 2),
        "rating": rating,
        "stars": stars,
        "most_similar_source": source_title,
        "source_student": source_student,
        "gemini_report": report,
        # NOTE(review): naive local timestamp — consider datetime.now(timezone.utc)
        "timestamp": datetime.now().isoformat()
    }