import os
import datetime
import re
from concurrent.futures import ThreadPoolExecutor

import requests
import gradio as gr
from transformers import pipeline
from sentence_transformers import SentenceTransformer, util

# ---------------------------
# Environment-safe settings
# ---------------------------
# Silence the tokenizers fork/parallelism warning triggered by the
# ThreadPoolExecutor used below.
os.environ["TOKENIZERS_PARALLELISM"] = "false"

# ---------------------------
# Load Models (SAFE MODE)
# ---------------------------

# Claim extraction (zero-shot NLI classifier).
claim_model_name = "MoritzLaurer/DeBERTa-v3-base-mnli"
claim_classifier = pipeline(
    "zero-shot-classification",
    model=claim_model_name,
    tokenizer=claim_model_name,
    device=-1,
    use_fast=False,  # 🔥 CRITICAL FIX: DeBERTa-v3's fast-tokenizer conversion can fail, so force the slow tokenizer
)
claim_labels = ["factual claim", "opinion", "personal anecdote", "other"]

# AI text detection.
ai_detect_model_name = "roberta-base-openai-detector"
ai_detector = pipeline(
    "text-classification",
    model=ai_detect_model_name,
    device=-1,
)

# Semantic model (EmbeddingGemma). The model is gated, so an HF token with
# access must be provided via the HF_TOKEN environment variable.
SEM_MODEL_NAME = "google/embeddinggemma-300m"
HF_TOKEN = os.getenv("HF_TOKEN")
sem_model = SentenceTransformer(SEM_MODEL_NAME, token=HF_TOKEN)

# ---------------------------
# Google Search Config
# ---------------------------
GOOGLE_API_KEY = os.getenv("GOOGLE_API_KEY")
GOOGLE_CX = os.getenv("GOOGLE_CX")
google_quota = {"count": 0, "date": datetime.date.today()}
GOOGLE_DAILY_LIMIT = 100


def check_google_quota():
    """Reset the daily quota counter when the date rolls over."""
    global google_quota
    today = datetime.date.today()
    if google_quota["date"] != today:
        google_quota = {"count": 0, "date": today}


# ---------------------------
# Text Split Helper
# ---------------------------
def safe_split_text(text):
    # Split after sentence-ending punctuation; drop fragments of 10
    # characters or fewer.
    pattern = r'(?<=[.!?])\s+'
    return [s.strip() for s in re.split(pattern, text) if len(s.strip()) > 10]


# ---------------------------
# Claim Extraction
# ---------------------------
def extract_claims(text, max_claims=20):
    sentences = safe_split_text(text)

    def classify(s):
        out = claim_classifier(s, claim_labels)
        return {
            "text": s,
            "label": out["labels"][0],
            "score": round(out["scores"][0], 3),
        }

    # Classify sentences concurrently; the model itself runs on CPU.
    with ThreadPoolExecutor(max_workers=4) as ex:
        results = list(ex.map(classify, sentences))

    return results[:max_claims]


# ---------------------------
# AI Detection
# ---------------------------
def detect_ai(texts):
    if isinstance(texts, str):
        texts = [texts]
    results = []
    for t in texts:
        # truncation=True keeps long inputs within the model's token limit.
        r = ai_detector(t, truncation=True)[0]
        label = "AI-generated" if r["label"].lower() in ["fake", "ai-generated"] else "Human"
        results.append({
            "text": t,
            "label": label,
            "score": round(r["score"], 3),
        })
    return results


# ---------------------------
# Keyword + Semantic Fact Check
# ---------------------------
def fetch_google_search_semantic(claim, k=3):
    global google_quota
    check_google_quota()
    if google_quota["count"] >= GOOGLE_DAILY_LIMIT:
        return {"keyword": [], "semantic": []}

    url = (
        "https://www.googleapis.com/customsearch/v1"
        f"?q={requests.utils.quote(claim)}"
        f"&key={GOOGLE_API_KEY}&cx={GOOGLE_CX}&num=10"
    )
    r = requests.get(url, timeout=10).json()
    google_quota["count"] += 1

    items = r.get("items", [])
    snippets = [f"{i['title']}: {i['snippet']}" for i in items]
    keyword_results = snippets[:k]

    if not snippets:
        return {"keyword": keyword_results, "semantic": []}

    # Re-rank all snippets by cosine similarity to the claim; keep the top k.
    q_emb = sem_model.encode(claim, normalize_embeddings=True)
    s_emb = sem_model.encode(snippets, normalize_embeddings=True)
    sims = util.cos_sim(q_emb, s_emb)[0]
    top_idx = sims.argsort(descending=True)[:k]
    semantic_results = [snippets[int(i)] for i in top_idx]

    return {
        "keyword": keyword_results,
        "semantic": semantic_results,
    }
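
# ---------------------------
# Example: semantic re-ranking in isolation (sketch)
# ---------------------------
# Illustrative only — not called by the app. It shows the cosine-similarity
# re-ranking step that fetch_google_search_semantic applies to search
# snippets, usable with made-up strings and no Google API key.
# `rerank_snippets` is a hypothetical helper name, not part of the app's API.
def rerank_snippets(query, snippets, k=3):
    """Return the k snippets most semantically similar to `query`."""
    q_emb = sem_model.encode(query, normalize_embeddings=True)
    s_emb = sem_model.encode(snippets, normalize_embeddings=True)
    sims = util.cos_sim(q_emb, s_emb)[0]  # shape: (len(snippets),)
    return [snippets[int(i)] for i in sims.argsort(descending=True)[:k]]

# Usage (commented out so importing this module stays side-effect free):
# rerank_snippets(
#     "The Eiffel Tower is in Paris",
#     ["Paris landmarks: the Eiffel Tower ...", "Berlin travel guide ..."],
#     k=1,
# )
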
# ---------------------------
# Predict
# ---------------------------
def predict(text=""):
    if not text.strip():
        return {"error": "No input provided"}

    # Whole-input analysis.
    full_ai = detect_ai(text)
    sentences = safe_split_text(text)
    full_fc = {s: fetch_google_search_semantic(s) for s in sentences}

    # Per-claim analysis.
    claims = extract_claims(text)
    claim_ai = detect_ai([c["text"] for c in claims])
    claim_fc = {c["text"]: fetch_google_search_semantic(c["text"]) for c in claims}

    return {
        "full_text": {
            "input": text,
            "ai_detection": full_ai,
            "fact_checking": full_fc,
        },
        "claims": claims,
        "claims_ai_detection": claim_ai,
        "claims_fact_checking": claim_fc,
        "google_quota_used": google_quota["count"],
    }


# ---------------------------
# UI
# ---------------------------
with gr.Blocks() as demo:
    gr.Markdown("## EduShield AI Backend – Keyword + Semantic Fact Check")
    inp = gr.Textbox(lines=8, label="Input Text")
    btn = gr.Button("Run Analysis")
    out = gr.JSON()
    btn.click(predict, inp, out)

if __name__ == "__main__":
    demo.launch(server_name="0.0.0.0")
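
# ---------------------------
# Programmatic access (sketch)
# ---------------------------
# A minimal client-side example, assuming the app is reachable on
# localhost:7860 and the `gradio_client` package is installed. "/predict" is
# the endpoint name Gradio derives from the click handler above; adjust it
# if your deployment differs.
#
# from gradio_client import Client
#
# client = Client("http://localhost:7860")
# result = client.predict(
#     "The Great Wall of China is visible from space.",
#     api_name="/predict",
# )
# print(result)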