"""FastAPI service: entrepreneurial-readiness chatbot.

Wraps a TinyLlama chat model (optionally with a LoRA adapter) for free-form
chat, plus a fixed 6-question assessment flow whose answers are scored by an
external XGBoost HTTP API.
"""

import os
import uuid
import json  # NOTE(review): currently unused here, but kept — the rest of the file may rely on it.
import requests
from typing import Dict, Any, Optional

from fastapi import FastAPI, Body
from pydantic import BaseModel, Field
from fastapi.middleware.cors import CORSMiddleware

import torch
from transformers import AutoTokenizer, AutoModelForCausalLM

# ---------------------------
# Config
# ---------------------------
BASE_MODEL_ID = os.environ.get("BASE_MODEL_ID", "TinyLlama/TinyLlama-1.1B-Chat-v1.0")
ADAPTER_ID = os.environ.get("ADAPTER_ID", "ethnmcl/tinyllama-entrepreneurchatbot-lora")
XGB_SCORE_URL = os.environ.get(
    "XGB_SCORE_URL",
    "https://ethnmcl-EntrepreneurialReadinessScoreAPI.hf.space/score",
)

SYSTEM_PROMPT = (
    "You are a concise entrepreneurial readiness assistant. "
    "Be clear, specific, and professional."
)

# Ordered assessment questions; `key` is the feature name sent to the scoring API.
QUESTION_FLOW = [
    {"key": "age", "question": "What is your age? (number)"},
    {"key": "savings", "question": "How much do you currently have saved (USD)?"},
    {"key": "monthly_expense_ratio", "question": "What is your monthly expense ratio (expenses/income)? (e.g., 0.55)"},
    {"key": "sales_experience", "question": "Rate your sales experience from 0–10."},
    {"key": "dependents", "question": "How many dependents do you support? (number)"},
    {"key": "weekly_time_commitment", "question": "How many hours/week can you commit to your venture?"},
]

# Per-feature casting applied to raw string answers before scoring.
TYPE_CASTS = {
    "age": float,
    "savings": float,
    "monthly_expense_ratio": float,
    "sales_experience": float,
    "dependents": float,
    "weekly_time_commitment": float,
}

# Sample prompts surfaced by GET /examples for client UIs.
EXAMPLES = [
    "Can you explain what the entrepreneurial readiness check is in one or two sentences?",
    "I am 31, with $8,000 savings and an expense ratio of 0.72. What does that say about my readiness?",
    "I’m 27, working 15 hours a week on my business, with 2 dependents. How might that affect my entrepreneurial score?",
    "Inputs → age 29, savings 5000, expense ratio 0.62, sales experience 4, dependents 1, hours/week 12. Summarize likely strengths and risks in bullet points.",
    "Based on these inputs (savings 3000, expense ratio 0.85, sales exp 2), what 3 actions should I take to improve my readiness?",
    "I feel nervous about launching with only $2,000 saved. Can you give me encouragement and one practical step?",
]

# ---------------------------
# Lightweight session store
# ---------------------------
# In-memory, per-process only: sessions are lost on restart and not shared
# across workers. `answers` holds raw string answers keyed by feature name.
SESSIONS: Dict[str, Dict[str, Any]] = {}


def new_session() -> str:
    """Create a fresh session with empty assessment state; return its id."""
    sid = uuid.uuid4().hex
    SESSIONS[sid] = {"answers": {}, "idx": 0, "active": False, "scored": False, "score": None}
    return sid


def get_session(session_id: Optional[str]) -> str:
    """Return `session_id` if it exists, otherwise allocate a new session."""
    if session_id and session_id in SESSIONS:
        return session_id
    return new_session()


# ---------------------------
# Model loading (GPU 4-bit if available)
# ---------------------------
def load_model():
    """Load tokenizer + base model (4-bit on CUDA, fp32 on CPU) into globals.

    Attempts to attach the PEFT LoRA adapter; falls back to the base model
    with a warning if the adapter cannot be loaded.
    """
    global tokenizer, model
    tokenizer = AutoTokenizer.from_pretrained(BASE_MODEL_ID, use_fast=True)

    use_4bit = torch.cuda.is_available()
    kwargs = {"device_map": "auto"}
    if use_4bit:
        # 4-bit quant only when CUDA available (bitsandbytes).
        # NOTE(review): `load_in_4bit` as a direct kwarg is deprecated in newer
        # transformers in favor of BitsAndBytesConfig — still functional here.
        kwargs.update(dict(load_in_4bit=True, torch_dtype=torch.float16))
    else:
        # CPU fallback
        kwargs.update(dict(torch_dtype=torch.float32))

    base = AutoModelForCausalLM.from_pretrained(BASE_MODEL_ID, **kwargs)

    try:
        from peft import PeftModel

        model = PeftModel.from_pretrained(base, ADAPTER_ID)
    except Exception as e:
        # If adapter load fails, proceed with base (but log).
        print(f"[WARN] Failed to load PEFT adapter {ADAPTER_ID}: {e}")
        model = base

    model.eval()
    print("[INFO] Model ready. CUDA:", torch.cuda.is_available())


def llm_reply(system: str, user: str, max_new_tokens=180, temperature=0.2) -> str:
    """Generate a single assistant reply for a system + user message pair.

    Returns only the newly generated text (the prompt is excluded).
    """
    prompt = f"<|system|>\n{system}\n<|user|>\n{user}\n<|assistant|>\n"
    inputs = tokenizer(prompt, return_tensors="pt").to(model.device)
    prompt_len = inputs["input_ids"].shape[1]
    with torch.inference_mode():
        out = model.generate(
            **inputs,
            max_new_tokens=max_new_tokens,
            do_sample=temperature > 0,
            temperature=temperature,
            pad_token_id=tokenizer.eos_token_id,
        )
    # FIX: decode only the generated continuation. The previous approach decoded
    # the full sequence with skip_special_tokens=True and split on "<|assistant|>";
    # if that marker is a special token it gets stripped by the decode, and the
    # whole prompt would be echoed back to the caller.
    return tokenizer.decode(out[0][prompt_len:], skip_special_tokens=True).strip()


def next_question(state: Dict[str, Any]) -> Optional[str]:
    """Return the next unanswered question text, or None when the flow is done."""
    if state["idx"] < len(QUESTION_FLOW):
        return QUESTION_FLOW[state["idx"]]["question"]
    return None


def cast_features(answers: Dict[str, str]) -> Dict[str, Any]:
    """Cast raw string answers per TYPE_CASTS; keep the raw value on failure."""
    out = {}
    for k, v in answers.items():
        caster = TYPE_CASTS.get(k, str)
        try:
            out[k] = caster(v)
        except Exception:
            # Leave the un-castable answer as-is; the scoring API will report it.
            out[k] = v
    return out


def score_via_api(features: Dict[str, Any]) -> Dict[str, Any]:
    """POST features to the external XGBoost scoring API.

    Returns the parsed JSON response, or {"error": ...} on any failure —
    callers detect failure via the "error" key.
    """
    try:
        r = requests.post(XGB_SCORE_URL, json={"features": features}, timeout=20)
        r.raise_for_status()
        return r.json()
    except Exception as e:
        return {"error": f"Scoring API error: {e}"}


# ---------------------------
# FastAPI
# ---------------------------
app = FastAPI(title="Entrepreneurial Readiness Chat API", version="1.0.0")
app.add_middleware(
    CORSMiddleware,
    allow_origins=["*"],
    allow_credentials=True,
    allow_methods=["*"],
    allow_headers=["*"],
)


class ChatIn(BaseModel):
    message: str = Field(..., description="User message")
    session_id: Optional[str] = Field(None, description="Session ID (optional)")


class ChatOut(BaseModel):
    session_id: str
    reply: str
    assessment: Dict[str, Any]


class StartOut(BaseModel):
    session_id: str
    question: str
    idx: int
    total: int


class AnswerIn(BaseModel):
    session_id: str
    answer: str


class AnswerOut(BaseModel):
    session_id: str
    done: bool
    question: Optional[str] = None
    idx: int
    total: int
    score: Optional[Any] = None
    error: Optional[str] = None


@app.on_event("startup")
def _startup():
    """Load model weights once at process start."""
    load_model()


@app.get("/health")
def health():
    """Liveness probe."""
    return {"ok": True}


@app.get("/examples")
def examples():
    """Return sample prompts for client UIs."""
    return {"examples": EXAMPLES}


@app.post("/assessment/start", response_model=StartOut)
def assessment_start(payload: Optional[Dict[str, Any]] = Body(default=None)):
    """(Re)start the assessment for the given (or a new) session.

    Accepts an optional JSON body {"session_id": ...}; resets any prior
    progress and returns the first question.
    """
    # FIX: avoid a mutable dict as the parameter default; normalize None -> {}.
    payload = payload or {}
    sid = get_session(payload.get("session_id"))
    st = SESSIONS[sid]
    st.update({"answers": {}, "idx": 0, "active": True, "scored": False, "score": None})
    q = next_question(st)
    # idx is reported 1-based for display (Q1/6 ... Q6/6).
    return {"session_id": sid, "question": q, "idx": st["idx"] + 1, "total": len(QUESTION_FLOW)}


@app.post("/assessment/answer", response_model=AnswerOut)
def assessment_answer(inp: AnswerIn):
    """Record one answer; either return the next question or the final score."""
    if inp.session_id not in SESSIONS:
        return AnswerOut(
            session_id=new_session(),
            done=False,
            idx=0,
            total=len(QUESTION_FLOW),
            error="Invalid session_id",
        )

    st = SESSIONS[inp.session_id]
    if not st.get("active"):
        return AnswerOut(
            session_id=inp.session_id,
            done=False,
            idx=st["idx"],
            total=len(QUESTION_FLOW),
            error="Assessment not active.",
        )

    cur_key = QUESTION_FLOW[st["idx"]]["key"]
    st["answers"][cur_key] = inp.answer.strip()
    st["idx"] += 1

    q = next_question(st)
    if q is None:
        # Flow complete: cast answers and score via the external API.
        st["active"] = False
        features = cast_features(st["answers"])
        res = score_via_api(features)
        st["scored"] = "error" not in res
        st["score"] = res
        return AnswerOut(
            session_id=inp.session_id,
            done=True,
            idx=len(QUESTION_FLOW),
            total=len(QUESTION_FLOW),
            score=res,
        )
    else:
        return AnswerOut(
            session_id=inp.session_id,
            done=False,
            question=q,
            idx=st["idx"] + 1,
            total=len(QUESTION_FLOW),
        )


@app.post("/chat", response_model=ChatOut)
def chat(inp: ChatIn):
    """Free-form chat; certain trigger phrases start the assessment instead."""
    sid = get_session(inp.session_id)
    st = SESSIONS[sid]

    # If the user asks to take the assessment, start it.
    msg_lower = inp.message.lower()
    triggers = [
        "take the entrepreneurial readiness assessment",
        "take assessment",
        "start assessment",
        "readiness assessment",
    ]
    if any(t in msg_lower for t in triggers):
        st.update({"answers": {}, "idx": 0, "active": True, "scored": False, "score": None})
        q = next_question(st)
        reply = (
            "Great—let’s do a short 6-question entrepreneurial readiness check.\n\n"
            f"**Q1/6**: {q}"
        )
        return ChatOut(
            session_id=sid,
            reply=reply,
            assessment={"active": True, "idx": st["idx"] + 1, "total": len(QUESTION_FLOW), "question": q},
        )

    # Otherwise normal chat via LLM
    answer = llm_reply(SYSTEM_PROMPT, inp.message)
    return ChatOut(
        session_id=sid,
        reply=answer,
        assessment={"active": st.get("active", False), "idx": st.get("idx", 0), "total": len(QUESTION_FLOW)},
    )