import os

import torch
from fastapi import FastAPI, Request
from fastapi.middleware.cors import CORSMiddleware
from fastapi.responses import JSONResponse
from transformers import AutoTokenizer, AutoModelForCausalLM

# -------------------------------
# CONFIGURATION
# -------------------------------
MODEL_NAME = "mistralai/Mistral-7B-Instruct-v0.2"
HF_TOKEN = os.getenv("HF_TOKEN")  # Uses your Hugging Face Space secret

# -------------------------------
# LOAD MODEL & TOKENIZER
# -------------------------------
tokenizer = AutoTokenizer.from_pretrained(
    MODEL_NAME,
    token=HF_TOKEN  # `use_auth_token` is deprecated in recent transformers; use `token`
)

model = AutoModelForCausalLM.from_pretrained(
    MODEL_NAME,
    torch_dtype=torch.float16 if torch.cuda.is_available() else "auto",
    device_map="auto",
    token=HF_TOKEN
)

# -------------------------------
# FASTAPI APP SETUP
# -------------------------------
app = FastAPI()

# Enable CORS so the frontend can talk to the backend
app.add_middleware(
    CORSMiddleware,
    allow_origins=["*"],  # Allow all origins; tighten this for production
    allow_credentials=True,
    allow_methods=["*"],
    allow_headers=["*"],
)

# -------------------------------
# GENERATE RECIPE ENDPOINT
# -------------------------------
@app.post("/generate")
async def generate(request: Request):
    try:
        # Parse JSON input from the frontend
        data = await request.json()
        ingredients = data.get("ingredients", "")
        mood = data.get("mood", "any")
        time = data.get("time", "30")
        restrictions = data.get("restrictions", "")

        # Build the prompt
        prompt = f"""
You are a helpful cooking assistant.
Generate ONE {restrictions} {mood} recipe using the following ingredients: {ingredients}.
The recipe should be easy to follow and ready in under {time} minutes.

Include:
- A fun and catchy title
- Ingredient list (with substitutions in brackets)
- Clear step-by-step instructions

Keep it short, clear, and conversational.
"""

        # Format the prompt with the model's chat template and tokenize it
        messages = [{"role": "user", "content": prompt}]
        text = tokenizer.apply_chat_template(
            messages, tokenize=False, add_generation_prompt=True
        )
        model_inputs = tokenizer([text], return_tensors="pt").to(model.device)

        # Generate the recipe text
        generated_ids = model.generate(
            **model_inputs,
            max_new_tokens=1024,  # Cap on generated tokens; adjust for latency vs. length
            do_sample=True,
            temperature=0.9,
            top_p=0.95
        )

        # Decode only the newly generated tokens, skipping the echoed prompt
        output_ids = generated_ids[0][len(model_inputs.input_ids[0]):].tolist()
        content = tokenizer.decode(output_ids, skip_special_tokens=True)

        return JSONResponse(content={"recipe": content})

    except Exception as e:
        return JSONResponse(content={"error": str(e)}, status_code=500)

# -------------------------------
# HEALTH CHECK ENDPOINT
# -------------------------------
@app.get("/")
async def root():
    return {"message": "SnackHack backend is running! 🎉"}
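
# -------------------------------
# RUNNING LOCALLY (usage sketch)
# -------------------------------
# A minimal sketch of how this backend might be started and exercised.
# The filename `app.py` and port 7860 (the Hugging Face Spaces default)
# are assumptions, not part of the original code:
#
#   uvicorn app:app --host 0.0.0.0 --port 7860
#
# Example request against the /generate endpoint, matching the JSON keys
# the handler reads above:
#
#   curl -X POST http://localhost:7860/generate \
#        -H "Content-Type: application/json" \
#        -d '{"ingredients": "eggs, spinach, feta", "mood": "cozy", "time": "20", "restrictions": "vegetarian"}'
#
# Expected response shape: {"recipe": "..."} on success, or
# {"error": "..."} with HTTP 500 on failure.

# Optional entry point so the file can also be run with `python app.py`
# (a convenience sketch; Spaces setups often invoke uvicorn directly):
if __name__ == "__main__":
    import uvicorn

    uvicorn.run(app, host="0.0.0.0", port=7860)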