# models/loader.py
"""Model loading and text-generation utilities for the multi-agent system.

Maps agent roles to Hugging Face model names, lazily loads 4-bit-quantized
models into a process-wide cache, and exposes a simple generation helper.
"""

import os  # NOTE(review): unused here — kept in case other modules rely on import side effects; confirm before removing
import torch
from transformers import AutoModelForCausalLM, AutoTokenizer, BitsAndBytesConfig

# 4-bit NF4 quantization keeps VRAM usage low; bfloat16 compute preserves
# generation quality on supported hardware.
QUANTIZATION_CONFIG = BitsAndBytesConfig(
    load_in_4bit=True,
    bnb_4bit_quant_type="nf4",
    bnb_4bit_compute_dtype=torch.bfloat16,
)

# Single source of truth for the fallback/default model name (was repeated
# as a literal in four places).
DEFAULT_MODEL = "Qwen/Qwen3-0.6B"

# Registry mapping agent roles to specific models. Every role currently
# shares one small model; swap an entry here to give a role its own backbone.
MODEL_REGISTRY = {
    "ceo": DEFAULT_MODEL,
    "planner": DEFAULT_MODEL,
    "manager": DEFAULT_MODEL,
    "debugger": DEFAULT_MODEL,
    "business_analyst": DEFAULT_MODEL,
    "ux_ui_designer": DEFAULT_MODEL,
    "worker_backend_coder": DEFAULT_MODEL,
    "worker_front_end_coder": DEFAULT_MODEL,
    "worker_tester": DEFAULT_MODEL,
}

# Per-role system-prompt fragments appended to the base assistant prompt.
# BUG FIX: generate_with_model() referenced this name but it was never
# defined, so every call raised NameError. Populate with role-specific
# instructions as needed; missing roles fall back to "".
ROLE_PROMPTS: dict[str, str] = {}

# Process-wide cache so each model/tokenizer pair is loaded at most once.
_MODEL_CACHE = {}


def get_model_and_tokenizer(model_name=DEFAULT_MODEL):
    """Return a cached ``(model, tokenizer)`` pair for *model_name*.

    Loads the model 4-bit quantized with automatic device placement on
    first use; subsequent calls for the same name hit the cache.

    Args:
        model_name: Hugging Face repo id of the causal-LM to load.

    Returns:
        Tuple of ``(model, tokenizer)``.
    """
    if model_name not in _MODEL_CACHE:
        print(f"Loading model: {model_name}...")
        _MODEL_CACHE[model_name] = {
            "model": AutoModelForCausalLM.from_pretrained(
                model_name,
                device_map="auto",
                quantization_config=QUANTIZATION_CONFIG,
                trust_remote_code=True,
            ),
            # trust_remote_code added for consistency with the model load
            # (Qwen tokenizers may ship custom code).
            "tokenizer": AutoTokenizer.from_pretrained(
                model_name, trust_remote_code=True
            ),
        }
    entry = _MODEL_CACHE[model_name]
    return entry["model"], entry["tokenizer"]


def generate_with_model(agent_role, prompt):
    """Generate a completion for *prompt* using *agent_role*'s model.

    Unknown roles fall back to ``DEFAULT_MODEL`` and an empty role prompt.

    Args:
        agent_role: Key into MODEL_REGISTRY / ROLE_PROMPTS.
        prompt: User prompt text.

    Returns:
        The newly generated text with the prompt and special tokens removed.
    """
    model_name = MODEL_REGISTRY.get(agent_role, DEFAULT_MODEL)
    model, tokenizer = get_model_and_tokenizer(model_name)

    full_prompt = f"You are a helpful assistant. {ROLE_PROMPTS.get(agent_role, '')}\n\nUser prompt: {prompt}"
    input_ids = tokenizer.encode(full_prompt, return_tensors="pt").to(model.device)

    with torch.no_grad():
        output = model.generate(
            input_ids,
            max_new_tokens=2048,
            do_sample=True,
            temperature=0.7,
            top_p=0.9,
            repetition_penalty=1.1,
        )

    # BUG FIX: slice off the prompt *tokens* instead of str.replace()-ing the
    # prompt text out of the decoded string — decode() does not round-trip
    # the prompt exactly (whitespace/special-token normalization), so the old
    # replace() could silently leave the prompt in the returned output.
    new_tokens = output[0][input_ids.shape[1]:]
    return tokenizer.decode(new_tokens, skip_special_tokens=True).strip()