# app.py
import os
import json
import requests
from typing import List, Optional

import gradio as gr

# Optional: huggingface_hub.InferenceApi if installed
try:
    from huggingface_hub import InferenceApi
    HF_HUB_AVAILABLE = True
except Exception:
    HF_HUB_AVAILABLE = False

# Optional local generation support
try:
    from transformers import pipeline, AutoTokenizer, AutoModelForCausalLM
    TRANSFORMERS_AVAILABLE = True
except Exception:
    TRANSFORMERS_AVAILABLE = False

# ---------------------
# Config / Model list
# ---------------------
DEFAULT_MODEL = os.getenv("HUGGINGFACE_MODEL", "gpt2")

# A curated list of public models for quick selection (small -> medium -> instruction-tuned)
COMMON_MODELS = [
    "gpt2",
    "distilgpt2",
    "google/flan-t5-small",
    "google/flan-t5-base",
    "google/flan-t5-large",
    "google/flan-t5-xl",
    "facebook/opt-1.3b",
    "facebook/opt-2.7b",
    "bigscience/bloom-560m",
    "bigscience/bloomz-560m",
    "tiiuae/falcon-7b-instruct",  # may be gated
    "mistralai/Mixtral-8x7B-Instruct-v0.1",  # example gated/large
    "stabilityai/stablelm-tuned-alpha-3b",
    "EleutherAI/gpt-neo-2.7B",
    "google/t5-v1_1-base",
    "hf-internal-testing/tiny-random-gpt2",
]

# ---------------------
# Helpers
# ---------------------
def normalize_hf_output(data) -> str:
    """Normalize HF inference output (list/dict/string) to plain text."""
    if data is None:
        return ""
    if isinstance(data, str):
        return data.strip()
    if isinstance(data, list) and len(data) > 0:
        first = data[0]
        if isinstance(first, dict):
            for key in ("generated_text", "text", "content"):
                if key in first and isinstance(first[key], str):
                    return first[key].strip()
            # fallback: join string values
            vals = [str(v) for v in first.values()]
            return " ".join(vals).strip()
        if all(isinstance(x, str) for x in data):
            return "\n".join(data).strip()
        return str(data)
    if isinstance(data, dict):
        for key in ("generated_text", "text", "content"):
            if key in data and isinstance(data[key], str):
                return data[key].strip()
        return json.dumps(data)
    return str(data)


def get_api_token(input_token: Optional[str]) -> Optional[str]:
    """Prefer the UI-provided token, then env vars, else None."""
    if input_token and input_token.strip():
        return input_token.strip()
    return os.getenv("HUGGINGFACEHUB_API_TOKEN") or os.getenv("HF_TOKEN")


# ---------------------
# Inference callers
# ---------------------
def call_hf_router(prompt: str, model: str, token: Optional[str],
                   max_new_tokens: int = 256, temperature: float = 0.2) -> str:
    """
    Call the HF router endpoint, which is more future-proof for some hosted models.
    Returns a plain-text response or a helpful error message.
    """
    url = f"https://router.huggingface.co/hf-inference/{model}"
    headers = {"Content-Type": "application/json"}
    if token:
        headers["Authorization"] = f"Bearer {token}"
    payload = {
        "inputs": prompt,
        "parameters": {"max_new_tokens": max_new_tokens, "temperature": temperature},
    }
    try:
        resp = requests.post(url, headers=headers, json=payload, timeout=60)
    except Exception as e:
        return f"[Request error: {e}]"
    if resp.status_code == 410:
        return ("[Error 410: endpoint gone. This model may not have a hosted inference endpoint or requires gated access. "
                "Try another model or check the model page for access requirements.]")
    if resp.status_code == 404:
        return "[Error 404: model not found. Check the model id or try a different model.]"
    if resp.status_code == 401:
        return "[Error 401: unauthorized. Your API key may be missing or lacking permissions.]"
    if resp.status_code != 200:
        # include limited info about the failure
        try:
            info = resp.json()
        except Exception:
            info = resp.text
        return f"[HF error {resp.status_code}: {info}]"
    try:
        data = resp.json()
    except Exception:
        return resp.text
    return normalize_hf_output(data)


def call_hf_inferenceapi(prompt: str, model: str, token: Optional[str],
                         max_new_tokens: int = 256, temperature: float = 0.2) -> str:
    """Use huggingface_hub.InferenceApi when available; otherwise fall back to the router endpoint."""
    if not HF_HUB_AVAILABLE:
        return call_hf_router(prompt, model, token, max_new_tokens, temperature)
    try:
        api = InferenceApi(repo_id=model, token=token)
        out = api(prompt, params={"max_new_tokens": max_new_tokens, "temperature": temperature})
        return normalize_hf_output(out)
    except Exception:
        # fall back to the router endpoint on any failure
        return call_hf_router(prompt, model, token, max_new_tokens, temperature)


# Local generation fallback
_local_gen = None


def init_local_gen(model_name: str):
    """Lazily build a local text-generation pipeline; returns None if unavailable."""
    global _local_gen
    if not TRANSFORMERS_AVAILABLE:
        return None
    try:
        # Try to initialize a pipeline for the specific model
        tokenizer = AutoTokenizer.from_pretrained(model_name)
        model = AutoModelForCausalLM.from_pretrained(model_name)
        _local_gen = pipeline("text-generation", model=model, tokenizer=tokenizer)
        return _local_gen
    except Exception:
        try:
            # Let the pipeline resolve the model itself as a fallback
            _local_gen = pipeline("text-generation", model=model_name)
            return _local_gen
        except Exception:
            return None


def call_local(prompt: str, model_name: str):
    """Generate a reply with a locally loaded transformers pipeline."""
    gen = init_local_gen(model_name)
    if gen is None:
        return "[Local generation unavailable — install 'transformers' and ensure the model is available locally.]"
    try:
        # max_length is a rough heuristic: prompt word count plus ~150 generated tokens
        out = gen(prompt, max_length=len(prompt.split()) + 150, do_sample=True,
                  top_p=0.95, temperature=0.8, num_return_sequences=1)
        if isinstance(out, list) and len(out) > 0:
            first = out[0]
            if isinstance(first, dict):
                for key in ("generated_text", "text"):
                    if key in first and isinstance(first[key], str):
                        return first[key].strip()
                return str(first)
            if isinstance(first, str):
                return first
        return str(out)
    except Exception as e:
        return f"[Local generation failed: {e}]"


# ---------------------
# Conversation prompt builder
# ---------------------
SYSTEM_PROMPT = (
    "You are an expert computer technician and systems engineer. "
    "You know practical details about personal computers, servers, operating systems, networking, "
    "hardware troubleshooting, performance tuning, security best practices, software installation and debugging. "
    "When a user asks a question, respond clearly and concisely in English. Provide step-by-step instructions when helpful, "
    "explain risks and trade-offs, and include commands or code snippets if they are useful."
)
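
# build_prompt (below) flattens the running chat into one plain-text prompt. For a
# single pending user turn, the result looks like this (illustrative example):
#
#   System: You are an expert computer technician and systems engineer. ...
#   Conversation:
#   User: Why is my laptop overheating?
#   Assistant: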
def build_prompt(system_prompt: str, history: List[List[str]]) -> str:
    parts = [f"System: {system_prompt}", "Conversation:"]
    for user_msg, assistant_msg in history:
        parts.append(f"User: {user_msg}")
        if assistant_msg:
            parts.append(f"Assistant: {assistant_msg}")
    parts.append("Assistant:")
    return "\n".join(parts)


# ---------------------
# Gradio callbacks
# ---------------------
def respond(user_message: str, chat_history, mode: str, selected_model: str,
            custom_model: str, api_key_input: str, max_tokens: int):
    if chat_history is None:
        chat_history = []
    chat_history.append([user_message, None])

    model_to_use = custom_model.strip() if custom_model and custom_model.strip() else selected_model
    token = get_api_token(api_key_input)
    prompt = build_prompt(SYSTEM_PROMPT, chat_history)

    # Choose inference path
    if mode == "HuggingFace (remote)":
        # prefer huggingface_hub wrapper if available, fallback to router
        if HF_HUB_AVAILABLE:
            reply = call_hf_inferenceapi(prompt, model_to_use, token, max_new_tokens=max_tokens)
        else:
            reply = call_hf_router(prompt, model_to_use, token, max_new_tokens=max_tokens)
    else:
        reply = call_local(prompt, model_to_use)

    # Ensure string and safe value
    if reply is None:
        reply = ""
    reply = str(reply)

    chat_history[-1][1] = reply
    return chat_history, ""


def clear_history():
    return []


# ---------------------
# Gradio UI
# ---------------------
with gr.Blocks(title="AI Computer Expert (multi-model)") as demo:
    gr.Markdown("# AI Computer Expert — Multi-model (Hugging Face)")
    gr.Markdown("Ask anything about computers. Choose a model from the list or type a custom model id. Enter a HF API key (optional) to use remote inference.")

    with gr.Row():
        with gr.Column(scale=3):
            chatbot = gr.Chatbot(label="AI Computer Expert")
            user_input = gr.Textbox(placeholder="Type your question here (e.g. 'Why is my laptop overheating?')",
                                    show_label=False, lines=2)
            with gr.Row():
                send_btn = gr.Button("Send")
                clear_btn = gr.Button("Clear")
        with gr.Column(scale=1):
            mode = gr.Radio(choices=["HuggingFace (remote)", "Local (transformers)"],
                            value="HuggingFace (remote)", label="Mode")
            model_dropdown = gr.Dropdown(label="Select model", choices=COMMON_MODELS, value=DEFAULT_MODEL)
            custom_model = gr.Textbox(label="Custom model id (optional)",
                                      placeholder="owner/model-name (takes precedence over dropdown)")
            api_key_box = gr.Textbox(label="HuggingFace API Key (optional)", type="password",
                                     placeholder="hf_xxx ...")
            max_tokens = gr.Slider(label="Max new tokens", minimum=32, maximum=1024, step=32, value=256)
            gr.Markdown("**Notes:**\n- Some large/gated models require special access or are not hosted for inference. If you see 410/404, try a different model or set up an Inference Endpoint.\n- If you don't want to use remote API, switch to Local and ensure you have the model installed and `transformers` available.")

    examples = [
        "My Windows 10 laptop randomly restarts — how do I diagnose this?",
        "How can I speed up boot time on Ubuntu?",
        "Explain how RAID 1 differs from RAID 5 and when to use each.",
        "I get 'kernel panic' on boot, what logs should I check?"
    ]
    gr.Examples(examples=examples, inputs=user_input)

    send_btn.click(respond,
                   inputs=[user_input, chatbot, mode, model_dropdown, custom_model, api_key_box, max_tokens],
                   outputs=[chatbot, user_input])
    user_input.submit(respond,
                      inputs=[user_input, chatbot, mode, model_dropdown, custom_model, api_key_box, max_tokens],
                      outputs=[chatbot, user_input])
    # Reuse clear_history (defined above) so the clear button and the helper stay in sync
    clear_btn.click(clear_history, None, chatbot)

    gr.Markdown("---")
    gr.Markdown("*This app supports many HF models; some models may be gated or not available via hosted inference.*")

if __name__ == "__main__":
    # port can be set with the PORT env var (useful for Spaces)
    demo.launch(server_name="0.0.0.0", server_port=int(os.getenv("PORT", 7860)))
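
# ---------------------
# Running the app
# ---------------------
# A minimal sketch of a typical local run, assuming the optional dependencies used
# above are installed (for example: pip install gradio requests huggingface_hub transformers):
#
#   export HUGGINGFACEHUB_API_TOKEN=hf_xxx   # optional; enables remote inference without typing a key in the UI
#   python app.py                            # UI is served on port 7860, or $PORT if that env var is set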