"""Gradio chat Space backed by the Hugging Face Inference API.

Fixes over the previous revision:
- Repaired a syntax error: a warning-string literal was split across a raw
  newline (unterminated string).
- Cleaned malformed docstrings (stray extra quotes).
- Aligned all user-facing text with the actual model (Mistral-7B-Instruct),
  which was previously mislabeled as "Kimi-K2".
"""

import os

import gradio as gr
from huggingface_hub import InferenceClient

# Model configuration — served remotely via the Inference API (no local weights).
MODEL_NAME = "mistralai/Mistral-7B-Instruct-v0.3"
DEFAULT_SYSTEM_PROMPT = "You are a helpful AI assistant powered by Mistral."

# Lazily-initialized singleton client; None until init_client() succeeds.
client = None


def init_client():
    """Initialize the Hugging Face Inference Client.

    Reads the HF_TOKEN environment variable (configured in Space secrets).

    Returns:
        bool: True when the client was created, False when the token is missing.
    """
    global client
    hf_token = os.environ.get("HF_TOKEN")
    if hf_token:
        client = InferenceClient(token=hf_token)
        print("Inference client initialized successfully")
        return True
    print("Warning: HF_TOKEN not found. Please set it in Space secrets.")
    return False


def generate_response(message, history, system_prompt, max_tokens, temperature):
    """Generate an assistant reply using the Hugging Face Inference API.

    Args:
        message: Latest user message.
        history: List of (user, assistant) message pairs from the Chatbot.
        system_prompt: System prompt; falls back to DEFAULT_SYSTEM_PROMPT if empty.
        max_tokens: Maximum tokens to generate (coerced to int).
        temperature: Sampling temperature (coerced to float).

    Returns:
        str: The model's reply, or a human-readable error message on failure.
    """
    global client
    # Retry client creation on first use in case the token was added after startup.
    if client is None and not init_client():
        return "Error: HF_TOKEN not configured. Please add it in Space settings."

    try:
        # Rebuild the full conversation as OpenAI-style chat messages.
        messages = [
            {"role": "system", "content": system_prompt or DEFAULT_SYSTEM_PROMPT}
        ]
        for user_turn, assistant_turn in history:
            if user_turn:
                messages.append({"role": "user", "content": user_turn})
            if assistant_turn:
                messages.append({"role": "assistant", "content": assistant_turn})
        messages.append({"role": "user", "content": message})

        response = client.chat_completion(
            model=MODEL_NAME,
            messages=messages,
            max_tokens=int(max_tokens),
            temperature=float(temperature),
        )
        return response.choices[0].message.content
    except Exception as e:
        # Surface API failures (rate limits, auth, model loading) in the chat UI
        # instead of crashing the Space.
        return f"Error: {str(e)}"


# --- Interface construction (runs at import time, as Spaces expect) ---
print(f"===== {MODEL_NAME} Chat Space =====")
print(f"Using Inference API with model: {MODEL_NAME}")

# Initialize client at startup so the UI can warn about a missing token.
client_ready = init_client()

with gr.Blocks(title="Mistral-7B Chat", theme=gr.themes.Soft()) as iface:
    gr.Markdown(
        """
        # 🤖 Mistral-7B Instruct Chat
        **Powered by Hugging Face Inference API**

        This Space calls the Mistral-7B-Instruct model via API for efficient inference.
        """
    )
    if not client_ready:
        gr.Markdown(
            "⚠️ **Warning:** HF_TOKEN not found. "
            "Please configure it in Space secrets."
        )

    chatbot = gr.Chatbot(height=450, label="Chat")
    with gr.Row():
        msg = gr.Textbox(
            placeholder="Type your message here...",
            label="Your Message",
            scale=4,
            lines=2,
        )
        submit_btn = gr.Button("Send 🚀", variant="primary", scale=1)

    with gr.Accordion("⚙️ Settings", open=False):
        system_prompt = gr.Textbox(
            value=DEFAULT_SYSTEM_PROMPT,
            label="System Prompt",
            lines=2,
        )
        with gr.Row():
            max_tokens = gr.Slider(
                minimum=64, maximum=2048, value=512, step=64, label="Max Tokens"
            )
            temperature = gr.Slider(
                minimum=0.1, maximum=2.0, value=0.7, step=0.1, label="Temperature"
            )

    clear_btn = gr.Button("🗑️ Clear Chat")

    def respond(message, history, system_prompt, max_tokens, temperature):
        """Event handler: append the (user, assistant) turn and clear the textbox."""
        # Ignore whitespace-only submissions.
        if not message.strip():
            return "", history
        response = generate_response(
            message, history, system_prompt, max_tokens, temperature
        )
        history.append((message, response))
        return "", history

    handler_inputs = [msg, chatbot, system_prompt, max_tokens, temperature]
    msg.submit(respond, handler_inputs, [msg, chatbot])
    submit_btn.click(respond, handler_inputs, [msg, chatbot])
    clear_btn.click(lambda: [], None, chatbot)

if __name__ == "__main__":
    iface.launch(server_name="0.0.0.0", server_port=7860)