File size: 3,950 Bytes
9d2d217
 
cfa3f95
ef80b0e
cfa3f95
8520334
a8a7c74
cfa3f95
 
 
1e6a29d
 
 
 
 
 
 
 
cfa3f95
1e6a29d
24a1793
cfa3f95
1e6a29d
cfa3f95
1e6a29d
cfa3f95
1e6a29d
 
 
 
 
 
 
 
 
 
 
 
 
 
 
cfa3f95
 
1e6a29d
 
 
 
cfa3f95
1e6a29d
cfa3f95
1e6a29d
 
cfa3f95
24a1793
cfa3f95
 
 
 
 
 
24a1793
cfa3f95
1e6a29d
 
 
 
 
 
 
cfa3f95
1e6a29d
 
cfa3f95
1e6a29d
cfa3f95
1e6a29d
 
 
 
 
 
cfa3f95
1e6a29d
cfa3f95
1e6a29d
 
 
 
cfa3f95
1e6a29d
 
 
 
 
 
 
 
cfa3f95
1e6a29d
 
 
 
 
cfa3f95
1e6a29d
cfa3f95
1e6a29d
cfa3f95
1e6a29d
 
 
cfa3f95
 
1e6a29d
cfa3f95
 
7501b6e
9d2d217
 
1e6a29d
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
import gradio as gr
import os
from huggingface_hub import InferenceClient

# Model configuration - Using Inference API
# Fully-qualified Hub repo id of the model queried through the Inference API.
MODEL_NAME = "mistralai/Mistral-7B-Instruct-v0.3"
# Fallback system prompt, used when the caller supplies an empty one.
DEFAULT_SYSTEM_PROMPT = "You are a helpful AI assistant powered by Mistral."
# Global InferenceClient instance; created lazily by init_client().
client = None

def init_client():
    """Initialize the global Hugging Face Inference Client.

    Reads the ``HF_TOKEN`` environment variable (configured via Space
    secrets) and, when present, creates the module-level ``client``.

    Returns:
        bool: True if the client was initialized, False if no token was found.
    """
    global client
    hf_token = os.environ.get("HF_TOKEN")
    # Guard clause: without a token the API cannot be called at all.
    if not hf_token:
        print("Warning: HF_TOKEN not found. Please set it in Space secrets.")
        return False
    client = InferenceClient(token=hf_token)
    print("Inference client initialized successfully")
    return True

def generate_response(message, history, system_prompt, max_tokens, temperature):
    """Generate an assistant reply via the Hugging Face Inference API.

    Args:
        message: The new user message.
        history: List of ``(user, assistant)`` text pairs from the chat so far.
        system_prompt: System prompt text; falls back to
            ``DEFAULT_SYSTEM_PROMPT`` when empty.
        max_tokens: Maximum number of tokens to generate.
        temperature: Sampling temperature.

    Returns:
        str: The model's reply, or a human-readable error message on failure.
    """
    global client

    # Lazily (re)initialize the client if startup initialization failed.
    if client is None and not init_client():
        return "Error: HF_TOKEN not configured. Please add it in Space settings."

    try:
        # Convert Gradio tuple-style history into OpenAI-style chat messages.
        messages = [{"role": "system", "content": system_prompt or DEFAULT_SYSTEM_PROMPT}]
        for user_turn, assistant_turn in history:
            if user_turn:
                messages.append({"role": "user", "content": user_turn})
            if assistant_turn:
                messages.append({"role": "assistant", "content": assistant_turn})
        messages.append({"role": "user", "content": message})

        # Call Inference API
        response = client.chat_completion(
            model=MODEL_NAME,
            messages=messages,
            max_tokens=int(max_tokens),
            temperature=float(temperature),
        )
        return response.choices[0].message.content

    except Exception as e:
        # Surface API/network failures to the UI instead of crashing the app.
        return f"Error: {str(e)}"

# Create interface. The banner is derived from MODEL_NAME so the console
# output always matches the model actually being queried.
print(f"===== {MODEL_NAME} Chat =====")
print(f"Using Inference API with model: {MODEL_NAME}")

# Initialize client at startup so the UI can warn if the token is missing.
client_ready = init_client()

# ---------------------------------------------------------------------------
# Gradio UI. Header text is derived from MODEL_NAME so the page always
# describes the model actually being queried.
# ---------------------------------------------------------------------------
with gr.Blocks(title="Mistral Chat", theme=gr.themes.Soft()) as iface:
    gr.Markdown(f"""
    # 🤖 Mistral Instruct Chat
    **Powered by Hugging Face Inference API**

    This Space queries `{MODEL_NAME}` via the serverless Inference API
    for efficient inference — no local GPU required.
    """)

    # Warn in the page itself when startup could not find an API token.
    if not client_ready:
        gr.Markdown("⚠️ **Warning:** HF_TOKEN not found. Please configure it in Space secrets.")

    chatbot = gr.Chatbot(height=450, label="Chat")

    with gr.Row():
        msg = gr.Textbox(
            placeholder="Type your message here...",
            label="Your Message",
            scale=4,
            lines=2,
        )
        submit_btn = gr.Button("Send 🚀", variant="primary", scale=1)

    with gr.Accordion("⚙️ Settings", open=False):
        system_prompt = gr.Textbox(
            value=DEFAULT_SYSTEM_PROMPT,
            label="System Prompt",
            lines=2,
        )
        with gr.Row():
            max_tokens = gr.Slider(
                minimum=64,
                maximum=2048,
                value=512,
                step=64,
                label="Max Tokens",
            )
            temperature = gr.Slider(
                minimum=0.1,
                maximum=2.0,
                value=0.7,
                step=0.1,
                label="Temperature",
            )

    clear_btn = gr.Button("🗑️ Clear Chat")

    def respond(message, history, system_prompt, max_tokens, temperature):
        """Handle one chat turn: query the model and append to the history."""
        # Ignore empty / whitespace-only submissions.
        if not message.strip():
            return "", history
        response = generate_response(message, history, system_prompt, max_tokens, temperature)
        history.append((message, response))
        # First output clears the textbox; second refreshes the chatbot.
        return "", history

    # Enter in the textbox and the Send button trigger the same handler.
    msg.submit(respond, [msg, chatbot, system_prompt, max_tokens, temperature], [msg, chatbot])
    submit_btn.click(respond, [msg, chatbot, system_prompt, max_tokens, temperature], [msg, chatbot])
    clear_btn.click(lambda: [], None, chatbot)

# Script entry point: bind to all interfaces on 7860, the standard
# Hugging Face Spaces port.
if __name__ == "__main__":
    iface.launch(server_port=7860, server_name="0.0.0.0")