import gradio as gr
from huggingface_hub import InferenceClient
import time

# Shared inference client; respond() sends every chat request through it.
MODEL_ID = "HuggingFaceH4/zephyr-7b-beta"
client = InferenceClient(model=MODEL_ID)

# Custom CSS handed to gr.Blocks(css=...) to style the interface.
# NOTE(review): no component in the visible part of this file sets
# elem_classes matching .container / .chat-window / .controls, so these
# rules may not match any element — confirm before relying on them.
custom_css = """
.container {
    max-width: 800px;
    margin: auto;
    padding: 20px;
}
.chat-window {
    height: 600px;
    border-radius: 10px;
    background-color: #f5f5f5;
}
.controls {
    margin-top: 20px;
    padding: 15px;
    border-radius: 8px;
    background-color: #ffffff;
}
"""

def _resolve_sampling(creativity_level, temperature, top_p):
    """Return the ``(temperature, top_p)`` pair implied by the creativity preset."""
    if creativity_level == "Conservative":
        # Clamp the slider values downwards.
        return min(temperature, 0.5), min(top_p, 0.8)
    if creativity_level == "Balanced":
        # Fixed values: the slider settings are ignored for this preset.
        return 0.7, 0.9
    # Any other value is treated as "Creative".
    return max(temperature, 0.9), 0.95


def _resolve_max_tokens(response_length, max_tokens):
    """Cap ``max_tokens`` according to the requested response length."""
    caps = {"Short": 256, "Medium": 512}
    cap = caps.get(response_length)
    # Any other value ("Long") leaves the slider value untouched.
    return max_tokens if cap is None else min(max_tokens, cap)


def _build_messages(system_message, history, message):
    """Assemble the role/content message list sent to the chat endpoint."""
    messages = [{"role": "system", "content": system_message}]
    for user_msg, assistant_msg in history:
        # Either half of a turn may be empty; only non-empty halves are sent.
        if user_msg:
            messages.append({"role": "user", "content": user_msg})
        if assistant_msg:
            messages.append({"role": "assistant", "content": assistant_msg})
    messages.append({"role": "user", "content": message})
    return messages


def respond(message, history: list[tuple[str, str]],
            system_message, max_tokens, temperature, top_p,
            creativity_level, response_length):
    """Stream the assistant's reply to ``message`` as a growing string.

    Args:
        message: The user's new message.
        history: Earlier turns as ``(user_message, assistant_message)`` pairs.
        system_message: Content of the leading ``system`` message.
        max_tokens: Upper bound on generated tokens; capped at 256 for
            ``"Short"`` and 512 for ``"Medium"`` response lengths.
        temperature: Sampling temperature; adjusted by ``creativity_level``.
        top_p: Nucleus-sampling value; adjusted by ``creativity_level``.
        creativity_level: ``"Conservative"``, ``"Balanced"``; any other
            value is handled as ``"Creative"``.
        response_length: ``"Short"``, ``"Medium"``; any other value is
            handled as ``"Long"`` (no cap).

    Yields:
        The reply accumulated so far, once per received stream chunk. If the
        request or the stream raises, a single error message is yielded
        instead and the generator ends.
    """
    temperature, top_p = _resolve_sampling(creativity_level, temperature, top_p)
    max_tokens = _resolve_max_tokens(response_length, max_tokens)
    messages = _build_messages(system_message, history, message)

    response = ""
    try:
        for chunk in client.chat_completion(
            messages,
            max_tokens=max_tokens,
            stream=True,
            temperature=temperature,
            top_p=top_p,
        ):
            # A chunk may arrive with no choices or with ``content`` set to
            # None; concatenating None would raise TypeError and discard the
            # partial reply, so such chunks contribute no text.
            choices = chunk.choices
            token = choices[0].delta.content if choices else None
            if token:
                response += token
                time.sleep(0.02)  # Small delay for a typewriter effect
            yield response
    except Exception as e:
        # UI boundary: report the failure in the chat instead of crashing.
        yield f"Xin lỗi, đã có lỗi xảy ra: {e}"

# Build the UI
with gr.Blocks(css=custom_css) as demo:
    gr.Markdown("# 🤖 AI Chat Assistant")

    with gr.Row():
        # Left column (scale 2): chat panel plus the generation controls.
        with gr.Column(scale=2):
            # NOTE(review): the components below are created inside this
            # column, in the same order in which they are passed to
            # ChatInterface. Layout presumably depends on that creation
            # order/context — keep it when editing; confirm in Gradio docs.
            chat_display = gr.Chatbot(
                height=500, container=True, show_share_button=True
            )
            message_box = gr.Textbox(
                placeholder="Nhập tin nhắn của bạn...",
                container=True,
                scale=7,
            )
            system_message_box = gr.Textbox(
                value="You are a friendly and helpful AI assistant.",
                label="System Message",
                lines=2,
            )
            max_tokens_slider = gr.Slider(
                label="Max Tokens",
                minimum=1,
                maximum=2048,
                value=512,
                step=1,
            )
            temperature_slider = gr.Slider(
                label="Temperature",
                minimum=0.1,
                maximum=4.0,
                value=0.7,
                step=0.1,
            )
            top_p_slider = gr.Slider(
                label="Top-p (nucleus sampling)",
                minimum=0.1,
                maximum=1.0,
                value=0.95,
                step=0.05,
            )
            creativity_radio = gr.Radio(
                choices=["Conservative", "Balanced", "Creative"],
                label="Creativity Level",
                value="Balanced",
            )
            length_radio = gr.Radio(
                choices=["Short", "Medium", "Long"],
                label="Response Length",
                value="Medium",
            )

            # The additional inputs are forwarded to respond() after
            # (message, history), in this order.
            chatbot = gr.ChatInterface(
                fn=respond,
                chatbot=chat_display,
                textbox=message_box,
                additional_inputs=[
                    system_message_box,
                    max_tokens_slider,
                    temperature_slider,
                    top_p_slider,
                    creativity_radio,
                    length_radio,
                ],
                submit_btn="Gửi 📤",
                retry_btn="Thử lại 🔄",
                undo_btn="Hoàn tác ↩️",
                clear_btn="Xóa 🗑️",
            )

        # Right column (scale 1): collapsible usage guide, closed by default.
        with gr.Column(scale=1):
            with gr.Accordion(label="Hướng dẫn sử dụng", open=False):
                gr.Markdown("""
                ### Cách sử dụng:
                1. Nhập tin nhắn vào ô văn bản
                2. Điều chỉnh các thông số nếu muốn:
                   - System Message: Định hướng cho AI
                   - Max Tokens: Độ dài tối đa của câu trả lời
                   - Temperature: Độ sáng tạo
                   - Top-p: Độ đa dạng của từ vựng
                3. Chọn mức độ sáng tạo và độ dài phản hồi
                4. Nhấn 'Gửi' để chat
                
                ### Các nút chức năng:
                - 🔄 Thử lại: Tạo câu trả lời mới
                - ↩️ Hoàn tác: Quay lại tin nhắn trước
                - 🗑️ Xóa: Xóa lịch sử chat
                """)

if __name__ == "__main__":
    # Start the app only when this file is run as a script.
    # NOTE(review): share=True together with server_name="0.0.0.0" presumably
    # makes the app reachable from outside this machine — confirm intended.
    launch_options = {"share": True, "server_name": "0.0.0.0"}
    demo.launch(**launch_options)