import os

import gradio as gr
import torch
from transformers import AutoModelForCausalLM, AutoTokenizer

# Load the API token from the environment (e.g. Hugging Face Spaces secrets)
# so it is never hard-coded in the source.
HF_TOKEN = os.getenv("HF_TOKEN")

MODEL_NAME = "eabybabu/chatbot_model"  # Replace with your actual model name

tokenizer = AutoTokenizer.from_pretrained(MODEL_NAME, token=HF_TOKEN)

# Prefer GPU when available.
device = "cuda" if torch.cuda.is_available() else "cpu"

model = AutoModelForCausalLM.from_pretrained(MODEL_NAME, token=HF_TOKEN).to(device)

# FIX: torch dynamic quantization (qint8) is CPU-only. Applying it to a model
# that lives on CUDA is unsupported, so only quantize when running on CPU.
if device == "cpu":
    model = torch.quantization.quantize_dynamic(
        model, {torch.nn.Linear}, dtype=torch.qint8
    )
model.eval()  # inference only — disable dropout etc.


def chatbot_response(user_input, chat_history):
    """Generate a chatbot reply conditioned on the running chat history.

    Args:
        user_input: The user's latest message (str).
        chat_history: List of (user_message, bot_reply) tuples (Gradio state).

    Returns:
        (chat_history, "") on success — the updated history plus an empty
        string to clear the input textbox — or (chat_history, "Error: ...")
        if generation fails.
    """
    try:
        # Flatten prior turns into a plain-text prompt.
        chat_context = " ".join(
            [f"User: {msg}\nChatbot: {resp}" for msg, resp in chat_history]
        )
        prompt = f"{chat_context}\nUser: {user_input}\nChatbot:"

        inputs = tokenizer.encode(prompt, return_tensors="pt").to(device)

        # FIX: temperature/top_k/top_p are ignored unless do_sample=True
        # (default decoding is greedy). Also use max_new_tokens so a long
        # history cannot consume the whole generation budget, and wrap in
        # no_grad() since we never backpropagate.
        with torch.no_grad():
            outputs = model.generate(
                inputs,
                max_new_tokens=200,
                do_sample=True,
                temperature=0.7,
                top_k=50,
                top_p=0.9,
                repetition_penalty=1.5,
                num_return_sequences=1,
                pad_token_id=tokenizer.eos_token_id,
            )

        # FIX: decode only the newly generated tokens — decoding outputs[0]
        # in full would echo the entire prompt back as part of the reply.
        response = tokenizer.decode(
            outputs[0][inputs.shape[-1]:], skip_special_tokens=True
        )

        # FIX: the original used set(), which scrambles sentence order
        # nondeterministically. dict.fromkeys dedupes while preserving order.
        response = ". ".join(dict.fromkeys(response.split(". ")))

        chat_history.append((user_input, response))
        return chat_history, ""
    except Exception as e:
        # UI boundary: surface the error to the user instead of crashing.
        return chat_history, f"Error: {str(e)}"


# Gradio UI with persistent chat history.
with gr.Blocks() as demo:
    gr.Markdown("# 🤖 Cybersecurity Chatbot")
    gr.Markdown("Ask me anything about ISO 27001, ISO 27005, MITRE ATT&CK, and more!")

    chatbot = gr.Chatbot(label="Chat History")
    user_input = gr.Textbox(label="Type your question:")
    submit_btn = gr.Button("Ask Chatbot")
    chat_history = gr.State([])

    submit_btn.click(
        chatbot_response,
        inputs=[user_input, chat_history],
        outputs=[chatbot, user_input],
    )

# Launch the Gradio app.
demo.launch()