import gradio as gr
from transformers import AutoTokenizer, AutoModelForCausalLM, GenerationConfig
import torch
import time

# =======================================================
# Load Model
# =======================================================
model_name = "augtoma/qCammel-13"
print("Loading tokenizer and model...")

tokenizer = AutoTokenizer.from_pretrained(model_name)
# generate() below is given tokenizer.pad_token_id, so make sure one exists:
# fall back to the end-of-sequence token when the tokenizer defines no pad token.
if tokenizer.pad_token is None:
    tokenizer.pad_token = tokenizer.eos_token

# Load the weights in half precision and let device_map="auto" decide where
# they are placed, then switch the module to evaluation mode right away.
# NOTE(review): trust_remote_code=True permits running Python code shipped in
# the model repository -- confirm this checkpoint actually requires it.
model = AutoModelForCausalLM.from_pretrained(
    model_name,
    torch_dtype=torch.float16,
    device_map="auto",
    low_cpu_mem_usage=True,
    trust_remote_code=True,
).eval()

print("Model loaded successfully!")
print(f"Device map: {model.hf_device_map}")
print(f"Model device: {next(model.parameters()).device}")


# =======================================================
# Generate Doctor Response (Stateless + Clean Replies)
# =======================================================
# Reply shown when there is no usable user message to answer.
_EMPTY_INPUT_REPLY = "⚠️ Please describe your symptoms or ask a question."
# Reply used when the model output is too short to be a meaningful answer.
_FALLBACK_REPLY = "I understand your concern. Could you please provide more details about your symptoms?"
# Role label the prompt ends with; the model sometimes echoes it back.
_ROLE_PREFIX = "assistant:"
_MAX_SENTENCES = 3            # keep at most this many ". "-separated sentences
_MIN_REPLY_CHARS = 10         # anything shorter is replaced by the fallback
_STREAM_STEP_CHARS = 4        # characters revealed per simulated-stream update
_STREAM_DELAY_SECONDS = 0.015  # pause between simulated-stream updates


def _clean_response(raw_text):
    """Reduce raw decoded model output to a short reply suitable for display."""
    text = raw_text.strip()

    # Drop an echoed "Assistant:" label (case-insensitive) at the very start.
    if text.lower().startswith(_ROLE_PREFIX):
        text = text[len(_ROLE_PREFIX):].strip()

    # The prompt is a plain "User: ... / Assistant:" transcript, so the model
    # may continue it with an invented next user turn; keep only the text
    # before the first such line.
    text = text.split("\nUser:", 1)[0].strip()

    sentences = text.split(". ")
    if len(sentences) > _MAX_SENTENCES:
        # Splitting on ". " consumed the period that ended the last kept
        # sentence, so put it back after truncating.
        text = ". ".join(sentences[:_MAX_SENTENCES]).strip() + "."

    if len(text) < _MIN_REPLY_CHARS:
        return _FALLBACK_REPLY
    return text


def generate_doctor_response(history):
    """Generate a reply to the last message in *history* and yield chat updates.

    Only the content of the final entry of ``history`` is placed in the prompt;
    earlier entries are ignored. The assistant reply is appended to ``history``
    in place.

    Args:
        history: List of ``{"role": ..., "content": ...}`` message dicts whose
            last entry holds the user's message. ``None`` or an empty list is
            treated as a blank message.

    Yields:
        The message list after each update: first a series of snapshots in
        which the assistant reply grows a few characters at a time (followed
        by a "▌" cursor), then the list with the final reply. For a missing or
        blank user message a single list containing a warning is yielded.
    """
    if history is None:
        history = []
    # An empty history has no last message; treat it like blank input rather
    # than failing with IndexError.
    user_message = history[-1]["content"] if history else ""

    if not user_message.strip():
        history.append({"role": "assistant", "content": _EMPTY_INPUT_REPLY})
        yield history
        return

    # 🩺 New Prompt (no 'Patient:' or 'Doctor:' lines)
    prompt = f"""
You are a compassionate and professional medical expert.
Your role is to help users by providing clear, empathetic, and accurate medical information.

Guidelines:
1. Do NOT include words like 'Doctor:' or 'Patient:' in your replies.
2. Respond naturally and directly to the user's concern.
3. Keep answers short, clear, and medically sound.
4. Add a disclaimer when appropriate: 
   ⚕️ *This is AI-generated information and not a substitute for professional medical advice.*

Now, please respond to the user's message below:

User: {user_message}
Assistant:
"""

    # Tokenize the prompt and move the tensors to the model's device.
    inputs = tokenizer(prompt, return_tensors="pt").to(model.device)

    gen_config = GenerationConfig(
        temperature=0.7,
        top_p=0.9,
        do_sample=True,
        max_new_tokens=500,
        pad_token_id=tokenizer.pad_token_id,
        eos_token_id=tokenizer.eos_token_id,
        repetition_penalty=1.2
    )

    # Remember the prompt length so only newly generated tokens are decoded.
    input_len = inputs["input_ids"].shape[1]

    with torch.no_grad():
        output_ids = model.generate(**inputs, generation_config=gen_config)

    generated_ids = output_ids[0][input_len:]
    response = _clean_response(
        tokenizer.decode(generated_ids, skip_special_tokens=True)
    )

    # Simulated streaming: generation has already finished, so reveal the
    # reply a few characters at a time with a trailing cursor.
    history.append({"role": "assistant", "content": ""})
    for end in range(_STREAM_STEP_CHARS, len(response) + _STREAM_STEP_CHARS, _STREAM_STEP_CHARS):
        history[-1]["content"] = response[:end] + "▌"
        yield history.copy()
        time.sleep(_STREAM_DELAY_SECONDS)

    # Final update without the cursor.
    history[-1]["content"] = response
    yield history


# =======================================================
# Gradio Interface
# =======================================================
# Build the chat UI: a message list, an input box, send/clear buttons and a
# few example prompts, wired to the `respond` handler defined below.
with gr.Blocks(theme=gr.themes.Soft()) as demo:
    gr.Markdown("# 🩺 AI Doctor Chat Assistant")

    chatbot = gr.Chatbot(
        label="💬 Doctor Consultation",
        type='messages',
        avatar_images=(
            "https://cdn-icons-png.flaticon.com/512/706/706830.png",  # Patient
            "https://cdn-icons-png.flaticon.com/512/3774/3774299.png"   # Doctor
        ),
        height=500
    )

    with gr.Row():
        user_input = gr.Textbox(
            placeholder="Type your symptoms or question here...",
            label="🧍 Your Message",
            lines=2,
            scale=4
        )

    with gr.Row():
        send_btn = gr.Button("💬 Send", variant="primary", scale=1)
        clear_btn = gr.Button("🧹 Clear Chat", scale=1)

    gr.Examples(
        examples=[
            "I have a fever of 102°F since yesterday",
            "I've been having headaches for the past week",
            "I feel very tired all the time",
            "I have a sore throat and body aches",
        ],
        inputs=user_input,
        label="💡 Example Questions"
    )

    # =======================================================
    # Respond Function — Model forgets, Chat UI remembers
    # =======================================================
    def respond(message, history):
        """Handle one submitted message and stream the assistant reply.

        Args:
            message: Current text of the input box.
            history: Current chatbot value, a list of
                ``{"role": ..., "content": ...}`` dicts (``None`` is treated
                as an empty list).

        Yields:
            ``(textbox_value, history)`` pairs for the ``[user_input,
            chatbot]`` outputs. The textbox value is always ``""`` so the
            input box is cleared.
        """
        history = history or []
        user_message = (message or "").strip()
        if not user_message:
            # This handler is a generator, so a `return value` here would not
            # produce an output update; yield the unchanged state instead.
            yield "", history
            return

        # Show the user message right away, before the (slow) generation
        # starts, instead of only with the first piece of the reply.
        history.append({"role": "user", "content": user_message})
        yield "", history

        # Model sees only current message (no memory)
        temp_history = [{"role": "user", "content": user_message}]

        # The assistant entry is added on the first update and then edited in
        # place for every later one.
        reply = None
        for updated_history in generate_doctor_response(temp_history):
            text = updated_history[-1]["content"]
            if reply is None:
                reply = {"role": "assistant", "content": text}
                history.append(reply)
            else:
                reply["content"] = text
            yield "", history

    # =======================================================
    # Button & Input Bindings
    # =======================================================
    # Clicking "Send" and pressing Enter in the textbox run the same handler.
    send_btn.click(respond, [user_input, chatbot], [user_input, chatbot])
    user_input.submit(respond, [user_input, chatbot], [user_input, chatbot])
    # Clearing just resets the chatbot to an empty message list.
    clear_btn.click(lambda: [], None, chatbot, queue=False)


# =======================================================
# Launch App
# =======================================================
if __name__ == "__main__":
    # Turn on queuing first, then start the server; share=True asks Gradio
    # for a public share link in addition to the local URL.
    demo.queue().launch(share=True)