# chat.py
"""Interactive CLI chat with a local ChatDoctor (LLaMA) model.

Loads a pretrained LLaMA checkpoint with Hugging Face ``transformers``,
keeps a running conversation history, and answers patient questions on
the command line until the user types ``exit``/``quit``.
"""
import os
import gc
import torch
from transformers import LlamaTokenizer, LlamaForCausalLM

# =============================
# Configuration
# =============================
MODEL_PATH = r"C:\Users\JAY\Downloads\Chatdoc\ChatDoctor\pretrained"
MAX_NEW_TOKENS = 200
TEMPERATURE = 0.5
TOP_K = 50
REPETITION_PENALTY = 1.1

# Detect device
device = "cuda" if torch.cuda.is_available() else "cpu"
print(f"Loading model from {MODEL_PATH} on {device}...")

# =============================
# Load Tokenizer and Model
# =============================
tokenizer = LlamaTokenizer.from_pretrained(MODEL_PATH)
model = LlamaForCausalLM.from_pretrained(
    MODEL_PATH,
    device_map="auto",            # automatically dispatch weights to GPU
    # FIX: half precision only when a GPU is available — fp16 on CPU is
    # extremely slow and several CPU kernels do not support it.
    torch_dtype=torch.float16 if device == "cuda" else torch.float32,
    low_cpu_mem_usage=True,       # optimize CPU memory
)
# DO NOT call model.to(device) when using device_map="auto"

generator = model.generate

print("✅ Model loaded successfully!\n")

# =============================
# Chat History
# =============================
systemprompt = (
    "You are ChatDoctor — an intelligent, empathetic medical AI assistant. "
    "Your role is to carefully gather medical information, reason clinically, "
    "and provide safe, evidence-based guidance. Follow these instructions strictly:\n"
    "1. When a patient describes their illness, DO NOT diagnose immediately.\n"
    "2. Ask relevant, targeted questions to collect all necessary details such as "
    "symptoms, duration, severity, lifestyle habits, medical history, medications, "
    "and any recent tests or changes.\n"
    "3. Once you have enough information for a preliminary diagnosis, clearly explain "
    "your reasoning and possible causes in simple medical language.\n"
    "4. Then, provide a clear and structured response that includes:\n"
    "   - **Diagnosis:** probable or confirmed condition(s)\n"
    "   - **Dietary Advice:** foods to include and avoid\n"
    "   - **Lifestyle Advice:** exercise, sleep, stress, and other habits\n"
    "5. Be concise, empathetic, and professional at all times.\n"
    "6. Never switch roles or generate “Patient:” responses. Always remain as ChatDoctor.\n"
    "7. If symptoms suggest a serious or emergency condition, advise the patient to "
    "seek immediate medical attention."
)

# Running transcript; each entry is one turn prefixed with its role tag.
# NOTE(review): history grows without bound — long sessions can exceed the
# model's context window; consider windowing/summarizing old turns.
history = [
    systemprompt,
    "ChatDoctor: I am ChatDoctor, what medical questions do you have?",
]


# =============================
# Response Function
# =============================
def get_response(user_input):
    """Append ``user_input`` to the chat history, generate the doctor's reply,
    record it in the history, and return it as a string.

    Side effects: mutates the module-level ``history`` list and frees
    GPU/CPU memory used by the generation tensors.
    """
    global history

    human_invitation = "Patient: "
    doctor_invitation = "ChatDoctor: "

    # Append user input
    history.append(human_invitation + user_input)

    # Build prompt
    prompt = "\n".join(history) + "\n" + doctor_invitation

    # FIX: with device_map="auto" the embedding layer may live on any shard;
    # model.device is the correct target for the inputs, not the global
    # `device` string.
    input_ids = tokenizer(prompt, return_tensors="pt").input_ids.to(model.device)

    # Generate response
    with torch.no_grad():
        output_ids = generator(
            input_ids,
            max_new_tokens=MAX_NEW_TOKENS,
            do_sample=True,
            temperature=TEMPERATURE,
            top_k=TOP_K,
            repetition_penalty=REPETITION_PENALTY,
        )

    # FIX: decode only the newly generated tokens. Slicing the decoded
    # string by len(prompt) is fragile because tokenizer encode/decode does
    # not reproduce the prompt byte-for-byte (special tokens, whitespace
    # normalization), which could clip or pollute the response.
    new_tokens = output_ids[0][input_ids.shape[1]:]
    response = tokenizer.decode(new_tokens, skip_special_tokens=True).strip()

    # FIX: the model may hallucinate a follow-up patient turn anywhere in
    # its output; truncate at the first role switch instead of only
    # stripping a leading "Patient:" prefix.
    response = response.split("Patient:")[0].strip()

    # Append model response to history
    history.append(doctor_invitation + response)

    # Free memory
    del input_ids, output_ids
    gc.collect()
    # FIX: only touch the CUDA allocator when CUDA is actually available.
    if torch.cuda.is_available():
        torch.cuda.empty_cache()

    return response


# =============================
# CLI Chat
# =============================
if __name__ == "__main__":
    print("\n=== ChatDoctor is ready! Type your questions. ===\n")
    while True:
        try:
            user_input = input("Patient: ").strip()
            if user_input.lower() in ["exit", "quit"]:
                print("Exiting ChatDoctor. Goodbye!")
                break
            response = get_response(user_input)
            print("ChatDoctor: " + response + "\n")
        except KeyboardInterrupt:
            print("\nExiting ChatDoctor. Goodbye!")
            break