# student_assistant_chatbot.py
# MSAI-631 Group Project – improved version
#
# A local student-helper chatbot built on microsoft/phi-2 and served with a
# Gradio web UI.

import torch
from transformers import AutoTokenizer, AutoModelForCausalLM, pipeline, BitsAndBytesConfig
import gradio as gr
# All imports live at the top of the file (PEP 8); the version prints below
# are deliberate debug output for diagnosing environment issues.
import huggingface_hub
import transformers

print("huggingface_hub version:", huggingface_hub.__version__)
print("transformers version:", transformers.__version__)

# =============================================
# CONFIGURATION
# =============================================
MODEL_NAME = "microsoft/phi-2"

# System prompt – gives the model its student-helper personality
SYSTEM_PROMPT = """You are a helpful, friendly, and organized academic assistant designed to help university students succeed.

You are supportive, clear, structured, and encouraging. You help with:
- Planning study schedules and time management
- Breaking down assignments and projects
- Creating study plans and revision timetables
- Explaining concepts in simple terms
- Suggesting study techniques and productivity methods
- Organizing tasks and priorities
- Motivational support and avoiding procrastination

Always respond in a clear, structured way. Use bullet points, numbered lists, tables (in markdown) when it helps. Be specific, practical, and actionable.

Current date: February 2026"""

# Optional: 4-bit quantization to reduce memory usage (highly recommended)
quantization_config = BitsAndBytesConfig(
    load_in_4bit=True,
    bnb_4bit_compute_dtype=torch.float16,
    bnb_4bit_use_double_quant=True,
    bnb_4bit_quant_type="nf4",
)

# =============================================
# LOAD MODEL & TOKENIZER
# =============================================
print(f"Loading model: {MODEL_NAME}")
print("This may take a few minutes the first time...")

# The tokenizer converts text into tokens (numbers) the model can understand,
# and vice versa; it is loaded together with the model just below.
try:
    tokenizer = AutoTokenizer.from_pretrained(MODEL_NAME)
    model = AutoModelForCausalLM.from_pretrained(
        MODEL_NAME,
        quantization_config=quantization_config,  # comment out if you want full precision (needs more RAM)
        device_map="auto",
        trust_remote_code=False,  # phi-2 uses a standard architecture; no custom model code needed
        torch_dtype=torch.float16,
    )
    print("Model loaded successfully!")
except Exception as e:
    # Top-level boundary: report the failure and stop the script.
    print("Error loading model:", str(e))
    print("Try without quantization or check RAM/GPU availability.")
    raise SystemExit(1)

# Text-generation pipeline with the default sampling settings for this app.
# NOTE: the model above was already dispatched across devices by
# device_map="auto" in from_pretrained(); passing device_map to pipeline()
# again with a preloaded model is rejected/warned about by recent
# transformers versions, so it is intentionally omitted here.
generator = pipeline(
    "text-generation",
    model=model,
    tokenizer=tokenizer,
    max_new_tokens=800,
    do_sample=True,
    temperature=0.75,
    top_p=0.92,
    repetition_penalty=1.08,
)

# =============================================
# CHAT LOGIC
# =============================================
# Module-level conversation memory, kept in sync by the Gradio callback.
chat_history = []  # list of (user_msg, assistant_msg) tuples
# (imports and config stay the same)

def format_phi2_prompt(messages):
    """Render a chat-message list into phi-2's plain-text prompt format.

    phi-2 has no chat template; it was trained on prompts shaped like
    "Instruct: <question>\\n\\nOutput: <answer>". The system message is
    prepended as plain text and the prompt ends with a bare "Output:" so the
    model continues with the assistant's reply.

    :param messages: list of {"role": ..., "content": ...} dicts with roles
        "system", "user", or "assistant" (other roles are ignored).
    :return: the formatted prompt string.
    """
    parts = []
    for message in messages:
        role = message["role"]
        content = message["content"]
        if role == "system":
            parts.append(content + "\n\n")
        elif role == "user":
            parts.append("Instruct: " + content + "\n\n")
        elif role == "assistant":
            parts.append("Output: " + content + "\n\n")
    parts.append("Output:")
    return "".join(parts)


def chatbot(user_input, history):
    """Gradio callback: generate an assistant reply and extend the history.

    :param user_input: the text the student typed.
    :param history: list of (user_msg, assistant_msg) tuples from the Chatbot.
    :return: (updated_history, "") — the empty string clears the input box.
    """
    global chat_history

    # Ignore empty / whitespace-only submissions.
    if not user_input.strip():
        return history, ""

    # Rebuild the full message list from the visible history on every turn.
    messages = [{"role": "system", "content": SYSTEM_PROMPT}]
    for user_msg, assistant_msg in history:
        messages.append({"role": "user", "content": user_msg})
        messages.append({"role": "assistant", "content": assistant_msg})
    messages.append({"role": "user", "content": user_input})

    try:
        prompt = format_phi2_prompt(messages)
        response = generator(
            prompt,
            max_new_tokens=800,
            do_sample=True,
            temperature=0.75,
            top_p=0.92,
            repetition_penalty=1.08,
        )[0]["generated_text"]

        # The pipeline returns prompt + completion; keep only the completion.
        assistant_response = response[len(prompt):].strip()

        # Bug fix: phi-2 often keeps generating extra fabricated turns after
        # its answer — cut the reply at the first hallucinated "Instruct:".
        assistant_response = assistant_response.split("\nInstruct:")[0].strip()

        # Strip any EOS-token text from the reply. (The original only did
        # this when the reply *ended* with the token but then replaced all
        # occurrences — an unconditional strip is both simpler and safer.)
        if tokenizer.eos_token:
            assistant_response = assistant_response.replace(tokenizer.eos_token, "").strip()
    except Exception as e:
        # Surface generation failures inside the chat instead of crashing the UI.
        assistant_response = f"Error during generation: {str(e)}"

    # Update history (shared with the module-level copy).
    history.append((user_input, assistant_response))
    chat_history = history
    return history, ""


# =============================================
# GRADIO INTERFACE
# =============================================
with gr.Blocks(title="Student Academic Assistant – Phi-2", theme=gr.themes.Soft()) as demo:
    gr.Markdown("""
# 🎓 Student Academic Assistant Chatbot
Powered by **microsoft/phi-2** (local version)

Ask me anything about studying, planning, time management, motivation, etc.!

**Quick examples:**
- Create a 2-week study plan for finals
- How do I break down this 2000-word essay?
- Suggest Pomodoro alternatives for focus
- Help prioritize: exam prep vs group project vs reading
""")

    chatbot_ui = gr.Chatbot(height=500, label="Chat History")

    with gr.Row():
        user_input = gr.Textbox(
            placeholder="Ask me anything about studying...",
            show_label=False,
            scale=4,
        )
        submit_btn = gr.Button("Send", scale=1, variant="primary")
    clear_btn = gr.Button("Clear Chat")

    # Event handlers: send on button click or on Enter in the textbox.
    submit_btn.click(
        chatbot,
        inputs=[user_input, chatbot_ui],
        outputs=[chatbot_ui, user_input],
    )
    user_input.submit(
        chatbot,
        inputs=[user_input, chatbot_ui],
        outputs=[chatbot_ui, user_input],
    )
    # Bug fix: the Textbox expects a string value, not a list — reset it to "".
    clear_btn.click(lambda: ([], ""), outputs=[chatbot_ui, user_input])

    gr.Markdown("""
---
Runs locally. Model: microsoft/phi-2
""")

demo.launch()