# student_assistant_chatbot.py
# MSAI-631 Group Project – improved version
#
# A local student-helper chatbot built on microsoft/phi-2 and served with a
# Gradio web UI.

import torch
from transformers import AutoTokenizer, AutoModelForCausalLM, pipeline, BitsAndBytesConfig
import gradio as gr
# All imports live at the top of the file (PEP 8); the version prints below
# are deliberate debug output for diagnosing environment issues.
import huggingface_hub
import transformers

print("huggingface_hub version:", huggingface_hub.__version__)
print("transformers version:", transformers.__version__)

# =============================================
# CONFIGURATION
# =============================================
MODEL_NAME = "microsoft/phi-2"

# System prompt – gives the model its student-helper personality
SYSTEM_PROMPT = """You are a helpful, friendly, and organized academic assistant designed to help university students succeed.

You are supportive, clear, structured, and encouraging. You help with:
- Planning study schedules and time management
- Breaking down assignments and projects
- Creating study plans and revision timetables
- Explaining concepts in simple terms
- Suggesting study techniques and productivity methods
- Organizing tasks and priorities
- Motivational support and avoiding procrastination

Always respond in a clear, structured way. Use bullet points, numbered lists, tables (in markdown) when it helps. Be specific, practical, and actionable.

Current date: February 2026"""

# Optional: 4-bit quantization to reduce memory usage (highly recommended)
quantization_config = BitsAndBytesConfig(
    load_in_4bit=True,
    bnb_4bit_compute_dtype=torch.float16,
    bnb_4bit_use_double_quant=True,
    bnb_4bit_quant_type="nf4",
)

# =============================================
# LOAD MODEL & TOKENIZER
# =============================================
print(f"Loading model: {MODEL_NAME}")
print("This may take a few minutes the first time...")

# The tokenizer converts text into tokens (numbers) the model can understand,
# and vice versa; it is loaded together with the model just below.
try:
    tokenizer = AutoTokenizer.from_pretrained(MODEL_NAME)
    model = AutoModelForCausalLM.from_pretrained(
        MODEL_NAME,
        quantization_config=quantization_config,  # comment out if you want full precision (needs more RAM)
        device_map="auto",
        trust_remote_code=False,  # phi-2 uses a standard architecture; no custom model code needed
        torch_dtype=torch.float16,
    )
    print("Model loaded successfully!")
except Exception as e:
    # Top-level boundary: report the failure and stop the script.
    print("Error loading model:", str(e))
    print("Try without quantization or check RAM/GPU availability.")
    raise SystemExit(1)

# Text-generation pipeline with the default sampling settings for this app.
# NOTE: the model above was already dispatched across devices by
# device_map="auto" in from_pretrained(); passing device_map to pipeline()
# again with a preloaded model is rejected/warned about by recent
# transformers versions, so it is intentionally omitted here.
generator = pipeline(
    "text-generation",
    model=model,
    tokenizer=tokenizer,
    max_new_tokens=800,
    do_sample=True,
    temperature=0.75,
    top_p=0.92,
    repetition_penalty=1.08,
)

# =============================================
# CHAT LOGIC
# =============================================
# Module-level conversation memory, kept in sync by the Gradio callback.
chat_history = []  # list of (user_msg, assistant_msg) tuples
# (imports and config stay the same)

def format_phi2_prompt(messages):
    """Render a chat-message list into phi-2's plain-text prompt format.

    phi-2 has no chat template; it was trained on prompts shaped like
    "Instruct: <question>\\n\\nOutput: <answer>". The system message is
    prepended as plain text and the prompt ends with a bare "Output:" so the
    model continues with the assistant's reply.

    :param messages: list of {"role": ..., "content": ...} dicts with roles
        "system", "user", or "assistant" (other roles are ignored).
    :return: the formatted prompt string.
    """
    parts = []
    for message in messages:
        role = message["role"]
        content = message["content"]
        if role == "system":
            parts.append(content + "\n\n")
        elif role == "user":
            parts.append("Instruct: " + content + "\n\n")
        elif role == "assistant":
            parts.append("Output: " + content + "\n\n")
    parts.append("Output:")
    return "".join(parts)


def chatbot(user_input, history):
    """Gradio callback: generate an assistant reply and extend the history.

    :param user_input: the text the student typed.
    :param history: list of (user_msg, assistant_msg) tuples from the Chatbot.
    :return: (updated_history, "") — the empty string clears the input box.
    """
    global chat_history

    # Ignore empty / whitespace-only submissions.
    if not user_input.strip():
        return history, ""

    # Rebuild the full message list from the visible history on every turn.
    messages = [{"role": "system", "content": SYSTEM_PROMPT}]
    for user_msg, assistant_msg in history:
        messages.append({"role": "user", "content": user_msg})
        messages.append({"role": "assistant", "content": assistant_msg})
    messages.append({"role": "user", "content": user_input})

    try:
        prompt = format_phi2_prompt(messages)
        response = generator(
            prompt,
            max_new_tokens=800,
            do_sample=True,
            temperature=0.75,
            top_p=0.92,
            repetition_penalty=1.08,
        )[0]["generated_text"]

        # The pipeline returns prompt + completion; keep only the completion.
        assistant_response = response[len(prompt):].strip()

        # Bug fix: phi-2 often keeps generating extra fabricated turns after
        # its answer — cut the reply at the first hallucinated "Instruct:".
        assistant_response = assistant_response.split("\nInstruct:")[0].strip()

        # Strip any EOS-token text from the reply. (The original only did
        # this when the reply *ended* with the token but then replaced all
        # occurrences — an unconditional strip is both simpler and safer.)
        if tokenizer.eos_token:
            assistant_response = assistant_response.replace(tokenizer.eos_token, "").strip()
    except Exception as e:
        # Surface generation failures inside the chat instead of crashing the UI.
        assistant_response = f"Error during generation: {str(e)}"

    # Update history (shared with the module-level copy).
    history.append((user_input, assistant_response))
    chat_history = history
    return history, ""


# =============================================
# GRADIO INTERFACE
# =============================================
with gr.Blocks(title="Student Academic Assistant – Phi-2", theme=gr.themes.Soft()) as demo:
    gr.Markdown("""
# 🎓 Student Academic Assistant Chatbot
Powered by **microsoft/phi-2** (local version)

Ask me anything about studying, planning, time management, motivation, etc.!

**Quick examples:**
- Create a 2-week study plan for finals
- How do I break down this 2000-word essay?
- Suggest Pomodoro alternatives for focus
- Help prioritize: exam prep vs group project vs reading
""")

    chatbot_ui = gr.Chatbot(height=500, label="Chat History")

    with gr.Row():
        user_input = gr.Textbox(
            placeholder="Ask me anything about studying...",
            show_label=False,
            scale=4,
        )
        submit_btn = gr.Button("Send", scale=1, variant="primary")
    clear_btn = gr.Button("Clear Chat")

    # Event handlers: send on button click or on Enter in the textbox.
    submit_btn.click(
        chatbot,
        inputs=[user_input, chatbot_ui],
        outputs=[chatbot_ui, user_input],
    )
    user_input.submit(
        chatbot,
        inputs=[user_input, chatbot_ui],
        outputs=[chatbot_ui, user_input],
    )
    # Bug fix: the Textbox expects a string value, not a list — reset it to "".
    clear_btn.click(lambda: ([], ""), outputs=[chatbot_ui, user_input])

    gr.Markdown("""
---
Runs locally. Model: microsoft/phi-2
""")

demo.launch()