# (Removed non-Python Hugging Face page residue that was pasted above the
# script: "TDMaule's picture" / "update GRADIO" / "4094397 verified".)
# student_assistant_chatbot.py
# MSAI-631 Group Project – improved version
import torch
from transformers import AutoTokenizer, AutoModelForCausalLM, pipeline, BitsAndBytesConfig
import gradio as gr
# Debugging aid: print library versions at startup so environment mismatches
# (e.g. a transformers release without chat-template support) are easy to spot.
import huggingface_hub
print("huggingface_hub version:", huggingface_hub.__version__)
import transformers
print("transformers version:", transformers.__version__)
# =============================================
# CONFIGURATION
# =============================================
# Hugging Face model ID loaded below; phi-2 is a ~2.7B-parameter causal LM.
MODEL_NAME = "microsoft/phi-2"
# System prompt – gives the model its student-helper personality.
# Prepended as the first message of every conversation in chatbot().
SYSTEM_PROMPT = """You are a helpful, friendly, and organized academic assistant designed to help university students succeed.
You are supportive, clear, structured, and encouraging.
You help with:
- Planning study schedules and time management
- Breaking down assignments and projects
- Creating study plans and revision timetables
- Explaining concepts in simple terms
- Suggesting study techniques and productivity methods
- Organizing tasks and priorities
- Motivational support and avoiding procrastination
Always respond in a clear, structured way.
Use bullet points, numbered lists, tables (in markdown) when it helps.
Be specific, practical, and actionable.
Current date: February 2026"""
# Optional: 4-bit quantization to reduce memory usage (highly recommended).
# NOTE(review): requires the bitsandbytes package and a CUDA GPU — confirm
# availability on the target machine, otherwise loading below will fail.
quantization_config = BitsAndBytesConfig(
    load_in_4bit=True,                       # store weights in 4 bits
    bnb_4bit_compute_dtype=torch.float16,    # compute in fp16 for speed
    bnb_4bit_use_double_quant=True,          # quantize the quantization constants too
    bnb_4bit_quant_type="nf4"                # NormalFloat4 quantization scheme
)
# =============================================
# LOAD MODEL & TOKENIZER
# =============================================
print(f"Loading model: {MODEL_NAME}")
print("This may take a few minutes the first time...")
# The tokenizer converts text into token IDs the model understands, and back.
try:
    tokenizer = AutoTokenizer.from_pretrained(MODEL_NAME)
    model = AutoModelForCausalLM.from_pretrained(
        MODEL_NAME,
        quantization_config=quantization_config,  # comment out if you want full precision (needs more RAM)
        device_map="auto",            # let accelerate place layers on GPU/CPU
        trust_remote_code=False,      # phi-2 ships no custom modeling code
        torch_dtype=torch.float16     # fp16 for non-quantized tensors
    )
    print("Model loaded successfully!")
except Exception as e:
    # Fail fast: the rest of the script is useless without a loaded model.
    print("Error loading model:", str(e))
    print("Try without quantization or check RAM/GPU availability.")
    exit(1)
# Text-generation pipeline wrapping the loaded model + tokenizer.
# Sampling defaults set here apply to every generator(...) call below.
generator = pipeline(
    "text-generation",
    model=model,
    tokenizer=tokenizer,
    device_map="auto",
    max_new_tokens=800,       # cap on reply length
    do_sample=True,           # sample instead of greedy decoding
    temperature=0.75,         # moderate creativity
    top_p=0.92,               # nucleus sampling
    repetition_penalty=1.08   # discourage loops/repeats
)
# =============================================
# CHAT LOGIC
# =============================================
# Module-level mirror of the conversation, updated by chatbot() each turn.
chat_history = []  # list of (user_msg, assistant_msg) tuples
def format_phi2_prompt(messages):
    """Render a chat-message list into microsoft/phi-2's plain-text format.

    Phi-2 has no chat template; its instruction format is
    "Instruct: <question>\\n\\nOutput: <answer>".  The system message is
    emitted as a bare preamble, user turns as "Instruct:", assistant turns
    as "Output:", and a final bare "Output:" cues the model to reply.

    Args:
        messages: list of {"role": "system"|"user"|"assistant",
                  "content": str} dicts in conversation order.
                  Messages with any other role are silently skipped.

    Returns:
        A single prompt string for the text-generation pipeline.
    """
    # Collect pieces in a list and join once — avoids quadratic str +=.
    parts = []
    for message in messages:
        role = message["role"]
        content = message["content"]
        if role == "system":
            parts.append(content + "\n\n")
        elif role == "user":
            parts.append("Instruct: " + content + "\n\n")
        elif role == "assistant":
            parts.append("Output: " + content + "\n\n")
    # Trailing cue: the model continues from here with its answer.
    parts.append("Output:")
    return "".join(parts)
def chatbot(user_input, history):
    """Gradio event handler: generate one assistant reply and update the chat.

    Args:
        user_input: raw text from the input textbox.
        history: list of (user_msg, assistant_msg) tuples from the Chatbot UI.

    Returns:
        (updated_history, "") — the empty string clears the input textbox.
    """
    global chat_history
    # Ignore empty / whitespace-only submissions.
    if not user_input.strip():
        return history, ""
    # Rebuild the full message list (system prompt + all prior turns + the
    # new user turn) so the model always sees the whole conversation.
    messages = [{"role": "system", "content": SYSTEM_PROMPT}]
    for user_msg, assistant_msg in history:
        messages.append({"role": "user", "content": user_msg})
        messages.append({"role": "assistant", "content": assistant_msg})
    messages.append({"role": "user", "content": user_input})
    try:
        prompt = format_phi2_prompt(messages)
        # Sampling parameters were baked into the pipeline at construction
        # time, so they are not repeated here.
        response = generator(prompt)[0]["generated_text"]
        # The pipeline returns prompt + completion; keep only the completion.
        assistant_response = response[len(prompt):].strip()
        # BUG FIX: phi-2 often keeps generating and invents the next user
        # turn ("Instruct: ..."); truncate the reply at that marker.
        assistant_response = assistant_response.split("\nInstruct:")[0].strip()
        # BUG FIX: strip the EOS token wherever it appears, not only when it
        # is the exact suffix (truncation above can leave it mid-string).
        if tokenizer.eos_token:
            assistant_response = assistant_response.replace(
                tokenizer.eos_token, "").strip()
    except Exception as e:
        # Surface generation failures in the chat instead of crashing the UI.
        assistant_response = f"Error during generation: {str(e)}"
    # Update both the UI history and the module-level mirror.
    history.append((user_input, assistant_response))
    chat_history = history
    return history, ""
# =============================================
# GRADIO INTERFACE
# =============================================
with gr.Blocks(title="Student Academic Assistant – Phi-2", theme=gr.themes.Soft()) as demo:
    # Header / usage instructions shown above the chat window.
    gr.Markdown("""
# 🎓 Student Academic Assistant Chatbot
Powered by **microsoft/phi-2** (local version)
Ask me anything about studying, planning, time management, motivation, etc.!
**Quick examples:**
- Create a 2-week study plan for finals
- How do I break down this 2000-word essay?
- Suggest Pomodoro alternatives for focus
- Help prioritize: exam prep vs group project vs reading
""")
    chatbot_ui = gr.Chatbot(height=500, label="Chat History")
    with gr.Row():
        user_input = gr.Textbox(
            placeholder="Ask me anything about studying...",
            show_label=False,
            scale=4
        )
        submit_btn = gr.Button("Send", scale=1, variant="primary")
    clear_btn = gr.Button("Clear Chat")
    # Event handlers: both the Send button and pressing Enter submit the
    # textbox + current history to chatbot(), which returns the updated
    # history and an empty string to clear the textbox.
    submit_btn.click(
        chatbot,
        inputs=[user_input, chatbot_ui],
        outputs=[chatbot_ui, user_input]
    )
    user_input.submit(
        chatbot,
        inputs=[user_input, chatbot_ui],
        outputs=[chatbot_ui, user_input]
    )
    # BUG FIX: the Textbox output must be reset with a string, not a list
    # (the original returned [] for both components).
    clear_btn.click(lambda: ([], ""), outputs=[chatbot_ui, user_input])
    gr.Markdown("""
---
Runs locally.
Model: microsoft/phi-2
""")
demo.launch()