# (Removed non-Python Hugging Face page residue that was pasted above the
# script: "TDMaule's picture" / "update GRADIO" / "4094397 verified".)
# student_assistant_chatbot.py
# MSAI-631 Group Project – improved version
import torch
from transformers import AutoTokenizer, AutoModelForCausalLM, pipeline, BitsAndBytesConfig
import gradio as gr
# Debugging aid: print library versions at startup so environment mismatches
# (e.g. a transformers release without chat-template support) are easy to spot.
import huggingface_hub
print("huggingface_hub version:", huggingface_hub.__version__)
import transformers
print("transformers version:", transformers.__version__)
# =============================================
# CONFIGURATION
# =============================================
# Hugging Face model ID loaded below; phi-2 is a ~2.7B-parameter causal LM.
MODEL_NAME = "microsoft/phi-2"
# System prompt – gives the model its student-helper personality.
# Prepended as the first message of every conversation in chatbot().
SYSTEM_PROMPT = """You are a helpful, friendly, and organized academic assistant designed to help university students succeed.
You are supportive, clear, structured, and encouraging.
You help with:
- Planning study schedules and time management
- Breaking down assignments and projects
- Creating study plans and revision timetables
- Explaining concepts in simple terms
- Suggesting study techniques and productivity methods
- Organizing tasks and priorities
- Motivational support and avoiding procrastination
Always respond in a clear, structured way.
Use bullet points, numbered lists, tables (in markdown) when it helps.
Be specific, practical, and actionable.
Current date: February 2026"""
# Optional: 4-bit quantization to reduce memory usage (highly recommended).
# NOTE(review): requires the bitsandbytes package and a CUDA GPU — confirm
# availability on the target machine, otherwise loading below will fail.
quantization_config = BitsAndBytesConfig(
    load_in_4bit=True,                       # store weights in 4 bits
    bnb_4bit_compute_dtype=torch.float16,    # compute in fp16 for speed
    bnb_4bit_use_double_quant=True,          # quantize the quantization constants too
    bnb_4bit_quant_type="nf4"                # NormalFloat4 quantization scheme
)
# =============================================
# LOAD MODEL & TOKENIZER
# =============================================
print(f"Loading model: {MODEL_NAME}")
print("This may take a few minutes the first time...")
# The tokenizer converts text into token IDs the model understands, and back.
try:
    tokenizer = AutoTokenizer.from_pretrained(MODEL_NAME)
    model = AutoModelForCausalLM.from_pretrained(
        MODEL_NAME,
        quantization_config=quantization_config,  # comment out if you want full precision (needs more RAM)
        device_map="auto",            # let accelerate place layers on GPU/CPU
        trust_remote_code=False,      # phi-2 ships no custom modeling code
        torch_dtype=torch.float16     # fp16 for non-quantized tensors
    )
    print("Model loaded successfully!")
except Exception as e:
    # Fail fast: the rest of the script is useless without a loaded model.
    print("Error loading model:", str(e))
    print("Try without quantization or check RAM/GPU availability.")
    exit(1)
# Text-generation pipeline wrapping the loaded model + tokenizer.
# Sampling defaults set here apply to every generator(...) call below.
generator = pipeline(
    "text-generation",
    model=model,
    tokenizer=tokenizer,
    device_map="auto",
    max_new_tokens=800,       # cap on reply length
    do_sample=True,           # sample instead of greedy decoding
    temperature=0.75,         # moderate creativity
    top_p=0.92,               # nucleus sampling
    repetition_penalty=1.08   # discourage loops/repeats
)
# =============================================
# CHAT LOGIC
# =============================================
# Module-level mirror of the conversation, updated by chatbot() each turn.
chat_history = []  # list of (user_msg, assistant_msg) tuples
def format_phi2_prompt(messages):
    """Render a chat-message list into microsoft/phi-2's plain-text format.

    Phi-2 has no chat template; its instruction format is
    "Instruct: <question>\\n\\nOutput: <answer>".  The system message is
    emitted as a bare preamble, user turns as "Instruct:", assistant turns
    as "Output:", and a final bare "Output:" cues the model to reply.

    Args:
        messages: list of {"role": "system"|"user"|"assistant",
                  "content": str} dicts in conversation order.
                  Messages with any other role are silently skipped.

    Returns:
        A single prompt string for the text-generation pipeline.
    """
    # Collect pieces in a list and join once — avoids quadratic str +=.
    parts = []
    for message in messages:
        role = message["role"]
        content = message["content"]
        if role == "system":
            parts.append(content + "\n\n")
        elif role == "user":
            parts.append("Instruct: " + content + "\n\n")
        elif role == "assistant":
            parts.append("Output: " + content + "\n\n")
    # Trailing cue: the model continues from here with its answer.
    parts.append("Output:")
    return "".join(parts)
def chatbot(user_input, history):
    """Gradio event handler: generate one assistant reply and update the chat.

    Args:
        user_input: raw text from the input textbox.
        history: list of (user_msg, assistant_msg) tuples from the Chatbot UI.

    Returns:
        (updated_history, "") — the empty string clears the input textbox.
    """
    global chat_history
    # Ignore empty / whitespace-only submissions.
    if not user_input.strip():
        return history, ""
    # Rebuild the full message list (system prompt + all prior turns + the
    # new user turn) so the model always sees the whole conversation.
    messages = [{"role": "system", "content": SYSTEM_PROMPT}]
    for user_msg, assistant_msg in history:
        messages.append({"role": "user", "content": user_msg})
        messages.append({"role": "assistant", "content": assistant_msg})
    messages.append({"role": "user", "content": user_input})
    try:
        prompt = format_phi2_prompt(messages)
        # Sampling parameters were baked into the pipeline at construction
        # time, so they are not repeated here.
        response = generator(prompt)[0]["generated_text"]
        # The pipeline returns prompt + completion; keep only the completion.
        assistant_response = response[len(prompt):].strip()
        # BUG FIX: phi-2 often keeps generating and invents the next user
        # turn ("Instruct: ..."); truncate the reply at that marker.
        assistant_response = assistant_response.split("\nInstruct:")[0].strip()
        # BUG FIX: strip the EOS token wherever it appears, not only when it
        # is the exact suffix (truncation above can leave it mid-string).
        if tokenizer.eos_token:
            assistant_response = assistant_response.replace(
                tokenizer.eos_token, "").strip()
    except Exception as e:
        # Surface generation failures in the chat instead of crashing the UI.
        assistant_response = f"Error during generation: {str(e)}"
    # Update both the UI history and the module-level mirror.
    history.append((user_input, assistant_response))
    chat_history = history
    return history, ""
# =============================================
# GRADIO INTERFACE
# =============================================
with gr.Blocks(title="Student Academic Assistant – Phi-2", theme=gr.themes.Soft()) as demo:
    # Header / usage instructions shown above the chat window.
    gr.Markdown("""
# 🎓 Student Academic Assistant Chatbot
Powered by **microsoft/phi-2** (local version)
Ask me anything about studying, planning, time management, motivation, etc.!
**Quick examples:**
- Create a 2-week study plan for finals
- How do I break down this 2000-word essay?
- Suggest Pomodoro alternatives for focus
- Help prioritize: exam prep vs group project vs reading
""")
    chatbot_ui = gr.Chatbot(height=500, label="Chat History")
    with gr.Row():
        user_input = gr.Textbox(
            placeholder="Ask me anything about studying...",
            show_label=False,
            scale=4
        )
        submit_btn = gr.Button("Send", scale=1, variant="primary")
    clear_btn = gr.Button("Clear Chat")
    # Event handlers: both the Send button and pressing Enter submit the
    # textbox + current history to chatbot(), which returns the updated
    # history and an empty string to clear the textbox.
    submit_btn.click(
        chatbot,
        inputs=[user_input, chatbot_ui],
        outputs=[chatbot_ui, user_input]
    )
    user_input.submit(
        chatbot,
        inputs=[user_input, chatbot_ui],
        outputs=[chatbot_ui, user_input]
    )
    # BUG FIX: the Textbox output must be reset with a string, not a list
    # (the original returned [] for both components).
    clear_btn.click(lambda: ([], ""), outputs=[chatbot_ui, user_input])
    gr.Markdown("""
---
Runs locally.
Model: microsoft/phi-2
""")
demo.launch()