File size: 6,531 Bytes
45efc50
 
1fcba24
45efc50
 
 
1fcba24
97f7146
c95624f
 
 
 
 
45efc50
 
 
6a74574
1fcba24
45efc50
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1fcba24
45efc50
 
e4dd20c
45efc50
 
 
 
1fcba24
45efc50
 
 
 
 
1fcba24
97f7146
45efc50
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1fcba24
45efc50
97f7146
45efc50
 
 
 
 
 
 
 
 
 
1fcba24
 
45efc50
 
 
97f7146
45efc50
 
cdb6ba2
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
45efc50
 
 
 
 
cdb6ba2
 
45efc50
 
 
 
 
 
 
 
 
cdb6ba2
45efc50
 
 
 
 
 
 
 
 
 
cdb6ba2
45efc50
 
cdb6ba2
45efc50
 
cdb6ba2
45efc50
cdb6ba2
45efc50
 
 
 
 
 
 
1fcba24
4094397
 
 
 
02d787d
 
45efc50
02d787d
45efc50
02d787d
 
 
 
 
 
 
 
 
 
4094397
76f299e
 
 
 
 
 
 
45efc50
76f299e
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
45efc50
 
 
cdb6ba2
 
45efc50
76f299e
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
# student_assistant_chatbot.py
# MSAI-631 Group Project – improved version

import torch
from transformers import AutoTokenizer, AutoModelForCausalLM, pipeline, BitsAndBytesConfig
import gradio as gr

# Startup diagnostics: print the installed huggingface_hub and transformers
# versions to the console (added while debugging version problems).
import huggingface_hub
print("huggingface_hub version:", huggingface_hub.__version__)
import transformers
print("transformers version:", transformers.__version__)

# =============================================
#  CONFIGURATION
# =============================================
from datetime import date  # only needed for the prompt's "Current date" line

# Hugging Face Hub id of the checkpoint loaded further down.
MODEL_NAME = "microsoft/phi-2"

# System prompt – gives the model its student-helper personality.
# The final "Current date" line is filled in when the module is imported, so it
# reflects the month the app was started instead of a fixed month that goes stale.
SYSTEM_PROMPT = """You are a helpful, friendly, and organized academic assistant designed to help university students succeed.
You are supportive, clear, structured, and encouraging.
You help with:
- Planning study schedules and time management
- Breaking down assignments and projects
- Creating study plans and revision timetables
- Explaining concepts in simple terms
- Suggesting study techniques and productivity methods
- Organizing tasks and priorities
- Motivational support and avoiding procrastination
Always respond in a clear, structured way.
Use bullet points, numbered lists, tables (in markdown) when it helps.
Be specific, practical, and actionable.
Current date: """ + date.today().strftime("%B %Y")

# 4-bit bitsandbytes settings, passed to from_pretrained() below through its
# `quantization_config` argument to reduce memory usage. Leave that argument
# out of the from_pretrained() call to load the model in full precision instead.
quantization_config = BitsAndBytesConfig(
    bnb_4bit_quant_type="nf4",
    bnb_4bit_use_double_quant=True,
    bnb_4bit_compute_dtype=torch.float16,
    load_in_4bit=True,
)

# =============================================
#  LOAD MODEL & TOKENIZER
# =============================================
print(f"Loading model: {MODEL_NAME}")
print("This may take a few minutes the first time...")

# The tokenizer converts text into tokens (numbers) the model can understand, and vice versa.
try:
    tokenizer = AutoTokenizer.from_pretrained(MODEL_NAME)

    model = AutoModelForCausalLM.from_pretrained(
        MODEL_NAME,
        quantization_config=quantization_config,   # comment out if you want full precision (needs more RAM)
        device_map="auto",
        trust_remote_code=False,                   # do not run custom modeling code from the model repo
        torch_dtype=torch.float16
    )
    print("Model loaded successfully!")
except Exception as e:
    # Top-level boundary: nothing below can work without the model, so report
    # whatever went wrong and stop the script with a non-zero exit status.
    print("Error loading model:", str(e))
    print("Try without quantization or check RAM/GPU availability.")
    # `exit` is a convenience injected by the `site` module for interactive use and
    # is not guaranteed to exist (e.g. `python -S`); raising SystemExit always works.
    raise SystemExit(1)

# Wrap the already-loaded model and tokenizer in a text-generation pipeline.
# The generation settings below are the same values chatbot() passes again on
# every call; prompts are formatted by hand with format_phi2_prompt().
generator = pipeline(
    task="text-generation",
    tokenizer=tokenizer,
    model=model,
    device_map="auto",
    # Length limit for each reply
    max_new_tokens=800,
    # Sampling settings
    repetition_penalty=1.08,
    top_p=0.92,
    temperature=0.75,
    do_sample=True,
)

# =============================================
#  CHAT LOGIC
# =============================================
# Module-level copy of the latest conversation. chatbot() rebinds it after every
# turn; it is not read anywhere else in the visible code.
chat_history = []  # list of (user_msg, assistant_msg) tuples

# Prompt helper: chatbot() calls format_phi2_prompt() to flatten the message list

# into a single "Instruct:/Output:" prompt string before generation.
def format_phi2_prompt(messages):
    """Flatten chat messages into a single "Instruct:/Output:" prompt string.

    Args:
        messages: Iterable of dicts, each with a "role" and a "content" key.

    Returns:
        One string in which every message is followed by a blank line:
        system content is emitted as-is, user content is prefixed with
        "Instruct: " and assistant content with "Output: ". Messages with any
        other role are skipped. The string always ends with a bare "Output:"
        so the model continues with the assistant's next reply.
    """
    # Text placed in front of the content for each role that is rendered.
    prefixes = {"system": "", "user": "Instruct: ", "assistant": "Output: "}

    segments = []
    for entry in messages:
        speaker, body = entry["role"], entry["content"]
        if speaker in prefixes:
            segments.append(prefixes[speaker] + body + "\n\n")

    # Open the assistant turn that the model is expected to complete.
    segments.append("Output:")
    return "".join(segments)


def chatbot(user_input, history):
    """Generate the assistant's reply to ``user_input`` and add the turn to the chat.

    Args:
        user_input: Text typed by the user.
        history: Earlier turns as (user_msg, assistant_msg) pairs, as passed in by
            the Chatbot component this function is wired to. ``None`` is treated
            as an empty conversation.

    Returns:
        A ``(history, "")`` tuple: the list of turns including the new one, and an
        empty string that clears the input textbox. Blank or whitespace-only
        input returns ``history`` untouched.

    Errors raised while building the prompt or generating are not propagated;
    their message becomes the assistant's reply so the UI keeps working.
    """
    global chat_history

    if not user_input.strip():
        return history, ""

    # A missing history would otherwise fail in the loop below.
    if history is None:
        history = []

    # Build messages: system prompt, every earlier turn, then the new question.
    messages = [{"role": "system", "content": SYSTEM_PROMPT}]
    for user_msg, assistant_msg in history:
        messages.append({"role": "user", "content": user_msg})
        messages.append({"role": "assistant", "content": assistant_msg})
    messages.append({"role": "user", "content": user_input})

    try:
        prompt = format_phi2_prompt(messages)

        # return_full_text=False asks the pipeline for the newly generated text only.
        # Cutting the prompt off with response[len(prompt):] is fragile because the
        # decoded text is not guaranteed to reproduce the prompt character for character.
        outputs = generator(
            prompt,
            max_new_tokens=800,
            do_sample=True,
            temperature=0.75,
            top_p=0.92,
            repetition_penalty=1.08,
            return_full_text=False,
        )
        assistant_response = outputs[0]["generated_text"].strip()

        # Clean up a trailing EOS token if present. Only the suffix is removed, so an
        # identical substring earlier in the reply is left alone.
        eos_token = tokenizer.eos_token
        if eos_token and assistant_response.endswith(eos_token):
            assistant_response = assistant_response[: -len(eos_token)].strip()

        # NOTE(review): the model may keep writing further "Instruct:" turns after its
        # answer; consider truncating at that marker if this shows up in practice.
    except Exception as e:
        assistant_response = f"Error during generation: {str(e)}"

    # Update history (also mirrored into the module-level chat_history).
    history.append((user_input, assistant_response))
    chat_history = history

    return history, ""


# =============================================
#  GRADIO INTERFACE
# =============================================
# Page layout: intro text, the chat window, an input row (textbox + Send), a
# clear button and a footer. Event wiring is at the bottom of the block.
with gr.Blocks(title="Student Academic Assistant – Phi-2", theme=gr.themes.Soft()) as demo:
    gr.Markdown("""
    # 🎓 Student Academic Assistant Chatbot
    
    Powered by **microsoft/phi-2** (local version)
    
    Ask me anything about studying, planning, time management, motivation, etc.!
    
    **Quick examples:**
    - Create a 2-week study plan for finals
    - How do I break down this 2000-word essay?
    - Suggest Pomodoro alternatives for focus
    - Help prioritize: exam prep vs group project vs reading
    """)

    chatbot_ui = gr.Chatbot(height=500, label="Chat History")

    with gr.Row():
        user_input = gr.Textbox(
            placeholder="Ask me anything about studying...",
            show_label=False,
            scale=4
        )
        submit_btn = gr.Button("Send", scale=1, variant="primary")

    clear_btn = gr.Button("Clear Chat")

    # Event handlers
    # The Send button and pressing Enter in the textbox both call chatbot(), which
    # returns the updated history for the chat window and "" to empty the textbox.
    submit_btn.click(
        chatbot,
        inputs=[user_input, chatbot_ui],
        outputs=[chatbot_ui, user_input]
    )

    user_input.submit(
        chatbot,
        inputs=[user_input, chatbot_ui],
        outputs=[chatbot_ui, user_input]
    )

    # Reset both outputs: an empty list for the chat window and an empty string for
    # the textbox (a list is not a valid textbox value).
    clear_btn.click(lambda: ([], ""), outputs=[chatbot_ui, user_input])

    gr.Markdown("""
    ---
    Runs locally.
    Model: microsoft/phi-2
    """)

# Start the Gradio server for the interface defined above.
demo.launch()