TDMaule commited on
Commit
97f7146
·
verified ·
1 Parent(s): 7a9a3cf

Added more detailed comments and changed one setting to improve the response

Browse files
Files changed (1) hide show
  1. app.py +10 -6
app.py CHANGED
@@ -5,7 +5,7 @@ import torch
5
  from transformers import AutoTokenizer, AutoModelForCausalLM, pipeline, BitsAndBytesConfig
6
  import gradio as gr
7
 
8
-
9
  import huggingface_hub
10
  print("huggingface_hub version:", huggingface_hub.__version__)
11
  import transformers
@@ -18,7 +18,6 @@ MODEL_NAME = "HuggingFaceTB/SmolLM3-3B"
18
 
19
  # System prompt – gives the model its student-helper personality
20
  SYSTEM_PROMPT = """You are a helpful, friendly, and organized academic assistant designed to help university students succeed.
21
-
22
  You are supportive, clear, structured, and encouraging.
23
  You help with:
24
  - Planning study schedules and time management
@@ -28,16 +27,15 @@ You help with:
28
  - Suggesting study techniques and productivity methods
29
  - Organizing tasks and priorities
30
  - Motivational support and avoiding procrastination
31
-
32
  Always respond in a clear, structured way.
33
  Use bullet points, numbered lists, tables (in markdown) when it helps.
34
  Be specific, practical, and actionable.
35
-
36
  Current date: February 2026"""
37
 
38
  # Optional: 4-bit quantization to reduce memory usage (highly recommended)
39
  quantization_config = BitsAndBytesConfig(
40
- load_in_4bit=True,
 
41
  bnb_4bit_compute_dtype=torch.float16,
42
  bnb_4bit_use_double_quant=True,
43
  bnb_4bit_quant_type="nf4"
@@ -49,6 +47,7 @@ quantization_config = BitsAndBytesConfig(
49
  print(f"Loading model: {MODEL_NAME}")
50
  print("This may take a few minutes the first time...")
51
 
 
52
  try:
53
  tokenizer = AutoTokenizer.from_pretrained(MODEL_NAME)
54
 
@@ -66,6 +65,7 @@ except Exception as e:
66
  exit(1)
67
 
68
  # Text-generation pipeline (auto-handles chat templates in newer transformers)
 
69
  generator = pipeline(
70
  "text-generation",
71
  model=model,
@@ -81,6 +81,7 @@ generator = pipeline(
81
  # =============================================
82
  # CHAT LOGIC
83
  # =============================================
 
84
  chat_history = [] # list of (user_msg, assistant_msg) tuples
85
 
86
  def chatbot(user_input, history):
@@ -88,7 +89,7 @@ def chatbot(user_input, history):
88
 
89
  if not user_input.strip():
90
  return history, ""
91
-
92
  # Build messages list in OpenAI-style format (role/content)
93
  messages = [{"role": "system", "content": SYSTEM_PROMPT}]
94
 
@@ -100,6 +101,7 @@ def chatbot(user_input, history):
100
  # Add current user message
101
  messages.append({"role": "user", "content": user_input})
102
 
 
103
  # Generate using the official chat template
104
  try:
105
  # Let the tokenizer format everything correctly
@@ -118,6 +120,7 @@ def chatbot(user_input, history):
118
  repetition_penalty=1.08
119
  )[0]["generated_text"]
120
 
 
121
  # Extract only the new assistant response (after the prompt)
122
  assistant_response = response[len(prompt):].strip()
123
 
@@ -158,6 +161,7 @@ with gr.Blocks(title="Student Academic Assistant – SmolLM3", theme=gr.themes.S
158
  - Help prioritize: exam prep vs group project vs reading
159
  """)
160
 
 
161
  chatbot_ui = gr.Chatbot(height=500, show_label=False)
162
 
163
  with gr.Row():
 
5
  from transformers import AutoTokenizer, AutoModelForCausalLM, pipeline, BitsAndBytesConfig
6
  import gradio as gr
7
 
8
+ # debugging the code to find versions
9
  import huggingface_hub
10
  print("huggingface_hub version:", huggingface_hub.__version__)
11
  import transformers
 
18
 
19
  # System prompt – gives the model its student-helper personality
20
  SYSTEM_PROMPT = """You are a helpful, friendly, and organized academic assistant designed to help university students succeed.
 
21
  You are supportive, clear, structured, and encouraging.
22
  You help with:
23
  - Planning study schedules and time management
 
27
  - Suggesting study techniques and productivity methods
28
  - Organizing tasks and priorities
29
  - Motivational support and avoiding procrastination
 
30
  Always respond in a clear, structured way.
31
  Use bullet points, numbered lists, tables (in markdown) when it helps.
32
  Be specific, practical, and actionable.
 
33
  Current date: February 2026"""
34
 
35
  # Optional: 4-bit quantization to reduce memory usage (highly recommended)
36
  quantization_config = BitsAndBytesConfig(
37
+ # load_in_4bit=True removed — 4-bit loading disabled to change quantization behavior.
38
+ # NOTE: bnb_4bit_use_double_quant=True is already set two lines below; adding it again here (and without a trailing comma) would be a SyntaxError, so no replacement argument is inserted.
39
  bnb_4bit_compute_dtype=torch.float16,
40
  bnb_4bit_use_double_quant=True,
41
  bnb_4bit_quant_type="nf4"
 
47
  print(f"Loading model: {MODEL_NAME}")
48
  print("This may take a few minutes the first time...")
49
 
50
+ # Load the tokenizer, which converts text to token IDs the model understands, and back.
51
  try:
52
  tokenizer = AutoTokenizer.from_pretrained(MODEL_NAME)
53
 
 
65
  exit(1)
66
 
67
  # Text-generation pipeline (auto-handles chat templates in newer transformers)
68
+ # This code creates a text generation pipeline with specific settings for how the model produces text
69
  generator = pipeline(
70
  "text-generation",
71
  model=model,
 
81
  # =============================================
82
  # CHAT LOGIC
83
  # =============================================
84
+ # Chat state: history is kept as a list of (user_msg, assistant_msg) tuples.
85
  chat_history = [] # list of (user_msg, assistant_msg) tuples
86
 
87
  def chatbot(user_input, history):
 
89
 
90
  if not user_input.strip():
91
  return history, ""
92
+ # This code constructs a conversation history in a structured format that language models expect.
93
  # Build messages list in OpenAI-style format (role/content)
94
  messages = [{"role": "system", "content": SYSTEM_PROMPT}]
95
 
 
101
  # Add current user message
102
  messages.append({"role": "user", "content": user_input})
103
 
104
+ # This code converts the conversation messages into the proper format for the model, then generates a response.
105
  # Generate using the official chat template
106
  try:
107
  # Let the tokenizer format everything correctly
 
120
  repetition_penalty=1.08
121
  )[0]["generated_text"]
122
 
123
+ # This code cleans up the generated output to get just the assistant's new response.
124
  # Extract only the new assistant response (after the prompt)
125
  assistant_response = response[len(prompt):].strip()
126
 
 
161
  - Help prioritize: exam prep vs group project vs reading
162
  """)
163
 
164
+ # This code creates the user interface components for a chatbot using Gradio.
165
  chatbot_ui = gr.Chatbot(height=500, show_label=False)
166
 
167
  with gr.Row():