Commit: Added more detailed comments and changed one setting to improve responses
app.py (changed)
|
@@ -5,7 +5,7 @@ import torch
 from transformers import AutoTokenizer, AutoModelForCausalLM, pipeline, BitsAndBytesConfig
 import gradio as gr
 
-
+# Debugging: print library versions
 import huggingface_hub
 print("huggingface_hub version:", huggingface_hub.__version__)
 import transformers
@@ -18,7 +18,6 @@ MODEL_NAME = "HuggingFaceTB/SmolLM3-3B"
 
 # System prompt – gives the model its student-helper personality
 SYSTEM_PROMPT = """You are a helpful, friendly, and organized academic assistant designed to help university students succeed.
-
 You are supportive, clear, structured, and encouraging.
 You help with:
 - Planning study schedules and time management
@@ -28,16 +27,14 @@ You help with:
 - Suggesting study techniques and productivity methods
 - Organizing tasks and priorities
 - Motivational support and avoiding procrastination
-
 Always respond in a clear, structured way.
 Use bullet points, numbered lists, tables (in markdown) when it helps.
 Be specific, practical, and actionable.
-
 Current date: February 2026"""
 
 # Optional: 4-bit quantization to reduce memory usage (highly recommended)
 quantization_config = BitsAndBytesConfig(
-    load_in_4bit=True,
+    # Changed: load_in_4bit=True replaced with bnb_4bit_use_double_quant=True
+    bnb_4bit_use_double_quant=True,
     bnb_4bit_compute_dtype=torch.float16,
-    bnb_4bit_use_double_quant=True,
     bnb_4bit_quant_type="nf4"
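For context, a BitsAndBytesConfig only takes effect when it is passed to the model loader, and load_in_4bit=True is the flag that actually switches 4-bit loading on, so removing it from the config (as this commit does) may leave the quantization settings inert. A minimal sketch of the usual pattern, assuming the same MODEL_NAME and that bitsandbytes and accelerate are installed:

```python
import torch
from transformers import AutoModelForCausalLM, BitsAndBytesConfig

# Sketch: 4-bit NF4 quantization; load_in_4bit=True is what activates it.
bnb_config = BitsAndBytesConfig(
    load_in_4bit=True,
    bnb_4bit_use_double_quant=True,        # also quantize the quantization constants
    bnb_4bit_compute_dtype=torch.float16,  # dtype used for matmuls at runtime
    bnb_4bit_quant_type="nf4",             # normal-float 4-bit, a common default for LLM weights
)

model = AutoModelForCausalLM.from_pretrained(
    "HuggingFaceTB/SmolLM3-3B",
    quantization_config=bnb_config,
    device_map="auto",  # needs the accelerate package
)
```

Double quantization quantizes the quantization constants themselves, saving roughly 0.4 bits per parameter with negligible quality cost.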
@@ -49,6 +47,7 @@ quantization_config = BitsAndBytesConfig(
 print(f"Loading model: {MODEL_NAME}")
 print("This may take a few minutes the first time...")
 
+# Load the tokenizer, which converts text into tokens (numbers) the model understands, and back.
 try:
     tokenizer = AutoTokenizer.from_pretrained(MODEL_NAME)
 
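To make that comment concrete, here is the round trip it describes; a minimal sketch with an illustrative input string (the printed IDs depend on the model's actual vocabulary):

```python
from transformers import AutoTokenizer

tokenizer = AutoTokenizer.from_pretrained("HuggingFaceTB/SmolLM3-3B")

ids = tokenizer("Plan my study week")["input_ids"]  # text -> token IDs
print(ids)                                          # a short list of integers

text = tokenizer.decode(ids)                        # token IDs -> text (the "vice versa" direction)
print(text)
```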
@@ -66,6 +65,7 @@ except Exception as e:
     exit(1)
 
 # Text-generation pipeline (auto-handles chat templates in newer transformers)
+# Create the generation pipeline with the settings that control how the model produces text.
 generator = pipeline(
     "text-generation",
     model=model,
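As a usage sketch: a text-generation pipeline is called with a prompt string and returns a list of dicts with a "generated_text" key. This assumes the `generator` built above; the parameter values are illustrative, not the ones app.py uses:

```python
outputs = generator(
    "Give me three tips for exam week.",
    max_new_tokens=100,  # illustrative limit on the length of the reply
    do_sample=True,
    temperature=0.7,
)
print(outputs[0]["generated_text"])
```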
@@ -81,6 +81,7 @@ generator = pipeline(
 # =============================================
 # CHAT LOGIC
 # =============================================
+# Keep the running conversation so every reply can use the full context.
 chat_history = []  # list of (user_msg, assistant_msg) tuples
 
 def chatbot(user_input, history):
@@ -88,7 +89,7 @@ def chatbot(user_input, history):
 
     if not user_input.strip():
         return history, ""
-
+    # Construct the conversation history in the structured role/content format the model expects.
     # Build messages list in OpenAI-style format (role/content)
     messages = [{"role": "system", "content": SYSTEM_PROMPT}]
 
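The loop that replays earlier turns falls outside this hunk, so the following is an assumption about how the messages list is assembled from the (user_msg, assistant_msg) tuples mentioned above; the example strings and abbreviated SYSTEM_PROMPT are hypothetical:

```python
SYSTEM_PROMPT = "You are a helpful academic assistant."  # abbreviated stand-in
history = [("How should I start revising?", "Begin with a topic list and a timetable.")]
user_input = "And what about past papers?"

messages = [{"role": "system", "content": SYSTEM_PROMPT}]
for user_msg, assistant_msg in history:                   # replay earlier turns
    messages.append({"role": "user", "content": user_msg})
    messages.append({"role": "assistant", "content": assistant_msg})
messages.append({"role": "user", "content": user_input})  # current question
```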
@@ -100,6 +101,7 @@ def chatbot(user_input, history):
     # Add current user message
     messages.append({"role": "user", "content": user_input})
 
+    # Convert the messages into the model's prompt format, then generate a response.
     # Generate using the official chat template
     try:
         # Let the tokenizer format everything correctly
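For reference, tokenizer.apply_chat_template is the standard transformers API for turning a role/content messages list into a single prompt string. A minimal sketch, assuming the tokenizer and generator defined above; max_new_tokens is illustrative, while repetition_penalty=1.08 matches the value visible in this diff:

```python
prompt = tokenizer.apply_chat_template(
    messages,
    tokenize=False,              # return a string rather than token IDs
    add_generation_prompt=True,  # append the assistant header so the model answers next
)
response = generator(prompt, max_new_tokens=512, repetition_penalty=1.08)[0]["generated_text"]
```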
@@ -118,6 +120,7 @@ def chatbot(user_input, history):
             repetition_penalty=1.08
         )[0]["generated_text"]
 
+        # Clean up the generated output to keep only the assistant's new reply.
         # Extract only the new assistant response (after the prompt)
         assistant_response = response[len(prompt):].strip()
 
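The slicing works because, by default, the pipeline's generated_text starts with the prompt itself. A toy illustration with hypothetical strings:

```python
prompt = "User: hi\nAssistant:"
response = prompt + " Hello! How can I help with your studies?"

assistant_response = response[len(prompt):].strip()
print(assistant_response)  # -> Hello! How can I help with your studies?
```

Alternatively, text-generation pipelines accept return_full_text=False, which returns only the newly generated text and makes the slicing unnecessary.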
@@ -158,6 +161,7 @@ with gr.Blocks(title="Student Academic Assistant – SmolLM3", theme=gr.themes.S
     - Help prioritize: exam prep vs group project vs reading
     """)
 
+    # Create the user-interface components for the chatbot with Gradio.
     chatbot_ui = gr.Chatbot(height=500, show_label=False)
 
     with gr.Row():
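To show how these components typically fit together, here is a minimal sketch of the Blocks wiring; the stub handler and placeholder text are hypothetical stand-ins for the real chatbot function and layout defined in app.py:

```python
import gradio as gr

def chatbot(user_input, history):
    # Hypothetical stand-in for the real handler defined earlier in app.py
    history = (history or []) + [(user_input, "...model reply...")]
    return history, ""

with gr.Blocks(title="Student Academic Assistant") as demo:
    chatbot_ui = gr.Chatbot(height=500, show_label=False)
    with gr.Row():
        msg = gr.Textbox(placeholder="Ask about your studies...", show_label=False)
        send = gr.Button("Send")
    # Enter in the textbox and the Send button route through the same handler;
    # the handler returns the updated chat plus "" to clear the textbox.
    msg.submit(chatbot, [msg, chatbot_ui], [chatbot_ui, msg])
    send.click(chatbot, [msg, chatbot_ui], [chatbot_ui, msg])

demo.launch()
```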
|