Spaces:

dzehuggingface
/

SmallChat-FxnCaller

Sleeping

App Files Files Community

DylanZimmer committed on Aug 15, 2025

Commit

5ebc9ea

1 Parent(s): 20ed78d

reWritten

Browse files

Files changed (1) hide show

app.py +16 -41

app.py CHANGED Viewed

@@ -6,52 +6,29 @@ from transformers import AutoTokenizer, AutoModelForCausalLM
 # Load model and tokenizer
 model_name = "HuggingFaceTB/SmolLM3-3B"
 tokenizer = AutoTokenizer.from_pretrained(model_name)
-model = AutoModelForCausalLM.from_pretrained(model_name, torch_dtype=torch.bfloat16, device_map="auto")
-@spaces.GPU
-def chat_with_smollm3(message, history, system_prompt="", enable_thinking=True, temperature=0.6, top_p=0.95, max_tokens=32768):
-    """
-    Chat with SmolLM3-3B model with full feature support
-    """
-    # Prepare messages
     messages = []
-    # Add system prompt if provided
     if system_prompt.strip():
-        # Handle thinking mode flags in system prompt
-        if enable_thinking and "/no_think" not in system_prompt:
-            if "/think" not in system_prompt:
-                system_prompt += "/think"
-        elif not enable_thinking and "/think" not in system_prompt:
-            if "/no_think" not in system_prompt:
-                system_prompt += "/no_think"
         messages.append({"role": "system", "content": system_prompt})
-    else:
-        # Use enable_thinking parameter if no system prompt
-        if not enable_thinking:
-            messages.append({"role": "system", "content": "/no_think"})
-    # Add conversation history
     for human_msg, assistant_msg in history:
         messages.append({"role": "user", "content": human_msg})
         if assistant_msg:
             messages.append({"role": "assistant", "content": assistant_msg})
-    # Add current message
     messages.append({"role": "user", "content": message})
-    # Apply chat template
     text = tokenizer.apply_chat_template(
         messages,
         tokenize=False,
-        add_generation_prompt=True,
-        enable_thinking=enable_thinking if not system_prompt.strip() else None
     )
-    # Tokenize input
     model_inputs = tokenizer([text], return_tensors="pt").to(model.device)
-    # Generate response
     with torch.no_grad():
         generated_ids = model.generate(
             **model_inputs,
@@ -61,22 +38,20 @@ def chat_with_smollm3(message, history, system_prompt="", enable_thinking=True,
             do_sample=True,
             pad_token_id=tokenizer.eos_token_id
         )
-    # Decode response
     output_ids = generated_ids[0][len(model_inputs.input_ids[0]):]
     response = tokenizer.decode(output_ids, skip_special_tokens=True)
     return response
 demo = gr.ChatInterface(
-    fn=chat_with_smollm3,
     additional_inputs=[
-        gr.Textbox(label="System Prompt", value=""),
-        gr.Checkbox(label="Enable Thinking", value=True),
-        gr.Slider(0, 1, value=0.6, step=0.01, label="Temperature"),
-        gr.Slider(0, 1, value=0.95, step=0.01, label="Top P"),
-        gr.Number(value=32768, label="Max Tokens")
-    ]
 )
 demo.launch()

 # Load model and tokenizer
 model_name = "HuggingFaceTB/SmolLM3-3B"
 tokenizer = AutoTokenizer.from_pretrained(model_name)
# Load the causal-LM weights for `model_name` in bfloat16, passing
# device_map="auto" so device placement is chosen at load time.
# NOTE: this statement must end here; a trailing backslash would splice it
# onto the following `def` line and make the module fail to parse.
model = AutoModelForCausalLM.from_pretrained(
    model_name,
    torch_dtype=torch.bfloat16,
    device_map="auto",
)
def _build_messages(message, history, system_prompt):
    """Assemble the role/content message list fed to the chat template."""
    messages = []
    if system_prompt and system_prompt.strip():
        messages.append({"role": "system", "content": system_prompt})

    for turn in history or []:
        if isinstance(turn, dict):
            # "messages"-style history: one dict per turn. Forward only the
            # role/content pair and drop any extra keys and empty turns.
            content = turn.get("content")
            if content:
                messages.append({"role": turn.get("role"), "content": content})
            continue

        # Legacy pair-style history: (user_text, assistant_text).
        human_msg, assistant_msg = turn
        messages.append({"role": "user", "content": human_msg})
        if assistant_msg:
            messages.append({"role": "assistant", "content": assistant_msg})

    messages.append({"role": "user", "content": message})
    return messages


def chat_fxn_caller(message, history, system_prompt="", temperature=0.6, top_p=0.95, max_tokens=32768):
    """Generate one assistant reply for `message` given the prior conversation.

    Args:
        message: The new user message (text).
        history: Previous turns, either as a list of
            ``{"role": ..., "content": ...}`` dicts or as a list of
            ``(user_text, assistant_text)`` pairs. ``None``/empty is allowed.
        system_prompt: Optional system instruction; ignored when blank.
        temperature: Sampling temperature. A value <= 0 switches to greedy
            decoding instead of sampling.
        top_p: Nucleus-sampling threshold, used only when sampling.
        max_tokens: Upper bound on the number of newly generated tokens.

    Returns:
        The decoded reply text, with the prompt tokens and special tokens
        removed.

    Relies on the module-level ``tokenizer`` and ``model`` objects.
    """
    messages = _build_messages(message, history, system_prompt)

    text = tokenizer.apply_chat_template(
        messages,
        tokenize=False,
        # Append the assistant-turn header so the model writes the next reply
        # rather than continuing the user's text.
        add_generation_prompt=True,
    )
    model_inputs = tokenizer([text], return_tensors="pt").to(model.device)

    generation_kwargs = {
        # UI number widgets may deliver floats; the token budget must be an int.
        "max_new_tokens": int(max_tokens),
        "pad_token_id": tokenizer.eos_token_id,
    }
    if temperature > 0:
        generation_kwargs.update(do_sample=True, temperature=temperature, top_p=top_p)
    else:
        # Sampling needs a strictly positive temperature; decode greedily instead.
        generation_kwargs["do_sample"] = False

    with torch.no_grad():
        generated_ids = model.generate(**model_inputs, **generation_kwargs)

    # generate() returns prompt + continuation; keep only the continuation.
    prompt_length = len(model_inputs.input_ids[0])
    output_ids = generated_ids[0][prompt_length:]
    return tokenizer.decode(output_ids, skip_special_tokens=True)
# Initial value shown in the "System Prompt" textbox.
prompt = "Be a good chatbox"

# Chat UI wired to chat_fxn_caller. History is requested in "messages" format,
# and the textbox is supplied as an additional input to the callback.
demo = gr.ChatInterface(
    fn=chat_fxn_caller,
    type="messages",
    additional_inputs=[gr.Textbox(value=prompt, label="System Prompt")],
)

demo.launch()