Spaces:

GhostScientist
/

smollm2-360m-function-calling-chat

Paused

GhostScientist committed on Dec 17, 2025

Commit

f27ef17

verified ·

1 Parent(s): 913da61

Upload folder using huggingface_hub

Files changed (2) hide show

app.py CHANGED Viewed

@@ -2,23 +2,29 @@ import gradio as gr
 import spaces
 import torch
 from transformers import AutoModelForCausalLM, AutoTokenizer
-MODEL_ID = "GhostScientist/smollm2-360m-function-calling-sft"
-# Load tokenizer at startup
-tokenizer = AutoTokenizer.from_pretrained(MODEL_ID)
-# Global model - loaded lazily on first GPU call for faster Space startup
 model = None
 def load_model():
     global model
     if model is None:
-        model = AutoModelForCausalLM.from_pretrained(
-            MODEL_ID,
             torch_dtype=torch.float16,
             device_map="auto",
         )
     return model
 @spaces.GPU(duration=120)
@@ -63,7 +69,7 @@ def generate_response(message, history, system_message, max_tokens, temperature,
 demo = gr.ChatInterface(
     generate_response,
     title="SmolLM2 360M Function Calling",
-    description="A fine-tuned SmolLM2-360M model for function calling, powered by ZeroGPU (free!)",
     additional_inputs=[
         gr.Textbox(
             value="You are a helpful assistant that can call functions when needed.",

 import spaces
 import torch
 from transformers import AutoModelForCausalLM, AutoTokenizer
+from peft import PeftModel
+# Your LoRA adapter
+ADAPTER_ID = "GhostScientist/smollm2-360m-function-calling-sft"
+# Base model (from adapter_config.json -> base_model_name_or_path)
+BASE_MODEL_ID = "HuggingFaceTB/SmolLM2-360M-Instruct"
+# Load tokenizer at startup (from base model)
+tokenizer = AutoTokenizer.from_pretrained(BASE_MODEL_ID)
+# Global model - loaded lazily on first GPU call
 model = None
 def load_model():
     global model
     if model is None:
+        base_model = AutoModelForCausalLM.from_pretrained(
+            BASE_MODEL_ID,
             torch_dtype=torch.float16,
             device_map="auto",
         )
+        model = PeftModel.from_pretrained(base_model, ADAPTER_ID)
+        model = model.merge_and_unload()  # Merge for faster inference
     return model
 @spaces.GPU(duration=120)
 demo = gr.ChatInterface(
     generate_response,
     title="SmolLM2 360M Function Calling",
+    description="A LoRA fine-tuned SmolLM2-360M model for function calling, powered by ZeroGPU (free!)",
     additional_inputs=[
         gr.Textbox(
             value="You are a helpful assistant that can call functions when needed.",

requirements.txt CHANGED Viewed

@@ -3,3 +3,4 @@ torch
 transformers
 accelerate
 spaces

 transformers
 accelerate
 spaces
+peft