GhostScientist committed on
Commit
f27ef17
·
verified ·
1 Parent(s): 913da61

Upload folder using huggingface_hub

Browse files
Files changed (2) hide show
  1. app.py +13 -7
  2. requirements.txt +1 -0
app.py CHANGED
@@ -2,23 +2,29 @@ import gradio as gr
2
  import spaces
3
  import torch
4
  from transformers import AutoModelForCausalLM, AutoTokenizer
 
5
 
6
- MODEL_ID = "GhostScientist/smollm2-360m-function-calling-sft"
 
 
 
7
 
8
- # Load tokenizer at startup
9
- tokenizer = AutoTokenizer.from_pretrained(MODEL_ID)
10
 
11
- # Global model - loaded lazily on first GPU call for faster Space startup
12
  model = None
13
 
14
  def load_model():
15
  global model
16
  if model is None:
17
- model = AutoModelForCausalLM.from_pretrained(
18
- MODEL_ID,
19
  torch_dtype=torch.float16,
20
  device_map="auto",
21
  )
 
 
22
  return model
23
 
24
  @spaces.GPU(duration=120)
@@ -63,7 +69,7 @@ def generate_response(message, history, system_message, max_tokens, temperature,
63
  demo = gr.ChatInterface(
64
  generate_response,
65
  title="SmolLM2 360M Function Calling",
66
- description="A fine-tuned SmolLM2-360M model for function calling, powered by ZeroGPU (free!)",
67
  additional_inputs=[
68
  gr.Textbox(
69
  value="You are a helpful assistant that can call functions when needed.",
 
2
  import spaces
3
  import torch
4
  from transformers import AutoModelForCausalLM, AutoTokenizer
5
+ from peft import PeftModel
6
 
7
+ # Your LoRA adapter
8
+ ADAPTER_ID = "GhostScientist/smollm2-360m-function-calling-sft"
9
+ # Base model (from adapter_config.json -> base_model_name_or_path)
10
+ BASE_MODEL_ID = "HuggingFaceTB/SmolLM2-360M-Instruct"
11
 
12
+ # Load tokenizer at startup (from base model)
13
+ tokenizer = AutoTokenizer.from_pretrained(BASE_MODEL_ID)
14
 
15
+ # Global model - loaded lazily on first GPU call
16
  model = None
17
 
18
  def load_model():
19
  global model
20
  if model is None:
21
+ base_model = AutoModelForCausalLM.from_pretrained(
22
+ BASE_MODEL_ID,
23
  torch_dtype=torch.float16,
24
  device_map="auto",
25
  )
26
+ model = PeftModel.from_pretrained(base_model, ADAPTER_ID)
27
+ model = model.merge_and_unload() # Merge for faster inference
28
  return model
29
 
30
  @spaces.GPU(duration=120)
 
69
  demo = gr.ChatInterface(
70
  generate_response,
71
  title="SmolLM2 360M Function Calling",
72
+ description="A LoRA fine-tuned SmolLM2-360M model for function calling, powered by ZeroGPU (free!)",
73
  additional_inputs=[
74
  gr.Textbox(
75
  value="You are a helpful assistant that can call functions when needed.",
requirements.txt CHANGED
@@ -3,3 +3,4 @@ torch
3
  transformers
4
  accelerate
5
  spaces
 
 
3
  transformers
4
  accelerate
5
  spaces
6
+ peft