Jn-Huang committed on
Commit
58ffc70
·
1 Parent(s): 38dedc7

Customize BeFM UI and defaults

Browse files
Files changed (2) hide show
  1. app.py +24 -9
  2. app_vllm.py +26 -11
app.py CHANGED
@@ -77,7 +77,7 @@ def get_model_and_tokenizer():
77
 
78
  @spaces.GPU
79
  @torch.inference_mode()
80
- def generate_response(messages, max_new_tokens=512, temperature=0.7, top_p=0.9) -> str:
81
  model, tokenizer = get_model_and_tokenizer()
82
  device = model.device
83
 
@@ -96,20 +96,24 @@ def generate_response(messages, max_new_tokens=512, temperature=0.7, top_p=0.9)
96
  max_new_tokens=max_new_tokens,
97
  do_sample=True,
98
  temperature=temperature,
99
- top_p=top_p,
100
  pad_token_id=tokenizer.eos_token_id,
101
  )
102
  # Decode only the newly generated tokens
103
  generated_text = tokenizer.decode(out[0][input_length:], skip_special_tokens=True)
104
  return generated_text.strip()
105
 
106
- def chat_fn(message, history, system_prompt, max_new_tokens, temperature, top_p):
107
  # Build conversation in Llama 3.1 chat format
108
  messages = []
109
 
110
  # Add system prompt (use default if not provided)
111
  if not system_prompt:
112
- system_prompt = "You are Be.FM, a helpful and knowledgeable AI assistant. Provide clear, accurate, and concise responses."
 
 
 
 
113
  messages.append({"role": "system", "content": system_prompt})
114
 
115
  # Handle Gradio 6.0 history format
@@ -141,19 +145,30 @@ def chat_fn(message, history, system_prompt, max_new_tokens, temperature, top_p)
141
  messages,
142
  max_new_tokens=max_new_tokens,
143
  temperature=temperature,
144
- top_p=top_p,
145
  )
146
  return reply
147
 
148
  demo = gr.ChatInterface(
149
  fn=chat_fn,
150
  additional_inputs=[
151
- gr.Textbox(label="System prompt (optional)", placeholder="You are Be.FM assistant...", lines=2),
 
 
 
 
 
 
 
 
 
 
 
 
 
152
  gr.Slider(16, 2048, value=512, step=16, label="max_new_tokens"),
153
- gr.Slider(0.1, 1.5, value=0.7, step=0.05, label="temperature"),
154
- gr.Slider(0.1, 1.0, value=0.9, step=0.05, label="top_p"),
155
  ],
156
- title="Be.FM-8B (PEFT) on Meta-Llama-3.1-8B-Instruct",
157
  description="Chat interface using Meta-Llama-3.1-8B-Instruct with PEFT adapter befm/Be.FM-8B."
158
  )
159
 
 
77
 
78
  @spaces.GPU
79
  @torch.inference_mode()
80
+ def generate_response(messages, max_new_tokens=512, temperature=0.7) -> str:
81
  model, tokenizer = get_model_and_tokenizer()
82
  device = model.device
83
 
 
96
  max_new_tokens=max_new_tokens,
97
  do_sample=True,
98
  temperature=temperature,
99
+ top_p=0.9,
100
  pad_token_id=tokenizer.eos_token_id,
101
  )
102
  # Decode only the newly generated tokens
103
  generated_text = tokenizer.decode(out[0][input_length:], skip_special_tokens=True)
104
  return generated_text.strip()
105
 
106
+ def chat_fn(message, history, system_prompt, max_new_tokens, temperature):
107
  # Build conversation in Llama 3.1 chat format
108
  messages = []
109
 
110
  # Add system prompt (use default if not provided)
111
  if not system_prompt:
112
+ system_prompt = (
113
+ "Be.FM 8B is an open foundation model for human behavior modeling, built on "
114
+ "Llama 3.1 8B and fine-tuned on diverse behavioral datasets. It is designed "
115
+ "to enhance the understanding and prediction of human decision-making."
116
+ )
117
  messages.append({"role": "system", "content": system_prompt})
118
 
119
  # Handle Gradio 6.0 history format
 
145
  messages,
146
  max_new_tokens=max_new_tokens,
147
  temperature=temperature,
 
148
  )
149
  return reply
150
 
151
  demo = gr.ChatInterface(
152
  fn=chat_fn,
153
  additional_inputs=[
154
+ gr.Textbox(
155
+ label="System prompt (optional)",
156
+ placeholder=(
157
+ "Be.FM 8B is an open foundation model for human behavior modeling, built "
158
+ "on Llama 3.1 8B and fine-tuned on diverse behavioral datasets. It is "
159
+ "designed to enhance the understanding and prediction of human decision-"
160
+ "making."
161
+ ),
162
+ lines=2,
163
+ ),
164
+ gr.Markdown(
165
+ "For system and user prompts in a variety of economic games, please refer to "
166
+ "[this document](https://docs.google.com/document/d/1g3479v-jBwjRyHuk_yzi71XTt_-uEkafP8ugQkMRD0s/edit?tab=t.0)."
167
+ ),
168
  gr.Slider(16, 2048, value=512, step=16, label="max_new_tokens"),
169
+ gr.Slider(0.1, 1.5, value=0.6, step=0.05, label="temperature"),
 
170
  ],
171
+ title="Be.FM: Open Foundation Models for Human Behavior (8B)",
172
  description="Chat interface using Meta-Llama-3.1-8B-Instruct with PEFT adapter befm/Be.FM-8B."
173
  )
174
 
app_vllm.py CHANGED
@@ -63,7 +63,7 @@ def get_model_and_tokenizer():
63
  return _llm, _lora_request, _tokenizer
64
 
65
  @spaces.GPU
66
- def generate_response(messages, max_new_tokens=512, temperature=0.7, top_p=0.9) -> str:
67
  llm, lora_request, tokenizer = get_model_and_tokenizer()
68
 
69
  # Apply Llama 3.1 chat template
@@ -75,7 +75,7 @@ def generate_response(messages, max_new_tokens=512, temperature=0.7, top_p=0.9)
75
 
76
  sampling_params = SamplingParams(
77
  temperature=temperature,
78
- top_p=top_p,
79
  max_tokens=max_new_tokens,
80
  )
81
 
@@ -88,13 +88,17 @@ def generate_response(messages, max_new_tokens=512, temperature=0.7, top_p=0.9)
88
 
89
  return outputs[0].outputs[0].text
90
 
91
- def chat_fn(message, history, system_prompt, max_new_tokens, temperature, top_p):
92
  # Build conversation in Llama 3.1 chat format
93
  messages = []
94
 
95
  # Add system prompt (use default if not provided)
96
  if not system_prompt:
97
- system_prompt = "You are Be.FM, a helpful and knowledgeable AI assistant. Provide clear, accurate, and concise responses."
 
 
 
 
98
  messages.append({"role": "system", "content": system_prompt})
99
 
100
  # History is already in dict format: [{"role": "user", "content": "..."}, ...]
@@ -108,20 +112,31 @@ def chat_fn(message, history, system_prompt, max_new_tokens, temperature, top_p)
108
  messages,
109
  max_new_tokens=max_new_tokens,
110
  temperature=temperature,
111
- top_p=top_p,
112
  )
113
  return reply
114
 
115
  demo = gr.ChatInterface(
116
- fn=lambda message, history, system_prompt, max_new_tokens, temperature, top_p:
117
- chat_fn(message, history, system_prompt, max_new_tokens, temperature, top_p),
118
  additional_inputs=[
119
- gr.Textbox(label="System prompt (optional)", placeholder="You are Be.FM assistant...", lines=2),
 
 
 
 
 
 
 
 
 
 
 
 
 
120
  gr.Slider(16, 2048, value=512, step=16, label="max_new_tokens"),
121
- gr.Slider(0.1, 1.5, value=0.7, step=0.05, label="temperature"),
122
- gr.Slider(0.1, 1.0, value=0.9, step=0.05, label="top_p"),
123
  ],
124
- title="Be.FM-8B (vLLM)",
125
  description="Chat interface using vLLM for optimized inference with Meta-Llama-3.1-8B-Instruct and PEFT adapter befm/Be.FM-8B."
126
  )
127
 
 
63
  return _llm, _lora_request, _tokenizer
64
 
65
  @spaces.GPU
66
+ def generate_response(messages, max_new_tokens=512, temperature=0.7) -> str:
67
  llm, lora_request, tokenizer = get_model_and_tokenizer()
68
 
69
  # Apply Llama 3.1 chat template
 
75
 
76
  sampling_params = SamplingParams(
77
  temperature=temperature,
78
+ top_p=0.9,
79
  max_tokens=max_new_tokens,
80
  )
81
 
 
88
 
89
  return outputs[0].outputs[0].text
90
 
91
+ def chat_fn(message, history, system_prompt, max_new_tokens, temperature):
92
  # Build conversation in Llama 3.1 chat format
93
  messages = []
94
 
95
  # Add system prompt (use default if not provided)
96
  if not system_prompt:
97
+ system_prompt = (
98
+ "Be.FM 8B is an open foundation model for human behavior modeling, built on "
99
+ "Llama 3.1 8B and fine-tuned on diverse behavioral datasets. It is designed "
100
+ "to enhance the understanding and prediction of human decision-making."
101
+ )
102
  messages.append({"role": "system", "content": system_prompt})
103
 
104
  # History is already in dict format: [{"role": "user", "content": "..."}, ...]
 
112
  messages,
113
  max_new_tokens=max_new_tokens,
114
  temperature=temperature,
 
115
  )
116
  return reply
117
 
118
  demo = gr.ChatInterface(
119
+ fn=lambda message, history, system_prompt, max_new_tokens, temperature:
120
+ chat_fn(message, history, system_prompt, max_new_tokens, temperature),
121
  additional_inputs=[
122
+ gr.Textbox(
123
+ label="System prompt (optional)",
124
+ placeholder=(
125
+ "Be.FM 8B is an open foundation model for human behavior modeling, built "
126
+ "on Llama 3.1 8B and fine-tuned on diverse behavioral datasets. It is "
127
+ "designed to enhance the understanding and prediction of human decision-"
128
+ "making."
129
+ ),
130
+ lines=2,
131
+ ),
132
+ gr.Markdown(
133
+ "For system and user prompts in a variety of economic games, please refer to "
134
+ "[this document](https://docs.google.com/document/d/1g3479v-jBwjRyHuk_yzi71XTt_-uEkafP8ugQkMRD0s/edit?tab=t.0)."
135
+ ),
136
  gr.Slider(16, 2048, value=512, step=16, label="max_new_tokens"),
137
+ gr.Slider(0.1, 1.5, value=0.6, step=0.05, label="temperature"),
 
138
  ],
139
+ title="Be.FM: Open Foundation Models for Human Behavior (8B)",
140
  description="Chat interface using vLLM for optimized inference with Meta-Llama-3.1-8B-Instruct and PEFT adapter befm/Be.FM-8B."
141
  )
142