Jn-Huang committed · 58ffc70 · 1 parent: 38dedc7

Customize BeFM UI and defaults

Files changed:
- app.py (+24 −9)
- app_vllm.py (+26 −11)
app.py CHANGED

```diff
@@ -77,7 +77,7 @@ def get_model_and_tokenizer():
 
 @spaces.GPU
 @torch.inference_mode()
-def generate_response(messages, max_new_tokens=512, temperature=0.7, top_p=0.9) -> str:
+def generate_response(messages, max_new_tokens=512, temperature=0.7) -> str:
     model, tokenizer = get_model_and_tokenizer()
     device = model.device
 
@@ -96,20 +96,24 @@ def generate_response(messages, max_new_tokens=512, temperature=0.7, top_p=0.9)
         max_new_tokens=max_new_tokens,
         do_sample=True,
         temperature=temperature,
-        top_p=top_p,
+        top_p=0.9,
         pad_token_id=tokenizer.eos_token_id,
     )
     # Decode only the newly generated tokens
     generated_text = tokenizer.decode(out[0][input_length:], skip_special_tokens=True)
     return generated_text.strip()
 
-def chat_fn(message, history, system_prompt, max_new_tokens, temperature, top_p):
+def chat_fn(message, history, system_prompt, max_new_tokens, temperature):
     # Build conversation in Llama 3.1 chat format
     messages = []
 
     # Add system prompt (use default if not provided)
     if not system_prompt:
-        system_prompt = …
+        system_prompt = (
+            "Be.FM 8B is an open foundation model for human behavior modeling, built on "
+            "Llama 3.1 8B and fine-tuned on diverse behavioral datasets. It is designed "
+            "to enhance the understanding and prediction of human decision-making."
+        )
     messages.append({"role": "system", "content": system_prompt})
 
     # Handle Gradio 6.0 history format
@@ -141,19 +145,30 @@ def chat_fn(message, history, system_prompt, max_new_tokens, temperature, top_p)
         messages,
         max_new_tokens=max_new_tokens,
         temperature=temperature,
-        top_p=top_p,
     )
     return reply
 
 demo = gr.ChatInterface(
     fn=chat_fn,
     additional_inputs=[
-        gr.Textbox(…
+        gr.Textbox(
+            label="System prompt (optional)",
+            placeholder=(
+                "Be.FM 8B is an open foundation model for human behavior modeling, built "
+                "on Llama 3.1 8B and fine-tuned on diverse behavioral datasets. It is "
+                "designed to enhance the understanding and prediction of human decision-"
+                "making."
+            ),
+            lines=2,
+        ),
+        gr.Markdown(
+            "For system and user prompts in a variety of economic games, please refer to "
+            "[this document](https://docs.google.com/document/d/1g3479v-jBwjRyHuk_yzi71XTt_-uEkafP8ugQkMRD0s/edit?tab=t.0)."
+        ),
         gr.Slider(16, 2048, value=512, step=16, label="max_new_tokens"),
-        gr.Slider(0.1, 1.5, value=0.…
-        gr.Slider(0.1, 1.0, value=0.9, step=0.05, label="top_p"),
+        gr.Slider(0.1, 1.5, value=0.6, step=0.05, label="temperature"),
     ],
-    title="Be.FM…
+    title="Be.FM: Open Foundation Models for Human Behavior (8B)",
     description="Chat interface using Meta-Llama-3.1-8B-Instruct with PEFT adapter befm/Be.FM-8B."
 )
 
```
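Only fragments of `generate_response` appear in these hunks. A minimal sketch of how the visible pieces (`device`, `out`, `input_length`) plausibly fit together after this commit — the `apply_chat_template` step is an assumption, since it sits in context lines the diff does not show:

```python
import spaces
import torch

@spaces.GPU
@torch.inference_mode()
def generate_response(messages, max_new_tokens=512, temperature=0.7) -> str:
    # get_model_and_tokenizer() is the cached loader defined earlier in app.py
    model, tokenizer = get_model_and_tokenizer()
    device = model.device

    # Assumed step: render the Llama 3.1 chat template into input ids
    input_ids = tokenizer.apply_chat_template(
        messages, add_generation_prompt=True, return_tensors="pt"
    ).to(device)
    input_length = input_ids.shape[-1]

    out = model.generate(
        input_ids,
        max_new_tokens=max_new_tokens,
        do_sample=True,
        temperature=temperature,
        top_p=0.9,  # hard-coded by this commit; the top_p slider is removed
        pad_token_id=tokenizer.eos_token_id,
    )
    # Decode only the newly generated tokens
    generated_text = tokenizer.decode(out[0][input_length:], skip_special_tokens=True)
    return generated_text.strip()
```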
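The loader itself is outside this diff; per the description, the app pairs Meta-Llama-3.1-8B-Instruct with the PEFT adapter befm/Be.FM-8B. A hedged sketch of what `get_model_and_tokenizer()` likely looks like (the cache variables and the dtype/device options are assumptions):

```python
import torch
from peft import PeftModel
from transformers import AutoModelForCausalLM, AutoTokenizer

_model, _tokenizer = None, None  # assumed module-level cache

def get_model_and_tokenizer():
    """Lazily load the base model plus the Be.FM adapter, then reuse them."""
    global _model, _tokenizer
    if _model is None:
        base = AutoModelForCausalLM.from_pretrained(
            "meta-llama/Meta-Llama-3.1-8B-Instruct",
            torch_dtype=torch.bfloat16,
            device_map="auto",
        )
        _model = PeftModel.from_pretrained(base, "befm/Be.FM-8B")
        _tokenizer = AutoTokenizer.from_pretrained(
            "meta-llama/Meta-Llama-3.1-8B-Instruct"
        )
    return _model, _tokenizer
```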
app_vllm.py CHANGED

```diff
@@ -63,7 +63,7 @@ def get_model_and_tokenizer():
     return _llm, _lora_request, _tokenizer
 
 @spaces.GPU
-def generate_response(messages, max_new_tokens=512, temperature=0.7, top_p=0.9) -> str:
+def generate_response(messages, max_new_tokens=512, temperature=0.7) -> str:
     llm, lora_request, tokenizer = get_model_and_tokenizer()
 
     # Apply Llama 3.1 chat template
@@ -75,7 +75,7 @@ def generate_response(messages, max_new_tokens=512, temperature=0.7, top_p=0.9)
 
     sampling_params = SamplingParams(
         temperature=temperature,
-        top_p=top_p,
+        top_p=0.9,
         max_tokens=max_new_tokens,
     )
 
@@ -88,13 +88,17 @@ def generate_response(messages, max_new_tokens=512, temperature=0.7, top_p=0.9)
 
     return outputs[0].outputs[0].text
 
-def chat_fn(message, history, system_prompt, max_new_tokens, temperature, top_p):
+def chat_fn(message, history, system_prompt, max_new_tokens, temperature):
     # Build conversation in Llama 3.1 chat format
     messages = []
 
     # Add system prompt (use default if not provided)
     if not system_prompt:
-        system_prompt = …
+        system_prompt = (
+            "Be.FM 8B is an open foundation model for human behavior modeling, built on "
+            "Llama 3.1 8B and fine-tuned on diverse behavioral datasets. It is designed "
+            "to enhance the understanding and prediction of human decision-making."
+        )
     messages.append({"role": "system", "content": system_prompt})
 
     # History is already in dict format: [{"role": "user", "content": "..."}, ...]
@@ -108,20 +112,31 @@ def chat_fn(message, history, system_prompt, max_new_tokens, temperature, top_p)
         messages,
         max_new_tokens=max_new_tokens,
         temperature=temperature,
-        top_p=top_p,
     )
     return reply
 
 demo = gr.ChatInterface(
-    fn=lambda message, history, system_prompt, max_new_tokens, temperature, top_p:
-        chat_fn(message, history, system_prompt, max_new_tokens, temperature, top_p),
+    fn=lambda message, history, system_prompt, max_new_tokens, temperature:
+        chat_fn(message, history, system_prompt, max_new_tokens, temperature),
     additional_inputs=[
-        gr.Textbox(…
+        gr.Textbox(
+            label="System prompt (optional)",
+            placeholder=(
+                "Be.FM 8B is an open foundation model for human behavior modeling, built "
+                "on Llama 3.1 8B and fine-tuned on diverse behavioral datasets. It is "
+                "designed to enhance the understanding and prediction of human decision-"
+                "making."
+            ),
+            lines=2,
+        ),
+        gr.Markdown(
+            "For system and user prompts in a variety of economic games, please refer to "
+            "[this document](https://docs.google.com/document/d/1g3479v-jBwjRyHuk_yzi71XTt_-uEkafP8ugQkMRD0s/edit?tab=t.0)."
+        ),
         gr.Slider(16, 2048, value=512, step=16, label="max_new_tokens"),
-        gr.Slider(0.1, 1.5, value=0.…
-        gr.Slider(0.1, 1.0, value=0.9, step=0.05, label="top_p"),
+        gr.Slider(0.1, 1.5, value=0.6, step=0.05, label="temperature"),
     ],
-    title="Be.FM…
+    title="Be.FM: Open Foundation Models for Human Behavior (8B)",
     description="Chat interface using vLLM for optimized inference with Meta-Llama-3.1-8B-Instruct and PEFT adapter befm/Be.FM-8B."
 )
 
```
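As in app.py, only fragments of the vLLM `generate_response` are visible. A sketch consistent with the context lines (`SamplingParams`, `outputs[0].outputs[0].text`); the prompt rendering and the `llm.generate` call are assumptions:

```python
import spaces
from vllm import SamplingParams

@spaces.GPU
def generate_response(messages, max_new_tokens=512, temperature=0.7) -> str:
    # get_model_and_tokenizer() is the cached loader defined earlier in app_vllm.py
    llm, lora_request, tokenizer = get_model_and_tokenizer()

    # Apply Llama 3.1 chat template; vLLM consumes a plain prompt string
    prompt = tokenizer.apply_chat_template(
        messages, tokenize=False, add_generation_prompt=True
    )

    sampling_params = SamplingParams(
        temperature=temperature,
        top_p=0.9,  # hard-coded by this commit
        max_tokens=max_new_tokens,
    )

    # Assumed call: generate with the Be.FM LoRA adapter applied
    outputs = llm.generate([prompt], sampling_params, lora_request=lora_request)

    return outputs[0].outputs[0].text
```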
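For completeness, a hedged sketch of a loader matching the context line `return _llm, _lora_request, _tokenizer` (the `LoRARequest` arguments and engine options are assumptions; in practice the adapter repo would be downloaded to a local path first):

```python
from transformers import AutoTokenizer
from vllm import LLM
from vllm.lora.request import LoRARequest

_llm, _lora_request, _tokenizer = None, None, None  # assumed module-level cache

def get_model_and_tokenizer():
    """Lazily build the vLLM engine with LoRA support and cache it."""
    global _llm, _lora_request, _tokenizer
    if _llm is None:
        _llm = LLM(model="meta-llama/Meta-Llama-3.1-8B-Instruct", enable_lora=True)
        # LoRARequest(name, unique_int_id, path); "befm/Be.FM-8B" shown as a placeholder path
        _lora_request = LoRARequest("befm", 1, "befm/Be.FM-8B")
        _tokenizer = AutoTokenizer.from_pretrained(
            "meta-llama/Meta-Llama-3.1-8B-Instruct"
        )
    return _llm, _lora_request, _tokenizer
```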