Spaces:
Running on Zero
Running on Zero
Update app.py
Browse files
app.py
CHANGED
|
@@ -14,8 +14,8 @@ GRM2 is Orion's latest iteration of powerful open LLMs.
|
|
| 14 |
This is a demo of [`OrionLLM/GRM2-3b`](https://huggingface.co/OrionLLM/GRM2-3b), fine-tuned for long-form reasoning on general reasoning tasks.
|
| 15 |
"""
|
| 16 |
|
| 17 |
-
MAX_NEW_TOKENS_LIMIT =
|
| 18 |
-
DEFAULT_MAX_NEW_TOKENS =
|
| 19 |
MAX_INPUT_TOKENS = int(os.getenv("MAX_INPUT_TOKENS", "32768"))
|
| 20 |
|
| 21 |
MODEL_ID = "OrionLLM/GRM2-3b"
|
|
@@ -84,8 +84,8 @@ def validate_input(message: str) -> dict:
|
|
| 84 |
def generate(
|
| 85 |
message: str,
|
| 86 |
chat_history: list[dict],
|
| 87 |
-
max_new_tokens: int =
|
| 88 |
-
temperature: float =
|
| 89 |
top_p: float = 0.9,
|
| 90 |
top_k: int = 50,
|
| 91 |
repetition_penalty: float = 1.2,
|
|
|
|
| 14 |
This is a demo of [`OrionLLM/GRM2-3b`](https://huggingface.co/OrionLLM/GRM2-3b), fine-tuned for long-form reasoning on general reasoning tasks.
|
| 15 |
"""
|
| 16 |
|
| 17 |
+
MAX_NEW_TOKENS_LIMIT = 32768
|
| 18 |
+
DEFAULT_MAX_NEW_TOKENS = 32768
|
| 19 |
MAX_INPUT_TOKENS = int(os.getenv("MAX_INPUT_TOKENS", "32768"))
|
| 20 |
|
| 21 |
MODEL_ID = "OrionLLM/GRM2-3b"
|
|
|
|
| 84 |
def generate(
|
| 85 |
message: str,
|
| 86 |
chat_history: list[dict],
|
| 87 |
+
max_new_tokens: int = 32768,
|
| 88 |
+
temperature: float = 1.0,
|
| 89 |
top_p: float = 0.9,
|
| 90 |
top_k: int = 50,
|
| 91 |
repetition_penalty: float = 1.2,
|