Spaces:
Running on Zero
Running on Zero
Update app.py
Browse files
app.py
CHANGED
|
@@ -14,8 +14,8 @@ GRM2 is Orion's latest iteration of powerful open LLMs.
|
|
| 14 |
This is a demo of [`OrionLLM/GRM2-3b`](https://huggingface.co/OrionLLM/GRM2-3b), fine-tuned for long-form reasoning on general reasoning tasks.
|
| 15 |
"""
|
| 16 |
|
| 17 |
-
MAX_NEW_TOKENS_LIMIT =
|
| 18 |
-
DEFAULT_MAX_NEW_TOKENS =
|
| 19 |
MAX_INPUT_TOKENS = int(os.getenv("MAX_INPUT_TOKENS", "32768"))
|
| 20 |
|
| 21 |
MODEL_ID = "OrionLLM/GRM2-3b"
|
|
@@ -84,8 +84,8 @@ def validate_input(message: str) -> dict:
|
|
| 84 |
def generate(
|
| 85 |
message: str,
|
| 86 |
chat_history: list[dict],
|
| 87 |
-
max_new_tokens: int =
|
| 88 |
-
temperature: float =
|
| 89 |
top_p: float = 0.9,
|
| 90 |
top_k: int = 50,
|
| 91 |
repetition_penalty: float = 1.2,
|
|
|
|
| 14 |
This is a demo of [`OrionLLM/GRM2-3b`](https://huggingface.co/OrionLLM/GRM2-3b), fine-tuned for long-form reasoning on general reasoning tasks.
|
| 15 |
"""
|
| 16 |
|
| 17 |
+
MAX_NEW_TOKENS_LIMIT = 32768
|
| 18 |
+
DEFAULT_MAX_NEW_TOKENS = 32768
|
| 19 |
MAX_INPUT_TOKENS = int(os.getenv("MAX_INPUT_TOKENS", "32768"))
|
| 20 |
|
| 21 |
MODEL_ID = "OrionLLM/GRM2-3b"
|
|
|
|
| 84 |
def generate(
|
| 85 |
message: str,
|
| 86 |
chat_history: list[dict],
|
| 87 |
+
max_new_tokens: int = 32768,
|
| 88 |
+
temperature: float = 1.0,
|
| 89 |
top_p: float = 0.9,
|
| 90 |
top_k: int = 50,
|
| 91 |
repetition_penalty: float = 1.2,
|