Spaces:
Running
Running
Update app.py
Browse files
app.py
CHANGED
|
@@ -9,7 +9,6 @@ import tempfile
|
|
| 9 |
app = Flask(__name__)
|
| 10 |
logging.basicConfig(level=logging.INFO)
|
| 11 |
|
| 12 |
- # CONFIGURACIÓN DE TOKENS
|
| 13 |
MAX_CONTEXT_TOKENS = 1024 * 8
|
| 14 |
MAX_GENERATION_TOKENS = 1024 * 4
|
| 15 |
|
|
@@ -236,6 +235,10 @@ def chat_completions():
|
|
| 236 |
# Aplicar límite de tokens si no se especifica
|
| 237 |
if 'max_tokens' not in kwargs:
|
| 238 |
kwargs['max_tokens'] = MAX_GENERATION_TOKENS
|
|
|
|
|
|
|
|
|
|
|
|
|
| 239 |
|
| 240 |
result = llm_manager.chat_completion(model_name, messages, **kwargs)
|
| 241 |
|
|
|
|
| 9 |
app = Flask(__name__)
|
| 10 |
logging.basicConfig(level=logging.INFO)
|
| 11 |
|
|
|
|
| 12 |
MAX_CONTEXT_TOKENS = 1024 * 8
|
| 13 |
MAX_GENERATION_TOKENS = 1024 * 4
|
| 14 |
|
|
|
|
| 235 |
# Aplicar límite de tokens si no se especifica
|
| 236 |
if 'max_tokens' not in kwargs:
|
| 237 |
kwargs['max_tokens'] = MAX_GENERATION_TOKENS
|
| 238 |
+ else:
|
| 239 |
+ # Validar que max_tokens no exceda el máximo permitido
|
| 240 |
+ if kwargs['max_tokens'] > MAX_GENERATION_TOKENS:
|
| 241 |
+ kwargs['max_tokens'] = MAX_GENERATION_TOKENS
|
| 242 |
|
| 243 |
result = llm_manager.chat_completion(model_name, messages, **kwargs)
|
| 244 |
|