Spaces:
Running
Running
Update app.py
Browse files
app.py
CHANGED
|
@@ -272,7 +272,7 @@ def generate_endpoint(user_message):
|
|
| 272 |
system_instruction = request.args.get('system', '')
|
| 273 |
temperature = float(request.args.get('temperature', 0.7))
|
| 274 |
top_p = float(request.args.get('top_p', 0.95))
|
| 275 |
-
top_k = int(request.args.get('top_k',
|
| 276 |
model_name = request.args.get('model', MODELS[0]["name"])
|
| 277 |
max_tokens = int(request.args.get('max_tokens', MAX_GENERATION_TOKENS))
|
| 278 |
|
|
@@ -291,12 +291,6 @@ def generate_endpoint(user_message):
|
|
| 291 |
mimetype='text/plain'
|
| 292 |
)
|
| 293 |
|
| 294 |
-
if top_k:
|
| 295 |
-
try:
|
| 296 |
-
kwargs["top_k"] = int(top_k)
|
| 297 |
-
except ValueError:
|
| 298 |
-
return Response("Error: top_k debe ser número entero", status=400)
|
| 299 |
-
|
| 300 |
# Limitar max_tokens a la configuración máxima
|
| 301 |
if max_tokens > MAX_GENERATION_TOKENS:
|
| 302 |
max_tokens = MAX_GENERATION_TOKENS
|
|
@@ -322,7 +316,13 @@ def generate_endpoint(user_message):
|
|
| 322 |
"max_tokens": max_tokens,
|
| 323 |
"stream": False
|
| 324 |
}
|
| 325 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 326 |
# Generar respuesta
|
| 327 |
result = llm_manager.chat_completion(model_name, messages, **kwargs)
|
| 328 |
|
|
|
|
| 272 |
system_instruction = request.args.get('system', '')
|
| 273 |
temperature = float(request.args.get('temperature', 0.7))
|
| 274 |
top_p = float(request.args.get('top_p', 0.95))
|
| 275 |
+
top_k = int(request.args.get('top_k', 0))
|
| 276 |
model_name = request.args.get('model', MODELS[0]["name"])
|
| 277 |
max_tokens = int(request.args.get('max_tokens', MAX_GENERATION_TOKENS))
|
| 278 |
|
|
|
|
| 291 |
mimetype='text/plain'
|
| 292 |
)
|
| 293 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 294 |
# Limitar max_tokens a la configuración máxima
|
| 295 |
if max_tokens > MAX_GENERATION_TOKENS:
|
| 296 |
max_tokens = MAX_GENERATION_TOKENS
|
|
|
|
| 316 |
"max_tokens": max_tokens,
|
| 317 |
"stream": False
|
| 318 |
}
|
| 319 |
+
|
| 320 |
+
if top_k:
|
| 321 |
+
try:
|
| 322 |
+
kwargs["top_k"] = int(top_k)
|
| 323 |
+
except ValueError:
|
| 324 |
+
return Response("Error: top_k debe ser número entero", status=400)
|
| 325 |
+
|
| 326 |
# Generar respuesta
|
| 327 |
result = llm_manager.chat_completion(model_name, messages, **kwargs)
|
| 328 |
|