Spaces:
Paused
Paused
fix: draft max_tokens 512→2048 (thought 블록이 토큰 소진하여 빈 초안 문제)
Browse files
src/inference/api_server.py
CHANGED
|
@@ -1043,7 +1043,7 @@ class vLLMEngineManager:
|
|
| 1043 |
|
| 1044 |
gen_request = GenerateCivilResponseRequest(
|
| 1045 |
prompt=working_query,
|
| 1046 |
-
max_tokens=512,
|
| 1047 |
temperature=0.7,
|
| 1048 |
use_rag=False,
|
| 1049 |
)
|
|
|
|
| 1043 |
|
| 1044 |
gen_request = GenerateCivilResponseRequest(
|
| 1045 |
prompt=working_query,
|
| 1046 |
+
max_tokens=2048,
|
| 1047 |
temperature=0.7,
|
| 1048 |
use_rag=False,
|
| 1049 |
)
|