Spaces:
Running on Zero
Running on Zero
fix api server no duration
Browse files
acestep/api_server.py
CHANGED
|
@@ -125,7 +125,7 @@ class GenerateMusicRequest(BaseModel):
|
|
| 125 |
is_format_caption: bool = False
|
| 126 |
|
| 127 |
lm_temperature: float = 0.85
|
| 128 |
-
lm_cfg_scale: float = 2.
|
| 129 |
lm_top_k: Optional[int] = None
|
| 130 |
lm_top_p: Optional[float] = 0.9
|
| 131 |
lm_repetition_penalty: float = 1.0
|
|
@@ -137,7 +137,7 @@ class GenerateMusicRequest(BaseModel):
|
|
| 137 |
|
| 138 |
|
| 139 |
_LM_DEFAULT_TEMPERATURE = 0.85
|
| 140 |
-
_LM_DEFAULT_CFG_SCALE = 2.
|
| 141 |
_LM_DEFAULT_TOP_P = 0.9
|
| 142 |
_DEFAULT_DIT_INSTRUCTION = DEFAULT_DIT_INSTRUCTION
|
| 143 |
_DEFAULT_LM_INSTRUCTION = DEFAULT_LM_INSTRUCTION
|
|
@@ -728,16 +728,33 @@ def create_app() -> FastAPI:
|
|
| 728 |
print(f"[api_server] Sample generated: caption_len={len(caption)}, lyrics_len={len(lyrics)}, bpm={bpm}, duration={audio_duration}")
|
| 729 |
|
| 730 |
# Apply format_sample() if use_format is True and caption/lyrics are provided
|
|
|
|
|
|
|
|
|
|
| 731 |
if req.use_format and (caption or lyrics):
|
| 732 |
print(f"[api_server] Applying format_sample to enhance input...")
|
| 733 |
_ensure_llm_ready()
|
| 734 |
if getattr(app.state, "_llm_init_error", None):
|
| 735 |
raise RuntimeError(f"5Hz LM init failed (needed for format): {app.state._llm_init_error}")
|
| 736 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 737 |
format_result = format_sample(
|
| 738 |
llm_handler=llm,
|
| 739 |
caption=caption,
|
| 740 |
lyrics=lyrics,
|
|
|
|
| 741 |
temperature=req.lm_temperature,
|
| 742 |
top_k=lm_top_k if lm_top_k > 0 else None,
|
| 743 |
top_p=lm_top_p if lm_top_p < 1.0 else None,
|
|
@@ -745,9 +762,20 @@ def create_app() -> FastAPI:
|
|
| 745 |
)
|
| 746 |
|
| 747 |
if format_result.success:
|
| 748 |
-
|
| 749 |
-
|
| 750 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 751 |
else:
|
| 752 |
print(f"[api_server] Warning: format_sample failed: {format_result.error}, using original input")
|
| 753 |
|
|
@@ -811,7 +839,12 @@ def create_app() -> FastAPI:
|
|
| 811 |
lm_top_k=lm_top_k,
|
| 812 |
lm_top_p=lm_top_p,
|
| 813 |
lm_negative_prompt=req.lm_negative_prompt,
|
| 814 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 815 |
use_cot_caption=req.use_cot_caption,
|
| 816 |
use_cot_language=req.use_cot_language,
|
| 817 |
use_constrained_decoding=req.constrained_decoding,
|
|
|
|
| 125 |
is_format_caption: bool = False
|
| 126 |
|
| 127 |
lm_temperature: float = 0.85
|
| 128 |
+
lm_cfg_scale: float = 2.5
|
| 129 |
lm_top_k: Optional[int] = None
|
| 130 |
lm_top_p: Optional[float] = 0.9
|
| 131 |
lm_repetition_penalty: float = 1.0
|
|
|
|
| 137 |
|
| 138 |
|
| 139 |
_LM_DEFAULT_TEMPERATURE = 0.85
|
| 140 |
+
_LM_DEFAULT_CFG_SCALE = 2.5
|
| 141 |
_LM_DEFAULT_TOP_P = 0.9
|
| 142 |
_DEFAULT_DIT_INSTRUCTION = DEFAULT_DIT_INSTRUCTION
|
| 143 |
_DEFAULT_LM_INSTRUCTION = DEFAULT_LM_INSTRUCTION
|
|
|
|
| 728 |
print(f"[api_server] Sample generated: caption_len={len(caption)}, lyrics_len={len(lyrics)}, bpm={bpm}, duration={audio_duration}")
|
| 729 |
|
| 730 |
# Apply format_sample() if use_format is True and caption/lyrics are provided
|
| 731 |
+
# Track whether format_sample generated duration (to decide if Phase 1 is needed)
|
| 732 |
+
format_has_duration = False
|
| 733 |
+
|
| 734 |
if req.use_format and (caption or lyrics):
|
| 735 |
print(f"[api_server] Applying format_sample to enhance input...")
|
| 736 |
_ensure_llm_ready()
|
| 737 |
if getattr(app.state, "_llm_init_error", None):
|
| 738 |
raise RuntimeError(f"5Hz LM init failed (needed for format): {app.state._llm_init_error}")
|
| 739 |
|
| 740 |
+
# Build user_metadata from request params (matching bot.py behavior)
|
| 741 |
+
user_metadata_for_format = {}
|
| 742 |
+
if bpm is not None:
|
| 743 |
+
user_metadata_for_format['bpm'] = bpm
|
| 744 |
+
if audio_duration is not None and audio_duration > 0:
|
| 745 |
+
user_metadata_for_format['duration'] = int(audio_duration)
|
| 746 |
+
if key_scale:
|
| 747 |
+
user_metadata_for_format['keyscale'] = key_scale
|
| 748 |
+
if time_signature:
|
| 749 |
+
user_metadata_for_format['timesignature'] = time_signature
|
| 750 |
+
if req.vocal_language and req.vocal_language != "unknown":
|
| 751 |
+
user_metadata_for_format['language'] = req.vocal_language
|
| 752 |
+
|
| 753 |
format_result = format_sample(
|
| 754 |
llm_handler=llm,
|
| 755 |
caption=caption,
|
| 756 |
lyrics=lyrics,
|
| 757 |
+
user_metadata=user_metadata_for_format if user_metadata_for_format else None,
|
| 758 |
temperature=req.lm_temperature,
|
| 759 |
top_k=lm_top_k if lm_top_k > 0 else None,
|
| 760 |
top_p=lm_top_p if lm_top_p < 1.0 else None,
|
|
|
|
| 762 |
)
|
| 763 |
|
| 764 |
if format_result.success:
|
| 765 |
+
# Extract all formatted data (matching bot.py behavior)
|
| 766 |
+
caption = format_result.caption or caption
|
| 767 |
+
lyrics = format_result.lyrics or lyrics
|
| 768 |
+
if format_result.duration:
|
| 769 |
+
audio_duration = format_result.duration
|
| 770 |
+
format_has_duration = True
|
| 771 |
+
if format_result.bpm:
|
| 772 |
+
bpm = format_result.bpm
|
| 773 |
+
if format_result.keyscale:
|
| 774 |
+
key_scale = format_result.keyscale
|
| 775 |
+
if format_result.timesignature:
|
| 776 |
+
time_signature = format_result.timesignature
|
| 777 |
+
|
| 778 |
+
print(f"[api_server] Format applied: new caption_len={len(caption)}, lyrics_len={len(lyrics)}, bpm={bpm}, duration={audio_duration}, has_duration={format_has_duration}")
|
| 779 |
else:
|
| 780 |
print(f"[api_server] Warning: format_sample failed: {format_result.error}, using original input")
|
| 781 |
|
|
|
|
| 839 |
lm_top_k=lm_top_k,
|
| 840 |
lm_top_p=lm_top_p,
|
| 841 |
lm_negative_prompt=req.lm_negative_prompt,
|
| 842 |
+
# use_cot_metas logic:
|
| 843 |
+
# - sample_mode: metas already generated, skip Phase 1
|
| 844 |
+
# - format with duration: metas already generated, skip Phase 1
|
| 845 |
+
# - format without duration: need Phase 1 to generate duration
|
| 846 |
+
# - no format: need Phase 1 to generate all metas
|
| 847 |
+
use_cot_metas=not sample_mode and not format_has_duration,
|
| 848 |
use_cot_caption=req.use_cot_caption,
|
| 849 |
use_cot_language=req.use_cot_language,
|
| 850 |
use_constrained_decoding=req.constrained_decoding,
|
acestep/gradio_ui/events/generation_handlers.py
CHANGED
|
@@ -70,7 +70,7 @@ def load_metadata(file_obj):
|
|
| 70 |
"""Load generation parameters from a JSON file"""
|
| 71 |
if file_obj is None:
|
| 72 |
gr.Warning(t("messages.no_file_selected"))
|
| 73 |
-
return [None] *
|
| 74 |
|
| 75 |
try:
|
| 76 |
# Read the uploaded file
|
|
@@ -115,7 +115,7 @@ def load_metadata(file_obj):
|
|
| 115 |
inference_steps = metadata.get('inference_steps', 8)
|
| 116 |
guidance_scale = metadata.get('guidance_scale', 7.0)
|
| 117 |
seed = metadata.get('seed', '-1')
|
| 118 |
-
random_seed =
|
| 119 |
use_adg = metadata.get('use_adg', False)
|
| 120 |
cfg_interval_start = metadata.get('cfg_interval_start', 0.0)
|
| 121 |
cfg_interval_end = metadata.get('cfg_interval_end', 1.0)
|
|
@@ -137,6 +137,9 @@ def load_metadata(file_obj):
|
|
| 137 |
complete_track_classes = metadata.get('complete_track_classes', [])
|
| 138 |
shift = metadata.get('shift', 3.0) # Default 3.0 for base models
|
| 139 |
infer_method = metadata.get('infer_method', 'ode') # Default 'ode' for diffusion inference
|
|
|
|
|
|
|
|
|
|
| 140 |
instrumental = metadata.get('instrumental', False) # Added: read instrumental
|
| 141 |
|
| 142 |
gr.Info(t("messages.params_loaded", filename=os.path.basename(filepath)))
|
|
@@ -144,8 +147,9 @@ def load_metadata(file_obj):
|
|
| 144 |
return (
|
| 145 |
task_type, captions, lyrics, vocal_language, bpm, key_scale, time_signature,
|
| 146 |
audio_duration, batch_size, inference_steps, guidance_scale, seed, random_seed,
|
| 147 |
-
use_adg, cfg_interval_start, cfg_interval_end, shift, infer_method,
|
| 148 |
-
|
|
|
|
| 149 |
use_cot_metas, use_cot_caption, use_cot_language, audio_cover_strength,
|
| 150 |
think, audio_codes, repainting_start, repainting_end,
|
| 151 |
track_name, complete_track_classes, instrumental,
|
|
@@ -154,10 +158,10 @@ def load_metadata(file_obj):
|
|
| 154 |
|
| 155 |
except json.JSONDecodeError as e:
|
| 156 |
gr.Warning(t("messages.invalid_json", error=str(e)))
|
| 157 |
-
return [None] *
|
| 158 |
except Exception as e:
|
| 159 |
gr.Warning(t("messages.load_error", error=str(e)))
|
| 160 |
-
return [None] *
|
| 161 |
|
| 162 |
|
| 163 |
def load_random_example(task_type: str):
|
|
@@ -429,7 +433,7 @@ def init_service_wrapper(dit_handler, llm_handler, checkpoint, config_path, devi
|
|
| 429 |
|
| 430 |
# Check if model is initialized - if so, collapse the accordion
|
| 431 |
is_model_initialized = dit_handler.model is not None
|
| 432 |
-
accordion_state = gr.
|
| 433 |
|
| 434 |
# Get model type settings based on actual loaded model
|
| 435 |
is_turbo = dit_handler.is_turbo_model()
|
|
@@ -446,12 +450,12 @@ def init_service_wrapper(dit_handler, llm_handler, checkpoint, config_path, devi
|
|
| 446 |
def get_model_type_ui_settings(is_turbo: bool):
|
| 447 |
"""Get UI settings based on whether the model is turbo or base"""
|
| 448 |
if is_turbo:
|
| 449 |
-
# Turbo model: max
|
| 450 |
return (
|
| 451 |
-
gr.update(value=8, maximum=
|
| 452 |
gr.update(visible=False), # guidance_scale
|
| 453 |
gr.update(visible=False), # use_adg
|
| 454 |
-
gr.update(value=
|
| 455 |
gr.update(visible=False), # cfg_interval_start
|
| 456 |
gr.update(visible=False), # cfg_interval_end
|
| 457 |
gr.update(choices=TASK_TYPES_TURBO), # task_type
|
|
@@ -603,7 +607,7 @@ def reset_format_caption_flag():
|
|
| 603 |
def update_audio_uploads_accordion(reference_audio, src_audio):
|
| 604 |
"""Update Audio Uploads accordion open state based on whether audio files are present"""
|
| 605 |
has_audio = (reference_audio is not None) or (src_audio is not None)
|
| 606 |
-
return gr.
|
| 607 |
|
| 608 |
|
| 609 |
def handle_instrumental_checkbox(instrumental_checked, current_lyrics):
|
|
@@ -708,11 +712,11 @@ def handle_generation_mode_change(mode: str):
|
|
| 708 |
|
| 709 |
return (
|
| 710 |
gr.update(visible=is_simple), # simple_mode_group
|
| 711 |
-
gr.
|
| 712 |
-
gr.
|
| 713 |
gr.update(interactive=not is_simple), # generate_btn - disabled in simple until sample created
|
| 714 |
False, # simple_sample_created - reset to False on mode change
|
| 715 |
-
gr.
|
| 716 |
)
|
| 717 |
|
| 718 |
|
|
@@ -836,8 +840,8 @@ def handle_create_sample(
|
|
| 836 |
result.language, # simple vocal_language
|
| 837 |
result.timesignature, # time_signature
|
| 838 |
result.instrumental, # instrumental_checkbox
|
| 839 |
-
gr.
|
| 840 |
-
gr.
|
| 841 |
gr.update(interactive=True), # generate_btn - enable
|
| 842 |
True, # simple_sample_created - True
|
| 843 |
True, # think_checkbox - enable thinking
|
|
|
|
| 70 |
"""Load generation parameters from a JSON file"""
|
| 71 |
if file_obj is None:
|
| 72 |
gr.Warning(t("messages.no_file_selected"))
|
| 73 |
+
return [None] * 36 + [False] # Return None for all fields, False for is_format_caption
|
| 74 |
|
| 75 |
try:
|
| 76 |
# Read the uploaded file
|
|
|
|
| 115 |
inference_steps = metadata.get('inference_steps', 8)
|
| 116 |
guidance_scale = metadata.get('guidance_scale', 7.0)
|
| 117 |
seed = metadata.get('seed', '-1')
|
| 118 |
+
random_seed = False # Always set to False when loading to enable reproducibility with saved seed
|
| 119 |
use_adg = metadata.get('use_adg', False)
|
| 120 |
cfg_interval_start = metadata.get('cfg_interval_start', 0.0)
|
| 121 |
cfg_interval_end = metadata.get('cfg_interval_end', 1.0)
|
|
|
|
| 137 |
complete_track_classes = metadata.get('complete_track_classes', [])
|
| 138 |
shift = metadata.get('shift', 3.0) # Default 3.0 for base models
|
| 139 |
infer_method = metadata.get('infer_method', 'ode') # Default 'ode' for diffusion inference
|
| 140 |
+
custom_timesteps = metadata.get('timesteps', '') # Custom timesteps (stored as 'timesteps' in JSON)
|
| 141 |
+
if custom_timesteps is None:
|
| 142 |
+
custom_timesteps = ''
|
| 143 |
instrumental = metadata.get('instrumental', False) # Added: read instrumental
|
| 144 |
|
| 145 |
gr.Info(t("messages.params_loaded", filename=os.path.basename(filepath)))
|
|
|
|
| 147 |
return (
|
| 148 |
task_type, captions, lyrics, vocal_language, bpm, key_scale, time_signature,
|
| 149 |
audio_duration, batch_size, inference_steps, guidance_scale, seed, random_seed,
|
| 150 |
+
use_adg, cfg_interval_start, cfg_interval_end, shift, infer_method,
|
| 151 |
+
custom_timesteps, # Added: custom_timesteps (between infer_method and audio_format)
|
| 152 |
+
audio_format, lm_temperature, lm_cfg_scale, lm_top_k, lm_top_p, lm_negative_prompt,
|
| 153 |
use_cot_metas, use_cot_caption, use_cot_language, audio_cover_strength,
|
| 154 |
think, audio_codes, repainting_start, repainting_end,
|
| 155 |
track_name, complete_track_classes, instrumental,
|
|
|
|
| 158 |
|
| 159 |
except json.JSONDecodeError as e:
|
| 160 |
gr.Warning(t("messages.invalid_json", error=str(e)))
|
| 161 |
+
return [None] * 36 + [False]
|
| 162 |
except Exception as e:
|
| 163 |
gr.Warning(t("messages.load_error", error=str(e)))
|
| 164 |
+
return [None] * 36 + [False]
|
| 165 |
|
| 166 |
|
| 167 |
def load_random_example(task_type: str):
|
|
|
|
| 433 |
|
| 434 |
# Check if model is initialized - if so, collapse the accordion
|
| 435 |
is_model_initialized = dit_handler.model is not None
|
| 436 |
+
accordion_state = gr.Accordion(open=not is_model_initialized)
|
| 437 |
|
| 438 |
# Get model type settings based on actual loaded model
|
| 439 |
is_turbo = dit_handler.is_turbo_model()
|
|
|
|
| 450 |
def get_model_type_ui_settings(is_turbo: bool):
|
| 451 |
"""Get UI settings based on whether the model is turbo or base"""
|
| 452 |
if is_turbo:
|
| 453 |
+
# Turbo model: max 20 steps, default 8, show shift with default 3.0, only show text2music/repaint/cover
|
| 454 |
return (
|
| 455 |
+
gr.update(value=8, maximum=20, minimum=1), # inference_steps
|
| 456 |
gr.update(visible=False), # guidance_scale
|
| 457 |
gr.update(visible=False), # use_adg
|
| 458 |
+
gr.update(value=3.0, visible=True), # shift (show with default 3.0)
|
| 459 |
gr.update(visible=False), # cfg_interval_start
|
| 460 |
gr.update(visible=False), # cfg_interval_end
|
| 461 |
gr.update(choices=TASK_TYPES_TURBO), # task_type
|
|
|
|
| 607 |
def update_audio_uploads_accordion(reference_audio, src_audio):
|
| 608 |
"""Update Audio Uploads accordion open state based on whether audio files are present"""
|
| 609 |
has_audio = (reference_audio is not None) or (src_audio is not None)
|
| 610 |
+
return gr.Accordion(open=has_audio)
|
| 611 |
|
| 612 |
|
| 613 |
def handle_instrumental_checkbox(instrumental_checked, current_lyrics):
|
|
|
|
| 712 |
|
| 713 |
return (
|
| 714 |
gr.update(visible=is_simple), # simple_mode_group
|
| 715 |
+
gr.Accordion(open=not is_simple), # caption_accordion - collapsed in simple, open in custom
|
| 716 |
+
gr.Accordion(open=not is_simple), # lyrics_accordion - collapsed in simple, open in custom
|
| 717 |
gr.update(interactive=not is_simple), # generate_btn - disabled in simple until sample created
|
| 718 |
False, # simple_sample_created - reset to False on mode change
|
| 719 |
+
gr.Accordion(open=not is_simple), # optional_params_accordion - hidden in simple mode
|
| 720 |
)
|
| 721 |
|
| 722 |
|
|
|
|
| 840 |
result.language, # simple vocal_language
|
| 841 |
result.timesignature, # time_signature
|
| 842 |
result.instrumental, # instrumental_checkbox
|
| 843 |
+
gr.Accordion(open=True), # caption_accordion - expand
|
| 844 |
+
gr.Accordion(open=True), # lyrics_accordion - expand
|
| 845 |
gr.update(interactive=True), # generate_btn - enable
|
| 846 |
True, # simple_sample_created - True
|
| 847 |
True, # think_checkbox - enable thinking
|
acestep/gradio_ui/interfaces/generation.py
CHANGED
|
@@ -402,13 +402,13 @@ def create_generation_section(dit_handler, llm_handler, init_params=None, langua
|
|
| 402 |
)
|
| 403 |
|
| 404 |
# Advanced Settings
|
| 405 |
-
# Default UI settings use turbo mode (max
|
| 406 |
# These will be updated after model initialization based on handler.is_turbo_model()
|
| 407 |
with gr.Accordion(t("generation.advanced_settings"), open=False):
|
| 408 |
with gr.Row():
|
| 409 |
inference_steps = gr.Slider(
|
| 410 |
minimum=1,
|
| 411 |
-
maximum=
|
| 412 |
value=8,
|
| 413 |
step=1,
|
| 414 |
label=t("generation.inference_steps_label"),
|
|
@@ -455,7 +455,7 @@ def create_generation_section(dit_handler, llm_handler, init_params=None, langua
|
|
| 455 |
step=0.1,
|
| 456 |
label=t("generation.shift_label"),
|
| 457 |
info=t("generation.shift_info"),
|
| 458 |
-
visible=
|
| 459 |
)
|
| 460 |
infer_method = gr.Dropdown(
|
| 461 |
choices=["ode", "sde"],
|
|
|
|
| 402 |
)
|
| 403 |
|
| 404 |
# Advanced Settings
|
| 405 |
+
# Default UI settings use turbo mode (max 20 steps, default 8, show shift with default 3)
|
| 406 |
# These will be updated after model initialization based on handler.is_turbo_model()
|
| 407 |
with gr.Accordion(t("generation.advanced_settings"), open=False):
|
| 408 |
with gr.Row():
|
| 409 |
inference_steps = gr.Slider(
|
| 410 |
minimum=1,
|
| 411 |
+
maximum=20,
|
| 412 |
value=8,
|
| 413 |
step=1,
|
| 414 |
label=t("generation.inference_steps_label"),
|
|
|
|
| 455 |
step=0.1,
|
| 456 |
label=t("generation.shift_label"),
|
| 457 |
info=t("generation.shift_info"),
|
| 458 |
+
visible=True
|
| 459 |
)
|
| 460 |
infer_method = gr.Dropdown(
|
| 461 |
choices=["ode", "sde"],
|
acestep/llm_inference.py
CHANGED
|
@@ -375,9 +375,9 @@ class LLMHandler:
|
|
| 375 |
max_ratio=0.9
|
| 376 |
)
|
| 377 |
if low_gpu_memory_mode:
|
| 378 |
-
self.max_model_len = 2048
|
| 379 |
-
else:
|
| 380 |
self.max_model_len = 4096
|
|
|
|
|
|
|
| 381 |
|
| 382 |
logger.info(f"Initializing 5Hz LM with model: {model_path}, enforce_eager: False, tensor_parallel_size: 1, max_model_len: {self.max_model_len}, gpu_memory_utilization: {gpu_memory_utilization}")
|
| 383 |
start_time = time.time()
|
|
@@ -1796,7 +1796,7 @@ class LLMHandler:
|
|
| 1796 |
# If no lyrics generated, keep original input
|
| 1797 |
metadata['lyrics'] = lyrics
|
| 1798 |
|
| 1799 |
-
logger.info(f"Format completed successfully. Generated {
|
| 1800 |
if constrained_decoding_debug:
|
| 1801 |
logger.debug(f"Generated metadata: {list(metadata.keys())}")
|
| 1802 |
logger.debug(f"Output text preview: {output_text[:300]}...")
|
|
|
|
| 375 |
max_ratio=0.9
|
| 376 |
)
|
| 377 |
if low_gpu_memory_mode:
|
|
|
|
|
|
|
| 378 |
self.max_model_len = 4096
|
| 379 |
+
else:
|
| 380 |
+
self.max_model_len = 8192
|
| 381 |
|
| 382 |
logger.info(f"Initializing 5Hz LM with model: {model_path}, enforce_eager: False, tensor_parallel_size: 1, max_model_len: {self.max_model_len}, gpu_memory_utilization: {gpu_memory_utilization}")
|
| 383 |
start_time = time.time()
|
|
|
|
| 1796 |
# If no lyrics generated, keep original input
|
| 1797 |
metadata['lyrics'] = lyrics
|
| 1798 |
|
| 1799 |
+
logger.info(f"Format completed successfully. Generated {metadata} fields")
|
| 1800 |
if constrained_decoding_debug:
|
| 1801 |
logger.debug(f"Generated metadata: {list(metadata.keys())}")
|
| 1802 |
logger.debug(f"Output text preview: {output_text[:300]}...")
|
acestep/third_parts/nano-vllm/nanovllm/config.py
CHANGED
|
@@ -8,7 +8,7 @@ class Config:
|
|
| 8 |
model: str
|
| 9 |
max_num_batched_tokens: int = 16384
|
| 10 |
max_num_seqs: int = 512
|
| 11 |
-
max_model_len: int =
|
| 12 |
gpu_memory_utilization: float = 0.9
|
| 13 |
tensor_parallel_size: int = 1
|
| 14 |
enforce_eager: bool = False
|
|
|
|
| 8 |
model: str
|
| 9 |
max_num_batched_tokens: int = 16384
|
| 10 |
max_num_seqs: int = 512
|
| 11 |
+
max_model_len: int = 8192
|
| 12 |
gpu_memory_utilization: float = 0.9
|
| 13 |
tensor_parallel_size: int = 1
|
| 14 |
enforce_eager: bool = False
|