hysts HF Staff committed on
Commit
d2c9c8d
·
1 Parent(s): e38d76b
Files changed (1) hide show
  1. app.py +9 -1
app.py CHANGED
@@ -177,6 +177,7 @@ def generate(
177
  history: list[dict],
178
  thinking: bool = False,
179
  max_new_tokens: int = 1024,
 
180
  system_prompt: str = "",
181
  ) -> Iterator[str]:
182
 
@@ -193,6 +194,7 @@ def generate(
193
  "return_tensors": "pt",
194
  "add_generation_prompt": True,
195
  "load_audio_from_video": _has_media_type(messages, "video"),
 
196
  }
197
  if thinking:
198
  template_kwargs["enable_thinking"] = True
@@ -313,7 +315,13 @@ demo = gr.ChatInterface(
313
  multimodal=True,
314
  additional_inputs=[
315
  gr.Checkbox(label="Thinking", value=False),
316
- gr.Slider(label="Max New Tokens", minimum=100, maximum=4000, step=10, value=1024),
 
 
 
 
 
 
317
  gr.Textbox(label="System Prompt", value=""),
318
  ],
319
  additional_inputs_accordion=gr.Accordion("Settings", open=True),
 
177
  history: list[dict],
178
  thinking: bool = False,
179
  max_new_tokens: int = 1024,
180
+ max_soft_tokens: int = 280,
181
  system_prompt: str = "",
182
  ) -> Iterator[str]:
183
 
 
194
  "return_tensors": "pt",
195
  "add_generation_prompt": True,
196
  "load_audio_from_video": _has_media_type(messages, "video"),
197
+ "processor_kwargs": {"images_kwargs": {"max_soft_tokens": max_soft_tokens}},
198
  }
199
  if thinking:
200
  template_kwargs["enable_thinking"] = True
 
315
  multimodal=True,
316
  additional_inputs=[
317
  gr.Checkbox(label="Thinking", value=False),
318
+ gr.Slider(label="Max New Tokens", minimum=100, maximum=4000, step=10, value=2000),
319
+ gr.Dropdown(
320
+ label="Image Token Budget",
321
+ info="Higher values preserve more visual detail (useful for OCR/documents). Lower values are faster.",
322
+ choices=[70, 140, 280, 560, 1120],
323
+ value=280,
324
+ ),
325
  gr.Textbox(label="System Prompt", value=""),
326
  ],
327
  additional_inputs_accordion=gr.Accordion("Settings", open=True),