Yifei Wang committed on
Commit
6d31849
·
1 Parent(s): 3c74727

fixed button bugs

Browse files
app_rag.py CHANGED
@@ -95,28 +95,32 @@ def _request_stop():
95
  event.set()
96
 
97
 
98
- def _on_stop_clicked():
99
  _request_stop()
 
 
100
  return _format_status(
101
- stage="Stop requested",
102
- loaded=_RUNTIME_LOADED,
103
  device="unknown",
104
  loading_percent="--",
105
  error="Stop requested. Waiting for backend generation to halt.",
106
  )
107
 
108
 
109
- def _on_clear_clicked():
110
  # Clear should also stop any in-flight generation to avoid concurrent
111
  # updates from the stream generator after UI has been reset.
112
  _request_stop()
 
 
113
  return (
114
  DEFAULT_INSTRUCTION,
115
  "",
116
- False,
117
  "",
118
- _format_status(stage="Idle", loaded=_RUNTIME_LOADED, device="unknown", loading_percent="0%"),
119
- _format_mode_indicator(False),
120
  "0.00s",
121
  )
122
 
@@ -187,7 +191,6 @@ def _load_demo_examples():
187
  "instruction": instruction,
188
  "input": user_input,
189
  "max_new_tokens": max_new_tokens,
190
- "use_rag": False,
191
  }
192
  )
193
 
@@ -208,11 +211,11 @@ def _load_demo_examples():
208
  return parsed, None
209
 
210
 
211
- def _apply_example(example: dict):
212
  max_tokens_update = (
213
  example["max_new_tokens"] if example.get("max_new_tokens") is not None else gr.update()
214
  )
215
- use_rag = bool(example.get("use_rag", False))
216
  return example["instruction"], example["input"], max_tokens_update, use_rag, _format_mode_indicator(use_rag)
217
 
218
 
@@ -757,8 +760,8 @@ with gr.Blocks(title="Numen Scriptorium Demo") as demo:
757
  for example in parsed_examples:
758
  example_btn = gr.Button(example["label"], variant="secondary")
759
  example_btn.click(
760
- fn=lambda ex=example: _apply_example(ex),
761
- inputs=None,
762
  outputs=[instruction, user_input, max_new_tokens, use_rag, mode_panel],
763
  )
764
 
@@ -783,11 +786,11 @@ with gr.Blocks(title="Numen Scriptorium Demo") as demo:
783
  outputs=[output, status_panel, elapsed_text],
784
  )
785
 
786
- stop_btn.click(fn=_on_stop_clicked, inputs=None, outputs=[status_panel], cancels=[run_event])
787
 
788
  clear_btn.click(
789
  fn=_on_clear_clicked,
790
- inputs=None,
791
  outputs=[instruction, user_input, use_rag, output, status_panel, mode_panel, elapsed_text],
792
  cancels=[run_event],
793
  )
 
95
  event.set()
96
 
97
 
98
+ def _on_stop_clicked(use_rag: bool):
99
  _request_stop()
100
+ loaded = _is_rag_runtime_loaded() if use_rag else _RUNTIME_LOADED
101
+ stage = "Stop requested (RAG)" if use_rag else "Stop requested"
102
  return _format_status(
103
+ stage=stage,
104
+ loaded=loaded,
105
  device="unknown",
106
  loading_percent="--",
107
  error="Stop requested. Waiting for backend generation to halt.",
108
  )
109
 
110
 
111
+ def _on_clear_clicked(current_use_rag: bool):
112
  # Clear should also stop any in-flight generation to avoid concurrent
113
  # updates from the stream generator after UI has been reset.
114
  _request_stop()
115
+ loaded = _is_rag_runtime_loaded() if current_use_rag else _RUNTIME_LOADED
116
+ stage = "Idle (RAG mode)" if current_use_rag else "Idle"
117
  return (
118
  DEFAULT_INSTRUCTION,
119
  "",
120
+ gr.update(),
121
  "",
122
+ _format_status(stage=stage, loaded=loaded, device="unknown", loading_percent="0%"),
123
+ _format_mode_indicator(current_use_rag),
124
  "0.00s",
125
  )
126
 
 
191
  "instruction": instruction,
192
  "input": user_input,
193
  "max_new_tokens": max_new_tokens,
 
194
  }
195
  )
196
 
 
211
  return parsed, None
212
 
213
 
214
+ def _apply_example(example: dict, current_use_rag: bool):
215
  max_tokens_update = (
216
  example["max_new_tokens"] if example.get("max_new_tokens") is not None else gr.update()
217
  )
218
+ use_rag = bool(example["use_rag"]) if "use_rag" in example else bool(current_use_rag)
219
  return example["instruction"], example["input"], max_tokens_update, use_rag, _format_mode_indicator(use_rag)
220
 
221
 
 
760
  for example in parsed_examples:
761
  example_btn = gr.Button(example["label"], variant="secondary")
762
  example_btn.click(
763
+ fn=lambda current_mode, ex=example: _apply_example(ex, current_mode),
764
+ inputs=[use_rag],
765
  outputs=[instruction, user_input, max_new_tokens, use_rag, mode_panel],
766
  )
767
 
 
786
  outputs=[output, status_panel, elapsed_text],
787
  )
788
 
789
+ stop_btn.click(fn=_on_stop_clicked, inputs=[use_rag], outputs=[status_panel], cancels=[run_event])
790
 
791
  clear_btn.click(
792
  fn=_on_clear_clicked,
793
+ inputs=[use_rag],
794
  outputs=[instruction, user_input, use_rag, output, status_panel, mode_panel, elapsed_text],
795
  cancels=[run_event],
796
  )
src/numen_scriptorium/inference/qwen.py CHANGED
@@ -110,6 +110,7 @@ def generate(
110
  temperature=temperature,
111
  top_p=top_p,
112
  eos_token_id=tokenizer.eos_token_id,
 
113
  )
114
  text = tokenizer.decode(outputs[0], skip_special_tokens=True)
115
  if "回答:" in text:
@@ -157,6 +158,8 @@ def stream_generate(
157
  eos_token_id=tokenizer.eos_token_id,
158
  streamer=streamer,
159
  )
 
 
160
  if stop_event is not None:
161
  generate_kwargs["stopping_criteria"] = StoppingCriteriaList([_EventStoppingCriteria(stop_event)])
162
 
 
110
  temperature=temperature,
111
  top_p=top_p,
112
  eos_token_id=tokenizer.eos_token_id,
113
+ generator=generator,
114
  )
115
  text = tokenizer.decode(outputs[0], skip_special_tokens=True)
116
  if "回答:" in text:
 
158
  eos_token_id=tokenizer.eos_token_id,
159
  streamer=streamer,
160
  )
161
+ if generator is not None:
162
+ generate_kwargs["generator"] = generator
163
  if stop_event is not None:
164
  generate_kwargs["stopping_criteria"] = StoppingCriteriaList([_EventStoppingCriteria(stop_event)])
165