Yifei Wang committed on
Commit ·
6d31849
1
Parent(s): 3c74727
fixed button bugs
Browse files- app_rag.py +17 -14
- src/numen_scriptorium/inference/qwen.py +3 -0
app_rag.py
CHANGED
|
@@ -95,28 +95,32 @@ def _request_stop():
|
|
| 95 |
event.set()
|
| 96 |
|
| 97 |
|
| 98 |
-
def _on_stop_clicked():
|
| 99 |
_request_stop()
|
|
|
|
|
|
|
| 100 |
return _format_status(
|
| 101 |
-
stage=
|
| 102 |
-
loaded=
|
| 103 |
device="unknown",
|
| 104 |
loading_percent="--",
|
| 105 |
error="Stop requested. Waiting for backend generation to halt.",
|
| 106 |
)
|
| 107 |
|
| 108 |
|
| 109 |
-
def _on_clear_clicked():
|
| 110 |
# Clear should also stop any in-flight generation to avoid concurrent
|
| 111 |
# updates from the stream generator after UI has been reset.
|
| 112 |
_request_stop()
|
|
|
|
|
|
|
| 113 |
return (
|
| 114 |
DEFAULT_INSTRUCTION,
|
| 115 |
"",
|
| 116 |
-
|
| 117 |
"",
|
| 118 |
-
_format_status(stage=
|
| 119 |
-
_format_mode_indicator(
|
| 120 |
"0.00s",
|
| 121 |
)
|
| 122 |
|
|
@@ -187,7 +191,6 @@ def _load_demo_examples():
|
|
| 187 |
"instruction": instruction,
|
| 188 |
"input": user_input,
|
| 189 |
"max_new_tokens": max_new_tokens,
|
| 190 |
-
"use_rag": False,
|
| 191 |
}
|
| 192 |
)
|
| 193 |
|
|
@@ -208,11 +211,11 @@ def _load_demo_examples():
|
|
| 208 |
return parsed, None
|
| 209 |
|
| 210 |
|
| 211 |
-
def _apply_example(example: dict):
|
| 212 |
max_tokens_update = (
|
| 213 |
example["max_new_tokens"] if example.get("max_new_tokens") is not None else gr.update()
|
| 214 |
)
|
| 215 |
-
use_rag = bool(example
|
| 216 |
return example["instruction"], example["input"], max_tokens_update, use_rag, _format_mode_indicator(use_rag)
|
| 217 |
|
| 218 |
|
|
@@ -757,8 +760,8 @@ with gr.Blocks(title="Numen Scriptorium Demo") as demo:
|
|
| 757 |
for example in parsed_examples:
|
| 758 |
example_btn = gr.Button(example["label"], variant="secondary")
|
| 759 |
example_btn.click(
|
| 760 |
-
fn=lambda ex=example: _apply_example(ex),
|
| 761 |
-
inputs=
|
| 762 |
outputs=[instruction, user_input, max_new_tokens, use_rag, mode_panel],
|
| 763 |
)
|
| 764 |
|
|
@@ -783,11 +786,11 @@ with gr.Blocks(title="Numen Scriptorium Demo") as demo:
|
|
| 783 |
outputs=[output, status_panel, elapsed_text],
|
| 784 |
)
|
| 785 |
|
| 786 |
-
stop_btn.click(fn=_on_stop_clicked, inputs=
|
| 787 |
|
| 788 |
clear_btn.click(
|
| 789 |
fn=_on_clear_clicked,
|
| 790 |
-
inputs=
|
| 791 |
outputs=[instruction, user_input, use_rag, output, status_panel, mode_panel, elapsed_text],
|
| 792 |
cancels=[run_event],
|
| 793 |
)
|
|
|
|
| 95 |
event.set()
|
| 96 |
|
| 97 |
|
| 98 |
+
def _on_stop_clicked(use_rag: bool):
    """Handle the Stop button: signal the backend to halt and report status.

    Returns a formatted status string reflecting the active mode (RAG or
    plain) so the status panel updates immediately while generation winds down.
    """
    _request_stop()
    # Mode-dependent status: RAG mode has its own runtime/loaded flag.
    if use_rag:
        runtime_ready = _is_rag_runtime_loaded()
        status_stage = "Stop requested (RAG)"
    else:
        runtime_ready = _RUNTIME_LOADED
        status_stage = "Stop requested"
    return _format_status(
        stage=status_stage,
        loaded=runtime_ready,
        device="unknown",
        loading_percent="--",
        error="Stop requested. Waiting for backend generation to halt.",
    )
|
| 109 |
|
| 110 |
|
| 111 |
+
def _on_clear_clicked(current_use_rag: bool):
    """Handle the Clear button: stop any in-flight generation and reset the UI.

    Clear also stops the backend so the stream generator cannot keep pushing
    updates into widgets that were just reset.
    """
    _request_stop()
    # Pick the status fields for whichever mode is currently selected.
    if current_use_rag:
        runtime_ready = _is_rag_runtime_loaded()
        status_stage = "Idle (RAG mode)"
    else:
        runtime_ready = _RUNTIME_LOADED
        status_stage = "Idle"
    idle_status = _format_status(
        stage=status_stage,
        loaded=runtime_ready,
        device="unknown",
        loading_percent="0%",
    )
    # Order matches the clear_btn.click outputs:
    # instruction, user_input, use_rag, output, status_panel, mode_panel, elapsed_text
    return (
        DEFAULT_INSTRUCTION,
        "",
        gr.update(),
        "",
        idle_status,
        _format_mode_indicator(current_use_rag),
        "0.00s",
    )
|
| 126 |
|
|
|
|
| 191 |
"instruction": instruction,
|
| 192 |
"input": user_input,
|
| 193 |
"max_new_tokens": max_new_tokens,
|
|
|
|
| 194 |
}
|
| 195 |
)
|
| 196 |
|
|
|
|
| 211 |
return parsed, None
|
| 212 |
|
| 213 |
|
| 214 |
+
def _apply_example(example: dict, current_use_rag: bool):
    """Map a parsed demo example onto the UI widgets.

    Returns (instruction, input, max_new_tokens update, use_rag, mode panel
    markdown). Fields the example leaves unset (missing or None) keep their
    current widget values.
    """
    max_tokens_update = (
        example["max_new_tokens"] if example.get("max_new_tokens") is not None else gr.update()
    )
    # Treat a missing OR explicitly-null "use_rag" as "keep the current mode",
    # mirroring how max_new_tokens handles None above. (Previously a literal
    # null in the examples JSON was coerced to False instead of being ignored.)
    example_use_rag = example.get("use_rag")
    use_rag = bool(current_use_rag) if example_use_rag is None else bool(example_use_rag)
    return (
        example["instruction"],
        example["input"],
        max_tokens_update,
        use_rag,
        _format_mode_indicator(use_rag),
    )
|
| 220 |
|
| 221 |
|
|
|
|
| 760 |
for example in parsed_examples:
|
| 761 |
example_btn = gr.Button(example["label"], variant="secondary")
|
| 762 |
example_btn.click(
|
| 763 |
+
fn=lambda current_mode, ex=example: _apply_example(ex, current_mode),
|
| 764 |
+
inputs=[use_rag],
|
| 765 |
outputs=[instruction, user_input, max_new_tokens, use_rag, mode_panel],
|
| 766 |
)
|
| 767 |
|
|
|
|
| 786 |
outputs=[output, status_panel, elapsed_text],
|
| 787 |
)
|
| 788 |
|
| 789 |
+
stop_btn.click(fn=_on_stop_clicked, inputs=[use_rag], outputs=[status_panel], cancels=[run_event])
|
| 790 |
|
| 791 |
clear_btn.click(
|
| 792 |
fn=_on_clear_clicked,
|
| 793 |
+
inputs=[use_rag],
|
| 794 |
outputs=[instruction, user_input, use_rag, output, status_panel, mode_panel, elapsed_text],
|
| 795 |
cancels=[run_event],
|
| 796 |
)
|
src/numen_scriptorium/inference/qwen.py
CHANGED
|
@@ -110,6 +110,7 @@ def generate(
|
|
| 110 |
temperature=temperature,
|
| 111 |
top_p=top_p,
|
| 112 |
eos_token_id=tokenizer.eos_token_id,
|
|
|
|
| 113 |
)
|
| 114 |
text = tokenizer.decode(outputs[0], skip_special_tokens=True)
|
| 115 |
if "回答:" in text:
|
|
@@ -157,6 +158,8 @@ def stream_generate(
|
|
| 157 |
eos_token_id=tokenizer.eos_token_id,
|
| 158 |
streamer=streamer,
|
| 159 |
)
|
|
|
|
|
|
|
| 160 |
if stop_event is not None:
|
| 161 |
generate_kwargs["stopping_criteria"] = StoppingCriteriaList([_EventStoppingCriteria(stop_event)])
|
| 162 |
|
|
|
|
| 110 |
temperature=temperature,
|
| 111 |
top_p=top_p,
|
| 112 |
eos_token_id=tokenizer.eos_token_id,
|
| 113 |
+
generator=generator,
|
| 114 |
)
|
| 115 |
text = tokenizer.decode(outputs[0], skip_special_tokens=True)
|
| 116 |
if "回答:" in text:
|
|
|
|
| 158 |
eos_token_id=tokenizer.eos_token_id,
|
| 159 |
streamer=streamer,
|
| 160 |
)
|
| 161 |
+
if generator is not None:
|
| 162 |
+
generate_kwargs["generator"] = generator
|
| 163 |
if stop_event is not None:
|
| 164 |
generate_kwargs["stopping_criteria"] = StoppingCriteriaList([_EventStoppingCriteria(stop_event)])
|
| 165 |
|