puresoulwd committed on
Commit
0e7bd0b
·
verified ·
1 Parent(s): 896dd35

Upload app.py

Files changed (1)
  1. app.py +362 -0
app.py ADDED
@@ -0,0 +1,362 @@
+ import inspect
+ import os
+ import threading
+
+ import gradio as gr
+ import torch
+ from transformers import AutoModelForCausalLM, AutoTokenizer, TextIteratorStreamer
+
+
+ os.environ.setdefault("TOKENIZERS_PARALLELISM", "false")
+
+ MODEL_ID = os.getenv("MODEL_ID", "Qwen/Qwen3-0.6B")
+ MAX_NEW_TOKENS = int(os.getenv("MAX_NEW_TOKENS", "4096"))
+ MAX_INPUT_TOKENS = int(os.getenv("MAX_INPUT_TOKENS", "4096"))
+ MAX_HISTORY_TURNS = int(os.getenv("MAX_HISTORY_TURNS", "3"))
+ N_THREADS = int(os.getenv("N_THREADS", str(max(1, os.cpu_count() or 1))))
+ DEFAULT_SYSTEM_PROMPT = os.getenv(
+     "SYSTEM_PROMPT",
+     "You are a helpful Korean-language AI assistant. Write every natural-language response in Korean only. Think step by step first, then write the final answer in clear Korean.",
+ )
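+ # All of the knobs above are environment-driven; a hypothetical launch that
+ # overrides them might look like this (the values are illustrative only):
+ #   MODEL_ID=Qwen/Qwen3-0.6B MAX_NEW_TOKENS=1024 N_THREADS=4 python app.py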
+
+ BASE_THINKING_SUFFIX = (
+     "\n\nWhen thinking mode is on, always complete both parts: "
+     "(1) the reasoning content in the reasoning area and "
+     "(2) the final answer in the assistant area after the reasoning ends. "
+     "Do not stop after emitting only the reasoning. Write every natural-language sentence in both reasoning and assistant in Korean only."
+ )
+
+ BASE_USER_SUFFIX_THINKING = (
+     "\n\nWrite the reasoning first, then always continue with the assistant's final answer. "
+     "Use Korean only for the natural-language sentences in the reasoning and the final answer, and include the final answer in natural, clear Korean."
+ )
+
+ PRESETS = {
+     "Math": {
+         "system": (
+             "You are a meticulous math tutor. Solve problems accurately. "
+             "Write the reasoning in the reasoning panel and the final answer in the assistant panel, concisely and in Korean only."
+         ),
+         "prompt": "Solve the following quadratic equation, putting the work in reasoning and the final roots in assistant: 2x^2 - 7x + 3 = 0.",
+         "thinking": True,
+     },
+     "Coding": {
+         "system": (
+             "You are a Python helper. Write readable, correct code. "
+             "Put the plan in the reasoning panel and the final code in the assistant panel, with explanations in Korean only."
+         ),
+         "prompt": (
+             "Write a Python function merge_sorted_lists(a, b) that merges two sorted lists "
+             "into one sorted list. Put the approach in reasoning and the final code with an example call in assistant."
+         ),
+         "thinking": True,
+     },
+     "Structured output": {
+         "system": "In the assistant final answer, output compact JSON only, with no filler. Do not write any natural language outside the JSON.",
+         "prompt": "Extract the needed fields from the following memo and return JSON only: contact Mina by Friday, priority high, budget about 2400 dollars, topic is launch video edits.",
+         "thinking": False,
+     },
+     "Function-calling style": {
+         "system": (
+             "You are an assistant that plans tool use when needed. "
+             "Lay out which tools to use in the reasoning panel, then present the final result clearly, in Korean only, in the assistant panel."
+         ),
+         "prompt": (
+             "Assume tools are available. For computing 18.75 * 42 - 199 and converting 12 km to miles, "
+             "put the tool-use plan in reasoning and the final numeric results in assistant."
+         ),
+         "thinking": True,
+     },
+     "Creative writing": {
+         "system": "Write vivid, dense Korean prose. Do not mix in foreign-language expressions.",
+         "prompt": "Write the opening of a sci-fi heist story set on a drifting museum spaceship. Put the mood and narrative direction in reasoning and the final two Korean sentences in assistant.",
+         "thinking": False,
+     },
+ }
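+ # Each preset bundles a system prompt, a starter user prompt, and a thinking
+ # flag, matching what load_preset() returns below. A new entry only needs the
+ # same three keys, e.g. (hypothetical, not part of the shipped app):
+ #   PRESETS["Summarize"] = {"system": "...", "prompt": "...", "thinking": False}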
+
+
+ torch.set_num_threads(N_THREADS)
+ try:
+     torch.set_num_interop_threads(max(1, min(2, N_THREADS)))
+ except RuntimeError:
+     pass
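+ # torch.set_num_interop_threads() raises RuntimeError if parallel work has
+ # already started in the process (e.g. on a hot reload), so that failure is
+ # deliberately swallowed; the intra-op thread count above is the setting that
+ # matters most for CPU generation throughput.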
+
+ _tokenizer = None
+ _model = None
+ _load_lock = threading.Lock()
+ _generate_lock = threading.Lock()
+
+
+ def make_chatbot(label, height=520):
+     kwargs = {"label": label, "height": height}
+     if "type" in inspect.signature(gr.Chatbot.__init__).parameters:
+         kwargs["type"] = "messages"
+     return gr.Chatbot(**kwargs)
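+ # The signature probe keeps the app working across Gradio versions: newer
+ # releases accept gr.Chatbot(type="messages") and expect role/content dicts,
+ # while older ones predate the argument, so it is passed only when supported.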
+
+
+ def get_model():
+     global _tokenizer, _model
+     if _model is None or _tokenizer is None:
+         with _load_lock:
+             if _model is None or _tokenizer is None:
+                 _tokenizer = AutoTokenizer.from_pretrained(MODEL_ID, use_fast=True)
+                 _model = AutoModelForCausalLM.from_pretrained(
+                     MODEL_ID,
+                     torch_dtype=torch.float32,
+                 )
+                 _model.eval()
+     return _tokenizer, _model
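+ # Double-checked locking: the unguarded check skips the lock on the hot path,
+ # and the second check under _load_lock ensures concurrent first requests load
+ # the weights only once. float32 is the conservative dtype for CPU inference.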
+
+
+ def clone_messages(messages):
+     return [dict(item) for item in (messages or [])]
+
+
+ def load_preset(name):
+     preset = PRESETS[name]
+     return (
+         preset["system"],
+         preset["prompt"],
+         preset["thinking"],
+     )
+
+
+ def clear_all():
+     return [], [], [], ""
+
+
+ def strip_non_think_specials(text):
+     text = text or ""
+     for token in ["<|im_end|>", "<|endoftext|>", "<｜end▁of▁sentence｜>"]:
+         text = text.replace(token, "")
+     return text
+
+
+ def final_cleanup(text):
+     text = strip_non_think_specials(text)
+     text = text.replace("<think>", "").replace("</think>", "")
+     return text.strip()
+
+
+ def split_stream_text(raw_text, thinking):
+     raw_text = strip_non_think_specials(raw_text)
+     if not thinking:
+         return "", final_cleanup(raw_text), False
+
+     raw_text = raw_text.replace("<think>", "")
+     if "</think>" in raw_text:
+         reasoning, answer = raw_text.split("</think>", 1)
+         return reasoning.strip(), answer.strip(), True
+
+     return raw_text.strip(), "", False
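+ # The partial stream is re-split on every chunk. For example, the raw text
+ # "<think>check the roots</think>x = 3 or x = 1/2" yields
+ # ("check the roots", "x = 3 or x = 1/2", True), while a stream that has not
+ # reached </think> yet lands entirely in the reasoning slot with False.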
+
+
+ def build_messages(system_prompt, message, short_history, thinking):
+     final_system_prompt = (system_prompt or "").strip() or DEFAULT_SYSTEM_PROMPT
+     final_user_message = (message or "").strip()
+
+     if thinking:
+         final_system_prompt += BASE_THINKING_SUFFIX
+         final_user_message += BASE_USER_SUFFIX_THINKING
+
+     return [
+         {"role": "system", "content": final_system_prompt},
+         *short_history,
+         {"role": "user", "content": final_user_message},
+     ]
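+ # The result is a standard chat-template message list: one system message,
+ # the trimmed prior turns, then the new user message, e.g.
+ #   [{"role": "system", "content": "..."},
+ #    {"role": "user", "content": "..."}, {"role": "assistant", "content": "..."},
+ #    {"role": "user", "content": "..."}]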
+
+
+ def respond_stream(
+     message,
+     system_prompt,
+     thinking,
+     model_history,
+     reasoning_chat,
+     answer_chat,
+ ):
+     message = (message or "").strip()
+     if not message:
+         yield clone_messages(reasoning_chat), clone_messages(answer_chat), list(model_history or []), ""
+         return
+
+     model_history = list(model_history or [])
+     reasoning_chat = clone_messages(reasoning_chat)
+     answer_chat = clone_messages(answer_chat)
+
+     reasoning_chat.append({"role": "user", "content": message})
+     reasoning_chat.append(
+         {
+             "role": "assistant",
+             "content": "(thinking...)" if thinking else "(reasoning disabled)",
+         }
+     )
+     answer_chat.append({"role": "user", "content": message})
+     answer_chat.append({"role": "assistant", "content": ""})
+
+     yield clone_messages(reasoning_chat), clone_messages(answer_chat), list(model_history), ""
+
+     try:
+         tokenizer, model = get_model()
+         short_history = model_history[-2 * MAX_HISTORY_TURNS :]
+         messages = build_messages(system_prompt, message, short_history, thinking)
+
+         prompt = tokenizer.apply_chat_template(
+             messages,
+             tokenize=False,
+             add_generation_prompt=True,
+             enable_thinking=thinking,
+         )
+         inputs = tokenizer(prompt, return_tensors="pt")
+         input_ids = inputs["input_ids"][:, -MAX_INPUT_TOKENS:]
+         attention_mask = inputs["attention_mask"][:, -MAX_INPUT_TOKENS:]
+
+         streamer = TextIteratorStreamer(
+             tokenizer,
+             skip_prompt=True,
+             skip_special_tokens=False,
+             clean_up_tokenization_spaces=False,
+             timeout=None,
+         )
+
+         generation_kwargs = {
+             "input_ids": input_ids,
+             "attention_mask": attention_mask,
+             "max_new_tokens": MAX_NEW_TOKENS,
+             "do_sample": True,
+             "temperature": 0.6 if thinking else 0.7,
+             "top_p": 0.95 if thinking else 0.8,
+             "top_k": 20,
+             "repetition_penalty": 1.05,
+             "pad_token_id": tokenizer.eos_token_id,
+             "streamer": streamer,
+         }
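+         # These sampling settings appear to follow the Qwen3 model card's
+         # recommended values (temperature 0.6 / top_p 0.95 in thinking mode,
+         # 0.7 / 0.8 otherwise, top_k 20); they are not tuned for other models.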
+
+         generation_error = {}
+
+         def run_generation():
+             try:
+                 with _generate_lock:
+                     model.generate(**generation_kwargs)
+             except Exception as exc:
+                 generation_error["message"] = str(exc)
+                 streamer.on_finalized_text("", stream_end=True)
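+         # If generate() dies, on_finalized_text("", stream_end=True) pushes the
+         # streamer's stop signal onto its queue so that the consuming for-loop
+         # below terminates instead of blocking forever (timeout=None).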
243
+
244
+ thread = threading.Thread(target=run_generation, daemon=True)
245
+ thread.start()
246
+
247
+ raw_text = ""
248
+ saw_end_think = False
249
+
250
+ for chunk in streamer:
251
+ raw_text += chunk
252
+ reasoning_text, answer_text, saw_end_now = split_stream_text(raw_text, thinking)
253
+ saw_end_think = saw_end_think or saw_end_now
254
+
255
+ if thinking:
256
+ if saw_end_think:
257
+ reasoning_chat[-1]["content"] = reasoning_text or "(no reasoning text returned)"
258
+ else:
259
+ reasoning_chat[-1]["content"] = reasoning_text or "(thinking...)"
260
+ else:
261
+ reasoning_chat[-1]["content"] = "(reasoning disabled)"
262
+
263
+ answer_chat[-1]["content"] = answer_text
264
+ yield clone_messages(reasoning_chat), clone_messages(answer_chat), list(model_history), ""
265
+
266
+ thread.join()
267
+
268
+ if generation_error:
269
+ reasoning_chat[-1]["content"] = ""
270
+ answer_chat[-1]["content"] = f"Error while running the local CPU model: {generation_error['message']}"
271
+ yield clone_messages(reasoning_chat), clone_messages(answer_chat), list(model_history), ""
272
+ return
273
+
274
+ reasoning_text, answer_text, saw_end_think = split_stream_text(raw_text, thinking)
275
+ if thinking and not saw_end_think:
276
+ reasoning_text = ""
277
+ answer_text = final_cleanup(raw_text)
278
+
279
+ if thinking:
280
+ reasoning_chat[-1]["content"] = reasoning_text or "(no reasoning text returned)"
281
+ else:
282
+ reasoning_chat[-1]["content"] = "(reasoning disabled)"
283
+
284
+ answer_chat[-1]["content"] = answer_text or "(empty response)"
285
+ model_history = short_history + [
286
+ {"role": "user", "content": message},
287
+ {"role": "assistant", "content": answer_chat[-1]["content"]},
288
+ ]
289
+
290
+ yield clone_messages(reasoning_chat), clone_messages(answer_chat), list(model_history), ""
291
+
292
+ except Exception as exc:
293
+ reasoning_chat[-1]["content"] = ""
294
+ answer_chat[-1]["content"] = f"Error while preparing the local CPU model: {exc}"
295
+ yield clone_messages(reasoning_chat), clone_messages(answer_chat), list(model_history), ""
296
+
+
+ with gr.Blocks(title="Local CPU split-reasoning chat") as demo:
+     gr.Markdown(
+         "# Local CPU split-reasoning chat\n"
+         f"Runs the `{MODEL_ID}` model on the local CPU. No GGUF build or external inference API is used.\n\n"
+         "The first request may need to download the model, so the initial response can be slow.\n\n"
+         "The defaults favor Korean answers and show the reasoning panel and the answer panel separately.\n\n"
+         "There are no separate length caps for reasoning and assistant; the total generation budget is simply set generously."
+     )
+
+     with gr.Row():
+         preset = gr.Dropdown(
+             choices=list(PRESETS.keys()),
+             value="Math",
+             label="Preset prompt",
+         )
+         thinking = gr.Checkbox(label="Use reasoning", value=True)
+
+     system_prompt = gr.Textbox(
+         label="System prompt",
+         value=PRESETS["Math"]["system"],
+         lines=4,
+     )
+
+     user_input = gr.Textbox(
+         label="User message",
+         value=PRESETS["Math"]["prompt"],
+         lines=5,
+     )
+
+     with gr.Row():
+         send_btn = gr.Button("Send", variant="primary")
+         clear_btn = gr.Button("Clear")
+
+     with gr.Row():
+         reasoning_bot = make_chatbot("Reasoning", height=520)
+         answer_bot = make_chatbot("Answer", height=520)
+
+     model_history_state = gr.State([])
+
+     preset.change(
+         fn=load_preset,
+         inputs=preset,
+         outputs=[system_prompt, user_input, thinking],
+     )
+
+     send_btn.click(
+         fn=respond_stream,
+         inputs=[user_input, system_prompt, thinking, model_history_state, reasoning_bot, answer_bot],
+         outputs=[reasoning_bot, answer_bot, model_history_state, user_input],
+     )
+     user_input.submit(
+         fn=respond_stream,
+         inputs=[user_input, system_prompt, thinking, model_history_state, reasoning_bot, answer_bot],
+         outputs=[reasoning_bot, answer_bot, model_history_state, user_input],
+     )
+
+     clear_btn.click(
+         fn=clear_all,
+         inputs=None,
+         outputs=[reasoning_bot, answer_bot, model_history_state, user_input],
+     )
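+     # The Send button and pressing Enter in the textbox share one streaming
+     # handler; its fourth output ("") clears the input box after each send.
+     # Together with _generate_lock, demo.queue() below keeps concurrent
+     # requests from running parallel generate() calls on the single CPU model.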
+
+
+ demo.queue()
+ demo.launch()