Jay1121 committed · verified
Commit 5c6662b · 1 Parent(s): 07c4cee

Update app.py

Files changed (1): app.py (+99 −261)
app.py CHANGED
@@ -1,11 +1,10 @@
  # -*- coding: utf-8 -*-
- # app.py — 어느 MZ 친구의 느린 DM방 (Blossom 8B, 4bit, Gradio)
+ # app.py — 어느 MZ 친구의 느린 DM방 (Blossom 8B GGUF, llama.cpp, Gradio)

  import os
  import re
  import random
  import difflib
- import torch
  from datetime import datetime

  try:
@@ -14,20 +13,25 @@ except Exception:
  ZoneInfo = None

  import gradio as gr
- from transformers import AutoModelForCausalLM, AutoTokenizer, BitsAndBytesConfig
- from peft import PeftModel
+ from transformers import AutoTokenizer
+ from huggingface_hub import hf_hub_download
+ from llama_cpp import Llama

  # =========================================================
- # base model / fine-tuned model paths
+ # base model / tokenizer / GGUF path settings
  # =========================================================

+ # base model (used for the tokenizer)
  BASE_MODEL_PATH = "MLP-KTLim/llama-3-Korean-Bllossom-8B"

- # repo id of the merged model uploaded to the Hugging Face Hub
- # on the Space this is the only thing to point at your own model repo
- MODEL_DIR_DEFAULT = "Jay1121/blossom_v2"  # repo id only! (split out a subfolder option later if needed)
+ # Hugging Face repo that hosts the merged GGUF model
+ # (e.g. Jay1121/blossom_v2, blossom_v2.Q4_K_M.gguf)
+ MODEL_DIR_DEFAULT = "Jay1121/blossom_v2"  # repo id
  MODEL_DIR = os.environ.get("MODEL_DIR", MODEL_DIR_DEFAULT)

+ GGUF_REPO_ID = os.environ.get("GGUF_REPO_ID", MODEL_DIR)
+ GGUF_FILENAME = os.environ.get("GGUF_FILENAME", "blossom_v2.Q4_K_M.gguf")
+
  # =========================================================
  # environment variables / defaults
  # =========================================================
@@ -44,11 +48,6 @@ STRICT_MODE = os.environ.get("STRICT_MODE", "0") == "1"  # off by default
  SAFETY_ON = os.environ.get("SAFETY_ON", "0") == "1"  # off by default
  BAN_JAMO = os.environ.get("BAN_JAMO", "1") == "1"

- # ⚠ Spaces often have no GPU, so 4bit defaults to ON here,
- # but _get_bnb_config() below checks torch.cuda and switches it off automatically
- USE_FA = os.environ.get("USE_FLASH_ATTN", "1") == "1"
- USE_4BIT = os.environ.get("USE_4BIT", "1") == "1"  # ✅ 4bit by default (only applied when a GPU is present)
-
  STYLE_MODE = os.environ.get("STYLE_MODE", "auto")  # auto | deadpan | neutral

  WHITELIST_JAMO = set(
@@ -68,205 +67,37 @@ DEFAULT_PROFANITY = {
  }

  # =========================================================
- # loader helpers
- # =========================================================
-
- def _pick_attn_impl():
-     return "flash_attention_2" if USE_FA and torch.cuda.is_available() else "sdpa"
-
-
- def _is_peft_adapter(model_dir: str) -> bool:
-     return os.path.exists(os.path.join(model_dir, "adapter_config.json"))
-
-
- def _has_full_model(model_dir: str) -> bool:
-     names = ["pytorch_model.bin", "model.safetensors", "consolidated.safetensors"]
-     has_weight = any(os.path.exists(os.path.join(model_dir, n)) for n in names)
-     has_cfg = os.path.exists(os.path.join(model_dir, "config.json"))
-     return has_weight and has_cfg
-
-
- def _has_tokenizer_files(path: str) -> bool:
-     if not path:
-         return False
-     return any(
-         os.path.exists(os.path.join(path, n))
-         for n in ["tokenizer.model", "tokenizer.json", "vocab.json", "merges.txt"]
-     )
-
-
- def _load_tokenizer_pref_local(local_dir: str, fallback_dir: str):
-     def _try(path, fast):
-         return AutoTokenizer.from_pretrained(
-             path, trust_remote_code=True, use_fast=fast
-         )
-
-     # 1) prefer a local tokenizer.model
-     if local_dir and os.path.exists(os.path.join(local_dir, "tokenizer.model")):
-         try:
-             tok = _try(local_dir, False)
-             if tok.pad_token is None:
-                 tok.pad_token = tok.eos_token
-             print(f"🔤 토크나이저 OK: {local_dir} (slow, tokenizer.model)")
-             return tok
-         except Exception as e:
-             print(f"⚠️ local slow 실패: {e}")
-
-     # 2) local tokenizer.json
-     if local_dir and os.path.exists(os.path.join(local_dir, "tokenizer.json")):
-         try:
-             tok = _try(local_dir, True)
-             if tok.pad_token is None:
-                 tok.pad_token = tok.eos_token
-             print(f"🔤 토크나이저 OK: {local_dir} (fast, tokenizer.json)")
-             return tok
-         except Exception as e:
-             print(f"⚠️ local fast 실패: {e}")
-
-     # 3) fallback (base model)
-     for fast in (True, False):
-         try:
-             tok = _try(fallback_dir, fast)
-             if tok.pad_token is None:
-                 tok.pad_token = tok.eos_token
-             print(f"🔤 토크나이저 OK: {fallback_dir} (fast={fast})")
-             return tok
-         except Exception as e:
-             print(f"⚠️ fallback (fast={fast}) 실패: {e}")
-
-     raise RuntimeError("토크나이저 로드에 모두 실패했습니다.")
-
- # =========================================================
- # model loading (4bit supported)
+ # GGUF loader (llama.cpp)
  # =========================================================

- def _get_bnb_config():
+ def load_model_for_chat(model_repo: str):
      """
-     If there is no GPU, turn 4bit quantization off entirely so we avoid
-     the 'No GPU found. A GPU is needed for quantization.' error.
+     Load the GGUF model with llama.cpp.
+     - model_repo: Hugging Face repo id (e.g. 'Jay1121/blossom_v2')
+     - can be overridden via the GGUF_REPO_ID / GGUF_FILENAME environment variables
      """
-     if (not USE_4BIT) or (not torch.cuda.is_available()):
-         print("💡 4bit 비활성화 (USE_4BIT=0 이거나 GPU 없음) → 일반 fp16/bf16 로드")
-         return None
-
-     compute_dtype = torch.bfloat16 if torch.cuda.is_available() else torch.float16
-     print(f"🧮 4bit 양자화 사용 (compute_dtype={compute_dtype})")
-     return BitsAndBytesConfig(
-         load_in_4bit=True,
-         bnb_4bit_use_double_quant=True,
-         bnb_4bit_quant_type="nf4",
-         bnb_4bit_compute_dtype=compute_dtype,
+     repo_id = os.environ.get("GGUF_REPO_ID", model_repo)
+     filename = os.environ.get("GGUF_FILENAME", GGUF_FILENAME)
+
+     print(f"📥 GGUF 다운로드: {repo_id}/{filename}")
+     model_path = hf_hub_download(
+         repo_id=repo_id,
+         filename=filename,
      )

+     n_threads = int(os.environ.get("N_THREADS", str(os.cpu_count() or 4)))
+     n_ctx = int(os.environ.get("N_CTX", "2048"))

- def load_model_for_chat(model_dir: str, tokenizer_dir: str | None = None):
-     """
-     model_dir:
-     - a local folder
-     - or a Hugging Face Hub repo id (e.g. 'Jay1121/blossom_v2')
-     """
-     if os.path.isdir(model_dir):
-         print(f"▶ 로컬 모델 폴더: {model_dir}")
-         is_adapter = _is_peft_adapter(model_dir)
-         is_full = _has_full_model(model_dir)
-     else:
-         print(f"▶ 로컬 폴더 없음 → HF Hub에서 '{model_dir}' 로드 시도")
-         is_adapter = False
-         is_full = False
-
-     attn_impl = _pick_attn_impl()
-     bnb_config = _get_bnb_config()
-
-     # pick the tokenizer path
-     if tokenizer_dir:
-         tk_dir = tokenizer_dir
-     elif os.path.isdir(model_dir) and _has_tokenizer_files(model_dir):
-         tk_dir = model_dir
-     else:
-         tk_dir = BASE_MODEL_PATH
-
-     print(f"🔎 토크나이저 경로 선택: {tk_dir}")
-     tok = _load_tokenizer_pref_local(tk_dir, BASE_MODEL_PATH)
-
-     # 1) only a PEFT adapter is present (only meaningful locally)
-     if is_adapter and not is_full:
-         print("📦 감지: PEFT LoRA 어댑터 → 베이스(Bllossom) 로드 후 어댑터 적용")
-         try:
-             base = AutoModelForCausalLM.from_pretrained(
-                 BASE_MODEL_PATH,
-                 torch_dtype=torch.float16,
-                 device_map="auto",
-                 trust_remote_code=True,
-                 attn_implementation=attn_impl,
-             )
-         except Exception as e:
-             if attn_impl == "flash_attention_2":
-                 print(f"⚠️ flash-attn 실패 → SDPA로 전환: {e}")
-                 base = AutoModelForCausalLM.from_pretrained(
-                     BASE_MODEL_PATH,
-                     torch_dtype=torch.float16,
-                     device_map="auto",
-                     trust_remote_code=True,
-                     attn_implementation="sdpa",
-                 )
-             else:
-                 raise
-
-         model = PeftModel.from_pretrained(base, model_dir, offload_folder="offload")
-         try:
-             model = model.merge_and_unload()
-             print("✅ 어댑터 병합(merge_and_unload) 완료")
-         except Exception as e:
-             print(f"ℹ️ 병합 스킵: {e}")
-
-         model.eval()
-         print("✅ 모델 로드 완료!")
-         return model, tok
-
-     # 2) merged full model or HF Hub model (4bit possible)
-     print("📦 감지: 병합된 '완전체' 모델 또는 HF Hub 모델 → from_pretrained 로 로드")
-     try:
-         if bnb_config is not None:
-             model = AutoModelForCausalLM.from_pretrained(
-                 model_dir,
-                 device_map="auto",
-                 trust_remote_code=True,
-                 attn_implementation=attn_impl,
-                 quantization_config=bnb_config,
-             )
-         else:
-             model = AutoModelForCausalLM.from_pretrained(
-                 model_dir,
-                 torch_dtype=torch.float16 if torch.cuda.is_available() else torch.float32,
-                 device_map="auto",
-                 trust_remote_code=True,
-                 attn_implementation=attn_impl,
-             )
-     except Exception as e:
-         if attn_impl == "flash_attention_2":
-             print(f"⚠️ flash-attn 실패 → SDPA로 전환: {e}")
-             if bnb_config is not None:
-                 model = AutoModelForCausalLM.from_pretrained(
-                     model_dir,
-                     device_map="auto",
-                     trust_remote_code=True,
-                     attn_implementation="sdpa",
-                     quantization_config=bnb_config,
-                 )
-             else:
-                 model = AutoModelForCausalLM.from_pretrained(
-                     model_dir,
-                     torch_dtype=torch.float16 if torch.cuda.is_available() else torch.float32,
-                     device_map="auto",
-                     trust_remote_code=True,
-                     attn_implementation="sdpa",
-                 )
-         else:
-             raise
-
-     model.eval()
-     print("✅ 모델 로드 완료!")
-     return model, tok
+     print(f"🧠 llama.cpp 초기화 (n_threads={n_threads}, n_ctx={n_ctx})")
+     llm = Llama(
+         model_path=model_path,
+         n_ctx=n_ctx,
+         n_threads=n_threads,
+         logits_all=False,
+         seed=0,
+     )
+     print("✅ GGUF 모델 로드 완료!")
+     return llm

  # =========================================================
  # dictionaries / profanity
@@ -301,28 +132,37 @@ RE_LAUGH = re.compile(r"(ㅋ|ㅎ|ㅠ|ㅜ)\1{2,}")
  RE_EN = re.compile(r"[A-Za-z]+")
  RE_WORDS = re.compile(r"[가-힣]{2,}")

- def build_bad_words_ids(tokenizer):
-     ids = [
-         tokenizer(w, add_special_tokens=False).input_ids
-         for w in META_BANS
-     ]
-     for ch in list("abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ"):
-         ids.append(tokenizer(ch, add_special_tokens=False).input_ids)
-     if BAN_JAMO:
-         for code in list(range(0x1100, 0x11FF + 1)) + list(range(0x3130, 0x318F + 1)):
-             ch = chr(code)
-             if ch in WHITELIST_JAMO:
-                 continue
-             ids.append(tokenizer(ch, add_special_tokens=False).input_ids)
-     return ids
+ def _is_jamo(ch: str) -> bool:
+     code = ord(ch)
+     return (0x1100 <= code <= 0x11FF) or (0x3130 <= code <= 0x318F)
+
+
+ def _strip_jamo(text: str) -> str:
+     if not BAN_JAMO:
+         return text
+     out_chars = []
+     for ch in text:
+         if _is_jamo(ch) and (ch not in WHITELIST_JAMO):
+             continue
+         out_chars.append(ch)
+     return "".join(out_chars)


  def clean_text(txt: str):
+     # 1) shorten runs like ㅋㅋㅋㅋ / ㅠㅠㅠ
      if not KEEP_REPEATS:
          txt = RE_LAUGH.sub(lambda m: m.group(1) * 2, txt)
+     # 2) strip English letters
      txt = RE_EN.sub("", txt)
+     # 3) cut off any leaked prompt template
      cut = txt.split("### User:")[0]
-     return cut.strip()
+     txt = cut.strip()
+     # 4) remove meta words
+     for banned in META_BANS:
+         txt = txt.replace(banned, "")
+     # 5) strip jamo (except the whitelist)
+     txt = _strip_jamo(txt)
+     return txt.strip()


  def count_oov(txt: str, dictionary, allowlist):
@@ -412,49 +252,37 @@ def postprocess_deadpan(reply: str):
      return reply.strip()

  # =========================================================
- # decoding
+ # decoding (via llama.cpp)
  # =========================================================

- def decode_once(model, tok, prompt, bad_words_ids, *, deadpan=False):
-     """Keep replies short to keep things fast."""
+ def decode_once(model, prompt: str, *, deadpan: bool = False) -> str:
+     """Decode with llama.cpp."""
      if deadpan:
-         cfg = dict(
-             do_sample=True,
-             temperature=0.25,
-             top_p=0.85,
-             max_new_tokens=48,
-         )
+         temperature = 0.25
+         top_p = 0.85
+         max_tokens = 48
      elif STRICT_MODE:
-         cfg = dict(
-             do_sample=True,
-             temperature=0.35,
-             top_p=0.88,
-             max_new_tokens=56,
-         )
+         temperature = 0.35
+         top_p = 0.88
+         max_tokens = 56
      else:
-         cfg = dict(
-             do_sample=True,
-             temperature=0.6,
-             top_p=0.9,
-             max_new_tokens=64,
-         )
-
-     inputs = tok(prompt, return_tensors="pt").to(model.device)
-     with torch.no_grad():
-         out = model.generate(
-             **inputs,
-             repetition_penalty=1.12,
-             no_repeat_ngram_size=3,
-             eos_token_id=tok.eos_token_id,
-             pad_token_id=tok.pad_token_id,
-             bad_words_ids=bad_words_ids,
-             **cfg,
-         )
-     gen = tok.decode(out[0][inputs.input_ids.shape[1]:], skip_special_tokens=True)
+         temperature = 0.6
+         top_p = 0.9
+         max_tokens = 64
+
+     # llama_cpp.Llama.__call__
+     out = model(
+         prompt,
+         max_tokens=max_tokens,
+         temperature=temperature,
+         top_p=top_p,
+         stop=["</s>", "User:", "Assistant:", "### User:"],
+     )
+     gen = out["choices"][0]["text"]
      return clean_text(gen)

  # =========================================================
- # system prompt
+ # system prompt (⚠ keep the example lines exactly as they are)
  # =========================================================

  SYSTEM_PROMPT = (
@@ -468,7 +296,7 @@ SYSTEM_PROMPT = (
      "User: 무슨 일 해?\n"
      "Assistant: 별 건 안해.. 그냥 먹고 살려고\n"
      "User: 심심하다\n"
-     "Assistant: 지금 평일인데? 왜?\n"
+     "Assistant: 심심해? 개부럽누..\n"
      "--- 여기까지 예시 ---\n\n"
  )

@@ -476,11 +304,20 @@
  # global initialization
  # =========================================================

- print("🚀 모델/토크나이저 로드 중...")
- model, tokenizer = load_model_for_chat(MODEL_DIR, tokenizer_dir=None)
+ print("🚀 모델 로드 중 (GGUF + llama.cpp)...")
+ model = load_model_for_chat(MODEL_DIR)
+
+ print("🔤 토크나이저 로드 중...")
+ tokenizer = AutoTokenizer.from_pretrained(
+     BASE_MODEL_PATH,
+     trust_remote_code=True,
+     use_fast=True,
+ )
+ if tokenizer.pad_token is None:
+     tokenizer.pad_token = tokenizer.eos_token
+
  dictionary = load_dictionary()
  profanity = load_profanity()
- bad_words_ids = build_bad_words_ids(tokenizer)
  print("✅ 초기화 완료")

  # =========================================================
@@ -497,6 +334,7 @@ def chat_fn(user_input, history):
          messages.append({"role": "assistant", "content": b})
      messages.append({"role": "user", "content": user_input})

+     # keep using the original chat_template (tokenizer only)
      prompt = tokenizer.apply_chat_template(
          messages,
          tokenize=False,
@@ -504,7 +342,7 @@
      )

      deadpan = should_deadpan(user_input)
-     reply = decode_once(model, tokenizer, prompt, bad_words_ids, deadpan=deadpan)
+     reply = decode_once(model, prompt, deadpan=deadpan)

      oov_cnt, _ = count_oov(reply, dictionary, profanity)
      if OOV_STRIP and oov_cnt > 0:
@@ -538,7 +376,7 @@ demo = gr.ChatInterface(
      fn=chat_fn,
      title="어느 MZ 친구의 느린 DM방",
      description=(
-         "Blossom 8B + 카카오톡 말투 LoRA를 얹은, 어떤 MZ의 말투를 따라하는 한국어 친구 챗봇입니다.\n"
+         "Blossom 8B GGUF + 카카오톡 말투 LoRA를 얹은, 어떤 MZ의 말투를 따라하는 한국어 친구 챗봇입니다.\n"
          "(⚠️ 개 느림주의: 대답 늦어도 서운해하지 말 것)"
      ),
      examples=[
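
For context on what this commit switches to: the Space no longer builds a transformers/PEFT pipeline at startup; it downloads a single quantized GGUF file and runs it on CPU through llama.cpp, driven by the GGUF_REPO_ID, GGUF_FILENAME, N_CTX, and N_THREADS environment variables. Below is a minimal standalone sketch (not part of the commit) that mirrors load_model_for_chat() and decode_once() from the diff. The repo id, filename, sampling values, and stop strings are the defaults shown above; the prompt string and the thread count of 4 are illustrative placeholders only.

# Minimal sketch of the GGUF + llama.cpp path app.py now uses (assumes
# huggingface_hub and llama-cpp-python are installed).
from huggingface_hub import hf_hub_download
from llama_cpp import Llama

# Download the quantized GGUF weights from the Hub (defaults from the diff).
gguf_path = hf_hub_download(
    repo_id="Jay1121/blossom_v2",
    filename="blossom_v2.Q4_K_M.gguf",
)

# CPU-only inference; n_ctx / n_threads correspond to the N_CTX / N_THREADS env vars.
llm = Llama(model_path=gguf_path, n_ctx=2048, n_threads=4)

# Same sampling settings and stop strings as decode_once()'s default branch.
out = llm(
    "User: 뭐해?\nAssistant:",  # placeholder prompt; app.py builds the real one with apply_chat_template
    max_tokens=64,
    temperature=0.6,
    top_p=0.9,
    stop=["</s>", "User:", "Assistant:", "### User:"],
)
print(out["choices"][0]["text"])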