Spaces:

Jay1121
/

immitate_chatbot

Running

App Files Community

Jay1121 committed on Nov 18, 2025

Commit

3bbacc0

verified ·

1 Parent(s): 4985efe

Update app.py

Browse files

Files changed (1) hide show

app.py +186 -91

app.py CHANGED Viewed

@@ -1,57 +1,63 @@
 # -*- coding: utf-8 -*-
-# app.py — SOLAR 10.7B 친구 챗봇 (Gradio, 경량 설정)
-import os, re, random, difflib, torch
 from datetime import datetime
 try:
     from zoneinfo import ZoneInfo
 except Exception:
     ZoneInfo = None
 import gradio as gr
-from transformers import AutoModelForCausalLM, AutoTokenizer
 from peft import PeftModel
 # =========================================================
 # 기본 모델 / 파인튜닝 모델 경로
 # =========================================================
-# 베이스 SOLAR 모델 (토크나이저 fallback 용)
-BASE_MODEL_PATH = "Upstage/SOLAR-10.7B-Instruct-v1.0"
-# ✅ Hugging Face Hub 에 올려둔 병합 모델 리포 ID
-#   - Colab 에서 별도 경로 쓰고 싶으면 환경변수 MODEL_DIR 로 override
-MODEL_DIR = os.environ.get("MODEL_DIR", "Jay1121/my-solar-chatbot-merged")
 # =========================================================
 # 환경 변수 / 기본값 설정
 # =========================================================
-# 사전/욕설 경로 (Space에는 ./dictionaries 안에 같이 올리면 됨)
-DICT_PATH      = os.environ.get("DICT_PATH", "./dictionaries/korean_words.txt")
 PROFANITY_PATH = os.environ.get("PROFANITY_PATH", "")
-# 속도/품질 옵션 (기본은 빠르게 쪽으로)
-OOV_THRESHOLD  = int(os.environ.get("OOV_THRESHOLD", "0"))
-OOV_STRIP      = os.environ.get("OOV_STRIP", "1") == "1"
-STRICT_MODE    = os.environ.get("STRICT_MODE", "0") == "1"   # 기본 OFF
-SAFETY_ON      = os.environ.get("SAFETY_ON", "0") == "1"     # 기본 OFF
-BAN_JAMO       = os.environ.get("BAN_JAMO", "1") == "1"
-USE_FA         = os.environ.get("USE_FLASH_ATTN", "1") == "1"
-STYLE_MODE     = os.environ.get("STYLE_MODE", "auto")  # auto | deadpan | neutral
-WHITELIST_JAMO = set([s.strip() for s in os.environ.get("WHITELIST_JAMO", "ㅎ,ㅋ").split(",") if s.strip()])
-KEEP_REPEATS   = os.environ.get("KEEP_REPEATS", "0") == "1"
-ANTI_SMALLTALK = os.environ.get("ANTI_SMALLTALK", "0") == "1"   # 기본 OFF
-SMALLTALK_TRIES= int(os.environ.get("SMALLTALK_TRIES", "1"))
 META_BANS = ["AI", "인공지능", "챗봇", "도와줄게", "역할"]
 DEFAULT_PROFANITY = {
-    "씨발", "시발", "ㅅㅂ", "좆", "좆같", "개같", "개새끼", "개새", "개소리", "지랄",
-    "병신", "븅신", "병쉰", "병1신", "염병", "닥쳐", "꺼져", "닥치", "ㅄ", "ㅗ", "씹",
-    "ㅈ같", "개지랄", "싫다", "빡친", "개빡", "개빡침", "등신", "존나", "미친"
 }
 # =========================================================
@@ -61,28 +67,32 @@ DEFAULT_PROFANITY = {
 def _pick_attn_impl():
     return "flash_attention_2" if USE_FA else "sdpa"
 def _is_peft_adapter(model_dir: str) -> bool:
     return os.path.exists(os.path.join(model_dir, "adapter_config.json"))
 def _has_full_model(model_dir: str) -> bool:
     names = ["pytorch_model.bin", "model.safetensors", "consolidated.safetensors"]
     has_weight = any(os.path.exists(os.path.join(model_dir, n)) for n in names)
-    has_cfg    = os.path.exists(os.path.join(model_dir, "config.json"))
     return has_weight and has_cfg
 def _has_tokenizer_files(path: str) -> bool:
     if not path:
         return False
-    return any(os.path.exists(os.path.join(path, n)) for n in [
-        "tokenizer.model", "tokenizer.json", "vocab.json", "merges.txt"
-    ])
-def _load_tokenizer_pref_local(local_dir: str, fallback_dir: str):
-    tried = []
     def _try(path, fast):
-        tried.append(f"{path} (fast={fast})")
-        return AutoTokenizer.from_pretrained(path, trust_remote_code=True, use_fast=fast)
     # 1) 로컬 tokenizer.model 우선
     if local_dir and os.path.exists(os.path.join(local_dir, "tokenizer.model")):
@@ -90,7 +100,7 @@ def _load_tokenizer_pref_local(local_dir: str, fallback_dir: str):
             tok = _try(local_dir, False)
             if tok.pad_token is None:
                 tok.pad_token = tok.eos_token
-            print(f"🔤 토크나이저 OK: {local_dir} (use_fast=False, tokenizer.model)")
             return tok
         except Exception as e:
             print(f"⚠️ local slow 실패: {e}")
@@ -101,7 +111,7 @@ def _load_tokenizer_pref_local(local_dir: str, fallback_dir: str):
             tok = _try(local_dir, True)
             if tok.pad_token is None:
                 tok.pad_token = tok.eos_token
-            print(f"🔤 토크나이저 OK: {local_dir} (use_fast=True, tokenizer.json)")
             return tok
         except Exception as e:
             print(f"⚠️ local fast 실패: {e}")
@@ -112,7 +122,7 @@ def _load_tokenizer_pref_local(local_dir: str, fallback_dir: str):
             tok = _try(fallback_dir, fast)
             if tok.pad_token is None:
                 tok.pad_token = tok.eos_token
-            print(f"🔤 토크나이저 OK: {fallback_dir} (use_fast={fast})")
             return tok
         except Exception as e:
             print(f"⚠️ fallback (fast={fast}) 실패: {e}")
@@ -120,26 +130,39 @@ def _load_tokenizer_pref_local(local_dir: str, fallback_dir: str):
     raise RuntimeError("토크나이저 로드에 모두 실패했습니다.")
 # =========================================================
-# 모델 로드
 # =========================================================
 def load_model_for_chat(model_dir: str, tokenizer_dir: str | None = None):
     """
     model_dir:
-      - 로컬 폴더일 수도 있고
-      - Hugging Face Hub repo id (예: 'Jay1121/my-solar-chatbot-merged') 일 수도 있음
     """
     if os.path.isdir(model_dir):
         print(f"▶ 로컬 모델 폴더: {model_dir}")
         is_adapter = _is_peft_adapter(model_dir)
-        is_full    = _has_full_model(model_dir)
     else:
         print(f"▶ 로컬 폴더 없음 → HF Hub에서 '{model_dir}' 로드 시도")
         is_adapter = False
-        is_full    = False
     attn_impl = _pick_attn_impl()
     # 토크나이저 경로 선택
     if tokenizer_dir:
@@ -152,9 +175,9 @@ def load_model_for_chat(model_dir: str, tokenizer_dir: str | None = None):
     print(f"🔎 토크나이저 경로 선택: {tk_dir}")
     tok = _load_tokenizer_pref_local(tk_dir, BASE_MODEL_PATH)
-    # 1) PEFT 어댑터 폴더인 경우 (로컬 디렉토리에서만 의미 있음)
     if is_adapter and not is_full:
-        print("📦 감지: PEFT LoRA 어댑터 → 베이스(SOLAR) 로드 후 어댑터 적용")
         try:
             base = AutoModelForCausalLM.from_pretrained(
                 BASE_MODEL_PATH,
@@ -182,30 +205,49 @@ def load_model_for_chat(model_dir: str, tokenizer_dir: str | None = None):
             print("✅ 어댑터 병합(merge_and_unload) 완료")
         except Exception as e:
             print(f"ℹ️ 병합 스킵: {e}")
         model.eval()
         print("✅ 모델 로드 완료!")
         return model, tok
-    # 2) 병합된 풀 모델 or HF Hub 모델로 로드
     print("📦 감지: 병합된 '완전체' 모델 또는 HF Hub 모델 → from_pretrained 로 로드")
     try:
-        model = AutoModelForCausalLM.from_pretrained(
-            model_dir,
-            torch_dtype=torch.float16,
-            device_map="auto",
-            trust_remote_code=True,
-            attn_implementation=attn_impl,
-        )
-    except Exception as e:
-        if attn_impl == "flash_attention_2":
-            print(f"⚠️ flash-attn 실패 → SDPA로 전환: {e}")
             model = AutoModelForCausalLM.from_pretrained(
                 model_dir,
                 torch_dtype=torch.float16,
                 device_map="auto",
                 trust_remote_code=True,
-                attn_implementation="sdpa",
             )
         else:
             raise
@@ -226,6 +268,7 @@ def load_dictionary(path=DICT_PATH):
     print(f"📚 사전 없음: {path} (OOV 검사 약화)")
     return set()
 def load_profanity(path=PROFANITY_PATH):
     prof = set(DEFAULT_PROFANITY)
     if path and os.path.exists(path):
@@ -241,12 +284,15 @@ def load_profanity(path=PROFANITY_PATH):
 # 전처리 / 검사
 # =========================================================
-RE_LAUGH = re.compile(r'(ㅋ|ㅎ|ㅠ|ㅜ)\1{2,}')
-RE_EN    = re.compile(r'[A-Za-z]+')
-RE_WORDS = re.compile(r'[가-힣]{2,}')
 def build_bad_words_ids(tokenizer):
-    ids = [tokenizer(w, add_special_tokens=False).input_ids for w in META_BANS]
     for ch in list("abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ"):
         ids.append(tokenizer(ch, add_special_tokens=False).input_ids)
     if BAN_JAMO:
@@ -257,18 +303,21 @@ def build_bad_words_ids(tokenizer):
             ids.append(tokenizer(ch, add_special_tokens=False).input_ids)
     return ids
 def clean_text(txt: str):
     if not KEEP_REPEATS:
         txt = RE_LAUGH.sub(lambda m: m.group(1) * 2, txt)
-    txt = RE_EN.sub('', txt)
     cut = txt.split("### User:")[0]
     return cut.strip()
 def count_oov(txt: str, dictionary, allowlist):
     words = RE_WORDS.findall(txt)
     oov = [w for w in words if (w not in dictionary and w not in allowlist)]
     return len(oov), oov
 def strip_oov(txt: str, dictionary, allowlist):
     kept, i = [], 0
     while i < len(txt):
@@ -282,28 +331,32 @@ def strip_oov(txt: str, dictionary, allowlist):
             kept.append(w)
         i = m.end()
     out = "".join(kept)
-    out = re.sub(r'\s{2,}', ' ', out).strip()
     return out
 SMALLTALK_PATTERNS = [
-    r'오늘\s*날씨', r'\b날씨\s*(가|는)?\s*(좋|괜찮|별로|따뜻|쌀쌀|시원|선선)',
-    r'(하늘|기온|미세먼지)\s*(이|가)?\s*(좋|맑|깨끗|나쁨|흐림)',
-    r'(더워|추워)\b', r'비(\s*가)?\s*(온|와|왔|올)\b'
 ]
 SMALLTALK_REGEXES = [re.compile(p) for p in SMALLTALK_PATTERNS]
 def normalize_for_sim(s: str):
-    s = re.sub(r'\s+', '', s)
-    s = re.sub(r'[.!?~…]+', '', s)
-    s = re.sub(r'(.)\1{2,}', r'\1\1', s)
     return s
 def looks_smalltalk(text: str):
     t = normalize_for_sim(text)
     if "오늘날씨좋았어" in t:
         return True
     return any(rx.search(text) for rx in SMALLTALK_REGEXES)
 def too_similar_to_history(text: str, history_texts, thresh=0.86):
     t1 = normalize_for_sim(text)
     for h in history_texts:
@@ -317,9 +370,9 @@ def too_similar_to_history(text: str, history_texts, thresh=0.86):
 # =========================================================
 DEADPAN_TRIGGERS = [
-    "심심", "귀찮", "짜증", "싫", "하..", "휴", "후", "지루", "그만", "피곤", "죽였어",
-    "개소리", "뭐래", "에휴", "흥미없", "아...", "음....", ";;;;", "어쩌라고",
-    "그건 본인 사정이죠", "그건 니사정이지"
 ]
 def should_deadpan(user_text: str):
@@ -330,11 +383,12 @@ def should_deadpan(user_text: str):
         return False
     return any(k in user_text for k in DEADPAN_TRIGGERS)
 def postprocess_deadpan(reply: str):
     reply = reply.replace("!", ".")
-    reply = re.sub(r'[~…]+', '...', reply)
     if len(reply) > 120:
-        cut = re.split(r'([.다]\s)', reply, maxsplit=1)
         if cut and len("".join(cut[:2])) > 0:
             reply = "".join(cut[:2]).strip()
         reply = reply[:120].rstrip() + "..."
@@ -345,17 +399,32 @@ def postprocess_deadpan(reply: str):
     return reply.strip()
 # =========================================================
-# 디코딩 (경량화)
 # =========================================================
 def decode_once(model, tok, prompt, bad_words_ids, *, deadpan=False):
-    # max_new_tokens 줄여서 속도 확보
     if deadpan:
-        cfg = dict(do_sample=True, temperature=0.25, top_p=0.85, max_new_tokens=96)
     elif STRICT_MODE:
-        cfg = dict(do_sample=True, temperature=0.35, top_p=0.88, max_new_tokens=128)
     else:
-        cfg = dict(do_sample=True, temperature=0.5, top_p=0.9, max_new_tokens=128)
     inputs = tok(prompt, return_tensors="pt").to(model.device)
     with torch.no_grad():
@@ -366,7 +435,7 @@ def decode_once(model, tok, prompt, bad_words_ids, *, deadpan=False):
             eos_token_id=tok.eos_token_id,
             pad_token_id=tok.pad_token_id,
             bad_words_ids=bad_words_ids,
-            **cfg
         )
     gen = tok.decode(out[0][inputs.input_ids.shape[1]:], skip_special_tokens=True)
     return clean_text(gen)
@@ -376,17 +445,17 @@ def decode_once(model, tok, prompt, bad_words_ids, *, deadpan=False):
 # =========================================================
 SYSTEM_PROMPT = (
-    "너는 사용자의 가장 친한 친구야. 순수한 한글 구어체로 말해. "
     f"영문/불필요한 낱자 자모 금지(허용: {','.join(sorted(WHITELIST_JAMO))}). "
     "메타 단어('AI','인공지능','챗봇','도와줄게','역할') 금지. "
-    "필요하면 짧고 건조하게 답해도 돼.\n\n"
     "--- 대화 예시 ---\n"
     "User: 넌 누구야?\n"
-    "Assistant: 나는.. 인간이야..\n"
     "User: 무슨 일 해?\n"
-    "Assistant: 그냥 있어..\n"
     "User: 심심하다\n"
-    "Assistant: 음.. 뭐 할래? 산책?\n"
     "--- 여기까지 예시 ---\n\n"
 )
@@ -408,20 +477,23 @@ print("✅ 초기화 완료")
 def chat_fn(user_input, history):
     # history: 리스트 [(user, bot), ...]
     messages = [{"role": "system", "content": SYSTEM_PROMPT}]
-    for u, b in history[-5:]:  # 최근 5턴만 사용
         messages.append({"role": "user", "content": u})
         messages.append({"role": "assistant", "content": b})
     messages.append({"role": "user", "content": user_input})
     prompt = tokenizer.apply_chat_template(
-        messages, tokenize=False, add_generation_prompt=True
     )
     deadpan = should_deadpan(user_input)
     reply = decode_once(model, tokenizer, prompt, bad_words_ids, deadpan=deadpan)
-    oov_cnt, _ = count_oov(reply, dictionary, profanity)
     if OOV_STRIP and oov_cnt > 0:
         reply = strip_oov(reply, dictionary, profanity)
@@ -434,11 +506,34 @@ def chat_fn(user_input, history):
 # Gradio UI
 # =========================================================
 demo = gr.ChatInterface(
     fn=chat_fn,
-    title="SOLAR 친구 챗봇",
-    description="SOLAR-10.7B 기반 한글 친구 챗봇 (가벼운 설정)",
-    examples=["야 나 오늘 개피곤하다", "이직할까 말까 고민중이야", "나 좀 칭찬해줘"],
 )
 if __name__ == "__main__":

 # -*- coding: utf-8 -*-
+# app.py — 어느 MZ 친구의 느린 DM방 (Blossom 8B, 4bit, Gradio)
+import os
+import re
+import random
+import difflib
+import torch
 from datetime import datetime
 try:
     from zoneinfo import ZoneInfo
 except Exception:
     ZoneInfo = None
 import gradio as gr
+from transformers import AutoModelForCausalLM, AutoTokenizer, BitsAndBytesConfig
 from peft import PeftModel
 # =========================================================
 # 기본 모델 / 파인튜닝 모델 경로
 # =========================================================
+BASE_MODEL_PATH = "MLP-KTLim/llama-3-Korean-Bllossom-8B"
+# Hugging Face Hub에 올려둔 병합 모델 리포 ID
+MODEL_DIR_DEFAULT = "Jay1121/blossom_v2/kakao_merged_v1"  # << 여기를 네 repo로 바꿔!
+MODEL_DIR = os.environ.get("MODEL_DIR", MODEL_DIR_DEFAULT)
 # =========================================================
 # 환경 변수 / 기본값 설정
 # =========================================================
+DICT_PATH = os.environ.get("DICT_PATH", "./dictionaries/korean_words.txt")
 PROFANITY_PATH = os.environ.get("PROFANITY_PATH", "")
+OOV_THRESHOLD = int(os.environ.get("OOV_THRESHOLD", "0"))
+OOV_STRIP = os.environ.get("OOV_STRIP", "1") == "1"
+STRICT_MODE = os.environ.get("STRICT_MODE", "0") == "1"  # 기본 OFF
+SAFETY_ON = os.environ.get("SAFETY_ON", "0") == "1"      # 기본 OFF
+BAN_JAMO = os.environ.get("BAN_JAMO", "1") == "1"
+USE_FA = os.environ.get("USE_FLASH_ATTN", "1") == "1"
+USE_4BIT = os.environ.get("USE_4BIT", "1") == "1"        # ✅ 기본 4bit 사용
+STYLE_MODE = os.environ.get("STYLE_MODE", "auto")  # auto | deadpan | neutral
+WHITELIST_JAMO = set(
+    [s.strip() for s in os.environ.get("WHITELIST_JAMO", "ㅎ,ㅋ").split(",") if s.strip()]
+)
+KEEP_REPEATS = os.environ.get("KEEP_REPEATS", "0") == "1"
+ANTI_SMALLTALK = os.environ.get("ANTI_SMALLTALK", "0") == "1"
+SMALLTALK_TRIES = int(os.environ.get("SMALLTALK_TRIES", "1"))
 META_BANS = ["AI", "인공지능", "챗봇", "도와줄게", "역할"]
 DEFAULT_PROFANITY = {
+    "씨발", "시발", "ㅅㅂ", "좆", "좆같", "개같", "개새끼", "개새", "개소리",
+    "지랄", "병신", "븅신", "병쉰", "병1신", "염병", "닥쳐", "꺼져", "닥치",
+    "ㅄ", "ㅗ", "씹", "ㅈ같", "개지랄", "싫다", "빡친", "개빡", "개빡침",
+    "등신", "존나", "미친"
 }
 # =========================================================
 def _pick_attn_impl():
     return "flash_attention_2" if USE_FA else "sdpa"
 def _is_peft_adapter(model_dir: str) -> bool:
     return os.path.exists(os.path.join(model_dir, "adapter_config.json"))
 def _has_full_model(model_dir: str) -> bool:
     names = ["pytorch_model.bin", "model.safetensors", "consolidated.safetensors"]
     has_weight = any(os.path.exists(os.path.join(model_dir, n)) for n in names)
+    has_cfg = os.path.exists(os.path.join(model_dir, "config.json"))
     return has_weight and has_cfg
 def _has_tokenizer_files(path: str) -> bool:
     if not path:
         return False
+    return any(
+        os.path.exists(os.path.join(path, n))
+        for n in ["tokenizer.model", "tokenizer.json", "vocab.json", "merges.txt"]
+    )
+def _load_tokenizer_pref_local(local_dir: str, fallback_dir: str):
     def _try(path, fast):
+        return AutoTokenizer.from_pretrained(
+            path, trust_remote_code=True, use_fast=fast
+        )
     # 1) 로컬 tokenizer.model 우선
     if local_dir and os.path.exists(os.path.join(local_dir, "tokenizer.model")):
             tok = _try(local_dir, False)
             if tok.pad_token is None:
                 tok.pad_token = tok.eos_token
+            print(f"🔤 토크나이저 OK: {local_dir} (slow, tokenizer.model)")
             return tok
         except Exception as e:
             print(f"⚠️ local slow 실패: {e}")
             tok = _try(local_dir, True)
             if tok.pad_token is None:
                 tok.pad_token = tok.eos_token
+            print(f"🔤 토크나이저 OK: {local_dir} (fast, tokenizer.json)")
             return tok
         except Exception as e:
             print(f"⚠️ local fast 실패: {e}")
             tok = _try(fallback_dir, fast)
             if tok.pad_token is None:
                 tok.pad_token = tok.eos_token
+            print(f"🔤 토크나이저 OK: {fallback_dir} (fast={fast})")
             return tok
         except Exception as e:
             print(f"⚠️ fallback (fast={fast}) 실패: {e}")
     raise RuntimeError("토크나이저 로드에 모두 실패했습니다.")
 # =========================================================
+# 모델 로드 (4bit 지원)
 # =========================================================
+def _get_bnb_config():
+    if not USE_4BIT:
+        return None
+    compute_dtype = torch.bfloat16 if torch.cuda.is_available() else torch.float16
+    print(f"🧮 4bit 양자화 사용 (compute_dtype={compute_dtype})")
+    return BitsAndBytesConfig(
+        load_in_4bit=True,
+        bnb_4bit_use_double_quant=True,
+        bnb_4bit_quant_type="nf4",
+        bnb_4bit_compute_dtype=compute_dtype,
+    )
 def load_model_for_chat(model_dir: str, tokenizer_dir: str | None = None):
     """
     model_dir:
+      - 로컬 폴더
+      - 또는 Hugging Face Hub repo id (예: 'Jay1121/blossom-kakao-merged')
     """
     if os.path.isdir(model_dir):
         print(f"▶ 로컬 모델 폴더: {model_dir}")
         is_adapter = _is_peft_adapter(model_dir)
+        is_full = _has_full_model(model_dir)
     else:
         print(f"▶ 로컬 폴더 없음 → HF Hub에서 '{model_dir}' 로드 시도")
         is_adapter = False
+        is_full = False
     attn_impl = _pick_attn_impl()
+    bnb_config = _get_bnb_config()
     # 토크나이저 경로 선택
     if tokenizer_dir:
     print(f"🔎 토크나이저 경로 선택: {tk_dir}")
     tok = _load_tokenizer_pref_local(tk_dir, BASE_MODEL_PATH)
+    # 1) PEFT 어댑터만 있는 경우 (로컬에서만 의미)
     if is_adapter and not is_full:
+        print("📦 감지: PEFT LoRA 어댑터 → 베이스(Bllossom) 로드 후 어댑터 적용")
         try:
             base = AutoModelForCausalLM.from_pretrained(
                 BASE_MODEL_PATH,
             print("✅ 어댑터 병합(merge_and_unload) 완료")
         except Exception as e:
             print(f"ℹ️ 병합 스킵: {e}")
         model.eval()
         print("✅ 모델 로드 완료!")
         return model, tok
+    # 2) 병합된 풀 모델 or HF Hub 모델 (4bit 가능)
     print("📦 감지: 병합된 '완전체' 모델 또는 HF Hub 모델 → from_pretrained 로 로드")
     try:
+        if bnb_config is not None:
+            model = AutoModelForCausalLM.from_pretrained(
+                model_dir,
+                device_map="auto",
+                trust_remote_code=True,
+                attn_implementation=attn_impl,
+                quantization_config=bnb_config,
+            )
+        else:
             model = AutoModelForCausalLM.from_pretrained(
                 model_dir,
                 torch_dtype=torch.float16,
                 device_map="auto",
                 trust_remote_code=True,
+                attn_implementation=attn_impl,
             )
+    except Exception as e:
+        if attn_impl == "flash_attention_2":
+            print(f"⚠️ flash-attn 실패 → SDPA로 전환: {e}")
+            if bnb_config is not None:
+                model = AutoModelForCausalLM.from_pretrained(
+                    model_dir,
+                    device_map="auto",
+                    trust_remote_code=True,
+                    attn_implementation="sdpa",
+                    quantization_config=bnb_config,
+                )
+            else:
+                model = AutoModelForCausalLM.from_pretrained(
+                    model_dir,
+                    torch_dtype=torch.float16,
+                    device_map="auto",
+                    trust_remote_code=True,
+                    attn_implementation="sdpa",
+                )
         else:
             raise
     print(f"📚 사전 없음: {path} (OOV 검사 약화)")
     return set()
 def load_profanity(path=PROFANITY_PATH):
     prof = set(DEFAULT_PROFANITY)
     if path and os.path.exists(path):
 # 전처리 / 검사
 # =========================================================
+RE_LAUGH = re.compile(r"(ㅋ|ㅎ|ㅠ|ㅜ)\1{2,}")
+RE_EN = re.compile(r"[A-Za-z]+")
+RE_WORDS = re.compile(r"[가-힣]{2,}")
 def build_bad_words_ids(tokenizer):
+    ids = [
+        tokenizer(w, add_special_tokens=False).input_ids
+        for w in META_BANS
+    ]
     for ch in list("abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ"):
         ids.append(tokenizer(ch, add_special_tokens=False).input_ids)
     if BAN_JAMO:
             ids.append(tokenizer(ch, add_special_tokens=False).input_ids)
     return ids
 def clean_text(txt: str):
     if not KEEP_REPEATS:
         txt = RE_LAUGH.sub(lambda m: m.group(1) * 2, txt)
+    txt = RE_EN.sub("", txt)
     cut = txt.split("### User:")[0]
     return cut.strip()
 def count_oov(txt: str, dictionary, allowlist):
     words = RE_WORDS.findall(txt)
     oov = [w for w in words if (w not in dictionary and w not in allowlist)]
     return len(oov), oov
 def strip_oov(txt: str, dictionary, allowlist):
     kept, i = [], 0
     while i < len(txt):
             kept.append(w)
         i = m.end()
     out = "".join(kept)
+    out = re.sub(r"\s{2,}", " ", out).strip()
     return out
 SMALLTALK_PATTERNS = [
+    r"오늘\s*날씨",
+    r"\b날씨\s*(가|는)?\s*(좋|괜찮|별로|따뜻|쌀쌀|시원|선선)",
+    r"(하늘|기온|미세먼지)\s*(이|가)?\s*(좋|맑|깨끗|나쁨|흐림)",
+    r"(더워|추워)\b",
+    r"비(\s*가)?\s*(온|와|왔|올)\b",
 ]
 SMALLTALK_REGEXES = [re.compile(p) for p in SMALLTALK_PATTERNS]
 def normalize_for_sim(s: str):
+    s = re.sub(r"\s+", "", s)
+    s = re.sub(r"[.!?~…]+", "", s)
+    s = re.sub(r"(.)\1{2,}", r"\1\1", s)
     return s
 def looks_smalltalk(text: str):
     t = normalize_for_sim(text)
     if "오늘날씨좋았어" in t:
         return True
     return any(rx.search(text) for rx in SMALLTALK_REGEXES)
 def too_similar_to_history(text: str, history_texts, thresh=0.86):
     t1 = normalize_for_sim(text)
     for h in history_texts:
 # =========================================================
 DEADPAN_TRIGGERS = [
+    "심심", "귀찮", "짜증", "싫", "하..", "휴", "후", "지루",
+    "그만", "피곤", "죽였어", "개소리", "뭐래", "에휴", "흥미없",
+    "아...", "음....", ";;;;", "어쩌라고", "그건 본인 사정이죠", "그건 니사정이지"
 ]
 def should_deadpan(user_text: str):
         return False
     return any(k in user_text for k in DEADPAN_TRIGGERS)
 def postprocess_deadpan(reply: str):
     reply = reply.replace("!", ".")
+    reply = re.sub(r"[~…]+", "...", reply)
     if len(reply) > 120:
+        cut = re.split(r"([.다]\s)", reply, maxsplit=1)
         if cut and len("".join(cut[:2])) > 0:
             reply = "".join(cut[:2]).strip()
         reply = reply[:120].rstrip() + "..."
     return reply.strip()
 # =========================================================
+# 디코딩
 # =========================================================
 def decode_once(model, tok, prompt, bad_words_ids, *, deadpan=False):
+    """답변 길이를 줄여서 속도 확보."""
     if deadpan:
+        cfg = dict(
+            do_sample=True,
+            temperature=0.25,
+            top_p=0.85,
+            max_new_tokens=48,
+        )
     elif STRICT_MODE:
+        cfg = dict(
+            do_sample=True,
+            temperature=0.35,
+            top_p=0.88,
+            max_new_tokens=56,
+        )
     else:
+        cfg = dict(
+            do_sample=True,
+            temperature=0.6,
+            top_p=0.9,
+            max_new_tokens=64,
+        )
     inputs = tok(prompt, return_tensors="pt").to(model.device)
     with torch.no_grad():
             eos_token_id=tok.eos_token_id,
             pad_token_id=tok.pad_token_id,
             bad_words_ids=bad_words_ids,
+            **cfg,
         )
     gen = tok.decode(out[0][inputs.input_ids.shape[1]:], skip_special_tokens=True)
     return clean_text(gen)
 # =========================================================
 SYSTEM_PROMPT = (
+    "너는 사용자의 가장 친한 친구야. 20~30대 MZ 말투 섞인 편안한 한국어 구어체로 말해. "
     f"영문/불필요한 낱자 자모 금지(허용: {','.join(sorted(WHITELIST_JAMO))}). "
     "메타 단어('AI','인공지능','챗봇','도와줄게','역할') 금지. "
+    "가끔 시크하게 한 줄만 대답해도 되고, 너무 설교하지 말고 현실 친구처럼 얘기해.\n\n"
     "--- 대화 예시 ---\n"
     "User: 넌 누구야?\n"
+    "Assistant: 그냥.. 네 얘기 들어주는 친구야.\n"
     "User: 무슨 일 해?\n"
+    "Assistant: 별 건 안 하고, 니 하소연 받아주는 정도?\n"
     "User: 심심하다\n"
+    "Assistant: 음.. 뭐 할래? 넷플? 산책? 아니면 그냥 수다?\n"
     "--- 여기까지 예시 ---\n\n"
 )
 def chat_fn(user_input, history):
     # history: 리스트 [(user, bot), ...]
     messages = [{"role": "system", "content": SYSTEM_PROMPT}]
+    # 속도 위해 최근 2턴만 유지
+    for u, b in history[-2:]:
         messages.append({"role": "user", "content": u})
         messages.append({"role": "assistant", "content": b})
     messages.append({"role": "user", "content": user_input})
     prompt = tokenizer.apply_chat_template(
+        messages,
+        tokenize=False,
+        add_generation_prompt=True,
     )
     deadpan = should_deadpan(user_input)
     reply = decode_once(model, tokenizer, prompt, bad_words_ids, deadpan=deadpan)
+    oov_cnt, _ = count_oov(reply, dictionary, profanity)
     if OOV_STRIP and oov_cnt > 0:
         reply = strip_oov(reply, dictionary, profanity)
 # Gradio UI
 # =========================================================
+CUSTOM_CSS = """
+.gradio-container {
+    font-family: "Noto Sans KR", system-ui, sans-serif;
+}
+/* 유저 메시지 텍스트를 진한 검정으로 */
+.message.user,
+.user .message,
+.chat-message.user,
+.gr-chatbot .message.user,
+.gr-chatbot .user {
+    color: #111111 !important;
+}
+"""
 demo = gr.ChatInterface(
     fn=chat_fn,
+    title="어느 MZ 친구의 느린 DM방",
+    description=(
+        "Blossom 8B + 카카오톡 말투 LoRA를 얹은, 어떤 MZ의 말투를 따라하는 한국어 친구 챗봇입니다.\n"
+        "(⚠️ 개 느림주의: 대답 늦어도 서운해하지 말 것)"
+    ),
+    examples=[
+        "야 나 오늘 개피곤하다",
+        "이직할까 말까 고민중이야",
+        "나 좀 칭찬해줘",
+    ],
+    css=CUSTOM_CSS,
 )
 if __name__ == "__main__":