Jay1121 committed on
Commit da9f771 · verified · 1 Parent(s): 93cdf4b

Create app.py

Files changed (1)
  1. app.py +390 -63
app.py CHANGED
@@ -1,70 +1,397 @@
  import gradio as gr
- from huggingface_hub import InferenceClient
-
-
- def respond(
-     message,
-     history: list[dict[str, str]],
-     system_message,
-     max_tokens,
-     temperature,
-     top_p,
-     hf_token: gr.OAuthToken,
- ):
-     """
-     For more information on `huggingface_hub` Inference API support, please check the docs: https://huggingface.co/docs/huggingface_hub/v0.22.2/en/guides/inference
-     """
-     client = InferenceClient(token=hf_token.token, model="openai/gpt-oss-20b")
-
-     messages = [{"role": "system", "content": system_message}]
-
-     messages.extend(history)
-
-     messages.append({"role": "user", "content": message})
-
-     response = ""
-
-     for message in client.chat_completion(
-         messages,
-         max_tokens=max_tokens,
-         stream=True,
-         temperature=temperature,
-         top_p=top_p,
-     ):
-         choices = message.choices
-         token = ""
-         if len(choices) and choices[0].delta.content:
-             token = choices[0].delta.content
-
-         response += token
-         yield response
-
-
- """
- For information on how to customize the ChatInterface, peruse the gradio docs: https://www.gradio.app/docs/chatinterface
- """
- chatbot = gr.ChatInterface(
-     respond,
-     type="messages",
-     additional_inputs=[
-         gr.Textbox(value="You are a friendly Chatbot.", label="System message"),
-         gr.Slider(minimum=1, maximum=2048, value=512, step=1, label="Max new tokens"),
-         gr.Slider(minimum=0.1, maximum=4.0, value=0.7, step=0.1, label="Temperature"),
-         gr.Slider(
-             minimum=0.1,
-             maximum=1.0,
-             value=0.95,
-             step=0.05,
-             label="Top-p (nucleus sampling)",
-         ),
-     ],
  )

- with gr.Blocks() as demo:
-     with gr.Sidebar():
-         gr.LoginButton()
-     chatbot.render()


  if __name__ == "__main__":
      demo.launch()
 
+ # -*- coding: utf-8 -*-
+ # app.py — SOLAR 10.7B friend chatbot (Gradio, lightweight settings)
+
+ import os, re, random, difflib, torch
+ from datetime import datetime
+ try:
+     from zoneinfo import ZoneInfo
+ except Exception:
+     ZoneInfo = None
+
  import gradio as gr
+ from transformers import AutoModelForCausalLM, AutoTokenizer
+ from peft import PeftModel
+
+ BASE_MODEL_PATH = "Upstage/SOLAR-10.7B-Instruct-v1.0"
+
+ # =========================
+ # Environment variables / defaults
+ # =========================
+
+ # Shared between Hugging Face Spaces and Colab: model folder path
+ # - Colab: /content/my-solar-chatbot-merged
+ # - Space: ./my-solar-chatbot-merged (when the model folder is committed into the repo)
+ MODEL_DIR = os.environ.get("MODEL_DIR", "/content/my-solar-chatbot-merged")
+
+ # Dictionary / profanity list paths (on a Space, upload them under ./dictionaries)
+ DICT_PATH = os.environ.get("DICT_PATH", "./dictionaries/korean_words.txt")
+ PROFANITY_PATH = os.environ.get("PROFANITY_PATH", "")
+
+ # Speed/quality options (defaults favor speed)
+ OOV_THRESHOLD = int(os.environ.get("OOV_THRESHOLD", "0"))
+ OOV_STRIP = os.environ.get("OOV_STRIP","1") == "1"
+ STRICT_MODE = os.environ.get("STRICT_MODE","0") == "1"  # off by default
+ SAFETY_ON = os.environ.get("SAFETY_ON","0") == "1"  # off by default
+ BAN_JAMO = os.environ.get("BAN_JAMO","1") == "1"
+ USE_FA = os.environ.get("USE_FLASH_ATTN","1") == "1"
+
+ STYLE_MODE = os.environ.get("STYLE_MODE","auto")  # auto | deadpan | neutral
+ WHITELIST_JAMO = set([s.strip() for s in os.environ.get("WHITELIST_JAMO","ㅎ,ㅋ").split(",") if s.strip()])
+ KEEP_REPEATS = os.environ.get("KEEP_REPEATS","0") == "1"
+
+ ANTI_SMALLTALK = os.environ.get("ANTI_SMALLTALK","0") == "1"  # off by default
+ SMALLTALK_TRIES = int(os.environ.get("SMALLTALK_TRIES","1"))
+
+ META_BANS = ["AI","인공지능","챗봇","도와줄게","역할"]
+
+ DEFAULT_PROFANITY = {
+     "씨발","시발","ㅅㅂ","좆","좆같","개같","개새끼","개새","개소리","지랄",
+     "병신","븅신","병쉰","병1신","염병","닥쳐","꺼져","닥치","ㅄ","ㅗ","씹",
+     "ㅈ같","개지랄","싫다","빡친","개빡","개빡침","등신","존나","미친"
+ }
+
+ # =========================
+ # Loader helpers
+ # =========================
+
+ def _pick_attn_impl():
+     return "flash_attention_2" if USE_FA else "sdpa"
+
+ def _is_peft_adapter(model_dir: str) -> bool:
+     return os.path.exists(os.path.join(model_dir, "adapter_config.json"))
+
+ def _has_full_model(model_dir: str) -> bool:
+     names = ["pytorch_model.bin", "model.safetensors", "consolidated.safetensors"]
+     has_weight = any(os.path.exists(os.path.join(model_dir, n)) for n in names)
+     has_cfg = os.path.exists(os.path.join(model_dir, "config.json"))
+     return has_weight and has_cfg
+
+ def _has_tokenizer_files(path: str) -> bool:
+     if not path: return False
+     return any(os.path.exists(os.path.join(path, n)) for n in [
+         "tokenizer.model","tokenizer.json","vocab.json","merges.txt"
+     ])
+
+ def _load_tokenizer_pref_local(local_dir: str, fallback_dir: str):
+     tried = []
+     def _try(path, fast):
+         tried.append(f"{path} (fast={fast})")
+         return AutoTokenizer.from_pretrained(path, trust_remote_code=True, use_fast=fast)
+
+     if local_dir and os.path.exists(os.path.join(local_dir, "tokenizer.model")):
+         try:
+             tok = _try(local_dir, False)
+             if tok.pad_token is None: tok.pad_token = tok.eos_token
+             print(f"🔤 Tokenizer OK: {local_dir} (use_fast=False, tokenizer.model)")
+             return tok
+         except Exception as e:
+             print(f"⚠️ Local slow tokenizer load failed: {e}")
+
+     if local_dir and os.path.exists(os.path.join(local_dir, "tokenizer.json")):
+         try:
+             tok = _try(local_dir, True)
+             if tok.pad_token is None: tok.pad_token = tok.eos_token
+             print(f"🔤 Tokenizer OK: {local_dir} (use_fast=True, tokenizer.json)")
+             return tok
+         except Exception as e:
+             print(f"⚠️ Local fast tokenizer load failed: {e}")
+
+     for fast in (True, False):
+         try:
+             tok = _try(fallback_dir, fast)
+             if tok.pad_token is None: tok.pad_token = tok.eos_token
+             print(f"🔤 Tokenizer OK: {fallback_dir} (use_fast={fast})")
+             return tok
+         except Exception as e:
+             print(f"⚠️ Fallback (fast={fast}) failed: {e}")
+
+     raise RuntimeError("All tokenizer load attempts failed.")
+
+ def load_model_for_chat(model_dir: str, tokenizer_dir: str | None = None):
+     # On a Space the model folder is assumed to sit inside the repo → treat it as a local directory
+     if not os.path.isdir(model_dir):
+         raise FileNotFoundError(f"Model folder not found: {model_dir}")
+     print(f"▶ Model folder: {model_dir}")
+
+     attn_impl = _pick_attn_impl()
+     is_adapter = _is_peft_adapter(model_dir)
+     is_full = _has_full_model(model_dir)
+
+     tk_dir = tokenizer_dir if tokenizer_dir else (model_dir if _has_tokenizer_files(model_dir) else BASE_MODEL_PATH)
+     print(f"🔎 Selected tokenizer path: {tk_dir}")
+     tok = _load_tokenizer_pref_local(tk_dir, BASE_MODEL_PATH)
+
+     if is_adapter and not is_full:
+         print("📦 Detected PEFT LoRA adapter → loading base (SOLAR), then applying the adapter")
+         try:
+             base = AutoModelForCausalLM.from_pretrained(
+                 BASE_MODEL_PATH, torch_dtype=torch.float16,
+                 device_map="auto", trust_remote_code=True, attn_implementation=attn_impl
+             )
+         except Exception as e:
+             if attn_impl == "flash_attention_2":
+                 print(f"⚠️ flash-attn failed → falling back to SDPA: {e}")
+                 base = AutoModelForCausalLM.from_pretrained(
+                     BASE_MODEL_PATH, torch_dtype=torch.float16,
+                     device_map="auto", trust_remote_code=True, attn_implementation="sdpa"
+                 )
+             else:
+                 raise
+         model = PeftModel.from_pretrained(base, model_dir, offload_folder="offload")
+         try:
+             model = model.merge_and_unload()
+             print("✅ Adapter merge (merge_and_unload) complete")
+         except Exception as e:
+             print(f"ℹ️ Merge skipped: {e}")
+         model.eval()
+         print("✅ Model loaded!")
+         return model, tok
+
+     print("📦 Detected merged (full) model or plain folder → loading directly from it")
+     try:
+         model = AutoModelForCausalLM.from_pretrained(
+             model_dir, torch_dtype=torch.float16,
+             device_map="auto", trust_remote_code=True, attn_implementation=attn_impl
+         )
+     except Exception as e:
+         if attn_impl == "flash_attention_2":
+             print(f"⚠️ flash-attn failed → falling back to SDPA: {e}")
+             model = AutoModelForCausalLM.from_pretrained(
+                 model_dir, torch_dtype=torch.float16,
+                 device_map="auto", trust_remote_code=True, attn_implementation="sdpa"
+             )
+         else:
+             raise
+     model.eval()
+     print("✅ Model loaded!")
+     return model, tok
+
+ # =========================
+ # Dictionary / profanity
+ # =========================
+
+ def load_dictionary(path=DICT_PATH):
+     if os.path.exists(path):
+         with open(path, "r", encoding="utf-8") as f:
+             words = set(w.strip() for w in f if w.strip())
+         print(f"📚 Dictionary loaded: {path} ({len(words)} words)")
+         return words
+     print(f"📚 No dictionary at {path} (OOV check weakened)")
+     return set()
+
+ def load_profanity(path=PROFANITY_PATH):
+     prof = set(DEFAULT_PROFANITY)
+     if path and os.path.exists(path):
+         with open(path, "r", encoding="utf-8") as f:
+             for line in f:
+                 w = line.strip()
+                 if w: prof.add(w)
+         print(f"📝 Additional profanity list loaded: {path}")
+     return prof
+
+ # =========================
+ # Preprocessing / checks
+ # =========================
+
+ RE_LAUGH = re.compile(r'(ㅋ|ㅎ|ㅠ|ㅜ)\1{2,}')
+ RE_EN = re.compile(r'[A-Za-z]+')
+ RE_WORDS = re.compile(r'[가-힣]{2,}')
+
+ def build_bad_words_ids(tokenizer):
+     ids = [tokenizer(w, add_special_tokens=False).input_ids for w in META_BANS]
+     for ch in list("abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ"):
+         ids.append(tokenizer(ch, add_special_tokens=False).input_ids)
+     if BAN_JAMO:
+         for code in list(range(0x1100, 0x11FF+1)) + list(range(0x3130, 0x318F+1)):
+             ch = chr(code)
+             if ch in WHITELIST_JAMO:
+                 continue
+             ids.append(tokenizer(ch, add_special_tokens=False).input_ids)
+     return ids
+
+ def clean_text(txt: str):
+     if not KEEP_REPEATS:
+         txt = RE_LAUGH.sub(lambda m: m.group(1)*2, txt)
+     txt = RE_EN.sub('', txt)
+     cut = txt.split("### User:")[0]
+     return cut.strip()
+
+ def count_oov(txt: str, dictionary, allowlist):
+     words = RE_WORDS.findall(txt)
+     oov = [w for w in words if (w not in dictionary and w not in allowlist)]
+     return len(oov), oov
+
+ def strip_oov(txt: str, dictionary, allowlist):
+     kept, i = [], 0
+     while i < len(txt):
+         m = RE_WORDS.search(txt, i)
+         if not m:
+             kept.append(txt[i:]); break
+         kept.append(txt[i:m.start()])
+         w = m.group(0)
+         if (w in dictionary) or (w in allowlist):
+             kept.append(w)
+         i = m.end()
+     out = "".join(kept)
+     out = re.sub(r'\s{2,}', ' ', out).strip()
+     return out
+
+ SMALLTALK_PATTERNS = [
+     r'오늘\s*날씨', r'\b날씨\s*(가|는)?\s*(좋|괜찮|별로|따뜻|쌀쌀|시원|선선)',
+     r'(하늘|기온|미세먼지)\s*(이|가)?\s*(좋|맑|깨끗|나쁨|흐림)',
+     r'(더워|추워)\b', r'비(\s*가)?\s*(온|와|왔|올)\b'
+ ]
+ SMALLTALK_REGEXES = [re.compile(p) for p in SMALLTALK_PATTERNS]
+
+ def normalize_for_sim(s: str):
+     s = re.sub(r'\s+','',s)
+     s = re.sub(r'[.!?~…]+','',s)
+     s = re.sub(r'(.)\1{2,}', r'\1\1', s)
+     return s
+
+ def looks_smalltalk(text: str):
+     t = normalize_for_sim(text)
+     if "오늘날씨좋았어" in t:
+         return True
+     return any(rx.search(text) for rx in SMALLTALK_REGEXES)
+
+ def too_similar_to_history(text: str, history_texts, thresh=0.86):
+     t1 = normalize_for_sim(text)
+     for h in history_texts:
+         t2 = normalize_for_sim(h)
+         if difflib.SequenceMatcher(None, t1, t2).ratio() >= thresh:
+             return True
+     return False
+
+ # =========================
+ # Deadpan style
+ # =========================
+
+ DEADPAN_TRIGGERS = [
+     "심심","귀찮","짜증","싫","하..","휴","후","지루","그만","피곤","죽였어","개소리","뭐래","에휴","흥미없",
+     "아...", "음....", ";;;;", "어쩌라고", "그건 본인 사정이죠", "그건 니사정이지"
+ ]
+
+ def should_deadpan(user_text: str):
+     mode = STYLE_MODE
+     if mode == "deadpan":
+         return True
+     if mode == "neutral":
+         return False
+     return any(k in user_text for k in DEADPAN_TRIGGERS)
+
+ def postprocess_deadpan(reply: str):
+     reply = reply.replace("!", ".")
+     reply = re.sub(r'[~…]+', '...', reply)
+     if len(reply) > 120:
+         cut = re.split(r'([.다]\s)', reply, maxsplit=1)
+         if cut and len("".join(cut[:2])) > 0:
+             reply = "".join(cut[:2]).strip()
+         reply = reply[:120].rstrip() + "..."
+     if not reply.startswith(("음", "아니", "흠", "글쎄")):
+         reply = random.choice(["음.. ","아니.. ","흠.. ","글쎄.. "]) + reply
+     if random.random() < 0.3 and not reply.endswith(("..","...",".")):
+         reply = reply + "..."
+     return reply.strip()
+
+ # =========================
+ # Decoding (lightweight)
+ # =========================
+
+ def decode_once(model, tok, prompt, bad_words_ids, *, deadpan=False):
+     # keep max_new_tokens small for speed
+     if deadpan:
+         cfg = dict(do_sample=True, temperature=0.25, top_p=0.85, max_new_tokens=96)
+     elif STRICT_MODE:
+         cfg = dict(do_sample=True, temperature=0.35, top_p=0.88, max_new_tokens=128)
+     else:
+         cfg = dict(do_sample=True, temperature=0.5, top_p=0.9, max_new_tokens=128)
+
+     inputs = tok(prompt, return_tensors="pt").to(model.device)
+     with torch.no_grad():
+         out = model.generate(
+             **inputs,
+             repetition_penalty=1.12,
+             no_repeat_ngram_size=3,
+             eos_token_id=tok.eos_token_id,
+             pad_token_id=tok.pad_token_id,
+             bad_words_ids=bad_words_ids,
+             **cfg
+         )
+     gen = tok.decode(out[0][inputs.input_ids.shape[1]:], skip_special_tokens=True)
+     return clean_text(gen)
+
+ # =========================
+ # System prompt
+ # =========================
+
+ SYSTEM_PROMPT = (
+     "너는 사용자의 가장 친한 친구야. 순수한 한글 구어체로 말해. "
+     f"영문/불필요한 낱자 자모 금지(허용: {','.join(sorted(WHITELIST_JAMO))}). "
+     "메타 단어('AI','인공지능','챗봇','도와줄게','역할') 금지. "
+     "필요하면 짧고 건조하게 답해도 돼.\n\n"
+     "--- 대화 예시 ---\n"
+     "User: 넌 누구야?\n"
+     "Assistant: 나는.. 인간이야..\n"
+     "User: 무슨 일 해?\n"
+     "Assistant: 그냥 있어..\n"
+     "User: 심심하다\n"
+     "Assistant: 음.. 뭐 할래? 산책?\n"
+     "--- 여기까지 예시 ---\n\n"
  )

+ # =========================
+ # Global initialization
+ # =========================
+
+ print("🚀 Loading model/tokenizer...")
+ model, tokenizer = load_model_for_chat(MODEL_DIR, tokenizer_dir=None)
+ dictionary = load_dictionary()
+ profanity = load_profanity()
+ bad_words_ids = build_bad_words_ids(tokenizer)
+ print("✅ Initialization complete")
+
+ # =========================
+ # Gradio chat function
+ # =========================
+
+ def chat_fn(user_input, history):
+     # history: list of (user, bot) tuples
+     messages = [{"role":"system","content":SYSTEM_PROMPT}]
+     for u, b in history[-5:]:  # use only the last 5 turns
+         messages.append({"role":"user","content":u})
+         messages.append({"role":"assistant","content":b})
+     messages.append({"role":"user","content":user_input})
+
+     prompt = tokenizer.apply_chat_template(
+         messages, tokenize=False, add_generation_prompt=True
+     )

+     deadpan = should_deadpan(user_input)
+
+     # single generation pass (no retries; safety checks are off by default)
+     reply = decode_once(model, tokenizer, prompt, bad_words_ids, deadpan=deadpan)
+     oov_cnt, _ = count_oov(reply, dictionary, profanity)
+
+     # strip out-of-vocabulary words if needed
+     if OOV_STRIP and oov_cnt > 0:
+         reply = strip_oov(reply, dictionary, profanity)
+
+     if deadpan:
+         reply = postprocess_deadpan(reply)
+
+     return reply
+
+ # =========================
+ # Gradio UI
+ # =========================
+
+ demo = gr.ChatInterface(
+     fn=chat_fn,
+     title="SOLAR 친구 챗봇",
+     description="SOLAR-10.7B 기반 한글 친구 챗봇 (가벼운 설정)",
+     examples=["야 나 오늘 개피곤하다", "이직할까 말까 고민중이야", "나 좀 칭찬해줘"],
+ )

  if __name__ == "__main__":
      demo.launch()
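
The new app.py takes all of its configuration from environment variables (MODEL_DIR, DICT_PATH, PROFANITY_PATH, STYLE_MODE, USE_FLASH_ATTN, and so on). A minimal local smoke-test sketch, assuming the merged model folder sits next to the script and that importing app.py (which loads the model at import time) is acceptable on your machine; the paths below are illustrative, not part of the commit:

    import os

    # Illustrative paths — point these at wherever the model and dictionary actually live.
    os.environ["MODEL_DIR"] = "./my-solar-chatbot-merged"
    os.environ["DICT_PATH"] = "./dictionaries/korean_words.txt"
    os.environ["USE_FLASH_ATTN"] = "0"  # fall back to SDPA when flash-attn is not installed
    os.environ["STYLE_MODE"] = "auto"   # auto | deadpan | neutral

    import app  # module-level code loads the model and tokenizer here

    # Single call to the chat function with an empty history, reusing one of the UI examples.
    print(app.chat_fn("야 나 오늘 개피곤하다", []))

Running python app.py directly is unchanged and still ends in demo.launch().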