import os
import re
import gradio as gr
# --------- CPU hygiene (nice-to-have) ----------
os.environ["TOKENIZERS_PARALLELISM"] = "false"
try:
    import torch
    try:
        torch.set_num_threads(2)  # smoother on 2 vCPUs
    except Exception:
        pass
except Exception:
    pass
from transformers import pipeline
from vaderSentiment.vaderSentiment import SentimentIntensityAnalyzer
# -------- Model / bot configuration --------
GEN_MODEL_NAME = "MBZUAI/LaMini-Flan-T5-248M" # CPU-friendly text2text model
WELCOME_IMAGE_PATH = "assets/cat.png" # your local image
DOMAIN_INSTRUCTIONS = (
    "You are a concise assistant about cats in ancient Egypt. "
    "Keep focus on Bastet, cat mummies, daily life, worship, and other ancient Egypt facts. "
    "If the user asks something unrelated, say briefly that you only cover those topics and suggest one. "
    "Do not include greetings or apologies and do not say phrases like 'as an AI language model'. "
    "Start directly with the answer."
)
HELP_TEXT = (
    "Ask me about: Bastet • cat mummies • daily life • worship\n"
    "Type anything else to try the AI fallback."
)
WELCOME_TEXT = (
    "Hi! I share facts about cats in ancient Egypt.\n\n" + HELP_TEXT
)
# -------- Output cleanup (remove model-added fluff) --------
DISCLAIMER_PATTERNS = [
    r"^\s*(hi|hello|hey)[,!.?\s-]*",              # stray greeting
    r"^\s*i'?m\s+sorr(y|ied)[^.\n]*[.\n]*",       # leading apology
    r"^\s*as an ai language model[^.\n]*[.\n]*",  # “as an AI language model…”
]
def strip_preamble(text: str) -> str:
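    """Remove greeting/apology boilerplate that the model sometimes prepends.

    Illustrative example (hypothetical input/output):
        strip_preamble("Hi! Cats guarded grain stores.")
        -> "Cats guarded grain stores."
    """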
    t = text or ""
    for pat in DISCLAIMER_PATTERNS:
        t = re.sub(pat, "", t, flags=re.IGNORECASE)
    return t.strip()
# -------- Lazy singletons --------
_t2t = None
_vader = None
def get_t2t():
"""Lazy-load the text2text pipeline (LaMini-Flan-T5)."""
global _t2t
if _t2t is None:
_t2t = pipeline(
"text2text-generation",
model=GEN_MODEL_NAME,
tokenizer=GEN_MODEL_NAME
)
print(f"[startup] Loaded model: {GEN_MODEL_NAME}")
return _t2t
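# Note: the first get_t2t() call downloads the model weights from the
# Hugging Face Hub (roughly 1 GB for a 248M-parameter T5 in fp32; an
# estimate, not a measured figure) and caches them for later runs.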
def get_vader():
"""Lazy-load the VADER sentiment analyzer."""
global _vader
if _vader is None:
_vader = SentimentIntensityAnalyzer()
print("[startup] Loaded VADER sentiment analyzer")
return _vader
# -------- Sentiment helpers --------
def detect_sentiment_bucket(text: str):
"""
Return ('neg'|'neu'|'pos', compound_score) using VADER.
Thresholds chosen for clear buckets in chat settings.
"""
scores = get_vader().polarity_scores(text or "")
c = scores.get("compound", 0.0)
if c <= -0.4:
return "neg", c
if c >= 0.4:
return "pos", c
return "neu", c
def is_question(text: str) -> bool:
    t = (text or "").strip()
    if "?" in t:
        return True
    # Heuristic for question-like openings
    return bool(re.match(
        r"^(who|what|when|where|why|how|do|does|did|can|could|is|are|was|were|should|would|will)\b",
        t.lower()
    ))
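# e.g. is_question("how were cats mummified") -> True (interrogative opener)
#      is_question("tell me about Bastet")    -> False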
def is_thanks_or_praise(text: str) -> bool:
    t = (text or "").lower()
    return any(k in t for k in [
        "thanks", "thank you", "appreciate", "appreciated",
        "great answer", "nice", "awesome", "love", "helpful",
        "that helped", "that was good", "i like your response"
    ])
POS_QUESTION_PREFIXES = [
    "Good question! ",
    "Nice one—here’s the gist: ",
    "Let’s dig in. ",
]
POS_PRAISE_PREFIXES = [
    "You’re welcome—glad that helped. ",
    "Appreciate the kind words! ",
    "Happy it was useful. ",
]
POS_STATEMENT_PREFIXES = [
    "Sounds good. ",
    "Got it. ",
    "All right—here’s the short version. ",
]
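# Only the first entry of each list is used today (choose_positive_prefix
# indexes [0]); the extra entries leave room for random.choice-style variety.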
# Your custom negative message:
NEG_PREFIX = "Calm down. You're being a little too negative! "
def choose_positive_prefix(message: str) -> str:
    if is_thanks_or_praise(message):
        return POS_PRAISE_PREFIXES[0]
    if is_question(message):
        return POS_QUESTION_PREFIXES[0]
    return POS_STATEMENT_PREFIXES[0]
def apply_tone_prefix(reply_text: str, bucket: str, message: str = "") -> str:
if bucket == "pos":
prefix = choose_positive_prefix(message)
elif bucket == "neg":
prefix = NEG_PREFIX
else:
prefix = ""
return (prefix + (reply_text or "")).strip()
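# e.g. apply_tone_prefix("Bastet's cult center was Bubastis.", "pos", "thanks!")
#   -> "You’re welcome—glad that helped. Bastet's cult center was Bubastis."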
# ---- LLM fallback (used when rules don't match) ----
def ai_fallback(prompt: str) -> str:
    try:
        gen = get_t2t()
        prefixed = (
            f"{DOMAIN_INSTRUCTIONS}\n\n"
            f"User: {prompt}\n"
            f"Assistant:"
        )
        out = gen(
            prefixed,
            max_new_tokens=48,
            do_sample=False,
            no_repeat_ngram_size=3
        )[0]["generated_text"]
        return strip_preamble(out)
    except Exception as e:
        print("AI fallback error:", repr(e))
        return "AI fallback had an issue. Please try a simpler question or use the topics in 'help'."
# -------- Chat logic --------
def reply(message, history):
    # 1) sentiment first (on the raw user text)
    bucket, _score = detect_sentiment_bucket(message or "")
    # 2) rules-first responses
    msg = (message or "").strip().lower()
    # Robust greeting/help rule: catches "hi there", "hello!", etc.
    if re.search(r"\b(hi|hello|hey|hiya|yo|greetings)\b", msg) or any(
        k in msg for k in ["help", "menu", "topics", "instructions"]
    ):
        base = WELCOME_TEXT  # same greeting + topic list as the welcome message
    elif "bastet" in msg or "bast" in msg:
        base = "Bastet (later cat-headed) … major cult center at Bubastis in the Nile Delta."
    elif any(w in msg for w in ["mummy", "mummies", "mummified", "offering"]):
        base = "Millions of animal mummies (cats common), esp. Late Period (664–332 BCE)."
    elif any(w in msg for w in ["daily", "life", "pest", "mouse", "rat", "snake"]):
        base = "Cats protected grain stores; art shows them under chairs/on leashes with owners."
    elif any(w in msg for w in ["worship", "god", "goddess", "taboo"]):
        base = "People didn’t worship pet cats as gods; they revered cats via Bastet and votive offerings."
    else:
        base = ai_fallback(message)
    # 3) tone wrapper (content unchanged)
    return apply_tone_prefix(base, bucket, message)
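# Example flow: "tell me about cat mummies" -> VADER scores it neutral ("neu"),
# the "mummies" keyword rule fires, and no tone prefix is added.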
# -------- UI --------
# Preload a welcome message that includes your local image + short intro text.
# Gradio's messages format takes one content item per message, so the image
# and the intro text go out as two assistant messages. This assumes a recent
# Gradio (4.x/5.x) where media content can be a {"path": ...} dict; alt_text
# is passed through where supported.
initial_messages = [
    {"role": "assistant", "content": {"path": WELCOME_IMAGE_PATH, "alt_text": "Stylized Bastet cat image"}},
    {"role": "assistant", "content": WELCOME_TEXT},
]
chatbot_component = gr.Chatbot(
    type="messages",         # avoids deprecated tuples format
    value=initial_messages,  # show welcome image + text on load
    show_label=False,
)
demo = gr.ChatInterface(
    fn=reply,
    title="😺 Cats of Ancient Egypt Chatbot 😺",
    type="messages",  # keep history in the same messages format as the chatbot
    chatbot=chatbot_component,
)
if __name__ == "__main__":
    demo.launch()
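# To run locally (assumed dependencies; the file name "app.py" is hypothetical):
#   pip install gradio transformers torch vaderSentiment
#   python app.py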