import os
import gradio as gr

# --------- CPU hygiene (nice-to-have) ----------
os.environ["TOKENIZERS_PARALLELISM"] = "false"
try:
    import torch
    try:
        torch.set_num_threads(2)  # smoother on 2 vCPUs
    except Exception:
        pass
except Exception:
    pass

from transformers import pipeline
from vaderSentiment.vaderSentiment import SentimentIntensityAnalyzer

# -------- Model / bot configuration --------
GEN_MODEL_NAME = "MBZUAI/LaMini-Flan-T5-248M"  # CPU-friendly text2text model

DOMAIN_INSTRUCTIONS = (
    "You are a concise assistant about cats in ancient Egypt. "
    "Keep focus on Bastet, cat mummies, daily life, worship, and other ancient Egypt facts. "
    "If the user asks something unrelated, say briefly that you only cover those topics and suggest one."
)

HELP_TEXT = (
    "Ask me about: Bastet • cat mummies • daily life • worship\n"
    "Type anything else to try the AI fallback."
)

# -------- Lazy singletons --------
_t2t = None
_vader = None

def get_t2t():
    """Lazy-load the text2text pipeline (LaMini-Flan-T5)."""
    global _t2t
    if _t2t is None:
        _t2t = pipeline(
            "text2text-generation",
            model=GEN_MODEL_NAME,
            tokenizer=GEN_MODEL_NAME
        )
        print(f"[startup] Loaded model: {GEN_MODEL_NAME}")
    return _t2t
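
# Note: the first call to get_t2t() downloads and loads the model weights,
# so the first AI-fallback reply is typically much slower than later ones.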

def get_vader():
    """Lazy-load the VADER sentiment analyzer."""
    global _vader
    if _vader is None:
        _vader = SentimentIntensityAnalyzer()
        print("[startup] Loaded VADER sentiment analyzer")
    return _vader

# -------- Helpers --------
def detect_sentiment_bucket(text: str):
    """
    Return ('neg'|'neu'|'pos', compound_score).
    Thresholds chosen for clear buckets in chat settings.
    """
    scores = get_vader().polarity_scores(text or "")
    c = scores.get("compound", 0.0)
    if c <= -0.4:
        return "neg", c
    if c >= 0.4:
        return "pos", c
    return "neu", c

def apply_tone_prefix(reply_text: str, bucket: str) -> str:
    """Prepend a tiny tone wrapper without changing factual content."""
    if bucket == "pos":
        prefix = "Great question! "
    elif bucket == "neg":
        prefix = "Calm down. You're being a little too negative! "
    else:
        prefix = ""
    return (prefix + (reply_text or "")).strip()
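
# Example (per the code above): apply_tone_prefix("Bastet was revered.", "pos")
# returns "Great question! Bastet was revered."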

# ---- Fall back to the LLM when no rule below matches the user prompt ----
def ai_fallback(prompt: str) -> str:
    try:
        gen = get_t2t()
        prefixed = (
            f"{DOMAIN_INSTRUCTIONS}\n\n"
            f"User: {prompt}\n"
            f"Assistant:"
        )
        out = gen(
            prefixed,
            max_new_tokens=48,
            do_sample=False
        )[0]["generated_text"]
        return (out or "").strip()
    except Exception as e:
        print("AI fallback error:", repr(e))
        return "AI fallback had an issue. Please try a simpler question or use the topics in 'help'."

# -------- Chat logic --------
def reply(message, history):
    # 1) sentiment first (on the raw user text)
    bucket, _score = detect_sentiment_bucket(message or "")

    # 2) rules-first responses
    msg = (message or "").strip().lower()
    if msg in {"hi", "hello", "hey"} or "help" in msg:
        base = "Hi! I share facts about cats in ancient Egypt.\n\n" + HELP_TEXT
    elif "bastet" in msg or "bast" in msg:
        base = "Bastet (later cat-headed) … major cult center at Bubastis in the Nile Delta."
    elif any(w in msg for w in ["mummy", "mummies", "mummified", "offering"]):
        base = "Millions of animal mummies (cats common), esp. Late Period (664–332 BCE)."
    elif any(w in msg for w in ["daily", "life", "pest", "mouse", "rat", "snake"]):
        base = "Cats protected grain stores; art shows them under chairs/on leashes with owners."
    elif any(w in msg for w in ["worship", "god", "goddess", "taboo"]):
        base = "People didn’t worship pet cats as gods; they revered cats via Bastet and votive offerings."
    else:
        base = ai_fallback(message)

    # 3) tone wrapper (content unchanged)
    return apply_tone_prefix(base, bucket)

# -------- UI --------
demo = gr.ChatInterface(
    fn=reply,
    title="Cats of Ancient Egypt Chatbot"
)
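
# Optional smoke test (a sketch, not part of the original app; the
# CHATBOT_SMOKE_TEST env var name is illustrative). The probes hit the
# rule-based branches, so no model download is triggered.
if __name__ == "__main__" and os.environ.get("CHATBOT_SMOKE_TEST"):
    for probe in ("hi", "tell me about bastet", "why so many cat mummies?"):
        print(f"> {probe}\n{reply(probe, [])}\n")
    raise SystemExit(0)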

if __name__ == "__main__":
    demo.launch()