import os
import re
import gradio as gr
# --------- CPU hygiene (nice-to-have) ----------
os.environ["TOKENIZERS_PARALLELISM"] = "false"
try:
    import torch
    try:
        torch.set_num_threads(2)  # smoother on 2 vCPUs
    except Exception:
        pass
except Exception:
    pass
from transformers import pipeline
from vaderSentiment.vaderSentiment import SentimentIntensityAnalyzer
# -------- Model / bot configuration --------
GEN_MODEL_NAME = "MBZUAI/LaMini-Flan-T5-248M" # CPU-friendly text2text model
WELCOME_IMAGE_PATH = "assets/cat.png" # your local image
DOMAIN_INSTRUCTIONS = (
    "You are a concise assistant about cats in ancient Egypt. "
    "Keep focus on Bastet, cat mummies, daily life, worship, and other ancient Egypt facts. "
    "If the user asks something unrelated, say briefly that you only cover those topics and suggest one. "
    "Do not include greetings or apologies and do not say phrases like 'as an AI language model'. "
    "Start directly with the answer."
)
HELP_TEXT = (
    "Ask me about: Bastet • cat mummies • daily life • worship\n"
    "Type anything else to try the AI fallback."
)
WELCOME_TEXT = (
    "Hi! I share facts about cats in ancient Egypt.\n\n" + HELP_TEXT
)
# -------- Output cleanup (remove model-added fluff) --------
DISCLAIMER_PATTERNS = [
    r"^\s*(hi|hello|hey)[,!.?\s-]*",              # stray greeting
    r"^\s*i'?m\s+sorr(y|ied)[^.\n]*[.\n]*",       # leading apology
    r"^\s*as an ai language model[^.\n]*[.\n]*",  # “as an AI language model…”
]
def strip_preamble(text: str) -> str:
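    """Remove greeting/apology boilerplate that the model sometimes prepends.

    Illustrative example (hypothetical input/output):
        strip_preamble("Hi! Cats guarded grain stores.")
        -> "Cats guarded grain stores."
    """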
    t = text or ""
    for pat in DISCLAIMER_PATTERNS:
        t = re.sub(pat, "", t, flags=re.IGNORECASE)
    return t.strip()
# -------- Lazy singletons --------
_t2t = None
_vader = None
def get_t2t():
"""Lazy-load the text2text pipeline (LaMini-Flan-T5)."""
global _t2t
if _t2t is None:
_t2t = pipeline(
"text2text-generation",
model=GEN_MODEL_NAME,
tokenizer=GEN_MODEL_NAME
)
print(f"[startup] Loaded model: {GEN_MODEL_NAME}")
return _t2t
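# Note: the first get_t2t() call downloads the model weights from the
# Hugging Face Hub (roughly 1 GB for a 248M-parameter T5 in fp32; an
# estimate, not a measured figure) and caches them for later runs.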
def get_vader():
"""Lazy-load the VADER sentiment analyzer."""
global _vader
if _vader is None:
_vader = SentimentIntensityAnalyzer()
print("[startup] Loaded VADER sentiment analyzer")
return _vader
# -------- Sentiment helpers --------
def detect_sentiment_bucket(text: str):
"""
Return ('neg'|'neu'|'pos', compound_score) using VADER.
Thresholds chosen for clear buckets in chat settings.
"""
scores = get_vader().polarity_scores(text or "")
c = scores.get("compound", 0.0)
if c <= -0.4:
return "neg", c
if c >= 0.4:
return "pos", c
return "neu", c
def is_question(text: str) -> bool:
    t = (text or "").strip()
    if "?" in t:
        return True
    # Heuristic for question-like openings
    return bool(re.match(
        r"^(who|what|when|where|why|how|do|does|did|can|could|is|are|was|were|should|would|will)\b",
        t.lower()
    ))
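# e.g. is_question("how were cats mummified") -> True (interrogative opener)
#      is_question("tell me about Bastet")    -> False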
def is_thanks_or_praise(text: str) -> bool:
    t = (text or "").lower()
    return any(k in t for k in [
        "thanks", "thank you", "appreciate", "appreciated",
        "great answer", "nice", "awesome", "love", "helpful",
        "that helped", "that was good", "i like your response"
    ])
POS_QUESTION_PREFIXES = [
    "Good question! ",
    "Nice one—here’s the gist: ",
    "Let’s dig in. ",
]
POS_PRAISE_PREFIXES = [
    "You’re welcome—glad that helped. ",
    "Appreciate the kind words! ",
    "Happy it was useful. ",
]
POS_STATEMENT_PREFIXES = [
    "Sounds good. ",
    "Got it. ",
    "All right—here’s the short version. ",
]
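# Only the first entry of each list is used today (choose_positive_prefix
# indexes [0]); the extra entries leave room for random.choice-style variety.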
# Your custom negative message:
NEG_PREFIX = "Calm down. You're being a little too negative! "
def choose_positive_prefix(message: str) -> str:
    if is_thanks_or_praise(message):
        return POS_PRAISE_PREFIXES[0]
    if is_question(message):
        return POS_QUESTION_PREFIXES[0]
    return POS_STATEMENT_PREFIXES[0]
def apply_tone_prefix(reply_text: str, bucket: str, message: str = "") -> str:
if bucket == "pos":
prefix = choose_positive_prefix(message)
elif bucket == "neg":
prefix = NEG_PREFIX
else:
prefix = ""
return (prefix + (reply_text or "")).strip()
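# e.g. apply_tone_prefix("Bastet's cult center was Bubastis.", "pos", "thanks!")
#   -> "You’re welcome—glad that helped. Bastet's cult center was Bubastis."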
# ---- LLM fallback (used when rules don't match) ----
def ai_fallback(prompt: str) -> str:
    try:
        gen = get_t2t()
        prefixed = (
            f"{DOMAIN_INSTRUCTIONS}\n\n"
            f"User: {prompt}\n"
            f"Assistant:"
        )
        out = gen(
            prefixed,
            max_new_tokens=48,
            do_sample=False,
            no_repeat_ngram_size=3
        )[0]["generated_text"]
        return strip_preamble(out)
    except Exception as e:
        print("AI fallback error:", repr(e))
        return "AI fallback had an issue. Please try a simpler question or use the topics in 'help'."
# -------- Chat logic --------
def reply(message, history):
    # 1) sentiment first (on the raw user text)
    bucket, _score = detect_sentiment_bucket(message or "")
    # 2) rules-first responses
    msg = (message or "").strip().lower()
    # Robust greeting/help rule: catches "hi there", "hello!", etc.
    if re.search(r"\b(hi|hello|hey|hiya|yo|greetings)\b", msg) or any(
        k in msg for k in ["help", "menu", "topics", "instructions"]
    ):
        base = WELCOME_TEXT  # same greeting + topic list as the welcome message
    elif "bastet" in msg or "bast" in msg:
        base = "Bastet (later cat-headed) … major cult center at Bubastis in the Nile Delta."
    elif any(w in msg for w in ["mummy", "mummies", "mummified", "offering"]):
        base = "Millions of animal mummies (cats common), esp. Late Period (664–332 BCE)."
    elif any(w in msg for w in ["daily", "life", "pest", "mouse", "rat", "snake"]):
        base = "Cats protected grain stores; art shows them under chairs/on leashes with owners."
    elif any(w in msg for w in ["worship", "god", "goddess", "taboo"]):
        base = "People didn’t worship pet cats as gods; they revered cats via Bastet and votive offerings."
    else:
        base = ai_fallback(message)
    # 3) tone wrapper (content unchanged)
    return apply_tone_prefix(base, bucket, message)
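# Example flow: "tell me about cat mummies" -> VADER scores it neutral ("neu"),
# the "mummies" keyword rule fires, and no tone prefix is added.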
# -------- UI --------
# Preload a welcome message that includes your local image + short intro text.
# Gradio's messages format takes one content item per message, so the image
# and the intro text go out as two assistant messages. This assumes a recent
# Gradio (4.x/5.x) where media content can be a {"path": ...} dict; alt_text
# is passed through where supported.
initial_messages = [
    {"role": "assistant", "content": {"path": WELCOME_IMAGE_PATH, "alt_text": "Stylized Bastet cat image"}},
    {"role": "assistant", "content": WELCOME_TEXT},
]
chatbot_component = gr.Chatbot(
    type="messages",         # avoids deprecated tuples format
    value=initial_messages,  # show welcome image + text on load
    show_label=False,
)
demo = gr.ChatInterface(
    fn=reply,
    title="😺 Cats of Ancient Egypt Chatbot 😺",
    type="messages",  # keep history in the same messages format as the chatbot
    chatbot=chatbot_component,
)
if __name__ == "__main__":
    demo.launch()
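# To run locally (assumed dependencies; the file name "app.py" is hypothetical):
#   pip install gradio transformers torch vaderSentiment
#   python app.py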