Spaces:
No application file
No application file
Upload 5 files
Browse files- .gitattributes +1 -0
- app.py +344 -0
- requirements.txt +11 -0
- wedding_docs/Cherry&Samuel Wedding plan version 1.1.docx +3 -0
- wedding_docs/Samuel wedding locations.csv +8 -0
- wedding_docs/Samuel wedding schedule.csv +87 -0
.gitattributes
CHANGED
|
@@ -33,3 +33,4 @@ saved_model/**/* filter=lfs diff=lfs merge=lfs -text
|
|
| 33 |
*.zip filter=lfs diff=lfs merge=lfs -text
|
| 34 |
*.zst filter=lfs diff=lfs merge=lfs -text
|
| 35 |
*tfevents* filter=lfs diff=lfs merge=lfs -text
|
|
|
|
|
|
| 33 |
*.zip filter=lfs diff=lfs merge=lfs -text
|
| 34 |
*.zst filter=lfs diff=lfs merge=lfs -text
|
| 35 |
*tfevents* filter=lfs diff=lfs merge=lfs -text
|
| 36 |
+
wedding_docs/Cherry&Samuel[[:space:]]Wedding[[:space:]]plan[[:space:]]version[[:space:]]1.1.docx filter=lfs diff=lfs merge=lfs -text
|
app.py
ADDED
|
@@ -0,0 +1,344 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
import os, io, glob, time, hashlib
|
| 2 |
+
import pandas as pd
|
| 3 |
+
import numpy as np
|
| 4 |
+
from typing import List, Tuple
|
| 5 |
+
from dataclasses import dataclass
|
| 6 |
+
from dotenv import load_dotenv
|
| 7 |
+
|
| 8 |
+
# Telegram
|
| 9 |
+
from telegram import Update
|
| 10 |
+
from telegram.constants import ParseMode
|
| 11 |
+
from telegram.ext import Application, CommandHandler, MessageHandler, ContextTypes, filters
|
| 12 |
+
|
| 13 |
+
# OpenAI
|
| 14 |
+
from openai import OpenAI
|
| 15 |
+
|
| 16 |
+
# Files / parsing
|
| 17 |
+
from docx import Document as DocxDocument
|
| 18 |
+
from pypdf import PdfReader
|
| 19 |
+
|
| 20 |
+
# Vector store
|
| 21 |
+
import faiss
|
| 22 |
+
|
| 23 |
+
# Load variables from a local .env file (if present) into the process
# environment. This has to run before the os.getenv() lookups below.
load_dotenv()

import httpx

# Shim kept as-is: give the httpx module a `proxies` attribute when it has none.
# NOTE(review): presumably a workaround for an openai/httpx version mismatch —
# confirm it is still needed with the pinned dependencies.
if not hasattr(httpx, "proxies"):
    httpx.proxies = None

# --- Credentials and model selection (all overridable via environment) ---
OPENAI_API_KEY = os.getenv("OPENAI_API_KEY")
TELEGRAM_BOT_TOKEN = os.getenv("TELEGRAM_BOT_TOKEN")
OPENAI_MODEL = os.getenv("OPENAI_MODEL", "gpt-4o-mini")
EMBEDDING_MODEL = os.getenv("EMBEDDING_MODEL", "text-embedding-3-small")
# Only the exact value "true" (case-insensitive) enables strict mode.
STRICT_DOC_MODE = (os.getenv("STRICT_DOC_MODE", "true").lower() == "true")

# --- Where the source documents and the cached index live ---
DOCS_DIR = os.getenv("DOCS_DIR", "wedding_docs")
INDEX_PATH = os.getenv("INDEX_PATH", "wedding.index")
META_CSV = os.getenv("META_CSV", "wedding_chunks.csv")

# Shared OpenAI client used for both embeddings and chat completions.
client = OpenAI(api_key=OPENAI_API_KEY)
|
| 39 |
+
|
| 40 |
+
# -----------------------------
|
| 41 |
+
# Utilities to read documents
|
| 42 |
+
# -----------------------------
|
| 43 |
+
|
| 44 |
+
def read_txt_md(path: str) -> str:
    """Return the full contents of a plain-text or Markdown file.

    The file is decoded as UTF-8; bytes that are not valid UTF-8 are dropped
    rather than raising.
    """
    with open(path, "r", encoding="utf-8", errors="ignore") as handle:
        contents = handle.read()
    return contents
|
| 47 |
+
|
| 48 |
+
def read_docx(path: str) -> str:
    """Return the text of a .docx file, one paragraph per line.

    Only ``doc.paragraphs`` is consulted; nothing else in the document is read.
    """
    paragraphs = DocxDocument(path).paragraphs
    paragraph_texts = [para.text for para in paragraphs]
    return "\n".join(paragraph_texts)
|
| 51 |
+
|
| 52 |
+
def read_pdf(path: str) -> str:
    """Return the extractable text of a PDF, one page per line group.

    Extraction is best-effort: a page whose text cannot be extracted is
    skipped, but a warning is logged so missing content can be diagnosed
    instead of silently disappearing from the index.

    Args:
        path: Location of the PDF file.

    Returns:
        The page texts joined with newlines. Pages with no extractable text
        contribute an empty string.
    """
    import logging  # local import: keeps this block self-contained

    log = logging.getLogger(__name__)
    texts = []
    for page_number, page in enumerate(PdfReader(path).pages, start=1):
        try:
            extracted = page.extract_text()
        except Exception:
            # Deliberately broad: one unparseable page must not abort the
            # whole document, but it should not vanish without a trace.
            log.warning(
                "Could not extract text from page %d of %s; skipping it.",
                page_number,
                path,
                exc_info=True,
            )
            continue
        texts.append(extracted or "")
    return "\n".join(texts)
|
| 61 |
+
|
| 62 |
+
def read_csv_file(path: str, max_rows: int = 20000, max_chars: int = 400_000) -> str:
    """Render a CSV file as plain text suitable for chunking and embedding.

    The output starts with a ``COLUMNS: ...`` header line, followed by one
    line per row of the form ``col1=value | col2=value | ...``.

    Args:
        path: Location of the CSV file.
        max_rows: Maximum number of data rows to read from the file.
        max_chars: Soft cap on the combined length of the row lines. The row
            that crosses the cap is kept, then a truncation marker is added
            and reading stops.

    Returns:
        The rendered text, or an empty string when the file contains no
        parseable columns at all.
    """
    read_options = dict(
        dtype=str,              # keep every cell as text, no numeric inference
        on_bad_lines="skip",    # pandas>=1.4
        nrows=max_rows,         # safeguard for very large CSVs
    )
    try:
        try:
            df = pd.read_csv(path, encoding="utf-8", **read_options)
        except UnicodeDecodeError:
            # Fallback for files that are not valid UTF-8.
            df = pd.read_csv(path, encoding="latin-1", **read_options)
    except pd.errors.EmptyDataError:
        # A zero-byte / header-less file has nothing to index; returning ""
        # lets the caller skip it instead of failing the whole load.
        return ""

    df = df.fillna("")

    cols = list(df.columns)
    schema_block = "COLUMNS: " + ", ".join(cols)

    lines = []
    total_chars = 0  # running total, so the cap check is O(1) per row
    for values in df.itertuples(index=False, name=None):
        line = " | ".join(f"{col}={value}" for col, value in zip(cols, values))
        lines.append(line)
        total_chars += len(line)
        # Hard cap to avoid giant strings
        if total_chars > max_chars:
            lines.append("…(truncated)")
            break

    data_block = "\n".join(lines)
    return f"{schema_block}\n{data_block}"
|
| 107 |
+
|
| 108 |
+
def load_all_docs(folder: str) -> List[Tuple[str, str]]:
    """Read every supported document directly inside ``folder``.

    Supported extensions are .md, .txt, .docx, .pdf and .csv. Sub-folders are
    not searched.

    Returns:
        A list of ``(path, text)`` pairs, grouped by extension in the order
        listed above.
    """
    patterns = ("*.md", "*.txt", "*.docx", "*.pdf", "*.csv")
    found = [
        match
        for pattern in patterns
        for match in glob.glob(os.path.join(folder, pattern))
    ]

    loaded = []
    for doc_path in found:
        # The suffixes are mutually exclusive, so the order of these tests
        # does not affect which reader is chosen.
        if doc_path.endswith(".csv"):
            body = read_csv_file(doc_path)
        elif doc_path.endswith(".pdf"):
            body = read_pdf(doc_path)
        elif doc_path.endswith(".docx"):
            body = read_docx(doc_path)
        elif doc_path.endswith((".md", ".txt")):
            body = read_txt_md(doc_path)
        else:
            continue
        loaded.append((doc_path, body))
    return loaded
|
| 128 |
+
|
| 129 |
+
|
| 130 |
+
# -----------------------------
|
| 131 |
+
# Chunk + Embed + Index
|
| 132 |
+
# -----------------------------
|
| 133 |
+
|
| 134 |
+
def chunk_text(text: str, source: str, chunk_size: int = 300, overlap: int = 80) -> List[dict]:
    """Split ``text`` into overlapping word windows.

    Args:
        text: The document text; it is split on whitespace.
        source: Identifier (typically the file path) recorded on every chunk.
        chunk_size: Maximum number of words per chunk. Must be positive.
        overlap: Number of words shared between consecutive chunks. Must be
            in ``[0, chunk_size)``.

    Returns:
        A list of dicts with keys ``source``, ``chunk`` (the window's words
        joined by single spaces) and ``hash`` (MD5 hex digest of source,
        start offset and chunk text). Empty or whitespace-only text yields
        an empty list.

    Raises:
        ValueError: If ``chunk_size`` is not positive or ``overlap`` is
            outside ``[0, chunk_size)``. Without this guard a non-positive
            step would loop forever.
    """
    if chunk_size <= 0:
        raise ValueError("chunk_size must be a positive integer")
    if not 0 <= overlap < chunk_size:
        raise ValueError("overlap must satisfy 0 <= overlap < chunk_size")

    words = text.split()
    step = chunk_size - overlap
    chunks = []
    for start in range(0, len(words), step):
        chunk = " ".join(words[start:start + chunk_size])
        chunks.append({
            "source": source,
            "chunk": chunk,
            "hash": hashlib.md5((source + str(start) + chunk).encode("utf-8")).hexdigest(),
        })
        # This window already reaches the last word; another window would
        # only repeat the overlap tail of this one.
        if start + chunk_size >= len(words):
            break
    return chunks
|
| 148 |
+
|
| 149 |
+
def embed_texts(texts: List[str], batch_size: int = 512) -> np.ndarray:
    """Embed ``texts`` with the configured OpenAI embedding model.

    Requests are sent in batches so that a large corpus does not go out as
    one oversized request.

    Args:
        texts: Strings to embed.
        batch_size: Maximum number of strings per API request.
            NOTE(review): 512 is chosen to stay under the per-request input
            limits of the embeddings endpoint — confirm against the current
            API reference.

    Returns:
        A float32 array of shape ``(len(texts), d)``. For an empty input no
        request is made and an array of shape ``(0, 0)`` is returned.

    Raises:
        ValueError: If ``batch_size`` is not positive.
    """
    if batch_size <= 0:
        raise ValueError("batch_size must be a positive integer")
    if not texts:
        return np.zeros((0, 0), dtype="float32")

    vectors = []
    for start in range(0, len(texts), batch_size):
        resp = client.embeddings.create(
            model=EMBEDDING_MODEL,
            input=texts[start:start + batch_size],
        )
        vectors.extend(item.embedding for item in resp.data)
    return np.asarray(vectors, dtype="float32")
|
| 155 |
+
|
| 156 |
+
@dataclass
class RAGIndex:
    """Bundle of a FAISS index and the chunk table that goes with it."""

    # Inner-product FAISS index holding one vector per chunk.
    index: faiss.IndexFlatIP
    # Chunk metadata, one row per vector in `index` and in the same order.
    # The chunker in this file produces the columns: source, chunk, hash.
    df: pd.DataFrame
    # Length of each embedding vector.
    dim: int
|
| 161 |
+
|
| 162 |
+
def build_or_load_index(force_rebuild: bool = False) -> RAGIndex:
    """Return a ready-to-query index, reusing the on-disk cache when it is fresh.

    The cache consists of the embedding matrix at ``INDEX_PATH`` and the chunk
    table at ``META_CSV``. It is reused only when both files exist, neither is
    older than the newest source document, and their row counts agree.
    Otherwise all documents are re-chunked, re-embedded and the cache is
    rewritten.

    Args:
        force_rebuild: Ignore any cache and rebuild from the documents.

    Returns:
        A ``RAGIndex`` whose vectors are L2-normalised, so inner-product
        search behaves as cosine similarity.

    Raises:
        RuntimeError: If ``DOCS_DIR`` contains no supported documents, or the
            documents produce no chunks.
    """
    docs = load_all_docs(DOCS_DIR)
    if not docs:
        raise RuntimeError(f"No docs found in {DOCS_DIR}/. Put your itinerary files there.")

    index_exists = os.path.exists(INDEX_PATH) and os.path.exists(META_CSV)
    need_rebuild = force_rebuild
    if index_exists and not need_rebuild:
        # Staleness check over exactly the files that were loaded, so every
        # supported extension (CSV included) is taken into account.
        newest_doc_mtime = max(os.path.getmtime(path) for path, _ in docs)
        idx_mtime = min(os.path.getmtime(INDEX_PATH), os.path.getmtime(META_CSV))
        need_rebuild = newest_doc_mtime > idx_mtime

    if index_exists and not need_rebuild:
        # Read every column as text and keep empty strings as-is, so chunk
        # text such as "NA" is not turned into NaN on reload.
        df = pd.read_csv(META_CSV, dtype=str, keep_default_na=False)
        with open(INDEX_PATH, "rb") as fh:
            vecs = np.ascontiguousarray(np.load(fh), dtype="float32")
        # Only trust the cache when vectors and rows line up one-to-one;
        # otherwise fall through and rebuild.
        if vecs.ndim == 2 and vecs.shape[0] == len(df):
            dim = vecs.shape[1]
            index = faiss.IndexFlatIP(dim)
            faiss.normalize_L2(vecs)
            index.add(vecs)
            return RAGIndex(index=index, df=df, dim=dim)

    # Rebuild
    all_chunks = []
    for path, text in docs:
        if not text.strip():
            continue
        all_chunks.extend(chunk_text(text, source=path))

    if not all_chunks:
        raise RuntimeError("Docs were read but produced no chunks. Check formats.")

    df = pd.DataFrame(all_chunks)
    vecs = embed_texts(df["chunk"].tolist())
    # Normalize for cosine similarity via inner product
    faiss.normalize_L2(vecs)

    # Write through an open file handle: given a *path*, np.save appends
    # ".npy" when it is missing, which would store the matrix under a
    # different name than INDEX_PATH and make the cache impossible to find.
    with open(INDEX_PATH, "wb") as fh:
        np.save(fh, vecs)
    df.to_csv(META_CSV, index=False)

    dim = vecs.shape[1]
    index = faiss.IndexFlatIP(dim)
    index.add(vecs)
    return RAGIndex(index=index, df=df, dim=dim)
|
| 210 |
+
|
| 211 |
+
# -----------------------------
|
| 212 |
+
# Retrieval + Answering
|
| 213 |
+
# -----------------------------
|
| 214 |
+
|
| 215 |
+
def retrieve(query: str, rag: RAGIndex, k: int = 7) -> List[dict]:
    """Return up to ``k`` chunks most similar to ``query``.

    The query is embedded, L2-normalised and searched against ``rag.index``.
    Each hit is a dict with keys ``score``, ``source`` and ``chunk``, in the
    order the index returned them.
    """
    query_vec = embed_texts([query])
    faiss.normalize_L2(query_vec)
    scores, row_ids = rag.index.search(query_vec, k)

    hits = []
    for similarity, row_id in zip(scores[0], row_ids[0]):
        # -1 is the placeholder id for slots with no result.
        if row_id != -1:
            record = rag.df.iloc[int(row_id)]
            hits.append({
                "score": float(similarity),
                "source": record["source"],
                "chunk": record["chunk"],
            })
    return hits
|
| 230 |
+
|
| 231 |
+
SYSTEM_PROMPT = (
    "You are a helpful, concise wedding assistant for Samuel's wedding. "
    "Answer ONLY using the provided context from the wedding documents. "
    "If the answer isn’t in the docs, say you don’t have that info and suggest who to contact (e.g., Overall IC). "
    "Keep answers under 6 bullets or 150 words when possible. Use SGT times."
)


async def answer_with_rag(question: str, rag: RAGIndex) -> str:
    """Answer ``question`` from the indexed documents.

    Retrieves the six closest chunks, trims each to 800 characters, and asks
    the chat model to answer using only that context.

    The embedding and chat requests use a synchronous client, so both are run
    in a worker thread to keep the event loop responsive while they are in
    flight.

    Args:
        question: The user's question.
        rag: The index to search.

    Returns:
        The model's answer with surrounding whitespace removed. A fixed
        "couldn't find this" message is returned instead when strict mode is
        on and nothing was retrieved (no model call is made in that case), or
        when the model returns no text.
    """
    import asyncio  # local import: keeps this block self-contained

    not_found = (
        "I couldn’t find this in the wedding docs. Please check the Family Playbook or ask the Overall IC. "
        "You can also /refresh to make sure I have the latest files."
    )

    ctx = await asyncio.to_thread(retrieve, question, rag, k=6)
    context_blocks = []
    for r in ctx:
        # Keep brief context slices
        text = r["chunk"]
        if len(text) > 800:
            text = text[:800] + "…"
        context_blocks.append(f"[Source: {os.path.basename(r['source'])}]\n{text}")

    # Strict mode with nothing retrieved always ends in the fixed reply, so
    # skip the model call entirely.
    if STRICT_DOC_MODE and not context_blocks:
        return not_found

    context_text = "\n\n".join(context_blocks)

    messages = [
        {"role": "system", "content": SYSTEM_PROMPT},
        {"role": "user", "content": f"Context from docs:\n\n{context_text}\n\nQuestion: {question}"}
    ]

    completion = await asyncio.to_thread(
        client.chat.completions.create,
        model=OPENAI_MODEL,
        messages=messages,
        temperature=0.2,
    )
    # `content` can be None (e.g. a refusal or empty completion); never call
    # .strip() on it directly.
    answer = (completion.choices[0].message.content or "").strip()
    return answer or not_found
|
| 270 |
+
|
| 271 |
+
# -----------------------------
|
| 272 |
+
# Telegram Handlers
|
| 273 |
+
# -----------------------------
|
| 274 |
+
RAG = None  # lazy loaded


async def ensure_rag(force: bool = False):
    """Make sure the module-level ``RAG`` index is available.

    Builds (or loads from cache) the index on first use, and rebuilds it when
    ``force`` is true. The build reads files and calls the embedding API
    synchronously, so it is run in a worker thread rather than on the event
    loop.

    Args:
        force: Rebuild even if an index is already loaded.
    """
    global RAG
    import asyncio  # local import: keeps this block self-contained

    if RAG is None or force:
        RAG = await asyncio.to_thread(build_or_load_index, force_rebuild=force)
|
| 280 |
+
|
| 281 |
+
async def start(update: Update, context: ContextTypes.DEFAULT_TYPE):
    """Handle /start: warm up the index, then send the welcome text with example questions."""
    await ensure_rag(False)
    welcome_text = (
        "👋 Hello! I’m the Wedding Q&A Bot. Ask me anything about roles, timings, addresses, and logistics.\n\n"
        "Examples:\n"
        "• What time is the solemnisation?\n"
        "• What’s Mum’s role during tea ceremony?\n"
        "• Where to park at the hotel?\n"
        "• Who holds the ang bao box?\n\n"
        "Admins can /refresh after updating the docs."
    )
    incoming = update.message
    await incoming.reply_text(welcome_text)
|
| 293 |
+
|
| 294 |
+
async def help_cmd(update: Update, context: ContextTypes.DEFAULT_TYPE):
    """Handle /help: reply with the list of supported commands."""
    usage_text = (
        "Send a normal question, or use:\n"
        "/role <name> — quick role lookup\n"
        "/refresh — rebuild knowledge from latest docs (admin only, but not enforced)"
    )
    await update.message.reply_text(usage_text)
|
| 300 |
+
|
| 301 |
+
async def role_cmd(update: Update, context: ContextTypes.DEFAULT_TYPE):
    """Handle ``/role <name>``: look up a person's role and responsibilities.

    Replies with a usage hint when no name is given. The answer is sent as
    Markdown; if Telegram rejects it (model output often contains unbalanced
    ``*``/``_``), it is resent as plain text.
    """
    from telegram.error import BadRequest  # local import: keeps this block self-contained

    # effective_message also covers edited messages, for which
    # update.message is None.
    message = update.effective_message
    if message is None:
        return

    name = " ".join(context.args or []).strip()
    if not name:
        await message.reply_text("Usage: /role <name>")
        return

    await ensure_rag(False)
    q = f"What is the role and responsibilities of {name}? Include timings and contact if available."
    ans = await answer_with_rag(q, RAG)
    # Telegram has 4096 char limit per message; be safe
    if len(ans) > 3500:
        ans = ans[:3500] + "…"
    try:
        await message.reply_text(ans, parse_mode=ParseMode.MARKDOWN)
    except BadRequest:
        # Most likely unparseable Markdown entities; plain text always parses.
        await message.reply_text(ans)
|
| 310 |
+
|
| 311 |
+
async def refresh_cmd(update: Update, context: ContextTypes.DEFAULT_TYPE):
    """Handle /refresh: force an index rebuild, then confirm to the sender."""
    await ensure_rag(True)
    confirmation = "✅ Refreshed. I’m now using the latest documents in wedding_docs/."
    await update.message.reply_text(confirmation)
|
| 314 |
+
|
| 315 |
+
async def on_message(update: Update, context: ContextTypes.DEFAULT_TYPE):
    """Answer a free-text (non-command) message from the indexed documents.

    Blank messages are ignored. The answer is capped at 3500 characters and
    sent as Markdown; if Telegram rejects it (model output often contains
    unbalanced ``*``/``_``), it is resent as plain text.
    """
    from telegram.error import BadRequest  # local import: keeps this block self-contained

    # effective_message also covers edited messages, for which
    # update.message is None.
    message = update.effective_message
    if message is None:
        return

    text = (message.text or "").strip()
    if not text:
        return

    await ensure_rag(False)
    ans = await answer_with_rag(text, RAG)
    # Telegram has 4096 char limit per message; be safe
    if len(ans) > 3500:
        ans = ans[:3500] + "…"
    try:
        await message.reply_text(ans, parse_mode=ParseMode.MARKDOWN)
    except BadRequest:
        # Most likely unparseable Markdown entities; plain text always parses.
        await message.reply_text(ans)
|
| 325 |
+
|
| 326 |
+
def main():
    """Validate configuration, register the handlers and poll Telegram until stopped.

    Raises:
        RuntimeError: If the Telegram token or the OpenAI key is not set.
    """
    if not TELEGRAM_BOT_TOKEN:
        raise RuntimeError("TELEGRAM_BOT_TOKEN missing")
    if not OPENAI_API_KEY:
        raise RuntimeError("OPENAI_API_KEY missing")

    bot = Application.builder().token(TELEGRAM_BOT_TOKEN).build()

    # Slash commands, registered in this order.
    command_callbacks = (
        ("start", start),
        ("help", help_cmd),
        ("role", role_cmd),
        ("refresh", refresh_cmd),
    )
    for command, callback in command_callbacks:
        bot.add_handler(CommandHandler(command, callback))

    # Any other text that is not a command is treated as a question.
    free_text = filters.TEXT & ~filters.COMMAND
    bot.add_handler(MessageHandler(free_text, on_message))

    print("Bot running… Press Ctrl+C to stop.")
    bot.run_polling(drop_pending_updates=True)


if __name__ == "__main__":
    main()
|
requirements.txt
ADDED
|
@@ -0,0 +1,11 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
python-telegram-bot==20.7
|
| 2 |
+
openai>=1.51.0
|
| 3 |
+
faiss-cpu==1.8.0.post1
|
| 4 |
+
python-dotenv==1.0.1
|
| 5 |
+
pydantic==2.9.2
|
| 6 |
+
pandas==2.2.3
|
| 7 |
+
numpy==1.26.4
|
| 8 |
+
python-docx==1.1.2
|
| 9 |
+
pypdf==5.0.0
|
| 10 |
+
unstructured==0.15.7
|
| 11 |
+
unstructured[pdf]==0.15.7
|
wedding_docs/Cherry&Samuel Wedding plan version 1.1.docx
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:438cecd73269200d9df2e16d29c094216886e72b349d4daa87298338ee3ca760
|
| 3 |
+
size 4997938
|
wedding_docs/Samuel wedding locations.csv
ADDED
|
@@ -0,0 +1,8 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
Property,Address,Google maps link,Details,Key Holders / Notes
|
| 2 |
+
Groom’s Airbnb,67 Jalan Taman Jalong 2,https://maps.app.goo.gl/z8TwJpvgpNXZQGMm8,"Master: Samuel & Cherry; Room 2: Andy, Kenneth, Zixian, Edwin; Room 3: Yuqi",Keys: Samuel & Yuqi
|
| 3 |
+
Groom’s Family Airbnb,68 Jalan Taman Jalong 2,https://maps.app.goo.gl/fYvFjK6XDURJjw5UA,"Master: Mummy, Lilian; Room 2: Auntie Wendy, Uncle Barry; Room 3–4: Samson & Wife or Sandra & Keyi",Keys: Samson & Sandra
|
| 4 |
+
Bride’s House,17 Jalan Taman Jalong 4,https://maps.app.goo.gl/bNmqgDt3iuKPHV2C9,Cherry’s family,—
|
| 5 |
+
Chloe (MUA) Hotel,MH Sentral,https://maps.app.goo.gl/JLvsg29VXKCmdbwo6,Chloe,5 mins from Groom’s Airbnb
|
| 6 |
+
Relatives’ Airbnb 1,"No. 232 Jalan Kemiri 8, Taman Kemiri, 31100 Sungai Siput (U), Perak",https://maps.app.goo.gl/Z3KsK5dQTh97qXNWA,阿姨 family (7 pax),—
|
| 7 |
+
Relatives’ Airbnb 2,"No. 250 Jalan Bistari 7, Taman Lintang Bistari, 31100 Sungai Siput (U), Perak",,Pei Wen + Husband + Serene (key holder); NabJesc + Winky + Cat + Ice (key holder); Photographer standby room,5 mins from Bride’s home
|
| 8 |
+
Phongmun Restaurant Sdn Bhd,"Komersial Water pond, & 99-1, Persiaran Komersial 1, &, Persiaran Komersial 5, 31100 Sungai Siput, Perak, Malaysia",https://maps.app.goo.gl/bHuKnnUYpJKBjRzA6,Wedding dinner,
|
wedding_docs/Samuel wedding schedule.csv
ADDED
|
@@ -0,0 +1,87 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
,Date / Time,Event / Task,Person(s) in Charge,Location,Notes / Items to Bring / Remarks
|
| 2 |
+
23 Oct 2025 (Thu),23 Oct 2025 (Thu),,,,
|
| 3 |
+
1,23 Oct 2025 (Thu),Arrive in Ipoh (Evening),Cherry & Samuel,—,Reach in evening
|
| 4 |
+
2,23 Oct 2025 (Thu),"Prepare remaining red packets (Jie Mei, Xiong Di)",Samuel,Groom’s Airbnb,Exchange for balance $700 + $200; keep in sling bag (from mother)
|
| 5 |
+
3,23 Oct 2025 (Thu),Collect 1st batch of red packets,Samuel,From DA GU,—
|
| 6 |
+
4,23 Oct 2025 (Thu),Final decoration of Cherry’s home (4 flower beds),Cherry & family,Bride’s home,—
|
| 7 |
+
5,23 Oct 2025 (Thu),Cycle count & remove door gifts from Toyogo bin (to use as cooler box for beers + drinks),Samuel & friends,Bride’s home,—
|
| 8 |
+
6,23 Oct 2025 (Thu),Prepare Tupperware to freeze ice cubes,Samuel,Bride’s home & Airbnb,For 24th dinner buffet
|
| 9 |
+
7,23 Oct 2025 (Thu),Restaurant lighting & LED screen test (bring thumbdrive),Samuel + LED Guy,Phongmun Restaurant,"Discuss walk-in aisle, remove table stands/cards"
|
| 10 |
+
8,23 Oct 2025 (Thu),Pack to bring to Airbnb (for 24th),Samuel & Cherry,—,"Glass cup x2, 150 scratch cards, clear glass box w/ test tube sand, Groom’s Airbnb deco tools, clothing, red bedsheet set, red pajamas, drinks for buffet"
|
| 11 |
+
9,23 Oct 2025 (Thu),Pack to bring to restaurant (for 24th),Samuel,—,"Poster stand, 150 door gifts, registration items, restaurant drinks, thumbdrive (Capcut video, opening video, PPS)"
|
| 12 |
+
24 Oct (Fri) ,,,,,
|
| 13 |
+
1,24 Oct (Fri) Morning,Meet deco team (Michelle) at restaurant,Cherry & Samuel,Phongmun Restaurant,Check deco setup
|
| 14 |
+
2,24 Oct (Fri) 1:00 PM,Check-in at Groom’s Airbnb,Cherry & Samuel,67/68 Jalan Taman Jalong 2,Earliest time
|
| 15 |
+
3,24 Oct (Fri) —,Check-in at Relatives’ Airbnb,Cherry & Samuel,Various (TBC),—
|
| 16 |
+
4,24 Oct (Fri) Afternoon,Make bed w/ red bedsheet & pillow cases,Samuel,Airbnb,—
|
| 17 |
+
5,24 Oct (Fri) Afternoon,Make ice cubes,Samuel,Airbnb,—
|
| 18 |
+
6,24 Oct (Fri) 2:25 PM,Friends & relatives arrive (airport),All,Ipoh,Settle own lunch; travel to Airbnb/hotel
|
| 19 |
+
7,24 Oct (Fri) 5:00 PM,Groom’s car deco (Lce’s car),Lce + Cherry,Groom’s Airbnb,Cherry to tag along; Samuel stay
|
| 20 |
+
8,24 Oct (Fri) 6:30 PM,Buffet Dinner @ Bride’s home,All,Bride’s home,Album ready for viewing (Johor Auntie)
|
| 21 |
+
9,24 Oct (Fri) During buffet,"Decorate 3 xiong di’s cars (Jake, Ah San, Cherry’s brother)",Samuel’s group,Bride’s home,After Cherry returns
|
| 22 |
+
10,24 Oct (Fri) During buffet,Bring beers,Jake,Bride’s home,—
|
| 23 |
+
11,24 Oct (Fri) During buffet,Retrieve/make ice cubes,Samuel,Bride’s home,Buy backup ice cubes from mart
|
| 24 |
+
12,24 Oct (Fri) During buffet,Paste stickers on cups & scratch cards,"Kenneth, Yuqi, Andy, Zixian",Airbnb,—
|
| 25 |
+
13,24 Oct (Fri) After buffet,Deco Groom’s Airbnb,"Kenneth, Yuqi, Andy, Zixian",Airbnb,—
|
| 26 |
+
14,24 Oct (Fri) After buffet,"Restaurant test run (lighting, LED, march-in)","Samuel, Cherry, Samson, Sandra, Keyi, Pei Wen, Serene, Jessie",Phongmun Restaurant,"Bring thumbdrive, test Kahoot, check tables/cards"
|
| 27 |
+
15,24 Oct (Fri) Night,Bring to restaurant,Samuel,—,"Poster stand, drinks, 150 door gifts"
|
| 28 |
+
16,24 Oct (Fri) Night,Deco team setup,"Michelle (walkway, VIP) + Jessie (lighting)",Phongmun Restaurant,—
|
| 29 |
+
25 Oct 2025 (Sat) ,,,,,
|
| 30 |
+
1,25 Oct 2025 (Sat) 0800 HRS,Simple breakfast (dapao 20 pax),Cherry’s brother,Airbnb,—
|
| 31 |
+
2,25 Oct 2025 (Sat) 0900 HRS,All brothers arrive,All Xiong Di,Groom’s Airbnb,"Ah Lung, Ah San, Chloe to note"
|
| 32 |
+
3,25 Oct 2025 (Sat) 0930 HRS,Depart for Bride’s house,Samson (lead),From Groom’s Airbnb,"1-min drive
|
| 33 |
+
a. Car 1 (Lce’s car 8 Seater alphard) – Driver: Samson
|
| 34 |
+
Samuel + Xiao Feng (PIC of Wedding customs)
|
| 35 |
+
b. Car 2 (Cherry’s brother car) – Driver: Ah long
|
| 36 |
+
Kenneth + Andy + Yuqi
|
| 37 |
+
c. Car 3 (Jake’s car) – Driver: Jake
|
| 38 |
+
Zixian + Wen Qing + Chloe
|
| 39 |
+
d. Car 4 (ah bang’s car 7 Seater): Driver: Ah San
|
| 40 |
+
Pei Wen + Serene + Sandra + Keyi + Samson’s wife + Ah San’s wife"
|
| 41 |
+
4,25 Oct 2025 (Sat) —,Red packet distribution,Samson,—,Samson holds sling bag
|
| 42 |
+
5,25 Oct 2025 (Sat) 0945 HRS,Arrival at Bride’s house,All,Bride’s home,Niece NiNi opens car
|
| 43 |
+
6,25 Oct 2025 (Sat) 1030 HRS,Gate games,Jie Mei + Cherry,Bride’s home,—
|
| 44 |
+
7,25 Oct 2025 (Sat) 1130 HRS,Tea ceremony & prayers,All,Bride’s home,Finish ~12 pm
|
| 45 |
+
8,25 Oct 2025 (Sat) 1230 HRS,Return to Airbnb,All,Groom’s Airbnb,Lunch provided
|
| 46 |
+
9,25 Oct 2025 (Sat) 1145 HRS,Brothers standby to travel back,Xiong Di,Bride’s home,Use same seating arrangement
|
| 47 |
+
10,25 Oct 2025 (Sat) 1230 HRS,Tea ceremony (Groom’s side) + Photos,All,Groom’s Airbnb,Pei Wen/Serene to reserve 2 lunch portions for photographers
|
| 48 |
+
25 Oct 2025 (Sat) Evening Wedding Dinner,,,,,
|
| 49 |
+
1,25 Oct 2025 (Sat) 3:00 pm–6:00 pm,Makeup at restaurant,Chloe (MUA),Phongmun Restaurant,"Bride 3:15–5:30, Groom 5:30–6:00, Mothers 3:00–6:00"
|
| 50 |
+
2,25 Oct 2025 (Sat) 4.20 pm-4:50 pm Pick up and reach restaurant ,"1. Samson to fetch setup team from both airbnbs with 8-seater Alphard.
|
| 51 |
+
2. Bring items from airbnb to setup","Pei Wen + Husband, Serene, Sandra, Keyi, Samson + Wife, Jessie",To restaurant,"Items to bring from Airbnb for setup on 25th
|
| 52 |
+
Bride & Groom’s Cups (Wash and standby – Sandra & Keyi to fill up with Chinese tea and standby at entrance & handover to me & cherry during 2nd march in @ 8.30 pm)
|
| 53 |
+
Samson to place Test tubes containing colored sand + clear glass box at registration counter table. Plus scratch card (distribute those with Samuel logo).
|
| 54 |
+
Pei Wen & Serene to place 4 beds of flowers to place in front of TV Screen (Check if the flowers will block the screen content)"
|
| 55 |
+
3,25 Oct 2025 (Sat) —,Table setup,"Pei Wen + Husband, Serene, Sandra, Keyi, Samson + Wife, Jessie",Restaurant,"Items already at restaurant (brought over on 24th) for setup
|
| 56 |
+
Samson to setup “Poster Stand” beside registration counter
|
| 57 |
+
All - Place door gift x 150 pcs at each seat in the restaurant (already at restaurant, limited gifts, do not need to place on Samuel's family table)
|
| 58 |
+
Sandra & Keyi to Tag table no. cards at each table stand for each table using seat plan (Cherry & Samuel to clarify 1 day earlier with restaurant boss where are the stands/cards)"
|
| 59 |
+
4,25 Oct 2025 (Sat) 1700 HRS,Live band & Emcee setup,"Alex (Band), James (Emcee)",Restaurant,Sound system setup
|
| 60 |
+
5,25 Oct 2025 (Sat) 1800 HRS,Early dinner (crew),"6 pax (2 photographers, 1 MUA, 2 band, 1 emcee)",Restaurant,
|
| 61 |
+
6,25 Oct 2025 (Sat) 1800 HRS,Guest registration opens,"Sandra, Samson, Cherry’s cousin (Matthew)",Entrance,"Samson & Sandra to standby at reception before 6 pm to register guests using the guest list (Cherry’s cousin – Matthew will be assisting as well)
|
| 62 |
+
Bring ballpoint pen. Register guest (Tick their name according) > Cherry bought registration book from shopee
|
| 63 |
+
Let them know their table no.
|
| 64 |
+
Pass them 1 scratch card each (Ask them to Scratch after Emcee’s announcement)
|
| 65 |
+
Receive red packets & indicate in the template (Check with guest if they are providing any red packets politely / 想请问一下,您这边会准备红包吗?) (Cherry & Samuel to confirm where to store these red packets) Check guest wrote name on angbao
|
| 66 |
+
After registration, direct Guests to fill up the clear glass box using the test tubes colored sand
|
| 67 |
+
Registration cut-off at 7 pm."
|
| 68 |
+
7,25 Oct 2025 (Sat) 1900 HRS,Registration closes,—,—,Wedding dinner begins
|
| 69 |
+
8,25 Oct 2025 (Sat) 1900 HRS,Guests seated / photo taking,Bride & Groom,Restaurant,Photographer at booth
|
| 70 |
+
9,25 Oct 2025 (Sat) 1900 HRS,Pre-wedding shoot Taiwan video,Jessie,Restaurant,
|
| 71 |
+
10,25 Oct 2025 (Sat) 1915 HRS,First march-in (parents + couple),All,Restaurant,"Songs: Joey “分分鐘需要你”, “not my business”"
|
| 72 |
+
11,25 Oct 2025 (Sat) 1930 HRS,Food serving starts,Restaurant,—,Wedding slideshow
|
| 73 |
+
12,25 Oct 2025 (Sat) 1935 HRS,Live band performance,Alex,—,45 mins
|
| 74 |
+
13,25 Oct 2025 (Sat) 2000 HRS,Second march-in prep,Sandra & Keyi,Entrance,Hand sticker cups
|
| 75 |
+
14,25 Oct 2025 (Sat) 2030 HRS,Second march-in,Bride & Groom,Restaurant,Spotify song link
|
| 76 |
+
15,25 Oct 2025 (Sat) 2035 HRS,Cake-cutting & champagne toast,Couple + Family,Stage,Song: SEVENTEEN – Reasons of My Smiles
|
| 77 |
+
16,25 Oct 2025 (Sat) 2100 HRS,CapCut highlight video,Photographer,Screen,Tea ceremony highlights
|
| 78 |
+
17,25 Oct 2025 (Sat) 2120 HRS,Kahoot game,Emcee + VIP guests,Restaurant,"Game: Kahoot (VIP scanned QR code to join the game )
|
| 79 |
+
https://slempire.rhinopal.top?oid=2510059PQFQMUW
|
| 80 |
+
https://create.kahoot.it/share/samuel-cherry-final-wedding-game/4d55c251-77e5-4269-8c70-4110c70148c4
|
| 81 |
+
|
| 82 |
+
When play Kahoot game, background song: https://open.spotify.com/track/4ppXYFIKT0GM6bGnxYk7qm?si=u3JXZSEgRNanLX94_B0HjQ
|
| 83 |
+
3 Winners (Red packets)"
|
| 84 |
+
18,25 Oct 2025 (Sat) 2130 HRS,Scratch card game,Emcee,Restaurant,"10 winners (red packets), background music: ROSÉ & Bruno Mars “APT.”"
|
| 85 |
+
19,25 Oct 2025 (Sat) 2145 HRS,Toast to guests,Couple,All tables,—
|
| 86 |
+
20,25 Oct 2025 (Sat) 9:45 pm–10:15 pm,Final live band set,Alex,Restaurant,Until end of dinner
|
| 87 |
+
21,25 Oct 2025 (Sat) 10:15–10:30 pm,Event ends,All,Restaurant,"Clean-up, end"
|