# app.py
# Koala DX — Single-Page In-Memory Dashboard + Responder Console (Passcode-gated)
# - Sources:
#   (A) Cloudflare Worker (encrypted AES-GCM) pull → decrypt → in-memory
#   (B) CSV upload fallback (reads into memory only)
# - Presets & KB:
#   - Loaded from a single Excel file: preset.xlsx
#   - Each preset is a separate sheet (sheet name = preset name).
#     Expected columns (flexible): summary, actions, qna, rewrite
#     Alternative format: rows with columns [key, value]
#   - Optional sheet "Hotlines" with columns: name, dial/url, hours, note
#   - Q&A sheet: "Q&A" (or "QA") with columns: keyword, question, answer
#   - RA sheet: "RA" with columns: keyword, action (or actions)
# - Volunteer Assist (Summary / Actions / Q&A):
#   - Uses OPENAI_API_KEY from the environment (HF Secret) and OPENAI_MODEL (default: gpt-5-nano)
#   - Sends the last ~500 tokens of context (won't cut sentences mid-way)
#   - For Actions/Q&A: the LLM picks 1–3 relevant keywords (JSON) and we display the matching rows from Excel.
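# Illustrative preset.xlsx layout (sketch only; "Night shift" / "Day shift" are placeholder
# sheet names, the cell text is placeholder, and the sheet/column rules follow the notes above):
#   Sheet "Night shift" -> columns: summary | actions | qna | rewrite
#   Sheet "Day shift"   -> rows:    key (summary/actions/qna/rewrite) | value
#   Sheet "Hotlines"    -> columns: name | dial | url | hours | note
#   Sheet "Q&A"         -> columns: keyword | question | answer
#   Sheet "RA"          -> columns: keyword | action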
import os
import re
import io
import time
import json
import base64
import socket
import datetime as dt
from datetime import datetime, timedelta, timezone
from typing import List, Dict, Tuple, Optional
from urllib.parse import urlparse
import requests
import pandas as pd
import streamlit as st
from cryptography.hazmat.primitives.ciphers.aead import AESGCM
from streamlit.components.v1 import html as st_html
import html as _pyhtml # for safe escaping of message text
from stats_tab import render_stats_tab
# ── Streamlit config MUST be the first Streamlit call ─────────────────────────
st.set_page_config(page_title="Koala DX — Dashboard + Responder", layout="wide")
# ── Compat helper for Streamlit rerun ──────────────────────────────────────────
rerun = getattr(st, "rerun", None) or getattr(st, "experimental_rerun", None)
# ── JST timezone for console timestamps ────────────────────────────────────────
try:
from zoneinfo import ZoneInfo # Python 3.9+
JPN_TZ = ZoneInfo("Asia/Tokyo")
except Exception:
JPN_TZ = dt.timezone(dt.timedelta(hours=9), name="JST")
def fmt_ts_ms_to_jst(ms):
try:
utc_dt = dt.datetime.fromtimestamp(int(ms) / 1000, tz=dt.timezone.utc)
jst_dt = utc_dt.astimezone(JPN_TZ)
return jst_dt.strftime("%Y-%m-%d %H:%M")
except Exception:
return str(ms)
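# Illustrative conversion (epoch milliseconds -> JST string, not executed here):
#   fmt_ts_ms_to_jst(1700000000000)  -> "2023-11-15 07:13"
#   fmt_ts_ms_to_jst("not-a-number") -> "not-a-number"   (falls back to str())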
def _esc(s: str) -> str:
return _pyhtml.escape(str(s or ""))
def render_conversation_bubbles(messages: list[dict], height_px: int = 520, key: str | None = None) -> None:
"""
Render all messages into a single scrollable HTML container (inside the iframe),
with its own CSS so formatting is preserved and the area always scrolls.
"""
rows = []
SYSTEM_TYPES = {"follow", "unfollow", "join", "leave", "block", "unblock", "member_joined", "member_left"}
for m in messages:
role = (m.get("role") or "").lower().strip()
text = (m.get("text") or "").strip()
msg_type = (m.get("type") or "").lower().strip()
ts_str = fmt_ts_ms_to_jst(m.get("ts") or m.get("ts_ms"))
responder = (m.get("responder") or "").strip()
is_system = False
effective_text = text
if not effective_text and msg_type:
effective_text = f"[{msg_type}]"
is_system = True
if is_system:
align = "center"; bubble_cls = "system"; meta = _esc(ts_str)
else:
align = "left" if role == "user" else "right"
bubble_cls = "user" if role == "user" else "account"
meta = _esc(ts_str + (f" · {responder}" if role != "user" and responder else ""))
rows.append(
f'<div class="chat-row {align}">'
f' <div class="bubble {bubble_cls}">{_esc(effective_text)}'
f' <div class="meta">{meta}</div>'
f' </div>'
f'</div>'
)
block_id = (key or "chat") + "_scroll"
css = f"""
<style>
:root {{ color-scheme: light only; }}
html, body {{ margin: 0; padding: 0; }}
body {{
font-family: system-ui, -apple-system, "Segoe UI", Roboto, Helvetica, Arial,
"Noto Sans JP", "Hiragino Kaku Gothic ProN", "Yu Gothic", sans-serif;
background: #ffffff;
}}
.chat-scroll {{
height: {height_px}px; /* fixed inner height */
overflow-y: auto;
border: 1px solid #e5e7eb;
border-radius: 12px;
padding: 8px 10px;
background: #ffffff;
}}
.chat-row {{ display: flex; margin: 6px 0; }}
.left {{ justify-content: flex-start; }}
.right {{ justify-content: flex-end; }}
.center{{ justify-content: center; }}
.bubble {{
border-radius: 16px;
padding: 10px 12px;
max-width: 92%;
word-wrap: break-word;
white-space: pre-wrap; /* preserve newlines/spacing */
}}
.user {{ background: #f1f5f9; }}
.account {{ background: #e0f2fe; }}
.system {{ background: #e0e7ff; font-style: italic; }}
.meta {{ font-size: 11px; color: #6b7280; margin-top: 2px; }}
.small {{ font-size: 12px; color: #6b7280; }}
</style>
"""
html_block = f"""<!DOCTYPE html>
<html>
<head><meta charset="utf-8">{css}</head>
<body>
<div id="{block_id}" class="chat-scroll">
{''.join(rows) if rows else '<div class="small">（メッセージはまだありません）</div>'}
</div>
<script>
(function() {{
var box = document.getElementById("{block_id}");
if (box) box.scrollTop = box.scrollHeight; // autoscroll to bottom
}})();
</script>
</body>
</html>"""
# Some Streamlit builds don't support key= on html(); try with key then without.
try:
if key is not None:
st_html(html_block, height=height_px + 24, scrolling=False, key=key)
else:
st_html(html_block, height=height_px + 24, scrolling=False)
except TypeError:
st_html(html_block, height=height_px + 24, scrolling=False)
# Debug log (keep last 200 lines)
# ---- Session state handle (must exist before any ss.* usage) ----
ss = st.session_state
ss.setdefault("debug_log", [])
def dbg(msg: str):
try:
ts = dt.datetime.now(JPN_TZ).strftime("%Y-%m-%d %H:%M:%S")
except Exception:
ts = datetime.utcnow().strftime("%Y-%m-%d %H:%M:%S")
ss["debug_log"].append(f"[{ts}] {msg}")
ss["debug_log"] = ss["debug_log"][-200:]
# =========================
# Defaults / Config (env overrides optional)
# =========================
# 🔒 Locked Worker base for PUBLIC HF Space (do not take from user input)
WORKER_BASE_LOCKED = os.getenv("WORKER_BASE_LOCKED", "https://c93a29b9.koalarely-alt.pages.dev/").strip()
def _normalize_and_lock_worker_base(url: str) -> str:
u = urlparse((url or "").strip())
if u.scheme != "https":
raise ValueError("WORKER_BASE_LOCKED must be https.")
host = (u.hostname or "").lower()
if not host:
raise ValueError("WORKER_BASE_LOCKED missing hostname.")
if u.username or u.password:
raise ValueError("Credentials in WORKER_BASE_LOCKED are not allowed.")
if u.port not in (None, 443):
raise ValueError("Unexpected port in WORKER_BASE_LOCKED.")
# Important: drop any path/query/fragment
return f"https://{host}"
try:
WORKER_BASE_DEFAULT = _normalize_and_lock_worker_base(WORKER_BASE_LOCKED)
except Exception as e:
# Fail closed: if misconfigured, don't run against arbitrary URLs
raise RuntimeError(f"Invalid WORKER_BASE_LOCKED: {e}")
# Optional (still secret): used only for /messages pull (NOT responder console)
READ_API_KEY_DEFAULT = os.getenv("READ_API_KEY", "")
SINCE_MS_DEFAULT = int(os.getenv("SINCE_MS", "0"))
FETCH_LIMIT_DEFAULT = int(os.getenv("FETCH_LIMIT", "1000"))
MAX_CTX_CHARS = int(os.getenv("MAX_CTX_CHARS", "3500"))
KEY_TTL_MIN = int(os.getenv("D1_KEY_TTL_MIN", "30")) # AES key session TTL (minutes)
OPENAI_API_KEY = os.getenv("OPENAI_API_KEY", "")
OPENAI_MODEL = os.getenv("OPENAI_MODEL", "gpt-5-nano")
OPENAI_URL = os.getenv("OPENAI_URL", "https://api.openai.com/v1/responses")
def _extract_responses_output_text(resp_json: dict) -> str:
"""
Extract assistant text from Responses API JSON.
Works for:
- SDK-like helper field: resp_json["output_text"] (if present)
- Raw HTTP shape: resp_json["output"] items with message.content[type=output_text]
"""
if not isinstance(resp_json, dict):
return ""
# Some clients may include this helper-like field
ot = resp_json.get("output_text")
if isinstance(ot, str) and ot.strip():
return ot.strip()
out_chunks: list[str] = []
for item in (resp_json.get("output") or []):
if not isinstance(item, dict):
continue
if item.get("type") != "message":
continue
for c in (item.get("content") or []):
if not isinstance(c, dict):
continue
if c.get("type") == "output_text":
t = c.get("text")
if isinstance(t, str) and t:
out_chunks.append(t)
return "".join(out_chunks).strip()
def _trim(s: str, limit: int = MAX_CTX_CHARS) -> str:
s = s or ""
return s if len(s) <= limit else s[-limit:]
# =========================
# Session state (in-memory DB + auth + presets)
# =========================
ss = st.session_state
ss.setdefault("messages_df", pd.DataFrame(columns=["ts", "user_id", "display_name", "role", "text", "risk_level", "risk_terms"]))
ss.setdefault("sec_cache", {})
ss.setdefault("worker_base", WORKER_BASE_DEFAULT)
ss.setdefault("token", "")
ss.setdefault("scope", "")
ss.setdefault("responder", "")
ss.setdefault("selected_user", "")
ss.setdefault("selected_user_label", "")
ss.setdefault("nickname_current", "")
ss.setdefault("last_refresh", 0.0)
ss.setdefault("auto_set_nick_once_for", "")
ss.setdefault("presets", {}) # {sheet_name: {"summary":..., "actions":..., "qna":..., "rewrite":...}}
ss.setdefault("hotlines", []) # [{"name":...,"dial":...,"url":...,"hours":...,"note":...}]
ss.setdefault("active_preset", "") # selected sheet name
ss.setdefault("user_index", {}) # user_id -> {"nickname":..., "display_name":..., "suffix":...}
ss.setdefault("llm_out", {}) # per-user outputs: {"uid": {"summary":"...", "actions":"...", "qna":"..."}}
# KB state
ss.setdefault("qa_rows", pd.DataFrame(columns=["keyword","question","answer"]))
ss.setdefault("ra_rows", pd.DataFrame(columns=["keyword","action"]))
ss.setdefault("qa_keywords", [])
ss.setdefault("ra_keywords", [])
ss.setdefault("qa_selected", [])
ss.setdefault("ra_selected", [])
# =========================
# Demo KB defaults (used when preset.xlsx not loaded)
# =========================
DEFAULT_PRESET_NAME = "Demo (Built-in)"
DEFAULT_PRESETS = {
DEFAULT_PRESET_NAME: {
"summary": (
""
),
"actions": (
"๏ผˆใƒ‡ใƒข็”จ๏ผ‰\n"
"- ใพใšๅฎ‰ๅ…จ็ขบ่ช๏ผˆไปŠใ„ใ‚‹ๅ ดๆ‰€ใƒปๅŠ ๅฎณ่€…ใŒ่ฟ‘ใใซใ„ใ‚‹ใ‹ใƒปๆ€ชๆˆ‘ใฎๆœ‰็„ก๏ผ‰\n"
"- ไปŠๅคœใฎๅฑ…ๅ ดๆ‰€/้ฃŸไบ‹/ใŠ้‡‘/้€šไฟกๆ‰‹ๆฎตใฎ็ขบไฟ\n"
"- ็›ธ่ซ‡ๅ…ˆ๏ผˆ่‡ชๆฒปไฝ“ใƒปๆ”ฏๆดๅ›ฃไฝ“ใƒปๅŒป็™‚๏ผ‰ใธใคใชใ\n"
),
"qna": (
"๏ผˆใƒ‡ใƒข็”จ๏ผ‰\n"
"- ใ™ใใซๅฝน็ซ‹ใคQ&Aใฎๅ€™่ฃœใ‚’่กจ็คบใ—ใพใ™๏ผˆใ‚ญใƒผใƒฏใƒผใƒ‰้ธๆŠž๏ผ‰\n"
),
"rewrite": (
"๏ผˆใƒ‡ใƒข็”จใƒปๅฎ‰ๅ…จๅฏ„ใ‚Šใฎ่จ€ใ„ๆ›ใˆใƒ†ใƒณใƒ—ใƒฌ๏ผ‰\n"
"ใพใšใ€ใ“ใ“ใพใงไธ€ไบบใงๆŠฑใˆใฆใใŸใ“ใจ่‡ชไฝ“ใŒใจใฆใ‚‚ๅคงๅค‰ใงใ—ใŸใญใ€‚\n"
"ไปŠใฎๅฎ‰ๅ…จใ‚’็ขบ่ชใ—ใŸใ„ใงใ™ใ€‚ไปŠใ„ใ‚‰ใฃใ—ใ‚ƒใ‚‹ๅ ดๆ‰€ใฏๅฎ‰ๅ…จใงใ™ใ‹๏ผŸ๏ผˆๅŠ ๅฎณ่€…ใ‚„ๅฑ้™บใชไบบใฏ่ฟ‘ใใซใ„ใพใ™ใ‹๏ผŸ๏ผ‰\n"
"ใ‚‚ใ—ไปŠใ™ใๅฑ้™บใŒใ‚ใ‚‹/ๆ€ชๆˆ‘ใŒใ‚ใ‚‹ๅ ดๅˆใฏใ€ใŸใ‚ใ‚‰ใ‚ใšใซ็ทŠๆ€ฅ้€ฃ็ตก๏ผˆ119/110๏ผ‰ใ‚‚้ธๆŠž่‚ขใงใ™ใ€‚\n"
"ๅทฎใ—ๆ”ฏใˆใชใ‘ใ‚Œใฐใ€โ‘ ๅนด้ฝข โ‘กๅฆŠๅจ ้€ฑๆ•ฐ๏ผˆๅˆ†ใ‹ใ‚Œใฐ๏ผ‰ โ‘ขๅŒๅฑ…/ๅฎถๆ—็Šถๆณ โ‘ฃไปŠๆ—ฅๅ›ฐใฃใฆใ„ใ‚‹ใ“ใจ๏ผˆใŠ้‡‘/ไฝใพใ„/ๆšดๅŠ›/ไฝ“่ชฟ๏ผ‰ใ‚’ๆ•™ใˆใฆใใ ใ•ใ„ใ€‚\n"
),
}
}
# Q&A demo rows (keyword, question, answer)
DEFAULT_QA_ROWS = [
# Underage pregnancy
{
"keyword": "ๆœชๆˆๅนดใฎๅฆŠๅจ ",
"question": "ๆœชๆˆๅนดใงๅฆŠๅจ ใ—ใŸใ‹ใ‚‚ใ—ใ‚Œใพใ›ใ‚“ใ€‚ใพใšไฝ•ใ‚’ใ™ใ‚Œใฐใ„ใ„๏ผŸ",
"answer": (
"ไธๅฎ‰ใงใ„ใฃใฑใ„ใซใชใ‚Šใพใ™ใ‚ˆใญใ€‚ใพใšใฏๅฆŠๅจ ใฎ็ขบ่ช๏ผˆๆคœๆŸป่–ฌ/็”ฃๅฉฆไบบ็ง‘๏ผ‰ใจใ€ไฝ“่ชฟใฎๅฎ‰ๅ…จ็ขบไฟใŒๅ„ชๅ…ˆใงใ™ใ€‚\n"
"ๆฌกใซใ€ไฟก้ ผใงใใ‚‹ๅคงไบบ๏ผˆๅฎถๆ—ใƒปๅญฆๆ กใฎ้คŠ่ญทๆ•™่ซญใƒป่‡ชๆฒปไฝ“ใฎ็›ธ่ซ‡็ช“ๅฃใƒปๆ”ฏๆดๅ›ฃไฝ“๏ผ‰ใซๆ—ฉใ‚ใซ็›ธ่ซ‡ใ—ใฆใ€"
"้ธๆŠž่‚ข๏ผˆ็ถ™็ถš/ไธญๆ–ญ/ๅ‡บ็”ฃๅพŒใฎๆ”ฏๆด๏ผ‰ใ‚’ไธ€็ท’ใซๆ•ด็†ใ™ใ‚‹ใฎใŒๅฎ‰ๅ…จใงใ™ใ€‚"
),
},
{
"keyword": "ๆœชๆˆๅนดใฎๅฆŠๅจ ",
"question": "่ฆชใซ่จ€ใˆใชใ„ใƒปๆ€’ใ‚‰ใ‚Œใใ†ใงๆ€–ใ„",
"answer": (
"่จ€ใ„ๅ‡บใ—ใซใใ„ใฎใฏ่‡ช็„ถใงใ™ใ€‚ใพใšใฏโ€œๅฎ‰ๅ…จใซ็›ธ่ซ‡ใงใใ‚‹ๅคงไบบโ€ใ‚’็ขบไฟใ—ใพใ—ใ‚‡ใ†ใ€‚\n"
"ๅญฆๆ ก๏ผˆ้คŠ่ญทๆ•™่ซญ/ใ‚นใ‚ฏใƒผใƒซใ‚ซใ‚ฆใƒณใ‚ปใƒฉใƒผ๏ผ‰ใ‚„่‡ชๆฒปไฝ“ใฎๅฅณๆ€ง็›ธ่ซ‡ใชใฉใ€ๅฎถๆ—ไปฅๅค–ใฎ็ช“ๅฃใ‹ใ‚‰ๅ…ฅใ‚‹ใฎใ‚‚ๆ–นๆณ•ใงใ™ใ€‚"
),
},
# DV / domestic abuse
{
"keyword": "DVใƒปๅฎถๅบญๅ†…ๆšดๅŠ›",
"question": "ใƒ‘ใƒผใƒˆใƒŠใƒผใŒๆ€–ใ„ใ€‚ๆšดๅŠ›ใ‚„่„…ใ—ใŒใ‚ใ‚‹ใ‹ใ‚‚ใ—ใ‚Œใชใ„",
"answer": (
"ๆ€–ใ„ไธญใง็›ธ่ซ‡ใ—ใฆใใ‚Œใฆใ‚ใ‚ŠใŒใจใ†ใ€‚ๆœ€ๅ„ชๅ…ˆใฏโ€œไปŠใฎๅฎ‰ๅ…จโ€ใงใ™ใ€‚\n"
"ๅŠ ๅฎณ่€…ใŒ่ฟ‘ใใซใ„ใ‚‹/ไปŠใ™ใๅฑ้™บใชใ‚‰็ทŠๆ€ฅ้€ฃ็ตก๏ผˆ110/119๏ผ‰ใ‚‚้ธๆŠž่‚ขใงใ™ใ€‚\n"
"ๅฏ่ƒฝใชใ‚‰ใ€โ‘ ไปŠใ„ใ‚‹ๅ ดๆ‰€ใฏๅฎ‰ๅ…จใ‹ โ‘กๆ€ชๆˆ‘ใฎๆœ‰็„ก โ‘ข้ฟ้›ฃใงใใ‚‹ๅ…ˆ๏ผˆๅ‹ไบบ/ๅฎถๆ—/ๆ”ฏๆด๏ผ‰ใ‚’ไธ€็ท’ใซ็ขบ่ชใ—ใพใ—ใ‚‡ใ†ใ€‚"
),
},
{
"keyword": "DVใƒปๅฎถๅบญๅ†…ๆšดๅŠ›",
"question": "ใ‚นใƒžใƒ›ใ‚’่ฆ‹ใ‚‰ใ‚Œใฆใ„ใ‚‹ใ‹ใ‚‚ใ€‚้€ฃ็ตกใ™ใ‚‹ใจๅฑใชใ„",
"answer": (
"ใใ‚Œใฏ้‡่ฆใชใ‚ตใ‚คใƒณใงใ™ใ€‚ๅฎ‰ๅ…จใฎใŸใ‚ใ€ๅฑฅๆญดใŒๆฎ‹ใ‚Šใซใใ„ๆ‰‹ๆฎต๏ผˆๅˆฅ็ซฏๆœซ/ๅฎ‰ๅ…จใชๅ ดๆ‰€ใฎWi-Fi/็Ÿญใ„ใ‚„ใ‚Šๅ–ใ‚Š๏ผ‰ใ‚’ๆคœ่จŽใ—ใพใ—ใ‚‡ใ†ใ€‚\n"
"โ€œใ„ใพๅฑ้™บใ‹ใฉใ†ใ‹โ€ใฎ็ขบ่ชใ‚’ๅ„ชๅ…ˆใ—ใ€้ฟ้›ฃ่จˆ็”ป๏ผˆ่ท็‰ฉใƒป่บซๅˆ†่จผใƒป็พ้‡‘ใƒป้€ฃ็ตกๅ…ˆ๏ผ‰ใ‚’ๆœ€ๅฐ้™ใงๆ•ดใˆใ‚‹ใฎใ‚‚ๆœ‰ๅŠนใงใ™ใ€‚"
),
},
# Financial hardship
{
"keyword": "ใŠ้‡‘ใŒใชใ„ใƒป็”Ÿๆดปๅ›ฐ็ชฎ",
"question": "ใŠ้‡‘ใŒใชใใฆ็—…้™ขใ‚„็”ŸๆดปใŒไธๅฎ‰ใ€‚ใฉใ†ใ—ใŸใ‚‰ใ„ใ„๏ผŸ",
"answer": (
"ใคใ‚‰ใ„็Šถๆณใงใ™ใญใ€‚ใพใšโ€œไปŠๆ—ฅใ€œไปŠ้€ฑใ‚’ไน—ใ‚Šๅˆ‡ใ‚‹ๆ”ฏใˆโ€ใ‚’็ขบไฟใ—ใพใ—ใ‚‡ใ†ใ€‚\n"
"่‡ชๆฒปไฝ“ใฎ็›ธ่ซ‡๏ผˆ็ฆ็ฅ‰/็”Ÿๆดปๆ”ฏๆด๏ผ‰ใ€ๅฆŠๅจ ไธญใชใ‚‰ๆฏๅญไฟๅฅใฎ็ช“ๅฃใ€้ฃŸๆ–™ใ‚„็‰ฉ่ณ‡ๆ”ฏๆดใฎๅ›ฃไฝ“ใชใฉใ€ไฝฟใˆใ‚‹ๅˆถๅบฆใŒใ‚ใ‚Šใพใ™ใ€‚\n"
"ๅทฎใ—ๆ”ฏใˆใชใ‘ใ‚Œใฐใ€โ‘ ไฝใพใ„ โ‘กๅŽๅ…ฅ็Šถๆณ โ‘ขๅฆŠๅจ ้€ฑๆ•ฐ โ‘ฃไปŠใ„ใกใฐใ‚“่ถณใ‚Šใชใ„ใ‚‚ใฎ๏ผˆ้ฃŸ่ฒป/ๅฎถ่ณƒ/ไบค้€š/ๅ—่จบ๏ผ‰ใ‚’ๆ•™ใˆใฆใใ ใ•ใ„ใ€‚"
),
},
# Mental health
{
"keyword": "ใƒกใƒณใ‚ฟใƒซไธ่ชฟใƒปไธๅฎ‰",
"question": "ไธๅฎ‰ใง็œ ใ‚Œใชใ„ใ€ๆถ™ใŒๆญขใพใ‚‰ใชใ„ใ€‚ใฉใ†ใ—ใŸใ‚‰ใ„ใ„๏ผŸ",
"answer": (
"ใ—ใ‚“ใฉใ„ไธญใงใ‚ˆใ่ฉฑใ—ใฆใใ‚Œใพใ—ใŸใ€‚ใพใšโ€œใ„ใพๅฐ‘ใ—ๆฅฝใซใชใ‚‹ใ“ใจโ€ใ‚’ไธ€็ท’ใซๆŽขใ—ใพใ—ใ‚‡ใ†ใ€‚\n"
"็ก็œ ใ‚„้ฃŸไบ‹ใŒๅดฉใ‚Œใฆใ„ใ‚‹ๆ™‚ใฏใ€็Ÿญใ„ไผ‘ๆฏ๏ผˆๅ‘ผๅธ/ๆฐดๅˆ†/ๆธฉใ‹ใ„้ฃฒใฟ็‰ฉ๏ผ‰โ†’ๆ”ฏๆดใซใคใชใ้ †ใŒๅฎ‰ๅ…จใงใ™ใ€‚\n"
"ใ‚‚ใ—่‡ชๅˆ†ใ‚’ๅ‚ทใคใ‘ใŸใ„ๆฐ—ๆŒใกใŒๅผทใ„/ไปŠใ™ใๅฑ้™บใชใ‚‰ใ€็ทŠๆ€ฅ้€ฃ็ตกใ‚„่บซ่ฟ‘ใชไบบใธใฎ้€ฃ็ตกใ‚’ๅ„ชๅ…ˆใ—ใฆใใ ใ•ใ„ใ€‚"
),
},
# Housing / safety
{
"keyword": "ไฝใพใ„ใƒป้ฟ้›ฃ",
"question": "ๅฎถใซใ„ใ‚‰ใ‚Œใชใ„ใ€‚ไปŠๅคœใฎๅฑ…ๅ ดๆ‰€ใŒใชใ„",
"answer": (
"ใใ‚Œใฏใจใฆใ‚‚ๅˆ‡่ฟซใ—ใฆใ„ใพใ™ใ€‚ใพใšไปŠๅคœใฎๅฎ‰ๅ…จใชๅฑ…ๅ ดๆ‰€ใ‚’็ขบไฟใ™ใ‚‹ใ“ใจใŒๆœ€ๅ„ชๅ…ˆใงใ™ใ€‚\n"
"่‡ชๆฒปไฝ“ใฎ็ช“ๅฃใ‚„ๆ”ฏๆดๅ›ฃไฝ“ใ€็Šถๆณใซใ‚ˆใฃใฆใฏ็ทŠๆ€ฅใฎ้ฟ้›ฃๅ…ˆ๏ผˆไฟ่ญท๏ผ‰ใซใคใชใŒใ‚‹ๅฏ่ƒฝๆ€งใŒใ‚ใ‚Šใพใ™ใ€‚\n"
"ไปŠใ„ใ‚‹ๅ ดๆ‰€ใฎๅฎ‰ๅ…จใจใ€็งปๅ‹•ใงใใ‚‹ๆ‰‹ๆฎต๏ผˆไบค้€š่ฒปใƒป้€ฃ็ตกๆ‰‹ๆฎต๏ผ‰ใ‚’็ขบ่ชใ—ใพใ—ใ‚‡ใ†ใ€‚"
),
},
]
# RA demo rows (keyword, action)
DEFAULT_RA_ROWS = [
# Underage pregnancy
{"keyword": "ๆœชๆˆๅนดใฎๅฆŠๅจ ", "action": "ๅฆŠๅจ ็ขบ่ช๏ผˆๆคœๆŸป่–ฌ/็”ฃๅฉฆไบบ็ง‘๏ผ‰ใจไฝ“่ชฟใฎๅฎ‰ๅ…จ็ขบไฟใ‚’ๅ„ชๅ…ˆใ™ใ‚‹"},
{"keyword": "ๆœชๆˆๅนดใฎๅฆŠๅจ ", "action": "ๅฎ‰ๅ…จใซ็›ธ่ซ‡ใงใใ‚‹ๅคงไบบ๏ผˆๅญฆๆ ก/่‡ชๆฒปไฝ“/ๆ”ฏๆดๅ›ฃไฝ“๏ผ‰ใ‚’็ขบไฟใ™ใ‚‹"},
{"keyword": "ๆœชๆˆๅนดใฎๅฆŠๅจ ", "action": "้ธๆŠž่‚ข๏ผˆ็ถ™็ถš/ไธญๆ–ญ/ๅ‡บ็”ฃๅพŒๆ”ฏๆด๏ผ‰ใ‚’โ€œๆ€ฅใŒใ›ใšโ€ๆ•ด็†ใ™ใ‚‹"},
# DV
{"keyword": "DVใƒปๅฎถๅบญๅ†…ๆšดๅŠ›", "action": "ไปŠใ™ใๅฑ้™บใ‹็ขบ่ช๏ผˆๅŠ ๅฎณ่€…ใŒ่ฟ‘ใ„/ๆ€ชๆˆ‘/็›ฃ่ฆ–๏ผ‰ใ€‚็ทŠๆ€ฅใชใ‚‰110/119ใ‚‚้ธๆŠž่‚ข"},
{"keyword": "DVใƒปๅฎถๅบญๅ†…ๆšดๅŠ›", "action": "้ฟ้›ฃ่จˆ็”ป๏ผš่บซๅˆ†่จผใƒป็พ้‡‘ใƒป่–ฌใƒป้€ฃ็ตกๅ…ˆใ‚’ๆœ€ๅฐ้™ใงๆบ–ๅ‚™"},
{"keyword": "DVใƒปๅฎถๅบญๅ†…ๆšดๅŠ›", "action": "ใƒ‡ใ‚ธใ‚ฟใƒซๅฎ‰ๅ…จ๏ผšๅฑฅๆญดใŒๆฎ‹ใ‚Šใซใใ„้€ฃ็ตกๆ‰‹ๆฎตใ‚’ๆๆกˆ๏ผˆ็Šถๆณใซๅฟœใ˜ใฆ๏ผ‰"},
# Money
{"keyword": "ใŠ้‡‘ใŒใชใ„ใƒป็”Ÿๆดปๅ›ฐ็ชฎ", "action": "ไปŠๆ—ฅใ€œไปŠ้€ฑใฎๅ„ชๅ…ˆ้ †ไฝใ‚’็ขบ่ช๏ผˆ้ฃŸไบ‹/ๅฎถ่ณƒ/ๅ—่จบ/ไบค้€š๏ผ‰"},
{"keyword": "ใŠ้‡‘ใŒใชใ„ใƒป็”Ÿๆดปๅ›ฐ็ชฎ", "action": "่‡ชๆฒปไฝ“ใฎ็ฆ็ฅ‰ใƒปๆฏๅญไฟๅฅใƒป็”Ÿๆดปๆ”ฏๆดใซใคใชใ๏ผˆๅˆถๅบฆใฎๆกˆๅ†…๏ผ‰"},
{"keyword": "ใŠ้‡‘ใŒใชใ„ใƒป็”Ÿๆดปๅ›ฐ็ชฎ", "action": "็ทŠๆ€ฅ็‰ฉ่ณ‡/้ฃŸๆ–™ๆ”ฏๆดใฎ้ธๆŠž่‚ขใ‚’ๆ็คบ๏ผˆๅœฐๅŸŸใซใ‚ˆใ‚Šๅทฎใ‚ใ‚Š๏ผ‰"},
# Mental
{"keyword": "ใƒกใƒณใ‚ฟใƒซไธ่ชฟใƒปไธๅฎ‰", "action": "็ก็œ /้ฃŸไบ‹/ๅฎ‰ๅ…จใฎ็ฐกๆ˜“ใƒใ‚งใƒƒใ‚ฏ๏ผˆ็ทŠๆ€ฅๆ€งใฎ่ฆ‹็ซ‹ใฆ๏ผ‰"},
{"keyword": "ใƒกใƒณใ‚ฟใƒซไธ่ชฟใƒปไธๅฎ‰", "action": "่‡ชๅ‚ทใฎๆ„ๅ›ณใŒใ‚ใ‚‹ๅ ดๅˆใฏๅฎ‰ๅ…จ็ขบไฟใ‚’ๆœ€ๅ„ชๅ…ˆ๏ผˆ่บซ่ฟ‘ใชไบบ/็ทŠๆ€ฅ้€ฃ็ตก/็›ธ่ซ‡็ช“ๅฃ๏ผ‰"},
{"keyword": "ใƒกใƒณใ‚ฟใƒซไธ่ชฟใƒปไธๅฎ‰", "action": "ๅŒป็™‚/็›ธ่ซ‡ๆฉŸ้–ขใซใคใชใๅ‰ใซใ€็Ÿญใ„่ฝใก็€ใ‘ใ‚‹่กŒๅ‹•๏ผˆๅ‘ผๅธ/ๆฐดๅˆ†/ไผ‘ๆฏ๏ผ‰ใ‚’ๆๆกˆ"},
# Housing
{"keyword": "ไฝใพใ„ใƒป้ฟ้›ฃ", "action": "ไปŠๅคœใฎๅฑ…ๅ ดๆ‰€ใฎ็ขบไฟใ‚’ๆœ€ๅ„ชๅ…ˆ๏ผˆๅฎ‰ๅ…จใชๅ ดๆ‰€ใธ็งปๅ‹•ใงใใ‚‹ใ‹็ขบ่ช๏ผ‰"},
{"keyword": "ไฝใพใ„ใƒป้ฟ้›ฃ", "action": "้ฟ้›ฃๅ…ˆๅ€™่ฃœ๏ผˆๅ‹ไบบ/ๅฎถๆ—/ๆ”ฏๆด๏ผ‰ใจ็งปๅ‹•ๆ‰‹ๆฎตใฎ็ขบ่ช"},
]
DEFAULT_HOTLINES = [
{"name": "็ทŠๆ€ฅ", "dial": "110 / 119", "url": "", "hours": "24h", "note": "ไปŠใ™ใๅฑ้™บใƒปๆ€ชๆˆ‘ใŒใ‚ใ‚‹ๅ ดๅˆ"},
{"name": "่‡ชๆฒปไฝ“ใฎๅฅณๆ€ง็›ธ่ซ‡ใƒปๆฏๅญไฟๅฅ๏ผˆไพ‹๏ผ‰", "dial": "", "url": "", "hours": "", "note": "ๅœฐๅŸŸใฎ็ช“ๅฃใธ๏ผˆๅธ‚ๅŒบ็”บๆ‘ใงๅ็งฐใŒ็•ฐใชใ‚Šใพใ™๏ผ‰"},
]
def _demo_unique_keywords(df: pd.DataFrame, col: str = "keyword", cap: int = 500) -> List[str]:
if df is None or df.empty or (col not in df.columns):
return []
kws = sorted({str(x).strip() for x in df[col].tolist() if str(x).strip()})
return kws[:cap]
def _load_demo_kb_if_empty():
"""
Load built-in demo KB only when nothing is loaded from preset.xlsx yet.
- If user later loads Excel, the Excel KB will overwrite these.
"""
if ss.get("_demo_kb_bootstrapped", False):
return
# Presets
if not ss.get("presets"):
ss["presets"] = dict(DEFAULT_PRESETS)
ss["active_preset"] = DEFAULT_PRESET_NAME
# Hotlines
if not ss.get("hotlines"):
ss["hotlines"] = list(DEFAULT_HOTLINES)
# Q&A
if ss.get("qa_rows") is None or ss["qa_rows"].empty:
ss["qa_rows"] = pd.DataFrame(DEFAULT_QA_ROWS)
ss["qa_keywords"] = _demo_unique_keywords(ss["qa_rows"], "keyword")
# RA
if ss.get("ra_rows") is None or ss["ra_rows"].empty:
ss["ra_rows"] = pd.DataFrame(DEFAULT_RA_ROWS)
ss["ra_keywords"] = _demo_unique_keywords(ss["ra_rows"], "keyword")
ss["_demo_kb_bootstrapped"] = True
# Call once at startup (after session_state defaults exist)
# =========================
# Risk detection
# =========================
# =========================
# Risk detection patterns (JP) — expanded for Koala DX dummy wording
# =========================
# HIGH: ใงใใ‚‹ใ ใ‘ใ€Œๆ˜Ž็ขบใช่‡ชๅ‚ทใƒป่‡ชๆฎบๆ„ๅ›ณใ€ใ€Œๅˆ‡่ฟซใ€ใ€Œ็›ด่ฟ‘ใฎๅฎŸ่กŒใ‚’็คบๅ”†ใ€ใ€Œ้‡็ฏคใช็ทŠๆ€ฅ็—‡็Šถใ€ใ‚’ไธญๅฟƒใซ
RISK_PATTERNS_HIGH = [
# ๆ˜Ž็ขบใชๅธŒๆญปๅฟตๆ…ฎ/่‡ชๆฎบ่กจๆ˜Ž
r"ๆญปใซใŸใ„",
r"่‡ชๆฎบ(ใ™ใ‚‹|ใ—ใŸใ„)",
r"ๆถˆใˆใŸใ„",
r"็”Ÿใใฆใ„ใŸใใชใ„",
r"ใ‚‚ใ†็”Ÿใใ‚‰ใ‚Œใชใ„",
r"ใ‚‚ใ†็ต‚ใ‚ใ‚Šใซใ—ใŸใ„",
r"ๆฅฝใซใชใ‚ŠใŸใ„.*(ๆญป|ๆถˆ)",
r"ๅคฉๅ›ฝ(ใซ|ใธ)่กŒใใŸใ„",
# ๅ…ทไฝ“ๆ€งใƒปๅˆ‡่ฟซๆ„Ÿ๏ผˆๆ™‚้–“/ไปŠใ™ใ ็ญ‰๏ผ‰
r"(ไปŠใ™ใ|ไปŠๆ—ฅ|ไปŠๅคœ|ใ“ใ‚Œใ‹ใ‚‰).*(ๆญป|่‡ชๆฎบ|ๆถˆใˆ|็ต‚ใ‚ใ‚Š)",
r"(ๆœ€ๅพŒ|้บๆ›ธ|ใ•ใ‚ˆใชใ‚‰).*(ๆ›ธ|ๆฎ‹|่จ€)",
r"(ใ‚‚ใ†|ใปใ‚“ใจใซ).*(้™็•Œ|็„ก็†).*(ๆญป|ๆถˆใˆ|็ต‚ใ‚ใ‚Š)",
# ่‡ชๅ‚ท๏ผˆๆ˜Ž็คบ๏ผ‰
r"ใƒชใ‚นใ‚ซ",
r"่‡ชๅ‚ท(ใ™ใ‚‹|ใ—ใŸ)",
r"(ๅˆ‡ใฃใŸ|ๅˆ‡ใฃใฆใ—ใพใฃใŸ|ๅ‚ท(ใ‚’)?ไฝœใฃใŸ)",
r"(่ก€ใŒ|ๅ‡บ่ก€).*(ๆญขใพใ‚‰ใชใ„|ๆญขใพใ‚‰ใ‚“|ใ‚„ใฐใ„)",
r"(ๆ„่ญ˜ใŒ|ๆฐ—ใ‚’)ๅคฑ(ใฃใŸ|ใ„ใใ†)",
]
# MEDIUM: ็›ดๆŽฅใƒฏใƒผใƒ‰ใŒๅผฑใ„/ๆ›–ๆ˜งใงใ‚‚ใ€Œๅผทใ„็ตถๆœ›ใ€ใ€ŒๅŠฉใ‘ๆฑ‚ใ‚ใ€ใ€ŒDV/่™ๅพ…ใฎๅฑ้™บใ€ใ‚’ๆ‹พใ†
RISK_PATTERNS_MEDIUM = [
# ๅธŒๆญปๅฟตๆ…ฎใฎๅฉ‰ๆ›ฒ่กจ็พ
r"ใ„ใชใใชใ‚ŠใŸใ„",
r"ๆถˆใˆใฆใ—ใพใ„ใŸใ„",
r"ๅ…จ้ƒจใ‚„ใ‚ใŸใ„",
r"ใ‚‚ใ†็„ก็†",
r"ใ‚‚ใ†ใ„ใ‚„",
r"้™็•Œ",
r"ใ—ใ‚“ใฉใ„",
r"่‹ฆใ—ใ„",
r"ใคใ‚‰ใ„",
r"ๅฃŠใ‚Œใใ†",
r"้ ญใŒใŠใ‹ใ—ใใชใ‚Šใใ†",
r"ไฝ•ใ‚‚ๆ„Ÿใ˜ใชใ„",
r"ไฝ•ใ‚‚ใงใใชใ„",
# ๅผทใ„ๅญค็ซ‹/ๆ•‘ๆด่ฆ่ซ‹๏ผˆใ‚ใชใŸใฎใƒ€ใƒŸใƒผใงๅ‡บใ‚„ใ™ใ„๏ผ‰
r"ๅŠฉใ‘ใฆ",
r"่ชฐใ‹ๅŠฉใ‘ใฆ",
r"ไธ€ไบบ(ใผใฃใก|ใใ‚Š)ใง",
r"้ ผใ‚Œใ‚‹ไบบใŒใ„ใชใ„",
r"่ฉฑใ›ใ‚‹ไบบใŒใ„ใชใ„",
r"ใ‚‚ใ†ใฉใ†ใ—ใŸใ‚‰ใ„ใ„ใ‹ใ‚ใ‹ใ‚‰ใชใ„",
r"้™็•Œ.*ๅŠฉใ‘ใฆ",
r"ๆณฃ(ใ„ใฆ|ใ)ใฐใ‹ใ‚Š",
r"็œ (ใ‚Œใชใ„|ใ‚Œใชใใฆ)",
r"้ฃŸใน(ใ‚‰ใ‚Œใชใ„|ใ‚Œใชใใฆ)",
# ็”ฃๅพŒ/่‚ฒๅ…ใฎใƒกใƒณใ‚ฟใƒซ๏ผˆ้Žๅบฆใซๅบƒใ’ใ™ใŽใชใ„็ฏ„ๅ›ฒ๏ผ‰
r"็”ฃๅพŒ(ใ†ใค|้ฌฑ)",
r"่‚ฒๅ…(ใ†ใค|้ฌฑ)",
r"ใƒ‘ใƒ‹ใƒƒใ‚ฏ",
r"็™บไฝœ",
r"้Žๅ‘ผๅธ",
r"ๅ‹•ๆ‚ธใŒ",
r"ๆฏ(ใŒ)?่‹ฆใ—ใ„",
r"ใผใƒผใฃใจใ—ใฆ",
r"็พๅฎŸๆ„ŸใŒใชใ„",
r"ๆถ™ใŒๆญขใพใ‚‰ใชใ„",
# DV/ๅฎถๆ—้–ขไฟ‚ใงใฎๅฑ้™บ๏ผˆโ€œไปŠๅฑใชใ„โ€ใƒ‹ใƒฅใ‚ขใƒณใ‚นใ‚’ๆ‹พใ†๏ผ‰
r"(ๆฎดใ‚‰ใ‚ŒใŸ|ๅฉใ‹ใ‚ŒใŸ|่นดใ‚‰ใ‚ŒใŸ)",
r"(ๆ€’้ณดใ‚‰ใ‚Œ|่„…ใ•ใ‚Œ|ๆๅ–ใ•ใ‚Œ)",
r"ๅฎถ(ใซ)?ๅธฐใ‚Œใชใ„",
r"(ๆ€–ใ„|ใ“ใ‚ใ„).*(ๅคซ|ๆ—ฆ้‚ฃ|ใƒ‘ใƒผใƒˆใƒŠใƒผ|ๅฎถๆ—)",
r"(็›ฃ่ฆ–|ๆŸ็ธ›|้ตใ‚’|ใ‚นใƒžใƒ›ใ‚’).*(ๅ–ใ‚‰ใ‚Œ|ๅฃŠใ•ใ‚Œ|่ฆ‹ใ‚‰ใ‚Œ)",
r"(้€ƒใ’ใŸใ„|้ฟ้›ฃใ—ใŸใ„)",
# ๅญใฉใ‚‚ใธใฎๅฑ้™บใ‚ตใ‚คใƒณ๏ผˆ็›ดๆŽฅ็š„ใ™ใŽใชใ„ใŒ่ฆๆณจๆ„ใƒฏใƒผใƒ‰๏ผ‰
r"ๅญใฉใ‚‚(ใ‚’)?(ๅฉใ„ใฆ|ๆฎดใฃใฆ|ๆ€’้ณดใฃใฆ)ใ—ใพใฃใŸ",
r"ๆ‰‹ใŒๅ‡บใใ†",
r"(ๆ”พ็ฝฎ|็ฝฎใๅŽปใ‚Š)ใ—ใฆใ—ใพใ„ใใ†",
r"ๅญใฉใ‚‚ใŒ(ๆ€–ใ„|่‹ฆๆ‰‹)",
]
RISK_PATTERNS = RISK_PATTERNS_HIGH + RISK_PATTERNS_MEDIUM
def detect_risk(text: str, role: str | None = None) -> Tuple[str, List[str]]:
"""
Risk detection should ONLY evaluate end-user messages.
If role is not "user", returns LOW regardless of text.
"""
r = (role or "").lower().strip()
if r != "user":
return "LOW", []
text_norm = str(text or "").strip()
hits = []
for pat in RISK_PATTERNS:
if re.search(pat, text_norm, flags=re.IGNORECASE):
hits.append(pat)
if hits:
high_terms = [h for h in hits if re.search(r"(ๆญปใซใŸใ„|่‡ชๆฎบ|ใƒชใ‚นใ‚ซ|้ฃ›ใณ้™ใ‚Š|้ฆ–ใ‚’)", h)]
level = "HIGH" if high_terms else "MEDIUM"
return level, hits
return "LOW", []
# =========================
# AES key handling (session, TTL)
# =========================
def _b64_to_bytes(s: Optional[str]) -> Optional[bytes]:
if not s: return None
t = "".join(str(s).split())
t += "=" * ((4 - len(t) % 4) % 4)
return base64.b64decode(t)
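# Illustrative: whitespace is stripped and '=' padding restored before decoding, e.g.
#   _b64_to_bytes("aGVsbG8")   -> b"hello"
#   _b64_to_bytes("aGVs bG8=") -> b"hello"
#   _b64_to_bytes("")          -> None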
def _set_session_key(key_bytes: bytes):
ss["d1_key_b64"] = base64.b64encode(key_bytes).decode()
ss["d1_key_set_ts"] = datetime.utcnow().timestamp()
def _get_session_key() -> Optional[bytes]:
b64 = ss.get("d1_key_b64")
ts = ss.get("d1_key_set_ts", 0)
if not b64:
return None
if (datetime.utcnow().timestamp() - ts) > KEY_TTL_MIN * 60:
ss.pop("d1_key_b64", None)
ss.pop("d1_key_set_ts", None)
return None
return base64.b64decode(b64)
def _clear_session_key():
ss.pop("d1_key_b64", None)
ss.pop("d1_key_set_ts", None)
# =========================
# Cloudflare Worker pull + decrypt (in-memory)
# =========================
def _http_get(url: str, headers: Optional[dict] = None, params: Optional[dict] = None, timeout: int = 30):
r = requests.get(url, headers=headers or {}, params=params or {}, timeout=timeout)
r.raise_for_status()
return r
def fetch_since(base_url: str, api_key: str, since_ms: int = 0, limit: int = 1000) -> List[Dict]:
url = f"{base_url.rstrip('/')}/messages"
headers = {"x-api-key": api_key} if api_key else {}
params = {"since": int(since_ms), "limit": int(limit)}
r = requests.get(url, headers=headers, params=params, timeout=30)
r.raise_for_status()
data = r.json()
if isinstance(data, dict) and "results" in data:
return data["results"] or []
return data if isinstance(data, list) else []
def decrypt_row(aes_key_bytes: bytes, row: Dict) -> Optional[str]:
ct_b64 = row.get("ciphertext_b64")
iv_b64 = row.get("iv_b64")
if not ct_b64 or not iv_b64:
return None
try:
ct = _b64_to_bytes(ct_b64)
iv = _b64_to_bytes(iv_b64)
if not ct or not iv:
return None
return AESGCM(aes_key_bytes).decrypt(iv, ct, None).decode("utf-8")
except Exception:
return None
def import_worker_to_memory(
base_url: str,
api_key: str,
aes_key_b64: str,
since_ms: int = 0,
limit: int = 1000,
overwrite: bool = False,
) -> Tuple[bool, str]:
"""
Fetch encrypted rows from Worker (/messages), decrypt (AES-GCM), and load into ss["messages_df"].
Improvements vs previous version:
- Enforces since_ms + limit (assuming fetch_since() actually sends query params)
- Robust handling for mixed row schemas:
* accepts plaintext row["text"] if present
* tries alternate ciphertext/iv field names if Worker schema differs
- Returns detailed counters: fetched vs kept vs skipped (missing vs decrypt_fail)
"""
# ---- validate AES key ----
try:
aes_key_bytes = _b64_to_bytes((aes_key_b64 or "").strip())
if not aes_key_bytes or len(aes_key_bytes) not in (16, 24, 32):
return False, "Invalid AES key: must be base64 of 16/24/32 bytes."
except Exception:
return False, "Invalid AES key: base64 decode error."
# ---- fetch rows (Worker) ----
try:
rows = fetch_since(base_url, api_key, since_ms=int(since_ms), limit=int(limit))
except Exception as e:
return False, f"Fetch failed: {e}"
if not rows:
return True, "No rows returned from Worker."
# ---- local helpers for schema-flex decrypt ----
def _first_present(d: Dict, keys: List[str]) -> Optional[str]:
for k in keys:
v = d.get(k)
if v is None:
continue
s = str(v).strip()
if s:
return s
return None
def _decrypt_or_plain(row: Dict) -> Tuple[Optional[str], str]:
"""
Returns (text, reason)
reason in {"ok_plain", "ok_decrypt", "missing_cipher", "decrypt_fail"}
"""
# 1) plaintext fallback if Worker already gives it
plain = _first_present(row, ["text", "plain", "message", "body"])
if plain:
return plain, "ok_plain"
# 2) try decrypt with flexible field names
ct_b64 = _first_present(row, ["ciphertext_b64", "cipher_b64", "ciphertext", "ct_b64", "ct"])
iv_b64 = _first_present(row, ["iv_b64", "nonce_b64", "iv", "nonce"])
if not ct_b64 or not iv_b64:
return None, "missing_cipher"
# temporarily map into the expected keys for decrypt_row()
tmp = dict(row)
tmp["ciphertext_b64"] = ct_b64
tmp["iv_b64"] = iv_b64
out = decrypt_row(aes_key_bytes, tmp)
if out is None:
return None, "decrypt_fail"
return out, "ok_decrypt"
# ---- instrumentation counters ----
total = len(rows)
kept = 0
ok_plain = 0
ok_decrypt = 0
missing_cipher = 0
decrypt_fail = 0
example_missing_keys: List[str] = []
example_fail_keys: List[str] = []
# Optional: early sanity check (but don't hard-fail if first row is a non-message row)
# We'll just proceed and report counters.
new_records = []
for row in rows:
text, reason = _decrypt_or_plain(row)
if text is None:
if reason == "missing_cipher":
missing_cipher += 1
if len(example_missing_keys) < 3:
example_missing_keys.append(", ".join(sorted(list(row.keys()))[:25]))
else:
decrypt_fail += 1
if len(example_fail_keys) < 3:
example_fail_keys.append(", ".join(sorted(list(row.keys()))[:25]))
continue
kept += 1
if reason == "ok_plain":
ok_plain += 1
else:
ok_decrypt += 1
# ts handling: prefer ts_ms (epoch ms). Fall back to ts iso, else now.
ts_iso = None
ts_ms = row.get("ts_ms")
if ts_ms is not None and str(ts_ms).strip() != "":
try:
ts_iso = datetime.fromtimestamp(int(ts_ms) / 1000, tz=timezone.utc).isoformat()
except Exception:
ts_iso = None
if not ts_iso:
ts_raw = row.get("ts")
if ts_raw and str(ts_raw).strip():
# If already ISO-like, keep it; otherwise fallback to now
ts_iso = str(ts_raw).strip()
else:
ts_iso = datetime.now(timezone.utc).isoformat()
user_id = str(row.get("user_id") or "U_unknown")
display_name = str(row.get("display_name") or "").strip()
role = str(row.get("role") or "user").strip().lower() # allow Worker to supply
lvl, terms = detect_risk(text, role=role)
new_records.append(
{
"ts": ts_iso,
"user_id": user_id,
"display_name": display_name,
"role": role,
"text": text,
"risk_level": lvl,
"risk_terms": terms,
}
)
new_df = pd.DataFrame(new_records)
if overwrite:
ss["messages_df"] = new_df.reset_index(drop=True)
else:
ss["messages_df"] = pd.concat([ss["messages_df"], new_df], ignore_index=True)
msg = (
f"Fetched {total} rows from Worker. "
f"Kept {kept} (plain={ok_plain}, decrypted={ok_decrypt}). "
f"Skipped {missing_cipher} (missing iv/cipher) + {decrypt_fail} (decrypt failed)."
)
# Add tiny hints if we skipped a lot (no huge dumps)
if missing_cipher > 0 and example_missing_keys:
msg += " Example missing-key row keys: [" + " | ".join(example_missing_keys) + "]"
if decrypt_fail > 0 and example_fail_keys:
msg += " Example decrypt-fail row keys: [" + " | ".join(example_fail_keys) + "]"
return True, msg
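# Usage sketch (the sidebar "Fetch & Decrypt" button below calls it essentially like this):
#   ok, msg = import_worker_to_memory(
#       base_url=WORKER_BASE_DEFAULT,
#       api_key=READ_API_KEY_DEFAULT,
#       aes_key_b64="<base64-encoded 16/24/32-byte key>",
#       since_ms=0, limit=1000, overwrite=True,
#   )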
# =========================
# CSV โ†’ memory (no disk)
# =========================
def import_csv_to_memory(file, overwrite: bool = True) -> Tuple[bool, str]:
try:
df = pd.read_csv(file, dtype=str).fillna("")
except Exception as e:
return False, f"Failed to read CSV: {e}"
required = {"user_id", "role", "text"}
if not required.issubset(set(df.columns)):
return False, f"CSV must contain columns: {', '.join(sorted(required))}"
now = datetime.now(timezone.utc)
if "ts" not in df.columns or all(str(x).strip() == "" for x in df["ts"]):
base_time = now - timedelta(minutes=len(df))
df["ts"] = [(base_time + timedelta(minutes=i)).isoformat() for i in range(len(df))]
if "display_name" not in df.columns:
df["display_name"] = ""
levels, terms_list = [], []
for role, text in zip(df["role"].tolist(), df["text"].tolist()):
lvl, terms = detect_risk(text, role=role)
levels.append(lvl)
terms_list.append(terms)
df = df[["ts", "user_id", "display_name", "role", "text", "risk_level", "risk_terms"]]
if overwrite:
ss["messages_df"] = df.reset_index(drop=True)
else:
ss["messages_df"] = pd.concat([ss["messages_df"], df], ignore_index=True)
return True, f"Imported {len(df)} rows into memory."
# =========================
# Presets & KB from Excel (no hardcoded)
# =========================
def parse_preset_sheet(df: pd.DataFrame) -> Dict[str, str]:
"""
Accept either:
A) Row-wise key/value columns: ["key","value"] (case-insensitive) OR
B) Column-wise single-row: columns include any of {summary, actions, qna, rewrite}
If multiple rows, join non-empty cells with two newlines.
"""
out = {"summary": "", "actions": "", "qna": "", "rewrite": ""}
cols_lower = {c.lower(): c for c in df.columns}
# A) key/value
if ("key" in cols_lower) and ("value" in cols_lower):
key_col = cols_lower["key"]
val_col = cols_lower["value"]
for _, r in df.iterrows():
k = str(r.get(key_col, "")).strip().lower()
v = str(r.get(val_col, "")).strip()
if k in out:
out[k] = v
return out
# B) column-wise
for k in list(out.keys()):
if k in cols_lower:
col = cols_lower[k]
vals = [str(x).strip() for x in df[col].tolist() if str(x).strip()]
out[k] = "\n\n".join(vals)
return out
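# The two accepted sheet shapes, side by side (illustrative cell text):
#   A) key/value rows              B) one value per column
#      key     | value                summary | actions | qna | rewrite
#      summary | "..."                "..."   | "..."   | ... | ...
#      actions | "..."             (multiple rows per column are joined with blank lines)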
def _normalize_cols(df: pd.DataFrame) -> pd.DataFrame:
df2 = df.copy()
df2.columns = [str(c).strip() for c in df2.columns]
return df2
def load_presets_and_kb_from_xlsx(filelike) -> Tuple[Dict[str, Dict[str, str]], List[Dict[str, str]], pd.DataFrame, pd.DataFrame]:
"""
Returns (presets_dict, hotlines_list, qa_df, ra_df)
- Presets: any sheet EXCEPT "Hotlines", "Q&A", "QA", "RA"
Expected columns: summary, actions, qna, rewrite (or key/value)
- Hotlines: optional sheet "Hotlines" with name, dial/url, hours, note
- Q&A: sheet "Q&A" (preferred) or "QA" with columns: keyword, question, answer
- RA: sheet "RA" with columns: keyword, action (or actions)
"""
presets: Dict[str, Dict[str, str]] = {}
hotlines: List[Dict[str, str]] = []
qa_df = pd.DataFrame(columns=["keyword", "question", "answer"])
ra_df = pd.DataFrame(columns=["keyword", "action"])
xls = pd.ExcelFile(filelike)
for sheet in xls.sheet_names:
df = _normalize_cols(xls.parse(sheet).fillna(""))
low = sheet.strip().lower()
if low == "hotlines":
name_col = next((c for c in df.columns if c.lower() == "name"), None)
dial_col = next((c for c in df.columns if c.lower() in ("dial","phone","tel")), None)
url_col = next((c for c in df.columns if c.lower() in ("url","link")), None)
hours_col= next((c for c in df.columns if c.lower() == "hours"), None)
note_col = next((c for c in df.columns if c.lower() == "note"), None)
for _, r in df.iterrows():
name = str(r.get(name_col,"")).strip() if name_col else ""
if not name:
continue
hotlines.append({
"name": name,
"dial": str(r.get(dial_col,"")).strip() if dial_col else "",
"url": str(r.get(url_col,"")).strip() if url_col else "",
"hours": str(r.get(hours_col,"")).strip() if hours_col else "",
"note": str(r.get(note_col,"")).strip() if note_col else "",
})
continue
if low in ("q&a","qa"):
# Columns: keyword, question, answer
kcol = next((c for c in df.columns if c.lower() == "keyword"), None)
qcol = next((c for c in df.columns if c.lower() == "question"), None)
acol = next((c for c in df.columns if c.lower() == "answer"), None)
if kcol and (qcol or acol):
tmp = pd.DataFrame({
"keyword": df[kcol].astype(str).str.strip(),
"question": df[qcol].astype(str).str.strip() if qcol else "",
"answer": df[acol].astype(str).str.strip() if acol else "",
})
qa_df = tmp[(tmp["keyword"]!="") & ((tmp["question"]!="") | (tmp["answer"]!=""))].reset_index(drop=True)
continue
if low == "ra":
# Columns: keyword, action (or actions)
kcol = next((c for c in df.columns if c.lower() == "keyword"), None)
acol = next((c for c in df.columns if c.lower() in ("action","actions")), None)
if kcol and acol:
tmp = pd.DataFrame({
"keyword": df[kcol].astype(str).str.strip(),
"action": df[acol].astype(str).str.strip(),
})
ra_df = tmp[(tmp["keyword"]!="") & (tmp["action"]!="")].reset_index(drop=True)
continue
# Otherwise: treat as a preset sheet
presets[sheet] = parse_preset_sheet(df)
return presets, hotlines, qa_df, ra_df
def unique_keywords_from_df(df: pd.DataFrame, col: str = "keyword") -> List[str]:
if df is None or df.empty or (col not in df.columns):
return []
kws = sorted({str(x).strip() for x in df[col].tolist() if str(x).strip()})
return kws[:500] # sanity cap
def _strip_json_codefences(s: str) -> str:
# remove ```json ... ``` fences if present
s = str(s or "")
if "```" in s:
s = re.sub(r"```(?:json)?\s*(.*?)\s*```", r"\1", s, flags=re.DOTALL | re.IGNORECASE)
return s.strip()
def _extract_first_json_blob(s: str) -> str:
"""
Try to pull the first JSON-looking object/array from a messy response.
Returns "" if nothing plausible found.
"""
s = _strip_json_codefences(s)
# Fast path: already pure JSON
if (s.startswith("{") and s.endswith("}")) or (s.startswith("[") and s.endswith("]")):
return s
# Try to find an array first, then an object (greedy, but usually fine here)
m = re.search(r"\[.*\]", s, flags=re.DOTALL)
if m:
return m.group(0).strip()
m = re.search(r"\{.*\}", s, flags=re.DOTALL)
if m:
return m.group(0).strip()
return ""
def safe_parse_json_keywords(s: str) -> List[str]:
"""
Accepts either:
- ["kw1","kw2"]
- {"keywords":["kw1","kw2"]}
Returns list[str] (deduped, up to 3).
"""
try:
blob = _extract_first_json_blob(s)
if not blob:
return []
data = json.loads(blob)
if isinstance(data, list):
arr = data
elif isinstance(data, dict):
# common variants
if "keywords" in data:
arr = data["keywords"]
elif "keyword" in data:
arr = data["keyword"]
elif "selected" in data:
arr = data["selected"]
else:
return []
else:
return []
# normalize to list
if isinstance(arr, (str, int, float)):
arr = [arr]
out: List[str] = []
for x in (arr or []):
sx = str(x).strip()
if sx and sx not in out:
out.append(sx)
return out[:3]
except Exception:
return []
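# All of these inputs parse to ["kw1", "kw2"] (illustrative):
#   '["kw1", "kw2"]'
#   '{"keywords": ["kw1", "kw2"]}'
#   '```json\n{"keywords": ["kw1", "kw2"]}\n```'   (fences removed by _strip_json_codefences)
# Anything unparseable returns [] so the caller can fall back to literal keyword matching.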
def _fallback_keywords_from_text(resp_text: str, all_keywords: List[str], top_k: int = 3) -> List[str]:
"""
Lenient fallback:
If the model didn't return valid JSON, try to pick keywords that literally appear
in the response text (case-insensitive for ASCII; JP unaffected).
"""
text = str(resp_text or "").strip()
if not text:
return []
text_low = text.lower()
hits: List[Tuple[int, int, str]] = [] # (pos, -len, keyword)
for kw in all_keywords:
k = str(kw or "").strip()
if not k:
continue
k_low = k.lower()
pos = text_low.find(k_low)
if pos >= 0:
hits.append((pos, -len(k_low), k))
if not hits:
return []
hits.sort()
picked: List[str] = []
for _, __, k in hits:
if k not in picked:
picked.append(k)
if len(picked) >= top_k:
break
return picked
def llm_pick_keywords(context_text: str, all_keywords: List[str], kind: str = "Q&A", top_k: int = 3) -> List[str]:
"""
Calls OpenAI (Responses API) to select 1–3 keywords from all_keywords that best match the context_text.
More lenient parsing:
1) Try strict JSON parse
2) If not parseable, fallback to literal keyword mentions in the model output
Returns a (possibly empty) list of selected keywords.
"""
if not OPENAI_API_KEY:
return []
if not all_keywords:
return []
# Keep prompt reasonable
kw_list = ", ".join(all_keywords[:2000])
sys_prompt = (
"You are a careful assistant. Given a context and a list of available keywords, "
f"pick 1โ€“{top_k} {kind} keywords that are MOST relevant. "
"Output ONLY JSON, no commentary. Either a JSON array of strings, or {\"keywords\": [..]}."
)
user_prompt = (
f"Context (truncated):\n\n{context_text}\n\n"
f"Available {kind} keywords (choose at most {top_k}):\n{kw_list}\n\n"
"Return JSON only."
)
payload = {
"model": OPENAI_MODEL,
"input": [
{"role": "system", "content": [{"type": "input_text", "text": sys_prompt}]},
{"role": "user", "content": [{"type": "input_text", "text": user_prompt}]},
],
"store": False,
}
try:
r = requests.post(
OPENAI_URL,
headers={"Authorization": f"Bearer {OPENAI_API_KEY}", "Content-Type": "application/json"},
json=payload,
timeout=60,
)
if r.status_code != 200:
return []
data = r.json()
raw = (_extract_responses_output_text(data) or "").strip()
if not raw:
return []
# 1) Strict JSON parse
picked = safe_parse_json_keywords(raw)
# 2) Lenient fallback: literal mentions in model output
if not picked:
picked = _fallback_keywords_from_text(raw, all_keywords, top_k=top_k)
# Keep only those present in the known list (case-insensitive match)
aset = {str(k).lower(): k for k in all_keywords}
final: List[str] = []
for p in picked:
v = aset.get(str(p).lower())
if v and v not in final:
final.append(v)
return final[:top_k]
except Exception:
return []
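# Usage sketch (hypothetical wiring; the actual UI hookup appears further down this file):
#   ctx    = last_context_with_sentence_guard(dfu)          # defined below
#   picked = llm_pick_keywords(ctx, ss["qa_keywords"], kind="Q&A", top_k=3)
#   rows   = ss["qa_rows"][ss["qa_rows"]["keyword"].isin(picked)]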
def get_active_preset_content() -> Dict[str, str]:
name = ss.get("active_preset") or ""
return (ss["presets"].get(name) or {"summary": "", "actions": "", "qna": "", "rewrite": ""})
_load_demo_kb_if_empty()
# =========================
# Utility: views & context
# =========================
def db_all_messages() -> pd.DataFrame:
return ss["messages_df"]
def db_recent_messages(user_id: Optional[str]=None, minutes: int=1440) -> pd.DataFrame:
df = ss["messages_df"]
if df.empty:
return df
since = (datetime.utcnow() - timedelta(minutes=minutes)).replace(tzinfo=timezone.utc)
dtcol = pd.to_datetime(df["ts"], utc=True, errors="coerce")
mask = (dtcol >= since)
if user_id:
mask &= (df["user_id"] == user_id)
return df[mask].copy().sort_values(by="ts", ascending=True)
def db_distinct_users() -> List[str]:
df = ss["messages_df"]
if df.empty:
return []
return sorted([u for u in df["user_id"].dropna().unique() if str(u).strip()])
def resolve_user_label(user_id: str, df_all: pd.DataFrame) -> str:
# Prefer nickname from ss.user_index; else most recent non-empty display_name from df; else suffix of user_id.
nm = (ss.get("user_index", {}).get(user_id, {}) or {}).get("nickname", "").strip()
if nm:
base = nm
else:
dfx = df_all[df_all["user_id"] == user_id]
dn = ""
if not dfx.empty:
dn_series = dfx[dfx["display_name"].astype(str).str.strip() != ""].tail(1)["display_name"]
if not dn_series.empty:
dn = str(dn_series.iloc[0]).strip()
base = dn or user_id
suffix = user_id[-6:] if len(user_id) >= 6 else user_id
return f"{base} ({suffix})"
def split_sentences(text: str) -> List[str]:
# Rough splitter for JA/EN punctuation; keeps punctuation with sentence.
parts = re.split(r'(?<=[ใ€‚๏ผŽ\.!\?๏ผ๏ผŸ])\s*', text)
return [p for p in parts if p.strip()]
def approx_tokens(s: str) -> int:
# crude but works cross-lang: ~4 chars per token on average
return max(1, int(len(s) / 4))
def last_context_with_sentence_guard(dfu: pd.DataFrame, max_tokens: int = 500) -> str:
# Build role: text lines from recent convo
lines = []
for _, r in dfu.tail(200).iterrows():
text_clean = str(r["text"]).strip().replace("\n", " ")
role = r.get("role", "user")
lines.append(f"{role}: {text_clean}")
blob = "\n".join(lines).strip()
if not blob:
return ""
# If already short, return
if approx_tokens(blob) <= max_tokens:
return blob
# Sentence guard: accumulate from end by full sentences
sents = split_sentences(blob)
picked = []
count = 0
for sent in reversed(sents):
picked.append(sent)
count += approx_tokens(sent)
if count >= max_tokens:
break
return "\n".join(reversed(picked)).strip()
# =========================
# Probes (optional)
# =========================
def _dns_check(host: str):
try:
infos = socket.getaddrinfo(host, 443, proto=socket.IPPROTO_TCP)
addrs = sorted({f"{i[4][0]}:{i[4][1]}" for i in infos})
return True, addrs
except Exception as e:
return False, [f"DNS error: {e}"]
def _http_probe(url: str, headers: dict | None = None):
try:
r = requests.get(url, headers=headers or {}, timeout=15)
return True, f"{r.status_code} {r.reason}", r.text[:300]
except Exception as e:
return False, str(e), ""
# =========================
# FOLLOW-UPS — D1 via Worker API (storage & helpers)
# =========================
# (os, datetime as dt, Optional, requests, and streamlit are already imported at the top of this module)
# Assumes you already have these elsewhere:
# - ss = st.session_state
# - JPN_TZ = zoneinfo.ZoneInfo("Asia/Tokyo")
FOLLOWUPS_API_BASE = (os.getenv("FOLLOWUPS_API_BASE") or ss.get("worker_base") or "").rstrip("/")
# Optional: x-api-key auth fallback (only needed if you are NOT using Bearer token)
FOLLOWUPS_READ_KEY = os.getenv("FOLLOWUPS_READ_KEY", "")
FOLLOWUPS_WRITE_KEY = os.getenv("FOLLOWUPS_WRITE_KEY", "")
ss.setdefault("followups_cache", [])
ss.setdefault("followups_loaded", False)
ss.setdefault("followup_alert_modal_shown", False)
def _now_jst() -> dt.datetime:
return dt.datetime.now(JPN_TZ)
def _ensure_tz(dt_obj: dt.datetime) -> dt.datetime:
if dt_obj.tzinfo is None:
return dt_obj.replace(tzinfo=JPN_TZ)
return dt_obj
def _api_headers(required_scope: str) -> dict:
"""
required_scope: "read" or "write"
Prefers Bearer token if present, else x-api-key.
"""
h = {"content-type": "application/json"}
token = ss.get("api_token") or ss.get("token") or ss.get("worker_token") or ""
if token:
h["authorization"] = f"Bearer {token}"
return h
# fallback: x-api-key
if required_scope == "write" and FOLLOWUPS_WRITE_KEY:
h["x-api-key"] = FOLLOWUPS_WRITE_KEY
elif required_scope == "read" and FOLLOWUPS_READ_KEY:
h["x-api-key"] = FOLLOWUPS_READ_KEY
elif FOLLOWUPS_WRITE_KEY: # allow write key for reads too
h["x-api-key"] = FOLLOWUPS_WRITE_KEY
return h
def _api_get(path: str, params: Optional[dict] = None, scope: str = "read"):
if not FOLLOWUPS_API_BASE:
raise RuntimeError("FOLLOWUPS_API_BASE not set (and ss.worker_base is empty)")
url = f"{FOLLOWUPS_API_BASE}{path}"
return requests.get(url, headers=_api_headers(scope), params=params or {}, timeout=20)
def _api_post(path: str, payload: dict, scope: str = "write"):
if not FOLLOWUPS_API_BASE:
raise RuntimeError("FOLLOWUPS_API_BASE not set (and ss.worker_base is empty)")
url = f"{FOLLOWUPS_API_BASE}{path}"
return requests.post(url, headers=_api_headers(scope), json=payload, timeout=20)
def load_followups_into_state(force: bool = False):
if ss.get("followups_loaded") and not force:
return
try:
resp = _api_get("/api/followups/open", scope="read")
if not resp.ok:
st.warning(f"Follow-ups: API read error ({resp.status_code}): {resp.text[:200]}")
ss["followups_cache"] = []
ss["followups_loaded"] = True
return
data = resp.json() if resp.text else []
ss["followups_cache"] = data if isinstance(data, list) else []
ss["followups_loaded"] = True
except Exception as e:
st.warning(f"Follow-ups: load error ({e}).")
ss["followups_cache"] = []
ss["followups_loaded"] = True
def add_followup(user_id: str, due_dt: dt.datetime, note: str = "") -> str:
due_dt = _ensure_tz(due_dt)
payload = {"user_id": str(user_id), "due_iso": due_dt.isoformat(), "note": (note or "").strip()}
resp = _api_post("/api/followups/set", payload, scope="write")
if not resp.ok:
raise RuntimeError(f"Follow-up set failed ({resp.status_code}): {resp.text[:200]}")
out = resp.json() if resp.text else {}
load_followups_into_state(force=True)
return str(out.get("id") or "")
def resolve_followup(fid: str):
resp = _api_post("/api/followups/resolve", {"id": str(fid)}, scope="write")
if not resp.ok:
raise RuntimeError(f"Resolve failed ({resp.status_code}): {resp.text[:200]}")
load_followups_into_state(force=True)
def list_overdue_followups(now_dt: Optional[dt.datetime] = None) -> list:
now_dt = _ensure_tz(now_dt or _now_jst())
try:
resp = _api_get("/api/followups/overdue", params={"now_iso": now_dt.isoformat()}, scope="read")
if not resp.ok:
st.warning(f"Follow-ups: overdue query error ({resp.status_code}): {resp.text[:200]}")
return []
data = resp.json() if resp.text else []
return data if isinstance(data, list) else []
except Exception as e:
st.warning(f"Follow-ups: overdue query error ({e}).")
return []
def list_user_open_followups(user_id: str) -> list:
uid = str(user_id)
out = [it for it in ss.get("followups_cache", []) if str(it.get("user_id", "")) == uid and not bool(it.get("resolved", False))]
out.sort(key=lambda x: x.get("due_iso", ""))
return out
# =========================
# Worker API helpers (Responder Console)
# =========================
def auth_headers() -> dict:
return {"Authorization": f"Bearer {ss.token}"} if ss.token else {}
def api_get(path: str, params: dict | None = None):
base = ss.worker_base.strip().rstrip("/")
try:
return requests.get(f"{base}{path}", params=params, headers=auth_headers(), timeout=20)
except Exception as e:
return type("Obj", (), {"status_code": 599, "text": str(e)})
def api_post(path: str, json_data: dict):
base = ss.worker_base.strip().rstrip("/")
try:
h = {"Content-Type": "application/json"}; h.update(auth_headers())
return requests.post(f"{base}{path}", json=json_data, headers=h, timeout=20)
except Exception as e:
return type("Obj", (), {"status_code": 599, "text": str(e)})
# =========================
# Global CSS for chat bubbles
# =========================
st.markdown("""
<style>
.block-container { padding-top: 0.8rem; padding-bottom: 2rem; }
.chat-row { display: flex; margin: 6px 0; }
.left { justify-content: flex-start; }
.right { justify-content: flex-end; }
.center { justify-content: center; } /* for system-bubble alignment */
.bubble { border-radius: 16px; padding: 10px 12px; max-width: 92%; word-wrap: break-word; }
.user { background: #f1f5f9; }
.account { background: #e0f2fe; }
.system { background: #e0e7ff; font-style: italic; }
.meta { font-size: 11px; color: #6b7280; margin-top: 2px; }
.small { font-size: 12px; color: #6b7280; }
.toolbar { display: flex; gap: 8px; align-items: center; }
</style>
""", unsafe_allow_html=True)
st.markdown("""
<style>
.chat-scroll {
max-height: 520px;
overflow-y: auto;
border: 1px solid #e5e7eb;
border-radius: 12px;
padding: 8px 10px;
background: #ffffff;
}
</style>
""", unsafe_allow_html=True)
# =========================
# PASSCODE GATE — Entire App (LOCKED base + brute-force throttle)
# =========================
st.markdown("<h1 style='text-align: center;'>๐Ÿจ Koala DX โ€” Dashboard + Responder</h1>", unsafe_allow_html=True)
# --- Brute-force throttle (per-session) ---
ss.setdefault("auth_fail_count", 0)
ss.setdefault("auth_block_until", 0.0)
def _auth_is_blocked_now() -> tuple[bool, int]:
now = time.time()
until = float(ss.get("auth_block_until", 0.0) or 0.0)
if now < until:
return True, int(until - now)
return False, 0
def _auth_register_fail():
# Exponential backoff up to 5 minutes (per session)
ss["auth_fail_count"] = int(ss.get("auth_fail_count", 0) or 0) + 1
delay = min(300, 2 ** min(ss["auth_fail_count"], 8)) # 2,4,8,... up to 256s, capped at 300
ss["auth_block_until"] = time.time() + delay
def _auth_register_success():
ss["auth_fail_count"] = 0
ss["auth_block_until"] = 0.0
# Force the locked base into session state (prevents tampering)
ss.worker_base = WORKER_BASE_DEFAULT
if not ss.token:
blocked, remain_s = _auth_is_blocked_now()
if blocked:
st.error(f"Too many attempts. Try again in ~{remain_s}s.")
st.stop()
with st.form("login_gate", clear_on_submit=False):
st.subheader("๐Ÿ” Sign in")
st.caption(f"Worker: {WORKER_BASE_DEFAULT} (locked)")
passcode = st.text_input("Passcode", type="password", placeholder="Enter READ or WRITE passcode")
submitted = st.form_submit_button("Sign in")
if submitted:
# Always add a tiny delay to make guessing slower
time.sleep(0.5)
base = WORKER_BASE_DEFAULT # 🔒 locked
try:
r = requests.post(
f"{base}/api/auth",
json={"passcode": passcode},
timeout=20,
allow_redirects=False, # 🔒 avoid redirect-to-attacker leakage
)
except Exception as e:
_auth_register_fail()
st.error(f"Network error: {e}")
st.stop()
if r.status_code != 200:
_auth_register_fail()
st.error(f"Auth failed: {r.text}")
st.stop()
# Success
try:
data = r.json()
except Exception:
_auth_register_fail()
st.error("Auth failed: invalid JSON response.")
st.stop()
ss.worker_base = base
ss.token = data.get("token", "") or ""
ss.scope = data.get("scope", "") or ""
if not ss.token:
_auth_register_fail()
st.error("Auth failed: missing token.")
st.stop()
_auth_register_success()
st.success(f"Signed in as {ss.scope.upper()} โ€” {ss.worker_base}")
if rerun:
rerun()
st.stop()
# =========================
# FOLLOW-UPS — load & notify once per login (no st.modal)
# =========================
load_followups_into_state(force=False)
def _fmt_user_label(uid: str) -> str:
uid = (uid or "").strip()
rec = (ss.get("user_index") or {}).get(uid, {}) or {}
nick = (rec.get("nickname") or "").strip()
disp = (rec.get("display_name") or "").strip()
suffix = (rec.get("suffix") or (uid[-6:] if len(uid) >= 6 else uid))
name = nick or disp or f"unfollowed({suffix})"
return f"{name} ({suffix})"
def _fmt_due_jst(due_iso: str) -> str:
s = (due_iso or "").strip()
if not s:
return "(no due date)"
try:
d = dt.datetime.fromisoformat(s) # handles "+09:00" offsets too
if d.tzinfo is None:
d = d.replace(tzinfo=JPN_TZ)
else:
d = d.astimezone(JPN_TZ)
return d.strftime("%Y-%m-%d %H:%M")
except Exception:
# fallback: show raw if parsing fails
return s
if not ss["followup_alert_modal_shown"]:
overdue = list_overdue_followups()
if overdue:
st.warning("โฐ Follow-ups due โ€” please review below.")
with st.expander("Follow-ups due (click to open)", expanded=True):
st.write("Resolve now or keep to be reminded again next login.")
st.divider()
for it in overdue:
uid = it.get("user_id", "")
fid = it.get("id")
note = (it.get("note", "") or "").strip()
due_pretty = _fmt_due_jst(it.get("due_iso", ""))
st.checkbox(
f"Resolve โ€ข {_fmt_user_label(uid)} โ€ข due={due_pretty} โ€ข note={note or '(no note)'}",
key=f"ck_resolve_{fid}",
value=False,
)
c1, c2 = st.columns(2)
with c1:
if st.button("โœ… Update (resolve checked)", key="fu_update"):
any_change = False
for it in overdue:
fid = it.get("id")
if fid and st.session_state.get(f"ck_resolve_{fid}", False):
try:
resolve_followup(str(fid))
any_change = True
except Exception as e:
st.error(f"Resolve failed for {fid}: {e}")
if any_change:
st.success("Updated follow-ups.")
else:
st.info("No changes made.")
ss["followup_alert_modal_shown"] = True
if rerun: rerun()
with c2:
if st.button("๐Ÿ” Keep all for later", key="fu_keep"):
ss["followup_alert_modal_shown"] = True
st.info("Okay โ€” youโ€™ll be reminded again on next login.")
if rerun: rerun()
else:
ss["followup_alert_modal_shown"] = True
# =========================
# Sidebar โ€” (visible only after login)
# =========================
with st.sidebar:
st.header("Data Fetch")
# --- Backend config (hidden from main UI) ---
# Keep these in env/secrets; not exposed in the sidebar UI
base_url = (ss.get("worker_base") or WORKER_BASE_DEFAULT).strip().rstrip("/")
api_key = READ_API_KEY_DEFAULT # keep in env/secrets
# --- Default: fetch past 1 year ---
now_jst = dt.datetime.now(JPN_TZ)
default_since_dt = (now_jst - dt.timedelta(days=365)).replace(hour=0, minute=0, second=0, microsecond=0)
# --- Inputs the user asked to keep ---
aes_key_in = st.text_input("AES key (base64, 16/24/32 bytes)", type="password")
limit = st.number_input("Limit", min_value=1, max_value=5000, value=int(FETCH_LIMIT_DEFAULT), step=100)
# Human-readable since date (JST) + computed since_ms (UTC epoch ms)
since_date = st.date_input("Since (JST date)", value=default_since_dt.date())
# Optional time control (kept minimal; defaults midnight JST)
since_time = dt.time(0, 0)
since_dt_jst = dt.datetime.combine(since_date, since_time).replace(tzinfo=JPN_TZ)
since_ms = int(since_dt_jst.astimezone(dt.timezone.utc).timestamp() * 1000)
st.caption(f"Computed since_ms: `{since_ms}` (UTC ms) ยท JST: {since_dt_jst.strftime('%Y-%m-%d %H:%M')}")
c1, c2 = st.columns([1.2, 1.0])
with c1:
if st.button("โ˜๏ธ Fetch & Decrypt โ†’ Memory", type="primary"):
# Validate AES key and store into session key cache
kb = None
if aes_key_in:
try:
kb = _b64_to_bytes(aes_key_in.strip())
except Exception:
kb = None
if not kb or len(kb) not in (16, 24, 32):
st.error("Invalid AES key. Must be base64 for 16/24/32 bytes.")
else:
_set_session_key(kb)
# Recommended: overwrite to avoid duplicates on repeated fetches
ok, msg = import_worker_to_memory(
base_url=base_url,
api_key=api_key,
aes_key_b64=base64.b64encode(kb).decode(),
since_ms=int(since_ms),
limit=int(limit),
overwrite=True,
)
if ok:
st.success(msg)
st.toast("Worker import complete.", icon="โœ…")
else:
st.error(msg)
with c2:
if st.button("๐Ÿงน Clear memory"):
ss["messages_df"] = ss["messages_df"].iloc[0:0].copy()
ss["sec_cache"].clear()
st.success("Cleared in-memory data.")
st.divider()
# --- Backend imports kept but hidden/collapsed ---
with st.expander("Backend imports (CSV / preset.xlsx)", expanded=False):
st.subheader("CSV Import")
up = st.file_uploader("Upload CSV (user_id, role, text[, ts, display_name])", type=["csv"])
if st.button("๐Ÿ“ฅ Import CSV โ†’ Memory"):
if up is None:
st.warning("Please choose a CSV file first.")
else:
ok, msg = import_csv_to_memory(up, overwrite=True)
(st.success if ok else st.error)(msg)
st.divider()
st.subheader("Presets from Excel")
preset_file = st.file_uploader("preset.xlsx", type=["xlsx"])
if st.button("๐Ÿ“š Load presets"):
if not preset_file:
st.warning("Select an .xlsx file first.")
else:
try:
data = preset_file.read()
presets, hotlines, qa_df, ra_df = load_presets_and_kb_from_xlsx(io.BytesIO(data))
ss["presets"] = presets
ss["hotlines"] = hotlines
ss["qa_rows"] = qa_df
ss["ra_rows"] = ra_df
ss["qa_keywords"] = unique_keywords_from_df(qa_df, "keyword")
ss["ra_keywords"] = unique_keywords_from_df(ra_df, "keyword")
if presets:
ss["active_preset"] = sorted(presets.keys())[0]
st.success(
f"Loaded {len(presets)} presets, {len(hotlines)} hotlines, "
f"{len(ss['qa_keywords'])} Q&A keywords, {len(ss['ra_keywords'])} RA keywords."
)
except Exception as e:
st.error(f"Failed to load presets: {e}")
if ss.get("presets"):
preset_names = sorted(ss["presets"].keys())
ss["active_preset"] = st.selectbox(
"Active preset",
options=preset_names,
index=max(
0,
preset_names.index(ss.get("active_preset", "")) if ss.get("active_preset", "") in preset_names else 0
)
)
tab_main, tab_stats = st.tabs(["🏠 Main", "📊 Stats"])
with tab_main:
# =========================
# Single-Page Layout โ€” Left (Responder Console) | Right (Dashboard)
# =========================
left, right = st.columns([1, 1]) # left half for chat console
# ==== LEFT: Responder Console (uses Worker passcode token) =====================
with left:
st.subheader("๐Ÿ’ฌ Responder Console")
st.caption(f"Signed in as {ss.scope.upper()} โ€” {ss.worker_base}")
st.text_input("Responder name๏ผˆๆ‹…ๅฝ“่€…ๅ๏ผ‰", key="responder", placeholder="e.g., Ken / ๅฑฑ็”ฐ")
st.markdown("**Select a user**")
resp = api_get("/api/users")
if resp.status_code != 200:
st.error(f"Failed to load users: {resp.text}")
users = []
else:
users = resp.json() or []
# Build uid -> display info map (stable)
user_map = {}
uid_options = []
for row in users:
uid = (row.get("user_id") or "").strip()
if not uid:
continue
nickname = (row.get("nickname") or "").strip()
display = (row.get("display_name") or "").strip()
suffix = uid[-6:] if len(uid) >= 6 else uid
user_map[uid] = {"nickname": nickname, "display_name": display, "suffix": suffix}
uid_options.append(uid)
ss["user_index"] = user_map # persist for use on the right column & messages table
def _last_ts(uid: str) -> int:
for row in users:
if (row.get("user_id") or "").strip() == uid:
try:
return int(row.get("last_ts") or 0)
except Exception:
return 0
return 0
# Sort by last_ts desc (stable values, only order changes)
uid_options = sorted(uid_options, key=_last_ts, reverse=True)
def fmt_uid(uid: str) -> str:
rec = user_map.get(uid, {}) or {}
nickname = (rec.get("nickname") or "").strip()
display = (rec.get("display_name") or "").strip()
suffix = rec.get("suffix") or (uid[-6:] if len(uid) >= 6 else uid)
name = nickname or display or f"unfollowed({suffix})"
last_ts = _last_ts(uid)
return f"{name} ({suffix}) ยท last: {fmt_ts_ms_to_jst(last_ts)}"
if uid_options:
# Keep previous selection if possible
default_uid = ss.get("selected_user") if ss.get("selected_user") in uid_options else uid_options[0]
default_idx = uid_options.index(default_uid)
picked_uid = st.selectbox(
"Users",
options=uid_options,
index=default_idx,
format_func=fmt_uid,
key="user_select_uid", # IMPORTANT: stable widget key
)
ss.selected_user = picked_uid
ss.selected_user_label = fmt_uid(picked_uid)
ss.nickname_current = (user_map.get(ss.selected_user, {}) or {}).get("nickname") or ""
else:
st.info("No users yet.")
# Nickname editor
if ss.selected_user:
st.markdown("**Nickname (่กจ็คบๅใƒปใƒ‹ใƒƒใ‚ฏใƒใƒผใƒ )**")
cols = st.columns([3,1])
with cols[0]:
new_nick = st.text_input("Set nickname for this user", value=ss.nickname_current, key="nick_input", placeholder="e.g., 山田さん / Yamada")
with cols[1]:
if st.button("Save"):
if ss.scope != "write":
st.warning("You are signed in with READ-only scope.")
else:
r = api_post("/api/nickname", {"user_id": ss.selected_user, "nickname": new_nick.strip() or None})
if r.status_code == 200:
st.success("Nickname updated โœ…")
ss.nickname_current = new_nick.strip()
time.sleep(0.3)
if rerun: rerun()
else:
st.error(f"Failed to update nickname: {r.text}")
# Conversation
messages = []
if ss.selected_user:
cols = st.columns([1,1,2])
with cols[0]:
if st.button("โ†ป Refresh"):
ss.last_refresh = time.time()
if rerun: rerun()
with cols[1]:
st.caption("Auto-refresh ~20s")
if time.time() - ss.last_refresh > 20:
ss.last_refresh = time.time()
if rerun: rerun()
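# Note: Streamlit only re-runs this script on user interaction or an explicit rerun,
# so the "~20s" auto-refresh is best-effort; the elapsed-time check above is evaluated
# only when something else has already triggered a rerun.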
r = api_get("/api/messages", params={"user_id": ss.selected_user})
if r.status_code != 200:
st.error(f"Failed to load messages: {r.text}")
else:
messages = r.json() or []
# Auto-set unfollowed nickname if applicable
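# The auto_set_nick_once_for guard below makes the nickname POST fire at most once per
# selected user per session, so repeated reruns do not re-send the same request.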
if ss.selected_user and ss.scope == "write":
uinfo = user_map.get(ss.selected_user, {}) if 'user_map' in locals() else {}
if (not (uinfo.get("nickname") or uinfo.get("display_name"))
and any((m.get("type") or "").lower().strip() == "unfollow" for m in messages)
and ss.auto_set_nick_once_for != ss.selected_user):
fallback_nick = f"unfollowed({uinfo.get('suffix','')})"
resp_set = api_post("/api/nickname", {"user_id": ss.selected_user, "nickname": fallback_nick})
if resp_set.status_code == 200:
ss.auto_set_nick_once_for = ss.selected_user
ss.nickname_current = fallback_nick
st.info(f"Nickname auto-set to **{fallback_nick}** for unfollowed user.")
time.sleep(0.3)
if rerun: rerun()
else:
st.warning(f"Attempted to auto-set unfollow nickname but failed: {resp_set.text}")
shown_label = ss.selected_user_label or ss.selected_user or "(none)"
st.markdown(f"**Conversation โ€” {shown_label}**")
render_conversation_bubbles(messages, height_px=520, key=f"chat_{ss.selected_user or 'none'}")
# Send reply
if ss.selected_user:
st.markdown("**Send a reply**")
if ss.scope != "write":
st.info("You are signed in with READ-only scope. Replies are disabled.")
else:
reply = st.chat_input("Type your reply…")
if reply:
if not ss.responder.strip():
st.warning("Please enter your Responder name๏ผˆๆ‹…ๅฝ“่€…ๅ๏ผ‰above before sending.")
else:
payload = {"user_id": ss.selected_user, "text": reply.strip(), "responder": ss.responder.strip()}
r = api_post("/api/messages", payload)
if r.status_code == 200:
try:
out = r.json() or {}
except Exception:
out = {}
status = (out.get("delivery_status") or "").lower().strip()
if status == "sent":
st.success("Sent โœ…")
elif status == "quota":
st.warning("โš ๏ธไปŠๆœˆใฎ็„กๆ–™้€ไฟกๆ•ฐใ‚’่ถ…ใˆใฆใ„ใพใ™ใ€‚ๆ‰‹ๅ‹•ใงใƒฉใ‚คใƒณใ‚’้€ใฃใฆใใ ใ•ใ„.")
else:
st.error("Error โŒ (will auto-retry once after 10s)")
# Optional: refresh conversation so you can see delivery_status fields updated
if rerun: rerun()
else:
st.error(f"Send failed: {r.text}")
with st.expander("๐Ÿ”Ž Debug log", expanded=False):
if st.button("Clear debug log", key="btn_clear_debug"):
ss["debug_log"] = []
st.code("\n".join(ss.get("debug_log", [])) or "(empty)")
# ==== RIGHT: Dashboard (Risk, Volunteer Assist with KB/LLM, then Messages) =====
SUMMARIZE_SYSTEM_PROMPT = (
"You are a careful support assistant for volunteer responders. "
"Write in Japanese. Be supportive, neutral, and anonymized. "
"Do not give medical/legal certainty. If self-harm risk is present, prioritize safety steps and hotlines."
)
with right:
st.subheader("โš ๏ธ High/Medium Risk")
df_all = db_all_messages()
if not df_all.empty:
risky = df_all[df_all["risk_level"].isin(["HIGH","MEDIUM"])]
if risky.empty:
st.success("No risk flagged.")
else:
st.dataframe(
risky[["ts","user_id","display_name","text","risk_level"]].tail(200),
use_container_width=True, height=220
)
else:
st.info("No messages loaded yet.")
# --- Volunteer Assist target user (fix NameError: pick_uid) ---
pick_uid = (ss.get("selected_user") or "").strip()
ss["assist_pick_uid"] = pick_uid # keep follow-up section consistent
# Build context for summarization (prefer live Worker messages; fallback to in-memory)
ctx = ""
if pick_uid:
rr = api_get("/api/messages", params={"user_id": pick_uid})
if rr.status_code == 200:
msgs_ctx = rr.json() or []
lines = []
for m in msgs_ctx[-200:]:
role = (m.get("role") or "user").lower().strip()
text = str(m.get("text") or "").strip().replace("\n", " ")
if text:
lines.append(f"{role}: {text}")
blob = "\n".join(lines).strip()
# sentence-guard to ~500 tokens (uses your existing helpers)
if approx_tokens(blob) <= 500:
ctx = blob
else:
sents = split_sentences(blob)
picked = []
count = 0
for sent in reversed(sents):
picked.append(sent)
count += approx_tokens(sent)
if count >= 500:
break
ctx = "\n".join(reversed(picked)).strip()
else:
# fallback: in-memory df (only if you imported CSV/Worker→memory)
dfu = db_recent_messages(user_id=pick_uid, minutes=60 * 24 * 30)
ctx = last_context_with_sentence_guard(dfu, max_tokens=500)
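# Trimming sketch (mirrors the two branches above): sentences are collected from the END
# of the thread until the running approx_tokens() total reaches ~500, then re-reversed so
# the excerpt reads chronologically. The exact cut-off depends on the approx_tokens() /
# split_sentences() helpers defined earlier in this file.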
if not pick_uid:
st.info("ๅทฆๅดใงใƒฆใƒผใ‚ถใƒผใ‚’้ธๆŠžใ™ใ‚‹ใจใ€่ฆ็ด„/ๆๆกˆใŒๆœ‰ๅŠนใซใชใ‚Šใพใ™ใ€‚")
# ---- ① Conversation Summary (robust: updates text_area after rerun) ----
st.markdown("**① Conversation Summary**")
# Keys:
# - sum_widget_key: the actual widget key (Streamlit owns this once created)
# - sum_value_key: our backing store (safe to update any time)
# - sum_pending_key: a "mailbox" for updates that will be applied BEFORE widget instantiation
sum_value_key = f"summary_value_{pick_uid}"
sum_widget_key = f"summary_widget_{pick_uid}"
sum_pending_key = f"_pending_summary_{pick_uid}"
# Seed initial text (preset -> previous saved -> empty)
preset = get_active_preset_content()
out_map = (ss.get("llm_out", {}).get(pick_uid, {}) or {})
initial = (out_map.get("summary") or preset.get("summary") or "").strip()
if sum_value_key not in ss:
ss[sum_value_key] = initial
# ✅ Apply pending update BEFORE creating the widget (this is the crucial part)
if ss.get(sum_pending_key):
new_text = ss.pop(sum_pending_key)
ss[sum_value_key] = new_text
ss[sum_widget_key] = new_text # safe here because widget isn't created yet
ss.setdefault("llm_out", {}).setdefault(pick_uid, {})["summary"] = new_text
# Ensure widget key exists before first render
if sum_widget_key not in ss:
ss[sum_widget_key] = ss.get(sum_value_key, "")
# Render widget (after this point, DO NOT assign ss[sum_widget_key] directly)
summary_txt = st.text_area("Summary", key=sum_widget_key, height=150)
c_sum1, c_sum2 = st.columns([1, 1])
with c_sum1:
if st.button("๐Ÿง  Summarize (OpenAI)", key=f"btn_summarize_{pick_uid}"):
dbg("Summarize clicked")
if not OPENAI_API_KEY:
dbg("OPENAI_API_KEY missing")
st.error("OPENAI_API_KEY is not set.")
else:
dbg(f"OPENAI_URL={OPENAI_URL}")
dbg(f"OPENAI_MODEL={OPENAI_MODEL}")
dbg(f"ctx_chars={len(ctx or '')}")
# Optional DNS hint
try:
host = (OPENAI_URL or "").split("/")[2]
ok_dns, addrs = _dns_check(host)
dbg(f"dns_ok={ok_dns} addrs={addrs[:3]}")
except Exception as e:
dbg(f"dns_check_error={e}")
# Note: the system prompt for this summarize call is written inline here (explicit).
sys_prompt = (
"You are a careful assistant supporting volunteer responders for caregiver chats in Japan. "
"Summarize the conversation into 5โ€“8 bullet points in Japanese. "
)
user_prompt = (
"Conversation context:\n\n"
f"{ctx}\n\n"
"Output only the summary bullets."
)
payload = {
"model": OPENAI_MODEL,
"input": [
{"role": "system", "content": [{"type": "input_text", "text": sys_prompt}]},
{"role": "user", "content": [{"type": "input_text", "text": user_prompt}]},
],
"store": False,
}
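# The payload above follows the OpenAI Responses API shape (model + "input" as a list of
# role/content parts of type "input_text", with store=False); OPENAI_URL is assumed to
# point at the matching /responses endpoint, and the reply text is extracted later via
# _extract_responses_output_text().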
t0 = time.time()
with st.spinner("Calling OpenAI..."):
try:
r = requests.post(
OPENAI_URL,
headers={
"Authorization": f"Bearer {OPENAI_API_KEY}",
"Content-Type": "application/json",
},
json=payload,
timeout=60,
)
dt_s = time.time() - t0
dbg(f"openai_http_done status={r.status_code} elapsed={dt_s:.2f}s")
dbg(f"openai_resp_head={r.text[:300].replace(chr(10),' ')}")
if r.status_code != 200:
st.error(f"LLM error: {r.status_code}\n\n{r.text}")
else:
data = r.json()
content = (_extract_responses_output_text(data) or "").strip()
dbg(f"parsed_output_chars={len(content)}")
if not content:
st.warning("Got HTTP 200 but no text output was parsed. Check Debug log.")
else:
# ✅ DO NOT set ss[sum_widget_key] here (widget already exists)
# Instead, stash it and rerun so it applies BEFORE widget instantiation.
ss[sum_pending_key] = content
ss[sum_value_key] = content
ss.setdefault("llm_out", {}).setdefault(pick_uid, {})["summary"] = content
st.success("Summary updated.")
if rerun:
rerun()
except Exception as e:
dbg(f"openai_request_exception={repr(e)}")
st.exception(e)
with c_sum2:
if st.button("๐Ÿ’พ Save current text", key=f"btn_save_summary_{pick_uid}"):
cur = (ss.get(sum_widget_key, "") or "").strip()
ss[sum_value_key] = cur
ss.setdefault("llm_out", {}).setdefault(pick_uid, {})["summary"] = cur
st.success("Saved to session.")
# ---- ② Recommended Actions (KB-driven) ----
st.markdown("**② Recommended Actions (from RA sheet via keywords)**")
ra_available = ss.get("ra_keywords", []) if not ss.get("ra_rows", pd.DataFrame()).empty else []
if not ra_available:
st.info("No RA sheet loaded. Add an 'RA' sheet with columns: keyword, action.")
# ✅ Make suggestion actually select the tags in the multiselect
ra_widget_key = f"ra_select_{pick_uid}" # this is the multiselect's key
ra_pending_key = f"_pending_ra_{pick_uid}" # pending mailbox (applied BEFORE widget)
# Apply pending suggestion before widget instantiation
if ra_pending_key in ss:
ss[ra_widget_key] = ss.pop(ra_pending_key) or []
c_ra1, c_ra2 = st.columns([1, 1])
# ensure the widget key exists (so first render is empty, no warning)
ss.setdefault(ra_widget_key, [])
with c_ra1:
pick_ra = st.multiselect(
"Pick action keywords",
options=ra_available,
key=ra_widget_key,
)
# (optional) show what LLM picked last time
last_ra = ss.get(f"_last_ra_suggest_{pick_uid}", [])
if last_ra:
st.caption("LLM selected: " + ", ".join(last_ra))
with c_ra2:
if st.button("๐Ÿ”Ž Suggest Action Keywords (OpenAI)", key=f"btn_ra_{pick_uid}"):
if not OPENAI_API_KEY:
st.error("OPENAI_API_KEY is not set.")
elif not ra_available:
st.warning("No RA keywords loaded.")
else:
picked = llm_pick_keywords(ctx, ra_available, kind="action")
if not picked:
st.warning("No keywords suggested.")
else:
# ✅ write into pending → rerun so widget reflects it
ss[ra_pending_key] = picked
st.success(f"Suggested: {', '.join(picked)}")
if rerun:
rerun()
ra_selected_now = ss.get(ra_widget_key, []) or []
ss["ra_selected"] = ra_selected_now # keep your old state key if other code uses it
if ra_selected_now and not ss["ra_rows"].empty:
show = ss["ra_rows"][ss["ra_rows"]["keyword"].isin(ra_selected_now)].copy()
if show.empty:
st.info("No rows match the selected keywords.")
else:
st.write("**Actions**")
for kw in ra_selected_now:
sub = show[show["keyword"] == kw]
if not sub.empty:
with st.expander(f"Keyword: {kw} ยท {len(sub)} item(s)", expanded=True):
for _, r in sub.iterrows():
st.markdown(f"- {r['action']}")
else:
st.caption("Pick 1โ€“3 keywords (or use the suggestion button).")
# ---- ③ Relevant Q&A Snippets (KB-driven) ----
st.markdown("**③ Relevant Q&A (from Q&A sheet via keywords)**")
qa_available = ss.get("qa_keywords", []) if not ss.get("qa_rows", pd.DataFrame()).empty else []
if not qa_available:
st.info("No Q&A sheet loaded. Add a 'Q&A' (or 'QA') sheet with columns: keyword, question, answer.")
# ✅ Same fix for Q&A multiselect
qa_widget_key = f"qa_select_{pick_uid}"
qa_pending_key = f"_pending_qa_{pick_uid}"
if qa_pending_key in ss:
ss[qa_widget_key] = ss.pop(qa_pending_key) or []
c_q1, c_q2 = st.columns([1, 1])
ss.setdefault(qa_widget_key, [])
with c_q1:
pick_qa = st.multiselect(
"Pick Q&A keywords",
options=qa_available,
key=qa_widget_key,
)
last_qa = ss.get(f"_last_qa_suggest_{pick_uid}", [])
if last_qa:
st.caption("LLM selected: " + ", ".join(last_qa))
with c_q2:
if st.button("๐Ÿ”Ž Suggest Q&A Keywords (OpenAI)", key=f"btn_qa_{pick_uid}"):
if not OPENAI_API_KEY:
st.error("OPENAI_API_KEY is not set.")
elif not qa_available:
st.warning("No Q&A keywords loaded.")
else:
picked = llm_pick_keywords(ctx, qa_available, kind="Q&A")
if not picked:
st.warning("No keywords suggested.")
else:
ss[qa_pending_key] = picked
st.success(f"Suggested: {', '.join(picked)}")
if rerun:
rerun()
qa_selected_now = ss.get(qa_widget_key, []) or []
ss["qa_selected"] = qa_selected_now
if qa_selected_now and not ss["qa_rows"].empty:
show = ss["qa_rows"][ss["qa_rows"]["keyword"].isin(qa_selected_now)].copy()
if show.empty:
st.info("No Q&A rows match the selected keywords.")
else:
for kw in qa_selected_now:
sub = show[show["keyword"] == kw]
if not sub.empty:
with st.expander(f"Keyword: {kw} ยท {len(sub)} Q&A", expanded=True):
for _, r in sub.iterrows():
q = str(r.get("question", "")).strip()
a = str(r.get("answer", "")).strip()
if q or a:
st.markdown(f"**Q:** {q if q else '(no question)'}")
st.markdown(f"**A:** {a if a else '(no answer)'}")
st.markdown("---")
else:
st.caption("Pick 1โ€“3 keywords (or use the suggestion button).")
# Hotlines from Excel (optional)
if ss.get("hotlines"):
st.markdown("**Hotlines / External Resources**")
for h in ss["hotlines"]:
parts = [h.get("name", "")]
if h.get("dial"):
parts.append(h.get("dial"))
elif h.get("url"):
parts.append(h.get("url"))
if h.get("hours"):
parts.append(f"๏ผˆ{h['hours']}๏ผ‰")
if h.get("note"):
parts.append(f"- {h['note']}")
st.write(" ".join([p for p in parts if p]))
st.markdown("---")
st.markdown("**Draft a reply โ†’ Safety check & rewrite (preset)**")
draft = st.text_area(
"Your draft (before sending)",
height=120,
placeholder="๏ผˆไพ‹๏ผ‰ๆ˜จๅคœใฏๆœฌๅฝ“ใซๅคงๅค‰ใงใ—ใŸใญโ€ฆใ€‚ไปŠใฏใฉใฎใ‚ใŸใ‚Šใซใ„ใ‚‰ใฃใ—ใ‚ƒใ„ใพใ™ใ‹๏ผŸใพใšๅฎ‰ๅ…จใ‚’ไธ€็ท’ใซ็ขบ่ชใ•ใ›ใฆใใ ใ•ใ„ใ€‚",
)
if st.button("๐Ÿ›ก๏ธ Apply Preset Rewrite", key=f"rewrite_{pick_uid}"):
rewrite = get_active_preset_content().get("rewrite", "").strip()
st.text_area("Rewritten (safer)", (rewrite or draft), height=120, key=f"rewritten_{pick_uid}")
st.toast("Preset rewrite applied.", icon="โœ…")
# ---- ⏰ Follow-up (per-user) ----
st.markdown("**⏰ Follow-up**")
if users:
# we already computed pick_uid earlier in Volunteer Assist
if "assist_pick_uid" in ss and ss["assist_pick_uid"]:
fu_uid = ss["assist_pick_uid"]
mode = st.radio("When", ["In X days", "Pick date & time (JST)"], horizontal=True, key=f"fu_mode_{fu_uid}")
note = st.text_input("Optional note", key=f"fu_note_{fu_uid}", placeholder="e.g., confirm safety plan / share clinic info")
if mode == "In X days":
xdays = st.number_input("X days", min_value=1, max_value=60, value=2, step=1, key=f"fu_days_{fu_uid}")
if st.button("โž• Add follow-up", key=f"fu_add_days_{fu_uid}"):
due = _now_jst() + dt.timedelta(days=int(xdays))
fid = add_followup(fu_uid, due, note)
st.success(f"Follow-up added for {due.strftime('%Y-%m-%d %H:%M')} JST (id={fid}).")
else:
today = _now_jst().date() + dt.timedelta(days=2)
d = st.date_input("Date (JST)", value=today, key=f"fu_date_{fu_uid}")
t = st.time_input("Time (JST)", value=dt.time(10, 0), key=f"fu_time_{fu_uid}")
if st.button("โž• Add follow-up", key=f"fu_add_dt_{fu_uid}"):
due_dt = dt.datetime.combine(d, t).replace(tzinfo=JPN_TZ)
fid = add_followup(fu_uid, due_dt, note)
st.success(f"Follow-up added for {due_dt.strftime('%Y-%m-%d %H:%M')} JST (id={fid}).")
# List open follow-ups for this user with resolve buttons
open_items = list_user_open_followups(fu_uid)
if open_items:
st.caption("Open follow-ups for selected user:")
for it in open_items:
due_local = it["due_iso"]
short_note = it.get("note","")
cols = st.columns([3,5,2])
cols[0].markdown(f"**Due:** {due_local}")
cols[1].markdown(f"**Note:** {short_note or '(no note)'}")
if cols[2].button("Resolve", key=f"fu_resolve_{it['id']}"):
resolve_followup(it["id"])
st.toast("Follow-up resolved.", icon="โœ…")
if rerun: rerun()
else:
st.caption("No open follow-ups for this user.")
else:
st.info("Select a user above to add a follow-up.")
st.markdown("---")
# ---- 🔎 Conversation Search (Tag or Text) ----
st.subheader("🔎 Conversation Search")
# Helpers (local to this UI block)
def _tag_options_from_kb() -> list[str]:
# Prefer explicit tag sheet later; for now derive from Q&A + RA keywords
qa = ss.get("qa_keywords", []) or []
ra = ss.get("ra_keywords", []) or []
uniq = sorted({x.strip() for x in (qa + ra) if str(x).strip()})
return uniq[:500]
def _compile_pattern_from_tags(tags: list[str]) -> re.Pattern | None:
if not tags:
return None
# Simple OR of literal tags; if you later add a Tags sheet with custom patterns, plug them here.
safe = [re.escape(t) for t in tags if str(t).strip()]
if not safe:
return None
return re.compile(r"(" + "|".join(safe) + r")", flags=re.IGNORECASE)
def _compile_pattern_from_text(q: str, mode: str) -> re.Pattern | None:
q = str(q or "").strip()
if not q:
return None
if mode == "Exact (literal)":
return re.compile(re.escape(q), flags=re.IGNORECASE)
# "Any word" โ†’ split on whitespace, OR them
parts = [re.escape(p) for p in re.split(r"\s+", q) if p.strip()]
if not parts:
return None
return re.compile(r"(" + "|".join(parts) + r")", flags=re.IGNORECASE)
def _search_with_context(
df: pd.DataFrame,
pat: re.Pattern,
context: int = 2,
max_snippets: int = 30,
since_minutes: int | None = None,
) -> list[dict]:
"""
Returns a list of snippets:
{ 'user_id':..., 'rows': DataFrame segment, 'hit_idx_in_segment': int }
Each snippet is a contiguous slice around a hit within a single user's thread.
"""
if df.empty or pat is None:
return []
# Optional time window
work = df.copy()
if since_minutes:
since = (datetime.utcnow() - timedelta(minutes=int(since_minutes))).replace(tzinfo=timezone.utc)
tsdt = pd.to_datetime(work["ts"], utc=True, errors="coerce")
work = work[tsdt >= since]
# Sort for stable context slicing
work = work.sort_values(by=["user_id", "ts"]).reset_index(drop=True)
out: list[dict] = []
for uid, g in work.groupby("user_id", sort=False):
g = g.reset_index(drop=True)
texts = g["text"].astype(str).fillna("")
for i, txt in enumerate(texts):
if not txt:
continue
if pat.search(txt):
start = max(0, i - context)
end = min(len(g) - 1, i + context)
seg = g.iloc[start:end+1].copy()
out.append({
"user_id": uid,
"rows": seg,
"hit_idx_in_segment": i - start,
})
if len(out) >= max_snippets:
return out
return out
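# Each snippet returned above looks like:
#   {"user_id": ..., "rows": <DataFrame slice of up to 2*context+1 messages>,
#    "hit_idx_in_segment": <row offset of the match inside that slice>}
# which is exactly what the rendering loop further below consumes.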
# UI controls
scope_col, ctx_col, lim_col = st.columns([1.1, 0.9, 0.9])
with scope_col:
mode = st.radio("Mode", ["Text", "Tag"], horizontal=True, key="conv_search_mode")
with ctx_col:
ctx_messages = st.slider("Context window (msgs)", min_value=1, max_value=5, value=2, step=1)
with lim_col:
max_snips = st.slider("Max snippets", min_value=5, max_value=100, value=30, step=5)
timewin = st.selectbox("Time window", ["All", "Last day", "Last week", "Last month"], index=1)
since_map = {"All": None, "Last day": 60*24, "Last week": 60*24*7, "Last month": 60*24*30}
since_minutes = since_map.get(timewin)
pat = None
if mode == "Tag":
tag_opts = _tag_options_from_kb()
if not tag_opts:
st.info("ใ‚ฟใ‚ฐๆƒ…ๅ ฑใŒใ‚ใ‚Šใพใ›ใ‚“๏ผˆQ&A/RAใฎใ‚ญใƒผใƒฏใƒผใƒ‰ใ‚’่ชญใฟ่พผใ‚€ใจใ‚ฟใ‚ฐๆคœ็ดขใŒไฝฟใˆใพใ™๏ผ‰ใ€‚")
tag_select = []
else:
tag_select = st.multiselect("Tags (1–3推奨)", options=tag_opts, default=[], key="conv_search_tags")
do_search = st.button("🔎 Search by Tag")
if do_search:
pat = _compile_pattern_from_tags(tag_select)
else:
text_q = st.text_input("Text query", value="", placeholder="例：育児 不安 眠れない 里帰り　など…")
text_mode = st.radio("Match", ["Any word", "Exact (literal)"], horizontal=True, key="conv_search_textmode")
do_search = st.button("๐Ÿ”Ž Search Text")
if do_search:
pat = _compile_pattern_from_text(text_q, text_mode)
# Run search
if do_search:
if pat is None:
st.warning("ๆคœ็ดขๆกไปถใ‚’ๅ…ฅๅŠ›ใ—ใฆใใ ใ•ใ„ใ€‚")
else:
hits = _search_with_context(df_all, pat, context=ctx_messages, max_snippets=max_snips, since_minutes=since_minutes)
if not hits:
st.info("่ฉฒๅฝ“ใ™ใ‚‹ไผš่ฉฑใฏ่ฆ‹ใคใ‹ใ‚Šใพใ›ใ‚“ใงใ—ใŸใ€‚")
else:
st.write(f"**{len(hits)} ไปถใƒ’ใƒƒใƒˆ**๏ผˆๆœ€ๅคง {max_snips} ไปถใ‚’่กจ็คบ๏ผ‰")
# Render each snippet as a small conversation block
for j, snip in enumerate(hits, start=1):
uid = snip["user_id"]
seg = snip["rows"]
hit_rel = snip["hit_idx_in_segment"]
# Header
header = f"[{j}] {resolve_user_label(uid, df_all)}"
t0 = str(seg.iloc[0]["ts"])
t1 = str(seg.iloc[-1]["ts"])
st.markdown(f"**{header}**ใ€€`{t0} ใ€œ {t1}`")
# Show bubbles (reuse your chat style)
for k, r in seg.iterrows():
role = str(r.get("role","user")).lower().strip()
text = str(r.get("text","")).strip()
ts_str = str(r.get("ts",""))
is_hit = (k == seg.index[0] + hit_rel)
# Highlight the matching segment lightly
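# \g<0> re-inserts the whole regex match, so every occurrence is wrapped in ** and
# shows up bold in the Markdown bubble rendered below.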
if is_hit and text:
try:
text = re.sub(pat, r"**\g<0>**", text)
except Exception:
pass
is_system = False
msg_type = "" # not stored in df_all; we only have text/role
effective_text = text
if not effective_text and msg_type:
effective_text = f"[{msg_type}]"
is_system = True
if is_system:
align = "center"; bubble_cls = "system"; meta = ts_str
else:
align = "left" if role == "user" else "right"
bubble_cls = "user" if role == "user" else "account"
meta = ts_str
st.markdown(
f'<div class="chat-row {align}">'
f' <div class="bubble {bubble_cls}">{effective_text}<div class="meta">{meta}</div></div>'
f'</div>',
unsafe_allow_html=True
)
st.markdown("---")
# ── 📡 Messages (in memory) — moved to the lowest part (under rewrite) ─────
st.subheader("📡 Messages Export")
if df_all.empty:
st.info("No messages loaded. Use Cloud Pull or CSV import from the sidebar.")
else:
# Attach nickname column by mapping from ss.user_index
idx_map = ss.get("user_index", {})
nick_col = df_all["user_id"].map(lambda u: (idx_map.get(u, {}) or {}).get("nickname", "") if isinstance(u, str) else "")
view = df_all.copy()
insert_pos = view.columns.get_loc("display_name")+1 if "display_name" in view.columns else 2
view.insert(insert_pos, "nickname", nick_col)
view = view[["ts","user_id","display_name","nickname","role","text","risk_level"]]
st.dataframe(view.tail(300), use_container_width=True, height=360)
st.caption("โš ๏ธ Demo only. Not a clinical tool. For emergencies in Japan, call local hotlines or 119.")
with tab_stats:
render_stats_tab(db_all_messages(), ss)