Spaces:
Paused
Paused
Upload 11 files
Browse files- .dockerignore +38 -0
- Dockerfile +27 -0
- app.py +346 -0
- chat_processor.py +92 -0
- config.py +24 -0
- db_manager.py +134 -0
- feedback_generator.py +120 -0
- llm_handler.py +135 -0
- prompts.py +129 -0
- rag_manager.py +132 -0
- requirements.txt +6 -0
.dockerignore
ADDED
|
@@ -0,0 +1,38 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
# Git
|
| 2 |
+
.git
|
| 3 |
+
.gitignore
|
| 4 |
+
|
| 5 |
+
# Python virtual environment
|
| 6 |
+
venv/
|
| 7 |
+
venv_feedback/
|
| 8 |
+
*.egg-info/
|
| 9 |
+
__pycache__/
|
| 10 |
+
*.pyc
|
| 11 |
+
*.pyo
|
| 12 |
+
|
| 13 |
+
# Docker specific
|
| 14 |
+
Dockerfile
|
| 15 |
+
.dockerignore
|
| 16 |
+
|
| 17 |
+
# IDE / OS specific
|
| 18 |
+
.vscode/
|
| 19 |
+
.idea/
|
| 20 |
+
.DS_Store
|
| 21 |
+
Thumbs.db
|
| 22 |
+
|
| 23 |
+
# Local configuration not for image (API key via HF Secrets)
|
| 24 |
+
.env
|
| 25 |
+
|
| 26 |
+
# Logs and other local artifacts
|
| 27 |
+
*.log
|
| 28 |
+
dist/
|
| 29 |
+
build/
|
| 30 |
+
*.local
|
| 31 |
+
|
| 32 |
+
# 如果你决定在git中提交空的数据库目录/文件以确保路径存在于持久化存储中,
|
| 33 |
+
# 那么不要在这里忽略它们。否则,如果应用可以自动创建,则可以忽略。
|
| 34 |
+
# 对于HF持久化存储,最好是让应用在运行时按需创建这些文件/目录在持久化卷上。
|
| 35 |
+
# 所以,通常不在这里忽略它们,但要确保初始提交时它们是空的或不存在,
|
| 36 |
+
# 避免将本地测试数据打入镜像或仓库。
|
| 37 |
+
# chroma_db/
|
| 38 |
+
# students.db
|
Dockerfile
ADDED
|
@@ -0,0 +1,27 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
# 1. Use an official Python base image.
#    Debian "buster" is end-of-life and ships an old system SQLite, which
#    recent ChromaDB releases refuse to run on (they require SQLite >= 3.35).
#    "bookworm" is a supported release with a new enough SQLite.
FROM python:3.9-slim-bookworm

# 2. Environment variables (key=value form; the space-separated form is legacy).
#    - do not write .pyc files
#    - do not buffer stdout/stderr, so logs show up immediately
ENV PYTHONDONTWRITEBYTECODE=1
ENV PYTHONUNBUFFERED=1

# 3. Working directory
WORKDIR /app

# 4. (Optional) system dependencies - uncomment if a wheel needs compiling
# RUN apt-get update && apt-get install -y --no-install-recommends gcc && rm -rf /var/lib/apt/lists/*

# 5. Copy the dependency list first so the install layer is cached
COPY requirements.txt .

# 6. Install Python dependencies
RUN pip install --no-cache-dir -r requirements.txt

# 7. Copy the rest of the project (filtered by .dockerignore)
COPY . .

# 8. Port Streamlit listens on.
#    NOTE(review): Hugging Face Docker Spaces route traffic to the port named
#    by `app_port` in the Space README (default 7860) - confirm it is set to 8501.
EXPOSE 8501

# 9. Command executed when the container starts
CMD ["streamlit", "run", "app.py", "--server.port=8501", "--server.address=0.0.0.0", "--server.headless=true"]
|
app.py
ADDED
|
@@ -0,0 +1,346 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
# app.py
|
| 2 |
+
import streamlit as st
|
| 3 |
+
import datetime
|
| 4 |
+
import os
|
| 5 |
+
|
| 6 |
+
# Import all necessary modules from your project
|
| 7 |
+
from config import GOOGLE_API_KEY, STUDENT_DB_PATH, CHROMA_DB_PATH, RAG_COLLECTION_NAME
|
| 8 |
+
from db_manager import init_student_db, get_all_student_names, get_student_characteristics, add_or_update_student
|
| 9 |
+
from rag_manager import add_documents_to_rag, query_rag # get_all_student_observations_from_rag is used by chat_processor
|
| 10 |
+
from chat_processor import extract_info_from_chat, update_student_characteristics_from_rag, batch_update_all_students_characteristics
|
| 11 |
+
from feedback_generator import (
|
| 12 |
+
generate_boss_feedback,
|
| 13 |
+
generate_public_feedback,
|
| 14 |
+
generate_parent_feedback,
|
| 15 |
+
get_events_summary_for_day
|
| 16 |
+
)
|
| 17 |
+
# import prompts # Prompts are used by other modules, not directly here typically
|
| 18 |
+
|
| 19 |
+
# --- Page configuration and initialization ---
st.set_page_config(page_title="晚托反馈助手", layout="wide", initial_sidebar_state="expanded")

# --- API key check ---
# On Hugging Face the key comes from Secrets; locally from .env or the
# environment. The app keeps running with limited functionality when it is
# missing (st.stop() is deliberately not called).
if not GOOGLE_API_KEY:
    st.error("错误:GOOGLE_API_KEY 未配置。请在Hugging Face Space的Secrets中设置该值,或在本地的.env文件中配置。应用功能将受限。")

# --- Database initialization ---
# Runs on every script run; the table is created with CREATE TABLE IF NOT
# EXISTS, so repeating it is harmless. ChromaDB is set up by rag_manager.
init_student_db()  # SQLite

# --- Session state defaults ---
# Streamlit re-executes this script on every interaction; session_state is
# what carries values across those reruns. Only missing keys are filled in.
_SESSION_DEFAULTS = {
    'processed_chat_extracts': [],  # list of {"student_name": ..., "observation": ...}
    'current_processing_date': datetime.date.today(),
    'feedback_boss_text': "",
    'feedback_public_text': "",
    'feedback_parent_text': "",
    'selected_student_for_parent_fb': None,
}
for _state_key, _state_default in _SESSION_DEFAULTS.items():
    if _state_key not in st.session_state:
        st.session_state[_state_key] = _state_default

# Kept out of the table above so the database is only queried when the cache
# is actually missing, not on every rerun.
if 'student_list_cache' not in st.session_state:
    st.session_state.student_list_cache = get_all_student_names()
|
| 47 |
+
|
| 48 |
+
|
| 49 |
+
# --- Helper Functions for UI ---
|
| 50 |
+
def refresh_student_list_cache():
    """Reload the student names from the database into session state.

    Overwrites ``st.session_state.student_list_cache`` with the result of
    ``get_all_student_names()`` and shows a toast to confirm the refresh.
    """
    latest_names = get_all_student_names()
    st.session_state.student_list_cache = latest_names
    st.toast("学生列表已刷新。")
|
| 53 |
+
|
| 54 |
+
# --- Main Application UI ---
def _rerun_app():
    """Restart the current script run.

    ``st.experimental_rerun`` was deprecated in favour of ``st.rerun`` and is
    absent from recent Streamlit releases, so prefer ``st.rerun`` and fall
    back to the experimental name only on older installations.
    """
    rerun = getattr(st, "rerun", None) or st.experimental_rerun
    rerun()


st.title("🚀 晚托反馈自动化助手")

# Sidebar: navigation plus a small system-status panel.
with st.sidebar:
    st.header("导航")
    menu_options = ["处理聊天记录", "生成反馈报告", "学生特点管理"]
    choice = st.radio("选择功能:", menu_options, key="nav_menu")

    st.markdown("---")
    st.subheader("系统状态")
    if GOOGLE_API_KEY:
        st.success("Gemini API Key 已加载。")
    else:
        st.warning("Gemini API Key 未配置。")

    # Cheap existence checks only; the db/rag managers do their own handling.
    student_db_exists = os.path.exists(STUDENT_DB_PATH)
    # isdir() guards listdir() against the path existing as a regular file.
    chroma_dir_exists = os.path.isdir(CHROMA_DB_PATH) and bool(os.listdir(CHROMA_DB_PATH))

    if student_db_exists:
        st.markdown(f"✔️ 学生库: `{STUDENT_DB_PATH}`")
    else:
        st.markdown(f"⚠️ 学生库未找到: `{STUDENT_DB_PATH}`")

    if chroma_dir_exists:
        st.markdown(f"✔️ RAG库: `{CHROMA_DB_PATH}` (集合: {RAG_COLLECTION_NAME})")
    else:
        st.markdown(f"⚠️ RAG库未找到: `{CHROMA_DB_PATH}`")

    if st.button("🔄 刷新学生列表", key="sidebar_refresh_students"):
        refresh_student_list_cache()


# --- Page 1: process chat logs ---
if choice == "处理聊天记录":
    st.header("💬 聊天记录处理与数据构建")
    st.markdown("在此粘贴每日微信聊天记录,AI将提取关键信息并存入知识库。")

    # Date the pasted chat log belongs to.
    selected_date_for_processing = st.date_input(
        "请选择聊天记录对应的日期",
        value=st.session_state.current_processing_date,
        key="chat_date_input",
    )
    # Extracts belong to one date; changing the date invalidates them.
    if selected_date_for_processing != st.session_state.current_processing_date:
        st.session_state.current_processing_date = selected_date_for_processing
        st.session_state.processed_chat_extracts = []
        _rerun_app()

    chat_log_text = st.text_area(
        "在此粘贴聊天记录内容:",
        height=250,
        key="chat_log_input_area",
        help="输入聊天内容后,点击“分析聊天记录”。",
    )

    if st.button("🤖 使用AI分析聊天记录", type="primary", key="analyze_chat_button"):
        if not chat_log_text.strip():
            st.warning("请输入聊天记录内容。")
        elif not GOOGLE_API_KEY:
            st.error("API Key未配置,无法分析。")
        else:
            with st.spinner("AI正在分析聊天记录,提取信息中..."):
                st.session_state.processed_chat_extracts = extract_info_from_chat(chat_log_text)

            if st.session_state.processed_chat_extracts:
                st.success(f"AI成功提取到 {len(st.session_state.processed_chat_extracts)} 条信息!")
            else:
                # Not an error: an empty result can be legitimate.
                st.info("AI分析完成,但未能从聊天记录中提取到格式化信息。")

    if st.session_state.processed_chat_extracts:
        st.subheader("提取到的信息预览:")
        with st.container():
            for item in st.session_state.processed_chat_extracts:
                st.markdown(f"- **{item.get('student_name', 'N/A')}**: {item.get('observation', 'N/A')}")

        st.markdown("---")
        if st.button("➕ 确认并存入数据库和RAG知识库", key="store_extracted_data_button"):
            with st.spinner("正在存储数据到RAG和学生数据库..."):
                docs_to_rag = []
                metadatas_to_rag = []
                processed_student_names_today = set()
                date_str = st.session_state.current_processing_date.strftime("%Y-%m-%d")

                for item in st.session_state.processed_chat_extracts:
                    s_name = item.get("student_name")
                    obs = item.get("observation")
                    if not s_name or not obs:
                        st.warning(f"跳过不完整的提取项: {item}")
                        continue

                    docs_to_rag.append(f"{s_name} 在 {date_str} 的表现: {obs}")
                    metadatas_to_rag.append({"student_name": str(s_name), "date": str(date_str), "source": "chat_log"})

                    add_or_update_student(s_name)  # make sure the student row exists
                    processed_student_names_today.add(s_name)

                storage_successful = False
                if docs_to_rag:
                    # Pass None rather than an empty list for ids: an empty list
                    # does not match the number of documents.
                    # NOTE(review): assumes rag_manager generates ids when given
                    # None, as the original comments state - confirm.
                    if add_documents_to_rag(docs_to_rag, metadatas_to_rag, None):
                        storage_successful = True
                else:
                    st.info("没有有效的提取信息可供存储。")

                if storage_successful:
                    st.success(f"成功将 {len(docs_to_rag)} 条信息存入RAG。学生列表已更新。")
                    refresh_student_list_cache()
                    if processed_student_names_today:
                        st.info("数据已存储。您可以前往“学生特点管理”页面更新这些学生的特点总结。")
                    st.session_state.processed_chat_extracts = []  # clear after storing
                    _rerun_app()  # clears the preview and refreshes the UI
                elif docs_to_rag:  # there were documents, but storing them failed
                    st.error("数据存入RAG失败。请检查日志。")


# --- Page 2: generate feedback reports ---
elif choice == "生成反馈报告":
    st.header("📝 生成每日反馈报告")
    st.markdown("根据已处理的信息或学生特点,选择不同模式生成反馈。")

    feedback_target_date = st.date_input(
        "请选择生成反馈对应的日期",
        value=st.session_state.current_processing_date,
        key="feedback_date_selector",
    )
    feedback_date_str = feedback_target_date.strftime("%Y-%m-%d")

    # Summary used for the boss/public feedback: prefer the extracts still held
    # in the session for this date, otherwise query the knowledge base.
    daily_summary_for_general_feedback = ""
    extracts_match_date = (
        feedback_target_date == st.session_state.current_processing_date
        and bool(st.session_state.processed_chat_extracts)
    )

    if extracts_match_date:
        st.info(f"将使用为 {feedback_date_str} 刚处理的聊天记录生成反馈。")
        temp_summary_parts = [
            f"- {item.get('student_name', 'N/A')}: {item.get('observation', 'N/A')}"
            for item in st.session_state.processed_chat_extracts
        ]
        if temp_summary_parts:
            daily_summary_for_general_feedback = "\n".join(temp_summary_parts)
        else:
            daily_summary_for_general_feedback = get_events_summary_for_day(feedback_date_str)  # fallback
    else:
        with st.spinner(f"正在为日期 {feedback_date_str} 从知识库获取信息摘要..."):
            daily_summary_for_general_feedback = get_events_summary_for_day(feedback_date_str)

    st.markdown("---")
    col1, col2 = st.columns(2)

    with col1:
        st.subheader("👔 给老板的反馈")
        if st.button("生成老板反馈", key="generate_boss_fb"):
            if not GOOGLE_API_KEY:
                st.error("API Key未配置。")
                st.stop()
            with st.spinner("正在生成老板反馈..."):
                st.session_state.feedback_boss_text = generate_boss_feedback(daily_summary_for_general_feedback)
            if st.session_state.feedback_boss_text:
                st.success("老板反馈生成成功!")
            else:
                st.error("生成老板反馈失败或无内容返回。")
        if st.session_state.feedback_boss_text:
            st.text_area("老板反馈内容:", value=st.session_state.feedback_boss_text, height=200, key="boss_feedback_display")

    with col2:
        st.subheader("📢 公共反馈")
        if st.button("生成公共反馈", key="generate_public_fb"):
            if not GOOGLE_API_KEY:
                st.error("API Key未配置。")
                st.stop()
            with st.spinner("正在生成公共反馈..."):
                st.session_state.feedback_public_text = generate_public_feedback(daily_summary_for_general_feedback)
            if st.session_state.feedback_public_text:
                st.success("公共反馈生成成功!")
            else:
                st.error("生成公共反馈失败或无内容返回。")
        if st.session_state.feedback_public_text:
            st.text_area("公共反馈内容:", value=st.session_state.feedback_public_text, height=200, key="public_feedback_display")

    st.markdown("---")
    st.subheader("👨👩👧👦 给家长的反馈")

    if not st.session_state.student_list_cache:
        st.warning("学生列表为空。请先通过“处理聊天记录”功能添加学生并处理数据。")
    else:
        st.session_state.selected_student_for_parent_fb = st.selectbox(
            "选择学生:",
            options=[""] + st.session_state.student_list_cache,  # "" acts as the placeholder entry
            index=0,
            format_func=lambda x: "请选择..." if x == "" else x,
            key="parent_feedback_student_selector",
        )

        feedback_modes_map = {
            "正常模式 (基于当日记录)": "normal",
            "偷懒模式 (组合历史事件)": "lazy",
            "LLM特点生成 (创意发挥)": "llm_direct",
        }
        selected_mode_display_name = st.radio(
            "选择反馈模式:",
            options=list(feedback_modes_map.keys()),
            key="parent_feedback_mode_selector",
        )
        mode_value = feedback_modes_map[selected_mode_display_name]

        if st.button("为选定学生生成家长反馈", key="generate_parent_fb"):
            if not GOOGLE_API_KEY:
                st.error("API Key未配置。")
                st.stop()
            if not st.session_state.selected_student_for_parent_fb:
                st.warning("请先选择一个学生。")
            else:
                student_name = st.session_state.selected_student_for_parent_fb
                with st.spinner(f"正在为 {student_name} ({selected_mode_display_name}) 生成家长反馈..."):
                    # Hand over this student's in-session extracts when they
                    # belong to the selected date (used by "normal" mode).
                    student_specific_extracts_today = []
                    if extracts_match_date:
                        student_specific_extracts_today = [
                            item
                            for item in st.session_state.processed_chat_extracts
                            if item.get("student_name") == student_name
                        ]

                    st.session_state.feedback_parent_text = generate_parent_feedback(
                        student_name,
                        mode_value,
                        feedback_date_str,
                        student_specific_extracts_today,
                    )
                if st.session_state.feedback_parent_text:
                    st.success(f"为 {student_name} 生成家长反馈成功!")
                else:
                    st.error(f"为 {student_name} 生成家长反馈失败或无内容返回。")

        if st.session_state.feedback_parent_text and st.session_state.selected_student_for_parent_fb:
            st.text_area(
                f"给 {st.session_state.selected_student_for_parent_fb} 家长的反馈:",
                value=st.session_state.feedback_parent_text,
                height=300,
                key="parent_feedback_display",
            )


# --- Page 3: student characteristics management ---
elif choice == "学生特点管理":
    st.header("🧑🎓 学生特点数据库管理")
    st.markdown("查看和更新AI总结的学生特点。特点会基于RAG中的历史记录生成。")

    if st.button("🔄 强制刷新学生列表和显示", key="admin_refresh_students_btn"):
        refresh_student_list_cache()
        _rerun_app()

    if not st.session_state.student_list_cache:
        st.info("当前没有学生数据。请先通过“处理聊天记录”功能添加并存储学生相关信息。")
    else:
        st.subheader("当前学生列表及特点:")

        num_students = len(st.session_state.student_list_cache)
        cols_per_row = 3  # number of student cards per row

        for i in range(0, num_students, cols_per_row):
            cols = st.columns(cols_per_row)
            for j in range(cols_per_row):
                student_idx = i + j
                if student_idx >= num_students:
                    break
                student_name = st.session_state.student_list_cache[student_idx]
                with cols[j]:
                    with st.expander(f"{student_name}", expanded=False):
                        characteristics = get_student_characteristics(student_name)
                        st.markdown(f"**AI总结特点:**\n {characteristics if characteristics else '暂无总结。'}")
                        # The index in the key keeps widget keys unique.
                        if st.button(f"更新 {student_name} 特点", key=f"update_char_{student_name}_{student_idx}"):
                            if not GOOGLE_API_KEY:
                                st.error("API Key未配置。")
                                st.stop()
                            with st.spinner(f"正在为 {student_name} 更新特点..."):
                                update_student_characteristics_from_rag(student_name)
                            st.success(f"{student_name} 的特点已更新!请重新展开查看。")
                            _rerun_app()  # re-read the summary shown in the expander

    st.markdown("---")
    st.subheader("批量操作")

    # Two-step confirmation. A button is only True during the run its click
    # triggers, so a confirm button nested inside the first button's branch
    # can never fire: clicking it starts a new run in which the outer button
    # is False again. The pending state is therefore kept in session_state.
    if st.button("✨ 批量更新所有学生的特点总结", key="batch_update_all_chars_btn"):
        if not GOOGLE_API_KEY:
            st.error("API Key未配置。")
            st.stop()
        if not st.session_state.student_list_cache:
            st.warning("没有学生可供批量更新。")
        else:
            st.session_state.batch_update_pending = True

    if st.session_state.get("batch_update_pending", False):
        placeholder = st.empty()
        with placeholder.container():
            st.warning(f"此操作将为数据库中所有 {len(st.session_state.student_list_cache)} 位学生重新生成特点总结,可能需要较长时间并消耗API额度。")
        if st.button("我确认执行批量更新", key="confirm_batch_update"):
            st.session_state.batch_update_pending = False
            placeholder.empty()  # remove the confirmation message
            with st.spinner("正在批量更新所有学生特点,请耐心等待..."):
                batch_update_all_students_characteristics()  # reports its own progress
            st.success("所有学生特点总结批量更新完毕!")
            _rerun_app()
        elif st.button("取消批量更新", key="cancel_batch_update"):
            st.session_state.batch_update_pending = False
            placeholder.empty()
            st.info("批量更新已取消。")


# --- Footer ---
st.markdown("---")
st.markdown("晚托反馈助手 v1.0.0 (HF Dockerized) | 技术支持: Gemini LLM + RAG")
|
chat_processor.py
ADDED
|
@@ -0,0 +1,92 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
# chat_processor.py
|
| 2 |
+
import datetime
|
| 3 |
+
from llm_handler import get_gemini_response
|
| 4 |
+
from rag_manager import add_documents_to_rag, get_all_student_observations_from_rag
|
| 5 |
+
from db_manager import add_or_update_student, get_all_student_names
|
| 6 |
+
import prompts
|
| 7 |
+
import re
|
| 8 |
+
import streamlit as st
|
| 9 |
+
|
| 10 |
+
# Optional list marker ("-", "*", "•") that LLMs often put in front of a line.
_LIST_MARKER_RE = re.compile(r"^[-*•]\s+")
# "<name> : <observation>", accepting the ASCII colon and the full-width colon
# that is common in Chinese text. The name match is lazy, so a later colon
# (e.g. a time such as 18:30) stays inside the observation.
_NAME_OBSERVATION_RE = re.compile(r"([^:：]+?)\s*[:：]\s*(.+)")


def _parse_student_observations(response_text: str) -> list:
    """Turn the LLM's line-oriented reply into name/observation dicts."""
    parsed_items = []
    for raw_line in response_text.strip().split('\n'):
        line = _LIST_MARKER_RE.sub("", raw_line.strip())
        if not line:
            continue  # skip empty lines
        match = _NAME_OBSERVATION_RE.match(line)
        if not match:
            print(f"Could not parse line from LLM: '{line}'")  # debugging aid
            continue
        student_name = match.group(1).strip()
        observation = match.group(2).strip()
        if student_name and observation:  # both parts must be non-empty
            parsed_items.append({"student_name": student_name, "observation": observation})
        else:
            print(f"Skipping partially extracted line: '{line}'")  # debugging aid
    return parsed_items


def extract_info_from_chat(chat_log_text: str) -> list:
    """Use the LLM to extract per-student observations from a chat log.

    Args:
        chat_log_text: Raw chat log text. ``None``, empty, or whitespace-only
            input returns an empty list without calling the LLM.

    Returns:
        A list of ``{"student_name": ..., "observation": ...}`` dicts, one per
        reply line of the form ``name: observation``. Returns an empty list
        when the LLM gives no text or no line can be parsed; a Streamlit
        notice is shown in both cases.
    """
    if not chat_log_text or not chat_log_text.strip():
        return []

    prompt = prompts.CHAT_EXTRACTION_USER_PROMPT_TEMPLATE.format(chat_log_text=chat_log_text)
    system_instruction = prompts.CHAT_EXTRACTION_SYSTEM_PROMPT

    response_text = get_gemini_response(prompt, system_instruction=system_instruction)
    if not response_text:
        st.warning("AI未能从聊天记录中提取到文本响应。")
        return []

    extracted_items = _parse_student_observations(response_text)
    if not extracted_items:
        st.info("AI分析完成,但未能按预期格式解析出学生信息。可能是聊天内容不包含相关信息,或AI响应格式不符。")
    return extracted_items
|
| 43 |
+
|
| 44 |
+
def update_student_characteristics_from_rag(student_name: str):
    """Summarise a student's stored observations and save the summary.

    Fetches the student's observations from the RAG store, asks the LLM for a
    characteristics summary, and writes it to the student database. Outcomes
    are reported through Streamlit messages; nothing is returned.

    Args:
        student_name: Name used to look up observations and the student row.
    """
    history = get_all_student_observations_from_rag(student_name)
    if not history:
        st.info(f"在RAG中未找到学生 {student_name} 的历史表现记录,无法更新特点。")
        # Still make sure the student row exists (this also bumps its timestamp).
        add_or_update_student(student_name)
        return

    # Cap the number of observations so the prompt does not grow without bound.
    max_for_summary = 50
    if len(history) > max_for_summary:
        st.info(f"学生 {student_name} 有超过 {max_for_summary} 条记录,将使用最新的 {max_for_summary} 条进行特点总结。")
        # Keeps the tail of the list.
        # NOTE(review): "latest" assumes the RAG helper returns observations
        # oldest-first - confirm.
        history = history[-max_for_summary:]

    bullet_text = "\n".join(f"- {entry}" for entry in history)

    prompt = prompts.STUDENT_CHARACTERISTICS_USER_PROMPT_TEMPLATE.format(
        student_name=student_name,
        observations_text=bullet_text,
    )
    summary = get_gemini_response(
        prompt,
        system_instruction=prompts.STUDENT_CHARACTERISTICS_SYSTEM_PROMPT,
    )

    if not summary:
        st.warning(f"未能为学生 {student_name} 生成特点总结。AI未返回有效内容。")
        return

    saved = add_or_update_student(student_name, characteristics_summary=summary.strip())
    if saved:
        st.success(f"已更新学生 {student_name} 的特点总结。")
    else:
        st.error(f"更新学生 {student_name} 的特点总结到数据库时失败。")
|
| 78 |
+
|
| 79 |
+
def batch_update_all_students_characteristics():
    """Regenerate the characteristics summary of every student in the database.

    Processes the names returned by ``get_all_student_names()`` one at a time,
    writing the current name and advancing a Streamlit progress bar after each.
    """
    roster = get_all_student_names()
    if not roster:
        st.info("学生数据库为空,无法批量更新特点。")
        return

    total = len(roster)
    st.info(f"开始批量更新 {total} 位学生的特点总结...")
    progress_bar = st.progress(0)
    for done, name in enumerate(roster, start=1):
        st.write(f"正在处理: {name}...")  # visible feedback during a long run
        update_student_characteristics_from_rag(name)
        progress_bar.progress(done / total)
    st.success("所有学生特点总结批量更新完毕!")
|
config.py
ADDED
|
@@ -0,0 +1,24 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
# config.py
|
| 2 |
+
import os
|
| 3 |
+
from dotenv import load_dotenv
|
| 4 |
+
|
| 5 |
+
# Load environment variables from a .env file if one exists (local development).
# On Hugging Face Spaces, GOOGLE_API_KEY is provided through Secrets instead.
load_dotenv()

# API keys and model configuration
GOOGLE_API_KEY = os.getenv("GOOGLE_API_KEY")
EMBEDDING_MODEL = "models/embedding-001"
GENERATIVE_MODEL = "gemini-1.5-flash-latest"  # or "gemini-pro" / another preferred model

# Database paths
# The defaults are relative to the process working directory (WORKDIR /app in
# the Dockerfile, so /app/chroma_db and /app/students.db in the container).
# Both can be overridden with environment variables of the same name, which
# allows pointing them at a mounted volume without editing code.
# NOTE(review): nothing in this project shows that files under /app survive a
# Space restart. Hugging Face documents persistent storage as a volume mounted
# at /data - confirm, and set CHROMA_DB_PATH / STUDENT_DB_PATH accordingly.
CHROMA_DB_PATH = os.getenv("CHROMA_DB_PATH") or "./chroma_db"
STUDENT_DB_PATH = os.getenv("STUDENT_DB_PATH") or "./students.db"
RAG_COLLECTION_NAME = "chat_records_v2"  # renamed to avoid clashing with older data

# A missing GOOGLE_API_KEY is not treated as an error here: app.py checks the
# value and reports it in the UI, so importing this module never fails on it.
|
db_manager.py
ADDED
|
@@ -0,0 +1,134 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
# db_manager.py
|
| 2 |
+
import sqlite3
|
| 3 |
+
from config import STUDENT_DB_PATH
|
| 4 |
+
import streamlit as st
|
| 5 |
+
import os
|
| 6 |
+
import time
|
| 7 |
+
|
| 8 |
+
MAX_RETRIES = 3
|
| 9 |
+
RETRY_DELAY = 2 # seconds
|
| 10 |
+
|
| 11 |
+
def get_db_connection():
    """Open a connection to the student SQLite database.

    Creates the parent directory of ``STUDENT_DB_PATH`` when it is missing,
    then tries to connect up to ``MAX_RETRIES`` times, sleeping
    ``RETRY_DELAY`` seconds between attempts when the database is locked.

    Returns:
        An open ``sqlite3.Connection``, or ``None`` when the directory cannot
        be created or the connection fails. Failures are reported through
        ``st.error`` and ``print``. The caller closes the connection.
    """
    parent_dir = os.path.dirname(STUDENT_DB_PATH)
    if parent_dir and not os.path.exists(parent_dir):
        try:
            os.makedirs(parent_dir, exist_ok=True)
            print(f"Created directory for SQLite DB: {parent_dir}")
        except Exception as exc:
            st.error(f"无法创建SQLite数据库目录 {parent_dir}: {exc}")
            print(f"Could not create SQLite DB directory {parent_dir}: {exc}")
            return None  # nothing to connect to without the directory

    attempt = 0
    while attempt < MAX_RETRIES:
        try:
            return sqlite3.connect(STUDENT_DB_PATH, timeout=10)
        except sqlite3.OperationalError as exc:
            if "database is locked" not in str(exc):
                st.error(f"连接SQLite数据库时出错: {exc}")
                print(f"Error connecting to SQLite DB: {exc}")
                return None
            print(f"SQLite DB is locked (Attempt {attempt + 1}/{MAX_RETRIES}). Retrying in {RETRY_DELAY}s...")
            if attempt >= MAX_RETRIES - 1:
                # Last attempt used up: give up instead of sleeping again.
                st.error("SQLite数据库持续锁定,请稍后再试。")
                print("SQLite DB remains locked after multiple retries.")
                return None
            time.sleep(RETRY_DELAY)
        except Exception as exc:  # any other failure is not retried
            st.error(f"连接SQLite数据库时发生未知错误: {exc}")
            print(f"Unknown error connecting to SQLite DB: {exc}")
            return None
        attempt += 1
    return None
|
| 46 |
+
|
| 47 |
+
|
| 48 |
+
def init_student_db():
    """Create the ``students`` table if it does not exist yet.

    Does nothing when no connection can be obtained. Errors while creating
    the table are reported through ``st.error`` and ``print``; the connection
    is always closed.
    """
    connection = get_db_connection()
    if connection is None:
        return

    create_table_sql = '''
CREATE TABLE IF NOT EXISTS students (
id INTEGER PRIMARY KEY AUTOINCREMENT,
name TEXT UNIQUE NOT NULL,
characteristics_summary TEXT,
last_updated TIMESTAMP DEFAULT CURRENT_TIMESTAMP
)
'''
    try:
        connection.cursor().execute(create_table_sql)
        connection.commit()
    except Exception as exc:
        st.error(f"初始化学生数据库表时出错: {exc}")
        print(f"Error initializing student DB table: {exc}")
    finally:
        connection.close()
|
| 69 |
+
|
| 70 |
+
def add_or_update_student(name: str, characteristics_summary: str = None):
    """Insert a student by name, or refresh the existing row.

    Args:
        name: Student name; the row is keyed on this value.
        characteristics_summary: When not ``None``, stored as the student's
            summary. When ``None``, only ``last_updated`` is refreshed.

    Returns:
        ``True`` when the statements were committed, ``False`` when no
        connection could be obtained or any statement failed (the error is
        reported through ``st.error`` and ``print``).
    """
    conn = get_db_connection()
    if conn is None:
        return False
    try:
        cursor = conn.cursor()
        # Two-step upsert: make sure the row exists, then update it.
        cursor.execute("INSERT OR IGNORE INTO students (name) VALUES (?)", (name,))
        if characteristics_summary is not None:
            cursor.execute(
                """
                UPDATE students
                SET characteristics_summary = ?, last_updated = CURRENT_TIMESTAMP
                WHERE name = ?
                """,
                (characteristics_summary, name),
            )
        else:
            # Only touch the timestamp. `WHERE name = ?` already restricts the
            # update to an existing row, so no extra EXISTS check is needed.
            cursor.execute(
                """
                UPDATE students
                SET last_updated = CURRENT_TIMESTAMP
                WHERE name = ?
                """,
                (name,),
            )

        conn.commit()
        return True
    except Exception as e:
        st.error(f"添加或更新学生 '{name}' 时出错: {e}")
        print(f"Error adding/updating student '{name}': {e}")
        return False
    finally:
        conn.close()
|
| 101 |
+
|
| 102 |
+
def get_student_characteristics(name: str):
    """Return the stored characteristics summary for ``name``.

    Returns ``None`` when no connection is available, when the student has no
    row, or when the lookup fails (failures are reported through ``st.error``
    and ``print``). The connection is always closed.
    """
    db = get_db_connection()
    if db is None:
        return None

    try:
        row = db.cursor().execute(
            "SELECT characteristics_summary FROM students WHERE name = ?",
            (name,),
        ).fetchone()
        if not row:
            return None
        return row[0]
    except Exception as e:
        st.error(f"获取学生 '{name}' 特点时出错: {e}")
        print(f"Error getting characteristics for student '{name}': {e}")
        return None
    finally:
        db.close()
|
| 118 |
+
|
| 119 |
+
def get_all_student_names():
    """Return every student name, sorted ascending.

    An empty list is returned when no connection is available or when the
    query fails (failures are reported through ``st.error`` and ``print``).
    The connection is always closed.
    """
    db = get_db_connection()
    if db is None:
        return []

    try:
        cur = db.cursor()
        cur.execute("SELECT name FROM students ORDER BY name ASC")
        names = []
        for row in cur.fetchall():
            names.append(row[0])
        return names
    except Exception as e:
        st.error(f"获取所有学生姓名时出错: {e}")
        print(f"Error getting all student names: {e}")
        return []
    finally:
        db.close()
|
feedback_generator.py
ADDED
|
@@ -0,0 +1,120 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
# feedback_generator.py
|
| 2 |
+
from llm_handler import get_gemini_response
|
| 3 |
+
from rag_manager import query_rag
|
| 4 |
+
from db_manager import get_student_characteristics
|
| 5 |
+
import prompts
|
| 6 |
+
import datetime
|
| 7 |
+
import streamlit as st
|
| 8 |
+
|
| 9 |
+
def get_events_summary_for_day(date_str: str, processed_chat_data: list = None) -> str:
    """Build a bullet-list summary of the events for one day.

    Data extracted from today's chat is preferred; the RAG store is queried
    only when that data is missing or contains no usable item.

    Args:
        date_str: The date, used in messages and as the RAG ``date`` filter.
        processed_chat_data: Optional list of dicts with ``student_name`` and
            ``observation`` keys. Missing keys fall back to placeholder text;
            items that are not dicts are skipped.

    Returns:
        One ``- name: observation`` line per usable item, or a RAG-based
        summary, or a "no records" message when RAG returns nothing.
    """
    if processed_chat_data:
        summary_parts = [
            f"- {item.get('student_name', '未知学生')}: {item.get('observation', '无具体描述')}"
            for item in processed_chat_data
            if isinstance(item, dict)  # skip malformed entries instead of crashing
        ]
        if summary_parts:
            return "\n".join(summary_parts)
        # Every item was malformed: tell the user, then fall through to RAG.
        st.info(f"当日处理的聊天数据为空或格式不正确 ({date_str})。")

    # Fallback: a general query for the day's highlights, filtered by date.
    st.info(f"尝试从RAG中检索日期 {date_str} 的整体活动信息...")
    rag_results = query_rag(
        query_text=f"{date_str} 发生的关键事件和整体情况",
        n_results=10,
        filter_metadata={"date": date_str},
    )
    if not rag_results:
        # The phrase "无特别记录" is matched by the boss/public feedback
        # generators in this module, so keep it intact.
        return f"关于日期 {date_str}:今日无特别记录或未能从RAG中检索到信息。"
    return f"关于日期 {date_str} 的记录:\n" + "\n".join([f"- {r}" for r in rag_results])
|
| 39 |
+
|
| 40 |
+
|
| 41 |
+
def generate_boss_feedback(today_events_summary: str):
    """Produce the daily report for the boss from today's event summary.

    A fixed "not enough information" message is returned when the summary is
    empty or contains the "无特别记录" marker; otherwise the boss prompt is
    filled in and sent to ``get_gemini_response``.
    """
    has_material = bool(today_events_summary) and "无特别记录" not in today_events_summary
    if not has_material:
        return "今日无足够信息生成老板反馈。"

    user_prompt = prompts.BOSS_FEEDBACK_USER_PROMPT_TEMPLATE.format(
        today_events_summary=today_events_summary
    )
    return get_gemini_response(
        user_prompt,
        system_instruction=prompts.BOSS_FEEDBACK_SYSTEM_PROMPT,
    )
|
| 46 |
+
|
| 47 |
+
def generate_public_feedback(today_events_summary: str):
    """Produce the public-facing activity post from today's event summary.

    A fixed "not enough information" message is returned when the summary is
    empty or contains the "无特别记录" marker; otherwise the public prompt is
    filled in and sent to ``get_gemini_response``.
    """
    has_material = bool(today_events_summary) and "无特别记录" not in today_events_summary
    if not has_material:
        return "今日无足够信息生成公共反馈。"

    user_prompt = prompts.PUBLIC_FEEDBACK_USER_PROMPT_TEMPLATE.format(
        today_events_summary=today_events_summary
    )
    return get_gemini_response(
        user_prompt,
        system_instruction=prompts.PUBLIC_FEEDBACK_SYSTEM_PROMPT,
    )
|
| 52 |
+
|
| 53 |
+
def generate_parent_feedback(student_name: str, mode: str, date_str: str, processed_student_data_today: list = None):
    """Generate feedback for one student's parents.

    Args:
        student_name: Student the feedback is about.
        mode: One of
            ``"normal"`` - use today's observations for the student, falling
            back to a RAG query filtered by student and date;
            ``"lazy"`` - build the feedback from past RAG entries, or from
            the stored characteristics when no usable entry exists;
            ``"llm_direct"`` - build the feedback from the stored
            characteristics only.
        date_str: Date used in the normal-mode RAG query and filter.
        processed_student_data_today: Optional list of dicts with
            ``student_name`` and ``observation`` keys extracted today.

    Returns:
        Whatever ``get_gemini_response`` returns for the assembled prompt, or
        a fixed message string for an invalid mode or for ``"llm_direct"``
        when the student has no stored characteristics.
    """
    no_characteristics_text = "暂无该生详细特点记录。"
    characteristics = get_student_characteristics(student_name) or no_characteristics_text

    if mode == "normal":
        # Prefer what was extracted from today's chat for this student.
        observations = [
            item["observation"]
            for item in (processed_student_data_today or [])
            if isinstance(item, dict)
            and item.get("student_name") == student_name
            and "observation" in item
        ]
        if not observations:
            # Decide on the list itself rather than by comparing display
            # strings: the original compared against a text ending in an
            # ASCII "." while the default ended in "。", so this fallback
            # could never run.
            observations = query_rag(
                query_text=f"{student_name} 在 {date_str} 的具体表现",
                n_results=5,
                filter_metadata={"student_name": student_name, "date": date_str},
            )

        if observations:
            today_student_specific_events = "\n".join(f"- {obs}" for obs in observations)
        else:
            today_student_specific_events = "今天没有关于该生的特别记录。"

        user_prompt = prompts.PARENT_NORMAL_USER_PROMPT_TEMPLATE.format(
            student_name=student_name,
            student_characteristics=characteristics,
            today_student_specific_events=today_student_specific_events,
        )
        system_instruction = prompts.PARENT_NORMAL_SYSTEM_PROMPT

    elif mode == "lazy":
        past_events_list = query_rag(
            query_text=f"{student_name} 过往的各种积极表现和活动片段",
            n_results=10,  # ask for more entries to get variety
            filter_metadata={"student_name": student_name},  # no date filter
        )
        # Drop very short entries. Length is measured in characters because
        # a whitespace word count is 1 for any space-free Chinese sentence,
        # which made the original filter discard every Chinese record.
        min_entry_chars = 5
        usable_events = [
            r for r in (past_events_list or [])
            if isinstance(r, str) and len(r.strip()) > min_entry_chars
        ]

        if not usable_events and characteristics != no_characteristics_text:
            st.info("偷懒模式:历史具体事件不足,将尝试结合学生特点进行创意生成。")
            # No usable history: fall back to the characteristics-only prompt.
            user_prompt = prompts.PARENT_LLM_DIRECT_USER_PROMPT_TEMPLATE.format(
                student_name=student_name,
                student_characteristics=characteristics,
            )
            system_instruction = prompts.PARENT_LLM_DIRECT_SYSTEM_PROMPT
        else:
            if usable_events:
                past_events_for_student = "\n".join(f"- {r}" for r in usable_events)
            else:
                past_events_for_student = "暂无该生足够的多样化历史表现记录用于此模式。"
            user_prompt = prompts.PARENT_LAZY_USER_PROMPT_TEMPLATE.format(
                student_name=student_name,
                student_characteristics=characteristics,
                past_events_for_student=past_events_for_student,
            )
            system_instruction = prompts.PARENT_LAZY_SYSTEM_PROMPT

    elif mode == "llm_direct":
        if characteristics == no_characteristics_text:
            return f"无法使用LLM直接生成模式,学生 {student_name} 的特点数据不足。请先更新其特点。"
        user_prompt = prompts.PARENT_LLM_DIRECT_USER_PROMPT_TEMPLATE.format(
            student_name=student_name,
            student_characteristics=characteristics,
        )
        system_instruction = prompts.PARENT_LLM_DIRECT_SYSTEM_PROMPT

    else:
        st.error("无效的家长反馈模式。")
        return "无效的反馈模式。"

    return get_gemini_response(user_prompt, system_instruction=system_instruction)
|
llm_handler.py
ADDED
|
@@ -0,0 +1,135 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
# llm_handler.py
|
| 2 |
+
import google.generativeai as genai
|
| 3 |
+
from config import GOOGLE_API_KEY, GENERATIVE_MODEL, EMBEDDING_MODEL
|
| 4 |
+
import streamlit as st # For displaying errors or warnings if needed
|
| 5 |
+
|
| 6 |
+
# Configure Gemini API
|
| 7 |
+
# Configure the Gemini client once at import time. A missing key is only
# logged here; the functions below check the key again before calling the API.
if not GOOGLE_API_KEY:
    # This will be handled by Streamlit UI in app.py if key is missing
    print("Warning: GOOGLE_API_KEY is not set. LLM features will not work.")
else:
    try:
        genai.configure(api_key=GOOGLE_API_KEY)
    except Exception as e:
        # Report both in the Streamlit page and in the server log.
        st.error(f"Failed to configure Gemini API: {e}")
        print(f"Failed to configure Gemini API: {e}")
|
| 16 |
+
|
| 17 |
+
|
| 18 |
+
def get_gemini_response(prompt_text, system_instruction=None):
    """Send a prompt to the configured Gemini model and return its text.

    Args:
        prompt_text: The user prompt passed to ``generate_content``.
        system_instruction: Optional system instruction for the model; a
            falsy value is passed on as ``None``.

    Returns:
        The response text, or ``None`` when the API key is missing or the
        call fails. Failures are reported through ``st.error`` (and ``print``
        for call errors) instead of being raised.
    """
    if not GOOGLE_API_KEY:
        st.error("Gemini API Key未配置,无法获取模型响应。请在Hugging Face Space Secrets中设置 GOOGLE_API_KEY。")
        return None
    try:
        model = genai.GenerativeModel(
            GENERATIVE_MODEL,
            system_instruction=system_instruction or None,
        )
        return model.generate_content(prompt_text).text
    except Exception as e:
        error_message = f"与Gemini通信时出错: {e}"
        # Look at both the optional `.message` attribute and the string form,
        # so the friendlier hint also appears for exception types that carry
        # no `.message`.
        details = f"{getattr(e, 'message', '')} {e}"
        if "API key not valid" in details:
            error_message = "Gemini API Key无效或权限不足。请检查Hugging Face Space Secrets中的GOOGLE_API_KEY。"
        st.error(error_message)
        print(error_message)  # for server logs
        return None
|
| 37 |
+
|
| 38 |
+
# Using genai.embed_content directly is often simpler for ChromaDB
|
| 39 |
+
# but if you need a callable for ChromaDB's embedding_functions parameter:
|
| 40 |
+
class GeminiEmbeddingFunctionForChroma(genai.embedding.EmbeddingFunction):
    """Callable that embeds a batch of documents with ``genai.embed_content``.

    NOTE(review): the base class and the annotation types
    (``genai.embedding.EmbeddingFunction``, ``EmbedContentRequest``,
    ``EmbedContentResponse``) are kept exactly as the original wrote them.
    Confirm they exist in the installed google-generativeai release; if they
    do not, this class statement fails when the module is imported.
    """

    def __call__(self, input: genai.embedding.EmbedContentRequest) -> genai.embedding.EmbedContentResponse:
        """Return one embedding per document in ``input``.

        ``input`` is expected to be a list of strings. A single string is
        treated as a one-document batch and any other iterable has its items
        converted with ``str``. Returns ``[]`` for empty input and a list of
        ``None`` (one per document) when the embedding call fails.
        """
        # The original had these branches inverted: a proper list of strings
        # was re-wrapped, while anything else was passed through unchanged.
        if isinstance(input, str):
            docs_to_embed = [input]
        elif isinstance(input, list) and all(isinstance(doc, str) for doc in input):
            docs_to_embed = input
        else:
            docs_to_embed = [str(item) for item in input]

        if not docs_to_embed:
            # Same shape as the success path (a list), not a dict.
            return []

        try:
            # Embed the whole batch in one call.
            result = genai.embed_content(
                model=EMBEDDING_MODEL,
                content=docs_to_embed,
                task_type="RETRIEVAL_DOCUMENT",
            )
            return result['embedding']
        except Exception as e:
            error_message = f"获取文本嵌入时出错: {e}"
            st.error(error_message)
            print(error_message)
            # One placeholder per document so the length still matches.
            return [None] * len(docs_to_embed)
|
| 74 |
+
|
| 75 |
+
# --- Alternative simpler embedding function for ChromaDB ---
|
| 76 |
+
# This is often easier to integrate if ChromaDB's embedding_function
|
| 77 |
+
# parameter expects a function that takes a list of texts.
|
| 78 |
+
from chromadb import Documents, EmbeddingFunction, Embeddings
|
| 79 |
+
|
| 80 |
+
class GeminiChromaEF(EmbeddingFunction):
    """ChromaDB embedding function backed by ``genai.embed_content``.

    Inputs that cannot be embedded (non-string items, a missing API key, or a
    failed API call) receive an all-zero placeholder vector, so the returned
    list always has one entry per input text.
    """

    def __init__(
        self,
        model_name: str = EMBEDDING_MODEL,
        task_type: str = "RETRIEVAL_DOCUMENT",
        dimension: int = 768,
    ):
        """Store the embedding settings.

        Args:
            model_name: Embedding model passed to ``genai.embed_content``.
            task_type: Task type passed to ``genai.embed_content``.
            dimension: Length of the zero placeholder vectors. The original
                code hard-coded 768 and noted that this is the size for
                "models/embedding-001"; set it to match the model in use.
        """
        self._model_name = model_name
        self._task_type = task_type
        self._dimension = dimension
        if not GOOGLE_API_KEY:
            print("Warning: GOOGLE_API_KEY not set. Embedding function might fail.")

    def _zero_embeddings(self, count: int) -> list:
        """Return ``count`` independent all-zero placeholder vectors."""
        return [[0.0] * self._dimension for _ in range(count)]

    def __call__(self, input_texts: Documents) -> Embeddings:
        """Embed ``input_texts`` and return one vector per input, in order."""
        if not GOOGLE_API_KEY:
            st.error("Gemini API Key未配置,无法生成文本嵌入。")
            print("Gemini API Key not configured for embeddings.")
            return self._zero_embeddings(len(input_texts))

        if not input_texts:
            return []

        try:
            # Only strings are sent to the API; other items get placeholders.
            valid_texts = [text for text in input_texts if isinstance(text, str)]
            if not valid_texts:
                return self._zero_embeddings(len(input_texts))

            result = genai.embed_content(
                model=self._model_name,
                content=valid_texts,
                task_type=self._task_type,
            )
            # Assumes the API returns one embedding per entry of valid_texts,
            # in the same order. Duplicate texts share a single dict entry.
            embeddings_by_text = dict(zip(valid_texts, result['embedding']))

            placeholder = [0.0] * self._dimension
            return [
                embeddings_by_text[text]
                if isinstance(text, str) and text in embeddings_by_text
                else list(placeholder)
                for text in input_texts
            ]
        except Exception as e:
            error_message = f"获取文本嵌入时出错 (GeminiChromaEF): {e}"
            st.error(error_message)
            print(error_message)
            # Best effort: keep the caller running with placeholders.
            return self._zero_embeddings(len(input_texts))
|
prompts.py
ADDED
|
@@ -0,0 +1,129 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
# prompts.py
|
| 2 |
+
# Prompt templates (system prompt + user prompt template) used for chat-log
# extraction, student-characteristics summaries, and boss / public / parent
# feedback generation. Placeholders in braces are filled with str.format().
|
| 7 |
+
# --- Chat extraction ---
# System prompt: extract, per student, the concrete events and observations
# mentioned in a chat log.
CHAT_EXTRACTION_SYSTEM_PROMPT = """
你是一个晚托班聊天记录分析助手。你的任务是从提供的聊天记录中,为每个提到的学生提取关键信息。
信息应包括:学生姓名,以及关于该学生的具体事件、学术表现、行为、情绪、社交互动或任何值得注意的观察。
如果一个学生有多条相关信息,请都列出来。
专注于事实和具体描述。
"""

# User template. Placeholder: {chat_log_text}. Asks for one
# "student name: observation" line per item.
CHAT_EXTRACTION_USER_PROMPT_TEMPLATE = """
请分析以下今天的聊天记录,提取每个学生相关的具体事件、表现或评价。
输出格式为:
学生姓名: [事件/表现/评价]
学生姓名: [另一个事件/表现/评价]
...

聊天记录内容如下:
---
{chat_log_text}
---
请严格按照上述格式输出,每条信息占一行。只输出提取结果。
"""

# --- Student characteristics ---
# System prompt: summarise a student's traits from a series of observations.
STUDENT_CHARACTERISTICS_SYSTEM_PROMPT = """
你是一个资深的儿童教育心理分析师。你的任务是根据提供的一系列关于某个学生的日常表现记录,总结该学生的主要性格特点、学习习惯、社交风格和潜在优势或需要关注的方面。
总结应全面、客观、简洁,并使用积极的语言。
"""

# User template. Placeholders: {student_name}, {observations_text}.
STUDENT_CHARACTERISTICS_USER_PROMPT_TEMPLATE = """
学生姓名: {student_name}
历史表现记录如下:
---
{observations_text}
---
请基于以上记录,为 {student_name} 总结其主要特点。
"""


# --- Feedback Generation Prompts ---
# BOSS FEEDBACK
# System prompt: concise daily report for the boss (overview, notable
# students, issues to follow up, optional suggestions).
BOSS_FEEDBACK_SYSTEM_PROMPT = """
你是一位经验丰富的晚托机构主管助理。你的任务是根据今天收集到的学生表现信息,撰写一份给老板的每日工作反馈。
反馈应简洁明了,突出重点:
1. 今日整体情况概述。
2. 表现特别突出(正面或负面)的学生及其简要事迹。
3. 任何需要老板知晓或跟进的特殊事件或问题。
4. 可以提出简要的工作建议(可选)。
语言需专业、客观。
"""
# User template. Placeholder: {today_events_summary}.
BOSS_FEEDBACK_USER_PROMPT_TEMPLATE = """
今日学生表现信息汇总:
---
{today_events_summary}
---
请根据以上信息,生成一份给老板的晚托工作反馈。
"""

# PUBLIC FEEDBACK
# System prompt: upbeat public post; tells the model to avoid naming
# individual students unless the praise is collective.
PUBLIC_FEEDBACK_SYSTEM_PROMPT = """
你是一位活泼且富有创意的晚托机构宣传专员。你的任务是根据今天收集到的学生表现素材,撰写一份公开的、积极正面的晚托活动反馈。
这份反馈将会发布在机构的社交媒体或公告栏。
主要目标是:
1. 展示孩子们在晚托的快乐学习时光和丰富多彩的活动。
2. 传递积极向上的教育理念和氛围。
3. 除非是集体性的表扬,否则避免提及具体学生姓名,可以使用“有的小朋友”、“大家”等代称。
风格应活泼、温馨、吸引人。
"""
# User template. Placeholder: {today_events_summary}.
PUBLIC_FEEDBACK_USER_PROMPT_TEMPLATE = """
今日学生表现素材:
---
{today_events_summary}
---
请根据以上素材,生成一份公开的晚托活动反馈。
"""

# PARENT FEEDBACK (NORMAL MODE)
# System prompt: warm, specific feedback to a parent about the child's day.
PARENT_NORMAL_SYSTEM_PROMPT = """
你是一位经验丰富、充满爱心且专业的晚托班老师。你的任务是给学生家长写一份关于孩子今天在晚托班表现的反馈。
反馈应包含:
1. 问候家长。
2. 具体描述孩子今天的学习情况(如作业完成度、遇到的困难、取得的进步)。
3. 描述孩子的行为表现和情绪状态。
4. 描述孩子的社交互动情况。
5. 基于观察给予积极的肯定和鼓励。
6. 如有必要,可以给出温和的建议或需要家长配合的事项。
语言需亲切、真诚、具体、正面引导。
"""
# User template. Placeholders: {student_name}, {student_characteristics},
# {today_student_specific_events}.
PARENT_NORMAL_USER_PROMPT_TEMPLATE = """
学生姓名: {student_name}
该生一般特点: {student_characteristics}

今天关于 {student_name} 的具体表现记录:
---
{today_student_specific_events}
---
请根据以上信息,为 {student_name} 的家长写一份今日反馈。
"""

# PARENT FEEDBACK (LAZY MODE)
# NOTE(review): the user template below asks the model to recombine PAST
# records into feedback that "sounds like it happened today". Parents would
# receive recycled events presented as today's observations; confirm this is
# intended and consider disclosing it.
PARENT_LAZY_SYSTEM_PROMPT = PARENT_NORMAL_SYSTEM_PROMPT  # reuses the normal-mode system prompt
# User template. Placeholders: {student_name}, {student_characteristics},
# {past_events_for_student}.
PARENT_LAZY_USER_PROMPT_TEMPLATE = """
学生姓名: {student_name}
该生一般特点: {student_characteristics}

以下是 {student_name} 过去的一些表现记录,请从中挑选几件【不同】的事情,巧妙地组合并略作修改,形成一份【听起来像是今天发生】的反馈给家长。
确保反馈内容积极正面,并且事件之间有一定区隔,不要都揉在一起说。

历史表现记录(供挑选组合):
---
{past_events_for_student}
---
请根据以上要求,为 {student_name} 的家长写一份反馈。
"""

# PARENT FEEDBACK (LLM DIRECT MODE - Based on characteristics)
# NOTE(review): the user template below asks the model to IMAGINE the child's
# day from known traits and write it up "as if really observed". The result
# is invented content; confirm this is intended and consider disclosing it.
PARENT_LLM_DIRECT_SYSTEM_PROMPT = PARENT_NORMAL_SYSTEM_PROMPT  # reuses the normal-mode system prompt
# User template. Placeholders: {student_name}, {student_characteristics}.
PARENT_LLM_DIRECT_USER_PROMPT_TEMPLATE = """
学生姓名: {student_name}
该生一般特点: {student_characteristics}

今天晚托班的常规活动包括:作业辅导、阅读、主题活动(例如手工、科学小实验或小组游戏)、自由活动。
请你基于 {student_name} 的已知特点,并结合今天的常规活动,【设想并生成】一份他/她今天可能的表现,并据此给家长写一份反馈。
例如,如果学生特点是“专注数学”,可以设想他今天在数学作业上表现出色。如果特点是“乐于助人”,可以设想他帮助了同学。
反馈需要听起来自然、具体,就像真实观察到的一样。
"""
|
rag_manager.py
ADDED
|
@@ -0,0 +1,132 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
# rag_manager.py
|
| 2 |
+
import chromadb
|
| 3 |
+
from config import CHROMA_DB_PATH, RAG_COLLECTION_NAME
|
| 4 |
+
from llm_handler import GeminiChromaEF # Use the robust embedding function
|
| 5 |
+
import streamlit as st
|
| 6 |
+
import time
|
| 7 |
+
|
| 8 |
+
# Initialize the embedding function globally so it's created once.
|
| 9 |
+
# `os` is needed for the directory check below but was missing from this
# module's imports. Without it every attempt raised NameError, which the
# broad `except` swallowed, so `collection` always stayed None.
import os

# Create the embedding function once at import time.
gemini_ef = None
try:
    gemini_ef = GeminiChromaEF()
except Exception as e:
    st.error(f"无法初始化Gemini Embedding Function: {e}. RAG功能将受限。")
    print(f"Error initializing GeminiChromaEF: {e}")


# Initialize the ChromaDB client and collection, retrying a few times.
# The original author notes that "database is locked" errors are common in
# shared environments such as HF Spaces.
db_client = None
collection = None
MAX_RETRIES = 3
RETRY_DELAY = 5  # seconds

for attempt in range(MAX_RETRIES):
    try:
        if not os.path.exists(CHROMA_DB_PATH):
            os.makedirs(CHROMA_DB_PATH, exist_ok=True)
            print(f"Created ChromaDB directory: {CHROMA_DB_PATH}")

        db_client = chromadb.PersistentClient(path=CHROMA_DB_PATH)

        if gemini_ef:
            collection = db_client.get_or_create_collection(
                name=RAG_COLLECTION_NAME,
                embedding_function=gemini_ef,
            )
            print(f"成功连接到RAG集合 '{RAG_COLLECTION_NAME}' 并使用Gemini embeddings.")
        else:
            # The embedding function failed to initialize, so the collection
            # is created without it; the add/query helpers in this module
            # refuse to run while `gemini_ef` is None.
            collection = db_client.get_or_create_collection(name=RAG_COLLECTION_NAME)
            st.warning("RAG集合已创建,但Gemini Embedding Function未成功初始化。语义搜索可能无法正常工作。")
            print(f"RAG collection '{RAG_COLLECTION_NAME}' created without a proper embedding function due to prior errors.")
        break  # success
    except Exception as e:
        st.error(f"初始化ChromaDB客户端失败 (尝试 {attempt + 1}/{MAX_RETRIES}): {e}")
        print(f"Error initializing ChromaDB client (Attempt {attempt + 1}/{MAX_RETRIES}): {e}")
        if attempt < MAX_RETRIES - 1:
            time.sleep(RETRY_DELAY)
        else:
            st.error("已达到最大重试次数,ChromaDB可能无法使用。请检查日志。")
            print("Max retries reached for ChromaDB client initialization.")
            # `collection` stays None; the functions below handle that case.
|
| 54 |
+
|
| 55 |
+
def add_documents_to_rag(documents: list[str], metadatas: list[dict] = None, ids: list[str] = None):
    """Add documents, with optional metadata and ids, to the RAG collection.

    Args:
        documents: Texts to store.
        metadatas: Optional list with one metadata dict per document. When
            omitted or empty, no metadata is sent.
        ids: Optional list with one unique id per document. When omitted or
            empty, ids are derived from an MD5 of each text plus its
            position in the list.

    Returns:
        ``True`` when the documents were added or there was nothing to add;
        ``False`` when the collection or embedding function is unavailable
        or the add call failed (reported through ``st.error``).
    """
    if collection is None or gemini_ef is None:
        st.error("RAG集合或Embedding Function未初始化,无法添加文档。")
        print("RAG collection or EF not initialized in add_documents_to_rag.")
        return False
    if not documents:
        st.info("没有文档需要添加到RAG。")
        return True  # not an error, just nothing to do

    if not ids:
        from hashlib import md5
        ids = [f"doc_{md5(doc.encode()).hexdigest()}_{i}" for i, doc in enumerate(documents)]

    # Leave metadata out entirely when none was supplied. The original built
    # `[{}] * n`, i.e. n references to one shared empty dict, and Chroma
    # rejects empty metadata dicts.
    metadatas = metadatas or None

    # Truncate every list to the shortest one whenever lengths differ,
    # including when ids or metadatas are longer than documents.
    lengths = [len(documents), len(ids)]
    if metadatas is not None:
        lengths.append(len(metadatas))
    min_len = min(lengths)
    if any(length != min_len for length in lengths):
        st.warning(f"文档、元数据或ID列表长度不一致。将使用最短长度: {min_len}")
        documents = documents[:min_len]
        ids = ids[:min_len]
        if metadatas is not None:
            metadatas = metadatas[:min_len]
    if min_len == 0:
        st.info("调整后没有文档可添加。")
        return True

    try:
        collection.add(
            documents=documents,
            metadatas=metadatas,
            ids=ids,
        )
        st.success(f"成功添加 {len(documents)} 个文档到RAG集合 '{RAG_COLLECTION_NAME}'.")
        return True
    except Exception as e:
        st.error(f"添加文档到RAG时出错: {e}")
        print(f"Error adding documents to RAG: {e}")
        return False
|
| 95 |
+
|
| 96 |
+
def query_rag(query_text: str, n_results: int = 5, filter_metadata: dict = None):
    """Query the RAG collection and return the matching document texts.

    Args:
        query_text: Natural-language query; an empty value returns ``[]``.
        n_results: Maximum number of documents requested.
        filter_metadata: Optional metadata filter. A dict with one key is
            passed to Chroma unchanged; a dict with several keys is combined
            into a single ``$and`` clause.

    Returns:
        A list of document strings (possibly empty). ``[]`` is also returned
        when the collection or embedding function is unavailable or the
        query fails (reported through ``st.error``).
    """
    if collection is None or gemini_ef is None:
        st.error("RAG集合或Embedding Function未初始化,无法查询。")
        print("RAG collection or EF not initialized in query_rag.")
        return []

    if not query_text:
        return []

    # Chroma accepts exactly one top-level key in `where`, so several
    # equality conditions have to be wrapped in "$and".
    where = None
    if filter_metadata:
        if len(filter_metadata) == 1:
            where = filter_metadata
        else:
            where = {"$and": [{key: value} for key, value in filter_metadata.items()]}

    try:
        results = collection.query(
            query_texts=[query_text],
            n_results=n_results,
            where=where,
        )
        documents = results['documents'] if results else None
        # One inner list per query text; only one query text was sent.
        return documents[0] if documents else []
    except Exception as e:
        st.error(f"查询RAG时出错: {e}")
        print(f"Error querying RAG: {e}")
        return []
|
| 117 |
+
|
| 118 |
+
def get_all_student_observations_from_rag(student_name: str):
    """Return every stored document whose metadata names ``student_name``.

    This is a metadata lookup through ``collection.get`` with a ``where``
    filter, not a similarity query. An empty list is returned when the
    collection is unavailable, nothing matches, or the lookup fails
    (failures are reported through ``st.error`` and ``print``).
    """
    if collection is None:
        st.error("RAG集合未初始化,无法获取学生观察记录。")
        return []

    try:
        # Only the document texts are requested.
        entries = collection.get(
            where={"student_name": student_name},
            include=["documents"],
        )
        if entries and entries['documents']:
            return entries['documents']
        return []
    except Exception as e:
        st.error(f"从RAG获取学生 {student_name} 的所有观察记录时出错: {e}")
        print(f"Error getting all observations for {student_name} from RAG: {e}")
        return []
|
requirements.txt
ADDED
|
@@ -0,0 +1,6 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
streamlit
|
| 2 |
+
google-generativeai
|
| 3 |
+
python-dotenv # 仍然有用,config.py会尝试加载,即使在HF上主要是为了本地运行或读取非敏感配置
|
| 4 |
+
chromadb>=0.4.22 # 确保版本兼容性,特别是对于PersistentClient和EmbeddingFunctions
|
| 5 |
+
# sentence-transformers # 如果你决定使用它作为 embedding function
|
| 6 |
+
# 其他你项目中可能用到的库
|