Spaces:

forzen
/

LLM-Powered

Paused

App Files Files Community

forzen committed on May 30, 2025

Commit

634b5dc

verified ·

1 Parent(s): 86ce4e4

Upload 11 files

Browse files

Files changed (11) hide show

.dockerignore +38 -0
Dockerfile +27 -0
app.py +346 -0
chat_processor.py +92 -0
config.py +24 -0
db_manager.py +134 -0
feedback_generator.py +120 -0
llm_handler.py +135 -0
prompts.py +129 -0
rag_manager.py +132 -0
requirements.txt +6 -0

.dockerignore ADDED Viewed

	@@ -0,0 +1,38 @@

+# Git
+.git
+.gitignore
+# Python virtual environment
+venv/
+venv_feedback/
+*.egg-info/
+__pycache__/
+*.pyc
+*.pyo
+# Docker specific
+Dockerfile
+.dockerignore
+# IDE / OS specific
+.vscode/
+.idea/
+.DS_Store
+Thumbs.db
+# Local configuration not for image (API key via HF Secrets)
+.env
+# Logs and other local artifacts
+*.log
+dist/
+build/
+*.local
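+# If you decide to commit an empty database directory/file to git so that the
+# path exists in persistent storage, do not ignore it here. Otherwise, if the
+# app can create it automatically, it may be ignored.
+# For HF persistent storage it is best to let the app create these
+# files/directories on the persistent volume on demand at runtime.
+# So they are normally not ignored here, but make sure they are empty or absent
+# in the initial commit, so that local test data never ends up in the image or
+# the repository.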
+# chroma_db/
+# students.db

Dockerfile ADDED Viewed

	@@ -0,0 +1,27 @@

+# 1. Official slim Python base image.
+#    "bookworm" replaces the end-of-life Debian "buster" release.
+#    NOTE(review): buster ships an old SQLite (3.27) and recent ChromaDB
+#    releases are documented to require SQLite >= 3.35 - confirm against the
+#    ChromaDB version pinned in requirements.txt.
+FROM python:3.9-slim-bookworm
+# 2. Environment variables (key=value form; the space-separated form is legacy syntax).
+ENV PYTHONDONTWRITEBYTECODE=1
+ENV PYTHONUNBUFFERED=1
+# 3. Working directory inside the image.
+WORKDIR /app
+# 4. (Optional) system dependencies - uncomment if needed.
+# RUN apt-get update && apt-get install -y --no-install-recommends gcc && rm -rf /var/lib/apt/lists/*
+# 5. Copy the dependency manifest on its own so the install layer below can be cached.
+COPY requirements.txt .
+# 6. Install Python dependencies.
+RUN pip install --no-cache-dir -r requirements.txt
+# 7. Copy the rest of the project into the working directory (filtered by .dockerignore).
+COPY . .
+# 8. Port Streamlit listens on.
+#    NOTE(review): EXPOSE is informational only; confirm the Space README sets
+#    `app_port: 8501`, otherwise Hugging Face routes traffic to its default port.
+EXPOSE 8501
+# 9. Command executed when the container starts.
+CMD ["streamlit", "run", "app.py", "--server.port=8501", "--server.address=0.0.0.0", "--server.headless=true"]

app.py ADDED Viewed

	@@ -0,0 +1,346 @@

+# app.py
+import streamlit as st
+import datetime
+import os
+# Import all necessary modules from your project
+from config import GOOGLE_API_KEY, STUDENT_DB_PATH, CHROMA_DB_PATH, RAG_COLLECTION_NAME
+from db_manager import init_student_db, get_all_student_names, get_student_characteristics, add_or_update_student
+from rag_manager import add_documents_to_rag, query_rag # get_all_student_observations_from_rag is used by chat_processor
+from chat_processor import extract_info_from_chat, update_student_characteristics_from_rag, batch_update_all_students_characteristics
+from feedback_generator import (
+    generate_boss_feedback,
+    generate_public_feedback,
+    generate_parent_feedback,
+    get_events_summary_for_day
+)
+# import prompts # Prompts are used by other modules, not directly here typically
+# --- Page Configuration and Initialization ---
+st.set_page_config(page_title="晚托反馈助手", layout="wide", initial_sidebar_state="expanded")
+# --- Check API Key ---
+# On Hugging Face, this will be set via Secrets. For local, from .env or environment.
+if not GOOGLE_API_KEY:
+    st.error("错误：GOOGLE_API_KEY 未配置。请在Hugging Face Space的Secrets中设置该值，或在本地的.env文件中配置。应用功能将受限。")
+    # st.stop() # Option to stop app, or let it run with limited functionality
+# --- Initialize Databases (Idempotent) ---
+# These functions now have internal error handling and directory creation
+init_student_db() # For SQLite
+# ChromaDB is initialized within rag_manager.py upon import/first use.
+# --- Session State Management ---
+# Helps persist data across Streamlit reruns
+if 'processed_chat_extracts' not in st.session_state: # Renamed for clarity
+    st.session_state.processed_chat_extracts = [] # Stores list of {"student_name": ..., "observation": ...}
+if 'current_processing_date' not in st.session_state: # Renamed
+    st.session_state.current_processing_date = datetime.date.today()
+if 'student_list_cache' not in st.session_state: # Renamed
+    st.session_state.student_list_cache = get_all_student_names() # Initial load
+# Feedback text states
+if 'feedback_boss_text' not in st.session_state: st.session_state.feedback_boss_text = ""
+if 'feedback_public_text' not in st.session_state: st.session_state.feedback_public_text = ""
+if 'feedback_parent_text' not in st.session_state: st.session_state.feedback_parent_text = ""
+if 'selected_student_for_parent_fb' not in st.session_state: st.session_state.selected_student_for_parent_fb = None
+# --- Helper Functions for UI ---
+def refresh_student_list_cache():
+    st.session_state.student_list_cache = get_all_student_names()
+    st.toast("学生列表已刷新。")
+# --- Main Application UI ---
+st.title("🚀 晚托反馈自动化助手")
+# Sidebar for navigation and info
+with st.sidebar:
+    st.header("导航")
+    menu_options = ["处理聊天记录", "生成反馈报告", "学生特点管理"]
+    choice = st.radio("选择功能:", menu_options, key="nav_menu")
+    st.markdown("---")
+    st.subheader("系统状态")
+    if GOOGLE_API_KEY:
+        st.success("Gemini API Key 已加载。")
+    else:
+        st.warning("Gemini API Key 未配置。")
+    # Simple check if DB files exist (more robust checks are within db/rag managers)
+    # These paths are inside the container / HF Space file system
+    student_db_exists = os.path.exists(STUDENT_DB_PATH)
+    chroma_dir_exists = os.path.exists(CHROMA_DB_PATH) and os.listdir(CHROMA_DB_PATH) # Check if dir exists and is not empty
+    if student_db_exists: st.markdown(f"✔️ 学生库: `{STUDENT_DB_PATH}`")
+    else: st.markdown(f"⚠️ 学生库未找到: `{STUDENT_DB_PATH}`")
+    if chroma_dir_exists: st.markdown(f"✔️ RAG库: `{CHROMA_DB_PATH}` (集合: {RAG_COLLECTION_NAME})")
+    else: st.markdown(f"⚠️ RAG库未找到: `{CHROMA_DB_PATH}`")
+    if st.button("🔄 刷新学生列表", key="sidebar_refresh_students"):
+        refresh_student_list_cache()
+# --- Page 1: 处理聊天记录 ---
+if choice == "处理聊天记录":
+    st.header("💬 聊天记录处理与数据构建")
+    st.markdown("在此粘贴每日微信聊天记录，AI将提取关键信息并存入知识库。")
+    # Date selection for the chat log
+    selected_date_for_processing = st.date_input(
+        "请选择聊天记录对应的日期",
+        value=st.session_state.current_processing_date, # Use session state for persistence
+        key="chat_date_input"
+    )
+    # Update session state if date changes
+    if selected_date_for_processing != st.session_state.current_processing_date:
+        st.session_state.current_processing_date = selected_date_for_processing
+        st.session_state.processed_chat_extracts = [] # Clear old extracts if date changes
+        st.experimental_rerun()
+    chat_log_text = st.text_area("在此粘贴聊天记录内容:", height=250, key="chat_log_input_area",
+                                 help="输入聊天内容后，点击“分析聊天记录”。")
+    if st.button("🤖 使用AI分析聊天记录", type="primary", key="analyze_chat_button"):
+        if not chat_log_text.strip():
+            st.warning("请输入聊天记录内容。")
+        elif not GOOGLE_API_KEY:
+            st.error("API Key未配置，无法分析。")
+        else:
+            with st.spinner("AI正在分析聊天记录，提取信息中..."):
+                st.session_state.processed_chat_extracts = extract_info_from_chat(chat_log_text)
+            if st.session_state.processed_chat_extracts:
+                st.success(f"AI成功提取到 {len(st.session_state.processed_chat_extracts)} 条信息！")
+            else:
+                st.info("AI分析完成，但未能从聊天记录中提取到格式化信息。")
+                # No st.error here as extract_info_from_chat might return empty on purpose
+    if st.session_state.processed_chat_extracts:
+        st.subheader("提取到的信息预览:")
+        preview_container = st.container()
+        with preview_container:
+            for item in st.session_state.processed_chat_extracts:
+                st.markdown(f"- **{item.get('student_name', 'N/A')}**: {item.get('observation', 'N/A')}")
+        st.markdown("---")
+        if st.button("➕ 确认并存入数据库和RAG知识库", key="store_extracted_data_button"):
+            with st.spinner("正在存储数据到RAG和学生数据库..."):
+                docs_to_rag = []
+                metadatas_to_rag = []
+                ids_to_rag = [] # RAG manager now generates robust IDs if None
+                processed_student_names_today = set()
+                date_str = st.session_state.current_processing_date.strftime("%Y-%m-%d")
+                for item_idx, item in enumerate(st.session_state.processed_chat_extracts):
+                    s_name = item.get("student_name")
+                    obs = item.get("observation")
+                    if not s_name or not obs:
+                        st.warning(f"跳过不完整的提取项: {item}")
+                        continue
+                    docs_to_rag.append(f"{s_name} 在 {date_str} 的表现: {obs}")
+                    metadatas_to_rag.append({"student_name": str(s_name), "date": str(date_str), "source": "chat_log"})
+                    # Let rag_manager handle ID generation if not provided or use robust ones here
+                    # ids_to_rag.append(f"chat_{date_str.replace('-','')}_{str(s_name).replace(' ','_')}_{item_idx}")
+                    add_or_update_student(s_name) # Ensure student exists in DB
+                    processed_student_names_today.add(s_name)
+                storage_successful = False
+                if docs_to_rag:
+                    if add_documents_to_rag(docs_to_rag, metadatas_to_rag, ids_to_rag): # ids can be None
+                        storage_successful = True
+                else:
+                    st.info("没有有效的提取信息可供存储。")
+                if storage_successful:
+                    st.success(f"成功将 {len(docs_to_rag)} 条信息存入RAG。学生列表已更新。")
+                    refresh_student_list_cache()
+                    # Optionally trigger characteristics update for these students
+                    if processed_student_names_today:
+                        st.info("数据已存储。您可以前往“学生特点管理”页面更新这些学生的特点总结。")
+                    st.session_state.processed_chat_extracts = [] # Clear after storing
+                    st.experimental_rerun() # Rerun to clear preview and update UI
+                elif docs_to_rag: # If docs were there but storage failed
+                    st.error("数据存入RAG失败。请检查日志。")
+# --- Page 2: 生成反馈报告 ---
+elif choice == "生成反馈报告":
+    st.header("📝 生成每日反馈报告")
+    st.markdown("根据已处理的信息或学生特点，选择不同模式生成反馈。")
+    feedback_target_date = st.date_input(
+        "请选择生成反馈对应的日期",
+        value=st.session_state.current_processing_date,
+        key="feedback_date_selector"
+    )
+    feedback_date_str = feedback_target_date.strftime("%Y-%m-%d")
+    # Determine summary for Boss/Public feedback
+    # Use extracts if date matches and extracts exist, otherwise query RAG
+    daily_summary_for_general_feedback = ""
+    processed_extracts_for_feedback_date = []
+    if feedback_target_date == st.session_state.current_processing_date and st.session_state.processed_chat_extracts:
+        processed_extracts_for_feedback_date = st.session_state.processed_chat_extracts
+        st.info(f"将使用为 {feedback_date_str} 刚处理的聊天记录生成反馈。")
+        temp_summary_parts = []
+        for item in processed_extracts_for_feedback_date:
+            temp_summary_parts.append(f"- {item.get('student_name', 'N/A')}: {item.get('observation', 'N/A')}")
+        if temp_summary_parts:
+            daily_summary_for_general_feedback = "\n".join(temp_summary_parts)
+        else:
+            daily_summary_for_general_feedback = get_events_summary_for_day(feedback_date_str) # Fallback
+    else:
+        with st.spinner(f"正在为日期 {feedback_date_str} 从知识库获取信息摘要..."):
+            daily_summary_for_general_feedback = get_events_summary_for_day(feedback_date_str)
+    st.markdown("---")
+    col1, col2 = st.columns(2)
+    with col1:
+        st.subheader("👔 给老板的反馈")
+        if st.button("生成老板反馈", key="generate_boss_fb"):
+            if not GOOGLE_API_KEY: st.error("API Key未配置。"); st.stop()
+            with st.spinner("正在生成老板反馈..."):
+                st.session_state.feedback_boss_text = generate_boss_feedback(daily_summary_for_general_feedback)
+            if st.session_state.feedback_boss_text: st.success("老板反馈生成成功！")
+            else: st.error("生成老板反馈失败或无内容返回。")
+        if st.session_state.feedback_boss_text:
+            st.text_area("老板反馈内容:", value=st.session_state.feedback_boss_text, height=200, key="boss_feedback_display")
+    with col2:
+        st.subheader("📢 公共反馈")
+        if st.button("生成公共反馈", key="generate_public_fb"):
+            if not GOOGLE_API_KEY: st.error("API Key未配置。"); st.stop()
+            with st.spinner("正在生成公共反馈..."):
+                st.session_state.feedback_public_text = generate_public_feedback(daily_summary_for_general_feedback)
+            if st.session_state.feedback_public_text: st.success("公共反馈生成成功！")
+            else: st.error("生成公共反馈失败或无内容返回。")
+        if st.session_state.feedback_public_text:
+            st.text_area("公共反馈内容:", value=st.session_state.feedback_public_text, height=200, key="public_feedback_display")
+    st.markdown("---")
+    st.subheader("👨‍👩‍👧‍👦 给家长的反馈")
+    if not st.session_state.student_list_cache:
+        st.warning("学生列表为空。请先通过“处理聊天记录”功能添加学生并处理数据。")
+    else:
+        st.session_state.selected_student_for_parent_fb = st.selectbox(
+            "选择学生:",
+            options=[""] + st.session_state.student_list_cache, # Add empty option for placeholder
+            index=0, # Default to empty
+            format_func=lambda x: "请选择..." if x == "" else x,
+            key="parent_feedback_student_selector"
+        )
+        feedback_modes_map = {
+            "正常模式 (基于当日记录)": "normal",
+            "偷懒模式 (组合历史事件)": "lazy",
+            "LLM特点生成 (创意发挥)": "llm_direct"
+        }
+        selected_mode_display_name = st.radio(
+            "选择反馈模式:",
+            options=list(feedback_modes_map.keys()),
+            key="parent_feedback_mode_selector"
+        )
+        mode_value = feedback_modes_map[selected_mode_display_name]
+        if st.button(f"为选定学生生成家长反馈", key="generate_parent_fb"):
+            if not GOOGLE_API_KEY: st.error("API Key未配置。"); st.stop()
+            if not st.session_state.selected_student_for_parent_fb:
+                st.warning("请先选择一个学生。")
+            else:
+                student_name = st.session_state.selected_student_for_parent_fb
+                with st.spinner(f"正在为 {student_name} ({selected_mode_display_name}) 生成家长反馈..."):
+                    # Pass today's extracted data for the student if available (for "normal" mode)
+                    student_specific_extracts_today = []
+                    if feedback_target_date == st.session_state.current_processing_date and st.session_state.processed_chat_extracts:
+                        student_specific_extracts_today = [
+                            item for item in st.session_state.processed_chat_extracts if item.get("student_name") == student_name
+                        ]
+                    st.session_state.feedback_parent_text = generate_parent_feedback(
+                        student_name,
+                        mode_value,
+                        feedback_date_str,
+                        student_specific_extracts_today # Pass specific extracts for normal mode
+                    )
+                if st.session_state.feedback_parent_text:
+                    st.success(f"为 {student_name} 生成家长反馈成功！")
+                else:
+                    st.error(f"为 {student_name} 生成家长反馈失败或无内容返回。")
+        if st.session_state.feedback_parent_text and st.session_state.selected_student_for_parent_fb:
+            st.text_area(
+                f"给 {st.session_state.selected_student_for_parent_fb} 家长的反馈:",
+                value=st.session_state.feedback_parent_text,
+                height=300,
+                key="parent_feedback_display"
+            )
+# --- Page 3: 学生特点管理 ---
+elif choice == "学生特点管理":
+    st.header("🧑‍🎓 学生特点数据库管理")
+    st.markdown("查看和更新AI总结的学生特点。特点会基于RAG中的历史记录生成。")
+    if st.button("🔄 强制刷新学生列表和显示", key="admin_refresh_students_btn"):
+        refresh_student_list_cache()
+        st.experimental_rerun()
+    if not st.session_state.student_list_cache:
+        st.info("当前没有学生数据。请先通过“处理聊天记录”功能添加并存储学生相关信息。")
+    else:
+        st.subheader("当前学生列表及特点:")
+        num_students = len(st.session_state.student_list_cache)
+        cols_per_row = 3 # Adjust number of columns for display
+        for i in range(0, num_students, cols_per_row):
+            cols = st.columns(cols_per_row)
+            for j in range(cols_per_row):
+                student_idx = i + j
+                if student_idx < num_students:
+                    student_name = st.session_state.student_list_cache[student_idx]
+                    with cols[j]:
+                        with st.expander(f"{student_name}", expanded=False):
+                            characteristics = get_student_characteristics(student_name)
+                            st.markdown(f"**AI总结特点:**\n {characteristics if characteristics else '暂无总结。'}")
+                            if st.button(f"更新 {student_name} 特点", key=f"update_char_{student_name}_{student_idx}"):
+                                if not GOOGLE_API_KEY: st.error("API Key未配置。"); st.stop()
+                                with st.spinner(f"正在为 {student_name} 更新特点..."):
+                                    update_student_characteristics_from_rag(student_name)
+                                st.success(f"{student_name} 的特点已更新！请重新展开查看。")
+                                st.experimental_rerun() # Rerun to reflect changes
+    st.markdown("---")
+    st.subheader("批量操作")
+    if st.button("✨ 批量更新所有学生的特点总结", key="batch_update_all_chars_btn"):
+        if not GOOGLE_API_KEY: st.error("API Key未配置。"); st.stop()
+        if not st.session_state.student_list_cache:
+            st.warning("没有学生可供批量更新。")
+        else:
+            # Confirmation dialog for safety
+            # Using a more explicit confirmation
+            placeholder = st.empty()
+            with placeholder.container():
+                 st.warning(f"此操作将为数据库中所有 {len(st.session_state.student_list_cache)} 位学生重新生成特点总结，可能需要较长时间并消耗API额度。")
+                 if st.button("我确认执行批量更新", key="confirm_batch_update"):
+                     placeholder.empty() # Remove confirmation message
+                     with st.spinner("正在批量更新所有学生特点，请耐心等待..."):
+                         batch_update_all_students_characteristics() # This function has internal st.progress
+                     st.success("所有学生特点总结批量更新完毕！")
+                     st.experimental_rerun()
+                 elif st.button("取消批量更新", key="cancel_batch_update"):
+                     placeholder.empty()
+                     st.info("批量更新已取消。")
+# --- Footer ---
+# Top-level statements, so they run for whichever page was selected above
+# (unless an earlier st.stop() ended the script run).
+st.markdown("---")
+st.markdown("晚托反馈助手 v1.0.0 (HF Dockerized) | 技术支持: Gemini LLM + RAG")

chat_processor.py ADDED Viewed

	@@ -0,0 +1,92 @@

+# chat_processor.py
+import datetime
+from llm_handler import get_gemini_response
+from rag_manager import add_documents_to_rag, get_all_student_observations_from_rag
+from db_manager import add_or_update_student, get_all_student_names
+import prompts
+import re
+import streamlit as st
+def extract_info_from_chat(chat_log_text: str) -> list:
+    """使用LLM从聊天记录中提取学生表现信息"""
+    if not chat_log_text.strip():
+        return []
+    prompt = prompts.CHAT_EXTRACTION_USER_PROMPT_TEMPLATE.format(chat_log_text=chat_log_text)
+    system_instruction = prompts.CHAT_EXTRACTION_SYSTEM_PROMPT
+    response_text = get_gemini_response(prompt, system_instruction=system_instruction)
+    if not response_text:
+        st.warning("AI未能从聊天记录中提取到文本响应。")
+        return []
+    extracted_items = []
+    lines = response_text.strip().split('\n')
+    for line in lines:
+        line = line.strip()
+        if not line: continue # Skip empty lines
+        # More robust regex: allows for names with spaces if not ending with colon immediately
+        match = re.match(r"([^:]+?)\s*:\s*(.+)", line)
+        if match:
+            student_name = match.group(1).strip()
+            observation = match.group(2).strip()
+            if student_name and observation: # Ensure both parts are non-empty
+                extracted_items.append({"student_name": student_name, "observation": observation})
+            else:
+                print(f"Skipping partially extracted line: '{line}'") # Log for debugging
+        else:
+            print(f"Could not parse line from LLM: '{line}'") # Log for debugging
+    if not extracted_items:
+        st.info("AI分析完成，但未能按预期格式解析出学生信息。可能是聊天内容不包含相关信息，或AI响应格式不符。")
+    return extracted_items
+def update_student_characteristics_from_rag(student_name: str):
+    """从RAG中获取学生所有记录，让LLM总结特点，并更新到学生数据库"""
+    observations = get_all_student_observations_from_rag(student_name)
+    if not observations:
+        st.info(f"在RAG中未找到学生 {student_name} 的历史表现记录，无法更新特点。")
+        # Ensure student exists in DB even if no observations yet, or update timestamp
+        add_or_update_student(student_name)
+        return
+    # Limit number of observations to avoid overly long prompts for LLM
+    MAX_OBSERVATIONS_FOR_SUMMARY = 50 # Adjust as needed
+    if len(observations) > MAX_OBSERVATIONS_FOR_SUMMARY:
+        st.info(f"学生 {student_name} 有超过 {MAX_OBSERVATIONS_FOR_SUMMARY} 条记录，将使用最新的 {MAX_OBSERVATIONS_FOR_SUMMARY} 条进行特点总结。")
+        observations_to_use = observations[-MAX_OBSERVATIONS_FOR_SUMMARY:]
+    else:
+        observations_to_use = observations
+    observations_text = "\n".join([f"- {obs}" for obs in observations_to_use]) # Add bullet points for clarity
+    prompt = prompts.STUDENT_CHARACTERISTICS_USER_PROMPT_TEMPLATE.format(
+        student_name=student_name,
+        observations_text=observations_text
+    )
+    system_instruction = prompts.STUDENT_CHARACTERISTICS_SYSTEM_PROMPT
+    summary = get_gemini_response(prompt, system_instruction=system_instruction)
+    if summary:
+        if add_or_update_student(student_name, characteristics_summary=summary.strip()):
+             st.success(f"已更新学生 {student_name} 的特点总结。")
+        else:
+             st.error(f"更新学生 {student_name} 的特点总结到数据库时失败。")
+    else:
+        st.warning(f"未能为学生 {student_name} 生成特点总结。AI未返回有效内容。")
+def batch_update_all_students_characteristics():
+    """为数据库中所有学生更新其特点总结"""
+    student_names = get_all_student_names()
+    if not student_names:
+        st.info("学生数据库为空，无法批量更新特点。")
+        return
+    st.info(f"开始批量更新 {len(student_names)} 位学生的特点总结...")
+    progress_bar = st.progress(0)
+    for i, name in enumerate(student_names):
+        st.write(f"正在处理: {name}...") # Give some feedback during long process
+        update_student_characteristics_from_rag(name)
+        progress_bar.progress((i + 1) / len(student_names))
+    st.success("所有学生特点总结批量更新完毕！")

config.py ADDED Viewed

	@@ -0,0 +1,24 @@

+# config.py
+import os
+from dotenv import load_dotenv
+# Load environment variables from .env file if it exists (for local development)
+# In Hugging Face Spaces, GOOGLE_API_KEY will be set via Secrets.
+load_dotenv()
+# API Keys and Model Configuration
+GOOGLE_API_KEY = os.getenv("GOOGLE_API_KEY")
+EMBEDDING_MODEL = "models/embedding-001"
+GENERATIVE_MODEL = "gemini-1.5-flash-latest" # Or "gemini-pro" or other preferred model
+# Database Paths
+# These paths are relative to the WORKDIR defined in Dockerfile (i.e., /app)
+# Hugging Face Spaces persistent storage will store data created at these paths.
+CHROMA_DB_PATH = "./chroma_db"  # Will be /app/chroma_db inside the container
+STUDENT_DB_PATH = "./students.db" # Will be /app/students.db inside the container
+RAG_COLLECTION_NAME = "chat_records_v2" # Changed name to avoid conflicts if old data exists
+# Ensure API key is available (especially for local runs, HF handles missing secrets with errors)
+# if not GOOGLE_API_KEY:
+#     print("Warning: GOOGLE_API_KEY not found. Please set it in your environment or .env file.")
+    # For HF deployment, if secret is not set, the app might fail at runtime when API is called.

db_manager.py ADDED Viewed

	@@ -0,0 +1,134 @@

+# db_manager.py
+import sqlite3
+from config import STUDENT_DB_PATH
+import streamlit as st
+import os
+import time
+MAX_RETRIES = 3
+RETRY_DELAY = 2 # seconds
+def get_db_connection():
+    # Ensure the directory for the SQLite DB exists
+    db_dir = os.path.dirname(STUDENT_DB_PATH)
+    if db_dir and not os.path.exists(db_dir):
+        try:
+            os.makedirs(db_dir, exist_ok=True)
+            print(f"Created directory for SQLite DB: {db_dir}")
+        except Exception as e:
+            st.error(f"无法创建SQLite数据库目录 {db_dir}: {e}")
+            print(f"Could not create SQLite DB directory {db_dir}: {e}")
+            return None # Cannot proceed if directory creation fails
+    conn = None
+    for attempt in range(MAX_RETRIES):
+        try:
+            conn = sqlite3.connect(STUDENT_DB_PATH, timeout=10) # Added timeout
+            return conn
+        except sqlite3.OperationalError as e:
+            if "database is locked" in str(e):
+                print(f"SQLite DB is locked (Attempt {attempt + 1}/{MAX_RETRIES}). Retrying in {RETRY_DELAY}s...")
+                if attempt < MAX_RETRIES - 1:
+                    time.sleep(RETRY_DELAY)
+                else:
+                    st.error("SQLite数据库持续锁定，请稍后再试。")
+                    print("SQLite DB remains locked after multiple retries.")
+                    return None
+            else:
+                st.error(f"连接SQLite数据库时出错: {e}")
+                print(f"Error connecting to SQLite DB: {e}")
+                return None
+        except Exception as e: # Catch other potential errors
+            st.error(f"连接SQLite数据库时发生未知错误: {e}")
+            print(f"Unknown error connecting to SQLite DB: {e}")
+            return None
+    return None
+def init_student_db():
+    conn = get_db_connection()
+    if conn is None:
+        return
+    try:
+        cursor = conn.cursor()
+        cursor.execute('''
+        CREATE TABLE IF NOT EXISTS students (
+            id INTEGER PRIMARY KEY AUTOINCREMENT,
+            name TEXT UNIQUE NOT NULL,
+            characteristics_summary TEXT,
+            last_updated TIMESTAMP DEFAULT CURRENT_TIMESTAMP
+        )
+        ''')
+        conn.commit()
+    except Exception as e:
+        st.error(f"初始化学生数据库表时出错: {e}")
+        print(f"Error initializing student DB table: {e}")
+    finally:
+        if conn:
+            conn.close()
+def add_or_update_student(name: str, characteristics_summary: str = None):
+    conn = get_db_connection()
+    if conn is None:
+        return False
+    try:
+        cursor = conn.cursor()
+        # Upsert logic: Insert if name doesn't exist, or update if it does.
+        # Using INSERT OR IGNORE then UPDATE is a common pattern.
+        cursor.execute("INSERT OR IGNORE INTO students (name) VALUES (?)", (name,))
+        if characteristics_summary is not None: # Allow updating only name or also characteristics
+            cursor.execute("""
+            UPDATE students
+            SET characteristics_summary = ?, last_updated = CURRENT_TIMESTAMP
+            WHERE name = ?
+            """, (characteristics_summary, name))
+        else: # Only ensure the student exists, update last_updated if already present
+             cursor.execute("""
+            UPDATE students
+            SET last_updated = CURRENT_TIMESTAMP
+            WHERE name = ? AND EXISTS (SELECT 1 FROM students WHERE name = ?)
+            """, (name,name))
+        conn.commit()
+        return True
+    except Exception as e:
+        st.error(f"添加或更新学生 '{name}' 时出错: {e}")
+        print(f"Error adding/updating student '{name}': {e}")
+        return False
+    finally:
+        if conn:
+            conn.close()
+def get_student_characteristics(name: str):
+    conn = get_db_connection()
+    if conn is None:
+        return None
+    try:
+        cursor = conn.cursor()
+        cursor.execute("SELECT characteristics_summary FROM students WHERE name = ?", (name,))
+        result = cursor.fetchone()
+        return result[0] if result else None
+    except Exception as e:
+        st.error(f"获取学生 '{name}' 特点时出错: {e}")
+        print(f"Error getting characteristics for student '{name}': {e}")
+        return None
+    finally:
+        if conn:
+            conn.close()
+def get_all_student_names():
+    conn = get_db_connection()
+    if conn is None:
+        return []
+    try:
+        cursor = conn.cursor()
+        cursor.execute("SELECT name FROM students ORDER BY name ASC")
+        results = [row[0] for row in cursor.fetchall()]
+        return results
+    except Exception as e:
+        st.error(f"获取所有学生姓名时出错: {e}")
+        print(f"Error getting all student names: {e}")
+        return []
+    finally:
+        if conn:
+            conn.close()

feedback_generator.py ADDED Viewed

	@@ -0,0 +1,120 @@

+# feedback_generator.py
+from llm_handler import get_gemini_response
+from rag_manager import query_rag
+from db_manager import get_student_characteristics
+import prompts
+import datetime
+import streamlit as st
+def get_events_summary_for_day(date_str: str, processed_chat_data: list = None) -> str:
+    """
+    获取指定日期的事件总结。
+    优先使用当日处理的聊天数据，否则从RAG查询。
+    """
+    if processed_chat_data:
+        summary_parts = []
+        for item in processed_chat_data:
+            # Ensure item has the expected keys
+            student_name = item.get("student_name", "未知学生")
+            observation = item.get("observation", "无具体描述")
+            summary_parts.append(f"- {student_name}: {observation}")
+        if summary_parts:
+            return "\n".join(summary_parts)
+        else: # processed_chat_data was empty or malformed
+            st.info(f"当日处理的聊天数据为空或格式不正确 ({date_str})。")
+            # Fall through to RAG query
+    # Fallback to RAG if no direct processed_chat_data
+    # This query needs to be general enough to pull daily highlights
+    # Or specific if you store daily summary documents.
+    st.info(f"尝试从RAG中检索日期 {date_str} 的整体活动信息...")
+    rag_results = query_rag(
+        query_text=f"{date_str} 发生的关键事件和整体情况",
+        n_results=10, # Get a few diverse entries
+        filter_metadata={"date": date_str} # Filter by date if metadata is set
+    )
+    if not rag_results:
+        return f"关于日期 {date_str}：今日无特别记录或未能从RAG中检索到信息。"
+    return f"关于日期 {date_str} 的记录：\n" + "\n".join([f"- {r}" for r in rag_results])
+def generate_boss_feedback(today_events_summary: str):
+    if not today_events_summary or "无特别记录" in today_events_summary:
+        return "今日无足够信息生成老板反馈。"
+    prompt = prompts.BOSS_FEEDBACK_USER_PROMPT_TEMPLATE.format(today_events_summary=today_events_summary)
+    return get_gemini_response(prompt, system_instruction=prompts.BOSS_FEEDBACK_SYSTEM_PROMPT)
+def generate_public_feedback(today_events_summary: str):
+    if not today_events_summary or "无特别记录" in today_events_summary:
+        return "今日无足够信息生成公共反馈。"
+    prompt = prompts.PUBLIC_FEEDBACK_USER_PROMPT_TEMPLATE.format(today_events_summary=today_events_summary)
+    return get_gemini_response(prompt, system_instruction=prompts.PUBLIC_FEEDBACK_SYSTEM_PROMPT)
+def generate_parent_feedback(student_name: str, mode: str, date_str: str, processed_student_data_today: list = None):
+    characteristics = get_student_characteristics(student_name) or "暂无该生详细特点记录。"
+    if mode == "normal":
+        today_student_specific_events = "今天没有关于该生的特别记录。"
+        if processed_student_data_today: # Prefer data extracted today for this student
+            student_obs = [item['observation'] for item in processed_student_data_today if item['student_name'] == student_name]
+            if student_obs:
+                today_student_specific_events = "\n".join([f"- {obs}" for obs in student_obs])
+        if today_student_specific_events == "今天没有关于该生的特别记录.": # Fallback to RAG if not found in today's extract
+            rag_student_events = query_rag(
+                query_text=f"{student_name} 在 {date_str} 的具体表现",
+                n_results=5,
+                filter_metadata={"student_name": student_name, "date": date_str}
+            )
+            if rag_student_events:
+                today_student_specific_events = "\n".join([f"- {r}" for r in rag_student_events])
+        prompt_vars = {
+            "student_name": student_name,
+            "student_characteristics": characteristics,
+            "today_student_specific_events": today_student_specific_events
+        }
+        user_prompt = prompts.PARENT_NORMAL_USER_PROMPT_TEMPLATE.format(**prompt_vars)
+        system_instruction = prompts.PARENT_NORMAL_SYSTEM_PROMPT
+    elif mode == "lazy":
+        past_events_list = query_rag(
+            query_text=f"{student_name} 过往的各种积极表现和活动片段",
+            n_results=10, # Get more for variety
+            filter_metadata={"student_name": student_name} # No date filter for past events
+        )
+        # Filter out any very short or generic entries if possible
+        past_events_for_student = "\n".join([f"- {r}" for r in past_events_list if len(r.split()) > 5]) if past_events_list else "暂无该生足够的多样化历史表现记录用于此模式。"
+        if "暂无该生足够的多样化历史表现记录" in past_events_for_student and characteristics != "暂无该生详细特点记录。":
+             st.info("偷懒模式：历史具体事件不足，将尝试结合��生特点进行创意生成。")
+             # Fallback to a slightly modified LLM direct mode if lazy mode has no data
+             user_prompt = prompts.PARENT_LLM_DIRECT_USER_PROMPT_TEMPLATE.format(
+                student_name=student_name,
+                student_characteristics=characteristics
+            )
+             system_instruction = prompts.PARENT_LLM_DIRECT_SYSTEM_PROMPT
+        else:
+            prompt_vars = {
+                "student_name": student_name,
+                "student_characteristics": characteristics, # Still useful for LLM to know
+                "past_events_for_student": past_events_for_student
+            }
+            user_prompt = prompts.PARENT_LAZY_USER_PROMPT_TEMPLATE.format(**prompt_vars)
+            system_instruction = prompts.PARENT_LAZY_SYSTEM_PROMPT
+    elif mode == "llm_direct":
+        if characteristics == "暂无该生详细特点记录。":
+            return f"无法使用LLM直接生成模式，学生 {student_name} 的特点数据不足。请先更新其特点。"
+        prompt_vars = {
+            "student_name": student_name,
+            "student_characteristics": characteristics
+        }
+        user_prompt = prompts.PARENT_LLM_DIRECT_USER_PROMPT_TEMPLATE.format(**prompt_vars)
+        system_instruction = prompts.PARENT_LLM_DIRECT_SYSTEM_PROMPT
+    else:
+        st.error("无效的家长反馈模式。")
+        return "无效的反馈模式。"
+    return get_gemini_response(user_prompt, system_instruction=system_instruction)

llm_handler.py ADDED Viewed

	@@ -0,0 +1,135 @@

+# llm_handler.py
+import google.generativeai as genai
+from config import GOOGLE_API_KEY, GENERATIVE_MODEL, EMBEDDING_MODEL
+import streamlit as st # For displaying errors or warnings if needed
+# Configure Gemini API
+if GOOGLE_API_KEY:
+    try:
+        genai.configure(api_key=GOOGLE_API_KEY)
+    except Exception as e:
+        st.error(f"Failed to configure Gemini API: {e}") # Show error in Streamlit if app is running
+        print(f"Failed to configure Gemini API: {e}") # Print to console for server logs
+else:
+    # This will be handled by Streamlit UI in app.py if key is missing
+    print("Warning: GOOGLE_API_KEY is not set. LLM features will not work.")
+def get_gemini_response(prompt_text, system_instruction=None):
+    """获取Gemini模型的响应"""
+    if not GOOGLE_API_KEY:
+        st.error("Gemini API Key未配置，无法获取模型响应。请在Hugging Face Space Secrets中设置 GOOGLE_API_KEY。")
+        return None
+    try:
+        model = genai.GenerativeModel(
+            GENERATIVE_MODEL,
+            system_instruction=system_instruction if system_instruction else None
+        )
+        response = model.generate_content(prompt_text)
+        return response.text
+    except Exception as e:
+        error_message = f"与Gemini通信时出错: {e}"
+        if hasattr(e, 'message') and "API key not valid" in e.message:
+             error_message = "Gemini API Key无效或权限不足。请检查Hugging Face Space Secrets中的GOOGLE_API_KEY。"
+        st.error(error_message)
+        print(error_message) # For server logs
+        return None
+# Using genai.embed_content directly is often simpler for ChromaDB
+# but if you need a callable for ChromaDB's embedding_functions parameter:
+class GeminiEmbeddingFunctionForChroma(genai.embedding.EmbeddingFunction):
+    def __call__(self, input: genai.embedding.EmbedContentRequest) -> genai.embedding.EmbedContentResponse:
+        # Ensure 'input' is a list of strings (documents)
+        if not isinstance(input, list) or not all(isinstance(doc, str) for doc in input):
+            # ChromaDB typically passes a list of documents (strings)
+            # genai.embed_content expects a 'content' field which can be a string or list of strings
+            # The structure of 'input' from ChromaDB needs to be correctly mapped.
+            # ChromaDB's `embedding_function` interface expects a function that takes a list of texts
+            # and returns a list of embeddings.
+            # Let's assume 'input' is a list of document strings.
+            docs_to_embed = input
+        else: # Fallback if input structure is different, adapt as needed
+            docs_to_embed = [str(item) for item in input]
+        if not docs_to_embed:
+            return {"embedding": []} # Return empty embedding list for empty input
+        try:
+            # Embed a batch of documents.
+            # `task_type` is important for retrieval.
+            result = genai.embed_content(
+                model=EMBEDDING_MODEL,
+                content=docs_to_embed,
+                task_type="RETRIEVAL_DOCUMENT"
+            )
+            return result['embedding'] # ChromaDB expects a list of embeddings
+        except Exception as e:
+            error_message = f"获取文本嵌入时出错: {e}"
+            st.error(error_message)
+            print(error_message)
+            # Return a list of Nones or empty lists of the correct length if an error occurs for some documents
+            return [None] * len(docs_to_embed)
+# --- Alternative simpler embedding function for ChromaDB ---
+# This is often easier to integrate if ChromaDB's embedding_function
+# parameter expects a function that takes a list of texts.
+from chromadb import Documents, EmbeddingFunction, Embeddings
+class GeminiChromaEF(EmbeddingFunction):
+    def __init__(self, model_name: str = EMBEDDING_MODEL, task_type: str = "RETRIEVAL_DOCUMENT"):
+        self._model_name = model_name
+        self._task_type = task_type
+        if not GOOGLE_API_KEY:
+            print("Warning: GOOGLE_API_KEY not set. Embedding function might fail.")
+            # Optionally raise an error or handle appropriately
+    def __call__(self, input_texts: Documents) -> Embeddings:
+        if not GOOGLE_API_KEY:
+            st.error("Gemini API Key未配置，无法生成文本嵌入。")
+            print("Gemini API Key not configured for embeddings.")
+            return [([0.0] * 768) for _ in input_texts] # Return dummy embeddings or handle error
+        if not input_texts:
+            return []
+        try:
+            # Filter out any None or non-string inputs, though Documents type should be list of str
+            valid_texts = [text for text in input_texts if isinstance(text, str)]
+            if not valid_texts:
+                # Handle case where all inputs were invalid
+                return [([0.0] * 768) for _ in input_texts]
+            result = genai.embed_content(
+                model=self._model_name,
+                content=valid_texts,
+                task_type=self._task_type
+            )
+            # Ensure the result matches the number of valid_texts.
+            # If there was an error, result['embedding'] might be shorter or None.
+            # A robust handler would map results back to original input count, perhaps with None for errors.
+            # For simplicity here, assuming success or a catastrophic failure handled by the try-except.
+            # Map embeddings back to the original input_texts length, filling with None for invalid ones
+            # This part is tricky because genai.embed_content might error out entirely or skip bad inputs.
+            # Let's assume it returns embeddings for valid_texts only.
+            embeddings_dict = {text: emb for text, emb in zip(valid_texts, result['embedding'])}
+            final_embeddings = []
+            for text in input_texts:
+                if isinstance(text, str) and text in embeddings_dict:
+                    final_embeddings.append(embeddings_dict[text])
+                else:
+                    # Provide a dummy embedding or None for invalid/missing inputs
+                    # The dimension (e.g., 768) depends on your embedding model.
+                    # For "models/embedding-001", it's 768.
+                    final_embeddings.append([0.0] * 768) # Placeholder for invalid inputs
+            return final_embeddings
+        except Exception as e:
+            error_message = f"获取文本嵌入时出错 (GeminiChromaEF): {e}"
+            st.error(error_message)
+            print(error_message)
+            # Return dummy embeddings for all inputs in case of a general error
+            return [[0.0] * 768 for _ in input_texts] # Placeholder dimension

prompts.py ADDED Viewed

	@@ -0,0 +1,129 @@

+# prompts.py
+# Prompt texts for the LLM calls. Each task has a system prompt plus a user
+# prompt template containing {placeholders}.
+# NOTE(review): the feedback templates are filled with str.format() by the
+# feedback generator; the other templates are presumably used the same way.
+# System prompt: extract per-student observations from a chat log.
+CHAT_EXTRACTION_SYSTEM_PROMPT = """
+你是一个晚托班聊天记录分析助手。你的任务是从提供的聊天记录中，为每个提到的学生提取关键信息。
+信息应包括：学生姓名，以及关于该学生的具体事件、学术表现、行为、情绪、社交互动或任何值得注意的观察。
+如果一个学生有多条相关信息，请都列出来。
+专注于事实和具体描述。
+"""
+# User template; placeholder: {chat_log_text}. Requests one
+# "student name: [observation]" entry per line.
+CHAT_EXTRACTION_USER_PROMPT_TEMPLATE = """
+请分析以下今天的聊天记录，提取每个学生相关的具体事件、表现或评价。
+输出格式为：
+学生姓名: [事件/表现/评价]
+学生姓名: [另一个事件/表现/评价]
+...
+聊天记录内容如下:
+---
+{chat_log_text}
+---
+请严格按照上述格式输出，每条信息占一行。只输出提取结果。
+"""
+# System prompt: summarise a student's traits from a series of daily records.
+STUDENT_CHARACTERISTICS_SYSTEM_PROMPT = """
+你是一个资深的儿童教育心理分析师。你的任务是根据提供的一系列关于某个学生的日常表现记录，总结该学生的主要性格特点、学习习惯、社交风格和潜在优势或需要关注的方面。
+总结应全面、客观、简洁，并使用积极的语言。
+"""
+# User template; placeholders: {student_name}, {observations_text}.
+STUDENT_CHARACTERISTICS_USER_PROMPT_TEMPLATE = """
+学生姓名: {student_name}
+历史表现记录如下:
+---
+{observations_text}
+---
+请基于以上记录，为 {student_name} 总结其主要特点。
+"""
+# --- Feedback Generation Prompts ---
+# BOSS FEEDBACK
+# System prompt: daily work report written for the boss.
+BOSS_FEEDBACK_SYSTEM_PROMPT = """
+你是一位经验丰富的晚托机构主管助理。你的任务是根据今天收集到的学生表现信息，撰写一份给老板的每日工作反馈。
+反馈应简洁明了，突出重点：
+1.  今日整体情况概述。
+2.  表现特别突出（正面或负面）的学生及其简要事迹。
+3.  任何需要老板知晓或跟进的特殊事件或问题。
+4.  可以提出简要的工作建议（可选）。
+语言需专业、客观。
+"""
+# User template; placeholder: {today_events_summary}.
+BOSS_FEEDBACK_USER_PROMPT_TEMPLATE = """
+今日学生表现信息汇总:
+---
+{today_events_summary}
+---
+请根据以上信息，生成一份给老板的晚托工作反馈。
+"""
+# PUBLIC FEEDBACK
+# System prompt: positive public post that avoids naming individual students.
+PUBLIC_FEEDBACK_SYSTEM_PROMPT = """
+你是一位活泼且富有创意的晚托机构宣传专员。你的任务是根据今天收集到的学生表现素材，撰写一份公开的、积极正面的晚托活动反馈。
+这份反馈将会发布在机构的社交媒体或公告栏。
+主要目标是：
+1.  展示孩子们在晚托的快乐学习时光和丰富多彩的活动。
+2.  传递积极向上的教育理念和氛围。
+3.  除非是集体性的表扬，否则避免提及具体学生姓名，可以使用“有的小朋友”、“大家”等代称。
+风格应活泼、温馨、吸引人。
+"""
+# User template; placeholder: {today_events_summary}.
+PUBLIC_FEEDBACK_USER_PROMPT_TEMPLATE = """
+今日学生表现素材:
+---
+{today_events_summary}
+---
+请根据以上素材，生成一份公开的晚托活动反馈。
+"""
+# PARENT FEEDBACK (NORMAL MODE)
+# System prompt: teacher writing to a parent about the child's day.
+PARENT_NORMAL_SYSTEM_PROMPT = """
+你是一位经验丰富、充满爱心且专业的晚托班老师。你的任务是给学生家长写一份关于孩子今天在晚托班表现的反馈。
+反馈应包含：
+1.  问候家长。
+2.  具体描述孩子今天的学习情况（如作业完成度、遇到的困难、取得的进步）。
+3.  描述孩子的行为表现和情绪状态。
+4.  描述孩子的社交互动情况。
+5.  基于观察给予积极的肯定和鼓励。
+6.  如有必要，可以给出温和的建议或需要家长配合的事项。
+语言需亲切、真诚、具体、正面引导。
+"""
+# User template; placeholders: {student_name}, {student_characteristics},
+# {today_student_specific_events}.
+PARENT_NORMAL_USER_PROMPT_TEMPLATE = """
+学生姓名: {student_name}
+该生一般特点: {student_characteristics}
+今天关于 {student_name} 的具体表现记录:
+---
+{today_student_specific_events}
+---
+请根据以上信息，为 {student_name} 的家长写一份今日反馈。
+"""
+# PARENT FEEDBACK (LAZY MODE)
+PARENT_LAZY_SYSTEM_PROMPT = PARENT_NORMAL_SYSTEM_PROMPT # Reuses the normal-mode system prompt
+# User template; placeholders: {student_name}, {student_characteristics},
+# {past_events_for_student}. Asks the model to recombine past records into
+# feedback that reads as if it happened today.
+PARENT_LAZY_USER_PROMPT_TEMPLATE = """
+学生姓名: {student_name}
+该生一般特点: {student_characteristics}
+以下是 {student_name} 过去的一些表现记录，请从中挑选几件【不同】的事情，巧妙地组合并略作修改，形成一份【听起来像是今天发生】的反馈给家长。
+确保反馈内容积极正面，并且事件之间有一定区隔，不要都揉在一起说。
+历史表现记录（供挑选组合）:
+---
+{past_events_for_student}
+---
+请根据以上要求，为 {student_name} 的家长写一份反馈。
+"""
+# PARENT FEEDBACK (LLM DIRECT MODE - Based on characteristics)
+PARENT_LLM_DIRECT_SYSTEM_PROMPT = PARENT_NORMAL_SYSTEM_PROMPT # Reuses the normal-mode system prompt
+# User template; placeholders: {student_name}, {student_characteristics}.
+# Asks the model to imagine a plausible day from the characteristics alone.
+PARENT_LLM_DIRECT_USER_PROMPT_TEMPLATE = """
+学生姓名: {student_name}
+该生一般特点: {student_characteristics}
+今天晚托班的常规活动包括：作业辅导、阅读、主题活动（例如手工、科学小实验或小组游戏）、自由活动。
+请你基于 {student_name} 的已知特点，并结合今天的常规活动，【设想并生成】一份他/她今天可能的表现，并据此给家长写一份反馈。
+例如，如果学生特点是“专注数学”，可以设想他今天在数学作业上表现出色。如果特点是“乐于助人”，可以设想他帮助了同学。
+反馈需要听起来自然、具体，就像真实观察到的一样。
+"""

rag_manager.py ADDED Viewed

	@@ -0,0 +1,132 @@

+# rag_manager.py
+import os
+import time
+import chromadb
+import streamlit as st
+from config import CHROMA_DB_PATH, RAG_COLLECTION_NAME
+from llm_handler import GeminiChromaEF # Use the robust embedding function
+# Initialize the embedding function globally so it's created once.
+gemini_ef = None
+try:
+    gemini_ef = GeminiChromaEF()
+except Exception as e:
+    st.error(f"无法初始化Gemini Embedding Function: {e}. RAG功能将受限。")
+    print(f"Error initializing GeminiChromaEF: {e}")
+# Initialize ChromaDB client.
+# Using a try-except block for robustness, especially in shared environments like HF Spaces.
+db_client = None
+collection = None
+MAX_RETRIES = 3
+RETRY_DELAY = 5 # seconds
+for attempt in range(MAX_RETRIES):
+    try:
+        if not os.path.exists(CHROMA_DB_PATH):
+            os.makedirs(CHROMA_DB_PATH, exist_ok=True)
+            print(f"Created ChromaDB directory: {CHROMA_DB_PATH}")
+        db_client = chromadb.PersistentClient(path=CHROMA_DB_PATH)
+        if gemini_ef:
+            collection = db_client.get_or_create_collection(
+                name=RAG_COLLECTION_NAME,
+                embedding_function=gemini_ef
+            )
+            print(f"成功连接到RAG集合 '{RAG_COLLECTION_NAME}' 并使用Gemini embeddings.")
+        else:
+            # Fallback if embedding function failed to initialize
+            # This collection won't be very useful without a working embedding function
+            collection = db_client.get_or_create_collection(name=RAG_COLLECTION_NAME)
+            st.warning("RAG集合已创建，但Gemini Embedding Function未成功初始化。语义搜索可能无法正常工作。")
+            print(f"RAG collection '{RAG_COLLECTION_NAME}' created without a proper embedding function due to prior errors.")
+        break # Success
+    except Exception as e: # Catching a broad exception, sqlite3.OperationalError: database is locked is common
+        st.error(f"初始化ChromaDB客户端失败 (尝试 {attempt + 1}/{MAX_RETRIES}): {e}")
+        print(f"Error initializing ChromaDB client (Attempt {attempt + 1}/{MAX_RETRIES}): {e}")
+        if attempt < MAX_RETRIES - 1:
+            time.sleep(RETRY_DELAY)
+        else:
+            st.error("已达到最大重试次数，ChromaDB可能无法使用。请检查日志。")
+            print("Max retries reached for ChromaDB client initialization.")
+            # `collection` will remain None, functions below need to handle this.
+def add_documents_to_rag(documents: list[str], metadatas: list[dict] = None, ids: list[str] = None):
+    if collection is None or gemini_ef is None:
+        st.error("RAG集合或Embedding Function未初始化，无法添加文档。")
+        print("RAG collection or EF not initialized in add_documents_to_rag.")
+        return False
+    if not documents:
+        st.info("没有文档需要添加到RAG。")
+        return True # Not an error, just nothing to do
+    num_docs = len(documents)
+    if not ids:
+        # Generate more robust unique IDs, e.g., using a hash or UUID if not provided
+        from hashlib import md5
+        ids = [f"doc_{md5(doc.encode()).hexdigest()}_{i}" for i, doc in enumerate(documents)]
+    if metadatas is None:
+        metadatas = [{}] * num_docs
+    # Ensure lengths match, truncate to min_len if they don't
+    min_len = min(len(documents), len(metadatas), len(ids))
+    if min_len < num_docs:
+        st.warning(f"文档、元数据或ID列表长度不一致。将使用最短长度: {min_len}")
+        documents = documents[:min_len]
+        metadatas = metadatas[:min_len]
+        ids = ids[:min_len]
+        if min_len == 0:
+            st.info("调整后没有文档可添加。")
+            return True
+    try:
+        collection.add(
+            documents=documents,
+            metadatas=metadatas,
+            ids=ids
+        )
+        st.success(f"成功添加 {len(documents)} 个文档到RAG集合 '{RAG_COLLECTION_NAME}'.")
+        return True
+    except Exception as e:
+        st.error(f"添加文档到RAG时出错: {e}")
+        print(f"Error adding documents to RAG: {e}")
+        return False
+def query_rag(query_text: str, n_results: int = 5, filter_metadata: dict = None):
+    if collection is None or gemini_ef is None:
+        st.error("RAG集合或Embedding Function未初始化，无法查询。")
+        print("RAG collection or EF not initialized in query_rag.")
+        return []
+    if not query_text:
+        return []
+    try:
+        results = collection.query(
+            query_texts=[query_text],
+            n_results=n_results,
+            where=filter_metadata if filter_metadata else None
+            # include=['metadatas', 'documents', 'distances'] # To get more info
+        )
+        return results['documents'][0] if results and results['documents'] else []
+    except Exception as e:
+        st.error(f"查询RAG时出错: {e}")
+        print(f"Error querying RAG: {e}")
+        return []
+def get_all_student_observations_from_rag(student_name: str):
+    if collection is None:
+        st.error("RAG集合未初始化，无法获取学生观察记录。")
+        return []
+    try:
+        # Using 'where' clause for filtering directly in the get call
+        entries = collection.get(
+            where={"student_name": student_name},
+            include=["documents"] # Only need documents here
+        )
+        return entries['documents'] if entries and entries['documents'] else []
+    except Exception as e:
+        st.error(f"从RAG获取学生 {student_name} 的所有观察记录时出错: {e}")
+        print(f"Error getting all observations for {student_name} from RAG: {e}")
+        return []

requirements.txt ADDED Viewed

	@@ -0,0 +1,6 @@

+streamlit
+google-generativeai
+python-dotenv # 仍然有用，config.py会尝试加载，即使在HF上主要是为了本地运行或读取非敏感配置
+chromadb>=0.4.22 # 确保版本兼容性，特别是对于PersistentClient和EmbeddingFunctions
+# sentence-transformers # 如果你决定使用它作为 embedding function
+# 其他你项目中可能用到的库