Spaces:

forzen
/

LLM-Powered

Paused

File size: 18,915 Bytes

634b5dc

# app.py
import streamlit as st
import datetime
import os

# Import all necessary modules from your project
from config import GOOGLE_API_KEY, STUDENT_DB_PATH, CHROMA_DB_PATH, RAG_COLLECTION_NAME
from db_manager import init_student_db, get_all_student_names, get_student_characteristics, add_or_update_student
from rag_manager import add_documents_to_rag, query_rag # get_all_student_observations_from_rag is used by chat_processor
from chat_processor import extract_info_from_chat, update_student_characteristics_from_rag, batch_update_all_students_characteristics
from feedback_generator import (
    generate_boss_feedback,
    generate_public_feedback,
    generate_parent_feedback,
    get_events_summary_for_day
)
# import prompts # Prompts are used by other modules, not directly here typically

# --- Page Configuration and Initialization ---
# Issued before any other Streamlit command in this script.
st.set_page_config(
    page_title="晚托反馈助手",
    layout="wide",
    initial_sidebar_state="expanded",
)

# --- Check API Key ---
# A missing key is reported but does not halt the app; the individual actions
# further down check the key again before doing any LLM work.
if not GOOGLE_API_KEY:
    st.error(
        "错误：GOOGLE_API_KEY 未配置。请在Hugging Face Space的Secrets中设置该值，或在本地的.env文件中配置。应用功能将受限。"
    )

# --- Initialize Databases ---
# Runs on every script execution.
# NOTE(review): presumably idempotent and self-guarding - confirm in db_manager.
init_student_db()

# --- Session State Management ---
# Seed each key only when it is absent so values survive Streamlit reruns.
if "processed_chat_extracts" not in st.session_state:
    # List of {"student_name": ..., "observation": ...} dicts awaiting storage.
    st.session_state["processed_chat_extracts"] = []
if "current_processing_date" not in st.session_state:
    st.session_state["current_processing_date"] = datetime.date.today()
if "student_list_cache" not in st.session_state:
    # Kept as its own lazy check so the lookup only happens on first load.
    st.session_state["student_list_cache"] = get_all_student_names()

# Generated feedback texts and the parent-feedback student selection.
for _state_key, _initial_value in (
    ("feedback_boss_text", ""),
    ("feedback_public_text", ""),
    ("feedback_parent_text", ""),
    ("selected_student_for_parent_fb", None),
):
    if _state_key not in st.session_state:
        st.session_state[_state_key] = _initial_value


# --- Helper Functions for UI ---
def refresh_student_list_cache() -> None:
    """Reload the cached student names and notify the user.

    Overwrites ``st.session_state.student_list_cache`` with the current
    result of ``get_all_student_names()`` and then shows a toast.
    """
    latest_names = get_all_student_names()
    st.session_state["student_list_cache"] = latest_names
    st.toast("学生列表已刷新。")

# --- Main Application UI ---
st.title("🚀 晚托反馈自动化助手")

# Sidebar: page navigation plus a quick status summary of the API key and the
# two on-disk stores.
with st.sidebar:
    st.header("导航")
    menu_options = ["处理聊天记录", "生成反馈报告", "学生特点管理"]
    # `choice` selects which page section below gets rendered.
    choice = st.radio("选择功能:", menu_options, key="nav_menu")

    st.markdown("---")
    st.subheader("系统状态")
    if GOOGLE_API_KEY:
        st.success("Gemini API Key 已加载。")
    else:
        st.warning("Gemini API Key 未配置。")

    # Presence checks only; they say nothing about whether the stores are usable.
    student_db_exists = os.path.exists(STUDENT_DB_PATH)
    # isdir() instead of exists(): if the path turns out to be a regular file,
    # listdir() would raise NotADirectoryError and take the whole page down.
    # bool() keeps the flag a real boolean rather than the directory listing.
    chroma_dir_exists = os.path.isdir(CHROMA_DB_PATH) and bool(os.listdir(CHROMA_DB_PATH))

    if student_db_exists:
        st.markdown(f"✔️ 学生库: `{STUDENT_DB_PATH}`")
    else:
        st.markdown(f"⚠️ 学生库未找到: `{STUDENT_DB_PATH}`")

    if chroma_dir_exists:
        st.markdown(f"✔️ RAG库: `{CHROMA_DB_PATH}` (集合: {RAG_COLLECTION_NAME})")
    else:
        st.markdown(f"⚠️ RAG库未找到: `{CHROMA_DB_PATH}`")

    if st.button("🔄 刷新学生列表", key="sidebar_refresh_students"):
        refresh_student_list_cache()


# --- Page 1: 处理聊天记录 ---
# Workflow: pick a date, paste a chat log, let the LLM extract observations,
# preview them, then store them in the RAG collection and the student DB.
if choice == "处理聊天记录":
    st.header("💬 聊天记录处理与数据构建")
    st.markdown("在此粘贴每日微信聊天记录，AI将提取关键信息并存入知识库。")

    # Maps a notice level to the Streamlit call that renders it.
    notice_renderers = {"success": st.success, "info": st.info, "warning": st.warning}

    # Replay notices queued by a store operation that ended in st.rerun().
    # Anything rendered right before a rerun is thrown away with that run, so
    # the outcome is carried across in session state and shown exactly once.
    queued_notices = st.session_state.get("chat_page_flash")
    if queued_notices:
        del st.session_state["chat_page_flash"]
        for notice_level, notice_text in queued_notices:
            notice_renderers[notice_level](notice_text)

    # Date the pasted chat log belongs to; persisted in session state.
    selected_date_for_processing = st.date_input(
        "请选择聊天记录对应的日期",
        value=st.session_state.current_processing_date,
        key="chat_date_input",
    )
    if selected_date_for_processing != st.session_state.current_processing_date:
        st.session_state.current_processing_date = selected_date_for_processing
        # Extracts belong to the previously selected date; drop them.
        st.session_state.processed_chat_extracts = []
        # st.rerun() replaces the deprecated/removed st.experimental_rerun().
        st.rerun()

    chat_log_text = st.text_area(
        "在此粘贴聊天记录内容:",
        height=250,
        key="chat_log_input_area",
        help="输入聊天内容后，点击“分析聊天记录”。",
    )

    if st.button("🤖 使用AI分析聊天记录", type="primary", key="analyze_chat_button"):
        if not chat_log_text.strip():
            st.warning("请输入聊天记录内容。")
        elif not GOOGLE_API_KEY:
            st.error("API Key未配置，无法分析。")
        else:
            with st.spinner("AI正在分析聊天记录，提取信息中..."):
                st.session_state.processed_chat_extracts = extract_info_from_chat(chat_log_text)

            if st.session_state.processed_chat_extracts:
                st.success(f"AI成功提取到 {len(st.session_state.processed_chat_extracts)} 条信息！")
            else:
                # Reported as info, not error: an empty result is not treated as a failure here.
                st.info("AI分析完成，但未能从聊天记录中提取到格式化信息。")

    if st.session_state.processed_chat_extracts:
        st.subheader("提取到的信息预览:")
        with st.container():
            for item in st.session_state.processed_chat_extracts:
                st.markdown(f"- **{item.get('student_name', 'N/A')}**: {item.get('observation', 'N/A')}")

        st.markdown("---")
        if st.button("➕ 确认并存入数据库和RAG知识库", key="store_extracted_data_button"):
            notices = []  # (level, text) pairs produced while storing
            docs_to_rag = []
            metadatas_to_rag = []
            date_str = st.session_state.current_processing_date.strftime("%Y-%m-%d")

            with st.spinner("正在存储数据到RAG和学生数据库..."):
                for item in st.session_state.processed_chat_extracts:
                    s_name = item.get("student_name")
                    obs = item.get("observation")
                    if not s_name or not obs:
                        notices.append(("warning", f"跳过不完整的提取项: {item}"))
                        continue

                    docs_to_rag.append(f"{s_name} 在 {date_str} 的表现: {obs}")
                    metadatas_to_rag.append(
                        {"student_name": str(s_name), "date": str(date_str), "source": "chat_log"}
                    )
                    add_or_update_student(s_name)  # Ensure the student has a DB row.

                # IDs are left to rag_manager. None is passed rather than the
                # always-empty list used before, whose length could never match
                # the documents.
                # NOTE(review): relies on add_documents_to_rag treating None as
                # "generate IDs" - confirm in rag_manager.
                storage_successful = bool(docs_to_rag) and bool(
                    add_documents_to_rag(docs_to_rag, metadatas_to_rag, None)
                )

            if storage_successful:
                notices.append(("success", f"成功将 {len(docs_to_rag)} 条信息存入RAG。学生列表已更新。"))
                refresh_student_list_cache()
                # At least one student was processed whenever a document was stored.
                notices.append(("info", "数据已存储。您可以前往“学生特点管理”页面更新这些学生的特点总结。"))
                st.session_state.processed_chat_extracts = []  # Clear after storing.
                st.session_state["chat_page_flash"] = notices
                st.rerun()  # Rerun to clear the preview; notices are replayed above.
            else:
                # No rerun on this path, so the notices can be rendered directly.
                for notice_level, notice_text in notices:
                    notice_renderers[notice_level](notice_text)
                if docs_to_rag:
                    st.error("数据存入RAG失败。请检查日志。")
                else:
                    st.info("没有有效的提取信息可供存储。")


# --- Page 2: 生成反馈报告 ---
elif choice == "生成反馈报告":
    st.header("📝 生成每日反馈报告")
    st.markdown("根据已处理的信息或学生特点，选择不同模式生成反馈。")

    feedback_target_date = st.date_input(
        "请选择生成反馈对应的日期",
        value=st.session_state.current_processing_date,
        key="feedback_date_selector"
    )
    feedback_date_str = feedback_target_date.strftime("%Y-%m-%d")

    # Determine summary for Boss/Public feedback
    # Use extracts if date matches and extracts exist, otherwise query RAG
    daily_summary_for_general_feedback = ""
    processed_extracts_for_feedback_date = []

    if feedback_target_date == st.session_state.current_processing_date and st.session_state.processed_chat_extracts:
        processed_extracts_for_feedback_date = st.session_state.processed_chat_extracts
        st.info(f"将使用为 {feedback_date_str} 刚处理的聊天记录生成反馈。")
        temp_summary_parts = []
        for item in processed_extracts_for_feedback_date:
            temp_summary_parts.append(f"- {item.get('student_name', 'N/A')}: {item.get('observation', 'N/A')}")
        if temp_summary_parts:
            daily_summary_for_general_feedback = "\n".join(temp_summary_parts)
        else:
            daily_summary_for_general_feedback = get_events_summary_for_day(feedback_date_str) # Fallback
    else:
        with st.spinner(f"正在为日期 {feedback_date_str} 从知识库获取信息摘要..."):
            daily_summary_for_general_feedback = get_events_summary_for_day(feedback_date_str)

    st.markdown("---")
    col1, col2 = st.columns(2)

    with col1:
        st.subheader("👔 给老板的反馈")
        if st.button("生成老板反馈", key="generate_boss_fb"):
            if not GOOGLE_API_KEY: st.error("API Key未配置。"); st.stop()
            with st.spinner("正在生成老板反馈..."):
                st.session_state.feedback_boss_text = generate_boss_feedback(daily_summary_for_general_feedback)
            if st.session_state.feedback_boss_text: st.success("老板反馈生成成功！")
            else: st.error("生成老板反馈失败或无内容返回。")
        if st.session_state.feedback_boss_text:
            st.text_area("老板反馈内容:", value=st.session_state.feedback_boss_text, height=200, key="boss_feedback_display")

    with col2:
        st.subheader("📢 公共反馈")
        if st.button("生成公共反馈", key="generate_public_fb"):
            if not GOOGLE_API_KEY: st.error("API Key未配置。"); st.stop()
            with st.spinner("正在生成公共反馈..."):
                st.session_state.feedback_public_text = generate_public_feedback(daily_summary_for_general_feedback)
            if st.session_state.feedback_public_text: st.success("公共反馈生成成功！")
            else: st.error("生成公共反馈失败或无内容返回。")
        if st.session_state.feedback_public_text:
            st.text_area("公共反馈内容:", value=st.session_state.feedback_public_text, height=200, key="public_feedback_display")

    st.markdown("---")
    st.subheader("👨‍👩‍👧‍👦 给家长的反馈")

    if not st.session_state.student_list_cache:
        st.warning("学生列表为空。请先通过“处理聊天记录”功能添加学生并处理数据。")
    else:
        st.session_state.selected_student_for_parent_fb = st.selectbox(
            "选择学生:",
            options=[""] + st.session_state.student_list_cache, # Add empty option for placeholder
            index=0, # Default to empty
            format_func=lambda x: "请选择..." if x == "" else x,
            key="parent_feedback_student_selector"
        )
        
        feedback_modes_map = {
            "正常模式 (基于当日记录)": "normal",
            "偷懒模式 (组合历史事件)": "lazy",
            "LLM特点生成 (创意发挥)": "llm_direct"
        }
        selected_mode_display_name = st.radio(
            "选择反馈模式:",
            options=list(feedback_modes_map.keys()),
            key="parent_feedback_mode_selector"
        )
        mode_value = feedback_modes_map[selected_mode_display_name]

        if st.button(f"为选定学生生成家长反馈", key="generate_parent_fb"):
            if not GOOGLE_API_KEY: st.error("API Key未配置。"); st.stop()
            if not st.session_state.selected_student_for_parent_fb:
                st.warning("请先选择一个学生。")
            else:
                student_name = st.session_state.selected_student_for_parent_fb
                with st.spinner(f"正在为 {student_name} ({selected_mode_display_name}) 生成家长反馈..."):
                    # Pass today's extracted data for the student if available (for "normal" mode)
                    student_specific_extracts_today = []
                    if feedback_target_date == st.session_state.current_processing_date and st.session_state.processed_chat_extracts:
                        student_specific_extracts_today = [
                            item for item in st.session_state.processed_chat_extracts if item.get("student_name") == student_name
                        ]

                    st.session_state.feedback_parent_text = generate_parent_feedback(
                        student_name,
                        mode_value,
                        feedback_date_str,
                        student_specific_extracts_today # Pass specific extracts for normal mode
                    )
                if st.session_state.feedback_parent_text:
                    st.success(f"为 {student_name} 生成家长反馈成功！")
                else:
                    st.error(f"为 {student_name} 生成家长反馈失败或无内容返回。")
        
        if st.session_state.feedback_parent_text and st.session_state.selected_student_for_parent_fb:
            st.text_area(
                f"给 {st.session_state.selected_student_for_parent_fb} 家长的反馈:",
                value=st.session_state.feedback_parent_text,
                height=300,
                key="parent_feedback_display"
            )

# --- Page 3: 学生特点管理 ---
elif choice == "学生特点管理":
    st.header("🧑‍🎓 学生特点数据库管理")
    st.markdown("查看和更新AI总结的学生特点。特点会基于RAG中的历史记录生成。")

    if st.button("🔄 强制刷新学生列表和显示", key="admin_refresh_students_btn"):
        refresh_student_list_cache()
        st.experimental_rerun()


    if not st.session_state.student_list_cache:
        st.info("当前没有学生数据。请先通过“处理聊天记录”功能添加并存储学生相关信息。")
    else:
        st.subheader("当前学生列表及特点:")
        
        num_students = len(st.session_state.student_list_cache)
        cols_per_row = 3 # Adjust number of columns for display
        
        for i in range(0, num_students, cols_per_row):
            cols = st.columns(cols_per_row)
            for j in range(cols_per_row):
                student_idx = i + j
                if student_idx < num_students:
                    student_name = st.session_state.student_list_cache[student_idx]
                    with cols[j]:
                        with st.expander(f"{student_name}", expanded=False):
                            characteristics = get_student_characteristics(student_name)
                            st.markdown(f"**AI总结特点:**\n {characteristics if characteristics else '暂无总结。'}")
                            if st.button(f"更新 {student_name} 特点", key=f"update_char_{student_name}_{student_idx}"):
                                if not GOOGLE_API_KEY: st.error("API Key未配置。"); st.stop()
                                with st.spinner(f"正在为 {student_name} 更新特点..."):
                                    update_student_characteristics_from_rag(student_name)
                                st.success(f"{student_name} 的特点已更新！请重新展开查看。")
                                st.experimental_rerun() # Rerun to reflect changes

    st.markdown("---")
    st.subheader("批量操作")
    if st.button("✨ 批量更新所有学生的特点总结", key="batch_update_all_chars_btn"):
        if not GOOGLE_API_KEY: st.error("API Key未配置。"); st.stop()
        if not st.session_state.student_list_cache:
            st.warning("没有学生可供批量更新。")
        else:
            # Confirmation dialog for safety
            # Using a more explicit confirmation
            placeholder = st.empty()
            with placeholder.container():
                 st.warning(f"此操作将为数据库中所有 {len(st.session_state.student_list_cache)} 位学生重新生成特点总结，可能需要较长时间并消耗API额度。")
                 if st.button("我确认执行批量更新", key="confirm_batch_update"):
                     placeholder.empty() # Remove confirmation message
                     with st.spinner("正在批量更新所有学生特点，请耐心等待..."):
                         batch_update_all_students_characteristics() # This function has internal st.progress
                     st.success("所有学生特点总结批量更新完毕！")
                     st.experimental_rerun()
                 elif st.button("取消批量更新", key="cancel_batch_update"):
                     placeholder.empty()
                     st.info("批量更新已取消。")


# --- Footer ---
# A horizontal rule followed by the version / credits line, rendered on every page.
footer_segments = (
    "---",
    "晚托反馈助手 v1.0.0 (HF Dockerized) | 技术支持: Gemini LLM + RAG",
)
for footer_segment in footer_segments:
    st.markdown(footer_segment)