diff --git a/.dockerignore b/.dockerignore new file mode 100644 index 0000000000000000000000000000000000000000..be2d170289a3f4a5842b226f42d3bc8e03bc00eb --- /dev/null +++ b/.dockerignore @@ -0,0 +1,75 @@ +# Git +.git +.gitignore +.gitattributes + +# Python +__pycache__ +*.py[cod] +*$py.class +*.so +.Python +env/ +venv/ +ENV/ +.venv + +# Testing +.pytest_cache +.hypothesis +.coverage +htmlcov/ +*.log + +# IDE +.vscode +.idea +*.swp +*.swo +*~ + +# Documentation (keep only essential) +docs/ +PRD.md +PROJECT_STRUCTURE.md +局域网访问修复完成.md + +# Deployment files (not needed in container) +deployment/ +scripts/start_local.py +scripts/start_local.bat +scripts/test_lan_access.bat +scripts/build_and_deploy.sh +scripts/build_and_deploy.bat + +# Frontend source (only need dist) +frontend/node_modules +frontend/src +frontend/components +frontend/services +frontend/utils +frontend/.env.local +frontend/package.json +frontend/package-lock.json +frontend/tsconfig.json +frontend/vite.config.ts +frontend/index.tsx +frontend/index.css +frontend/types.ts +frontend/App.tsx +frontend/test-*.html + +# Tests +tests/ + +# Logs +logs/ + +# OS +.DS_Store +Thumbs.db + +# Temporary files +*.tmp +*.bak +*.swp diff --git a/.env.example b/.env.example new file mode 100644 index 0000000000000000000000000000000000000000..05e398b08fdb65add586ea8793611d4867812e2d --- /dev/null +++ b/.env.example @@ -0,0 +1,33 @@ +# Voice Text Processor Configuration +# Copy this file to .env and fill in your values + +# Required: Zhipu AI API Key (for semantic parsing) +# 获取方式: https://open.bigmodel.cn/ -> API Keys +ZHIPU_API_KEY=your_zhipu_api_key_here + +# Required: MiniMax API Key (for image generation) +# 获取方式: https://platform.minimax.io/ -> API Keys +# 格式示例: sk-xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx +MINIMAX_API_KEY=your_minimax_api_key_here + +# Optional: MiniMax Group ID (已废弃,保留用于兼容性) +MINIMAX_GROUP_ID=your_group_id_here + +# Optional: Data storage directory (default: data/) +DATA_DIR=data + +# Optional: Maximum audio 
file size in bytes (default: 10485760 = 10MB) +MAX_AUDIO_SIZE=10485760 + +# Optional: Logging level (default: INFO) +# Valid values: DEBUG, INFO, WARNING, ERROR, CRITICAL +LOG_LEVEL=INFO + +# Optional: Log file path (default: logs/app.log) +LOG_FILE=logs/app.log + +# Optional: Server host (default: 0.0.0.0) +HOST=0.0.0.0 + +# Optional: Server port (default: 8000) +PORT=8000 diff --git a/.gitattributes b/.gitattributes new file mode 100644 index 0000000000000000000000000000000000000000..a6344aac8c09253b3b630fb776ae94478aa0275b --- /dev/null +++ b/.gitattributes @@ -0,0 +1,35 @@ +*.7z filter=lfs diff=lfs merge=lfs -text +*.arrow filter=lfs diff=lfs merge=lfs -text +*.bin filter=lfs diff=lfs merge=lfs -text +*.bz2 filter=lfs diff=lfs merge=lfs -text +*.ckpt filter=lfs diff=lfs merge=lfs -text +*.ftz filter=lfs diff=lfs merge=lfs -text +*.gz filter=lfs diff=lfs merge=lfs -text +*.h5 filter=lfs diff=lfs merge=lfs -text +*.joblib filter=lfs diff=lfs merge=lfs -text +*.lfs.* filter=lfs diff=lfs merge=lfs -text +*.mlmodel filter=lfs diff=lfs merge=lfs -text +*.model filter=lfs diff=lfs merge=lfs -text +*.msgpack filter=lfs diff=lfs merge=lfs -text +*.npy filter=lfs diff=lfs merge=lfs -text +*.npz filter=lfs diff=lfs merge=lfs -text +*.onnx filter=lfs diff=lfs merge=lfs -text +*.ot filter=lfs diff=lfs merge=lfs -text +*.parquet filter=lfs diff=lfs merge=lfs -text +*.pb filter=lfs diff=lfs merge=lfs -text +*.pickle filter=lfs diff=lfs merge=lfs -text +*.pkl filter=lfs diff=lfs merge=lfs -text +*.pt filter=lfs diff=lfs merge=lfs -text +*.pth filter=lfs diff=lfs merge=lfs -text +*.rar filter=lfs diff=lfs merge=lfs -text +*.safetensors filter=lfs diff=lfs merge=lfs -text +saved_model/**/* filter=lfs diff=lfs merge=lfs -text +*.tar.* filter=lfs diff=lfs merge=lfs -text +*.tar filter=lfs diff=lfs merge=lfs -text +*.tflite filter=lfs diff=lfs merge=lfs -text +*.tgz filter=lfs diff=lfs merge=lfs -text +*.wasm filter=lfs diff=lfs merge=lfs -text +*.xz filter=lfs diff=lfs merge=lfs 
-text +*.zip filter=lfs diff=lfs merge=lfs -text +*.zst filter=lfs diff=lfs merge=lfs -text +*tfevents* filter=lfs diff=lfs merge=lfs -text diff --git a/.github/workflows/sync.yml b/.github/workflows/sync.yml new file mode 100644 index 0000000000000000000000000000000000000000..db1068fe204f4c6f8bd4c67e52a4f2f127e37b84 --- /dev/null +++ b/.github/workflows/sync.yml @@ -0,0 +1,38 @@ +name: Sync to Hugging Face hub +on: + push: + branches: [main] + workflow_dispatch: + +jobs: + sync-to-hub: + runs-on: ubuntu-latest + steps: + - uses: actions/checkout@v3 + with: + fetch-depth: 0 + lfs: true + - name: Push to hub + env: + HF_TOKEN: ${{ secrets.HF_TOKEN }} + run: | + # 1. 配置身份 + git config --global user.email "bot@github.com" + git config --global user.name "GitHub Action" + + # 2. 彻底移除二进制文件及其索引 + rm -rf generated_images + git rm -r --cached generated_images || echo "Already removed" + + # 3. 创建一个全新的、没有历史记录的临时分支 + git checkout --orphan temp-branch + + # 4. 只添加当前的代码文件 + git add . + git commit -m "Deploy clean version of Nora" + + # 5. 强制推送到 Hugging Face 的 main 分支 + # 注意:这会覆盖 HF 上的所有历史,非常适合解决当前死锁 + git push --force https://kernel14:$HF_TOKEN@huggingface.co/spaces/kernel14/Nora temp-branch:main + + diff --git a/.gitignore b/.gitignore new file mode 100644 index 0000000000000000000000000000000000000000..dd6c6de8ca7154568123b346e0bc75063e5bb0c1 --- /dev/null +++ b/.gitignore @@ -0,0 +1,61 @@ +# Python +__pycache__/ +*.py[cod] +*$py.class +*.so +.Python +build/ +develop-eggs/ +downloads/ +eggs/ +.eggs/ +lib/ +lib64/ +parts/ +sdist/ +var/ +wheels/ +*.egg-info/ +.installed.cfg +*.egg + +# Virtual environments +venv/ +env/ +ENV/ +.venv + +# IDE +.vscode/ +.idea/ +*.swp +*.swo +*~ + +# Environment variables +.env + +# Logs +logs/ +*.log + +# Data files +data/*.json + +# Testing +.pytest_cache/ +.coverage +htmlcov/ +.hypothesis/ + +# OS +.DS_Store +Thumbs.db + +# Frontend (开发时忽略,但部署时需要 dist) +frontend/node_modules/ +# 注意:frontend/dist/ 不要忽略,部署需要它! 
+ +# Docker(不要忽略 Dockerfile) +# Dockerfile 需要提交 + diff --git a/.kiro/specs/voice-text-processor/design.md b/.kiro/specs/voice-text-processor/design.md new file mode 100644 index 0000000000000000000000000000000000000000..402fe645840c18c84624bfe7cc456dd6f12bad62 --- /dev/null +++ b/.kiro/specs/voice-text-processor/design.md @@ -0,0 +1,514 @@ +# Design Document: Voice Text Processor + +## Overview + +本系统是一个基于 FastAPI 的 REST API 服务,用于处理用户的语音录音或文字输入,通过智谱 API 进行语音识别和语义解析,提取情绪、灵感和待办事项等结构化数据,并持久化到本地 JSON 文件。 + +系统采用分层架构设计: +- **API 层**:FastAPI 路由和请求处理 +- **服务层**:业务逻辑处理(ASR、语义解析) +- **存储层**:JSON 文件持久化 + +核心工作流程: +1. 接收用户输入(音频文件或文本) +2. 如果是音频,调用智谱 ASR API 转写为文本 +3. 调用 GLM-4-Flash API 进行语义解析 +4. 提取情绪、灵感、待办数据 +5. 持久化到对应的 JSON 文件 +6. 返回结构化响应 + +## Architecture + +系统采用三层架构: + +``` +┌─────────────────────────────────────┐ +│ API Layer (FastAPI) │ +│ - POST /api/process │ +│ - Request validation │ +│ - Response formatting │ +└──────────────┬──────────────────────┘ + │ +┌──────────────▼──────────────────────┐ +│ Service Layer │ +│ - ASRService │ +│ - SemanticParserService │ +│ - StorageService │ +└──────────────┬──────────────────────┘ + │ +┌──────────────▼──────────────────────┐ +│ External Services │ +│ - Zhipu ASR API │ +│ - GLM-4-Flash API │ +│ - Local JSON Files │ +└─────────────────────────────────────┘ +``` + +### 模块职责 + +**API Layer**: +- 处理 HTTP 请求和响应 +- 输入验证(文件格式、大小、文本编码) +- 错误处理和状态码映射 +- 请求日志记录 + +**Service Layer**: +- `ASRService`: 封装智谱 ASR API 调用,处理音频转文字 +- `SemanticParserService`: 封装 GLM-4-Flash API 调用,执行语义解析 +- `StorageService`: 管理 JSON 文件读写,生成唯一 ID 和时间戳 + +**Configuration**: +- 环境变量管理(API 密钥、文件路径、大小限制) +- 启动时配置验证 + +## Components and Interfaces + +### 1. 
API Endpoint + +```python +@app.post("/api/process") +async def process_input( + audio: Optional[UploadFile] = File(None), + text: Optional[str] = Body(None) +) -> ProcessResponse +``` + +**输入**: +- `audio`: 音频文件(multipart/form-data),支持 mp3, wav, m4a +- `text`: 文本内容(application/json),UTF-8 编码 + +**输出**: +```python +class ProcessResponse(BaseModel): + record_id: str + timestamp: str + mood: Optional[MoodData] + inspirations: List[InspirationData] + todos: List[TodoData] + error: Optional[str] +``` + +### 2. ASRService + +```python +class ASRService: + def __init__(self, api_key: str): + self.api_key = api_key + self.client = httpx.AsyncClient() + + async def transcribe(self, audio_file: bytes) -> str: + """ + 调用智谱 ASR API 进行语音识别 + + 参数: + audio_file: 音频文件字节流 + + 返回: + 转写后的文本内容 + + 异常: + ASRServiceError: API 调用失败或识别失败 + """ +``` + +### 3. SemanticParserService + +```python +class SemanticParserService: + def __init__(self, api_key: str): + self.api_key = api_key + self.client = httpx.AsyncClient() + self.system_prompt = ( + "你是一个数据转换器。请将文本解析为 JSON 格式。" + "维度包括:1.情绪(type,intensity,keywords); " + "2.灵感(core_idea,tags,category); " + "3.待办(task,time,location)。" + "必须严格遵循 JSON 格式返回。" + ) + + async def parse(self, text: str) -> ParsedData: + """ + 调用 GLM-4-Flash API 进行语义解析 + + 参数: + text: 待解析的文本内容 + + 返回: + ParsedData 对象,包含 mood, inspirations, todos + + 异常: + SemanticParserError: API 调用失败或解析失败 + """ +``` + +### 4. 
StorageService + +```python +class StorageService: + def __init__(self, data_dir: str): + self.data_dir = Path(data_dir) + self.records_file = self.data_dir / "records.json" + self.moods_file = self.data_dir / "moods.json" + self.inspirations_file = self.data_dir / "inspirations.json" + self.todos_file = self.data_dir / "todos.json" + + def save_record(self, record: RecordData) -> str: + """ + 保存完整记录到 records.json + + 参数: + record: 记录数据对象 + + 返回: + 生成的唯一 record_id + + 异常: + StorageError: 文件写入失败 + """ + + def append_mood(self, mood: MoodData, record_id: str) -> None: + """追加情绪数据到 moods.json""" + + def append_inspirations(self, inspirations: List[InspirationData], record_id: str) -> None: + """追加灵感数据到 inspirations.json""" + + def append_todos(self, todos: List[TodoData], record_id: str) -> None: + """追加待办数据到 todos.json""" +``` + +## Data Models + +### 核心数据结构 + +```python +class MoodData(BaseModel): + type: Optional[str] = None + intensity: Optional[int] = Field(None, ge=1, le=10) + keywords: List[str] = [] + +class InspirationData(BaseModel): + core_idea: str = Field(..., max_length=20) + tags: List[str] = Field(default_factory=list, max_items=5) + category: Literal["工作", "生活", "学习", "创意"] + +class TodoData(BaseModel): + task: str + time: Optional[str] = None + location: Optional[str] = None + status: str = "pending" + +class ParsedData(BaseModel): + mood: Optional[MoodData] = None + inspirations: List[InspirationData] = [] + todos: List[TodoData] = [] + +class RecordData(BaseModel): + record_id: str + timestamp: str + input_type: Literal["audio", "text"] + original_text: str + parsed_data: ParsedData +``` + +### 存储格式 + +**records.json**: +```json +[ + { + "record_id": "uuid-string", + "timestamp": "2024-01-01T12:00:00Z", + "input_type": "audio", + "original_text": "转写后的文本", + "parsed_data": { + "mood": {...}, + "inspirations": [...], + "todos": [...] 
+ } + } +] +``` + +**moods.json**: +```json +[ + { + "record_id": "uuid-string", + "timestamp": "2024-01-01T12:00:00Z", + "type": "开心", + "intensity": 8, + "keywords": ["愉快", "放松"] + } +] +``` + +**inspirations.json**: +```json +[ + { + "record_id": "uuid-string", + "timestamp": "2024-01-01T12:00:00Z", + "core_idea": "新的项目想法", + "tags": ["创新", "技术"], + "category": "工作" + } +] +``` + +**todos.json**: +```json +[ + { + "record_id": "uuid-string", + "timestamp": "2024-01-01T12:00:00Z", + "task": "完成报告", + "time": "明天下午", + "location": "办公室", + "status": "pending" + } +] +``` + + +## Correctness Properties + +属性(Property)是关于系统行为的特征或规则,应该在所有有效执行中保持为真。属性是人类可读规范和机器可验证正确性保证之间的桥梁。 + +### Property 1: 音频格式验证 +*For any* 提交的文件,如果文件扩展名是 mp3、wav 或 m4a,系统应该接受该文件;如果是其他格式,系统应该拒绝并返回错误。 +**Validates: Requirements 1.1** + +### Property 2: UTF-8 文本接受 +*For any* UTF-8 编码的文本字符串(包括中文、emoji、特殊字符),系统应该正确接受并处理。 +**Validates: Requirements 1.2** + +### Property 3: 无效输入错误处理 +*For any* 空输入或格式无效的输入,系统应该返回包含 error 字段的 JSON 响应,而不是崩溃或返回成功状态。 +**Validates: Requirements 1.3, 9.1** + +### Property 4: 解析结果结构完整性 +*For any* 成功的语义解析结果,返回的 JSON 应该包含 mood、inspirations、todos 三个字段,即使某些字段为空值或空数组。 +**Validates: Requirements 3.3** + +### Property 5: 缺失维度处理 +*For any* 不包含特定维度信息的文本,解析结果中该维度应该返回 null(对于 mood)或空数组(对于 inspirations 和 todos)。 +**Validates: Requirements 3.4** + +### Property 6: 情绪数据结构验证 +*For any* 解析出的情绪数据,应该包含 type(字符串)、intensity(1-10 的整数)、keywords(字符串数组)三个字段,且 intensity 必须在有效范围内。 +**Validates: Requirements 4.1, 4.2, 4.3** + +### Property 7: 灵感数据结构验证 +*For any* 解析出的灵感数据,应该包含 core_idea(长度 ≤ 20)、tags(数组长度 ≤ 5)、category(枚举值:工作/生活/学习/创意)三个字段,且所有约束都被满足。 +**Validates: Requirements 5.1, 5.2, 5.3** + +### Property 8: 待办数据结构验证 +*For any* 解析出的待办数据,应该包含 task(必需)、time(可选)、location(可选)、status(默认为 "pending")四个字段。 +**Validates: Requirements 6.1, 6.2, 6.3, 6.4** + +### Property 9: 数据持久化完整性 +*For any* 成功处理的记录,应该在 records.json 中保存完整记录,并且如果包含情绪/灵感/待办数据,应该同时追加到对应的 moods.json、inspirations.json、todos.json 文件中。 +**Validates: 
Requirements 7.1, 7.2, 7.3, 7.4** + +### Property 10: 文件初始化 +*For any* 不存在的 JSON 文件,当首次写入时,系统应该创建该文件并初始化为空数组 `[]`。 +**Validates: Requirements 7.5** + +### Property 11: 唯一 ID 生成 +*For any* 两条不同的记录,生成的 record_id 应该是唯一的(不重复)。 +**Validates: Requirements 7.7** + +### Property 12: 成功响应格式 +*For any* 成功处理的请求,HTTP 响应应该返回 200 状态码,并且响应 JSON 包含 record_id、timestamp、mood、inspirations、todos 字段。 +**Validates: Requirements 8.4, 8.6** + +### Property 13: 错误响应格式 +*For any* 处理失败的请求,HTTP 响应应该返回适当的错误状态码(400 或 500),并且响应 JSON 包含 error 字段,描述具体错误信息。 +**Validates: Requirements 8.5, 9.1, 9.3** + +### Property 14: 错误日志记录 +*For any* 系统发生的错误,应该在日志文件中记录该错误,包含时间戳和错误堆栈信息。 +**Validates: Requirements 9.5** + +### Property 15: 敏感信息保护 +*For any* 日志输出,不应该包含敏感信息(如 API 密钥、用户密码等)。 +**Validates: Requirements 10.5** + +## Error Handling + +### 错误分类 + +**1. 输入验证错误(HTTP 400)**: +- 音频文件格式不支持 +- 音频文件大小超过限制 +- 文本内容为空 +- 请求格式错误(既没有 audio 也没有 text) + +**2. 外部服务错误(HTTP 500)**: +- 智谱 ASR API 调用失败 +- GLM-4-Flash API 调用失败 +- API 返回非预期格式 + +**3. 存储错误(HTTP 500)**: +- JSON 文件写入失败 +- 磁盘空间不足 +- 文件权限错误 + +**4. 
配置错误(启动时失败)**: +- API 密钥缺失 +- 数据目录不可访问 +- 必需配置项缺失 + +### 错误处理策略 + +```python +class APIError(Exception): + """API 层错误基类""" + def __init__(self, message: str, status_code: int): + self.message = message + self.status_code = status_code + +class ASRServiceError(APIError): + """ASR 服务错误""" + def __init__(self, message: str = "语音识别服务不可用"): + super().__init__(message, 500) + +class SemanticParserError(APIError): + """语义解析服务错误""" + def __init__(self, message: str = "语义解析服务不可用"): + super().__init__(message, 500) + +class StorageError(APIError): + """存储错误""" + def __init__(self, message: str = "数据存储失败"): + super().__init__(message, 500) + +class ValidationError(APIError): + """输入验证错误""" + def __init__(self, message: str): + super().__init__(message, 400) +``` + +### 错误响应格式 + +```json +{ + "error": "具体错误描述", + "detail": "详细错误信息(可选)", + "timestamp": "2024-01-01T12:00:00Z" +} +``` + +### 日志记录 + +使用 Python logging 模块: +- **INFO**: 正常请求处理流程 +- **WARNING**: 可恢复的异常情况(如 API 重试) +- **ERROR**: 错误情况,包含完整堆栈信息 +- **DEBUG**: 详细调试信息(开发环境) + +日志格式: +``` +[2024-01-01 12:00:00] [ERROR] [request_id: xxx] ASR API call failed: Connection timeout +Traceback: ... 
+``` + +## Testing Strategy + +本系统采用双重测试策略:单元测试和基于属性的测试(Property-Based Testing)。 + +### 单元测试 + +单元测试用于验证特定示例、边缘情况和错误条件: + +**测试范围**: +- API 端点的请求/响应处理 +- 各服务类的 mock 测试(模拟外部 API) +- 数据模型的验证逻辑 +- 错误处理流程 +- 配置加载和验证 + +**示例测试用例**: +- 测试 POST /api/process 端点存在 +- 测试接受 multipart/form-data 格式 +- 测试接受 application/json 格式 +- 测试 ASR API 调用失败时的错误处理 +- 测试 GLM-4-Flash API 调用失败时的错误处理 +- 测试文件写入失败时的错误处理 +- 测试配置缺失时启动失败 +- 测试空音频识别的边缘情况 +- 测试无情绪信息文本的边缘情况 +- 测试无灵感信息文本的边缘情况 +- 测试无待办信息文本的边缘情况 + +### 基于属性的测试(Property-Based Testing) + +基于属性的测试用于验证通用属性在所有输入下都成立。 + +**测试库**: 使用 `hypothesis` 库(Python 的 PBT 框架) + +**配置**: +- 每个属性测试运行最少 100 次迭代 +- 每个测试必须引用设计文档中的属性编号 +- 标签格式:`# Feature: voice-text-processor, Property N: [property text]` + +**属性测试覆盖**: +- Property 1: 音频格式验证 +- Property 2: UTF-8 文本接受 +- Property 3: 无效输入错误处理 +- Property 4: 解析结果结构完整性 +- Property 5: 缺失维度处理 +- Property 6: 情绪数据结构验证 +- Property 7: 灵感数据结构验证 +- Property 8: 待办数据结构验证 +- Property 9: 数据持久化完整性 +- Property 10: 文件初始化 +- Property 11: 唯一 ID 生成 +- Property 12: 成功响应格式 +- Property 13: 错误响应格式 +- Property 14: 错误日志记录 +- Property 15: 敏感信息保护 + +**测试策略**: +- 使用 hypothesis 生成随机输入(文件名、文本、数据结构) +- 使用 pytest-mock 模拟外部 API 调用 +- 使用临时文件系统进行存储测试 +- 验证所有属性在随机输入下都成立 + +**示例属性测试**: +```python +from hypothesis import given, strategies as st +import pytest + +@given(st.text(min_size=1)) +def test_property_2_utf8_text_acceptance(text): + """ + Feature: voice-text-processor, Property 2: UTF-8 文本接受 + For any UTF-8 encoded text string, the system should accept and process it. + """ + response = client.post("/api/process", json={"text": text}) + assert response.status_code in [200, 500] # 接受输入,可能解析失败但不应拒绝 + +@given(st.lists(st.text(), min_size=1, max_size=10)) +def test_property_11_unique_id_generation(texts): + """ + Feature: voice-text-processor, Property 11: 唯一 ID 生成 + For any two different records, the generated record_ids should be unique. 
+ """ + record_ids = [] + for text in texts: + response = client.post("/api/process", json={"text": text}) + if response.status_code == 200: + record_ids.append(response.json()["record_id"]) + + # 所有 ID 应该唯一 + assert len(record_ids) == len(set(record_ids)) +``` + +### 测试覆盖目标 + +- 代码覆盖率:≥ 80% +- 属性测试:覆盖所有 15 个正确性属性 +- 单元测试:覆盖所有边缘情况和错误路径 +- 集成测试:端到端流程测试(音频 → 转写 → 解析 → 存储) + diff --git a/.kiro/specs/voice-text-processor/requirements.md b/.kiro/specs/voice-text-processor/requirements.md new file mode 100644 index 0000000000000000000000000000000000000000..1a0e71db610517e628aef53e6310ab8e612e64ae --- /dev/null +++ b/.kiro/specs/voice-text-processor/requirements.md @@ -0,0 +1,139 @@ +# Requirements Document + +## Introduction + +这是一个治愈系记录助手的后端核心模块。系统接收语音录音或文字输入,通过智谱 API 进行语音转写和语义解析,输出包含情绪、灵感、待办的结构化 JSON 数据,并持久化到本地文件系统。 + +## Glossary + +- **System**: 治愈系记录助手后端系统 +- **ASR_Service**: 智谱 API 语音识别服务 +- **Semantic_Parser**: GLM-4-Flash 语义解析引擎 +- **Storage_Manager**: 本地 JSON 文件存储管理器 +- **Record**: 用户输入的单次记录(语音或文字) +- **Mood**: 情绪数据结构(type, intensity, keywords) +- **Inspiration**: 灵感数据结构(core_idea, tags, category) +- **Todo**: 待办事项数据结构(task, time, location, status) + +## Requirements + +### Requirement 1: 接收用户输入 + +**User Story:** 作为用户,我想要提交语音录音或文字内容,以便系统能够处理我的记录。 + +#### Acceptance Criteria + +1. WHEN 用户提交音频文件,THE System SHALL 接受常见音频格式(mp3, wav, m4a) +2. WHEN 用户提交文字内容,THE System SHALL 接受 UTF-8 编码的文本字符串 +3. WHEN 输入数据为空或格式无效,THE System SHALL 返回明确的错误信息 +4. WHEN 音频文件大小超过 10MB,THE System SHALL 拒绝处理并返回文件过大错误 + +### Requirement 2: 语音转文字 + +**User Story:** 作为用户,我想要系统将我的语音录音转换为文字,以便进行后续的语义分析。 + +#### Acceptance Criteria + +1. WHEN 接收到音频文件,THE ASR_Service SHALL 调用智谱 ASR API 进行语音识别 +2. WHEN 语音识别成功,THE ASR_Service SHALL 返回转写后的文本内容 +3. IF 智谱 API 调用失败,THEN THE System SHALL 记录错误日志并返回转写失败错误 +4. WHEN 音频内容无法识别,THE ASR_Service SHALL 返回空文本并标记为识别失败 + +### Requirement 3: 语义解析 + +**User Story:** 作为用户,我想要系统从我的文本中提取情绪、灵感和待办事项,以便获得结构化的记录数据。 + +#### Acceptance Criteria + +1. 
WHEN 接收到文本内容,THE Semantic_Parser SHALL 调用 GLM-4-Flash API 进行语义解析 +2. WHEN 调用 GLM-4-Flash,THE System SHALL 使用指定的 System Prompt:"你是一个数据转换器。请将文本解析为 JSON 格式。维度包括:1.情绪(type,intensity,keywords); 2.灵感(core_idea,tags,category); 3.待办(task,time,location)。必须严格遵循 JSON 格式返回。" +3. WHEN 解析成功,THE Semantic_Parser SHALL 返回包含 mood、inspirations、todos 的 JSON 结构 +4. WHEN 文本中不包含某个维度的信息,THE Semantic_Parser SHALL 返回该维度的空值或空数组 +5. IF GLM-4-Flash API 调用失败,THEN THE System SHALL 记录错误日志并返回解析失败错误 + +### Requirement 4: 情绪数据提取 + +**User Story:** 作为用户,我想要系统识别我的情绪状态,以便追踪我的情绪变化。 + +#### Acceptance Criteria + +1. WHEN 解析情绪数据,THE Semantic_Parser SHALL 提取情绪类型(type) +2. WHEN 解析情绪数据,THE Semantic_Parser SHALL 提取情绪强度(intensity),范围为 1-10 的整数 +3. WHEN 解析情绪数据,THE Semantic_Parser SHALL 提取情绪关键词(keywords),以字符串数组形式返回 +4. WHEN 文本中不包含明确的情绪信息,THE Semantic_Parser SHALL 返回 null 或默认值 + +### Requirement 5: 灵感数据提取 + +**User Story:** 作为用户,我想要系统捕捉我的灵感想法,以便日后回顾和整理。 + +#### Acceptance Criteria + +1. WHEN 解析灵感数据,THE Semantic_Parser SHALL 提取核心观点(core_idea),长度不超过 20 个字符 +2. WHEN 解析灵感数据,THE Semantic_Parser SHALL 提取标签(tags),以字符串数组形式返回,最多 5 个标签 +3. WHEN 解析灵感数据,THE Semantic_Parser SHALL 提取分类(category),值为"工作"、"生活"、"学习"或"创意"之一 +4. WHEN 文本中包含多个灵感,THE Semantic_Parser SHALL 返回灵感数组 +5. WHEN 文本中不包含灵感信息,THE Semantic_Parser SHALL 返回空数组 + +### Requirement 6: 待办事项提取 + +**User Story:** 作为用户,我想要系统识别我提到的待办事项,以便自动创建任务清单。 + +#### Acceptance Criteria + +1. WHEN 解析待办数据,THE Semantic_Parser SHALL 提取任务描述(task) +2. WHEN 解析待办数据,THE Semantic_Parser SHALL 提取时间信息(time),保留原始表达(如"明晚"、"下周三") +3. WHEN 解析待办数据,THE Semantic_Parser SHALL 提取地点信息(location) +4. WHEN 创建新待办事项,THE System SHALL 设置状态(status)为"pending" +5. WHEN 文本中包含多个待办事项,THE Semantic_Parser SHALL 返回待办数组 +6. WHEN 文本中不包含待办信息,THE Semantic_Parser SHALL 返回空数组 + +### Requirement 7: 数据持久化 + +**User Story:** 作为用户,我想要系统保存我的记录数据,以便日后查询和分析。 + +#### Acceptance Criteria + +1. WHEN 解析完成后,THE Storage_Manager SHALL 将完整记录保存到 records.json 文件 +2. WHEN 提取到情绪数据,THE Storage_Manager SHALL 将情绪信息追加到 moods.json 文件 +3. 
WHEN 提取到灵感数据,THE Storage_Manager SHALL 将灵感信息追加到 inspirations.json 文件 +4. WHEN 提取到待办数据,THE Storage_Manager SHALL 将待办信息追加到 todos.json 文件 +5. WHEN JSON 文件不存在,THE Storage_Manager SHALL 创建新文件并初始化为空数组 +6. WHEN 写入文件失败,THE System SHALL 记录错误日志并返回存储失败错误 +7. WHEN 保存记录时,THE System SHALL 为每条记录生成唯一 ID 和时间戳 + +### Requirement 8: API 接口设计 + +**User Story:** 作为前端开发者,我想要调用清晰的 REST API,以便集成后端功能。 + +#### Acceptance Criteria + +1. THE System SHALL 提供 POST /api/process 接口接收用户输入 +2. WHEN 请求包含音频文件,THE System SHALL 接受 multipart/form-data 格式 +3. WHEN 请求包含文字内容,THE System SHALL 接受 application/json 格式 +4. WHEN 处理成功,THE System SHALL 返回 HTTP 200 状态码和结构化 JSON 响应 +5. WHEN 处理失败,THE System SHALL 返回适当的 HTTP 错误状态码(400/500)和错误信息 +6. THE System SHALL 在响应中包含 record_id 和 timestamp 字段 + +### Requirement 9: 错误处理 + +**User Story:** 作为用户,我想要在系统出错时获得清晰的错误提示,以便了解问题所在。 + +#### Acceptance Criteria + +1. WHEN 任何步骤发生错误,THE System SHALL 返回包含 error 字段的 JSON 响应 +2. WHEN 智谱 API 调用失败,THE System SHALL 返回"语音识别服务不可用"或"语义解析服务不可用"错误 +3. WHEN 输入验证失败,THE System SHALL 返回具体的验证错误信息 +4. WHEN 文件操作失败,THE System SHALL 返回"数据存储失败"错误 +5. THE System SHALL 记录所有错误到日志文件,包含时间戳和错误堆栈 + +### Requirement 10: 配置管理 + +**User Story:** 作为系统管理员,我想要配置 API 密钥和系统参数,以便灵活部署系统。 + +#### Acceptance Criteria + +1. THE System SHALL 从环境变量或配置文件读取智谱 API 密钥 +2. THE System SHALL 支持配置数据文件存储路径 +3. THE System SHALL 支持配置音频文件大小限制 +4. WHEN 必需的配置项缺失,THE System SHALL 在启动时报错并拒绝启动 +5. THE System SHALL 不在日志中输出敏感信息(如 API 密钥) diff --git a/.kiro/specs/voice-text-processor/tasks.md b/.kiro/specs/voice-text-processor/tasks.md new file mode 100644 index 0000000000000000000000000000000000000000..168f4653a812cfe7cccff2006f6cd995abb33c19 --- /dev/null +++ b/.kiro/specs/voice-text-processor/tasks.md @@ -0,0 +1,204 @@ +# Implementation Plan: Voice Text Processor + +## Overview + +本实现计划将语音文本处理系统分解为离散的编码步骤。实现顺序遵循从核心基础设施到业务逻辑,再到集成测试的渐进式方法。每个任务都引用具体的需求条款,确保完整的需求覆盖。 + +## Tasks + +- [x] 1. 
设置项目结构和核心配置 + - 创建项目目录结构(app/, tests/, data/) + - 设置 FastAPI 应用和基础配置 + - 实现配置管理模块(从环境变量读取 API 密钥、数据路径、文件大小限制) + - 配置日志系统(格式、级别、文件输出) + - 添加启动时配置验证(缺失必需配置时拒绝启动) + - _Requirements: 10.1, 10.2, 10.3, 10.4, 10.5_ + +- [x] 2. 实现数据模型和验证 + - [x] 2.1 创建 Pydantic 数据模型 + - 实现 MoodData 模型(type, intensity 1-10, keywords) + - 实现 InspirationData 模型(core_idea ≤20 字符, tags ≤5, category 枚举) + - 实现 TodoData 模型(task, time, location, status 默认 "pending") + - 实现 ParsedData 模型(mood, inspirations, todos) + - 实现 RecordData 模型(record_id, timestamp, input_type, original_text, parsed_data) + - 实现 ProcessResponse 模型(record_id, timestamp, mood, inspirations, todos, error) + - _Requirements: 4.1, 4.2, 4.3, 5.1, 5.2, 5.3, 6.1, 6.2, 6.3, 6.4_ + + - [x] 2.2 编写数据模型属性测试 + - **Property 6: 情绪数据结构验证** + - **Validates: Requirements 4.1, 4.2, 4.3** + + - [x] 2.3 编写数据模型属性测试 + - **Property 7: 灵感数据结构验证** + - **Validates: Requirements 5.1, 5.2, 5.3** + + - [x] 2.4 编写数据模型属性测试 + - **Property 8: 待办数据结构验证** + - **Validates: Requirements 6.1, 6.2, 6.3, 6.4** + +- [x] 3. 实现存储服务(StorageService) + - [x] 3.1 实现 JSON 文件存储管理器 + - 实现 save_record 方法(保存到 records.json,生成唯一 UUID) + - 实现 append_mood 方法(追加到 moods.json) + - 实现 append_inspirations 方法(追加到 inspirations.json) + - 实现 append_todos 方法(追加到 todos.json) + - 实现文件初始化逻辑(不存在时创建并初始化为空数组) + - 实现错误处理(文件写入失败时抛出 StorageError) + - _Requirements: 7.1, 7.2, 7.3, 7.4, 7.5, 7.6, 7.7_ + + - [x] 3.2 编写存储服务属性测试 + - **Property 9: 数据持久化完整性** + - **Validates: Requirements 7.1, 7.2, 7.3, 7.4** + + - [x] 3.3 编写存储服务属性测试 + - **Property 10: 文件初始化** + - **Validates: Requirements 7.5** + + - [x] 3.4 编写存储服务属性测试 + - **Property 11: 唯一 ID 生成** + - **Validates: Requirements 7.7** + + - [x] 3.5 编写存储服务单元测试 + - 测试文件写入失败的错误处理 + - 测试并发写入的安全性 + - _Requirements: 7.6_ + +- [x] 4. 检查点 - 确保存储层测试通过 + - 确保所有测试通过,如有问题请询问用户。 + +- [x] 5. 
实现 ASR 服务(ASRService) + - [x] 5.1 实现语音识别服务 + - 创建 ASRService 类,初始化 httpx.AsyncClient + - 实现 transcribe 方法(调用智谱 ASR API) + - 处理 API 响应,提取转写文本 + - 实现错误处理(API 调用失败时抛出 ASRServiceError) + - 处理空识别结果(返回空字符串并标记) + - 记录错误日志(包含时间戳和堆栈) + - _Requirements: 2.1, 2.2, 2.3, 2.4, 9.2, 9.5_ + + - [x] 5.2 编写 ASR 服务单元测试 + - 测试 API 调用成功场景(使用 mock) + - 测试 API 调用失败场景(使用 mock) + - 测试空识别结果的边缘情况 + - _Requirements: 2.1, 2.2, 2.3, 2.4_ + +- [x] 6. 实现语义解析服务(SemanticParserService) + - [x] 6.1 实现语义解析服务 + - 创建 SemanticParserService 类,初始化 httpx.AsyncClient + - 配置 System Prompt(数据转换器提示词) + - 实现 parse 方法(调用 GLM-4-Flash API) + - 解析 API 返回的 JSON 结构 + - 处理缺失维度(返回 null 或空数组) + - 实现错误处理(API 调用失败时抛出 SemanticParserError) + - 记录错误日志(包含时间戳和堆栈) + - _Requirements: 3.1, 3.2, 3.3, 3.4, 3.5, 9.2, 9.5_ + + - [x] 6.2 编写语义解析服务属性测试 + - **Property 4: 解析结果结构完整性** + - **Validates: Requirements 3.3** + + - [x] 6.3 编写语义解析服务属性测试 + - **Property 5: 缺失维度处理** + - **Validates: Requirements 3.4** + + - [x] 6.4 编写语义解析服务单元测试 + - 测试 API 调用成功场景(使用 mock) + - 测试 API 调用失败场景(使用 mock) + - 测试 System Prompt 正确使用 + - 测试无情绪信息文本的边缘情况 + - 测试无灵感信息文本的边缘情况 + - 测试无待办信息文本的边缘情况 + - _Requirements: 3.1, 3.2, 3.3, 3.4, 3.5_ + +- [x] 7. 检查点 - 确保服务层测试通过 + - 确保所有测试通过,如有问题请询问用户。 + +- [x] 8. 
实现 API 端点和请求处理 + - [x] 8.1 实现 POST /api/process 端点 + - 创建 FastAPI 路由处理器 + - 实现输入验证(音频格式、文件大小、文本编码) + - 处理 multipart/form-data 格式(音频文件) + - 处理 application/json 格式(文本内容) + - 实现请求日志记录 + - _Requirements: 1.1, 1.2, 8.1, 8.2, 8.3_ + + - [x] 8.2 实现业务逻辑编排 + - 如果是音频输入,调用 ASRService.transcribe + - 调用 SemanticParserService.parse 进行语义解析 + - 生成 record_id 和 timestamp + - 调用 StorageService 保存数据 + - 构建成功响应(HTTP 200,包含 record_id, timestamp, mood, inspirations, todos) + - _Requirements: 7.7, 8.4, 8.6_ + + - [x] 8.3 实现错误处理和响应 + - 捕获 ValidationError,返回 HTTP 400 和错误信息 + - 捕获 ASRServiceError,返回 HTTP 500 和"语音识别服务不可用" + - 捕获 SemanticParserError,返回 HTTP 500 和"语义解析服务不可用" + - 捕获 StorageError,返回 HTTP 500 和"数据存储失败" + - 所有错误响应包含 error 字段和 timestamp + - 记录所有错误到日志文件 + - _Requirements: 1.3, 8.5, 9.1, 9.2, 9.3, 9.4, 9.5_ + + - [x] 8.4 编写 API 端点属性测试 + - **Property 1: 音频格式验证** + - **Validates: Requirements 1.1** + + - [x] 8.5 编写 API 端点属性测试 + - **Property 2: UTF-8 文本接受** + - **Validates: Requirements 1.2** + + - [x] 8.6 编写 API 端点属性测试 + - **Property 3: 无效输入错误处理** + - **Validates: Requirements 1.3, 9.1** + + - [x] 8.7 编写 API 端点属性测试 + - **Property 12: 成功响应格式** + - **Validates: Requirements 8.4, 8.6** + + - [x] 8.8 编写 API 端点属性测试 + - **Property 13: 错误响应格式** + - **Validates: Requirements 8.5, 9.1, 9.3** + + - [x] 8.9 编写 API 端点单元测试 + - 测试 POST /api/process 端点存在 + - 测试接受 multipart/form-data 格式 + - 测试接受 application/json 格式 + - _Requirements: 8.1, 8.2, 8.3_ + +- [x] 9. 实现日志安全性和错误日志 + - [x] 9.1 实现日志过滤器 + - 创建日志过滤器,屏蔽敏感信息(API 密钥、密码等) + - 配置日志格式(包含 request_id, timestamp, level, message) + - 确保错误日志包含完整堆栈信息 + - _Requirements: 9.5, 10.5_ + + - [x] 9.2 编写日志属性测试 + - **Property 14: 错误日志记录** + - **Validates: Requirements 9.5** + + - [-] 9.3 编写日志属性测试 + - **Property 15: 敏感信息保护** + - **Validates: Requirements 10.5** + +- [x] 10. 检查点 - 确保所有测试通过 + - 确保所有测试通过,如有问题请询问用户。 + +- [x] 11. 
集成测试 + - [x] 11.1 编写端到端集成测试 + - 测试完整流程:音频上传 → ASR → 语义解析 → 存储 → 响应 + - 测试完整流程:文本提交 → 语义解析 → 存储 → 响应 + - 测试错误场景的端到端处理 + - _Requirements: 所有需求_ + +- [x] 12. 最终检查点 + - 确保所有测试通过,代码覆盖率达到 80% 以上,如有问题请询问用户。 + +## Notes + +- 所有任务均为必需任务,确保全面的测试覆盖 +- 每个任务都引用了具体的需求条款,确保可追溯性 +- 检查点任务确保增量验证 +- 属性测试验证通用正确性属性(使用 hypothesis 库,最少 100 次迭代) +- 单元测试验证特定示例和边缘情况 +- 所有外部 API 调用使用 mock 进行测试 + diff --git a/Dockerfile b/Dockerfile new file mode 100644 index 0000000000000000000000000000000000000000..a6c74154b4712507c904f564e8a64096c770d135 --- /dev/null +++ b/Dockerfile @@ -0,0 +1,31 @@ +FROM python:3.11-slim + +WORKDIR /app + +# 安装系统依赖 +RUN apt-get update && apt-get install -y \ + build-essential \ + && rm -rf /var/lib/apt/lists/* + +# 复制依赖文件 +COPY requirements.txt . + +# 安装 Python 依赖 +RUN pip install --no-cache-dir -r requirements.txt + +# 复制应用代码 +COPY app/ ./app/ +COPY data/ ./data/ +COPY frontend/dist/ ./frontend/dist/ + +# 复制启动脚本 +COPY start.py . + +# 创建必要的目录 +RUN mkdir -p generated_images logs + +# 暴露端口 +EXPOSE 7860 + +# 启动应用 +CMD ["python", "start.py"] diff --git a/HOTFIX_DOCKER_BUILD.md b/HOTFIX_DOCKER_BUILD.md new file mode 100644 index 0000000000000000000000000000000000000000..8f72d4cfb3e3027848477d982f0cc3513ab1a285 --- /dev/null +++ b/HOTFIX_DOCKER_BUILD.md @@ -0,0 +1,123 @@ +# 🔧 紧急修复:Docker 构建失败 + +## 🐛 问题描述 +Hugging Face Space 构建失败: +``` +ERROR: failed to calculate checksum of ref: "/generated_images": not found +``` + +## 🔍 问题原因 +1. `Dockerfile` 尝试复制 `generated_images/` 目录 +2. 但该目录在 GitHub 仓库中被 `.github/workflows/sync.yml` 删除了 +3. Docker 构建时找不到该目录,导致失败 + +## ✅ 已修复 + +### 1. 简化 Dockerfile +**文件**:`Dockerfile` + +**修改前**: +```dockerfile +COPY generated_images/ ./generated_images/ +``` + +**修改后**: +```dockerfile +# 只创建空目录,不复制文件 +RUN mkdir -p generated_images logs +``` + +### 2. 
修改默认配置 +**文件**:`app/user_config.py` 和 `app/storage.py` + +**修改前**: +```python +"image_url": "generated_images/default_character.jpeg", +``` + +**修改后**: +```python +"image_url": "", # 空字符串,前端会显示占位符 +``` + +**原因**: +- 不依赖 Git 仓库中的图片文件 +- 用户首次使用时可以生成自己的 AI 形象 +- 或者前端显示一个默认占位符 + +## 🚀 部署步骤 + +### 1. 提交修复 +```bash +git add Dockerfile app/user_config.py app/storage.py +git commit -m "Fix: Remove dependency on generated_images directory in Docker build" +git push origin main +``` + +### 2. 同步到 Hugging Face +1. 访问:https://huggingface.co/spaces/kernel14/Nora +2. Settings → Sync from GitHub → **Sync now** + +### 3. 等待重新构建 +- 查看 **Logs** 标签页 +- 应该能看到构建成功 + +## ✅ 验证修复 + +构建成功后,访问: +``` +https://kernel14-nora.hf.space/ +``` + +应该能看到: +- ✅ 前端正常加载 +- ✅ AI 形象位置显示占位符(或默认图标) +- ✅ 可以点击 ✨ 按钮生成自定义形象 +- ✅ 所有功能正常工作 + +## 📝 技术说明 + +### 为什么不在 Docker 镜像中包含默认图片? + +1. **Git 仓库限制**: + - 图片文件较大(几百 KB) + - 会增加仓库体积 + - 被 `.github/workflows/sync.yml` 清理 + +2. **更好的方案**: + - 用户首次使用时生成个性化形象 + - 或者使用 CDN 托管的默认图片 + - 或者前端显示 SVG 占位符 + +3. **运行时生成**: + - 用户可以随时生成新形象 + - 图片保存在容器的 `generated_images/` 目录 + - 重启容器后会丢失(可以接受) + +### 未来改进方向 + +1. **使用对象存储**: + - 将生成的图片上传到 S3/OSS + - 持久化存储,不会丢失 + - 支持多实例共享 + +2. **内嵌默认图片**: + - 将默认图片转为 Base64 + - 直接写在代码中 + - 或者使用 SVG 矢量图 + +3. **CDN 托管**: + - 将默认图片放在 CDN + - 配置 URL 指向 CDN + - 加载更快 + +## 🎉 修复完成 + +修复后,Docker 构建应该能成功,Space 可以正常运行。 + +--- + +**修复时间**:2026-01-18 +**影响范围**:Hugging Face Space Docker 构建 +**严重程度**:高(导致构建失败) +**修复状态**:✅ 已完成 diff --git a/HOTFIX_NULL_ERROR.md b/HOTFIX_NULL_ERROR.md new file mode 100644 index 0000000000000000000000000000000000000000..19b5889b00c8c64aaffe8319b8b934888ec42a71 --- /dev/null +++ b/HOTFIX_NULL_ERROR.md @@ -0,0 +1,129 @@ +# 🔧 紧急修复:Python null 错误 + +## 🐛 问题描述 +Hugging Face Space 部署后出现错误: +``` +NameError: name 'null' is not defined +``` + +## 🔍 问题原因 +在 `app/storage.py` 中使用了 JavaScript 语法的 `null`,但 Python 中应该使用 `None`。 + +## ✅ 已修复 + +### 1. 
修复 storage.py 中的 null +**文件**:`app/storage.py` + +**修改位置**: +- 第 173-175 行:`_get_default_records()` 方法 +- 第 315-317 行:`_get_default_todos()` 方法 + +**修改内容**: +```python +# 错误 ❌ +"time": null, +"location": null, + +# 正确 ✅ +"time": None, +"location": None, +``` + +### 2. 修复 Dockerfile +**文件**:`Dockerfile` + +**问题**:未复制 `generated_images/` 目录,导致默认角色图片 404 + +**修改**: +```dockerfile +# 添加这行 +COPY generated_images/ ./generated_images/ +``` + +## 🚀 部署步骤 + +### 1. 提交修复 +```bash +git add app/storage.py Dockerfile +git commit -m "Fix: Replace null with None in Python code" +git push origin main +``` + +### 2. 同步到 Hugging Face +1. 访问:https://huggingface.co/spaces/kernel14/Nora +2. Settings → Sync from GitHub → **Sync now** + +### 3. 等待重新构建 +- 查看 **Logs** 标签页 +- 等待构建完成 + +## ✅ 验证修复 + +访问以下 API 端点,应该都能正常返回: + +1. **健康检查**: + ``` + https://kernel14-nora.hf.space/health + ``` + +2. **获取记录**: + ``` + https://kernel14-nora.hf.space/api/records + ``` + +3. **获取心情**: + ``` + https://kernel14-nora.hf.space/api/moods + ``` + +4. **获取待办**: + ``` + https://kernel14-nora.hf.space/api/todos + ``` + +5. **默认角色图片**: + ``` + https://kernel14-nora.hf.space/generated_images/default_character.jpeg + ``` + +## 📝 技术说明 + +### Python vs JavaScript 的 null/None + +| 语言 | 空值表示 | +|------|---------| +| JavaScript | `null` | +| Python | `None` | +| JSON | `null` | + +在 Python 代码中: +- ✅ 使用 `None` +- ❌ 不要使用 `null` + +在 JSON 字符串中(如 AI 提示): +- ✅ 使用 JSON 字面量 `null`(写在 Python 字符串内部,不加引号) +- ✅ 这是正确的,因为是 JSON 格式 + +### 为什么会出现这个错误? + +1. **复制粘贴错误**:可能从 JSON 示例中复制了代码 +2. **语言混淆**:在多语言项目中容易混淆语法 +3. **IDE 未检测**:某些 IDE 可能不会立即标记这个错误 + +### 如何避免? + +1. **使用 Linter**:配置 pylint 或 flake8 +2. **类型检查**:使用 mypy 进行类型检查 +3. **单元测试**:编写测试覆盖默认数据生成 +4. 
**代码审查**:提交前仔细检查 + +## 🎉 修复完成 + +修复后,Space 应该能正常运行,所有 API 端点都能正常响应。 + +--- + +**修复时间**:2026-01-18 +**影响范围**:Hugging Face Space 部署 +**严重程度**:高(导致服务无法启动) +**修复状态**:✅ 已完成 diff --git a/HUGGINGFACE_DEPLOY.md b/HUGGINGFACE_DEPLOY.md new file mode 100644 index 0000000000000000000000000000000000000000..d741e10313493a48fd3c872b89a3cd8448222a39 --- /dev/null +++ b/HUGGINGFACE_DEPLOY.md @@ -0,0 +1,176 @@ +# 🚀 Hugging Face Spaces 部署指南 + +## ✅ 部署前检查清单 + +### 1. 根目录必需文件 + +确保以下文件在**根目录**(不是子目录): + +- ✅ `Dockerfile` - Docker 构建配置 +- ✅ `start.py` - 应用启动脚本 +- ✅ `requirements.txt` - Python 依赖 +- ✅ `README_HF.md` - Hugging Face 专用 README(带 frontmatter) + +### 2. 前端构建文件 + +确保前端已构建: + +```bash +cd frontend +npm install +npm run build +``` + +检查 `frontend/dist/` 目录是否存在且包含: +- ✅ `index.html` +- ✅ `assets/` 目录(包含 JS 和 CSS 文件) + +### 3. 环境变量配置 + +在 Hugging Face Space 的 **Settings → Variables and secrets** 中配置: + +**必需:** +- `ZHIPU_API_KEY` - 智谱 AI API 密钥 + +**可选:** +- `MINIMAX_API_KEY` - MiniMax API 密钥 +- `MINIMAX_GROUP_ID` - MiniMax Group ID + +### 4. README 配置 + +确保 `README_HF.md` 包含正确的 frontmatter: + +```yaml +--- +title: Nora - 治愈系记录助手 +emoji: 🌟 +colorFrom: purple +colorTo: pink +sdk: docker +pinned: false +license: mit +--- +``` + +## 🔧 部署步骤 + +### 方法 1:通过 GitHub 同步(推荐) + +1. **提交所有更改到 GitHub**: + ```bash + git add . + git commit -m "Fix: Add required files to root directory for HF deployment" + git push origin main + ``` + +2. **在 Hugging Face Space 中同步**: + - 进入你的 Space:https://huggingface.co/spaces/kernel14/Nora + - 点击 **Settings** + - 找到 **Sync from GitHub** 部分 + - 点击 **Sync now** + +3. **等待构建完成**: + - 查看 **Logs** 标签页 + - 等待 Docker 构建完成(可能需要 5-10 分钟) + +### 方法 2:直接上传文件 + +1. **在 Hugging Face Space 中上传文件**: + - 进入 **Files** 标签页 + - 上传以下文件到根目录: + - `Dockerfile` + - `start.py` + - `requirements.txt` + - `README_HF.md`(重命名为 `README.md`) + +2. **上传应用代码**: + - 上传 `app/` 目录 + - 上传 `data/` 目录 + - 上传 `frontend/dist/` 目录 + +3. 
**触发重新构建**: + - 点击 **Factory reboot** + +## 🐛 常见问题 + +### 问题 1:Space 显示 "Missing app file" + +**原因**:根目录缺少 `Dockerfile` 或 `start.py` + +**解决方案**: +1. 确认根目录有 `Dockerfile` 和 `start.py` +2. 如果使用 GitHub 同步,确保这些文件已提交并推送 +3. Factory reboot 重启 Space + +### 问题 2:Docker 构建失败 + +**原因**:依赖安装失败或文件路径错误 + +**解决方案**: +1. 查看 **Logs** 标签页的详细错误信息 +2. 检查 `requirements.txt` 是否正确 +3. 检查 `Dockerfile` 中的路径是否正确 + +### 问题 3:前端无法加载 + +**原因**:`frontend/dist/` 目录不存在或未包含在 Docker 镜像中 + +**解决方案**: +1. 本地运行 `cd frontend && npm run build` +2. 确认 `frontend/dist/` 目录存在 +3. 提交并推送到 GitHub +4. 重新同步 Space + +### 问题 4:API 调用失败 + +**原因**:未配置环境变量 + +**解决方案**: +1. 在 Space Settings 中配置 `ZHIPU_API_KEY` +2. Factory reboot 重启 Space +3. 检查 Logs 确认环境变量已加载 + +## 📊 验证部署 + +部署成功后,访问你的 Space URL,应该能看到: + +1. ✅ 前端页面正常加载 +2. ✅ AI 角色形象显示 +3. ✅ 可以进行文本输入 +4. ✅ 可以查看心情、灵感、待办数据 + +测试 API 端点: +- `https://你的space.hf.space/health` - 应该返回健康状态 +- `https://你的space.hf.space/docs` - 应该显示 API 文档 + +## 🔄 更新部署 + +当你更新代码后: + +1. **提交到 GitHub**: + ```bash + git add . + git commit -m "Update: 描述你的更改" + git push origin main + ``` + +2. **同步到 Hugging Face**: + - 在 Space Settings 中点击 **Sync now** + - 或者等待自动同步(如果已配置) + +3. **重启 Space**(如果需要): + - 点击 **Factory reboot** + +## 📚 相关文档 + +- [Hugging Face Spaces 文档](https://huggingface.co/docs/hub/spaces) +- [Docker SDK 文档](https://huggingface.co/docs/hub/spaces-sdks-docker) +- [项目完整文档](README.md) + +## 🆘 需要帮助? + +如果遇到问题: + +1. 查看 Space 的 **Logs** 标签页 +2. 检查 **Community** 标签页的讨论 +3. 在 GitHub 仓库提 Issue diff --git a/HUGGINGFACE_FIX_SUMMARY.md b/HUGGINGFACE_FIX_SUMMARY.md new file mode 100644 index 0000000000000000000000000000000000000000..0ec4b1d9de2257d8a948596cc7382d2d0f310a13 --- /dev/null +++ b/HUGGINGFACE_FIX_SUMMARY.md @@ -0,0 +1,223 @@ +# ✅ Hugging Face Spaces 部署问题已修复 + +## 🎯 问题描述 +Hugging Face Space 显示错误: +``` +This Space is missing an app file. An app file is required for the Space to build and run properly. 
+``` + +## 🔍 问题原因 +之前为了整理项目结构,将部署文件移到了 `deployment/` 目录,但 Hugging Face Spaces 要求关键文件必须在**根目录**。 + +## 🔧 已完成的修复 + +### 1. 复制关键文件到根目录 +- ✅ `Dockerfile` - 从 `deployment/Dockerfile` 复制到根目录 +- ✅ `start.py` - 从 `scripts/start.py` 复制到根目录 +- ✅ `README_HF.md` - 创建了带 frontmatter 的 Hugging Face 专用 README + +### 2. 创建部署工具 +- ✅ `.dockerignore` - 优化 Docker 构建,排除不必要的文件 +- ✅ `HUGGINGFACE_DEPLOY.md` - 完整的部署指南 +- ✅ `scripts/prepare_hf_deploy.bat` - 自动化部署准备脚本 + +### 3. 验证文件结构 +根目录现在包含所有必需文件: +``` +项目根目录/ +├── Dockerfile ✅ Docker 构建配置 +├── start.py ✅ 应用启动脚本 +├── requirements.txt ✅ Python 依赖 +├── README_HF.md ✅ HF 专用 README(带 frontmatter) +├── app/ ✅ 应用代码 +├── data/ ✅ 数据目录 +├── frontend/dist/ ✅ 前端构建文件 +└── generated_images/ ✅ 图片目录 +``` + +## 🚀 立即部署 + +### 方法 1:使用自动化脚本(推荐) + +运行准备脚本: +```bash +scripts\prepare_hf_deploy.bat +``` + +这会自动: +- ✅ 检查所有必需文件 +- ✅ 构建前端(如果需要) +- ✅ 生成部署清单 +- ✅ 显示下一步操作 + +### 方法 2:手动操作 + +#### 步骤 1:确认文件存在 +```bash +# 检查根目录文件 +dir Dockerfile +dir start.py +dir requirements.txt +dir README_HF.md + +# 检查前端构建 +dir frontend\dist\index.html +``` + +#### 步骤 2:提交到 GitHub +```bash +git add . +git commit -m "Fix: Add required files to root directory for HF deployment" +git push origin main +``` + +#### 步骤 3:同步到 Hugging Face +1. 访问:https://huggingface.co/spaces/kernel14/Nora +2. 点击 **Settings** 标签 +3. 找到 **Sync from GitHub** 部分 +4. 点击 **Sync now** 按钮 + +#### 步骤 4:配置环境变量 +1. 在 Settings 中找到 **Variables and secrets** +2. 添加环境变量: + - `ZHIPU_API_KEY` - 智谱 AI API 密钥(必需) + - `MINIMAX_API_KEY` - MiniMax API 密钥(可选) + - `MINIMAX_GROUP_ID` - MiniMax Group ID(可选) +3. 点击 **Factory reboot** 重启 Space + +#### 步骤 5:等待构建完成 +1. 切换到 **Logs** 标签页 +2. 观察 Docker 构建过程 +3. 等待显示 "Running on http://0.0.0.0:7860" + +## ✅ 验证部署 + +部署成功后,测试以下功能: + +### 1. 访问主页 +``` +https://kernel14-nora.hf.space/ +``` +应该看到: +- ✅ 前端页面正常加载 +- ✅ AI 角色形象显示 +- ✅ 输入框可用 + +### 2. 
测试 API +``` +https://kernel14-nora.hf.space/health +``` +应该返回: +```json +{ + "status": "healthy", + "data_dir": "data", + "max_audio_size": 10485760 +} +``` + +### 3. 查看 API 文档 +``` +https://kernel14-nora.hf.space/docs +``` +应该显示完整的 API 文档 + +### 4. 测试功能 +- ✅ 文本输入和处理 +- ✅ 查看心情、灵感、待办 +- ✅ AI 对话功能 +- ✅ 心情气泡池 + +## 🐛 故障排查 + +### 问题 1:仍然显示 "Missing app file" + +**可能原因**: +- 文件未正确提交到 GitHub +- GitHub 同步未完成 + +**解决方案**: +1. 检查 GitHub 仓库根目录是否有 `Dockerfile` 和 `start.py` +2. 在 HF Space 中手动触发同步 +3. 查看 Logs 标签页的详细错误 + +### 问题 2:Docker 构建失败 + +**可能原因**: +- 依赖安装失败 +- 文件路径错误 + +**解决方案**: +1. 查看 Logs 标签页的详细错误信息 +2. 检查 `requirements.txt` 是否正确 +3. 确认 `frontend/dist/` 目录存在 + +### 问题 3:前端无法加载 + +**可能原因**: +- `frontend/dist/` 目录不存在或为空 +- 前端构建文件未提交 + +**解决方案**: +1. 本地运行:`cd frontend && npm run build` +2. 确认 `frontend/dist/` 包含 `index.html` 和 `assets/` +3. 提交并推送到 GitHub +4. 重新同步 Space + +### 问题 4:API 调用失败 + +**可能原因**: +- 未配置 `ZHIPU_API_KEY` +- API 密钥无效或配额不足 + +**解决方案**: +1. 在 Space Settings 中配置环境变量 +2. 访问 https://open.bigmodel.cn/ 检查 API 密钥和配额 +3. Factory reboot 重启 Space + +## 📊 部署状态检查 + +运行以下命令检查本地准备情况: +```bash +scripts\prepare_hf_deploy.bat +``` + +查看生成的 `deploy_checklist.txt` 文件。 + +## 📚 相关文档 + +- [HUGGINGFACE_DEPLOY.md](HUGGINGFACE_DEPLOY.md) - 完整部署指南 +- [README_HF.md](README_HF.md) - Hugging Face Space 的 README +- [deployment/DEPLOYMENT.md](deployment/DEPLOYMENT.md) - 通用部署文档 + +## 🎉 成功标志 + +当看到以下内容时,说明部署成功: + +1. ✅ Space 状态显示为 "Running" +2. ✅ 可以访问主页并看到 UI +3. ✅ API 端点正常响应 +4. ✅ 可以进行文本输入和查看数据 +5. ✅ Logs 中没有错误信息 + +--- + +## 📝 技术说明 + +### 为什么需要文件在根目录? + +Hugging Face Spaces 的构建系统会在根目录查找以下文件: + +1. **Dockerfile** - 用于 Docker SDK 的 Space +2. **app.py** - 用于 Gradio/Streamlit SDK 的 Space +3. 
**README.md** - 带 frontmatter 的配置文件 + +如果这些文件不在根目录,构建系统会报错 "Missing app file"。 + +### 我们的解决方案 + +- 保留 `deployment/` 目录用于备份和文档 +- 在根目录创建必需文件的副本 +- 使用 `.dockerignore` 优化构建,避免包含不必要的文件 + +这样既保持了项目结构的整洁,又满足了 Hugging Face 的要求。 diff --git a/PRD.md b/PRD.md new file mode 100644 index 0000000000000000000000000000000000000000..c36fcbd64cb343a69bcde8fa8f3a7d9309dab30e --- /dev/null +++ b/PRD.md @@ -0,0 +1,155 @@ +--- + +# 产品概述 + +一款通过 **iOS 原生 (SwiftUI)** 构建,结合 **BLE 蓝牙硬件** 震动提醒与 **AI 语义解析** 的治愈系记录助手。用户通过 APP 或配套硬件录音,系统自动将内容拆解为灵感、心情与待办,并通过 RAG 技术实现历史记忆的回溯。 + +# 核心交互逻辑 + +## 硬件交互:蓝牙协议 + +由于使用 iOS 原生开发,手机充当“网关”角色,负责硬件与云端的中转。 + +- **连接流程 (Local Only)**: + - **无需 API 接口**。iOS APP 使用 `CBCentralManager` 扫描硬件 UUID。 + - 硬件作为外设 (Peripheral) 被手机连接。 +- **指令交互**: + - **录音阶段**:硬件按下录音键,通过蓝牙特征值 (Characteristic) 将音频数据包流式传输或发送结束信号至 iOS。 + - **震动反馈**: + - **轻微短振(心跳感)**:iOS 检测到录音启动,向蓝牙写入 `0x01` 指令。 + - **急促振动(提醒感)**:iOS 的待办逻辑触发,向蓝牙写入 `0x02` 指令。 + +## AI:调用智谱原生api + +- **语音转写**:iOS 使用 `URLSession` 调用智谱 **ASR API** 上传音频,实时获取转写文字。 +- **语义理解**:iOS 调用 **GLM-4-Flash API**,通过 Prompt 约束 AI 返回标准 JSON(包含情绪、灵感、待办)。 +- **形象定制**:登录时调用 **CogView API** 生成固定形象,图片下载后由 iOS 进行本地持久化存储。 + +# **技术架构 (iOS Native)** + +## **前端:SwiftUI** + +- **状态管理**:使用 `@Observable` (iOS 17+) 实时同步 AI 解析出的心情颜色和形象气泡。 +- **持久化**:使用 **SwiftData** 存储本地 JSON 结构的记录(`records`, `moods`, `todos`, `inspirations`)。 +- **安全性**:智谱 API Key 存储在 **Keychain** 中,避免硬编码。 + +## **AI 引擎 (智谱 API 集成)** + +| **模块** | **API 模型** | **职责** | +| --- | --- | --- | +| **ASR** | 智谱语音识别 | 硬件原始音频转文字 | +| **NLP** | GLM-4-Flash | 解析 JSON 结构、RAG 历史回溯对话 | +| **图像** | CogView-3 | 登录时一次性生成固定猫咪形象 | + +# AI形象生成 + +## 设置 + +- **初始化生成**:用户注册/首次登录时,系统引导用户输入关键词(或默认随机),调用 **GLM-Image (CogView)** 生成 1-3 张插画。 +- **持久化存储**:生成的图片 URL 存储在用户配置中,不再随每次录音改变。 +- **按需修改**:在“设置”提供修改接口,用户可以消耗积分或次数重新生成。 + +## 生成逻辑 + +为了保证品牌统一性,系统预设为”**治愈系插画猫咪**”,通过映射逻辑处理用户输入。 + +- **提示词生成逻辑 (Prompt Engineering)** + +| **用户输入维度** | **映射逻辑 (Internal Tags)** | **示例** | +| --- | --- | --- | +| **颜色** | 主色调 & 环境色 | 
温暖粉 -> `soft pastel pink fur, rose-colored aesthetic` | +| **性格** | 构图 & 眼神光 | 活泼 -> `big curious eyes, dynamic paw gesture, energetic aura` | +| **形象** | 配饰 & 特征 | 戴眼镜 -> `wearing tiny round glasses, scholarly look` | + +【陪伴式朋友】【温柔照顾型长辈】【引导型 老师】 + +**系统底座提示词 (System Base Prompt):** + +> "A masterpiece cute stylized cat illustration, [Color] theme, [Personality] facial expression and posture, [Description]. Japanese watercolor style, clean minimalist background, high quality, soft studio lighting, 4k." +> + +## 技术架构 + +### 前端:iOS Native (SwiftUI) + +- **UI 渲染**:利用 `SwiftUI` 实现毛玻璃效果与治愈系猫咪插画的流畅加载。 +- **状态管理**:使用 `Combine` 或 `Observation` 框架同步心情颜色变化。 +- **硬件接口**:`CoreBluetooth`。 + +### 后端:FastAPI (Python) + +- **API 核心**:处理 ASR、NLP、RAG 和 Image Generation。 +- **存储**:本地 JSON 文件系统(`records.json`, `moods.json`, `todos.json`, `inspirations.json`)。 + +### AI 引擎 (智谱全家桶) + +- **ASR**:语音转文字。 +- **GLM-4-Flash**:语义解析与 RAG 问答。 +- **GLM-Image (CogView)**:基于情绪映射生成的静态形象。 + +# 核心功能模块 + +### 首页 - 录音与实时处理 + +- **功能描述:** + - 支持语音录音(5-30 秒)或文字直接输入。 + - **静态形象展示**:页面中心展示常驻形象。 + - 实时处理:完成录音后自动触发后端 ASR 与 NLP 流程。 + - **结果速览**:展示最近一次分析的**原文及摘要**(提取出的情绪、灵感标签或待办任务)。 +- **数据存储:** * 音频文件:`data/audio/{timestamp}.wav` + - 完整记录索引:`data/records.json`(包含关联的 JSON ID 和音频路径)。 + +### 灵感看板页面 + +- **功能描述:** + - **瀑布流展示**:以卡片形式展示所有灵感。 + - **核心要素**:显示 AI 总结的核心观点、自动生成的标签、所属分类(工作/生活/学习/创意)。 + - **筛选排序**:支持按分类筛选及时间顺序/倒序排列。 +- **数据结构:** `inspirations.json` 存储核心观点、关键字及原文引用。 + +### 心情日记页面 + +- **功能描述:** + - **情绪可视化**:展示情绪分布柱状图(如:本周 60% 平静,20% 喜悦)。 + - **记录列表**:显示每条记录的情绪类型、强度(1-10)及当时的心情关键词。 + - **筛选**:可单独查看“喜”或“哀”等特定情绪的历史。 +- **数据结构:** `moods.json` 记录 `type`, `intensity`, `keywords` 等字段。 + +### 待办清单页面 + +- **功能描述:** + - **任务管理**:从输入中自动提取出的任务(包含时间、地点、内容)。 + - **状态切换**:支持手动勾选“已完成”。 + - **统计**:显示待办/已完成的数量对比。 +- **数据结构:** `todos.json` 包含任务描述、时间实体及完成状态。 + +### AI 对话页面 + +- **功能描述:** + - **智能检索**:用户询问“我上周关于论文有什么灵感?”时,系统通过 RAG 技术检索 `records.json` 并回答。 + - **快捷指令**:提供“总结今日心情”、“还有哪些待办”等快捷按钮。 +- **技术实现:** 基于 **GLM-4-Flash** 
进行上下文理解与 RAG 检索。 + +--- + +# 业务流程与数据流 + +iOS 端在请求 GLM-4 时,使用以下 System Prompt 确保数据可被解析: + +> "你是一个数据转换器。请将文本解析为 JSON 格式。维度包括:1.情绪(type,intensity); 2.灵感(core_idea,tags); 3.待办(task,time,location)。必须严格遵循 JSON 格式返回。" +> + +### NLP 语义解析策略 + +| **提取维度** | **逻辑** | **去向** | +| --- | --- | --- | +| **情绪** | 识别情感极性与 1-10 的强度值 | `moods.json` | +| **灵感** | 提炼 20 字以内的核心观点 + 3个标签 | `inspirations.json` | +| **待办** | 识别时间词(如“明晚”)、地点与动词短语 | `todos.json` | + +# 技术栈总结 + +- **开发语言**:Swift 6.0 / SwiftUI +- **核心框架**:CoreBluetooth (硬件), SwiftData (存储), CoreHaptics (震动) +- **AI 接口**:智谱 API (HTTP/HTTPS 请求) +- **数据存储**:iOS Local SandBox (音频文件 + 结构化数据) \ No newline at end of file diff --git a/PROJECT_STRUCTURE.md b/PROJECT_STRUCTURE.md new file mode 100644 index 0000000000000000000000000000000000000000..807d1a821c394d7a1d6b2cf220d2f584a9a5ff32 --- /dev/null +++ b/PROJECT_STRUCTURE.md @@ -0,0 +1,155 @@ +# 项目目录结构 + +``` +Inspiration-Record-APP/ +├── app/ # 后端应用代码 +│ ├── __init__.py +│ ├── main.py # FastAPI 主应用 +│ ├── config.py # 配置管理 +│ ├── models.py # 数据模型 +│ ├── storage.py # 数据存储 +│ ├── asr_service.py # 语音识别服务 +│ ├── semantic_parser.py # 语义解析服务 +│ ├── image_service.py # 图像生成服务 +│ ├── user_config.py # 用户配置管理 +│ └── logging_config.py # 日志配置 +│ +├── frontend/ # 前端应用 +│ ├── components/ # React 组件 +│ ├── services/ # API 服务 +│ ├── utils/ # 工具函数 +│ ├── dist/ # 构建产物(部署需要) +│ ├── App.tsx # 主应用组件 +│ ├── index.tsx # 入口文件 +│ ├── types.ts # TypeScript 类型定义 +│ ├── package.json # 前端依赖 +│ └── vite.config.ts # Vite 配置 +│ +├── data/ # 数据存储目录 +│ ├── moods.json # 心情数据 +│ ├── inspirations.json # 灵感数据 +│ ├── todos.json # 待办数据 +│ ├── records.json # 记录数据 +│ └── user_config.json # 用户配置 +│ +├── generated_images/ # AI 生成的图片 +│ └── default_character.jpeg # 默认形象 +│ +├── logs/ # 日志文件 +│ └── app.log +│ +├── tests/ # 测试文件 +│ ├── test_*.py # 单元测试 +│ ├── test_api.html # API 测试页面 +│ ├── test_chat_api.py # 聊天 API 测试 +│ └── test_default_character.py # 默认形象测试 +│ +├── scripts/ # 脚本文件 +│ ├── start_local.py # 本地启动脚本(8000端口) +│ ├── 
start_local.bat # Windows 启动脚本 +│ ├── start.py # 通用启动脚本(7860端口) +│ ├── build_and_deploy.bat # 构建并部署脚本 +│ └── build_and_deploy.sh # Linux/Mac 部署脚本 +│ +├── deployment/ # 部署配置文件 +│ ├── Dockerfile # Docker 配置 +│ ├── app_modelscope.py # ModelScope 入口 +│ ├── configuration.json # ModelScope 配置 +│ ├── ms_deploy.json # ModelScope 部署配置 +│ ├── requirements_hf.txt # Hugging Face 依赖 +│ ├── requirements_modelscope.txt # ModelScope 依赖 +│ ├── README_HF.md # Hugging Face 说明 +│ ├── README_MODELSCOPE.md # ModelScope 说明 +│ ├── DEPLOY_CHECKLIST.md # 部署检查清单 +│ ├── DEPLOYMENT.md # 部署指南 +│ ├── deploy_to_hf.bat # 部署到 HF 脚本 +│ └── deploy_to_hf.sh # 部署到 HF 脚本 +│ +├── docs/ # 文档目录 +│ ├── README.md # 项目文档 +│ ├── FEATURE_SUMMARY.md # 功能总结 +│ ├── API_配置说明.md # API 配置说明 +│ ├── 局域网访问指南.md # 局域网访问指南 +│ ├── 功能架构图.md # 架构图 +│ ├── 后端启动问题排查.md # 故障排查 +│ ├── 心情气泡池功能说明.md +│ ├── 心情气泡池快速开始.md +│ └── 语音录制问题排查.md +│ +├── .github/ # GitHub 配置 +│ └── workflows/ +│ └── sync.yml # 自动同步工作流 +│ +├── .env # 环境变量(本地) +├── .env.example # 环境变量示例 +├── .gitignore # Git 忽略文件 +├── requirements.txt # Python 依赖(开发环境) +├── pytest.ini # Pytest 配置 +├── PRD.md # 产品需求文档 +└── README.md # 项目说明 +``` + +## 目录说明 + +### 核心目录 + +- **app/** - 后端 FastAPI 应用,包含所有业务逻辑 +- **frontend/** - 前端 React 应用,使用 TypeScript + Vite +- **data/** - 运行时数据存储,JSON 格式 +- **generated_images/** - AI 生成的角色图片 + +### 开发目录 + +- **tests/** - 所有测试文件,包括单元测试和集成测试 +- **scripts/** - 开发和部署脚本 +- **logs/** - 应用日志文件 + +### 部署目录 + +- **deployment/** - 所有部署相关的配置文件 + - Hugging Face Spaces 部署 + - ModelScope 部署 + - Docker 部署 + +### 文档目录 + +- **docs/** - 项目文档和使用指南 + +## 快速开始 + +### 本地开发 + +```bash +# 1. 安装依赖 +pip install -r requirements.txt +cd frontend && npm install && cd .. + +# 2. 构建前端 +cd frontend && npm run build && cd .. + +# 3. 
启动服务器 +python scripts/start_local.py +``` + +### 部署 + +**Hugging Face:** +```bash +cd deployment +./deploy_to_hf.sh +``` + +**ModelScope:** +- 上传所有文件到 ModelScope +- 确保 `ms_deploy.json` 在根目录 + +## 文件清理说明 + +已删除的冗余文件: +- `app_gradio_old.py.bak` - 旧的 Gradio 备份文件 +- `packages.txt` - 不再使用的包列表 + +已整理的文件: +- 脚本文件 → `scripts/` +- 部署文件 → `deployment/` +- 测试文件 → `tests/` diff --git a/README.md b/README.md new file mode 100644 index 0000000000000000000000000000000000000000..5b44a3e0bb38bb0266e3f33d95804146644ebf54 --- /dev/null +++ b/README.md @@ -0,0 +1,175 @@ +--- +title: Nora - 治愈系记录助手 +emoji: 🌟 +colorFrom: purple +colorTo: pink +sdk: docker +pinned: false +license: mit +--- + +# 🌟 治愈系记录助手 - SoulMate AI Companion + +一个温暖、治愈的 AI 陪伴应用,帮助你记录心情、捕捉灵感、管理待办。 + +目前已上线huggingface,体验链接:https://huggingface.co/spaces/kernel14/Nora + +## ✨ 核心特性 + +- 🎤 **语音/文字快速记录** - 自动分类保存 +- 🤖 **AI 语义解析** - 智能提取情绪、灵感和待办 +- 💬 **AI 对话陪伴(RAG)** - 基于历史记录的个性化对话 +- 🖼️ **AI 形象定制** - 生成专属治愈系角色(720 种组合) +- 🫧 **物理引擎心情池** - 基于 Matter.js 的动态气泡可视化 + +## 🚀 快速开始 + +### 在线使用 + +直接访问本 Space 即可使用完整功能! + +### 配置 API 密钥 + +在 Space 的 **Settings → Repository secrets** 中配置: + +**必需:** +- `ZHIPU_API_KEY` - 智谱 AI API 密钥 + - 获取地址:https://open.bigmodel.cn/ + - 用途:语音识别、语义解析、AI 对话 + +**可选:** +- `MINIMAX_API_KEY` - MiniMax API 密钥 +- `MINIMAX_GROUP_ID` - MiniMax Group ID + - 获取地址:https://platform.minimaxi.com/ + - 用途:AI 形象生成 + +## 📖 使用说明 + +1. **首页快速记录** + - 点击麦克风录音或在输入框输入文字 + - AI 自动分析并分类保存 + +2. **查看分类数据** + - 点击顶部心情、灵感、待办图标 + - 查看不同类型的记录 + +3. **与 AI 对话** + - 点击 AI 形象显示问候对话框 + - 点击对话框中的聊天图标进入完整对话 + - AI 基于你的历史记录提供个性化回复 + +4. **定制 AI 形象** + - 点击右下角 ✨ 按钮 + - 选择颜色、性格、外观、角色 + - 生成专属形象(需要 MiniMax API) + +5. 
**心情气泡池** + - 点击顶部心情图标 + - 左右滑动查看不同日期的心情卡片 + - 点击卡片展开查看当天的气泡池 + - 可以拖拽气泡,感受物理引擎效果 + +## 📊 API 端点 + +- `POST /api/process` - 处理文本/语音输入 +- `POST /api/chat` - 与 AI 对话(RAG) +- `GET /api/records` - 获取所有记录 +- `GET /api/moods` - 获取情绪数据 +- `GET /api/inspirations` - 获取灵感 +- `GET /api/todos` - 获取待办事项 +- `POST /api/character/generate` - 生成角色形象 +- `GET /health` - 健康检查 +- `GET /docs` - API 文档 + +## 🔗 相关链接 + +- [GitHub 仓库](https://github.com/kernel-14/Nora) +- [详细文档](https://github.com/kernel-14/Nora/blob/main/README.md) +- [智谱 AI](https://open.bigmodel.cn/) +- [MiniMax](https://platform.minimaxi.com/) +- [Huggingface](https://huggingface.co/spaces/kernel14/Nora) + +## 📝 技术栈 + +- **后端**: FastAPI + Python 3.11 +- **前端**: React + TypeScript + Vite +- **物理引擎**: Matter.js +- **AI 服务**: 智谱 AI (GLM-4) + MiniMax +- **部署**: Hugging Face Spaces (Docker) + +## 🔧 本地开发 + +### 启动后端服务 + +```bash +# 安装依赖 +pip install -r requirements.txt + +# 配置环境变量(复制 .env.example 为 .env 并填写) +cp .env.example .env + +# 启动服务(端口 8000) +python scripts/start_local.py +``` + +### 构建前端 + +```bash +cd frontend +npm install +npm run build +``` + +### 局域网访问 + +1. 启动后端后,会显示局域网访问地址(如 `http://192.168.1.100:8000/`) +2. 其他设备连接同一 WiFi 后,使用该地址访问 +3. 如果无法访问,请参考 [局域网访问快速修复指南](docs/局域网访问快速修复.md) + +**快速诊断**: +```bash +# Windows +scripts\test_lan_access.bat + +# 或访问诊断页面 +http://你的IP:8000/test-connection.html +``` + +## 🐛 故障排查 + +### 问题:其他设备访问显示 "Load failed" + +**原因**:防火墙阻止、网络隔离或 API 地址配置错误 + +**解决方案**: +1. 运行诊断工具:`scripts\test_lan_access.bat` +2. 访问诊断页面:`http://你的IP:8000/test-connection.html` +3. 查看详细指南:[局域网访问快速修复](docs/局域网访问快速修复.md) + +### 问题:语音识别失败 + +**原因**:未配置 ZHIPU_API_KEY 或 API 配额不足 + +**解决方案**: +1. 检查 `.env` 文件中的 `ZHIPU_API_KEY` +2. 访问 https://open.bigmodel.cn/ 检查配额 + +### 问题:AI 形象生成失败 + +**原因**:未配置 MINIMAX_API_KEY 或 API 配额不足 + +**解决方案**: +1. 检查 `.env` 文件中的 `MINIMAX_API_KEY` 和 `MINIMAX_GROUP_ID` +2. 
访问 https://platform.minimaxi.com/ 检查配额 + +## 📚 文档 + +- [功能架构图](docs/功能架构图.md) +- [API 配置说明](docs/API_配置说明.md) +- [局域网访问指南](docs/局域网访问指南.md) +- [局域网访问快速修复](docs/局域网访问快速修复.md) +- [心情气泡池功能说明](docs/心情气泡池功能说明.md) + +## 📄 License + +MIT License diff --git a/README_HF.md b/README_HF.md new file mode 100644 index 0000000000000000000000000000000000000000..cded59053da30a978f8f4886259a40693fde247a --- /dev/null +++ b/README_HF.md @@ -0,0 +1,131 @@ +--- +title: Nora - 治愈系记录助手 +emoji: 🌟 +colorFrom: purple +colorTo: pink +sdk: docker +pinned: false +license: mit +--- + +# 🌟 治愈系记录助手 - SoulMate AI Companion + +一个温暖、治愈的 AI 陪伴应用,帮助你记录心情、捕捉灵感、管理待办。 + +## ✨ 核心特性 + +- 🎤 **语音/文字快速记录** - 自动分类保存 +- 🤖 **AI 语义解析** - 智能提取情绪、灵感和待办 +- 💬 **AI 对话陪伴(RAG)** - 基于历史记录的个性化对话 +- 🖼️ **AI 形象定制** - 生成专属治愈系角色(720 种组合) +- 🫧 **物理引擎心情池** - 基于 Matter.js 的动态气泡可视化 + +## 🚀 快速开始 + +### 在线使用 + +直接访问本 Space 即可使用完整功能! + +### ⚙️ 配置 API 密钥 + +在 Space 的 **Settings → Variables and secrets** 中配置: + +**必需:** +- `ZHIPU_API_KEY` - 智谱 AI API 密钥 + - 获取地址:https://open.bigmodel.cn/ + - 用途:语音识别、语义解析、AI 对话 + +**可选:** +- `MINIMAX_API_KEY` - MiniMax API 密钥 +- `MINIMAX_GROUP_ID` - MiniMax Group ID + - 获取地址:https://platform.minimaxi.com/ + - 用途:AI 形象生成 + +配置后,点击 **Factory reboot** 重启 Space 使配置生效。 + +## 📖 使用说明 + +1. **首页快速记录** + - 点击麦克风录音或在输入框输入文字 + - AI 自动分析并分类保存 + +2. **查看分类数据** + - 点击顶部心情、灵感、待办图标 + - 查看不同类型的记录 + +3. **与 AI 对话** + - 点击 AI 形象显示问候对话框 + - 点击对话框中的聊天图标进入完整对话 + - AI 基于你的历史记录提供个性化回复 + +4. **定制 AI 形象** + - 点击右下角 ✨ 按钮 + - 选择颜色、性格、外观、角色 + - 生成专属形象(需要 MiniMax API) + +5. 
**心情气泡池** + - 点击顶部心情图标 + - 左右滑动查看不同日期的心情卡片 + - 点击卡片展开查看当天的气泡池 + - 可以拖拽气泡,感受物理引擎效果 + +## 📊 API 端点 + +- `POST /api/process` - 处理文本/语音输入 +- `POST /api/chat` - 与 AI 对话(RAG) +- `GET /api/records` - 获取所有记录 +- `GET /api/moods` - 获取情绪数据 +- `GET /api/inspirations` - 获取灵感 +- `GET /api/todos` - 获取待办事项 +- `POST /api/character/generate` - 生成角色形象 +- `GET /health` - 健康检查 +- `GET /docs` - API 文档 + +## 🔗 相关链接 + +- [GitHub 仓库](https://github.com/kernel-14/Nora) +- [完整文档](https://github.com/kernel-14/Nora/blob/main/README.md) +- [智谱 AI](https://open.bigmodel.cn/) +- [MiniMax](https://platform.minimaxi.com/) + +## 📝 技术栈 + +- **后端**: FastAPI + Python 3.11 +- **前端**: React + TypeScript + Vite +- **物理引擎**: Matter.js +- **AI 服务**: 智谱 AI (GLM-4) + MiniMax +- **部署**: Hugging Face Spaces (Docker) + +## 🐛 故障排查 + +### 问题:语音识别失败 + +**原因**:未配置 ZHIPU_API_KEY 或 API 配额不足 + +**解决方案**: +1. 在 Space Settings 中配置 `ZHIPU_API_KEY` +2. 访问 https://open.bigmodel.cn/ 检查配额 +3. Factory reboot 重启 Space + +### 问题:AI 形象生成失败 + +**原因**:未配置 MINIMAX_API_KEY 或 API 配额不足 + +**解决方案**: +1. 在 Space Settings 中配置 `MINIMAX_API_KEY` 和 `MINIMAX_GROUP_ID` +2. 访问 https://platform.minimaxi.com/ 检查配额 +3. Factory reboot 重启 Space + +### 问题:Space 构建失败 + +**原因**:缺少必要的文件或配置 + +**检查清单**: +- ✅ 根目录有 `Dockerfile` +- ✅ 根目录有 `start.py` +- ✅ 根目录有 `requirements.txt` +- ✅ `frontend/dist/` 目录存在且包含构建文件 + +## 📄 License + +MIT License diff --git a/app/__init__.py b/app/__init__.py new file mode 100644 index 0000000000000000000000000000000000000000..f3d05711f1fa89afd7b476d5c6c6370e4cb1a7a4 --- /dev/null +++ b/app/__init__.py @@ -0,0 +1 @@ +"""Voice Text Processor Application""" diff --git a/app/asr_service.py b/app/asr_service.py new file mode 100644 index 0000000000000000000000000000000000000000..4e85ff40ca7ce99857945e3670a2bf2b52cc2fbf --- /dev/null +++ b/app/asr_service.py @@ -0,0 +1,202 @@ +"""ASR (Automatic Speech Recognition) service for Voice Text Processor. 
logger = logging.getLogger(__name__)


class ASRServiceError(Exception):
    """Raised when a call to the Zhipu ASR API fails.

    ``ASRService.transcribe`` raises this for non-200 responses, response
    bodies that cannot be decoded as JSON, timeouts, network errors and any
    other unexpected failure.

    Attributes:
        message: Human-readable description of the failure.
    """

    def __init__(self, message: str = "语音识别服务不可用"):
        """Initialize ASRServiceError.

        Args:
            message: Error message describing the failure.
        """
        super().__init__(message)
        self.message = message


def _log_asr_error(message: str, *, exc_info: bool = True) -> None:
    """Log an ASR failure at ERROR level with a ``timestamp`` extra field.

    Args:
        message: Text to log.
        exc_info: Attach the active exception's traceback. Pass ``False``
            when no exception is being handled, otherwise the log output
            gets a meaningless ``NoneType: None`` traceback.
    """
    # The ``timestamp`` extra is kept so that any handler/formatter reading
    # it keeps working; it is taken from a throw-away record exactly as the
    # previous inline code did, just in one place instead of five.
    probe = logger.makeRecord(
        logger.name, logging.ERROR, "", 0, message, (), None
    )
    logger.error(message, exc_info=exc_info, extra={"timestamp": probe.created})


class ASRService:
    """Service for transcribing audio using the Zhipu AI ASR API.

    Attributes:
        api_key: Zhipu AI API key, sent as a Bearer token.
        client: Async HTTP client (30 second timeout) used for requests.
        api_url: ASR transcription endpoint URL.
        model: ASR model identifier sent with each request.
    """

    def __init__(self, api_key: str):
        """Initialize the ASR service.

        Args:
            api_key: Zhipu AI API key for authentication.
        """
        self.api_key = api_key
        self.client = httpx.AsyncClient(timeout=30.0)
        self.api_url = "https://api.z.ai/api/paas/v4/audio/transcriptions"
        self.model = "glm-asr-2512"

    async def close(self):
        """Close the underlying HTTP client and release its resources."""
        await self.client.aclose()

    async def transcribe(self, audio_file: bytes, filename: str = "audio.mp3") -> str:
        """Transcribe an audio file to text.

        Args:
            audio_file: Audio file content as bytes.
            filename: Name reported for the uploaded file part.

        Returns:
            The transcribed text, or an empty string when the API returns
            no text (or only whitespace).

        Raises:
            ASRServiceError: On a non-200 response, an undecodable response
                body, a timeout, a network error or any other unexpected
                failure. The original exception, when there is one, is
                available as ``__cause__``.
        """
        headers = {"Authorization": f"Bearer {self.api_key}"}
        # The upload part is always labelled audio/mpeg, whatever ``filename`` is.
        files = {"file": (filename, audio_file, "audio/mpeg")}
        data = {"model": self.model, "stream": "false"}

        try:
            logger.info("Calling Zhipu ASR API for file: %s", filename)
            response = await self.client.post(
                self.api_url,
                headers=headers,
                files=files,
                data=data,
            )

            if response.status_code != 200:
                error_msg = f"ASR API returned status {response.status_code}"
                try:
                    error_msg += f": {response.json()}"
                except Exception:
                    # Body is not JSON; fall back to the raw text.
                    error_msg += f": {response.text}"
                # No exception is active here, so do not request a traceback.
                _log_asr_error(f"ASR API call failed: {error_msg}", exc_info=False)
                raise ASRServiceError(f"语音识别服务不可用: {error_msg}")

            try:
                result = response.json()
            except ValueError as e:
                _log_asr_error(f"Failed to parse ASR API response: {e}")
                raise ASRServiceError("语音识别服务不可用: 响应格式无效") from e

            text = result.get("text", "")
            if not text or not text.strip():
                logger.warning(
                    "ASR returned empty text for file: %s. "
                    "Audio content may be unrecognizable.",
                    filename,
                )
                return ""

            logger.info(
                "ASR transcription successful for %s. Text length: %d characters",
                filename,
                len(text),
            )
            return text

        except ASRServiceError:
            # Already logged and translated above.
            raise

        except httpx.TimeoutException as e:
            _log_asr_error(f"ASR API request timeout: {e}")
            raise ASRServiceError("语音识别服务不可用: 请求超时") from e

        except httpx.RequestError as e:
            _log_asr_error(f"ASR API request failed: {e}")
            raise ASRServiceError("语音识别服务不可用: 网络错误") from e

        except Exception as e:
            _log_asr_error(f"Unexpected error in ASR service: {e}")
            raise ASRServiceError(f"语音识别服务不可用: {e}") from e
+ +Requirements: 10.1, 10.2, 10.3, 10.4, 10.5 +""" + +import os +from pathlib import Path +from typing import Optional +from pydantic import BaseModel, Field, field_validator +from dotenv import load_dotenv + + +class Config(BaseModel): + """Application configuration loaded from environment variables.""" + + # API Keys + zhipu_api_key: str = Field( + ..., + description="Zhipu AI API key for ASR and GLM-4-Flash services" + ) + + minimax_api_key: Optional[str] = Field( + default=None, + description="MiniMax API key for image generation (optional)" + ) + + minimax_group_id: Optional[str] = Field( + default=None, + description="MiniMax Group ID (optional)" + ) + + # Data storage paths + data_dir: Path = Field( + default=Path("data"), + description="Directory for storing JSON data files" + ) + + # File size limits (in bytes) + max_audio_size: int = Field( + default=10 * 1024 * 1024, # 10 MB default + description="Maximum audio file size in bytes" + ) + + # Logging configuration + log_level: str = Field( + default="INFO", + description="Logging level (DEBUG, INFO, WARNING, ERROR, CRITICAL)" + ) + + log_file: Optional[Path] = Field( + default=Path("logs/app.log"), + description="Log file path" + ) + + # Server configuration + host: str = Field( + default="0.0.0.0", + description="Server host" + ) + + port: int = Field( + default=8000, + description="Server port" + ) + + @field_validator("log_level") + @classmethod + def validate_log_level(cls, v: str) -> str: + """Validate log level is one of the standard levels.""" + valid_levels = ["DEBUG", "INFO", "WARNING", "ERROR", "CRITICAL"] + v_upper = v.upper() + if v_upper not in valid_levels: + raise ValueError(f"log_level must be one of {valid_levels}") + return v_upper + + @field_validator("max_audio_size") + @classmethod + def validate_max_audio_size(cls, v: int) -> int: + """Validate max audio size is positive.""" + if v <= 0: + raise ValueError("max_audio_size must be positive") + return v + + @field_validator("data_dir", 
"log_file") + @classmethod + def convert_to_path(cls, v) -> Path: + """Convert string paths to Path objects.""" + if isinstance(v, str): + return Path(v) + return v + + class Config: + """Pydantic configuration.""" + frozen = True # Make config immutable + + +def load_config() -> Config: + """Load configuration from environment variables. + + Returns: + Config: Validated configuration object + + Raises: + ValueError: If required configuration is missing or invalid + + Environment Variables: + ZHIPU_API_KEY: Required. API key for Zhipu AI services + MINIMAX_API_KEY: Optional. API key for MiniMax image generation + MINIMAX_GROUP_ID: Optional. MiniMax Group ID + DATA_DIR: Optional. Directory for data storage (default: data/) + MAX_AUDIO_SIZE: Optional. Max audio file size in bytes (default: 10MB) + LOG_LEVEL: Optional. Logging level (default: INFO) + LOG_FILE: Optional. Log file path (default: logs/app.log) + HOST: Optional. Server host (default: 0.0.0.0) + PORT: Optional. Server port (default: 8000) + """ + # Load environment variables from .env file + load_dotenv() + + # Load from environment variables + config_dict = { + "zhipu_api_key": os.getenv("ZHIPU_API_KEY"), + "minimax_api_key": os.getenv("MINIMAX_API_KEY"), + "minimax_group_id": os.getenv("MINIMAX_GROUP_ID"), + "data_dir": os.getenv("DATA_DIR", "data"), + "max_audio_size": int(os.getenv("MAX_AUDIO_SIZE", str(10 * 1024 * 1024))), + "log_level": os.getenv("LOG_LEVEL", "INFO"), + "log_file": os.getenv("LOG_FILE", "logs/app.log"), + "host": os.getenv("HOST", "0.0.0.0"), + "port": int(os.getenv("PORT", "8000")), + } + + # Validate required fields + if not config_dict["zhipu_api_key"]: + raise ValueError( + "ZHIPU_API_KEY environment variable is required. " + "Please set it before starting the application." 
+ ) + + # Create and validate config + try: + config = Config(**config_dict) + except Exception as e: + raise ValueError(f"Configuration validation failed: {e}") + + # Ensure data directory exists + config.data_dir.mkdir(parents=True, exist_ok=True) + + # Ensure log directory exists + if config.log_file: + config.log_file.parent.mkdir(parents=True, exist_ok=True) + + return config + + +def validate_config(config: Config) -> None: + """Validate configuration at startup. + + Args: + config: Configuration object to validate + + Raises: + ValueError: If configuration is invalid or required resources are unavailable + """ + # Check data directory is writable + if not os.access(config.data_dir, os.W_OK): + raise ValueError( + f"Data directory {config.data_dir} is not writable. " + "Please check permissions." + ) + + # Check log directory is writable + if config.log_file and not os.access(config.log_file.parent, os.W_OK): + raise ValueError( + f"Log directory {config.log_file.parent} is not writable. " + "Please check permissions." + ) + + # Validate API key format (basic check) + if len(config.zhipu_api_key) < 10: + raise ValueError( + "ZHIPU_API_KEY appears to be invalid (too short). " + "Please check your API key." + ) + + +# Global config instance (loaded on import) +_config: Optional[Config] = None + + +def get_config() -> Config: + """Get the global configuration instance. + + Returns: + Config: The application configuration + + Raises: + RuntimeError: If configuration has not been initialized + """ + global _config + if _config is None: + raise RuntimeError( + "Configuration not initialized. Call init_config() first." + ) + return _config + + +def init_config() -> Config: + """Initialize the global configuration. + + This should be called once at application startup. 
class ImageGenerationService:
    """Service for generating cat character images using the MiniMax API.

    The service builds an English prompt from the user's preferences
    (color, personality, appearance, role), posts it to the MiniMax
    image-generation endpoint and returns the resulting image(s).

    Attributes:
        api_key: MiniMax API key used for the Bearer Authorization header
        group_id: MiniMax group ID; stored for compatibility, never sent
        client: Async HTTP client for making API requests
        api_url: MiniMax API endpoint URL
        model: Model identifier sent with every request ("image-01")

    Requirements: PRD - AI character image generation module
    """

    # Color preference -> prompt fragment
    COLOR_MAPPING = {
        "温暖粉": "soft pastel pink fur, rose-colored aesthetic",
        "天空蓝": "light sky blue fur, serene blue atmosphere",
        "薄荷绿": "mint green fur, fresh green ambiance",
        "奶油黄": "cream yellow fur, warm golden glow",
        "薰衣草紫": "lavender purple fur, gentle purple tones",
        "珊瑚橙": "coral orange fur, warm peachy atmosphere",
        "纯白": "pure white fur, clean minimalist aesthetic",
        "浅灰": "light gray fur, soft neutral tones"
    }

    # Personality trait -> prompt fragment
    PERSONALITY_MAPPING = {
        "活泼": "big curious eyes, dynamic paw gesture, energetic aura, playful expression",
        "温柔": "soft gentle eyes, calm posture, peaceful expression, caring demeanor",
        "聪明": "intelligent eyes, thoughtful expression, wise appearance, attentive look",
        "慵懒": "relaxed eyes, lounging posture, comfortable expression, laid-back vibe",
        "勇敢": "confident eyes, strong posture, determined expression, courageous stance",
        "害羞": "shy eyes, timid posture, gentle expression, reserved demeanor"
    }

    # Appearance feature -> prompt fragment
    APPEARANCE_MAPPING = {
        "戴眼镜": "wearing tiny round glasses, scholarly look",
        "戴帽子": "wearing a cute small hat, fashionable style",
        "戴围巾": "wearing a cozy scarf, warm appearance",
        "戴蝴蝶结": "wearing a cute bow tie, elegant look",
        "无配饰": "natural appearance, simple and pure"
    }

    # Character role -> prompt fragment
    ROLE_MAPPING = {
        "陪伴式朋友": "friendly companion, approachable and warm",
        "温柔照顾型长辈": "caring elder figure, nurturing and protective",
        "引导型老师": "wise teacher figure, knowledgeable and patient"
    }

    # Base prompt template shared by every request
    BASE_PROMPT = (
        "A masterpiece cute stylized cat illustration, {color} theme, "
        "{personality} facial expression and posture, {appearance}. "
        "{role}. Japanese watercolor style, clean minimalist background, "
        "high quality, soft studio lighting, 4k, healing aesthetic, "
        "adorable and heartwarming"
    )

    # Values accepted for the ``response_format`` argument of generate_image
    _RESPONSE_FORMATS = ("url", "base64")

    def __init__(self, api_key: str, group_id: Optional[str] = None):
        """Initialize the image generation service.

        Args:
            api_key: MiniMax API key for authentication
            group_id: MiniMax group ID (optional, kept for compatibility)
        """
        self.api_key = api_key
        self.group_id = group_id  # retained but unused
        # Image generation is slow, hence the generous timeout.
        self.client = httpx.AsyncClient(timeout=120.0)
        self.api_url = "https://api.minimaxi.com/v1/image_generation"
        self.model = "image-01"

    async def close(self):
        """Close the HTTP client.

        This should be called when the service is no longer needed
        to properly clean up resources.
        """
        await self.client.aclose()

    async def download_image(self, url: str, save_path: str) -> str:
        """Download image from URL and save to local file.

        Args:
            url: Image URL to download
            save_path: Local file path to save the image

        Returns:
            Absolute path to the saved image file

        Raises:
            ImageGenerationError: If the download or the file write fails
        """
        try:
            logger.info(f"Downloading image from: {url}")

            # Create the target directory if it does not exist yet.
            save_path_obj = Path(save_path)
            save_path_obj.parent.mkdir(parents=True, exist_ok=True)

            response = await self.client.get(url, timeout=60.0)

            if response.status_code != 200:
                error_msg = f"Failed to download image: HTTP {response.status_code}"
                logger.error(error_msg)
                raise ImageGenerationError(error_msg)

            save_path_obj.write_bytes(response.content)

            abs_path = str(save_path_obj.absolute())
            logger.info(f"Image saved to: {abs_path}")
            return abs_path

        except ImageGenerationError:
            raise
        except Exception as e:
            error_msg = f"Failed to download image: {str(e)}"
            logger.error(error_msg)
            raise ImageGenerationError(error_msg) from e

    def build_prompt(
        self,
        color: str = "温暖粉",
        personality: str = "温柔",
        appearance: str = "无配饰",
        role: str = "陪伴式朋友"
    ) -> str:
        """Build the complete prompt for image generation.

        Unknown values silently fall back to the default entry of the
        corresponding mapping.

        Args:
            color: Color preference (a key of COLOR_MAPPING)
            personality: Personality trait (a key of PERSONALITY_MAPPING)
            appearance: Appearance feature (a key of APPEARANCE_MAPPING)
            role: Character role (a key of ROLE_MAPPING)

        Returns:
            Complete prompt string for the MiniMax image generation API
        """
        prompt = self.BASE_PROMPT.format(
            color=self.COLOR_MAPPING.get(color, self.COLOR_MAPPING["温暖粉"]),
            personality=self.PERSONALITY_MAPPING.get(
                personality, self.PERSONALITY_MAPPING["温柔"]
            ),
            appearance=self.APPEARANCE_MAPPING.get(
                appearance, self.APPEARANCE_MAPPING["无配饰"]
            ),
            role=self.ROLE_MAPPING.get(role, self.ROLE_MAPPING["陪伴式朋友"]),
        )

        logger.info(f"Generated prompt: {prompt[:100]}...")
        return prompt

    @staticmethod
    def _extract_images(result: dict, response_format: str = "url") -> List[str]:
        """Return the image entries of a parsed API response as a list.

        For ``"url"`` the ``image_urls`` field is preferred and ``url`` is the
        fallback; for ``"base64"`` the ``image_base64`` field is read. A bare
        string value is wrapped in a one-element list so callers can always
        index the result. An empty list means nothing usable was found.
        """
        data = result.get("data")
        if not isinstance(data, dict):
            return []

        if response_format == "base64":
            candidate_keys = ("image_base64",)
        else:
            candidate_keys = ("image_urls", "url")

        for key in candidate_keys:
            value = data.get(key)
            if not value:
                continue
            if isinstance(value, str):
                return [value]
            return list(value)
        return []

    async def generate_image(
        self,
        color: str = "温暖粉",
        personality: str = "温柔",
        appearance: str = "无配饰",
        role: str = "陪伴式朋友",
        aspect_ratio: str = "1:1",
        n: int = 1,
        response_format: str = "url"
    ) -> Dict[str, str]:
        """Generate a cat character image using MiniMax API.

        This method sends a request to the MiniMax API with the constructed
        prompt and returns the generated image URL or base64 data.

        Args:
            color: Color preference
            personality: Personality trait
            appearance: Appearance feature
            role: Character role
            aspect_ratio: Image aspect ratio (1:1, 16:9, 9:16, 4:3, 3:4)
            n: Number of images to generate
            response_format: Response format ("url" or "base64")

        Returns:
            Dictionary containing:
                - url: Image URL (if response_format="url"); a single string
                  when n == 1, otherwise a list of strings
                - data: Base64 image data (if response_format="base64"), with
                  the same single/list convention
                - prompt: Used prompt
                - task_id: Task ID from MiniMax
                - metadata: The "metadata" object of the response, if any

        Raises:
            ValueError: If response_format is not "url" or "base64"
            ImageGenerationError: If API call fails or returns invalid response
        """
        if response_format not in self._RESPONSE_FORMATS:
            raise ValueError('response_format must be "url" or "base64"')

        try:
            prompt = self.build_prompt(color, personality, appearance, role)

            headers = {
                "Authorization": f"Bearer {self.api_key.strip()}",
                "Content-Type": "application/json"
            }

            payload = {
                "model": self.model,
                "prompt": prompt,
                "aspect_ratio": aspect_ratio,
                "response_format": response_format,
                "n": n,
                "prompt_optimizer": True
            }

            logger.info(
                f"Calling MiniMax API for image generation. "
                f"Aspect ratio: {aspect_ratio}, Count: {n}"
            )
            # Never log the API key, not even a prefix of it.
            logger.debug(f"API URL: {self.api_url}")
            logger.debug(f"Payload: {json.dumps(payload, ensure_ascii=False)}")

            response = await self.client.post(
                self.api_url,
                headers=headers,
                json=payload
            )

            if response.status_code != 200:
                error_msg = f"MiniMax API returned status {response.status_code}"
                try:
                    error_detail = response.json()
                    error_msg += f": {json.dumps(error_detail, ensure_ascii=False)}"
                except Exception:
                    error_msg += f": {response.text}"

                logger.error(f"Image generation API call failed: {error_msg}")
                logger.error(f"Request URL: {self.api_url}")
                logger.error(f"Request payload: {json.dumps(payload, ensure_ascii=False)}")
                raise ImageGenerationError(f"图像生成服务不可用: {error_msg}")

            try:
                result = response.json()
            except Exception as e:
                logger.error(f"Failed to parse MiniMax API response: {str(e)}")
                logger.error(f"Raw response text: {response.text}")
                raise ImageGenerationError("图像生成服务不可用: 响应格式无效") from e

            if not isinstance(result, dict):
                logger.error(f"Unexpected response type: {type(result).__name__}")
                raise ImageGenerationError("API 响应格式错误: 无法提取图像 URL")

            # Expected response shape:
            # {
            #   "id": "task_id",
            #   "data": {"image_urls": [...]},
            #   "metadata": {...},
            #   "base_resp": {"status_code": 0, "status_msg": "success"}
            # }
            # Only the keys are logged: the body may carry large base64 data.
            logger.debug(f"API response keys: {list(result.keys())}")

            if "base_resp" in result:
                base_resp = result.get("base_resp") or {}
                status_code = base_resp.get("status_code", -1)
                status_msg = base_resp.get("status_msg", "Unknown error")

                # status_code == 0 means success
                if status_code != 0:
                    logger.error(f"MiniMax API error: {status_code} - {status_msg}")
                    raise ImageGenerationError(f"图像生成失败: {status_msg}")

                logger.info(f"MiniMax API success: {status_code} - {status_msg}")

            # The task id may be reported as "id" or as "task_id".
            task_id = result.get("id") or result.get("task_id", "")

            images = self._extract_images(result, response_format)
            if not images:
                if response_format == "base64":
                    logger.error("Could not extract image data from response")
                else:
                    logger.error(
                        f"Could not extract image URLs from response: "
                        f"{json.dumps(result, ensure_ascii=False)}"
                    )
                raise ImageGenerationError("API 响应格式错误: 无法提取图像 URL")

            if response_format == "base64":
                logger.info(f"Image generation successful. Images: {len(images)}")
                result_key = "data"
            else:
                logger.info(f"Image generation successful. URLs: {images}")
                result_key = "url"

            # A single requested image is returned bare, several as a list.
            return {
                result_key: images[0] if n == 1 else images,
                "prompt": prompt,
                "task_id": task_id,
                "metadata": result.get("metadata", {})
            }

        except ImageGenerationError:
            # Re-raise ImageGenerationError as-is
            raise

        except httpx.TimeoutException as e:
            logger.error(f"MiniMax API request timeout: {str(e)}")
            raise ImageGenerationError("图像生成服务不可用: 请求超时") from e

        except httpx.RequestError as e:
            logger.error(f"MiniMax API request failed: {str(e)}")
            raise ImageGenerationError("图像生成服务不可用: 网络错误") from e

        except Exception as e:
            logger.error(
                f"Unexpected error in image generation service: {str(e)}",
                exc_info=True
            )
            raise ImageGenerationError(f"图像生成服务不可用: {str(e)}") from e

    async def generate_multiple_images(
        self,
        color: str = "温暖粉",
        personality: str = "温柔",
        appearance: str = "无配饰",
        role: str = "陪伴式朋友",
        count: int = 3,
        aspect_ratio: str = "1:1"
    ) -> List[Dict[str, str]]:
        """Generate multiple cat character images.

        All images come from a single API call with ``n=count`` and share
        the same prompt and task id, so users can pick their favorite.

        Args:
            color: Color preference
            personality: Personality trait
            appearance: Appearance feature
            role: Character role
            count: Number of images to generate (1-4)
            aspect_ratio: Image aspect ratio

        Returns:
            List of dictionaries, each containing url, prompt, task_id and
            the zero-based index of the image

        Raises:
            ValueError: If count is outside 1-4
            ImageGenerationError: If the API call fails
        """
        if count < 1 or count > 4:
            raise ValueError("Count must be between 1 and 4")

        try:
            result = await self.generate_image(
                color=color,
                personality=personality,
                appearance=appearance,
                role=role,
                aspect_ratio=aspect_ratio,
                n=count
            )
        except ImageGenerationError as e:
            logger.error(f"Failed to generate images: {e.message}")
            raise

        # generate_image returns a bare URL for n == 1 and a list otherwise.
        urls = result['url'] if isinstance(result['url'], list) else [result['url']]

        return [
            {
                "url": url,
                "prompt": result['prompt'],
                "task_id": result['task_id'],
                "index": i
            }
            for i, url in enumerate(urls)
        ]
class SensitiveDataFilter(logging.Filter):
    """Filter to remove sensitive information from log records.

    This filter masks API keys, bearer tokens, passwords and Authorization
    header values so they do not appear in logs.

    The record is rendered (``msg % args``) before masking. Masking the
    template on its own could swallow a ``%s`` placeholder (for example
    ``"password=%s"``) and break formatting, and would leave secrets carried
    by non-string arguments such as dicts untouched.

    Requirements: 10.5
    """

    # (compiled pattern, replacement) pairs, applied in order.
    SENSITIVE_PATTERNS = [
        # API keys (various formats). "." is part of the token alphabet so
        # dotted keys ("id.secret") and JWT-like values are masked in full.
        (re.compile(r'(api[_-]?key["\s:=]+)([a-zA-Z0-9_.-]{10,})', re.IGNORECASE), r'\1***REDACTED***'),
        (re.compile(r'(zhipu[_-]?api[_-]?key["\s:=]+)([a-zA-Z0-9_.-]{10,})', re.IGNORECASE), r'\1***REDACTED***'),
        # Bearer tokens
        (re.compile(r'(bearer\s+)([a-zA-Z0-9_.-]{10,})', re.IGNORECASE), r'\1***REDACTED***'),
        # Passwords
        (re.compile(r'(password["\s:=]+)([^\s"]+)', re.IGNORECASE), r'\1***REDACTED***'),
        # Authorization headers: keep an optional scheme word and mask the
        # credential after it, so short tokens are covered as well.
        (re.compile(r'(authorization["\s:=]+)((?:bearer|basic)\s+)?[^\s"]+', re.IGNORECASE), r'\1\2***REDACTED***'),
    ]

    def filter(self, record: logging.LogRecord) -> bool:
        """Filter log record to mask sensitive data.

        Args:
            record: Log record to filter

        Returns:
            bool: Always True (we modify but don't reject records)
        """
        if record.args:
            try:
                rendered = record.getMessage()
            except Exception:
                # The template and its arguments do not fit together; mask
                # the pieces separately and let the handler report the error.
                self._mask_parts(record)
            else:
                record.msg = self._mask_sensitive_data(rendered)
                # Already formatted; also keeps later passes idempotent.
                record.args = ()
        elif isinstance(record.msg, str):
            record.msg = self._mask_sensitive_data(record.msg)

        return True

    def _mask_parts(self, record: logging.LogRecord) -> None:
        """Mask the message template and every string argument in place."""
        if isinstance(record.msg, str):
            record.msg = self._mask_sensitive_data(record.msg)

        if isinstance(record.args, dict):
            record.args = {
                k: self._mask_sensitive_data(v) if isinstance(v, str) else v
                for k, v in record.args.items()
            }
        elif isinstance(record.args, tuple):
            record.args = tuple(
                self._mask_sensitive_data(arg) if isinstance(arg, str) else arg
                for arg in record.args
            )

    def _mask_sensitive_data(self, text: str) -> str:
        """Mask sensitive data in text using regex patterns.

        Args:
            text: Text to mask

        Returns:
            str: Text with sensitive data masked
        """
        for pattern, replacement in self.SENSITIVE_PATTERNS:
            text = pattern.sub(replacement, text)
        return text
def get_logger(name: str) -> logging.Logger:
    """Return the logger registered under *name*.

    Thin convenience wrapper around ``logging.getLogger`` so modules can
    obtain their logger from this package.

    Args:
        name: Logger name (typically __name__)

    Returns:
        logging.Logger: Logger instance
    """
    module_logger = logging.getLogger(name)
    return module_logger
@asynccontextmanager
async def lifespan(app: FastAPI):
    """Manage application startup and shutdown.

    Startup loads and validates the configuration, then configures logging
    from it and logs a short, non-sensitive configuration summary. Any
    failure is re-raised as RuntimeError so the application refuses to
    start. Shutdown only logs.

    Requirements: 10.4 - Startup configuration validation
    """
    logger.info("Starting Voice Text Processor application...")

    try:
        # init_config() raises ValueError when the configuration is invalid.
        settings = init_config()
        logger.info("Configuration loaded and validated successfully")

        setup_logging(log_level=settings.log_level, log_file=settings.log_file)
        logger.info("Logging system configured")

        # Configuration summary (no secrets).
        summary_fields = (
            ("Data directory", "data_dir", ""),
            ("Max audio size", "max_audio_size", " bytes"),
            ("Log level", "log_level", ""),
        )
        for label, attribute, unit in summary_fields:
            logger.info(f"{label}: {getattr(settings, attribute)}{unit}")

    except ValueError as config_error:
        # Invalid configuration: refuse to start.
        logger.error(f"Configuration validation failed: {config_error}")
        logger.error("Application startup aborted due to configuration errors")
        raise RuntimeError(f"Configuration error: {config_error}") from config_error
    except Exception as startup_error:
        logger.error(f"Unexpected error during startup: {startup_error}", exc_info=True)
        raise RuntimeError(f"Startup error: {startup_error}") from startup_error

    logger.info("Application startup complete")

    yield

    logger.info("Shutting down Voice Text Processor application...")
    logger.info("Application shutdown complete")
@app.post("/api/process", response_model=ProcessResponse)
async def process_input(
    audio: Optional[UploadFile] = File(None),
    text: Optional[str] = Form(None)
) -> ProcessResponse:
    """Process user input (audio or text) and extract structured data.

    This endpoint accepts either an audio file or text content, performs
    speech recognition (if audio), semantic parsing, and stores the results.

    Args:
        audio: Audio file (multipart/form-data); the extension must be one of
            SUPPORTED_AUDIO_FORMATS (mp3, wav, m4a, webm)
        text: Text content sent as a form field

    Returns:
        ProcessResponse containing record_id, timestamp, mood, inspirations,
        todos. Failures are returned as a JSONResponse instead of being
        raised: HTTP 400 for validation errors, HTTP 500 for ASR, parsing,
        storage or unexpected errors.

    Requirements: 1.1, 1.2, 1.3, 7.7, 8.1, 8.2, 8.3, 8.4, 8.5, 8.6, 9.1, 9.2, 9.3, 9.4, 9.5
    """
    # Local import: only `datetime` itself is imported at file level.
    from datetime import timezone

    def utc_timestamp() -> str:
        """Return the current UTC time as ISO-8601 text with a trailing "Z"."""
        # Same text as the deprecated datetime.utcnow().isoformat() + "Z".
        return datetime.now(timezone.utc).replace(tzinfo=None).isoformat() + "Z"

    request_id = str(uuid.uuid4())
    timestamp = utc_timestamp()

    # Set request_id in logging context
    set_request_id(request_id)

    logger.info(f"Processing request - audio: {audio is not None}, text: {text is not None}")

    # Created lazily below; the finally block closes whatever exists.
    asr_service = None
    parser_service = None

    try:
        # Input validation: exactly one of audio / text must be provided.
        if audio is None and text is None:
            raise ValidationError("请提供音频文件或文本内容")

        if audio is not None and text is not None:
            raise ValidationError("请只提供音频文件或文本内容中的一种")

        config = get_config()

        storage_service = StorageService(str(config.data_dir))
        asr_service = ASRService(config.zhipu_api_key)
        parser_service = SemanticParserService(config.zhipu_api_key)

        if audio is not None:
            input_type = "audio"

            # Validate audio format by file extension.
            filename = audio.filename or "audio"
            file_ext = ("." + filename.split(".")[-1].lower()) if "." in filename else ""

            if file_ext not in SUPPORTED_AUDIO_FORMATS:
                raise ValidationError(
                    f"不支持的音频格式: {file_ext}. "
                    f"支持的格式: {', '.join(sorted(SUPPORTED_AUDIO_FORMATS))}"
                )

            audio_content = await audio.read()

            if len(audio_content) > config.max_audio_size:
                raise ValidationError(
                    f"音频文件过大: {len(audio_content)} bytes. "
                    f"最大允许: {config.max_audio_size} bytes"
                )

            logger.info(
                f"Audio file received: {filename}, "
                f"size: {len(audio_content)} bytes"
            )

            original_text = await asr_service.transcribe(audio_content, filename)
            logger.info(
                f"ASR transcription successful. "
                f"Text length: {len(original_text)}"
            )
        else:
            input_type = "text"

            # Whitespace-only text is accepted; only the empty string is rejected.
            if not text:
                raise ValidationError("文本内容不能为空")

            original_text = text
            logger.info(
                f"Text input received. "
                f"Length: {len(original_text)}"
            )

        parsed_data = await parser_service.parse(original_text)
        logger.info(
            f"Semantic parsing successful. "
            f"Mood: {'present' if parsed_data.mood else 'none'}, "
            f"Inspirations: {len(parsed_data.inspirations)}, "
            f"Todos: {len(parsed_data.todos)}"
        )

        record_id = str(uuid.uuid4())
        record_timestamp = utc_timestamp()

        record = RecordData(
            record_id=record_id,
            timestamp=record_timestamp,
            input_type=input_type,
            original_text=original_text,
            parsed_data=parsed_data
        )

        storage_service.save_record(record)
        logger.info(f"Record saved: {record_id}")

        if parsed_data.mood:
            storage_service.append_mood(
                parsed_data.mood,
                record_id,
                record_timestamp
            )
            logger.info("Mood data saved")

        if parsed_data.inspirations:
            storage_service.append_inspirations(
                parsed_data.inspirations,
                record_id,
                record_timestamp
            )
            logger.info(
                f"{len(parsed_data.inspirations)} "
                f"inspiration(s) saved"
            )

        if parsed_data.todos:
            storage_service.append_todos(
                parsed_data.todos,
                record_id,
                record_timestamp
            )
            logger.info(
                f"{len(parsed_data.todos)} "
                f"todo(s) saved"
            )

        response = ProcessResponse(
            record_id=record_id,
            timestamp=record_timestamp,
            mood=parsed_data.mood,
            inspirations=parsed_data.inspirations,
            todos=parsed_data.todos
        )

        logger.info("Request processed successfully")

        return response

    except ValidationError as e:
        # Input validation error - HTTP 400
        logger.warning(
            f"Validation error: {e.message}",
            exc_info=True
        )
        return JSONResponse(
            status_code=400,
            content={
                "error": e.message,
                "timestamp": timestamp
            }
        )

    except ASRServiceError as e:
        # ASR service error - HTTP 500
        logger.error(
            f"ASR service unavailable: {e.message}",
            exc_info=True
        )
        return JSONResponse(
            status_code=500,
            content={
                "error": "语音识别服务不可用",
                "detail": e.message,
                "timestamp": timestamp
            }
        )

    except SemanticParserError as e:
        # Semantic parser error - HTTP 500
        logger.error(
            f"Semantic parser unavailable: {e.message}",
            exc_info=True
        )
        return JSONResponse(
            status_code=500,
            content={
                "error": "语义解析服务不可用",
                "detail": e.message,
                "timestamp": timestamp
            }
        )

    except StorageError as e:
        # Storage error - HTTP 500
        logger.error(
            f"Storage error: {str(e)}",
            exc_info=True
        )
        return JSONResponse(
            status_code=500,
            content={
                "error": "数据存储失败",
                "detail": str(e),
                "timestamp": timestamp
            }
        )

    except Exception as e:
        # Unexpected error - HTTP 500
        logger.error(
            f"Unexpected error: {str(e)}",
            exc_info=True
        )
        return JSONResponse(
            status_code=500,
            content={
                "error": "服务器内部错误",
                "detail": str(e),
                "timestamp": timestamp
            }
        )

    finally:
        # Close each client independently so one failing close() cannot
        # leak the other client or replace the response built above.
        for service in (asr_service, parser_service):
            if service is None:
                continue
            try:
                await service.close()
            except Exception:
                logger.warning("Failed to close service client", exc_info=True)
        # Cleared last so every log line above still carries the request id.
        clear_request_id()
合并两个来源的心情数据(去重,优先使用 records 中的数据) + # 同时需要补充 moods.json 中缺失的 original_text + mood_dict = {} + + # 先添加 moods.json 中的数据 + for mood in moods_from_file: + mood_dict[mood["record_id"]] = mood + # 如果没有 original_text,设置为空字符串 + if "original_text" not in mood: + mood["original_text"] = "" + + # 再添加/覆盖 records.json 中的数据(包含 original_text) + for mood in moods_from_records: + mood_dict[mood["record_id"]] = mood + + # 转换为列表并按时间排序(最新的在前) + all_moods = list(mood_dict.values()) + all_moods.sort(key=lambda x: x["timestamp"], reverse=True) + + logger.info(f"Total unique moods: {len(all_moods)}") + + return {"moods": all_moods} + except Exception as e: + logger.error(f"Failed to get moods: {e}", exc_info=True) + return JSONResponse( + status_code=500, + content={"error": str(e)} + ) + + +@app.get("/api/inspirations") +async def get_inspirations(): + """Get all inspirations.""" + try: + config = get_config() + storage_service = StorageService(str(config.data_dir)) + inspirations = storage_service._read_json_file(storage_service.inspirations_file) + return {"inspirations": inspirations} + except Exception as e: + logger.error(f"Failed to get inspirations: {e}") + return JSONResponse( + status_code=500, + content={"error": str(e)} + ) + + +@app.get("/api/todos") +async def get_todos(): + """Get all todos.""" + try: + config = get_config() + storage_service = StorageService(str(config.data_dir)) + todos = storage_service._read_json_file(storage_service.todos_file) + return {"todos": todos} + except Exception as e: + logger.error(f"Failed to get todos: {e}") + return JSONResponse( + status_code=500, + content={"error": str(e)} + ) + + +@app.patch("/api/todos/{todo_id}") +async def update_todo(todo_id: str, status: str = Form(...)): + """Update todo status.""" + try: + config = get_config() + storage_service = StorageService(str(config.data_dir)) + todos = storage_service._read_json_file(storage_service.todos_file) + + # Find and update todo + updated = False + for todo in todos: + if 
def _build_chat_context(records, limit=10):
    """Render the last ``limit`` records as the text context for the chat prompt."""
    context_parts = []

    for record in records[-limit:]:
        if not isinstance(record, dict):
            continue

        original_text = record.get('original_text', '')
        timestamp = record.get('timestamp', '')

        # ``parsed_data`` and its members may be stored as explicit nulls.
        parsed_data = record.get('parsed_data') or {}
        mood = parsed_data.get('mood')
        inspirations = parsed_data.get('inspirations') or []
        todos = parsed_data.get('todos') or []

        context_entry = f"[{timestamp}] 用户说: {original_text}"

        if isinstance(mood, dict):
            context_entry += f"\n情绪: {mood.get('type')} (强度: {mood.get('intensity')})"

        # Drop entries without text: str.join raises TypeError on None.
        ideas = [
            insp['core_idea'] for insp in inspirations
            if isinstance(insp, dict) and insp.get('core_idea')
        ]
        if ideas:
            context_entry += f"\n灵感: {', '.join(ideas)}"

        tasks = [
            todo['task'] for todo in todos
            if isinstance(todo, dict) and todo.get('task')
        ]
        if tasks:
            context_entry += f"\n待办: {', '.join(tasks)}"

        context_parts.append(context_entry)

    return "\n\n".join(context_parts) if context_parts else "暂无历史记录"


@app.post("/api/chat")
async def chat_with_ai(text: str = Form(...)):
    """Chat with the AI assistant, using recent records as prompt context.

    The ten most recent entries of records.json (original text, mood,
    inspirations, todos) are rendered into the system prompt, and the user's
    message is sent to the GLM-4-Flash chat completions endpoint.

    Args:
        text: The user's chat message (form field).

    Returns:
        ``{"response": <reply>}``. Every failure path (storage error, timeout,
        connection error, non-200 status, empty reply) also returns this
        shape with a canned apology instead of raising.
    """
    fallback_reply = "抱歉,我现在有点累了,稍后再聊好吗?"

    try:
        config = get_config()
        storage_service = StorageService(str(config.data_dir))

        # The user's own records serve as the knowledge base for the prompt.
        records = storage_service._read_json_file(storage_service.records_file)
        context_text = _build_chat_context(records)

        system_prompt = f"""你是一个温柔、善解人意的AI陪伴助手。你的名字叫小喵。
你会用温暖、治愈的语气和用户聊天,给予他们情感支持和陪伴。
回复要简短、自然、有温度。

你可以参考用户的历史记录来提供更贴心的回复:

{context_text}

请基于这些背景信息,用温暖、理解的语气回复用户。如果用户提到之前的事情,你可以自然地关联起来。"""

        import httpx

        try:
            # Generous timeout: completions can take a while to generate.
            async with httpx.AsyncClient(timeout=60.0) as client:
                response = await client.post(
                    "https://open.bigmodel.cn/api/paas/v4/chat/completions",
                    headers={
                        "Authorization": f"Bearer {config.zhipu_api_key}",
                        "Content-Type": "application/json"
                    },
                    json={
                        "model": "glm-4-flash",
                        "messages": [
                            {"role": "system", "content": system_prompt},
                            {"role": "user", "content": text}
                        ],
                        "temperature": 0.8,
                        "top_p": 0.9
                    }
                )

            if response.status_code != 200:
                logger.error(f"AI chat failed: {response.status_code} {response.text}")
                return {"response": fallback_reply}

            result = response.json()
            # Tolerate an empty/missing ``choices`` list or a null ``message``.
            choices = result.get("choices") or [{}]
            message = choices[0].get("message") or {}
            ai_response = message.get("content") or ""

            if not ai_response:
                # An empty bubble is worse for the user than the apology.
                logger.error("AI chat returned an empty reply")
                return {"response": fallback_reply}

            logger.info("AI chat successful with RAG context")
            return {"response": ai_response}

        except httpx.TimeoutException:
            logger.error("AI API timeout")
            return {"response": "抱歉,网络有点慢,请稍后再试~"}
        except httpx.ConnectError:
            logger.error("AI API connection error")
            return {"response": "抱歉,无法连接到AI服务,请检查网络连接~"}
        except Exception as e:
            logger.error(f"AI API call error: {e}")
            return {"response": fallback_reply}

    except Exception as e:
        logger.error(f"Chat error: {e}")
        return {"response": fallback_reply}
Path("generated_images") + default_image = generated_images_dir / "default_character.jpeg" + + # 优先使用默认形象 + if default_image.exists(): + logger.info("Loading default character image") + user_config.save_character_image( + image_url=str(default_image), + prompt="默认治愈系小猫形象", + preferences={ + "color": "薰衣草紫", + "personality": "温柔", + "appearance": "无配饰", + "role": "陪伴式朋友" + } + ) + user_data = user_config.load_config() + logger.info("Default character image loaded successfully") + + # 如果没有默认形象,尝试加载最新的本地图片 + elif generated_images_dir.exists(): + # 获取所有图片文件 + image_files = list(generated_images_dir.glob("character_*.jpeg")) + if image_files: + # 按修改时间排序,获取最新的 + latest_image = max(image_files, key=lambda p: p.stat().st_mtime) + + # 构建 URL 路径(使用动态 base_url) + image_url = f"{base_url}/generated_images/{latest_image.name}" + + # 从文件名提取偏好设置 + # 格式: character_颜色_性格_时间戳.jpeg + parts = latest_image.stem.split('_') + if len(parts) >= 3: + color = parts[1] + personality = parts[2] + + # 更新配置 + user_config.save_character_image( + image_url=str(latest_image), + prompt=f"Character with {color} and {personality}", + preferences={ + "color": color, + "personality": personality, + "appearance": "无配饰", + "role": "陪伴式朋友" + } + ) + + # 重新加载配置 + user_data = user_config.load_config() + + logger.info(f"Loaded latest local image: {latest_image.name}") + + # 如果 image_url 是本地路径,转换为 URL + image_url = user_data.get('character', {}).get('image_url') + if image_url and not image_url.startswith('http'): + # 本地路径,转换为 URL(处理 Windows 和 Unix 路径) + image_path = Path(image_url) + if image_path.exists(): + # 使用正斜杠构建 URL(使用动态 base_url) + user_data['character']['image_url'] = f"{base_url}/generated_images/{image_path.name}" + else: + # 如果路径不存在,尝试只使用文件名 + filename = image_path.name + full_path = Path("generated_images") / filename + if full_path.exists(): + user_data['character']['image_url'] = f"{base_url}/generated_images/{filename}" + logger.info(f"Converted path to URL: {filename}") + + return user_data 
+ except Exception as e: + logger.error(f"Failed to get user config: {e}") + return JSONResponse( + status_code=500, + content={"error": str(e)} + ) + + +@app.post("/api/character/generate") +async def generate_character( + request: Request, + color: str = Form(...), + personality: str = Form(...), + appearance: str = Form(...), + role: str = Form(...) +): + """Generate AI character image based on preferences. + + Args: + color: Color preference (温暖粉/天空蓝/薄荷绿等) + personality: Personality trait (活泼/温柔/聪明等) + appearance: Appearance feature (戴眼镜/戴帽子等) + role: Character role (陪伴式朋友/温柔照顾型长辈等) + + Returns: + JSON with image_url, prompt, and preferences + """ + try: + from app.image_service import ImageGenerationService, ImageGenerationError + from app.user_config import UserConfig + from datetime import datetime + from pathlib import Path + import httpx + + config = get_config() + + # 检查是否配置了 MiniMax API + minimax_api_key = getattr(config, 'minimax_api_key', None) + + if not minimax_api_key: + logger.warning("MiniMax API key not configured") + return JSONResponse( + status_code=400, + content={ + "error": "MiniMax API 未配置", + "detail": "请在 .env 文件中配置 MINIMAX_API_KEY。访问 https://platform.minimaxi.com/ 获取 API 密钥。" + } + ) + + # 初始化服务 + image_service = ImageGenerationService( + api_key=minimax_api_key, + group_id=getattr(config, 'minimax_group_id', None) + ) + user_config = UserConfig(str(config.data_dir)) + + try: + logger.info( + f"Generating character image: " + f"color={color}, personality={personality}, " + f"appearance={appearance}, role={role}" + ) + + # 生成图像 + result = await image_service.generate_image( + color=color, + personality=personality, + appearance=appearance, + role=role, + aspect_ratio="1:1", + n=1 + ) + + # 下载图片到本地 + generated_images_dir = Path("generated_images") + generated_images_dir.mkdir(exist_ok=True) + + # 生成文件名:character_颜色_性格_时间戳.jpeg + timestamp = datetime.now().strftime("%Y%m%d_%H%M%S") + filename = 
f"character_{color}_{personality}_{timestamp}.jpeg" + local_path = generated_images_dir / filename + + logger.info(f"Downloading image to: {local_path}") + + # 下载图片 + async with httpx.AsyncClient(timeout=60.0) as client: + response = await client.get(result['url']) + if response.status_code == 200: + with open(local_path, 'wb') as f: + f.write(response.content) + logger.info(f"Image saved to: {local_path}") + else: + logger.error(f"Failed to download image: HTTP {response.status_code}") + # 如果下载失败,仍然使用远程 URL + local_path = None + + # 保存到用户配置 + preferences = { + "color": color, + "personality": personality, + "appearance": appearance, + "role": role + } + + # 使用本地路径(如果下载成功) + image_url = str(local_path) if local_path else result['url'] + + user_config.save_character_image( + image_url=image_url, + prompt=result['prompt'], + revised_prompt=result.get('metadata', {}).get('revised_prompt'), + preferences=preferences + ) + + logger.info(f"Character image generated and saved: {image_url}") + + # 返回 HTTP URL(使用动态 base_url) + base_url = get_base_url(request) + if local_path: + http_url = f"{base_url}/generated_images/{local_path.name}" + else: + http_url = image_url + + return { + "success": True, + "image_url": http_url, + "prompt": result['prompt'], + "preferences": preferences, + "task_id": result.get('task_id') + } + + finally: + await image_service.close() + + except ImageGenerationError as e: + logger.error(f"Image generation error: {e.message}") + + # 提供更友好的错误信息 + error_detail = e.message + if "invalid api key" in e.message.lower(): + error_detail = "API 密钥无效,请检查 MINIMAX_API_KEY 配置是否正确" + elif "quota" in e.message.lower() or "配额" in e.message: + error_detail = "API 配额不足,请充值或等待配额恢复" + elif "timeout" in e.message.lower() or "超时" in e.message: + error_detail = "请求超时,请检查网络连接后重试" + + return JSONResponse( + status_code=500, + content={ + "error": "图像生成失败", + "detail": error_detail + } + ) + + except Exception as e: + logger.error(f"Failed to generate character: {e}", 
@app.post("/api/character/select")
async def select_character(
    request: Request,
    filename: str = Form(...)
):
    """Select a previously generated character image as the current one.

    Args:
        request: Incoming request, used to derive the public base URL.
        filename: Bare file name of an image inside ``generated_images/``.

    Returns:
        JSON with ``success``, the HTTP ``image_url``, the ``filename`` and
        the ``preferences`` recovered from the file name (empty when the name
        does not follow ``character_<color>_<personality>_<timestamp>``).

    Raises:
        HTTPException: 400 when ``filename`` is not a bare file name, 404
            when no such file exists, 500 on any other failure.
    """
    try:
        from app.user_config import UserConfig
        from pathlib import Path

        # ``filename`` is client-controlled: reject anything that is not a
        # bare name so the lookup cannot leave generated_images/
        # (e.g. "../data/user_config.json").
        if (
            not filename
            or filename in {".", ".."}
            or "/" in filename
            or "\\" in filename
        ):
            raise HTTPException(status_code=400, detail="非法的文件名")

        config = get_config()
        user_config = UserConfig(str(config.data_dir))

        # is_file() rather than exists(): a directory is not a valid image.
        image_path = Path("generated_images") / filename
        if not image_path.is_file():
            raise HTTPException(status_code=404, detail="图片文件不存在")

        # Recover preferences from character_<color>_<personality>_<timestamp>;
        # appearance and role are not encoded in the name.
        parts = Path(filename).stem.split("_")
        if len(parts) >= 4:
            preferences = {
                "color": parts[1],
                "personality": parts[2],
                "appearance": "未知",
                "role": "未知"
            }
        else:
            preferences = {}

        # The config stores the local path; callers receive an HTTP URL.
        image_url = str(image_path)
        user_config.save_character_image(
            image_url=image_url,
            prompt=f"历史形象: {filename}",
            preferences=preferences
        )

        logger.info(f"Selected historical character: {filename}")

        base_url = get_base_url(request)
        http_url = f"{base_url}/generated_images/{filename}"

        return {
            "success": True,
            "image_url": http_url,
            "filename": filename,
            "preferences": preferences
        }

    except HTTPException:
        # Deliberate 4xx responses must not be rewrapped as 500.
        raise
    except Exception as e:
        logger.error(f"Error selecting character: {e}", exc_info=True)
        raise HTTPException(status_code=500, detail=str(e))
if __name__ == "__main__":
    import sys

    import uvicorn

    # Host, port and log level all come from the loaded configuration.
    try:
        config = init_config()
        setup_logging(log_level=config.log_level, log_file=config.log_file)

        uvicorn.run(
            "app.main:app",
            host=config.host,
            port=config.port,
            reload=False,
            log_level=config.log_level.lower()
        )
    except Exception as e:
        # ``exit`` is injected by the ``site`` module for interactive use and
        # can be missing (e.g. ``python -S``); ``sys.exit`` is always there.
        # The failure goes to stderr so it is not mixed into normal output.
        print(f"Failed to start application: {e}", file=sys.stderr)
        sys.exit(1)
class RecordData(BaseModel):
    """One stored user entry: its metadata plus the structured parse result.

    Every field is required.

    Attributes:
        record_id: Identifier of the record.
        timestamp: Creation time of the record, kept as a string.
        input_type: How the entry was captured, "audio" or "text".
        original_text: The text as entered, or as transcribed from audio.
        parsed_data: Mood, inspirations and todos extracted from the text.
    """
    record_id: str = Field(...)
    timestamp: str = Field(...)
    input_type: Literal["audio", "text"] = Field(...)
    original_text: str = Field(...)
    parsed_data: ParsedData = Field(...)
class SemanticParserService:
    """Service for parsing text into structured data using the GLM-4-Flash API.

    Sends the text together with a fixed system prompt to the chat
    completions endpoint and converts the JSON reply into mood, inspirations
    and todos. Every failure is logged and surfaced as SemanticParserError.

    Attributes:
        api_key: Zhipu AI API key used for the Authorization header
        client: Async HTTP client (30 s timeout) used for API requests
        api_url: GLM-4-Flash chat completions endpoint URL
        model: Model identifier sent with each request
        system_prompt: Prompt instructing the model to answer with JSON only

    Requirements: 3.1, 3.2, 3.3, 3.4, 3.5, 9.2, 9.5
    """

    def __init__(self, api_key: str):
        """Initialize the semantic parser service.

        Args:
            api_key: Zhipu AI API key for authentication

        Requirements: 3.1, 3.2
        """
        self.api_key = api_key
        self.client = httpx.AsyncClient(timeout=30.0)
        self.api_url = "https://open.bigmodel.cn/api/paas/v4/chat/completions"
        self.model = "glm-4-flash"

        # System prompt as specified in requirements
        self.system_prompt = (
            "你是一个专业的文本语义分析助手。请将用户输入的文本解析为结构化的 JSON 数据。\n\n"
            "你需要提取以下三个维度的信息:\n\n"
            "1. **情绪 (mood)**:\n"
            " - type: 情绪类型(如:喜悦、焦虑、平静、忧虑、兴奋、悲伤等中文词汇)\n"
            " - intensity: 情绪强度(1-10的整数,10表示最强烈)\n"
            " - keywords: 情绪关键词列表(3-5个中文词)\n\n"
            "2. **灵感 (inspirations)**:数组,每个元素包含:\n"
            " - core_idea: 核心观点或想法(20字以内的中文)\n"
            " - tags: 相关标签列表(3-5个中文词)\n"
            " - category: 所属分类(必须是:工作、生活、学习、创意 之一)\n\n"
            "3. **待办 (todos)**:数组,每个元素包含:\n"
            " - task: 任务描述(中文)\n"
            " - time: 时间信息(如:明天、下周、周五等,如果没有则为null)\n"
            " - location: 地点信息(如果没有则为null)\n"
            " - status: 状态(默认为\"pending\")\n\n"
            "**重要规则**:\n"
            "- 如果文本中没有某个维度的信息,mood 返回 null,inspirations 和 todos 返回空数组 []\n"
            "- 必须返回有效的 JSON 格式,不要添加任何其他说明文字\n"
            "- 所有字段名使用英文,内容使用中文\n"
            "- 直接返回 JSON,不要用 markdown 代码块包裹\n\n"
            "返回格式示例:\n"
            "{\n"
            " \"mood\": {\"type\": \"焦虑\", \"intensity\": 7, \"keywords\": [\"压力\", \"疲惫\", \"放松\"]},\n"
            " \"inspirations\": [{\"core_idea\": \"晚霞可以缓解压力\", \"tags\": [\"自然\", \"治愈\"], \"category\": \"生活\"}],\n"
            " \"todos\": [{\"task\": \"整理文档\", \"time\": \"明天\", \"location\": null, \"status\": \"pending\"}]\n"
            "}"
        )

    async def close(self):
        """Close the HTTP client.

        Call this when the service is no longer needed so the underlying
        connections are released.
        """
        await self.client.aclose()

    @staticmethod
    def _extract_json_text(content: str) -> str:
        """Return the JSON payload of a reply, unwrapping a markdown fence if present.

        A reply cut off before its closing fence is handled by taking
        everything after the opening fence; slicing with the ``-1`` that
        ``str.find`` returns would silently drop the last character.
        """
        for opening in ("```json", "```"):
            start = content.find(opening)
            if start == -1:
                continue
            start += len(opening)
            end = content.find("```", start)
            if end == -1:
                end = len(content)
            return content[start:end].strip()
        return content

    @staticmethod
    def _log_error(message: str, *, exc_info: bool = True) -> None:
        """Log ``message`` at ERROR level with a creation timestamp in ``extra``."""
        logger.error(
            message,
            exc_info=exc_info,
            extra={"timestamp": logger.makeRecord(
                logger.name, logging.ERROR, "", 0, message, (), None
            ).created}
        )

    async def parse(self, text: str) -> "ParsedData":
        """Parse text into structured data using the GLM-4-Flash API.

        Individual mood / inspiration / todo items that fail model validation
        are logged as warnings and dropped; they do not fail the whole parse.

        Args:
            text: Text content to parse

        Returns:
            ParsedData with mood (or None), inspirations and todos. Missing
            dimensions come back as None or empty lists.

        Raises:
            SemanticParserError: If the API call fails (non-200 status,
                timeout, network error) or the reply is not usable JSON

        Requirements: 3.1, 3.2, 3.3, 3.4, 3.5, 9.2, 9.5
        """
        try:
            headers = {
                "Authorization": f"Bearer {self.api_key}",
                "Content-Type": "application/json"
            }

            payload = {
                "model": self.model,
                "messages": [
                    {"role": "system", "content": self.system_prompt},
                    {"role": "user", "content": text}
                ],
                "temperature": 0.7,
                "top_p": 0.9
            }

            logger.info(f"Calling GLM-4-Flash API for semantic parsing. Text length: {len(text)}")

            response = await self.client.post(
                self.api_url,
                headers=headers,
                json=payload
            )

            if response.status_code != 200:
                error_msg = f"GLM-4-Flash API returned status {response.status_code}"
                try:
                    error_detail = response.json()
                    error_msg += f": {error_detail}"
                except Exception:
                    # Error body is not JSON; fall back to the raw text.
                    error_msg += f": {response.text}"

                # No exception is active here, so there is no traceback to attach.
                self._log_error(
                    f"Semantic parsing API call failed: {error_msg}",
                    exc_info=False
                )
                raise SemanticParserError(f"语义解析服务不可用: {error_msg}")

            try:
                result = response.json()
            except Exception as e:
                self._log_error(f"Failed to parse GLM-4-Flash API response: {str(e)}")
                raise SemanticParserError("语义解析服务不可用: 响应格式无效") from e

            try:
                content = result["choices"][0]["message"]["content"]
                if not isinstance(content, str):
                    raise TypeError(f"content is {type(content).__name__}, expected str")
            except (KeyError, IndexError, TypeError) as e:
                # TypeError covers a null ``message`` or non-string ``content``.
                self._log_error(f"Invalid API response structure: {str(e)}")
                raise SemanticParserError("语义解析服务不可用: 响应结构无效") from e

            try:
                # The prompt forbids markdown fences, but the model may add them anyway.
                parsed_json = json.loads(self._extract_json_text(content))
            except json.JSONDecodeError as e:
                self._log_error(f"Failed to parse JSON from API response: {str(e)}")
                raise SemanticParserError("语义解析服务不可用: JSON 解析失败") from e

            # Mood: a single optional object.
            mood = None
            if "mood" in parsed_json and parsed_json["mood"]:
                try:
                    mood_data = parsed_json["mood"]
                    if isinstance(mood_data, dict):
                        mood = MoodData(
                            type=mood_data.get("type"),
                            intensity=mood_data.get("intensity"),
                            keywords=mood_data.get("keywords", [])
                        )
                except Exception as e:
                    logger.warning(f"Failed to parse mood data: {str(e)}")
                    mood = None

            # Inspirations: keep the valid items, drop the rest.
            inspirations = []
            if "inspirations" in parsed_json and parsed_json["inspirations"]:
                for insp_data in parsed_json["inspirations"]:
                    try:
                        if isinstance(insp_data, dict):
                            inspirations.append(InspirationData(
                                core_idea=insp_data.get("core_idea", ""),
                                tags=insp_data.get("tags", []),
                                category=insp_data.get("category", "生活")
                            ))
                    except Exception as e:
                        logger.warning(f"Failed to parse inspiration data: {str(e)}")
                        continue

            # Todos: same best-effort policy as inspirations.
            todos = []
            if "todos" in parsed_json and parsed_json["todos"]:
                for todo_data in parsed_json["todos"]:
                    try:
                        if isinstance(todo_data, dict):
                            todos.append(TodoData(
                                task=todo_data.get("task", ""),
                                time=todo_data.get("time"),
                                location=todo_data.get("location"),
                                status=todo_data.get("status", "pending")
                            ))
                    except Exception as e:
                        logger.warning(f"Failed to parse todo data: {str(e)}")
                        continue

            logger.info(
                f"Semantic parsing successful. "
                f"Mood: {'present' if mood else 'none'}, "
                f"Inspirations: {len(inspirations)}, "
                f"Todos: {len(todos)}"
            )

            return ParsedData(
                mood=mood,
                inspirations=inspirations,
                todos=todos
            )

        except SemanticParserError:
            # Already logged and classified above.
            raise

        except httpx.TimeoutException as e:
            self._log_error(f"GLM-4-Flash API request timeout: {str(e)}")
            raise SemanticParserError("语义解析服务不可用: 请求超时") from e

        except httpx.RequestError as e:
            self._log_error(f"GLM-4-Flash API request failed: {str(e)}")
            raise SemanticParserError("语义解析服务不可用: 网络错误") from e

        except Exception as e:
            self._log_error(f"Unexpected error in semantic parser service: {str(e)}")
            raise SemanticParserError(f"语义解析服务不可用: {str(e)}") from e
+ +Requirements: 7.1, 7.2, 7.3, 7.4, 7.5, 7.6, 7.7 +""" + +import json +import uuid +from pathlib import Path +from typing import List, Optional +from datetime import datetime + +from app.models import RecordData, MoodData, InspirationData, TodoData + + +class StorageError(Exception): + """Exception raised when storage operations fail. + + This exception is raised when file operations (read/write) fail, + such as due to permission issues, disk space, or I/O errors. + + Requirements: 7.6 + """ + pass + + +class StorageService: + """Service for managing JSON file storage. + + This service handles persistence of records, moods, inspirations, and todos + to separate JSON files. It ensures file initialization, generates unique IDs, + and handles errors appropriately. + + Attributes: + data_dir: Directory path for storing JSON files + records_file: Path to records.json + moods_file: Path to moods.json + inspirations_file: Path to inspirations.json + todos_file: Path to todos.json + + Requirements: 7.1, 7.2, 7.3, 7.4, 7.5, 7.6, 7.7 + """ + + def __init__(self, data_dir: str): + """Initialize the storage service. + + Args: + data_dir: Directory path for storing JSON files + """ + self.data_dir = Path(data_dir) + self.records_file = self.data_dir / "records.json" + self.moods_file = self.data_dir / "moods.json" + self.inspirations_file = self.data_dir / "inspirations.json" + self.todos_file = self.data_dir / "todos.json" + + # Ensure data directory exists + self.data_dir.mkdir(parents=True, exist_ok=True) + + def _ensure_file_exists(self, file_path: Path) -> None: + """Ensure a JSON file exists and is initialized with default data. + + If the file doesn't exist, creates it with sample Chinese data. 
+ + Args: + file_path: Path to the JSON file + + Raises: + StorageError: If file creation fails + + Requirements: 7.5 + """ + if not file_path.exists(): + try: + # 根据文件类型提供不同的默认数据 + default_data = [] + + if file_path.name == 'records.json': + default_data = self._get_default_records() + elif file_path.name == 'moods.json': + default_data = self._get_default_moods() + elif file_path.name == 'inspirations.json': + default_data = self._get_default_inspirations() + elif file_path.name == 'todos.json': + default_data = self._get_default_todos() + elif file_path.name == 'user_config.json': + default_data = self._get_default_user_config() + + with open(file_path, 'w', encoding='utf-8') as f: + json.dump(default_data, f, ensure_ascii=False, indent=2) + except Exception as e: + raise StorageError( + f"Failed to initialize file {file_path}: {str(e)}" + ) + + def _get_default_records(self) -> list: + """获取默认的记录数据""" + from datetime import datetime, timedelta + now = datetime.now() + + return [ + { + "record_id": "welcome-1", + "timestamp": (now - timedelta(hours=2)).isoformat() + "Z", + "input_type": "text", + "original_text": "今天天气真好,阳光洒在窗台上,心情也跟着明朗起来。决定下午去公园散散步,感受一下大自然的美好。", + "parsed_data": { + "mood": { + "type": "喜悦", + "intensity": 8, + "keywords": ["阳光", "明朗", "美好"] + }, + "inspirations": [ + { + "core_idea": "享受自然的美好时光", + "tags": ["自然", "散步", "放松"], + "category": "生活" + } + ], + "todos": [ + { + "task": "去公园散步", + "time": "下午", + "location": "公园", + "status": "pending" + } + ] + } + }, + { + "record_id": "welcome-2", + "timestamp": (now - timedelta(hours=5)).isoformat() + "Z", + "input_type": "text", + "original_text": "刚看完一本很棒的书,书中的一句话让我印象深刻:'生活不是等待暴风雨过去,而是学会在雨中跳舞。'这句话给了我很多启发。", + "parsed_data": { + "mood": { + "type": "平静", + "intensity": 7, + "keywords": ["启发", "思考", "感悟"] + }, + "inspirations": [ + { + "core_idea": "学会在困难中保持积极", + "tags": ["人生哲理", "积极心态", "成长"], + "category": "学习" + } + ], + "todos": [] + } + }, + { + "record_id": "welcome-3", + "timestamp": (now 
def _get_default_moods(self) -> list:
    """Return the seed mood entries used when moods.json is first created.

    The five entries carry the record ids ``welcome-1`` .. ``welcome-5``
    and are timestamped relative to the moment of the call, so a fresh
    install shows recent-looking sample data.

    Returns:
        A list of dicts with the keys ``record_id``, ``timestamp``,
        ``type``, ``intensity`` and ``keywords``.
    """
    from datetime import datetime, timedelta, timezone

    # The timestamps are suffixed with "Z", which denotes UTC. Deriving them
    # from the local wall clock would shift every entry by the server's UTC
    # offset, so take the current time in UTC and drop the tzinfo to keep the
    # exact "<naive isoformat>Z" layout.
    now = datetime.now(timezone.utc).replace(tzinfo=None)

    def stamp(**ago) -> str:
        """Format the instant ``ago`` before ``now`` as '<isoformat>Z'."""
        return (now - timedelta(**ago)).isoformat() + "Z"

    return [
        {
            "record_id": "welcome-1",
            "timestamp": stamp(hours=2),
            "type": "喜悦",
            "intensity": 8,
            "keywords": ["阳光", "明朗", "美好"]
        },
        {
            "record_id": "welcome-2",
            "timestamp": stamp(hours=5),
            "type": "平静",
            "intensity": 7,
            "keywords": ["启发", "思考", "感悟"]
        },
        {
            "record_id": "welcome-3",
            "timestamp": stamp(days=1, hours=3),
            "type": "温暖",
            "intensity": 9,
            "keywords": ["友谊", "珍贵", "陪伴"]
        },
        {
            "record_id": "welcome-4",
            "timestamp": stamp(days=2),
            "type": "兴奋",
            "intensity": 10,
            "keywords": ["成就感", "完成", "满足"]
        },
        {
            "record_id": "welcome-5",
            "timestamp": stamp(days=3),
            "type": "焦虑",
            "intensity": 6,
            "keywords": ["压力", "担心", "积极"]
        }
    ]
def _write_json_file(self, file_path: Path, data: List) -> None:
    """Write ``data`` to ``file_path`` as indented UTF-8 JSON.

    The payload is serialized in memory *before* the target file is opened.
    Opening with mode ``'w'`` truncates the file immediately, so serializing
    first ensures that a value ``json`` cannot encode leaves the previously
    stored content intact instead of replacing it with a partial document.

    Args:
        file_path: Path to the JSON file
        data: List of records to write

    Raises:
        StorageError: If serialization or file writing fails. The original
            exception is attached as ``__cause__``.

    Requirements: 7.6
    """
    try:
        payload = json.dumps(data, ensure_ascii=False, indent=2)
        with open(file_path, 'w', encoding='utf-8') as f:
            f.write(payload)
    except Exception as e:
        raise StorageError(
            f"Failed to write file {file_path}: {str(e)}"
        ) from e
def append_inspirations(
    self,
    inspirations: List[InspirationData],
    record_id: str,
    timestamp: str
) -> None:
    """Add the inspirations of one record to inspirations.json.

    Every inspiration is stored as its ``model_dump()`` output prefixed with
    the owning ``record_id`` and ``timestamp``. An empty ``inspirations``
    list is a no-op: the file is neither read nor written.

    Args:
        inspirations: List of InspirationData objects to append
        record_id: Associated record ID
        timestamp: ISO 8601 timestamp

    Raises:
        StorageError: If file reading or writing fails

    Requirements: 7.3
    """
    if inspirations:
        # Metadata keys come first so they lead each stored entry.
        metadata = {"record_id": record_id, "timestamp": timestamp}

        stored = self._read_json_file(self.inspirations_file)
        stored.extend(
            {**metadata, **item.model_dump()} for item in inspirations
        )

        self._write_json_file(self.inspirations_file, stored)
"""User configuration management for Voice Text Processor.

This module handles user-specific configurations, including
the generated cat character image settings.

Requirements: PRD - AI character generation module
"""

import json
import logging
import os
from datetime import datetime, timezone
from typing import Optional, Dict, List

logger = logging.getLogger(__name__)


class UserConfig:
    """User configuration manager backed by a single JSON file.

    This class manages user-specific settings, particularly
    the generated cat character image configuration.

    Attributes:
        config_dir: Directory for storing user configurations
        config_file: Path to the user config JSON file
    """

    def __init__(self, config_dir: str = "data"):
        """Initialize user configuration manager.

        Creates ``config_dir`` if needed and writes a default
        ``user_config.json`` into it when no such file exists yet.

        Args:
            config_dir: Directory for storing configurations
        """
        self.config_dir = config_dir
        self.config_file = os.path.join(config_dir, "user_config.json")

        # Make sure the directory exists before touching the file
        os.makedirs(config_dir, exist_ok=True)

        # Seed the configuration file on first use
        if not os.path.exists(self.config_file):
            self._init_config_file()

    @staticmethod
    def _utc_timestamp() -> str:
        """Return the current UTC time as '<naive isoformat>Z'.

        Produces the same layout as the former
        ``datetime.utcnow().isoformat() + "Z"`` without relying on
        ``utcnow()``, which is deprecated since Python 3.12.
        """
        return datetime.now(timezone.utc).replace(tzinfo=None).isoformat() + "Z"

    def _init_config_file(self):
        """Initialize the configuration file with default values."""
        created = self._utc_timestamp()
        default_config = {
            "user_id": "default_user",
            "created_at": created,
            "character": {
                "image_url": "",  # empty string: the front-end shows a placeholder
                "prompt": "默认治愈系小猫形象",
                "revised_prompt": "一只薰衣草紫色的温柔猫咪,治愈系风格,温暖的陪伴者",
                "preferences": {
                    "color": "薰衣草紫",
                    "personality": "温柔",
                    "appearance": "无配饰",
                    "role": "陪伴式朋友"
                },
                "generated_at": created,
                "generation_count": 0
            },
            "settings": {
                "theme": "light",
                "language": "zh-CN"
            }
        }

        with open(self.config_file, 'w', encoding='utf-8') as f:
            json.dump(default_config, f, ensure_ascii=False, indent=2)

        logger.info(f"Initialized user config file: {self.config_file}")

    def _read_config(self) -> Dict:
        """Read and parse the config file, requiring a JSON object at top level.

        Raises:
            OSError: If the file cannot be opened or read
            ValueError: If the content is not valid JSON/UTF-8 or is not a
                JSON object
        """
        with open(self.config_file, 'r', encoding='utf-8') as f:
            config = json.load(f)
        if not isinstance(config, dict):
            raise ValueError("user config must be a JSON object")
        return config

    def load_config(self) -> Dict:
        """Load user configuration from file.

        If the file is missing, unreadable or corrupt, it is reset to the
        defaults and read again exactly once. A failure of that second
        attempt propagates to the caller instead of retrying forever.

        Returns:
            Dictionary containing user configuration
        """
        try:
            return self._read_config()
        except (OSError, ValueError) as e:
            logger.error(f"Failed to load user config: {str(e)}")

        # Fall back to the default configuration (single retry, no recursion)
        self._init_config_file()
        return self._read_config()

    def save_config(self, config: Dict):
        """Save user configuration to file.

        The configuration is serialized before the file is opened, so a
        value that cannot be encoded as JSON does not truncate the
        existing file.

        Args:
            config: Configuration dictionary to save
        """
        try:
            payload = json.dumps(config, ensure_ascii=False, indent=2)
            with open(self.config_file, 'w', encoding='utf-8') as f:
                f.write(payload)
            logger.info("User config saved successfully")
        except Exception as e:
            logger.error(f"Failed to save user config: {str(e)}")
            raise

    def get_character_config(self) -> Dict:
        """Get character configuration.

        Returns:
            Dictionary containing character settings (empty if the stored
            configuration has no "character" section)
        """
        config = self.load_config()
        return config.get("character", {})

    def save_character_image(
        self,
        image_url: str,
        prompt: str,
        revised_prompt: Optional[str] = None,
        preferences: Optional[Dict] = None
    ):
        """Save generated character image configuration.

        Args:
            image_url: URL of the generated image
            prompt: Prompt used for generation
            revised_prompt: AI-revised prompt (optional)
            preferences: User preferences used (optional)
        """
        config = self.load_config()

        # The file may have been seeded elsewhere with a smaller schema
        # (no "character" section or no "generation_count"), so do not
        # assume those keys exist.
        character = config.setdefault("character", {})
        character["image_url"] = image_url
        character["prompt"] = prompt
        character["revised_prompt"] = revised_prompt or prompt
        character["generated_at"] = self._utc_timestamp()
        character["generation_count"] = character.get("generation_count", 0) + 1

        if preferences:
            character["preferences"] = preferences

        self.save_config(config)
        logger.info(f"Character image saved: {image_url[:50]}...")

    def get_character_image_url(self) -> Optional[str]:
        """Get the current character image URL.

        Returns:
            The stored ``image_url`` value. This is an empty string for the
            default configuration (no image generated yet) and None when
            the key is absent.
        """
        character = self.get_character_config()
        return character.get("image_url")

    def get_character_preferences(self) -> Dict:
        """Get character generation preferences.

        Returns:
            Dictionary containing color, personality, appearance, role
        """
        character = self.get_character_config()
        # NOTE(review): this fallback colour differs from the one written by
        # _init_config_file -- confirm which default is intended.
        return character.get("preferences", {
            "color": "温暖粉",
            "personality": "温柔",
            "appearance": "无配饰",
            "role": "陪伴式朋友"
        })

    def update_character_preferences(
        self,
        color: Optional[str] = None,
        personality: Optional[str] = None,
        appearance: Optional[str] = None,
        role: Optional[str] = None
    ):
        """Update character generation preferences.

        Only arguments with a truthy value are applied; the others keep
        their stored value.

        Args:
            color: Color preference (optional)
            personality: Personality trait (optional)
            appearance: Appearance feature (optional)
            role: Character role (optional)
        """
        config = self.load_config()
        # Create missing sections instead of failing with KeyError
        preferences = config.setdefault("character", {}).setdefault("preferences", {})

        if color:
            preferences["color"] = color
        if personality:
            preferences["personality"] = personality
        if appearance:
            preferences["appearance"] = appearance
        if role:
            preferences["role"] = role

        self.save_config(config)
        logger.info("Character preferences updated")

    def get_generation_count(self) -> int:
        """Get the number of times character has been generated.

        Returns:
            Generation count
        """
        character = self.get_character_config()
        return character.get("generation_count", 0)

    def has_character_image(self) -> bool:
        """Check if user has a character image set.

        The default configuration stores an empty string as ``image_url``,
        so both None and "" count as "no image".

        Returns:
            True if character image exists, False otherwise
        """
        return bool(self.get_character_image_url())
+ git commit -m "Deploy: Update frontend build" + ``` + +3. **推送到 Hugging Face** + ```bash + git push hf main + ``` + +### 配置 Hugging Face Secrets + +在 Space 的 Settings → Repository secrets 中添加: + +**必需:** +- `ZHIPU_API_KEY` - 智谱 AI API 密钥 + - 获取:https://open.bigmodel.cn/ + +**可选:** +- `MINIMAX_API_KEY` - MiniMax API 密钥 +- `MINIMAX_GROUP_ID` - MiniMax Group ID + - 获取:https://platform.minimaxi.com/ + +### 访问应用 + +部署成功后,访问: +- **前端应用**: `https://your-space.hf.space/app` +- **Gradio 界面**: `https://your-space.hf.space/gradio` +- **API 文档**: `https://your-space.hf.space/docs` + +### 文件结构 + +``` +. +├── app.py # Hugging Face 入口文件 +├── app/ # FastAPI 后端 +│ ├── main.py # 主应用 +│ └── ... +├── frontend/ +│ ├── dist/ # 构建产物(需要提交) +│ │ ├── index.html +│ │ └── assets/ +│ └── ... +├── requirements_hf.txt # Python 依赖 +└── README_HF.md # Hugging Face 说明 +``` + +### 故障排查 + +**问题:前端 404** +- 检查 `frontend/dist/` 是否存在 +- 确认已运行 `npm run build` +- 查看 Space 日志确认文件已上传 + +**问题:API 调用失败** +- 检查 Secrets 是否正确配置 +- 查看 Space 日志中的错误信息 +- 确认 API 密钥有效 + +**问题:静态资源加载失败** +- 检查 `frontend/dist/assets/` 是否存在 +- 确认 CSS 和 JS 文件已生成 +- 查看浏览器控制台的网络请求 + +### 本地测试 + +在部署前本地测试: + +```bash +# 构建前端 +cd frontend && npm run build && cd .. + +# 运行应用 +python app.py +``` + +访问 `http://localhost:7860/app` 测试前端应用。 + +### 更新部署 + +每次修改前端代码后: + +1. 重新构建:`cd frontend && npm run build && cd ..` +2. 提交更改:`git add . && git commit -m "Update"` +3. 推送:`git push hf main` + +### 注意事项 + +- ✅ `frontend/dist/` 必须提交到 Git(不要在 .gitignore 中忽略) +- ✅ 每次修改前端代码都需要重新构建 +- ✅ Hugging Face Spaces 会自动重启应用 +- ⚠️ 首次部署可能需要 5-10 分钟 +- ⚠️ 免费 Space 可能会在不活跃时休眠 diff --git a/deployment/DEPLOY_CHECKLIST.md b/deployment/DEPLOY_CHECKLIST.md new file mode 100644 index 0000000000000000000000000000000000000000..5109fd2ec5e66f68e2b74224876f6aab2df58614 --- /dev/null +++ b/deployment/DEPLOY_CHECKLIST.md @@ -0,0 +1,137 @@ +# Hugging Face Spaces 部署检查清单 + +## 📋 部署前检查 + +### 1. 
依赖版本确认 +- [ ] `requirements_hf.txt` 中 `huggingface-hub==0.23.5` +- [ ] `requirements_hf.txt` 中 `gradio==4.44.0` +- [ ] `README_HF.md` frontmatter 中 `sdk_version: "4.44.0"` + +### 2. 文件结构确认 +- [ ] `app.py` 存在且正确 +- [ ] `frontend/dist/` 已构建(运行 `cd frontend && npm run build`) +- [ ] `data/` 目录存在 +- [ ] `generated_images/` 目录存在 + +### 3. 环境变量配置 +在 Space Settings → Repository secrets 中配置: +- [ ] `ZHIPU_API_KEY` - 必需 +- [ ] `MINIMAX_API_KEY` - 可选 +- [ ] `MINIMAX_GROUP_ID` - 可选 + +## 🚀 部署步骤 + +### 方法 1: 使用 deploy_to_hf.sh (推荐) + +```bash +# 1. 确保脚本可执行 +chmod +x deploy_to_hf.sh + +# 2. 运行部署脚本 +./deploy_to_hf.sh +``` + +### 方法 2: 手动部署 + +```bash +# 1. 构建前端 +cd frontend +npm install +npm run build +cd .. + +# 2. 提交到 Git +git add . +git commit -m "Deploy to Hugging Face Spaces" + +# 3. 推送到 Hugging Face +git push hf main +``` + +## 🐛 常见问题 + +### ImportError: cannot import name 'HfFolder' + +**原因:** `gradio` 和 `huggingface_hub` 版本不兼容 + +**解决方法:** +1. 确认 `requirements_hf.txt` 版本正确 +2. 在 Space Settings 中点击 "Factory reboot" +3. 查看 Container logs 确认安装的版本 + +### 前端 404 错误 + +**原因:** 前端未构建或未正确挂载 + +**解决方法:** +1. 本地运行 `cd frontend && npm run build` +2. 确认 `frontend/dist/` 目录存在且有内容 +3. 提交并推送 `frontend/dist/` 到仓库 + +### API 调用失败 + +**原因:** 环境变量未配置 + +**解决方法:** +1. 在 Space Settings → Repository secrets 添加 `ZHIPU_API_KEY` +2. 重启 Space +3. 查看 Logs 确认 API 密钥已加载 + +## 📊 部署后验证 + +### 1. 健康检查 +访问 `https://your-space.hf.space/health` 应返回: +```json +{ + "status": "healthy", + "timestamp": "..." +} +``` + +### 2. API 文档 +访问 `https://your-space.hf.space/docs` 查看 API 文档 + +### 3. 前端访问 +访问 `https://your-space.hf.space/` 应显示应用界面 + +### 4. 功能测试 +- [ ] 首页输入框可以输入文字 +- [ ] 点击麦克风可以录音(需要浏览器权限) +- [ ] 点击 AI 形象显示对话框 +- [ ] 底部导航可以切换页面 + +## 🔄 更新部署 + +### 代码更新 +```bash +git add . +git commit -m "Update: description" +git push hf main +``` + +### 强制重建 +如果遇到缓存问题: +1. 进入 Space Settings +2. 点击 "Factory reboot" +3. 
FROM python:3.11-slim

WORKDIR /app

# Install system dependencies
RUN apt-get update && apt-get install -y \
    build-essential \
    && rm -rf /var/lib/apt/lists/*

# Copy the dependency manifest first so the pip layer is cached
COPY requirements.txt .

# Install Python dependencies
RUN pip install --no-cache-dir -r requirements.txt

# Copy application code. Everything is owned by UID 1000 because the app
# writes into data/, generated_images/ and logs/ at runtime and the container
# is started as that non-root user (see USER below).
COPY --chown=1000:1000 app/ ./app/
COPY --chown=1000:1000 data/ ./data/
COPY --chown=1000:1000 frontend/dist/ ./frontend/dist/

# Copy the start-up script
COPY --chown=1000:1000 start.py .

# Create the runtime directories and make the whole app tree writable for the
# runtime user
RUN useradd --create-home --uid 1000 user \
    && mkdir -p generated_images logs \
    && chown -R 1000:1000 /app

# Hugging Face Docker Spaces run the container as UID 1000; running as the
# same user everywhere avoids PermissionError on the directories above.
USER 1000

# Expose the service port
EXPOSE 7860

# Start the application
CMD ["python", "start.py"]
+ +### 配置 API 密钥 + +在 Space 的 **Settings → Repository secrets** 中配置: + +**必需:** +- `ZHIPU_API_KEY` - 智谱 AI API 密钥 + - 获取地址:https://open.bigmodel.cn/ + - 用途:语音识别、语义解析、AI 对话 + +**可选:** +- `MINIMAX_API_KEY` - MiniMax API 密钥 +- `MINIMAX_GROUP_ID` - MiniMax Group ID + - 获取地址:https://platform.minimaxi.com/ + - 用途:AI 形象生成 + +## 📖 使用说明 + +1. **首页快速记录** + - 点击麦克风录音或在输入框输入文字 + - AI 自动分析并分类保存 + +2. **查看分类数据** + - 点击顶部心情、灵感、待办图标 + - 查看不同类型的记录 + +3. **与 AI 对话** + - 点击 AI 形象显示问候对话框 + - 点击对话框中的聊天图标进入完整对话 + - AI 基于你的历史记录提供个性化回复 + +4. **定制 AI 形象** + - 点击右下角 ✨ 按钮 + - 选择颜色、性格、外观、角色 + - 生成专属形象(需要 MiniMax API) + +5. **心情气泡池** + - 点击顶部心情图标 + - 左右滑动查看不同日期的心情卡片 + - 点击卡片展开查看当天的气泡池 + - 可以拖拽气泡,感受物理引擎效果 + +## 📊 API 端点 + +- `POST /api/process` - 处理文本/语音输入 +- `POST /api/chat` - 与 AI 对话(RAG) +- `GET /api/records` - 获取所有记录 +- `GET /api/moods` - 获取情绪数据 +- `GET /api/inspirations` - 获取灵感 +- `GET /api/todos` - 获取待办事项 +- `POST /api/character/generate` - 生成角色形象 +- `GET /health` - 健康检查 +- `GET /docs` - API 文档 + +## 🔗 相关链接 + +- [GitHub 仓库](https://github.com/kernel-14/Nora) +- [详细文档](https://github.com/kernel-14/Nora/blob/main/README.md) +- [智谱 AI](https://open.bigmodel.cn/) +- [MiniMax](https://platform.minimaxi.com/) + +## 📝 技术栈 + +- **后端**: FastAPI + Python 3.11 +- **前端**: React + TypeScript + Vite +- **物理引擎**: Matter.js +- **AI 服务**: 智谱 AI (GLM-4) + MiniMax +- **部署**: Hugging Face Spaces (Docker) + +## 📄 License + +MIT License diff --git a/deployment/README_MODELSCOPE.md b/deployment/README_MODELSCOPE.md new file mode 100644 index 0000000000000000000000000000000000000000..4622b622566c7b95956ac346faf7c7311873302f --- /dev/null +++ b/deployment/README_MODELSCOPE.md @@ -0,0 +1,126 @@ +# 🌟 治愈系记录助手 - SoulMate AI Companion + +一个温暖、治愈的 AI 陪伴应用,帮助你记录心情、捕捉灵感、管理待办。 + +## ✨ 核心特性 + +- 🎤 **语音/文字快速记录** - 自动分类保存 +- 🤖 **AI 语义解析** - 智能提取情绪、灵感和待办 +- 💬 **AI 对话陪伴(RAG)** - 基于历史记录的个性化对话 +- 🖼️ **AI 形象定制** - 生成专属治愈系角色(720 种组合) +- 🫧 **物理引擎心情池** - 基于 Matter.js 的动态气泡可视化 + +## 🚀 快速开始 + +### 在线使用 + +直接访问本应用即可使用完整功能! 
+ +### 配置 API 密钥 + +在 ModelScope 的环境变量中配置: + +**必需:** +- `ZHIPU_API_KEY` - 智谱 AI API 密钥 + - 获取地址:https://open.bigmodel.cn/ + - 用途:语音识别、语义解析、AI 对话 + +**可选:** +- `MINIMAX_API_KEY` - MiniMax API 密钥 +- `MINIMAX_GROUP_ID` - MiniMax Group ID + - 获取地址:https://platform.minimaxi.com/ + - 用途:AI 形象生成 + +## 📖 使用说明 + +1. **首页快速记录** + - 点击麦克风录音或在输入框输入文字 + - AI 自动分析并分类保存 + +2. **查看分类数据** + - 点击顶部心情、灵感、待办图标 + - 查看不同类型的记录 + +3. **与 AI 对话** + - 点击 AI 形象显示问候对话框 + - 点击对话框中的聊天图标进入完整对话 + - AI 基于你的历史记录提供个性化回复 + +4. **定制 AI 形象** + - 点击右下角 ✨ 按钮 + - 选择颜色、性格、外观、角色 + - 生成专属形象(需要 MiniMax API) + +5. **心情气泡池** + - 点击顶部心情图标 + - 左右滑动查看不同日期的心情卡片 + - 点击卡片展开查看当天的气泡池 + - 可以拖拽气泡,感受物理引擎效果 + +## 📊 API 端点 + +- `POST /api/process` - 处理文本/语音输入 +- `POST /api/chat` - 与 AI 对话(RAG) +- `GET /api/records` - 获取所有记录 +- `GET /api/moods` - 获取情绪数据 +- `GET /api/inspirations` - 获取灵感 +- `GET /api/todos` - 获取待办事项 +- `POST /api/character/generate` - 生成角色形象 +- `GET /health` - 健康检查 +- `GET /docs` - API 文档 + +## 🔗 相关链接 + +- [GitHub 仓库](https://github.com/kernel-14/Nora) +- [详细文档](https://github.com/kernel-14/Nora/blob/main/README.md) +- [智谱 AI](https://open.bigmodel.cn/) +- [MiniMax](https://platform.minimaxi.com/) + +## 📝 技术栈 + +- **后端**: FastAPI + Python 3.11 +- **前端**: React + TypeScript + Vite +- **物理引擎**: Matter.js +- **AI 服务**: 智谱 AI (GLM-4) + MiniMax +- **部署**: ModelScope (Gradio) + +## 📄 License + +MIT License + +--- + +## 🚀 部署到 ModelScope + +### 方法一:通过 Git 导入 + +1. 在 ModelScope 创建新的应用空间 +2. 选择 "从 Git 导入" +3. 输入仓库地址:`https://github.com/kernel-14/Nora.git` +4. 选择 Gradio SDK +5. 配置环境变量(见上方配置说明) +6. 点击创建 + +### 方法二:手动上传 + +1. 克隆本仓库到本地 +2. 在 ModelScope 创建新的应用空间 +3. 上传所有文件 +4. 确保 `configuration.json` 和 `app_modelscope.py` 在根目录 +5. 配置环境变量 +6. 
启动应用 + +### 文件说明 + +- `app_modelscope.py` - ModelScope 入口文件 +- `configuration.json` - ModelScope 配置文件 +- `requirements_modelscope.txt` - Python 依赖(使用兼容的 Gradio 版本) +- `app/` - FastAPI 后端代码 +- `frontend/dist/` - 前端构建产物 +- `data/` - 数据存储目录 + +### 注意事项 + +- 确保 `frontend/dist/` 目录已包含构建好的前端文件 +- 环境变量必须正确配置才能使用 AI 功能 +- ModelScope 使用 Gradio 4.44.1 版本以避免依赖冲突 diff --git a/deployment/app_modelscope.py b/deployment/app_modelscope.py new file mode 100644 index 0000000000000000000000000000000000000000..6f89cc73bc031bf110c330c3fde975c04487e3dc --- /dev/null +++ b/deployment/app_modelscope.py @@ -0,0 +1,187 @@ +""" +ModelScope 部署入口文件 +使用 Gradio 包装 FastAPI 应用 +""" + +import os +import sys +from pathlib import Path +import gradio as gr + +# 添加项目根目录到 Python 路径 +sys.path.insert(0, str(Path(__file__).parent)) + +# 设置环境变量 +os.environ.setdefault("DATA_DIR", "data") +os.environ.setdefault("LOG_LEVEL", "INFO") + +# 确保数据目录存在 +data_dir = Path("data") +data_dir.mkdir(exist_ok=True) + +generated_images_dir = Path("generated_images") +generated_images_dir.mkdir(exist_ok=True) + +# 导入 FastAPI 应用 +from app.main import app as fastapi_app +from fastapi.staticfiles import StaticFiles +from fastapi.responses import FileResponse + +# 挂载前端静态文件 +frontend_dist = Path(__file__).parent / "frontend" / "dist" +if frontend_dist.exists(): + # 挂载静态资源(CSS, JS) + assets_dir = frontend_dist / "assets" + if assets_dir.exists(): + fastapi_app.mount("/assets", StaticFiles(directory=str(assets_dir)), name="assets") + print(f"✅ 前端资源文件已挂载: {assets_dir}") + + print(f"✅ 前端应用已挂载: {frontend_dist}") +else: + print(f"⚠️ 前端构建目录不存在: {frontend_dist}") + +# 重写根路由以服务前端 +@fastapi_app.get("/", include_in_schema=False) +async def serve_root(): + """服务前端应用首页""" + if frontend_dist.exists(): + index_file = frontend_dist / "index.html" + if index_file.exists(): + return FileResponse(index_file) + return { + "service": "SoulMate AI Companion", + "status": "running", + "version": "1.0.0", + "message": "Welcome! 
# Catch-all route so that client-side (SPA) routes resolve to the front-end
@fastapi_app.get("/{full_path:path}", include_in_schema=False)
async def serve_spa(full_path: str):
    """Serve the front-end entry page for any path not handled elsewhere.

    Args:
        full_path: Request path without the leading slash.

    Returns:
        A FileResponse for ``frontend/dist/index.html``.

    Raises:
        HTTPException: 404 for backend paths (``api/...``, ``docs``,
            ``openapi.json``, ``health``) and when the front-end build is
            not present.
    """
    from fastapi import HTTPException

    # NOTE(review): this route is registered before gr.mount_gradio_app()
    # mounts "/gradio". If routes are matched in registration order, GET
    # requests below /gradio would be answered here with index.html --
    # verify, and if so register this route after the mount.

    # Backend paths must never be answered with the SPA shell
    if full_path.startswith("api/") or full_path in ("docs", "openapi.json", "health"):
        raise HTTPException(status_code=404, detail="Not found")

    # Return the front-end index.html (a missing dist directory also fails
    # this existence check)
    index_file = frontend_dist / "index.html"
    if index_file.exists():
        return FileResponse(index_file)

    # Report the missing build as an error status instead of a 200 response
    # carrying an error payload.
    raise HTTPException(status_code=404, detail="Frontend not found")
**心情气泡池** + - 点击顶部心情图标 + - 左右滑动查看不同日期的心情卡片 + - 点击卡片展开查看当天的气泡池 + - 可以拖拽气泡,感受物理引擎效果 + + --- + + ### ⚙️ 配置说明 + + 需要在 ModelScope 的环境变量中配置: + + **必需:** + - `ZHIPU_API_KEY` - 智谱 AI API 密钥 + - 获取地址:https://open.bigmodel.cn/ + - 用途:语音识别、语义解析、AI 对话 + + **可选:** + - `MINIMAX_API_KEY` - MiniMax API 密钥 + - `MINIMAX_GROUP_ID` - MiniMax Group ID + - 获取地址:https://platform.minimaxi.com/ + - 用途:AI 形象生成 + + --- + + ### 🔗 相关链接 + - [GitHub 仓库](https://github.com/kernel-14/Nora) + - [详细文档](https://github.com/kernel-14/Nora/blob/main/README.md) + - [智谱 AI](https://open.bigmodel.cn/) + - [MiniMax](https://platform.minimaxi.com/) + + --- + + ### 📊 API 端点 + + - `POST /api/process` - 处理文本/语音输入 + - `POST /api/chat` - 与 AI 对话(RAG) + - `GET /api/records` - 获取所有记录 + - `GET /api/moods` - 获取情绪数据 + - `GET /api/inspirations` - 获取灵感 + - `GET /api/todos` - 获取待办事项 + - `POST /api/character/generate` - 生成角色形象 + - `GET /health` - 健康检查 + - `GET /docs` - API 文档 + """) + +# 挂载 FastAPI 到 Gradio +app = gr.mount_gradio_app(fastapi_app, demo, path="/gradio") + +# 如果直接运行此文件 +if __name__ == "__main__": + import uvicorn + print("=" * 50) + print("🌟 治愈系记录助手 - SoulMate AI Companion") + print("=" * 50) + print(f"📍 前端应用: http://0.0.0.0:7860/") + print(f"📚 Gradio 界面: http://0.0.0.0:7860/gradio") + print(f"📖 API 文档: http://0.0.0.0:7860/docs") + print(f"🔍 健康检查: http://0.0.0.0:7860/health") + print("=" * 50) + + uvicorn.run(app, host="0.0.0.0", port=7860) diff --git a/deployment/configuration.json b/deployment/configuration.json new file mode 100644 index 0000000000000000000000000000000000000000..5b5c1319418e6fec0afb3e699e6527ff2de9d597 --- /dev/null +++ b/deployment/configuration.json @@ -0,0 +1,5 @@ +{ + "framework": "Gradio", + "task": "chat", + "allow_remote_code": true +} diff --git a/deployment/deploy_to_hf.bat b/deployment/deploy_to_hf.bat new file mode 100644 index 0000000000000000000000000000000000000000..305d99bfd80eb1fad31356e0f687910b20bee455 --- /dev/null +++ b/deployment/deploy_to_hf.bat @@ -0,0 +1,109 @@ 
+@echo off +chcp 65001 >nul +echo 🚀 开始部署到 Hugging Face Spaces... +echo. + +REM 检查是否已登录 +huggingface-cli whoami >nul 2>&1 +if errorlevel 1 ( + echo ❌ 请先登录 Hugging Face CLI + echo 运行: huggingface-cli login + pause + exit /b 1 +) + +REM 获取用户名 +for /f "tokens=2" %%i in ('huggingface-cli whoami ^| findstr "username:"') do set USERNAME=%%i +echo ✅ 已登录为: %USERNAME% +echo. + +REM 询问 Space 名称 +set /p SPACE_NAME="请输入 Space 名称 (默认: soulmate-ai-companion): " +if "%SPACE_NAME%"=="" set SPACE_NAME=soulmate-ai-companion + +echo. +echo 📦 准备文件... + +REM 构建前端 +echo 🔨 构建前端... +cd frontend +call npm install +call npm run build +cd .. + +if not exist "frontend\dist" ( + echo ❌ 前端构建失败 + pause + exit /b 1 +) + +echo ✅ 前端构建完成 +echo. + +REM 创建临时目录 +set TEMP_DIR=temp_hf_deploy +if exist %TEMP_DIR% rmdir /s /q %TEMP_DIR% +mkdir %TEMP_DIR% + +REM 复制文件 +echo 📋 复制文件... +copy app.py %TEMP_DIR%\ +copy requirements_hf.txt %TEMP_DIR%\requirements.txt +copy README_HF.md %TEMP_DIR%\README.md +copy .gitattributes %TEMP_DIR%\ +xcopy /E /I /Y app %TEMP_DIR%\app +xcopy /E /I /Y frontend\dist %TEMP_DIR%\frontend +mkdir %TEMP_DIR%\data +mkdir %TEMP_DIR%\generated_images + +REM 创建或克隆 Space +echo 🌐 准备 Space... +set SPACE_URL=https://huggingface.co/spaces/%USERNAME%/%SPACE_NAME% + +huggingface-cli repo info spaces/%USERNAME%/%SPACE_NAME% >nul 2>&1 +if errorlevel 1 ( + echo 🆕 创建新 Space... + huggingface-cli repo create %SPACE_NAME% --type space --space_sdk gradio +) else ( + echo ✅ Space 已存在 +) + +cd %TEMP_DIR% +git clone %SPACE_URL% . + +REM 复制文件到仓库 +echo 📤 准备上传... +copy ..\app.py . +copy ..\requirements_hf.txt requirements.txt +copy ..\README_HF.md README.md +copy ..\.gitattributes . +xcopy /E /I /Y ..\app app +xcopy /E /I /Y ..\frontend\dist frontend +if not exist data mkdir data +if not exist generated_images mkdir generated_images + +REM 提交并推送 +echo 🚀 上传到 Hugging Face... +git add . +git commit -m "Deploy to Hugging Face Spaces" +git push + +cd .. +rmdir /s /q %TEMP_DIR% + +echo. +echo ✅ 部署完成! +echo. 
#!/bin/bash

# Quick deployment script for Hugging Face Spaces.
#
# Builds the front-end, clones the target Space into a temporary directory,
# copies the deployable files into the clone, then commits and pushes.
# Run it from the directory that contains app.py, requirements_hf.txt,
# README_HF.md, app/ and frontend/.

# Stop at the first failing command. Without this, a failed `git clone` was
# followed by `git add/commit/push` executed inside the enclosing project
# repository instead of the Space clone.
set -euo pipefail

echo "🚀 开始部署到 Hugging Face Spaces..."

# Check that the CLI is logged in
if ! huggingface-cli whoami &> /dev/null; then
    echo "❌ 请先登录 Hugging Face CLI"
    echo "运行: huggingface-cli login"
    exit 1
fi

# Resolve the user name. Prefer a "username:"/"user:" labelled line; otherwise
# fall back to the first word of the first non-empty line.
# NOTE(review): the exact `whoami` output format depends on the CLI version --
# confirm against the installed huggingface_hub.
WHOAMI_OUTPUT=$(huggingface-cli whoami)
USERNAME=$(awk '/^(username|user):/ {print $2; exit}' <<< "$WHOAMI_OUTPUT")
if [ -z "$USERNAME" ]; then
    USERNAME=$(awk 'NF {print $1; exit}' <<< "$WHOAMI_OUTPUT")
fi
if [ -z "$USERNAME" ]; then
    echo "❌ 无法获取 Hugging Face 用户名"
    exit 1
fi
echo "✅ 已登录为: $USERNAME"

# Ask for the Space name (empty input or EOF selects the default)
read -r -p "请输入 Space 名称 (默认: soulmate-ai-companion): " SPACE_NAME || true
SPACE_NAME=${SPACE_NAME:-soulmate-ai-companion}

echo "📦 准备文件..."

# Build the front-end (in a subshell so the working directory is unchanged)
echo "🔨 构建前端..."
if ! (cd frontend && npm install && npm run build) || [ ! -d "frontend/dist" ]; then
    echo "❌ 前端构建失败"
    exit 1
fi

echo "✅ 前端构建完成"

# Temporary working directory; removed on every exit path
PROJECT_ROOT=$(pwd)
TEMP_DIR="temp_hf_deploy"
rm -rf "$TEMP_DIR"
trap 'rm -rf "$PROJECT_ROOT/$TEMP_DIR"' EXIT

# Create the Space if necessary, then clone it
echo "🌐 准备 Space..."
SPACE_URL="https://huggingface.co/spaces/$USERNAME/$SPACE_NAME"

if huggingface-cli repo info "spaces/$USERNAME/$SPACE_NAME" &> /dev/null; then
    echo "✅ Space 已存在,克隆中..."
else
    echo "🆕 创建新 Space..."
    # Best effort: if the Space already exists the clone below still works,
    # and if it really is missing the clone fails and aborts the script.
    huggingface-cli repo create "$SPACE_NAME" --type space --space_sdk gradio || true
fi

# Clone into a directory that does not exist yet. `git clone <url> .` refuses
# to run in a non-empty directory, so nothing may be copied in beforehand.
git clone "$SPACE_URL" "$TEMP_DIR"
cd "$TEMP_DIR"

# Copy the deployable files into the clone
echo "📋 复制文件..."
cp ../app.py .
cp ../requirements_hf.txt ./requirements.txt
cp ../README_HF.md ./README.md
if [ -f ../.gitattributes ]; then
    cp ../.gitattributes .
fi
# Replace previous copies so files deleted upstream do not linger in the Space
rm -rf app frontend/dist
cp -r ../app ./app
# Keep the dist/ level: the build output must end up at frontend/dist/
mkdir -p frontend
cp -r ../frontend/dist ./frontend/dist
mkdir -p data generated_images

# Commit and push (skip when the Space is already up to date)
echo "📤 准备上传..."
git add .
if git diff --cached --quiet; then
    echo "ℹ️ 没有需要提交的更改"
else
    echo "🚀 上传到 Hugging Face..."
    git commit -m "Deploy to Hugging Face Spaces"
    git push
fi

cd "$PROJECT_ROOT"
rm -rf "$TEMP_DIR"

echo ""
echo "✅ 部署完成!"
echo ""
echo "📍 Space URL: $SPACE_URL"
echo ""
echo "⚙️  下一步:"
echo "1. 访问 $SPACE_URL"
echo "2. 点击 Settings → Repository secrets"
echo "3. 添加环境变量:"
echo "   - ZHIPU_API_KEY (必需)"
echo "   - MINIMAX_API_KEY (可选)"
echo "   - MINIMAX_GROUP_ID (可选)"
echo ""
echo "🎉 完成后即可使用!"
+python-multipart==0.0.12 +python-dotenv==1.0.1 + +# Additional dependencies +aiofiles==24.1.0 diff --git a/deployment/requirements_modelscope.txt b/deployment/requirements_modelscope.txt new file mode 100644 index 0000000000000000000000000000000000000000..6abd4359fd9ed65f41f34ac3343618139170ca28 --- /dev/null +++ b/deployment/requirements_modelscope.txt @@ -0,0 +1,17 @@ +# ModelScope 部署依赖 +# 使用兼容的 Gradio 版本 + +# Gradio - 使用稳定版本 +gradio==4.44.1 + +# Core dependencies (compatible with Python 3.11+) +fastapi==0.115.0 +uvicorn[standard]==0.32.0 +pydantic==2.10.0 +pydantic-settings==2.6.0 +httpx==0.27.0 +python-multipart==0.0.12 +python-dotenv==1.0.1 + +# Additional dependencies +aiofiles==24.1.0 diff --git "a/docs/API_\351\205\215\347\275\256\350\257\264\346\230\216.md" "b/docs/API_\351\205\215\347\275\256\350\257\264\346\230\216.md" new file mode 100644 index 0000000000000000000000000000000000000000..cdd457e3204e2e58db4d9d24147e2e379a71cd33 --- /dev/null +++ "b/docs/API_\351\205\215\347\275\256\350\257\264\346\230\216.md" @@ -0,0 +1,113 @@ +# API 配置说明 + +## 自动检测 API 地址 + +前端应用会自动检测运行环境并配置正确的 API 地址。 + +### 支持的环境 + +#### 1. 生产环境(自动检测) + +**Hugging Face Spaces:** +- 域名包含:`hf.space`, `huggingface.co`, `gradio.live` +- API 地址:使用相同的协议和域名 +- 示例:`https://huggingface.co/spaces/kernel14/Nora` + - 前端:`https://huggingface.co/spaces/kernel14/Nora` + - API:`https://huggingface.co/spaces/kernel14/Nora/api/...` + +**ModelScope:** +- 域名包含:`modelscope.cn` +- API 地址:使用相同的协议和域名 +- 示例:`https://modelscope.cn/studios/xxx/yyy` + - 前端:`https://modelscope.cn/studios/xxx/yyy` + - API:`https://modelscope.cn/studios/xxx/yyy/api/...` + +#### 2. 局域网访问 + +**通过 IP 地址访问:** +- 前端:`http://192.168.1.100:5173` +- API:`http://192.168.1.100:8000` + +**通过主机名访问:** +- 前端:`http://mycomputer.local:5173` +- API:`http://mycomputer.local:8000` + +#### 3. 
本地开发 + +**默认配置:** +- 前端:`http://localhost:5173` +- API:`http://localhost:8000` + +### 环境变量配置(可选) + +如果需要手动指定 API 地址,可以在前端项目中创建 `.env.local` 文件: + +```env +VITE_API_URL=https://your-custom-api-url.com +``` + +### 检测逻辑 + +```typescript +const getApiBaseUrl = () => { + // 1. 优先使用环境变量 + if (import.meta.env.VITE_API_URL) { + return import.meta.env.VITE_API_URL; + } + + // 2. 检测生产环境(Hugging Face, ModelScope) + if (hostname.includes('hf.space') || + hostname.includes('huggingface.co') || + hostname.includes('modelscope.cn')) { + return `${protocol}//${hostname}`; + } + + // 3. 检测局域网访问 + if (hostname !== 'localhost' && hostname !== '127.0.0.1') { + return `${protocol}//${hostname}:8000`; + } + + // 4. 默认本地开发 + return 'http://localhost:8000'; +}; +``` + +### 调试 + +打开浏览器控制台,查看 API 地址: + +``` +🔗 API Base URL: https://huggingface.co/spaces/kernel14/Nora +``` + +### 常见问题 + +**Q: 为什么其他设备无法访问?** + +A: 确保: +1. 后端服务器绑定到 `0.0.0.0` 而不是 `127.0.0.1` +2. 防火墙允许端口 8000 +3. 使用正确的 IP 地址访问 + +**Q: Hugging Face 上 API 调用失败?** + +A: 检查: +1. 浏览器控制台的 API 地址是否正确 +2. 是否配置了必需的环境变量(`ZHIPU_API_KEY`) +3. 查看 Space 的日志是否有错误 + +**Q: 如何测试 API 连接?** + +A: 访问以下地址: +- 健康检查:`/health` +- API 文档:`/docs` +- 测试页面:`/test_api.html` + +### 部署检查清单 + +- [ ] 前端已重新构建(`npm run build`) +- [ ] `frontend/dist/` 已提交到 Git +- [ ] 环境变量已配置(Hugging Face Secrets / ModelScope 环境变量) +- [ ] Space 已重启 +- [ ] 浏览器控制台显示正确的 API 地址 +- [ ] 测试 API 调用是否成功 diff --git a/docs/FEATURE_SUMMARY.md b/docs/FEATURE_SUMMARY.md new file mode 100644 index 0000000000000000000000000000000000000000..8926b0fb7f01573888cfe799346179cc33ca7b94 --- /dev/null +++ b/docs/FEATURE_SUMMARY.md @@ -0,0 +1,368 @@ +# Home Interaction Feature - Implementation Summary + +## Overview + +This document summarizes the implementation of the home page interaction feature for the SoulMate AI Companion application. The feature includes two complementary functionalities: + +1. **Quick Recording** - Fast capture of thoughts, inspirations, and todos +2. 
**AI Chat (RAG-Enhanced)** - Intelligent conversation with context awareness + +## Key Features + +### 1. Home Page Quick Recording + +**Purpose:** Enable users to quickly record their thoughts through voice or text input. + +**Workflow:** +``` +User Input (Voice/Text) + ↓ +Call /api/process + ↓ +AI Semantic Analysis + ↓ +Save to records.json + ↓ +Auto-split to: + - moods.json (emotions) + - inspirations.json (ideas) + - todos.json (tasks) +``` + +**Characteristics:** +- ✅ One-time processing +- ✅ Automatic categorization +- ✅ Structured data output +- ✅ No conversation context needed + +### 2. AI Chat with RAG Enhancement + +**Purpose:** Provide intelligent, warm companionship through context-aware conversations. + +**Workflow:** +``` +User Message + ↓ +Call /api/chat + ↓ +Load Recent Records (last 10) + ↓ +Build RAG Context + ↓ +AI Generates Personalized Response + ↓ +Return to User +``` + +**Characteristics:** +- ✅ Each message calls API +- ✅ Uses RAG (Retrieval-Augmented Generation) +- ✅ Context from records.json +- ✅ Personalized, warm responses +- ✅ Conversation not saved + +## Technical Implementation + +### Backend Changes + +#### File: `app/main.py` + +**Updated `/api/chat` endpoint with RAG:** + +```python +@app.post("/api/chat") +async def chat_with_ai(text: str = Form(...)): + # Load user's records as RAG knowledge base + records = storage_service._read_json_file(storage_service.records_file) + recent_records = records[-10:] # Last 10 records + + # Build context from records + context_parts = [] + for record in recent_records: + context_entry = f"[{timestamp}] User said: {original_text}" + if mood: + context_entry += f"\nMood: {mood['type']}" + if inspirations: + context_entry += f"\nInspirations: {ideas}" + if todos: + context_entry += f"\nTodos: {tasks}" + context_parts.append(context_entry) + + # Build system prompt with context + system_prompt = f"""You are a warm, empathetic AI companion. 
+ You can reference the user's history to provide more caring responses: + + {context_text} + + Please respond with warmth and understanding based on this background.""" + + # Call AI API with context + response = await client.post( + "https://open.bigmodel.cn/api/paas/v4/chat/completions", + json={ + "model": "glm-4-flash", + "messages": [ + {"role": "system", "content": system_prompt}, + {"role": "user", "content": text} + ] + } + ) +``` + +### Frontend Changes + +#### New Component: `frontend/components/HomeInput.tsx` + +**Features:** +- Large circular microphone button with gradient +- Text input field +- Real-time processing status +- Success/error animations +- Auto-refresh data on completion + +**Key Functions:** + +```typescript +// Voice recording +const startRecording = async () => { + const stream = await navigator.mediaDevices.getUserMedia({ audio: true }); + const mediaRecorder = new MediaRecorder(stream); + // Recording logic... +}; + +// Process audio +const processAudio = async (audioBlob: Blob) => { + const file = new File([audioBlob], 'recording.webm'); + await apiService.processInput(file); + setShowSuccess(true); + onRecordComplete(); +}; + +// Process text +const processText = async () => { + await apiService.processInput(undefined, textInput); + setTextInput(''); + setShowSuccess(true); + onRecordComplete(); +}; +``` + +#### Updated: `frontend/App.tsx` + +Integrated HomeInput component into the home page: + +```typescript +
+ + + {/* Home Input Component */} +
+ +
+
+``` + +## Feature Comparison + +| Feature | Quick Recording | AI Chat | +|---------|----------------|---------| +| **Purpose** | Record thoughts | Intelligent companionship | +| **API Endpoint** | `/api/process` | `/api/chat` | +| **Call Frequency** | One-time | Per message | +| **Knowledge Base** | Not used | Uses RAG | +| **Output** | Structured data | Natural language | +| **Storage** | Auto-save to files | Not saved | +| **Context** | No context needed | Based on history | + +## Files Modified/Created + +### New Files + +1. **frontend/components/HomeInput.tsx** - Home input component +2. **test_home_input.py** - Feature test script +3. **首页交互功能说明.md** - Detailed documentation (Chinese) +4. **新功能实现总结.md** - Implementation summary (Chinese) +5. **快速开始-新功能.md** - Quick start guide (Chinese) +6. **功能架构图.md** - Architecture diagrams (Chinese) +7. **FEATURE_SUMMARY.md** - This file + +### Modified Files + +1. **app/main.py** - Updated `/api/chat` with RAG +2. **frontend/App.tsx** - Integrated HomeInput component +3. **README.md** - Updated documentation + +## Usage Examples + +### Example 1: Quick Recording + +``` +User Input: +"Today I'm feeling great. Had a new idea for an app. Need to buy books tomorrow." + +System Processing: +✓ Call /api/process +✓ Semantic analysis +✓ Save to records.json +✓ Split to: + - moods.json: feeling great + - inspirations.json: new app idea + - todos.json: buy books tomorrow +✓ Show "Record Successful" +``` + +### Example 2: AI Chat with RAG + +``` +User: "What have I been doing lately?" + +AI (based on history): +"From your records, you've been working on a project. Although work +has been tiring, you felt accomplished after completing it. You also +plan to wake up early tomorrow for a run. Great plans!" + +User: "How's my mood been?" + +AI: +"Your mood has had ups and downs. You felt tired during work, but +happy after completing tasks. Overall, you're a positive person who +finds joy in achievements even when tired. Keep it up!" 
+``` + +## Testing + +### Run Test Script + +```bash +# Ensure backend is running +python -m uvicorn app.main:app --reload + +# Run tests in another terminal +python test_home_input.py +``` + +### Test Coverage + +1. ✅ Home text input recording +2. ✅ AI chat without history +3. ✅ AI chat with RAG enhancement +4. ✅ Retrieve records + +## Performance Considerations + +### Frontend Optimizations + +- Debounce input handling +- Optimistic updates +- Component lazy loading +- Result caching + +### Backend Optimizations + +- Async processing (async/await) +- Connection pool reuse +- Limit history records (10 items) +- Response compression + +### RAG Optimizations + +- Load only recent records +- Streamline context information +- Cache common queries +- Vector database (future enhancement) + +## Security & Privacy + +### API Key Protection + +- Stored in `.env` file +- Not committed to version control +- Auto-filtered in logs + +### Input Validation + +- Frontend basic format validation +- Backend Pydantic model validation +- File size and format restrictions + +### Data Privacy + +- Local storage only +- No external data sharing +- Consider encryption for sensitive data + +## Future Enhancements + +### Short-term + +- [ ] Multi-turn conversation history +- [ ] Voice synthesis (AI voice response) +- [ ] Emotion analysis visualization +- [ ] Smart recommendations + +### Long-term + +- [ ] Vector database for better RAG +- [ ] Semantic similarity search +- [ ] Knowledge graph +- [ ] Multi-modal support (images, video) +- [ ] User profiling +- [ ] Personalization engine + +## Deployment + +### Frontend + +No additional configuration needed. HomeInput component is integrated into App.tsx. + +### Backend + +No additional configuration needed. RAG functionality is integrated into existing `/api/chat` endpoint. 
+ +### Requirements + +- Python 3.8+ +- Node.js 16+ +- Zhipu AI API Key (required) + +## Troubleshooting + +### Issue: Voice recording not working + +**Solution:** +- Check browser support (Chrome/Edge recommended) +- Allow microphone permissions +- Use HTTPS or localhost + +### Issue: Records not saving + +**Solution:** +- Check if backend is running: `curl http://localhost:8000/health` +- Check browser console for errors +- Check backend logs: `tail -f logs/app.log` + +### Issue: AI chat not using history + +**Solution:** +- Ensure records exist in `data/records.json` +- Ask more specific questions like "What did I do yesterday?" +- Check backend logs for "AI chat successful with RAG context" + +## Conclusion + +This implementation successfully adds two complementary features: + +1. **Quick Recording** - Simple, direct, efficient thought capture +2. **AI Chat** - Intelligent, warm, personalized companionship + +Through RAG technology, the AI chat can provide context-aware responses based on user history, creating a truly "understanding" companion experience. + +The features work together to provide a complete recording and companionship experience: +- Quick recording for capturing thoughts +- AI chat for intelligent companionship + +--- + +**Implementation Complete!** 🎉 + +For questions or further optimization needs, please refer to the detailed documentation or contact the development team. 
diff --git a/docs/README.md b/docs/README.md new file mode 100644 index 0000000000000000000000000000000000000000..9fe97dbc17e55d3c8afe39cead46cd9c20789a94 --- /dev/null +++ b/docs/README.md @@ -0,0 +1,103 @@ +# 文档目录 + +本目录包含项目的详细技术文档。 + +## 📚 文档列表 + +### 核心文档 + +- **[功能架构图.md](功能架构图.md)** - 系统架构、数据流向、组件关系图 +- **[FEATURE_SUMMARY.md](FEATURE_SUMMARY.md)** - 功能实现总结(英文) + +### 故障排查 + +- **[后端启动问题排查.md](后端启动问题排查.md)** - 后端启动常见问题和解决方案 +- **[语音录制问题排查.md](语音录制问题排查.md)** - 语音录制功能的使用和故障排查 + +## 🔗 相关文档 + +### 根目录文档 + +- **[README.md](../README.md)** - 项目主文档 +- **[PRD.md](../PRD.md)** - 产品需求文档 + +### 测试文件 + +- **[test_home_input.py](../test_home_input.py)** - 首页输入功能测试 +- **[test_audio_recording.html](../test_audio_recording.html)** - 音频录制测试页面 +- **[诊断环境.py](../诊断环境.py)** - 环境诊断脚本 + +## 📖 快速导航 + +### 我想... + +- **启动应用** → 查看 [README.md](../README.md) 的"快速开始"部分 +- **解决启动问题** → 查看 [后端启动问题排查.md](后端启动问题排查.md) +- **了解语音录制** → 查看 [语音录制问题排查.md](语音录制问题排查.md) +- **了解系统架构** → 查看 [功能架构图.md](功能架构图.md) +- **查看功能实现** → 查看 [FEATURE_SUMMARY.md](FEATURE_SUMMARY.md) + +## 🛠️ 工具和脚本 + +### 诊断工具 + +```bash +# 环境诊断 +python 诊断环境.py + +# 功能测试 +python test_home_input.py +``` + +### 启动脚本 + +```bash +# Windows CMD +启动后端.bat + +# PowerShell +.\启动后端.ps1 +``` + +### 测试页面 + +- 打开 `test_audio_recording.html` 测试音频录制功能 + +## 📝 文档维护 + +### 文档结构 + +``` +项目根目录/ +├── README.md # 主文档 +├── PRD.md # 产品需求文档 +├── docs/ # 详细文档目录 +│ ├── README.md # 本文件 +│ ├── 功能架构图.md # 架构文档 +│ ├── 后端启动问题排查.md # 启动问题 +│ ├── 语音录制问题排查.md # 录音问题 +│ └── FEATURE_SUMMARY.md # 功能总结 +├── test_home_input.py # 测试脚本 +├── test_audio_recording.html # 测试页面 +└── 诊断环境.py # 诊断脚本 +``` + +### 更新文档 + +如需更新文档,请: +1. 修改对应的 Markdown 文件 +2. 确保链接正确 +3. 
更新本 README 的文档列表 + +## 🤝 贡献 + +欢迎改进文档!如果你发现: +- 文档有错误 +- 说明不清楚 +- 缺少重要信息 + +请提交 Issue 或 Pull Request。 + +--- + +**最后更新:** 2024-01-17 diff --git a/docs/ROADMAP.md b/docs/ROADMAP.md new file mode 100644 index 0000000000000000000000000000000000000000..2dfaa7a9726a6166f66b36cdee995a52dbf4fd89 --- /dev/null +++ b/docs/ROADMAP.md @@ -0,0 +1,422 @@ +# 🗺️ 未来迭代计划 - Roadmap + +## 📋 版本规划 + +### 当前版本:v1.0.0 ✅ +**发布日期**:2026-01-18 + +**核心功能**: +- ✅ 语音/文本快速记录 +- ✅ AI 语义解析(心情、灵感、待办) +- ✅ AI 对话陪伴(RAG) +- ✅ AI 形象定制(720 种组合) +- ✅ 物理引擎心情气泡池 +- ✅ 多平台部署(Hugging Face, ModelScope, 本地) + +--- + +## 🚀 v1.1.0 - 数据增强与可视化 +**预计发布**:2026-02 + +### 核心目标 +增强数据分析和可视化能力,让用户更好地了解自己的情绪变化和成长轨迹。 + +### 新功能 + +#### 1. 情绪趋势分析 📊 +- [ ] **情绪时间线** + - 按日/周/月查看情绪变化曲线 + - 识别情绪周期和模式 + - 情绪高峰和低谷标注 + +- [ ] **情绪统计报告** + - 情绪类型分布饼图 + - 情绪强度热力图 + - 每周/每月情绪总结 + +- [ ] **情绪触发因素分析** + - 关键词云图 + - 高频触发场景识别 + - 情绪关联事件分析 + +#### 2. 灵感知识图谱 🕸️ +- [ ] **灵感关联网络** + - 基于标签的灵感连接 + - 可视化灵感演化路径 + - 发现灵感之间的隐藏联系 + +- [ ] **灵感分类优化** + - 自动分类(工作/生活/学习/创意) + - 自定义标签系统 + - 灵感收藏夹 + +- [ ] **灵感搜索增强** + - 全文搜索 + - 标签筛选 + - 时间范围筛选 + +#### 3. 待办智能管理 ✅ +- [ ] **待办优先级** + - AI 自动评估紧急程度 + - 重要性标记 + - 智能排序 + +- [ ] **待办提醒** + - 时间提醒 + - 地点提醒(基于位置) + - 智能推荐最佳执行时间 + +- [ ] **待办统计** + - 完成率统计 + - 拖延分析 + - 效率趋势图 + +### 技术改进 +- [ ] 数据导出功能(JSON/CSV) +- [ ] 数据备份与恢复 +- [ ] 性能优化(大数据量处理) + +--- + +## 🎨 v1.2.0 - 社交与分享 +**预计发布**:2026-03 + +### 核心目标 +构建温暖的社区氛围,让用户可以安全地分享和交流。 + +### 新功能 + +#### 1. 匿名社区 🌐 +- [ ] **心情广场** + - 匿名分享心情 + - 点赞和评论 + - 情绪共鸣标记 + +- [ ] **灵感市集** + - 分享创意灵感 + - 灵感收藏 + - 灵感协作 + +- [ ] **治愈树洞** + - 完全匿名倾诉 + - AI 温暖回复 + - 用户互助支持 + +#### 2. 好友系统 👥 +- [ ] **添加好友** + - 邀请码机制 + - 好友申请 + - 好友列表 + +- [ ] **私密分享** + - 向好友分享特定记录 + - 好友可见的心情动态 + - 互相鼓励和支持 + +- [ ] **小组功能** + - 创建兴趣小组 + - 小组话题讨论 + - 小组活动 + +#### 3. 
成就系统 🏆 +- [ ] **记录成就** + - 连续记录天数 + - 记录总数里程碑 + - 特殊成就徽章 + +- [ ] **成长勋章** + - 情绪管理大师 + - 灵感收集家 + - 行动派达人 + +- [ ] **每日打卡** + - 打卡日历 + - 打卡奖励 + - 打卡提醒 + +### 技术改进 +- [ ] 用户认证系统 +- [ ] 数据隐私保护 +- [ ] 内容审核机制 + +--- + +## 🧠 v1.3.0 - AI 能力升级 +**预计发布**:2026-04 + +### 核心目标 +提升 AI 的智能化水平,提供更个性化、更深入的陪伴体验。 + +### 新功能 + +#### 1. 智能对话增强 💬 +- [ ] **多轮对话记忆** + - 记住对话上下文 + - 长期记忆用户偏好 + - 个性化对话风格 + +- [ ] **情感识别** + - 识别用户情绪状态 + - 根据情绪调整回复风格 + - 主动关怀和安慰 + +- [ ] **主动对话** + - AI 主动发起问候 + - 定期情绪检查 + - 特殊日期提醒 + +#### 2. 个性化推荐 🎯 +- [ ] **内容推荐** + - 推荐相关灵感 + - 推荐治愈内容 + - 推荐行动建议 + +- [ ] **习惯分析** + - 识别用户习惯模式 + - 提供改善建议 + - 个性化目标设定 + +- [ ] **智能提醒** + - 基于历史数据的智能提醒 + - 最佳记录时间推荐 + - 情绪调节建议 + +#### 3. AI 形象进化 🎭 +- [ ] **动态表情** + - 根据对话内容变化表情 + - 情绪同步动画 + - 互动动作 + +- [ ] **语音对话** + - AI 语音回复 + - 语音情感表达 + - 多种声音选择 + +- [ ] **3D 形象** + - 3D 角色模型 + - 更丰富的动画 + - 场景互动 + +### 技术改进 +- [ ] 升级到更强大的 AI 模型 +- [ ] 本地 AI 模型支持(隐私保护) +- [ ] 多模态输入(图片、视频) + +--- + +## 📱 v1.4.0 - 移动端与硬件 +**预计发布**:2026-05 + +### 核心目标 +扩展到移动端和智能硬件,提供无处不在的陪伴体验。 + +### 新功能 + +#### 1. 移动端应用 📱 +- [ ] **原生 App** + - iOS 应用 + - Android 应用 + - 离线功能 + +- [ ] **移动端优化** + - 触摸手势优化 + - 移动端专属 UI + - 省电模式 + +- [ ] **快捷记录** + - 桌面小组件 + - 快捷指令 + - 语音唤醒 + +#### 2. 智能硬件集成 ⌚ +- [ ] **可穿戴设备** + - 智能手表集成 + - 心率监测 + - 情绪预警 + +- [ ] **智能音箱** + - 语音交互 + - 定时播报 + - 环境音乐 + +- [ ] **IoT 设备** + - 智能灯光(情绪灯) + - 智能香薰 + - 环境传感器 + +#### 3. 跨平台同步 ☁️ +- [ ] **云端同步** + - 实时数据同步 + - 多设备无缝切换 + - 冲突解决 + +- [ ] **离线模式** + - 离线记录 + - 离线 AI 对话 + - 自动同步 + +### 技术改进 +- [ ] React Native / Flutter 移动端开发 +- [ ] 蓝牙/WiFi 硬件通信 +- [ ] 云端数据库 + +--- + +## 🌟 v2.0.0 - 生态系统 +**预计发布**:2026-Q3 + +### 核心目标 +构建完整的心理健康生态系统,提供全方位的支持。 + +### 新功能 + +#### 1. 专业服务对接 🏥 +- [ ] **心理咨询师入驻** + - 在线预约 + - 视频咨询 + - 专业评估 + +- [ ] **心理测评** + - 标准化量表 + - AI 辅助评估 + - 报告生成 + +- [ ] **危机干预** + - 自动识别危机信号 + - 紧急联系人通知 + - 专业资源推荐 + +#### 2. 内容生态 📚 +- [ ] **治愈内容库** + - 冥想音频 + - 正念练习 + - 心理学文章 + +- [ ] **课程体系** + - 情绪管理课程 + - 压力应对课程 + - 自我成长课程 + +- [ ] **创作者平台** + - 内容创作工具 + - 创作者激励 + - 内容分发 + +#### 3. 
企业版 🏢 +- [ ] **团队版功能** + - 团队情绪监测 + - 团队氛围分析 + - 管理者仪表盘 + +- [ ] **企业服务** + - 员工关怀计划 + - 心理健康培训 + - 数据报告 + +### 技术改进 +- [ ] 微服务架构 +- [ ] 大数据分析平台 +- [ ] AI 模型训练平台 + +--- + +## 🔮 未来展望 + +### 长期愿景 +打造一个温暖、智能、专业的心理健康陪伴平台,让每个人都能: +- 🌈 更好地理解和管理自己的情绪 +- 💡 记录和实现自己的灵感与目标 +- 🤝 在安全的环境中获得支持和陪伴 +- 🌱 持续成长,成为更好的自己 + +### 技术方向 +- **AI 技术**:更智能的情感理解和对话能力 +- **隐私保护**:端到端加密、本地 AI 模型 +- **多模态**:支持图片、视频、音频等多种输入 +- **个性化**:深度学习用户偏好,提供定制化体验 +- **开放生态**:API 开放、插件系统、第三方集成 + +### 研究方向 +- 情绪识别算法优化 +- 个性化推荐系统 +- 心理健康预警模型 +- 人机交互体验研究 + +--- + +## 📊 迭代原则 + +### 1. 用户优先 +- 所有功能基于用户真实需求 +- 持续收集用户反馈 +- 快速迭代优化 + +### 2. 隐私安全 +- 数据加密存储 +- 用户数据自主权 +- 透明的隐私政策 + +### 3. 温暖治愈 +- 保持温暖的设计风格 +- 避免过度商业化 +- 关注用户心理健康 + +### 4. 技术创新 +- 采用前沿 AI 技术 +- 优化用户体验 +- 保持技术领先 + +### 5. 可持续发展 +- 合理的商业模式 +- 社会责任 +- 长期价值创造 + +--- + +## 🤝 参与贡献 + +我们欢迎社区贡献!你可以通过以下方式参与: + +### 功能建议 +- 在 GitHub Issues 提交功能建议 +- 参与功能讨论和投票 +- 分享你的使用体验 + +### 代码贡献 +- Fork 项目并提交 PR +- 修复 Bug +- 优化性能 +- 添加新功能 + +### 内容贡献 +- 分享治愈内容 +- 编写使用教程 +- 翻译文档 + +### 测试反馈 +- 参与 Beta 测试 +- 报告 Bug +- 提供改进建议 + +--- + +## 📞 联系我们 + +- **GitHub**:https://github.com/kernel-14/Nora +- **Issues**:https://github.com/kernel-14/Nora/issues +- **Discussions**:https://github.com/kernel-14/Nora/discussions + +--- + +## 📝 更新日志 + +### v1.0.0 (2026-01-18) +- ✅ 初始版本发布 +- ✅ 核心功能实现 +- ✅ 多平台部署支持 + +--- + +**注意**:本路线图会根据实际情况和用户反馈进行调整。具体功能和发布时间可能会有变化。 + +**最后更新**:2026-01-18 diff --git "a/docs/\345\212\237\350\203\275\346\236\266\346\236\204\345\233\276.md" "b/docs/\345\212\237\350\203\275\346\236\266\346\236\204\345\233\276.md" new file mode 100644 index 0000000000000000000000000000000000000000..b24c1bd34072347cb2cadc40bc45b3bd43e9fd4c --- /dev/null +++ "b/docs/\345\212\237\350\203\275\346\236\266\346\236\204\345\233\276.md" @@ -0,0 +1,375 @@ +# 首页交互功能架构图 + +## 整体架构 + +``` +┌─────────────────────────────────────────────────────────────┐ +│ 用户界面 │ +│ │ +│ ┌──────────────┐ ┌──────────────┐ │ +│ │ 首页记录 │ │ AI 对话 │ │ +│ │ │ │ │ │ +│ │ 🎤 语音输入 │ │ 💬 聊天界面 │ │ +│ │ ⌨️ 文字输入 │ │ 📝 消息输入 │ │ +│ 
└──────────────┘ └──────────────┘ │ +│ │ │ │ +└─────────┼──────────────────────────────┼─────────────────────┘ + │ │ + ▼ ▼ +┌─────────────────────┐ ┌─────────────────────┐ +│ /api/process │ │ /api/chat │ +│ │ │ │ +│ 1. 接收输入 │ │ 1. 接收消息 │ +│ 2. ASR 转文字 │ │ 2. 加载历史记录 │ +│ 3. 语义分析 │ │ 3. 构建 RAG 上下文 │ +│ 4. 保存记录 │ │ 4. 调用 AI API │ +│ 5. 拆分分类 │ │ 5. 返回回复 │ +└─────────────────────┘ └─────────────────────┘ + │ │ + ▼ │ +┌─────────────────────┐ │ +│ 数据存储 │◄─────────────────┘ +│ │ (读取历史) +│ 📄 records.json │ +│ 😊 moods.json │ +│ 💡 inspirations.json│ +│ ✅ todos.json │ +└─────────────────────┘ +``` + +## 首页记录流程 + +``` +用户输入 + │ + ├─ 语音 ──► MediaRecorder ──► Blob ──► File + │ │ + └─ 文字 ─────────────────────────────────┤ + │ + ▼ + FormData (audio/text) + │ + ▼ + POST /api/process + │ + ┌───────────────────────┴───────────────────────┐ + │ │ + ▼ ▼ + audio != null? text != null? + │ │ + ▼ │ + ASR Service (智谱 AI) │ + │ │ + └───────────────────┬───────────────────────────┘ + │ + ▼ + original_text + │ + ▼ + Semantic Parser (GLM-4-Flash) + │ + ┌───────────────────┼───────────────────┐ + │ │ │ + ▼ ▼ ▼ + mood inspirations todos + │ │ │ + └───────────────────┴───────────────────┘ + │ + ▼ + Storage Service + │ + ┌───────────────────┼───────────────────┐ + │ │ │ + ▼ ▼ ▼ + moods.json inspirations.json todos.json + │ │ │ + └───────────────────┴───────────────────┘ + │ + ▼ + records.json (完整记录) + │ + ▼ + 返回 ProcessResponse + │ + ▼ + 前端显示"记录成功" +``` + +## AI 对话流程(RAG 增强) + +``` +用户消息 + │ + ▼ +POST /api/chat + │ + ├─ text: "我最近在做什么?" + │ + ▼ +Storage Service + │ + ├─ 读取 records.json + │ + ▼ +recent_records = records[-10:] (最近 10 条) + │ + ▼ +构建上下文 + │ + ├─ for each record: + │ ├─ 提取 original_text + │ ├─ 提取 mood (type, intensity) + │ ├─ 提取 inspirations (core_idea) + │ └─ 提取 todos (task) + │ + ▼ +context_text = """ +[2024-01-17T10:00:00Z] 用户说: 今天工作很累 +情绪: 疲惫 (强度: 7) +待办: 明天早起跑步 + +[2024-01-17T14:00:00Z] 用户说: 完成了项目很开心 +情绪: 开心 (强度: 8) +灵感: 项目完成的成就感 +... 
+""" + │ + ▼ +system_prompt = f""" +你是一个温柔、善解人意的AI陪伴助手。 +你可以参考用户的历史记录来提供更贴心的回复: + +{context_text} + +请基于这些背景信息,用温暖、理解的语气回复用户。 +""" + │ + ▼ +GLM-4-Flash API + │ + ├─ model: "glm-4-flash" + ├─ messages: [ + │ {role: "system", content: system_prompt}, + │ {role: "user", content: "我最近在做什么?"} + │ ] + ├─ temperature: 0.8 + └─ top_p: 0.9 + │ + ▼ +AI 生成回复 + │ + ├─ "从你的记录来看,你最近在忙一个项目, + │ 虽然工作很累,但完成后很有成就感呢! + │ 你还计划明天早起去跑步,保持健康的习惯真棒!" + │ + ▼ +返回 {response: "..."} + │ + ▼ +前端显示 AI 回复 +``` + +## 数据流向 + +``` +┌─────────────┐ +│ 用户输入 │ +└──────┬──────┘ + │ + ▼ +┌─────────────┐ +│ HomeInput │ (前端组件) +│ Component │ +└──────┬──────┘ + │ + ▼ +┌─────────────┐ +│ API Service │ (前端服务层) +└──────┬──────┘ + │ + ▼ +┌─────────────┐ +│ FastAPI │ (后端 API) +│ /api/process│ +└──────┬──────┘ + │ + ├─► ASR Service ──► 智谱 AI (语音转文字) + │ + ├─► Semantic Parser ──► GLM-4-Flash (语义分析) + │ + └─► Storage Service ──► JSON 文件 (数据存储) + │ + ├─► records.json + ├─► moods.json + ├─► inspirations.json + └─► todos.json +``` + +## RAG 知识库结构 + +``` +records.json (知识库) +│ +├─ Record 1 +│ ├─ record_id: "uuid-1" +│ ├─ timestamp: "2024-01-17T10:00:00Z" +│ ├─ original_text: "今天工作很累" +│ └─ parsed_data: +│ ├─ mood: {type: "疲惫", intensity: 7} +│ ├─ inspirations: [] +│ └─ todos: [] +│ +├─ Record 2 +│ ├─ record_id: "uuid-2" +│ ├─ timestamp: "2024-01-17T14:00:00Z" +│ ├─ original_text: "完成了项目很开心" +│ └─ parsed_data: +│ ├─ mood: {type: "开心", intensity: 8} +│ ├─ inspirations: [{core_idea: "项目完成"}] +│ └─ todos: [] +│ +└─ Record 3 + ├─ record_id: "uuid-3" + ├─ timestamp: "2024-01-17T18:00:00Z" + ├─ original_text: "明天要早起跑步" + └─ parsed_data: + ├─ mood: null + ├─ inspirations: [] + └─ todos: [{task: "早起跑步", time: "明天"}] + + ↓ (RAG 提取) + +AI 对话上下文: +""" +[2024-01-17T10:00:00Z] 用户说: 今天工作很累 +情绪: 疲惫 (强度: 7) + +[2024-01-17T14:00:00Z] 用户说: 完成了项目很开心 +情绪: 开心 (强度: 8) +灵感: 项目完成 + +[2024-01-17T18:00:00Z] 用户说: 明天要早起跑步 +待办: 早起跑步 +""" +``` + +## 组件关系图 + +``` +App.tsx + │ + ├─► HomeInput.tsx (首页输入) + │ │ + │ ├─► VoiceRecording (语音录制) + │ │ └─► 
MediaRecorder API + │ │ + │ ├─► TextInput (文字输入) + │ │ └─► Input Element + │ │ + │ └─► apiService.processInput() + │ └─► POST /api/process + │ + ├─► MoodView.tsx (心情页面) + │ └─► ChatDialog.tsx + │ └─► apiService.chatWithAI() + │ └─► POST /api/chat (RAG) + │ + ├─► InspirationView.tsx (灵感页面) + │ └─► ChatDialog.tsx + │ └─► apiService.chatWithAI() + │ └─► POST /api/chat (RAG) + │ + └─► TodoView.tsx (待办页面) + └─► ChatDialog.tsx + └─► apiService.chatWithAI() + └─► POST /api/chat (RAG) +``` + +## API 端点对比 + +``` +┌─────────────────────────────────────────────────────────────┐ +│ /api/process │ +├─────────────────────────────────────────────────────────────┤ +│ 输入: audio (File) 或 text (string) │ +│ 处理: │ +│ 1. ASR 转文字 (如果是音频) │ +│ 2. 语义分析 (GLM-4-Flash) │ +│ 3. 保存到 records.json │ +│ 4. 拆分到 moods/inspirations/todos.json │ +│ 输出: ProcessResponse { │ +│ record_id, timestamp, mood, inspirations, todos │ +│ } │ +└─────────────────────────────────────────────────────────────┘ + +┌─────────────────────────────────────────────────────────────┐ +│ /api/chat │ +├─────────────────────────────────────────────────────────────┤ +│ 输入: text (string) │ +│ 处理: │ +│ 1. 加载 records.json (最近 10 条) │ +│ 2. 提取情绪、灵感、待办信息 │ +│ 3. 构建 RAG 上下文 │ +│ 4. 调用 GLM-4-Flash API │ +│ 5. 
生成个性化回复 │ +│ 输出: { │ +│ response: "AI 的回复内容" │ +│ } │ +└─────────────────────────────────────────────────────────────┘ +``` + +## 技术栈 + +``` +前端 +├─ React 19 +├─ TypeScript +├─ Vite +├─ Tailwind CSS +└─ Lucide Icons + +后端 +├─ FastAPI +├─ Pydantic +├─ Uvicorn +├─ httpx (异步 HTTP) +└─ Python 3.8+ + +AI 服务 +├─ 智谱 AI (ASR) +├─ GLM-4-Flash (语义分析) +└─ GLM-4-Flash (对话生成) + +数据存储 +└─ JSON 文件 + ├─ records.json + ├─ moods.json + ├─ inspirations.json + └─ todos.json +``` + +## 性能优化点 + +``` +前端优化 +├─ 防抖处理 (输入延迟) +├─ 乐观更新 (立即反馈) +├─ 组件懒加载 +└─ 缓存机制 + +后端优化 +├─ 异步处理 (async/await) +├─ 连接池复用 +├─ 限制历史记录数量 (10 条) +└─ 响应压缩 + +RAG 优化 +├─ 只加载最近记录 +├─ 精简上下文信息 +├─ 缓存常见问题 +└─ 向量数据库 (未来) +``` + +--- + +这个架构图展示了整个系统的工作流程和数据流向,帮助理解两种功能的区别和联系。 diff --git "a/docs/\345\220\216\347\253\257\345\220\257\345\212\250\351\227\256\351\242\230\346\216\222\346\237\245.md" "b/docs/\345\220\216\347\253\257\345\220\257\345\212\250\351\227\256\351\242\230\346\216\222\346\237\245.md" new file mode 100644 index 0000000000000000000000000000000000000000..692fe61c6f9cdfc05ddc00e0b055b9dfd276c56d --- /dev/null +++ "b/docs/\345\220\216\347\253\257\345\220\257\345\212\250\351\227\256\351\242\230\346\216\222\346\237\245.md" @@ -0,0 +1,368 @@ +# 后端启动问题排查指南 + +## 问题:ModuleNotFoundError: No module named 'app' + +### 错误信息 +``` +ModuleNotFoundError: No module named 'app' +``` + +### 原因分析 + +这个错误通常由以下原因引起: + +1. **在错误的目录运行命令** + - 必须在项目根目录运行 + - 不能在 `app/` 目录内运行 + +2. **Python 路径问题** + - Python 找不到 `app` 模块 + - PYTHONPATH 未正确设置 + +3. 
**虚拟环境问题** + - 未激活正确的虚拟环境 + - 依赖未安装 + +## 解决方案 + +### 方案 1:使用启动脚本(推荐) + +**Windows CMD:** +```bash +启动后端.bat +``` + +**PowerShell:** +```bash +.\启动后端.ps1 +``` + +这些脚本会: +- ✅ 检查当前目录是否正确 +- ✅ 自动激活虚拟环境 +- ✅ 使用正确的命令启动 + +### 方案 2:手动启动 + +#### 步骤 1:确认在项目根目录 + +```bash +# 检查当前目录 +pwd + +# 应该看到这些文件/目录 +ls +# app/ +# frontend/ +# data/ +# requirements.txt +# README.md +``` + +#### 步骤 2:激活虚拟环境(如果有) + +**Windows:** +```bash +# CMD +venv\Scripts\activate.bat + +# PowerShell +venv\Scripts\Activate.ps1 +``` + +**Linux/Mac:** +```bash +source venv/bin/activate +``` + +#### 步骤 3:启动服务器 + +**使用 python -m(推荐):** +```bash +python -m uvicorn app.main:app --host 0.0.0.0 --port 8000 --reload +``` + +**或者直接使用 uvicorn:** +```bash +uvicorn app.main:app --host 0.0.0.0 --port 8000 --reload +``` + +### 方案 3:不使用 reload 模式 + +如果 `--reload` 参数导致问题,可以不使用: + +```bash +python -m uvicorn app.main:app --host 0.0.0.0 --port 8000 +``` + +**注意:** 不使用 reload 模式时,修改代码后需要手动重启服务器。 + +### 方案 4:设置 PYTHONPATH + +如果上述方法都不行,手动设置 PYTHONPATH: + +**Windows CMD:** +```bash +set PYTHONPATH=%CD% +python -m uvicorn app.main:app --host 0.0.0.0 --port 8000 +``` + +**PowerShell:** +```powershell +$env:PYTHONPATH = $PWD +python -m uvicorn app.main:app --host 0.0.0.0 --port 8000 +``` + +**Linux/Mac:** +```bash +export PYTHONPATH=$PWD +python -m uvicorn app.main:app --host 0.0.0.0 --port 8000 +``` + +## 验证步骤 + +### 1. 检查目录结构 + +```bash +# 应该看到这个结构 +项目根目录/ +├── app/ +│ ├── __init__.py +│ ├── main.py +│ ├── config.py +│ └── ... +├── frontend/ +├── data/ +└── requirements.txt +``` + +### 2. 检查 Python 环境 + +```bash +# 检查 Python 版本 +python --version +# 应该是 Python 3.8+ + +# 检查 uvicorn 是否安装 +python -c "import uvicorn; print(uvicorn.__version__)" +# 应该显示版本号 + +# 检查 FastAPI 是否安装 +python -c "import fastapi; print(fastapi.__version__)" +# 应该显示版本号 +``` + +### 3. 测试导入 + +```bash +# 测试能否导入 app 模块 +python -c "import app.main; print('OK')" +# 应该显示 OK +``` + +如果这一步失败,说明模块路径有问题。 + +### 4. 
检查依赖 + +```bash +# 安装/更新依赖 +pip install -r requirements.txt + +# 或者单独安装 +pip install fastapi uvicorn python-multipart httpx pydantic +``` + +## 常见错误和解决方法 + +### 错误 1:在 app/ 目录内运行 + +**错误操作:** +```bash +cd app +python -m uvicorn main:app --reload +``` + +**正确操作:** +```bash +# 回到项目根目录 +cd .. +python -m uvicorn app.main:app --reload +``` + +### 错误 2:虚拟环境未激活 + +**症状:** +- 提示找不到 uvicorn +- 提示找不到 fastapi + +**解决:** +```bash +# 激活虚拟环境 +venv\Scripts\activate.bat # Windows CMD +venv\Scripts\Activate.ps1 # PowerShell +source venv/bin/activate # Linux/Mac + +# 然后重新启动 +python -m uvicorn app.main:app --reload +``` + +### 错误 3:端口被占用 + +**错误信息:** +``` +OSError: [Errno 48] Address already in use +``` + +**解决方法:** + +**方法 1:使用其他端口** +```bash +python -m uvicorn app.main:app --port 8001 +``` + +**方法 2:关闭占用端口的进程** + +Windows: +```bash +# 查找占用 8000 端口的进程 +netstat -ano | findstr :8000 + +# 关闭进程(替换 PID) +taskkill /PID <PID> /F +``` + +Linux/Mac: +```bash +# 查找并关闭 +lsof -ti:8000 | xargs kill -9 +``` + +### 错误 4:权限问题 + +**错误信息:** +``` +PermissionError: [Errno 13] Permission denied +``` + +**解决方法:** + +1. 以管理员身份运行 +2. 检查文件权限 +3. 使用其他端口(> 1024) + +## 推荐的启动方式 + +### 开发环境 + +**方式 1:使用启动脚本** +```bash +# Windows +启动后端.bat + +# PowerShell +.\启动后端.ps1 +``` + +**方式 2:手动启动(带 reload)** +```bash +python -m uvicorn app.main:app --host 0.0.0.0 --port 8000 --reload +``` + +### 生产环境 + +**使用 gunicorn(Linux):** +```bash +gunicorn app.main:app -w 4 -k uvicorn.workers.UvicornWorker --bind 0.0.0.0:8000 +``` + +**使用 uvicorn(Windows):** +```bash +python -m uvicorn app.main:app --host 0.0.0.0 --port 8000 --workers 4 +``` + +## 调试技巧 + +### 1. 查看详细日志 + +```bash +python -m uvicorn app.main:app --log-level debug +``` + +### 2. 检查配置 + +```bash +# 查看环境变量 +python -c "from app.config import get_config; print(get_config())" +``` + +### 3.
测试 API + +```bash +# 启动后测试 +curl http://localhost:8000/health + +# 或在浏览器访问 +http://localhost:8000/docs +``` + +## 完整的启动检查清单 + +- [ ] 在项目根目录(不是 app/ 目录) +- [ ] 虚拟环境已激活(如果使用) +- [ ] 依赖已安装(pip install -r requirements.txt) +- [ ] .env 文件已配置 +- [ ] 端口 8000 未被占用 +- [ ] Python 版本 >= 3.8 +- [ ] 可以导入 app 模块(python -c "import app.main") + +## 快速诊断命令 + +运行这个命令进行快速诊断: + +```bash +python -c " +import sys +import os +print('Python 版本:', sys.version) +print('当前目录:', os.getcwd()) +print('app 目录存在:', os.path.exists('app')) +print('main.py 存在:', os.path.exists('app/main.py')) +try: + import uvicorn + print('uvicorn 已安装:', uvicorn.__version__) +except: + print('uvicorn 未安装') +try: + import fastapi + print('fastapi 已安装:', fastapi.__version__) +except: + print('fastapi 未安装') +try: + import app.main + print('app.main 可导入: OK') +except Exception as e: + print('app.main 导入失败:', e) +" +``` + +## 总结 + +最常见的问题是**在错误的目录运行命令**。 + +**解决方法:** +1. 确保在项目根目录 +2. 使用提供的启动脚本 +3. 使用 `python -m uvicorn` 而不是直接 `uvicorn` + +如果问题仍然存在,请: +1. 运行快速诊断命令 +2. 检查完整的启动检查清单 +3. 查看详细的错误日志 + +--- + +**需要帮助?** 请提供: +- 完整的错误信息 +- 当前目录(pwd) +- Python 版本(python --version) +- 快速诊断命令的输出 diff --git "a/docs/\345\261\200\345\237\237\347\275\221\350\256\277\351\227\256\344\277\256\345\244\215\345\256\214\346\210\220.md" "b/docs/\345\261\200\345\237\237\347\275\221\350\256\277\351\227\256\344\277\256\345\244\215\345\256\214\346\210\220.md" new file mode 100644 index 0000000000000000000000000000000000000000..9ff956cf4842f724a4587605274b2389d1ae056f --- /dev/null +++ "b/docs/\345\261\200\345\237\237\347\275\221\350\256\277\351\227\256\344\277\256\345\244\215\345\256\214\346\210\220.md" @@ -0,0 +1,187 @@ +# ✅ 局域网访问问题已修复 + +## 🎯 问题描述 +从其他设备访问 `http://172.18.16.245:8000/` 时显示 "Load failed" + +## 🔧 已完成的修复 + +### 1. 
移除硬编码的 API 地址 +**问题**:`frontend/.env.local` 中设置了 `VITE_API_URL=http://localhost:8000`,导致前端构建时将这个地址写死在代码中。其他设备访问时会尝试连接到 `localhost:8000`(它们自己的设备),而不是你的服务器。 + +**修复**: +- ✅ 注释掉了 `frontend/.env.local` 中的 `VITE_API_URL` +- ✅ 前端现在会自动检测 API 地址: + - 本地访问 → `http://localhost:8000` + - 局域网访问 → `http://172.18.16.245:8000` + - 生产环境 → 自动使用当前域名 + +### 2. 重新构建前端 +**操作**: +```bash +cd frontend +npm run build +``` + +**结果**: +- ✅ 新的构建文件已生成在 `frontend/dist/` +- ✅ 包含了自动 API 地址检测逻辑 + +### 3. 创建诊断工具 +**新增文件**: +- ✅ `frontend/dist/test-connection.html` - 网络连接诊断页面 +- ✅ `scripts/test_lan_access.bat` - 快速测试脚本 +- ✅ `docs/局域网访问快速修复.md` - 详细修复指南 +- ✅ `docs/局域网访问问题排查.md` - 完整排查步骤 + +## 🚀 立即测试 + +### 步骤 1:启动后端 +在主机上运行: +```bash +python scripts/start_local.py +``` + +### 步骤 2:运行诊断 +在主机上运行: +```bash +scripts\test_lan_access.bat +``` + +这会: +- ✅ 检查后端服务是否运行 +- ✅ 显示你的 IP 地址 +- ✅ 测试 API 端点 +- ✅ 检查防火墙状态 + +### 步骤 3:在其他设备上测试 + +#### 方法 1:访问诊断页面(推荐) +在其他设备的浏览器中打开: +``` +http://172.18.16.245:8000/test-connection.html +``` + +点击 "🚀 开始测试" 按钮,查看所有 API 是否可以访问。 + +#### 方法 2:直接访问主应用 +在其他设备的浏览器中打开: +``` +http://172.18.16.245:8000/ +``` + +应该可以正常加载并显示数据。 + +## 🔥 如果仍然失败:检查防火墙 + +### 最常见的原因:Windows 防火墙阻止端口 8000 + +#### 快速测试(临时关闭防火墙) +以管理员身份运行 PowerShell: +```powershell +Set-NetFirewallProfile -Profile Domain,Public,Private -Enabled False +``` + +然后在其他设备上重新访问。如果可以访问了,说明是防火墙问题。 + +#### 添加防火墙规则(推荐) +以管理员身份运行 PowerShell: +```powershell +New-NetFirewallRule -DisplayName "Python FastAPI 8000" -Direction Inbound -LocalPort 8000 -Protocol TCP -Action Allow +``` + +#### 重新启用防火墙 +```powershell +Set-NetFirewallProfile -Profile Domain,Public,Private -Enabled True +``` + +## 📱 移动设备访问 + +如果从手机访问: +1. ✅ 确保手机连接的是**同一个 WiFi 网络**(不是移动数据) +2. ✅ 在手机浏览器中输入:`http://172.18.16.245:8000/` +3. ✅ 如果无法访问,先访问诊断页面:`http://172.18.16.245:8000/test-connection.html` + +## 🐛 使用浏览器开发者工具调试 + +如果仍然有问题,请: +1. 在其他设备的浏览器中按 **F12** 打开开发者工具 +2. 切换到 **Console** 标签 +3. 刷新页面 +4. 
查看错误信息并告诉我 + +**常见错误**: +- `Failed to fetch` → 网络连接问题(检查防火墙) +- `net::ERR_CONNECTION_REFUSED` → 端口未开放(检查后端是否运行) +- `net::ERR_CONNECTION_TIMED_OUT` → 连接超时(检查网络连接) + +## ✅ 成功标志 + +当以下测试都通过时,说明配置正确: + +1. ✅ 诊断页面所有测试都显示绿色 ✅ +2. ✅ 主应用可以正常加载 +3. ✅ 可以看到 AI 角色形象 +4. ✅ 可以进行语音输入和文本输入 +5. ✅ 可以查看心情、灵感、待办数据 + +## 📚 相关文档 + +- [局域网访问快速修复](docs/局域网访问快速修复.md) - 详细的修复步骤 +- [局域网访问问题排查](docs/局域网访问问题排查.md) - 完整的排查指南 +- [局域网访问指南](docs/局域网访问指南.md) - 配置说明 + +## 🆘 仍然无法解决? + +请提供以下信息: + +1. **诊断页面的测试结果**(截图) +2. **浏览器控制台的错误信息**(F12 → Console 标签,截图或文字) +3. **主机上的测试结果**: + ```bash + curl http://localhost:8000/health + ``` +4. **其他设备上的测试**: + - 能否 ping 通主机:`ping 172.18.16.245` + - 访问健康检查:`http://172.18.16.245:8000/health` + +--- + +## 📝 技术说明 + +### 为什么会出现这个问题? + +1. **Vite 的环境变量机制**: + - Vite 在构建时会将 `import.meta.env.VITE_*` 变量替换为实际值 + - 如果设置了 `VITE_API_URL=http://localhost:8000`,构建后的代码会包含这个硬编码的地址 + - 其他设备访问时,会尝试连接到 `localhost:8000`(它们自己的设备) + +2. **解决方案**: + - 不设置 `VITE_API_URL`,让前端在运行时动态检测 + - 使用 `window.location.hostname` 获取当前访问的主机名 + - 根据主机名自动构建正确的 API 地址 + +### API 地址检测逻辑 + +```typescript +function getApiBaseUrl() { + const currentHost = window.location.hostname; + const currentProtocol = window.location.protocol; + + // 生产环境(Hugging Face, ModelScope) + if (currentHost.includes('hf.space') || + currentHost.includes('huggingface.co') || + currentHost.includes('modelscope.cn')) { + return `${currentProtocol}//${currentHost}`; + } + + // 局域网访问(如 192.168.x.x, 172.x.x.x) + if (currentHost !== 'localhost' && currentHost !== '127.0.0.1') { + return `${currentProtocol}//${currentHost}:8000`; + } + + // 本地开发 + return 'http://localhost:8000'; +} +``` + +这样,无论从哪个地址访问,都能自动使用正确的 API 地址。 diff --git "a/docs/\345\261\200\345\237\237\347\275\221\350\256\277\351\227\256\345\277\253\351\200\237\344\277\256\345\244\215.md" "b/docs/\345\261\200\345\237\237\347\275\221\350\256\277\351\227\256\345\277\253\351\200\237\344\277\256\345\244\215.md" new file mode 100644 index 
0000000000000000000000000000000000000000..12bfce709cc97c114f40e90fcc51633c859689ce --- /dev/null +++ "b/docs/\345\261\200\345\237\237\347\275\221\350\256\277\351\227\256\345\277\253\351\200\237\344\277\256\345\244\215.md" @@ -0,0 +1,157 @@ +# 局域网访问快速修复指南 + +## 问题:其他设备访问显示 "Load failed" + +### ✅ 已完成的修复 + +1. **移除了硬编码的 API 地址** + - 修改了 `frontend/.env.local`,移除了 `VITE_API_URL=http://localhost:8000` + - 重新构建了前端,现在会自动检测 API 地址 + +2. **前端已重新构建** + - 运行了 `npm run build` + - 新的构建文件已生成在 `frontend/dist/` + +3. **创建了诊断工具** + - 访问 `http://172.18.16.245:8000/test-connection.html` 可以测试连接 + +## 🚀 立即测试 + +### 步骤 1:启动后端服务 + +在主机上运行: +```bash +python scripts/start_local.py +``` + +确认看到: +``` +============================================================ +🌟 治愈系记录助手 - SoulMate AI Companion +============================================================ +📍 本地访问: http://localhost:8000/ +📍 局域网访问: http://172.18.16.245:8000/ +============================================================ +``` + +### 步骤 2:在其他设备上测试 + +#### 测试 1:访问诊断页面 +在其他设备的浏览器中打开: +``` +http://172.18.16.245:8000/test-connection.html +``` + +点击 "🚀 开始测试" 按钮,查看所有 API 是否可以访问。 + +#### 测试 2:访问主应用 +在其他设备的浏览器中打开: +``` +http://172.18.16.245:8000/ +``` + +应该可以正常加载并显示数据。 + +## 🔧 如果仍然失败 + +### 方案 1:检查防火墙(最常见原因) + +#### Windows 防火墙快速测试 +1. 临时关闭防火墙测试(以管理员身份运行 PowerShell): +```powershell +Set-NetFirewallProfile -Profile Domain,Public,Private -Enabled False +``` + +2. 在其他设备上重新访问 `http://172.18.16.245:8000/` + +3. 如果可以访问了,说明是防火墙问题,需要添加规则: +```powershell +# 允许 Python 通过防火墙 +New-NetFirewallRule -DisplayName "Python FastAPI 8000" -Direction Inbound -LocalPort 8000 -Protocol TCP -Action Allow +``` + +4. 
重新启用防火墙: +```powershell +Set-NetFirewallProfile -Profile Domain,Public,Private -Enabled True +``` + +### 方案 2:检查网络连接 + +在其他设备上测试能否 ping 通主机: +```bash +ping 172.18.16.245 +``` + +如果 ping 不通: +- 确认两台设备在同一 WiFi 网络 +- 检查路由器是否启用了 AP 隔离(需要在路由器设置中关闭) +- 确认主机 IP 地址是否正确(可能已变化) + +### 方案 3:检查 IP 地址 + +主机 IP 可能已经变化,重新获取: +```bash +# Windows +ipconfig + +# 查找 "IPv4 地址",例如:192.168.1.100 +``` + +使用新的 IP 地址访问。 + +### 方案 4:使用浏览器开发者工具 + +在其他设备的浏览器中: +1. 按 F12 打开开发者工具 +2. 切换到 "Console" 标签 +3. 刷新页面 +4. 查看具体的错误信息 + +**常见错误及解决方案**: + +| 错误信息 | 原因 | 解决方案 | +|---------|------|---------| +| `Failed to fetch` | 网络连接失败 | 检查防火墙和网络连接 | +| `net::ERR_CONNECTION_REFUSED` | 端口未开放 | 检查后端是否运行,防火墙是否允许 | +| `net::ERR_CONNECTION_TIMED_OUT` | 连接超时 | 检查网络连接,可能是路由器 AP 隔离 | +| `CORS error` | CORS 配置问题 | 已配置,不应该出现此错误 | + +## 📱 移动设备特别说明 + +如果从手机访问: +1. 确保手机连接的是同一个 WiFi 网络(不是移动数据) +2. 某些公共 WiFi 可能禁止设备间通信 +3. 可以尝试使用手机的浏览器访问诊断页面 + +## ✅ 成功标志 + +当以下测试都通过时,说明配置正确: + +1. ✅ 诊断页面所有测试都显示绿色 ✅ +2. ✅ 主应用可以正常加载 +3. ✅ 可以看到 AI 角色形象 +4. ✅ 可以进行语音输入和文本输入 +5. ✅ 可以查看心情、灵感、待办数据 + +## 🆘 仍然无法解决? + +请提供以下信息: + +1. **诊断页面的测试结果**(截图) +2. **浏览器控制台的错误信息**(截图或文字) +3. **主机上运行的结果**: + ```bash + curl http://localhost:8000/health + ``` +4. **其他设备上的测试结果**: + - 能否 ping 通主机 + - 访问 `http://172.18.16.245:8000/health` 的结果 +5. **防火墙状态**: + ```powershell + netsh advfirewall show allprofiles state + ``` + +## 📚 相关文档 + +- [局域网访问问题排查](./局域网访问问题排查.md) - 详细的排查步骤 +- [局域网访问指南](./局域网访问指南.md) - 完整的配置指南 diff --git "a/docs/\345\261\200\345\237\237\347\275\221\350\256\277\351\227\256\346\214\207\345\215\227.md" "b/docs/\345\261\200\345\237\237\347\275\221\350\256\277\351\227\256\346\214\207\345\215\227.md" new file mode 100644 index 0000000000000000000000000000000000000000..2efc7b81f7143eaff19aeea55c952c7976a3429d --- /dev/null +++ "b/docs/\345\261\200\345\237\237\347\275\221\350\256\277\351\227\256\346\214\207\345\215\227.md" @@ -0,0 +1,269 @@ +# 局域网访问指南 + +## 🌐 让其他设备访问你的应用 + +### 快速开始 + +#### 1. 
启动本地服务器 + +**Windows:** +```bash +scripts\start_local.bat +``` + +**Linux/Mac:** +```bash +python scripts/start_local.py +``` + +服务器会显示: +``` +============================================================ +🌟 治愈系记录助手 - SoulMate AI Companion +============================================================ +📍 本地访问: http://localhost:8000/ +📍 局域网访问: http://172.18.16.245:8000/ +📚 API 文档: http://localhost:8000/docs +🔍 健康检查: http://localhost:8000/health +============================================================ +💡 提示: 其他设备可以通过 http://172.18.16.245:8000/ 访问 +============================================================ +``` + +#### 2. 其他设备访问 + +在同一局域网内的其他设备(手机、平板、其他电脑)上: + +1. 打开浏览器 +2. 访问:`http://172.18.16.245:8000/`(使用你的实际 IP 地址) +3. 开始使用! + +### 前置要求 + +#### 1. 构建前端 + +首次使用前,需要构建前端: + +```bash +cd frontend +npm install +npm run build +cd .. +``` + +#### 2. 配置防火墙 + +**Windows 防火墙:** + +1. 打开 Windows Defender 防火墙 +2. 点击"高级设置" +3. 选择"入站规则" → "新建规则" +4. 选择"端口" → 下一步 +5. 选择"TCP",特定本地端口:`8000` +6. 允许连接 +7. 应用到所有配置文件 +8. 命名规则:`SoulMate AI - Port 8000` + +**或使用命令行(管理员权限):** +```powershell +netsh advfirewall firewall add rule name="SoulMate AI - Port 8000" dir=in action=allow protocol=TCP localport=8000 +``` + +**Linux (ufw):** +```bash +sudo ufw allow 8000/tcp +``` + +**Mac:** +系统偏好设置 → 安全性与隐私 → 防火墙 → 防火墙选项 → 添加应用 + +#### 3. 确保在同一网络 + +- 所有设备连接到同一个 WiFi 或局域网 +- 检查路由器是否启用了 AP 隔离(如果启用需要关闭) + +### 故障排查 + +#### 问题 1: 其他设备无法访问 + +**检查清单:** + +1. ✅ 服务器是否正在运行? + ```bash + # 应该看到服务器日志 + INFO: Uvicorn running on http://0.0.0.0:8000 + ``` + +2. ✅ 防火墙是否允许端口 8000? + ```bash + # Windows: 测试端口 + Test-NetConnection -ComputerName 172.18.16.245 -Port 8000 + ``` + +3. ✅ 设备是否在同一网络? + ```bash + # 从其他设备 ping 服务器 + ping 172.18.16.245 + ``` + +4. ✅ IP 地址是否正确? + ```bash + # Windows: 查看 IP + ipconfig + + # Linux/Mac: 查看 IP + ifconfig + ``` + +#### 问题 2: API 调用失败 + +**检查浏览器控制台:** + +1. 打开开发者工具(F12) +2. 查看 Console 标签 +3. 应该看到:`🔗 API Base URL: http://172.18.16.245:8000` +4. 
如果不正确,清除浏览器缓存并刷新 + +**测试 API 连接:** + +访问:`http://172.18.16.245:8000/health` + +应该返回: +```json +{ + "status": "healthy", + "data_dir": "data", + "max_audio_size": 10485760 +} +``` + +#### 问题 3: 没有显示默认形象 + +**检查:** + +1. ✅ 默认形象文件是否存在? + ```bash + # 应该存在这个文件 + generated_images/default_character.jpeg + ``` + +2. ✅ 用户配置是否正确? + ```bash + # 查看配置文件 + cat data/user_config.json + ``` + +3. ✅ 图片 URL 是否正确? + - 访问:`http://172.18.16.245:8000/api/user/config` + - 检查 `character.image_url` 字段 + +4. ✅ 图片是否可访问? + - 访问:`http://172.18.16.245:8000/generated_images/default_character.jpeg` + - 应该能看到图片 + +### 性能优化 + +#### 1. 使用有线连接 + +- 服务器电脑使用网线连接路由器 +- 减少 WiFi 干扰和延迟 + +#### 2. 关闭不必要的应用 + +- 释放 CPU 和内存资源 +- 提高响应速度 + +#### 3. 使用现代浏览器 + +- Chrome 90+ +- Firefox 88+ +- Safari 14+ +- Edge 90+ + +### 安全建议 + +⚠️ **注意:** 局域网访问仅适用于受信任的网络环境 + +1. **不要在公共 WiFi 上使用** +2. **定期更新 API 密钥** +3. **不要暴露到公网** +4. **使用强密码保护路由器** + +### 高级配置 + +#### 自定义端口 + +编辑 `start_local.py`,修改端口号: + +```python +uvicorn.run( + app, + host="0.0.0.0", + port=8888, # 改为你想要的端口 + log_level="info" +) +``` + +同时需要修改防火墙规则允许新端口。 + +#### 使用环境变量 + +创建 `.env.local` 文件: + +```env +HOST=0.0.0.0 +PORT=8000 +``` + +### 常见使用场景 + +#### 场景 1: 手机访问电脑上的应用 + +1. 电脑运行 `start_local.bat` +2. 手机连接同一 WiFi +3. 手机浏览器访问 `http://172.18.16.245:8000/` +4. 可以语音输入、查看心情、与 AI 对话 + +#### 场景 2: 平板作为展示屏 + +1. 电脑运行服务器 +2. 平板访问应用 +3. 全屏显示心情气泡池 +4. 作为情绪可视化展示 + +#### 场景 3: 多人协作 + +1. 一台电脑运行服务器 +2. 团队成员通过局域网访问 +3. 共享灵感和待办事项 +4. 
实时同步数据 + +### 技术细节 + +#### API 地址自动检测 + +前端会自动检测访问地址并配置 API: + +```typescript +// 访问: http://172.18.16.245:5173/ +// API: http://172.18.16.245:8000/ + +// 访问: http://localhost:5173/ +// API: http://localhost:8000/ +``` + +#### CORS 配置 + +后端已配置允许所有来源(开发环境): + +```python +app.add_middleware( + CORSMiddleware, + allow_origins=["*"], + allow_credentials=True, + allow_methods=["*"], + allow_headers=["*"], +) +``` + +生产环境应该限制具体的域名。 diff --git "a/docs/\345\261\200\345\237\237\347\275\221\350\256\277\351\227\256\351\227\256\351\242\230\346\216\222\346\237\245.md" "b/docs/\345\261\200\345\237\237\347\275\221\350\256\277\351\227\256\351\227\256\351\242\230\346\216\222\346\237\245.md" new file mode 100644 index 0000000000000000000000000000000000000000..c2ff5e8def4de55cb46fb9251eacbacc5e8ca543 --- /dev/null +++ "b/docs/\345\261\200\345\237\237\347\275\221\350\256\277\351\227\256\351\227\256\351\242\230\346\216\222\346\237\245.md" @@ -0,0 +1,195 @@ +# 局域网访问问题排查指南 + +## 问题描述 +从其他设备访问 `http://172.18.16.245:8000/` 时显示 "Load failed" + +## 排查步骤 + +### 1. 确认后端服务正在运行 +在主机上运行: +```bash +python scripts/start_local.py +``` + +确认看到以下输出: +``` +============================================================ +🌟 治愈系记录助手 - SoulMate AI Companion +============================================================ +📍 本地访问: http://localhost:8000/ +📍 局域网访问: http://172.18.16.245:8000/ +📚 API 文档: http://localhost:8000/docs +🔍 健康检查: http://localhost:8000/health +============================================================ +``` + +### 2. 测试后端 API 是否可访问 + +#### 在主机上测试(应该成功): +```bash +curl http://localhost:8000/health +``` + +#### 在其他设备上测试(关键): +打开浏览器访问: +``` +http://172.18.16.245:8000/health +``` + +**预期结果**:应该看到 JSON 响应 +```json +{ + "status": "healthy", + "data_dir": "data", + "max_audio_size": 10485760 +} +``` + +**如果失败**:说明网络连接有问题,继续下一步 + +### 3. 检查防火墙设置 + +#### Windows 防火墙 +1. 打开 "Windows Defender 防火墙" +2. 点击 "允许应用通过防火墙" +3. 
确保 Python 已被允许(专用网络和公用网络都勾选) + +#### 或者临时关闭防火墙测试: +```powershell +# 以管理员身份运行 PowerShell +Set-NetFirewallProfile -Profile Domain,Public,Private -Enabled False +``` + +测试完成后记得重新开启: +```powershell +Set-NetFirewallProfile -Profile Domain,Public,Private -Enabled True +``` + +### 4. 检查网络连接 + +#### 在其他设备上 ping 主机: +```bash +ping 172.18.16.245 +``` + +**预期结果**:应该能 ping 通 + +**如果失败**: +- 确认两台设备在同一局域网 +- 检查主机的 IP 地址是否正确(可能已变化) + +#### 获取当前 IP 地址: +```bash +# Windows +ipconfig + +# 查找 "IPv4 地址" 或 "IPv4 Address" +``` + +### 5. 检查端口是否被占用 + +```bash +# Windows +netstat -ano | findstr :8000 +``` + +如果端口被占用,更换端口或关闭占用端口的程序 + +### 6. 浏览器控制台检查 + +在其他设备的浏览器中: +1. 按 F12 打开开发者工具 +2. 切换到 "Console" 标签 +3. 刷新页面 +4. 查看错误信息 + +**常见错误**: +- `Failed to fetch`: 网络连接问题 +- `CORS error`: CORS 配置问题(但我们已经配置了) +- `404 Not Found`: API 路径错误 +- `Timeout`: 请求超时 + +### 7. 测试 API 端点 + +在其他设备的浏览器中依次访问: + +1. **健康检查**:`http://172.18.16.245:8000/health` +2. **API 状态**:`http://172.18.16.245:8000/api/status` +3. **前端页面**:`http://172.18.16.245:8000/` + +记录每个请求的结果 + +### 8. 检查 CORS 配置 + +后端已配置允许所有来源: +```python +allow_origins=["*"] +``` + +但如果仍有问题,可以尝试更明确的配置: +```python +allow_origins=[ + "http://localhost:5173", + "http://localhost:3000", + "http://172.18.16.245:5173", + "http://172.18.16.245:8000", + "*" +] +``` + +## 快速诊断命令 + +在主机上运行以下命令,将结果发给我: + +```bash +# 1. 检查服务是否运行 +curl http://localhost:8000/health + +# 2. 检查端口监听 +netstat -ano | findstr :8000 + +# 3. 检查 IP 地址 +ipconfig | findstr IPv4 + +# 4. 检查防火墙状态 +netsh advfirewall show allprofiles state +``` + +## 解决方案 + +### 方案 1:临时关闭防火墙测试 +如果关闭防火墙后可以访问,说明是防火墙问题,需要添加防火墙规则 + +### 方案 2:添加防火墙规则 +```powershell +# 以管理员身份运行 +New-NetFirewallRule -DisplayName "Python FastAPI" -Direction Inbound -Program "C:\Path\To\Python\python.exe" -Action Allow +``` + +### 方案 3:使用不同的端口 +如果 8000 端口有问题,可以尝试其他端口(如 8080, 5000) + +修改 `scripts/start_local.py` 中的端口号 + +### 方案 4:检查路由器设置 +某些路由器可能阻止设备间通信(AP 隔离),需要在路由器设置中关闭 + +## 成功标志 + +当以下所有测试都通过时,说明配置正确: + +1. 
✅ 主机可以访问 `http://localhost:8000/health` +2. ✅ 其他设备可以 ping 通 `172.18.16.245` +3. ✅ 其他设备可以访问 `http://172.18.16.245:8000/health` +4. ✅ 其他设备可以访问 `http://172.18.16.245:8000/` +5. ✅ 前端可以正常加载并显示数据 + +## 需要提供的信息 + +如果问题仍未解决,请提供: + +1. 浏览器控制台的完整错误信息(截图或文字) +2. 主机上运行 `curl http://localhost:8000/health` 的结果 +3. 其他设备上访问 `http://172.18.16.245:8000/health` 的结果 +4. 防火墙状态 +5. 两台设备是否在同一局域网 diff --git "a/docs/\345\277\203\346\203\205\346\260\224\346\263\241\346\261\240\345\212\237\350\203\275\350\257\264\346\230\216.md" "b/docs/\345\277\203\346\203\205\346\260\224\346\263\241\346\261\240\345\212\237\350\203\275\350\257\264\346\230\216.md" new file mode 100644 index 0000000000000000000000000000000000000000..337db9d834165195112241e50ffe3c0d10d93175 --- /dev/null +++ "b/docs/\345\277\203\346\203\205\346\260\224\346\263\241\346\261\240\345\212\237\350\203\275\350\257\264\346\230\216.md" @@ -0,0 +1,204 @@ +# 心情气泡池功能说明 + +## 概述 + +心情气泡池是一个基于 **Matter.js 物理引擎**的动态可视化界面,将用户的心情记录转化为可交互的彩色气泡,在屏幕内自由漂浮并发生真实的物理碰撞。 + +## 核心特性 + +### 1. 物理引擎驱动 +- **引擎**: Matter.js +- **重力**: 极轻微的向下重力 (0.05) +- **碰撞**: 真实的物理反弹效果 +- **弹性系数**: 0.6(柔和的水滴质感) +- **空气阻力**: 0.02(自然的减速效果) +- **布朗运动**: 随机微小扰动,模拟自然漂浮 + +### 2. 数据驱动 + +#### 数据来源 +- 从 `moods.json` 读取心情记录 +- 只显示**最近 7 天**的心情数据 +- 实时从后端 API 获取: `/api/moods` + +#### 数据映射 + +| 数据字段 | 视觉映射 | 说明 | +|---------|---------|------| +| `type` | 气泡颜色 + 中心文字 | 心情类型(喜悦、焦虑、平静等) | +| `intensity` | 气泡大小 | 1-10 的强度值,映射到 25-60px 半径 | +| `keywords` | 详情弹窗 | 点击气泡显示关键词标签 | +| `timestamp` | 详情弹窗 | 显示记录时间 | + +### 3. 
颜色映射表 + +| 心情类型 | 填充色 | 边框色 | 光晕色 | 说明 | +|---------|--------|--------|--------|------| +| 喜悦 | 暖橙色 (#FED7AA) | #FB923C | rgba(251,146,60,0.4) | 温暖明亮 | +| 开心 | 粉红色 (#FECACA) | #FB7185 | rgba(251,113,133,0.4) | 活泼可爱 | +| 兴奋 | 亮黄色 (#FEF08A) | #FACC15 | rgba(250,204,21,0.4) | 充满活力 | +| 平静 | 天蓝色 (#BFDBFE) | #60A5FA | rgba(96,165,250,0.4) | 宁静舒适 | +| 放松 | 青绿色 (#D9F99D) | #84CC16 | rgba(132,204,22,0.4) | 自然清新 | +| 焦虑 | 淡紫色 (#DDD6FE) | #A78BFA | rgba(167,139,250,0.4) | 柔和梦幻 | +| 悲伤 | 灰蓝色 (#CBD5E1) | #64748B | rgba(100,116,139,0.4) | 低调沉静 | +| 疲惫 | 浅紫色 (#E0E7FF) | #818CF8 | rgba(129,140,248,0.4) | 柔和朦胧 | + +### 4. 视觉效果 + +#### 毛玻璃质感 +- **半透明渐变**: 使用 CSS 渐变色 +- **光晕效果**: 径向渐变,透明度 0.3 +- **高光**: 左上角白色高光点,模拟光泽 + +#### 动画效果 +- **漂浮**: 气泡在容器内自由移动 +- **碰撞**: 气泡相互碰撞时产生真实反弹 +- **拖拽**: 鼠标可以拖动气泡 +- **布朗运动**: 每 100ms 施加随机微小力 + +### 5. 交互功能 + +#### 点击气泡 +- 显示详情弹窗 +- 包含信息: + - 心情类型(大标题) + - 情绪强度(进度条) + - 关键词标签 + - 记录时间 + +#### 拖拽气泡 +- 鼠标按住气泡可拖动 +- 释放后气泡继续物理运动 +- 拖动时其他气泡会被推开 + +#### 边界限制 +- 气泡被限制在容器内 +- 碰到边界会反弹 +- 不会跑出屏幕 + +## 技术实现 + +### 组件结构 + +``` +MoodView.tsx (主容器) + └── PhysicsMoodBubble.tsx (物理引擎组件) + ├── Matter.js Engine (物理引擎) + ├── Matter.js Render (渲染器) + ├── Matter.js Bodies (气泡实体) + └── Matter.js MouseConstraint (鼠标交互) +``` + +### 关键代码 + +#### 创建气泡 +```typescript +const radius = 25 + (mood.intensity / 10) * 35; // 大小映射 +const body = Matter.Bodies.circle(x, y, radius, { + restitution: 0.6, // 弹性 + friction: 0.01, // 摩擦 + frictionAir: 0.02, // 空气阻力 + density: 0.001 // 密度 +}); +``` + +#### 自定义渲染 +```typescript +Matter.Events.on(render, 'afterRender', () => { + // 绘制光晕 + // 绘制高光 + // 绘制文字 +}); +``` + +#### 布朗运动 +```typescript +setInterval(() => { + bodies.forEach(({ body }) => { + Matter.Body.applyForce(body, body.position, { + x: (Math.random() - 0.5) * 0.0001, + y: (Math.random() - 0.5) * 0.0001 + }); + }); +}, 100); +``` + +## 性能优化 + +### 数据过滤 +- 只显示最近 7 天的心情 +- 避免气泡过多导致性能问题 +- 建议最多显示 20-30 个气泡 + +### 渲染优化 +- 使用 Canvas 渲染,性能优于 DOM +- 物理模拟与渲染都在浏览器主线程上执行(Matter.js 不会自动使用独立线程),因此需要控制气泡数量 +- 自定义渲染只在必要时执行 + +### 内存管理 
+- 组件卸载时清理物理引擎 +- 清除所有事件监听器 +- 释放 Canvas 资源 + +## 测试 + +### 测试页面 +打开 `frontend/test-physics-mood.html` 可以独立测试物理引擎效果。 + +### 测试要点 +1. ✅ 气泡是否正常显示 +2. ✅ 气泡是否会碰撞反弹 +3. ✅ 是否可以拖动气泡 +4. ✅ 点击是否显示详情 +5. ✅ 气泡是否被限制在容器内 +6. ✅ 颜色映射是否正确 +7. ✅ 大小是否与强度对应 + +## 未来优化方向 + +### 功能增强 +- [ ] 添加心情筛选(按类型、时间) +- [ ] 支持气泡合并(相同心情) +- [ ] 添加心情趋势分析 +- [ ] 支持导出心情数据 + +### 视觉优化 +- [ ] 更多心情类型和颜色 +- [ ] 气泡动画效果(呼吸、闪烁) +- [ ] 背景粒子效果 +- [ ] 主题切换(日间/夜间) + +### 交互优化 +- [ ] 双击气泡编辑 +- [ ] 长按删除气泡 +- [ ] 手势缩放容器 +- [ ] 气泡分组显示 + +## 常见问题 + +### Q: 气泡重叠怎么办? +A: 初始位置采用圆形分布,避免重叠。如果仍有重叠,物理引擎会自动推开。 + +### Q: 气泡跑出屏幕? +A: 已添加不可见边界墙,气泡会被限制在容器内。 + +### Q: 动画卡顿? +A: 检查气泡数量,建议不超过 30 个。可以调整时间范围过滤。 + +### Q: 颜色不对? +A: 检查 `getMoodColor` 函数的颜色映射表,确保心情类型匹配。 + +## 相关文件 + +- `frontend/components/MoodView.tsx` - 主视图组件 +- `frontend/components/PhysicsMoodBubble.tsx` - 物理引擎组件 +- `frontend/test-physics-mood.html` - 独立测试页面 +- `data/moods.json` - 心情数据文件 +- `app/main.py` - 后端 API (`/api/moods`) + +## 依赖 + +- `matter-js`: ^0.20.0 - 物理引擎 +- `@types/matter-js`: ^0.20.2 - TypeScript 类型定义 +- `react`: ^19.2.3 +- `lucide-react`: ^0.562.0 - 图标库 diff --git "a/docs/\345\277\203\346\203\205\346\260\224\346\263\241\346\261\240\345\277\253\351\200\237\345\274\200\345\247\213.md" "b/docs/\345\277\203\346\203\205\346\260\224\346\263\241\346\261\240\345\277\253\351\200\237\345\274\200\345\247\213.md" new file mode 100644 index 0000000000000000000000000000000000000000..317843c454200bd23892f6aefb4c6ef670d0a928 --- /dev/null +++ "b/docs/\345\277\203\346\203\205\346\260\224\346\263\241\346\261\240\345\277\253\351\200\237\345\274\200\345\247\213.md" @@ -0,0 +1,245 @@ +# 心情气泡池 - 快速开始 + +## 🚀 立即体验 + +### 方法 1: 独立测试页面(推荐) + +最快的方式是直接打开测试页面: + +```bash +# 在浏览器中打开 +frontend/test-physics-mood.html +``` + +这个页面包含: +- ✅ 完整的物理引擎演示 +- ✅ 8 个模拟心情气泡 +- ✅ 所有交互功能 +- ✅ 无需启动后端 + +### 方法 2: 完整应用 + +1. **启动后端** +```bash +# 使用启动脚本 +启动后端.bat +# 或 +启动后端.ps1 +``` + +2. **启动前端** +```bash +# 使用启动脚本 +启动前端.bat +# 或 +启动前端.ps1 +``` + +3. 
**访问应用** +- 打开浏览器访问: http://localhost:5173 +- 点击顶部的"心情"按钮 +- 查看动态气泡池 + +## 🎮 交互指南 + +### 基本操作 + +| 操作 | 效果 | +|------|------| +| 👆 **点击气泡** | 查看心情详情(类型、强度、关键词、时间) | +| 🖱️ **拖动气泡** | 移动气泡,其他气泡会被推开 | +| 👀 **观察碰撞** | 气泡相互碰撞时会真实反弹 | +| 🌊 **自然漂浮** | 气泡会自动进行微小的随机运动 | + +### 高级技巧 + +1. **快速拖动**: 用力拖动气泡后释放,气泡会高速飞出并与其他气泡碰撞 +2. **聚集效果**: 将多个气泡拖到一起,观察它们如何相互推开 +3. **边界反弹**: 将气泡拖到边缘,观察反弹效果 + +## 📊 添加心情数据 + +### 方法 1: 通过首页输入 + +1. 回到首页 +2. 点击麦克风或输入文字 +3. 说出或写下包含心情的内容,例如: + - "今天感觉很焦虑,工作压力好大" + - "心情很平静,看着窗外的云发呆" + - "超级开心!项目终于完成了" + +4. 系统会自动分析并保存心情 +5. 再次打开心情页面,新气泡会出现 + +### 方法 2: 直接编辑数据文件 + +编辑 `data/moods.json`: + +```json +[ + { + "record_id": "unique-id-1", + "timestamp": "2026-01-17T12:00:00Z", + "type": "喜悦", + "intensity": 9, + "keywords": ["开心", "满足", "成就感"] + }, + { + "record_id": "unique-id-2", + "timestamp": "2026-01-17T14:30:00Z", + "type": "焦虑", + "intensity": 7, + "keywords": ["压力", "担心", "紧张"] + } +] +``` + +**字段说明**: +- `record_id`: 唯一标识符(任意字符串) +- `timestamp`: ISO 8601 格式的时间戳 +- `type`: 心情类型(见下方支持列表) +- `intensity`: 强度值 1-10 +- `keywords`: 关键词数组 + +## 🎨 支持的心情类型 + +| 类型 | 颜色 | 适用场景 | +|------|------|---------| +| 喜悦 | 🟠 暖橙色 | 成就感、满足、愉悦 | +| 开心 | 🩷 粉红色 | 快乐、轻松、愉快 | +| 兴奋 | 🟡 亮黄色 | 激动、期待、充满活力 | +| 平静 | 🔵 天蓝色 | 宁静、安宁、放松 | +| 放松 | 🟢 青绿色 | 舒适、自在、惬意 | +| 焦虑 | 🟣 淡紫色 | 担心、压力、紧张 | +| 悲伤 | ⚪ 灰蓝色 | 难过、失落、低落 | +| 疲惫 | 🟣 浅紫色 | 累、困、疲倦 | + +## 🔧 自定义配置 + +### 调整物理参数 + +编辑 `frontend/components/PhysicsMoodBubble.tsx`: + +```typescript +// 调整重力(越大气泡下沉越快) +gravity: { x: 0, y: 0.05, scale: 0.001 } + +// 调整弹性(0-1,越大越弹) +restitution: 0.6 + +// 调整摩擦力(越大越快停止) +friction: 0.01 + +// 调整空气阻力(越大减速越快) +frictionAir: 0.02 +``` + +### 调整气泡大小 + +```typescript +// 当前公式:25-60px +const radius = 25 + (mood.intensity / 10) * 35; + +// 更大的气泡:40-80px +const radius = 40 + (mood.intensity / 10) * 40; + +// 更小的气泡:15-40px +const radius = 15 + (mood.intensity / 10) * 25; +``` + +### 添加新的心情类型 + +在 `getMoodColor` 函数中添加: + +```typescript +'新心情': { + fill: '#颜色代码', + stroke: '#边框颜色', + glow: 'rgba(R, G, B, 0.4)' 
+} +``` + +### 调整显示时间范围 + +编辑 `frontend/components/MoodView.tsx`: + +```typescript +// 当前:最近 7 天 +const sevenDaysAgo = Date.now() - 7 * 24 * 60 * 60 * 1000; + +// 改为最近 30 天 +const thirtyDaysAgo = Date.now() - 30 * 24 * 60 * 60 * 1000; + +// 改为最近 24 小时 +const oneDayAgo = Date.now() - 24 * 60 * 60 * 1000; +``` + +## 🐛 故障排查 + +### 问题:气泡不显示 + +**可能原因**: +1. 没有心情数据 +2. 数据时间超过 7 天 +3. 后端未启动 + +**解决方案**: +```bash +# 1. 检查数据文件 +cat data/moods.json + +# 2. 检查后端状态 +curl http://localhost:8000/health + +# 3. 查看浏览器控制台错误 +# 按 F12 打开开发者工具 +``` + +### 问题:气泡重叠严重 + +**解决方案**: +- 减少气泡数量(调整时间范围) +- 增大容器尺寸 +- 调整初始位置分布算法 + +### 问题:动画卡顿 + +**解决方案**: +1. 减少气泡数量(< 30 个) +2. 降低渲染质量 +3. 关闭布朗运动效果 + +### 问题:气泡飞出屏幕 + +**检查**: +- 边界墙是否正确创建 +- 容器尺寸是否正确计算 +- 物理引擎是否正常运行 + +## 📚 更多资源 + +- [完整功能说明](./心情气泡池功能说明.md) +- [Matter.js 官方文档](https://brm.io/matter-js/) +- [项目 README](../README.md) + +## 💡 使用建议 + +### 最佳实践 + +1. **定期记录**: 每天记录 2-3 次心情,保持数据新鲜 +2. **详细描述**: 添加关键词,方便回顾 +3. **观察趋势**: 定期查看气泡池,了解情绪变化 +4. **互动体验**: 多拖动气泡,感受物理引擎的乐趣 + +### 创意玩法 + +1. **心情日记**: 每天截图保存气泡池状态 +2. **情绪分析**: 观察哪种颜色的气泡最多 +3. **压力释放**: 拖动气泡碰撞,释放压力 +4. **冥想辅助**: 观察气泡漂浮,放松心情 + +## 🎉 开始使用 + +现在就打开 `frontend/test-physics-mood.html` 或启动完整应用,体验动态心情气泡池吧! 
+ +有任何问题或建议,欢迎反馈。 diff --git "a/docs/\346\234\252\346\235\245\350\277\255\344\273\243\350\256\241\345\210\222.txt" "b/docs/\346\234\252\346\235\245\350\277\255\344\273\243\350\256\241\345\210\222.txt" new file mode 100644 index 0000000000000000000000000000000000000000..fff25cd090c408decbe770a968a3eaa31c8e6b57 --- /dev/null +++ "b/docs/\346\234\252\346\235\245\350\277\255\344\273\243\350\256\241\345\210\222.txt" @@ -0,0 +1,77 @@ +未来迭代计划 +================================================================================ + +v1.1.0 - 数据分析(2月) +------------------------ +- 情绪趋势图表(日/周/月) +- 情绪统计报告 +- 灵感关联网络 +- 灵感分类和搜索 +- 待办优先级和提醒 +- 数据导出功能 + + +v1.2.0 - 社交功能(3月) +------------------------ +- 匿名心情广场 +- 灵感分享市集 +- 好友系统 +- 成就徽章 +- 每日打卡 + + +v1.3.0 - AI升级(4月) +------------------------ +- 多轮对话记忆 +- 情感识别 +- AI主动关怀 +- 个性化推荐 +- AI动态表情 +- 语音对话 + + +v1.4.0 - 移动端(5月) +------------------------ +- iOS/Android App +- 智能手表集成 +- 智能音箱支持 +- 云端同步 +- 离线模式 + + +v2.0.0 - 生态系统(Q3) +------------------------ +- 心理咨询师入驻 +- 心理测评 +- 治愈内容库 +- 课程体系 +- 企业版 + + +长期方向 +------------------------ +- 更智能的AI对话 +- 端到端加密 +- 多模态输入(图片/视频) +- 开放API和插件系统 +- 情绪预警模型 + + +迭代原则 +------------------------ +1. 用户需求优先 +2. 隐私安全第一 +3. 保持温暖治愈的风格 +4. 技术持续创新 +5. 
可持续发展 + + +参与方式 +------------------------ +- GitHub提交功能建议 +- 提交代码PR +- 参与Beta测试 +- 分享使用体验 + + +最后更新:2026-01-18 diff --git "a/docs/\350\257\255\351\237\263\345\275\225\345\210\266\351\227\256\351\242\230\346\216\222\346\237\245.md" "b/docs/\350\257\255\351\237\263\345\275\225\345\210\266\351\227\256\351\242\230\346\216\222\346\237\245.md" new file mode 100644 index 0000000000000000000000000000000000000000..cab200963a51aab79cb80b361df3be684854e5b4 --- /dev/null +++ "b/docs/\350\257\255\351\237\263\345\275\225\345\210\266\351\227\256\351\242\230\346\216\222\346\237\245.md" @@ -0,0 +1,300 @@ +# 语音录制问题排查指南 + +## 概述 + +本文档整合了语音录制功能的所有相关信息,包括音频格式、使用说明和故障排查。 + +## 音频格式支持 + +### 问题背景 + +浏览器的 MediaRecorder API 默认录制的音频格式是 **webm**,但后端 ASR 服务支持的格式是: +- ✅ mp3 +- ✅ wav +- ✅ m4a +- ✅ webm(已添加支持) + +### 解决方案 + +系统实现了自动格式转换: + +**转换流程:** +``` +webm 录音 → Web Audio API 解码 → AudioBuffer → 手动编码 WAV → 上传 +``` + +**转换时间:** 约 1 秒(用户几乎无感知) + +**浏览器支持:** +- ✅ Chrome/Edge - webm → wav 自动转换 +- ✅ Firefox - webm → wav 自动转换 +- ✅ Safari - mp4/aac 直接支持 +- ✅ 移动端浏览器 - 全部支持 + +## 使用指南 + +### 在应用中使用 + +1. 打开应用首页:http://localhost:5173 +2. 点击大圆形麦克风按钮 +3. 允许浏览器访问麦克风(首次使用) +4. 开始说话 +5. 再次点击麦克风停止录音 +6. 等待处理(1-3 秒) +7. 看到"记录成功!"提示 + +### 测试录音功能 + +打开测试页面:`test_audio_recording.html` + +**功能:** +- 查看浏览器支持的音频格式 +- 录制音频并预览 +- 测试格式转换 +- 上传到后端测试 + +## 常见问题 + +### Q1: 为什么需要麦克风权限? + +**A:** 浏览器安全策略要求,必须用户明确授权才能访问麦克风。 + +**解决方法:** +1. 点击浏览器地址栏的麦克风图标 +2. 选择"允许" +3. 刷新页面重试 + +### Q2: 为什么录音后有延迟? + +**A:** 这是正常的,因为需要: +1. 格式转换(webm → wav):约 0.5-1 秒 +2. 上传到后端:约 0.5-1 秒 +3. ASR 识别:约 1-2 秒 +4. 语义分析:约 1-2 秒 + +**总计:** 约 3-6 秒 + +### Q3: 录音音质不好怎么办? + +**可能原因:** +- 麦克风质量差 +- 环境噪音大 +- 距离麦克风太远 + +**解决方法:** +1. 使用外接麦克风 +2. 在安静环境录音 +3. 靠近麦克风说话 +4. 说话清晰、语速适中 + +### Q4: 录音失败怎么办? + +**错误信息:** "无法访问麦克风" + +**解决方法:** +1. 检查麦克风是否连接 +2. 检查浏览器权限设置 +3. 检查系统麦克风权限 +4. 尝试其他浏览器 + +**错误信息:** "处理失败" + +**解决方法:** +1. 检查网络连接 +2. 检查后端是否运行 +3. 查看浏览器控制台错误 +4. 查看后端日志 + +### Q5: 可以录多长时间? 
+ +**A:** 目前没有硬性限制,但建议: +- 单次录音 < 60 秒 +- 说话清晰、简洁 +- 避免长时间停顿 + +**原因:** +- 文件越大,上传越慢 +- 转换时间越长 +- ASR 识别可能不准确 + +## 使用技巧 + +### 1. 录音前准备 + +- ✅ 确保环境安静 +- ✅ 检查麦克风工作正常 +- ✅ 想好要说的内容 +- ✅ 准备好开始录音 + +### 2. 录音中 + +- ✅ 说话清晰、语速适中 +- ✅ 避免长时间停顿 +- ✅ 一次说完一个完整想法 +- ✅ 注意录音指示灯(脉冲动画) + +### 3. 录音后 + +- ✅ 等待处理完成 +- ✅ 查看"记录成功"提示 +- ✅ 检查记录是否正确 +- ✅ 如有错误,可以重新录制 + +### 4. 最佳实践 + +**好的录音示例:** +``` +"今天天气很好,心情不错。 +想到一个新点子:做一个治愈系应用。 +明天要记得买书。" +``` + +**不好的录音示例:** +``` +"嗯... 今天... 呃... 那个... +(长时间停顿) +我想... 嗯... 做个... 什么来着..." +``` + +## 技术细节 + +### 录音参数 + +```typescript +const options = { + mimeType: 'audio/webm;codecs=opus', + audioBitsPerSecond: 128000 // 128 kbps +}; +``` + +### WAV 格式参数 + +``` +- 格式:PCM +- 采样率:原始采样率(通常 48000 Hz) +- 位深度:16-bit +- 声道:单声道或立体声(取决于录音) +``` + +### 文件大小估算 + +``` +录音时长 × 采样率 × 位深度 × 声道数 / 8 + +示例(10 秒单声道录音): +10 × 48000 × 16 × 1 / 8 = 960,000 bytes ≈ 938 KB +``` + +## 浏览器兼容性 + +### Chrome/Edge ✅ (推荐) + +- 录音格式:webm +- 自动转换:是 +- 音质:优秀 +- 性能:优秀 + +### Firefox ✅ + +- 录音格式:webm +- 自动转换:是 +- 音质:优秀 +- 性能:良好 + +### Safari ✅ + +- 录音格式:mp4/aac +- 自动转换:否(直接支持) +- 音质:优秀 +- 性能:优秀 + +### 移动端浏览器 ✅ + +- iOS Safari:支持 +- Android Chrome:支持 +- 微信浏览器:支持(需要 HTTPS) + +## 性能优化 + +### 转换性能 + +- **转换时间:** 通常 < 1 秒(取决于录音长度) +- **内存占用:** 临时增加(录音长度 × 2) +- **CPU 使用:** 中等(Web Audio API 解码 + WAV 编码) + +### 优化建议 + +1. **限制录音时长** + ```typescript + // 最多录制 60 秒 + setTimeout(() => { + if (mediaRecorder.state === 'recording') { + mediaRecorder.stop(); + } + }, 60000); + ``` + +2. **显示转换进度** + ```typescript + setProcessing(true); + setStatus('正在转换音频格式...'); + const wavBlob = await convertWebmToWav(audioBlob); + setStatus('正在上传...'); + ``` + +3. 
**错误处理** + ```typescript + try { + const wavBlob = await convertWebmToWav(audioBlob); + // 上传 wav + } catch (conversionError) { + console.error('Conversion failed:', conversionError); + // 降级:尝试直接上传 webm + const file = new File([audioBlob], 'recording.mp3', { type: 'audio/mpeg' }); + await apiService.processInput(file); + } + ``` + +## 故障排查工具 + +### 1. 浏览器控制台 + +按 F12 打开开发者工具,查看: +- Console:错误信息 +- Network:网络请求 +- Application:权限设置 + +### 2. 测试页面 + +打开 `test_audio_recording.html`: +- 查看浏览器支持的格式 +- 测试录音和转换 +- 查看详细错误信息 + +### 3. 后端日志 + +```bash +tail -f logs/app.log +``` + +查看: +- ASR 调用日志 +- 错误堆栈 +- 处理时间 + +## 总结 + +语音录制功能已经完全支持现代浏览器: + +✅ **自动格式转换** - webm → wav +✅ **跨浏览器支持** - Chrome/Firefox/Safari +✅ **友好的用户体验** - 清晰的状态提示 +✅ **完善的错误处理** - 降级方案 +✅ **详细的文档** - 使用说明和故障排查 + +现在你可以在任何浏览器中流畅使用语音录制功能了! + +--- + +**更新时间:** 2024-01-17 +**状态:** ✅ 已完成 diff --git a/frontend/.gitignore b/frontend/.gitignore new file mode 100644 index 0000000000000000000000000000000000000000..b5592da50fb4617134691ca4d1f3806dd5389e05 --- /dev/null +++ b/frontend/.gitignore @@ -0,0 +1,24 @@ +# Logs +logs +*.log +npm-debug.log* +yarn-debug.log* +yarn-error.log* +pnpm-debug.log* +lerna-debug.log* + +node_modules +# dist - 注释掉,部署需要 dist 文件夹 +dist-ssr +*.local + +# Editor directories and files +.vscode/* +!.vscode/extensions.json +.idea +.DS_Store +*.suo +*.ntvs* +*.njsproj +*.sln +*.sw? 
diff --git a/frontend/App.tsx b/frontend/App.tsx new file mode 100644 index 0000000000000000000000000000000000000000..b61164a5542737034646d05c43faba6aa7106056 --- /dev/null +++ b/frontend/App.tsx @@ -0,0 +1,892 @@ +import React, { useState, useEffect } from 'react'; +import { AIEntity } from './components/AIEntity'; +import { TopActions } from './components/TopActions'; +import { BottomNav } from './components/BottomNav'; +import { CustomizationButton } from './components/CustomizationButton'; +import { CharacterCustomizationDialog, CharacterPreferences } from './components/CharacterCustomizationDialog'; +import { ChatDialog } from './components/ChatDialog'; +import { RecordView } from './components/RecordView'; +import { CommunityView } from './components/CommunityView'; +import { MineView } from './components/MineView'; +import { MoodView } from './components/MoodView'; +import { InspirationView } from './components/InspirationView'; +import { TodoView } from './components/TodoView'; +import { HomeInput } from './components/HomeInput'; +import { + Tab, + MoodAction, + RecordItem, + RecordSource, + CommunityPost, + Profile, + DeviceStatus, + MoodItem, + MoodType, + InspirationItem, + TodoItem +} from './types'; +import { apiService } from './services/api'; +import { + transformRecord, + transformMood, + transformInspiration, + transformTodo +} from './utils/dataTransform'; + +// Mock Data: Records - 丰富多样的记录数据 +const generateMockRecords = (): RecordItem[] => { + const now = Date.now(); + const hour = 60 * 60 * 1000; + const day = 24 * hour; + + return [ + // 今天 - 早晨 + { + id: '1', + content: "早上6点醒来,窗外的鸟鸣声特别清脆。新的一天,充满期待。", + createdAt: now - hour * 2, + sourceType: RecordSource.MOOD + }, + { + id: '2', + content: "晨跑时看到日出,天空从深蓝渐变到橙粉色,美得让人想哭。", + createdAt: now - hour * 1.5, + sourceType: RecordSource.VOICE + }, + // 今天 - 上午 + { + id: '3', + content: "咖啡店里的音乐很好听,是一首法语歌。要记得回去搜一下。", + createdAt: now - hour, + sourceType: RecordSource.INSPIRATION + }, + { + id: '4', + 
content: "工作会议上提出的方案被采纳了,同事们的认可让我很开心。", + createdAt: now - hour * 0.5, + sourceType: RecordSource.MOOD + }, + // 昨天 - 下午 + { + id: '5', + content: "午后的阳光透过百叶窗,在桌上投下一道道光影。时间仿佛静止了。", + createdAt: now - day - hour * 6, + sourceType: RecordSource.MANUAL + }, + { + id: '6', + content: "突然想到:如果每个人的记忆都能变成一本书,那会是什么样子?", + createdAt: now - day - hour * 5, + sourceType: RecordSource.INSPIRATION + }, + // 昨天 - 傍晚 + { + id: '7', + content: "下班路上遇到一只流浪猫,蹲下来和它对视了很久。它的眼睛很清澈。", + createdAt: now - day - hour * 2, + sourceType: RecordSource.VOICE + }, + { + id: '8', + content: "晚餐做了番茄炒蛋,虽然简单,但吃得很满足。生活的幸福就是这样。", + createdAt: now - day - hour, + sourceType: RecordSource.MOOD + }, + // 前天 - 全天 + { + id: '9', + content: "今天有点焦虑,deadline临近,但还有很多工作没完成。深呼吸,一步一步来。", + createdAt: now - day * 2 - hour * 8, + sourceType: RecordSource.MOOD + }, + { + id: '10', + content: "中午和老朋友视频通话,虽然各自忙碌,但友谊依然温暖。", + createdAt: now - day * 2 - hour * 6, + sourceType: RecordSource.VOICE + }, + { + id: '11', + content: "读到一句话:'不要因为走得太远,而忘记为什么出发。' 很有共鸣。", + createdAt: now - day * 2 - hour * 3, + sourceType: RecordSource.MANUAL + }, + { + id: '12', + content: "晚上加班到很晚,但看到项目进展顺利,疲惫中带着一丝成就感。", + createdAt: now - day * 2 - hour, + sourceType: RecordSource.MOOD + }, + // 三天前 + { + id: '13', + content: "周末去了郊外,看到一大片向日葵田。金黄色的花海,治愈了整个人。", + createdAt: now - day * 3 - hour * 10, + sourceType: RecordSource.VOICE + }, + { + id: '14', + content: "在书店翻到一本旧书,扉页上有人写的字:'愿你永远保持好奇心'。", + createdAt: now - day * 3 - hour * 7, + sourceType: RecordSource.INSPIRATION + }, + { + id: '15', + content: "下午茶时间,点了一块抹茶蛋糕。甜食真的能让心情变好。", + createdAt: now - day * 3 - hour * 4, + sourceType: RecordSource.MOOD + }, + // 四天前 + { + id: '16', + content: "今天状态不太好,做什么都提不起劲。也许只是需要好好休息一下。", + createdAt: now - day * 4 - hour * 9, + sourceType: RecordSource.MOOD + }, + { + id: '17', + content: "妈妈打电话来,聊了很久。她说最近在学广场舞,听起来很开心。", + createdAt: now - day * 4 - hour * 5, + sourceType: RecordSource.VOICE + }, + { + id: '18', + content: 
"晚上看了一部老电影,《天使爱美丽》。生活需要一些小确幸和浪漫。", + createdAt: now - day * 4 - hour * 2, + sourceType: RecordSource.MANUAL + }, + // 五天前 + { + id: '19', + content: "早起去菜市场,看到各种新鲜的蔬菜水果,感受到生活的烟火气。", + createdAt: now - day * 5 - hour * 11, + sourceType: RecordSource.MOOD + }, + { + id: '20', + content: "灵感:设计一个'慢生活'主题的摄影集,记录日常中被忽略的美好瞬间。", + createdAt: now - day * 5 - hour * 8, + sourceType: RecordSource.INSPIRATION + }, + { + id: '21', + content: "下午在公园散步,看到一对老夫妻手牵手。希望自己老了也能这样。", + createdAt: now - day * 5 - hour * 4, + sourceType: RecordSource.VOICE + }, + // 六天前 + { + id: '22', + content: "工作上遇到了一些挫折,有点沮丧。但转念一想,这也是成长的机会。", + createdAt: now - day * 6 - hour * 10, + sourceType: RecordSource.MOOD + }, + { + id: '23', + content: "中午吃饭时,餐厅放的背景音乐是《月亮代表我的心》,突然很想家。", + createdAt: now - day * 6 - hour * 6, + sourceType: RecordSource.MANUAL + }, + { + id: '24', + content: "晚上和室友一起做饭,虽然厨艺不精,但笑声不断。这就是生活的乐趣。", + createdAt: now - day * 6 - hour * 2, + sourceType: RecordSource.VOICE + }, + // 一周前 + { + id: '25', + content: "今天是周一,新的一周开始。给自己定个小目标:每天进步一点点。", + createdAt: now - day * 7 - hour * 9, + sourceType: RecordSource.MOOD + }, + { + id: '26', + content: "路过花店,买了一束雏菊。白色的花瓣,简单却很美。", + createdAt: now - day * 7 - hour * 5, + sourceType: RecordSource.MANUAL + }, + { + id: '27', + content: "晚上写日记时想到:记录生活不是为了回忆,而是为了更好地活在当下。", + createdAt: now - day * 7 - hour * 1, + sourceType: RecordSource.INSPIRATION + }, + // 更早之前 + { + id: '28', + content: "雨后的街道,空气中弥漫着泥土的清香。这是大自然的馈赠。", + createdAt: now - day * 10 - hour * 7, + sourceType: RecordSource.VOICE + }, + { + id: '29', + content: "完成了一个困扰很久的难题,那种豁然开朗的感觉太棒了!", + createdAt: now - day * 12 - hour * 4, + sourceType: RecordSource.MOOD + }, + { + id: '30', + content: "深夜听歌,突然被一句歌词击中:'我们都在时光里跌跌撞撞地成长'。", + createdAt: now - day * 14 - hour * 2, + sourceType: RecordSource.MANUAL + } + ]; +}; + +// Mock Data: Community Posts +const generateMockPosts = (): CommunityPost[] => { + const now = Date.now(); + return [ + { + id: '101', + user: { name: '安静的观察者', 
avatarColor: 'bg-indigo-200' }, + content: "有人也觉得自己在等待一些还没发生的事情吗?这是一种对未来的奇怪怀念。", + createdAt: now - 1000 * 60 * 10, // 10 mins ago + likeCount: 12, + isLiked: false, + commentCount: 3 + }, + { + id: '102', + user: { name: '温柔的灵魂', avatarColor: 'bg-pink-200' }, + content: "泡了一杯茶,看着蒸汽升起看了5分钟。这是我今天最美好的时刻。", + createdAt: now - 1000 * 60 * 45, + likeCount: 28, + isLiked: true, + commentCount: 5 + }, + { + id: '103', + user: { name: '匿名', avatarColor: 'bg-teal-200' }, + content: "今天我试着对自己更温柔一些。这比对别人温柔更难。", + createdAt: now - 1000 * 60 * 60 * 2, + likeCount: 45, + isLiked: false, + commentCount: 8 + }, + { + id: '104', + user: { name: '云中漫步者', avatarColor: 'bg-blue-200' }, + content: "今天的日落特别粉。", + createdAt: now - 1000 * 60 * 60 * 5, + likeCount: 8, + isLiked: false, + commentCount: 0 + } + ]; +}; + +// Mock Data: Profile & Device +const mockProfile: Profile = { + name: "小雅", + birthday: "3月12日", + moodStatus: "感觉平静而专注", +}; + +const mockDeviceStatus: DeviceStatus = { + isConnected: true, + batteryLevel: 82, + deviceName: "心灵伴侣吊坠" +}; + +// Mock Data: Moods - 包含所有情绪类型 +const generateMockMoods = (): MoodItem[] => { + const now = Date.now(); + const hour = 60 * 60 * 1000; + return [ + // HAPPY - 开心 + { + id: 'm1', + type: MoodType.HAPPY, + date: now - hour * 2, + intensity: 0.9, + x: 25, + y: 30 + }, + { + id: 'm2', + type: MoodType.HAPPY, + date: now - hour * 5, + intensity: 0.7, + x: 70, + y: 25 + }, + // CALM - 平静 + { + id: 'm3', + type: MoodType.CALM, + date: now - hour * 1, + intensity: 0.8, + x: 35, + y: 45 + }, + { + id: 'm4', + type: MoodType.CALM, + date: now - hour * 8, + intensity: 0.6, + x: 55, + y: 60 + }, + { + id: 'm5', + type: MoodType.CALM, + date: now - hour * 12, + intensity: 0.75, + x: 20, + y: 70 + }, + // TIRED - 疲惫 + { + id: 'm6', + type: MoodType.TIRED, + date: now - hour * 3, + intensity: 0.5, + x: 65, + y: 50 + }, + { + id: 'm7', + type: MoodType.TIRED, + date: now - hour * 10, + intensity: 0.4, + x: 45, + y: 75 + }, + // ANXIOUS - 焦虑 + { + id: 
'm8', + type: MoodType.ANXIOUS, + date: now - hour * 4, + intensity: 0.6, + x: 80, + y: 40 + }, + { + id: 'm9', + type: MoodType.ANXIOUS, + date: now - hour * 6, + intensity: 0.55, + x: 30, + y: 55 + }, + // HOPEFUL - 充满希望 + { + id: 'm10', + type: MoodType.HOPEFUL, + date: now, + intensity: 0.85, + x: 50, + y: 35 + }, + { + id: 'm11', + type: MoodType.HOPEFUL, + date: now - hour * 7, + intensity: 0.7, + x: 75, + y: 65 + }, + { + id: 'm12', + type: MoodType.HOPEFUL, + date: now - hour * 11, + intensity: 0.65, + x: 40, + y: 20 + } + ]; +}; + +// Mock Data: Inspirations - 丰富的灵感数据 +const generateMockInspirations = (): InspirationItem[] => { + const now = Date.now(); + const hour = 60 * 60 * 1000; + const day = 24 * hour; + return [ + { + id: 'i1', + content: "如果云朵只是地球在做梦呢?", + createdAt: now - hour * 2, + tags: ['随想', '自然'] + }, + { + id: 'i2', + content: "设计概念:一个不显示数字的时钟,只用颜色代表一天的能量。", + createdAt: now - hour * 5, + tags: ['设计', '创意'] + }, + { + id: 'i3', + content: "旧书和咖啡的香气,是时光最温柔的记忆。", + createdAt: now - hour * 8, + tags: ['生活', '随想'] + }, + { + id: 'i4', + content: "记得在接电话前深呼吸,给自己三秒钟的准备时间。", + createdAt: now - day, + tags: ['提醒', '生活'] + }, + { + id: 'i5', + content: "也许每个人都是一座岛屿,而友谊是连接彼此的桥梁。", + createdAt: now - day - hour * 3, + tags: ['随想', '友情'] + }, + { + id: 'i6', + content: "学习新技能时,不要害怕犯错,错误是成长的阶梯。", + createdAt: now - day * 2, + tags: ['学习', '成长'] + }, + { + id: 'i7', + content: "工作灵感:用番茄工作法,25分钟专注,5分钟放松。", + createdAt: now - day * 2 - hour * 4, + tags: ['工作', '提醒'] + }, + { + id: 'i8', + content: "雨后的空气里藏着大地的秘密。", + createdAt: now - day * 3, + tags: ['自然', '随想'] + }, + { + id: 'i9', + content: "创意想法:设计一个记录每天小确幸的应用。", + createdAt: now - day * 3 - hour * 6, + tags: ['创意', '设计'] + }, + { + id: 'i10', + content: "真正的朋友,是那个在你沉默时也能理解你的人。", + createdAt: now - day * 4, + tags: ['友情', '生活'] + }, + { + id: 'i11', + content: "每天写三件感恩的事,心态会慢慢变得更积极。", + createdAt: now - day * 5, + tags: ['成长', '提醒'] + }, + { + id: 'i12', + content: "工作中遇到困难时,试着换个角度思考问题。", + createdAt: 
// Mock data: todos. Six open items that carry a due time, followed by three
// finished items that have no `scheduledAt` key at all.
const generateMockTodos = (): TodoItem[] => {
  const HOUR_MS = 60 * 60 * 1000;
  const DAY_MS = 24 * HOUR_MS;
  // Single reference instant so every offset below is measured from the same "now".
  const reference = Date.now();

  // Open todo created `age` ms before the reference instant and due `dueIn` ms after it.
  const pending = (
    id: string,
    title: string,
    age: number,
    dueIn: number,
    category: TodoItem['category']
  ): TodoItem => ({
    id,
    title,
    createdAt: reference - age,
    scheduledAt: reference + dueIn,
    isDone: false,
    category
  });

  // Finished todo created `age` ms before the reference instant; deliberately has no due time.
  const finished = (
    id: string,
    title: string,
    age: number,
    category: TodoItem['category']
  ): TodoItem => ({
    id,
    title,
    createdAt: reference - age,
    isDone: true,
    category
  });

  return [
    pending('t1', "慢慢喝一杯水", 0, HOUR_MS / 2, 'health'),
    pending('t2', "读《小王子》20页", HOUR_MS, HOUR_MS * 2, 'life'),
    pending('t3', "回复小雪关于项目的邮件", HOUR_MS * 4, DAY_MS, 'work'),
    pending('t4', "整理学习笔记", HOUR_MS * 6, DAY_MS, 'study'),
    pending('t5', "晚上8点运动30分钟", DAY_MS, HOUR_MS * 8, 'health'),
    pending('t6', "准备明天的会议材料", DAY_MS, DAY_MS, 'work'),
    finished('t7', "买鲜花", DAY_MS, 'life'),
    finished('t8', "完成英语作业", DAY_MS * 2, 'study'),
    finished('t9', "给植物浇水", DAY_MS * 2, 'life')
  ];
};
/**
 * Fetch records, moods, inspirations, todos and the user config in parallel
 * and copy whatever came back into component state.
 *
 * A list is only replaced when the server returned at least one item, so the
 * data already on screen (the mock data on first load) stays in place for an
 * empty or failed response. Unlike a bare `.catch(() => fallback)`, a failed
 * request is logged and surfaces through the `error` state instead of being
 * silently discarded.
 */
const loadAllData = async () => {
  let failedRequests = 0;

  // Resolve to null instead of rejecting so that one failing endpoint cannot
  // abort the others, while still remembering that something went wrong.
  const settle = async <T,>(label: string, request: Promise<T>): Promise<T | null> => {
    try {
      return await request;
    } catch (err) {
      failedRequests += 1;
      console.error(`Failed to load ${label}:`, err);
      return null;
    }
  };

  try {
    setLoading(true);
    setError(null);

    // Load all data in parallel
    const [recordsRes, moodsRes, inspirationsRes, todosRes, userConfigRes] = await Promise.all([
      settle('records', apiService.getRecords()),
      settle('moods', apiService.getMoods()),
      settle('inspirations', apiService.getInspirations()),
      settle('todos', apiService.getTodos()),
      settle('user config', apiService.getUserConfig())
    ]);

    // Transform and set data; a missing or empty list leaves current state untouched.
    const records = recordsRes?.records ?? [];
    if (records.length > 0) {
      setRecords(records.map(transformRecord));
    }

    const moods = moodsRes?.moods ?? [];
    if (moods.length > 0) {
      setMoods(moods.map((m, i) => transformMood(m, i)));
    }

    const inspirations = inspirationsRes?.inspirations ?? [];
    if (inspirations.length > 0) {
      setInspirations(inspirations.map(transformInspiration));
    }

    const todoList = todosRes?.todos ?? [];
    if (todoList.length > 0) {
      setTodos(todoList.map(transformTodo));
    }

    // Set character image
    if (userConfigRes?.character?.image_url) {
      setCharacterImageUrl(userConfigRes.character.image_url);
    }

    // Set character preferences
    if (userConfigRes?.character?.preferences) {
      setCharacterPreferences(userConfigRes.character.preferences);
    }

    if (failedRequests > 0) {
      setError('Failed to load data from server. Using mock data.');
    }
  } catch (err) {
    // Reached when a request throws synchronously or a transform fails.
    console.error('Failed to load data:', err);
    setError('Failed to load data from server. Using mock data.');
  } finally {
    setLoading(false);
  }
};
/**
 * Toggle a todo between done and not done.
 *
 * The UI is updated optimistically, then the new status ('completed' or
 * 'pending') is sent to the backend; if that call fails the flag is put back.
 * Both writes use the functional form of `setTodos` so they apply to the
 * latest list rather than the `todos` snapshot captured when the handler
 * started. Rolling back from the snapshot would overwrite any change made
 * while the request was in flight.
 */
const handleToggleTodo = async (id: string) => {
  const todo = todos.find(t => t.id === id);
  if (!todo) return;

  const wasDone = todo.isDone;
  const newStatus = wasDone ? 'pending' : 'completed';

  // Touch only the affected item, always starting from the current state.
  const applyDone = (isDone: boolean) =>
    setTodos(prev => prev.map(t => (t.id === id ? { ...t, isDone } : t)));

  // Optimistic update
  applyDone(!wasDone);

  // Update backend
  try {
    await apiService.updateTodoStatus(id, newStatus);
  } catch (err) {
    console.error('Failed to update todo:', err);
    // Revert on error
    applyDone(wasDone);
  }
};
+ {/* Ambient noise texture overlay */} +
+ + {/* Main Content Area */} +
+ + {/* HOME VIEW */} + {isHome && !activeActionView && ( + <> + + +
+
+ I'm here with you +
+ + console.log('AI greeting:', greeting)} + onOpenChat={handleOpenChat} + /> + + {/* Home Input Component */} +
+ +
+
+ + setShowCustomizationDialog(true)} /> + + )} + + {/* RECORD VIEW */} + {currentTab === Tab.RECORD && !activeActionView && ( + + )} + + {/* COMMUNITY VIEW */} + {currentTab === Tab.COMMUNITY && !activeActionView && ( + + )} + + {/* MINE VIEW */} + {currentTab === Tab.MINE && !activeActionView && ( + + )} + + {/* --- FULL SCREEN OVERLAYS --- */} + + {/* MOOD PAGE OVERLAY */} + {activeActionView === MoodAction.MOOD && ( + + )} + + {/* INSPIRATION PAGE OVERLAY */} + {activeActionView === MoodAction.INSPIRATION && ( + + )} + + {/* TODO PAGE OVERLAY */} + {activeActionView === MoodAction.TODO && ( + + )} + + {/* Bottom Navigation - 始终显示,z-index 最高 */} + +
+ + {/* Character Customization Dialog */} + setShowCustomizationDialog(false)} + onGenerate={handleGenerateCharacter} + onSelectHistory={(imageUrl) => { + setCharacterImageUrl(imageUrl); + }} + currentPreferences={characterPreferences} + currentImageUrl={characterImageUrl} + /> + + {/* Chat Dialog */} + setShowChatDialog(false)} + characterImageUrl={characterImageUrl} + onSendMessage={handleSendMessage} + /> +
+ ); +} \ No newline at end of file diff --git a/frontend/README.md b/frontend/README.md new file mode 100644 index 0000000000000000000000000000000000000000..b1b072da6c8947dd0d197592bbca055034eb477c --- /dev/null +++ b/frontend/README.md @@ -0,0 +1,20 @@ +
+GHBanner +
// Build a greeting for the speech bubble. When a latest record is available the
// greeting quotes its first 20 characters; otherwise a generic greeting is used.
// One candidate is picked at random in either case.
const generateGreeting = () => {
  const pickRandom = (options: string[]) =>
    options[Math.floor(Math.random() * options.length)];

  if (latestRecord) {
    // Only add the ellipsis when text was actually cut off; a short record is
    // quoted in full instead of being shown as if it had been truncated.
    const MAX_EXCERPT_LENGTH = 20;
    const excerpt =
      latestRecord.length > MAX_EXCERPT_LENGTH
        ? `${latestRecord.substring(0, MAX_EXCERPT_LENGTH)}...`
        : latestRecord;

    return pickRandom([
      `刚才你说"${excerpt}",想聊聊吗?`,
      `关于"${excerpt}",我有些想法想和你分享~`,
      `听到你说"${excerpt}",我很想了解更多呢`,
      `"${excerpt}"这让我想起了一些事情,要聊聊吗?`,
    ]);
  }

  // Default greetings
  return pickRandom([
    '嗨!今天过得怎么样呀?',
    '你好呀!有什么想和我分享的吗?',
    'Hi~ 我一直在这里陪着你哦',
    '嘿!要不要聊聊天?',
    '你来啦!今天感觉如何?',
  ]);
};
handleCloseGreeting = (e: React.MouseEvent) => { + e.stopPropagation(); + setShowGreeting(false); + }; + + const handleOpenChat = (e: React.MouseEvent) => { + e.stopPropagation(); + if (onOpenChat) { + onOpenChat(); + } + }; + + return ( +
+ {/* Outer Glow / Aura - 增强动效 */} +
+ + {imageUrl ? ( + // 显示角色图片 + <> + {/* 背景光晕 - 增强动效 */} +
+
+
+ + {/* 角色图片 - 添加悬停和点击效果 */} +
setIsHovered(true)} + onMouseLeave={() => setIsHovered(false)} + > + AI Character + + {/* 悬停时的光效 */} + {isHovered && ( +
+ )} +
+ + {/* 前景光效 - 增强 */} +
+ + {/* 悬停提示 */} + {isHovered && !showGreeting && ( +
+
+ + 点击和我聊天 +
+
+ )} + + ) : ( + // 默认的抽象形态 - 增强动效 + <> + {/* Main Morphing Blob 1 */} +
+
+
+ + {/* Secondary Morphing Blob 2 */} +
+
+
+ + {/* Core "Soul" Light */} +
+ + {/* 可点击区域 */} +
setIsHovered(true)} + onMouseLeave={() => setIsHovered(false)} + >
+ + )} + + {/* 可爱的对话框 */} + {showGreeting && ( +
+
+ {/* 关闭按钮 */} + + + {/* 对话内容 */} +
+ +
+

+ {greeting} +

+
+
+ + {/* 小尾巴 */} +
+
+
+ )} +
/**
 * Ask for microphone access and start recording into `audioChunksRef`.
 *
 * When the recorder stops, the collected chunks are combined into one
 * 'audio/webm' blob and handed to `processAudio`. The microphone tracks are
 * released as soon as recording stops, and also when set-up fails after
 * access was granted (for example if constructing or starting the
 * MediaRecorder throws), so the stream is never left open.
 */
const startRecording = async () => {
  let stream: MediaStream | undefined;
  const releaseMicrophone = () => stream?.getTracks().forEach(track => track.stop());

  try {
    stream = await navigator.mediaDevices.getUserMedia({ audio: true });
    const mediaRecorder = new MediaRecorder(stream);

    mediaRecorderRef.current = mediaRecorder;
    audioChunksRef.current = [];

    mediaRecorder.ondataavailable = (event) => {
      if (event.data.size > 0) {
        audioChunksRef.current.push(event.data);
      }
    };

    mediaRecorder.onstop = async () => {
      // All data has been delivered by the time `stop` fires, so the tracks
      // can be released before the (possibly slow) processing step.
      releaseMicrophone();

      const audioBlob = new Blob(audioChunksRef.current, { type: 'audio/webm' });
      await processAudio(audioBlob);
    };

    mediaRecorder.start();
    setIsRecording(true);
  } catch (err) {
    // No-op when getUserMedia itself failed; otherwise closes the granted stream.
    releaseMicrophone();
    console.error('Failed to start recording:', err);
    alert('无法访问麦克风,请检查权限设置');
  }
};
+
+ {/* 头部 */} +
+

+ ✨ 记录灵感 +

+ +
+ + {/* 输入区域 */} +
+ {/* 文本输入 */} + + +
+
+ + +
+

📚 获取记录

+ + + + +
+
+ + +
+

⚙️ 用户配置

+ +
+
+
+ + + + diff --git a/frontend/test-inspiration-graph.html b/frontend/test-inspiration-graph.html new file mode 100644 index 0000000000000000000000000000000000000000..b98d892e0741c21391816a62b27805ec218aa62d --- /dev/null +++ b/frontend/test-inspiration-graph.html @@ -0,0 +1,575 @@ + + + + + + 灵感知识图谱测试 + + + +

🌟 灵感知识图谱

+

Inspiration Knowledge Graph

+ +
+ + +
+

图例

+
+
+ 灵感节点 +
+
+
+ 标签节点 +
+
+
+ 关联关系 +
+
+ +
+ 💡 悬停查看内容 · 点击查看详情 · 拖拽移动视图 +
+ +
+
+
+
+
+
+ + + + diff --git a/frontend/test-physics-mood.html b/frontend/test-physics-mood.html new file mode 100644 index 0000000000000000000000000000000000000000..d76fbd7db7bec6ca017b3eb6fee4eee2de4d480e --- /dev/null +++ b/frontend/test-physics-mood.html @@ -0,0 +1,318 @@ + + + + + + 物理引擎心情气泡测试 + + + +
+

🫧 物理引擎心情气泡池

+

基于 Matter.js 的动态气泡交互演示

+ +
+ +
+ +
+

✨ 功能特性

+
    +
  • 🎨 颜色映射:不同心情类型对应不同颜色(喜悦-橙色、焦虑-紫色、平静-蓝色等)
  • +
  • 📏 大小映射:气泡大小由情绪强度(1-10)决定
  • +
  • 💫 物理碰撞:气泡之间真实的物理反弹效果
  • +
  • 🖱️ 拖拽交互:可以拖动气泡,感受物理引擎
  • +
  • 👆 点击查看:点击气泡查看详细信息
  • +
  • 🌊 布朗运动:轻微的随机扰动,模拟自然漂浮
  • +
  • 光晕效果:半透明渐变,毛玻璃质感
  • +
+ +

📊 当前心情数据

+
+
+
+ + + + + diff --git a/frontend/tsconfig.json b/frontend/tsconfig.json new file mode 100644 index 0000000000000000000000000000000000000000..2c6eed55868c7545e8f265f260277fb0605b2dbc --- /dev/null +++ b/frontend/tsconfig.json @@ -0,0 +1,29 @@ +{ + "compilerOptions": { + "target": "ES2022", + "experimentalDecorators": true, + "useDefineForClassFields": false, + "module": "ESNext", + "lib": [ + "ES2022", + "DOM", + "DOM.Iterable" + ], + "skipLibCheck": true, + "types": [ + "node" + ], + "moduleResolution": "bundler", + "isolatedModules": true, + "moduleDetection": "force", + "allowJs": true, + "jsx": "react-jsx", + "paths": { + "@/*": [ + "./*" + ] + }, + "allowImportingTsExtensions": true, + "noEmit": true + } +} \ No newline at end of file diff --git a/frontend/types.ts b/frontend/types.ts new file mode 100644 index 0000000000000000000000000000000000000000..70f6e9d5b106e1ef1b0977c6efe65c646837dfa2 --- /dev/null +++ b/frontend/types.ts @@ -0,0 +1,101 @@ +export enum Tab { + HOME = 'HOME', + RECORD = 'RECORD', + COMMUNITY = 'COMMUNITY', + MINE = 'MINE' +} + +export enum MoodAction { + MOOD = 'MOOD', + INSPIRATION = 'INSPIRATION', + TODO = 'TODO' +} + +export enum RecordSource { + MOOD = 'MOOD', + INSPIRATION = 'INSPIRATION', + VOICE = 'VOICE', + MANUAL = 'MANUAL' +} + +export enum MoodType { + HAPPY = 'HAPPY', // Warm Pink/Coral + CALM = 'CALM', // Lavender + TIRED = 'TIRED', // Foggy Blue + ANXIOUS = 'ANXIOUS', // Warm Beige/Grey + HOPEFUL = 'HOPEFUL' // Creamy Yellow +} + +export interface NavItem { + id: Tab; + label: string; + iconName: string; +} + +export interface ActionItem { + id: MoodAction; + label: string; + iconName: string; +} + +export interface RecordItem { + id: string; + content: string; + createdAt: number; // Unix timestamp + sourceType: RecordSource; +} + +export interface CommunityPost { + id: string; + user: { + name: string; + avatarColor: string; // Tailwind bg class + }; + content: string; + createdAt: number; + likeCount: number; + isLiked: 
boolean; + commentCount: number; +} + +export interface Profile { + name: string; + birthday: string; // e.g., "Mar 12" + moodStatus: string; + avatarUrl?: string; // Optional image URL +} + +export interface DeviceStatus { + isConnected: boolean; + batteryLevel: number; // 0-100 + deviceName: string; +} + +export interface MoodItem { + id: string; + type: MoodType; + date: number; + intensity: number; // 0-1 (affects size) + x?: number; // relative position percentage (optional for random placement) + y?: number; // relative position percentage +} + +export interface InspirationItem { + id: string; + content: string; + createdAt: number; + tags?: string[]; +} + +export type TodoCategory = 'study' | 'work' | 'life' | 'health'; + +export interface TodoItem { + id: string; + title: string; + createdAt: number; + scheduledAt?: number; + isDone: boolean; + category?: TodoCategory; + location?: string; + time?: string; +} \ No newline at end of file diff --git a/frontend/utils/dataTransform.ts b/frontend/utils/dataTransform.ts new file mode 100644 index 0000000000000000000000000000000000000000..9673accb64e56df6979d4d1deeb97b522f320955 --- /dev/null +++ b/frontend/utils/dataTransform.ts @@ -0,0 +1,122 @@ +/** + * Data transformation utilities + * Convert backend API responses to frontend types + */ + +import { + RecordItem, + RecordSource, + MoodItem, + MoodType, + InspirationItem, + TodoItem, + TodoCategory +} from '../types'; + +/** + * Convert backend record to frontend RecordItem + */ +export function transformRecord(record: any): RecordItem { + return { + id: record.record_id, + content: record.original_text, + createdAt: new Date(record.timestamp).getTime(), + sourceType: record.input_type === 'audio' ? 
/**
 * Convert backend mood type to frontend MoodType.
 *
 * @param type Mood label as sent by the backend (Chinese), possibly missing.
 * @returns The matching MoodType, or MoodType.CALM for a missing or unknown label.
 */
export function transformMoodType(type?: string): MoodType {
  const typeMap: Record<string, MoodType> = {
    '开心': MoodType.HAPPY,
    '快乐': MoodType.HAPPY,
    '平静': MoodType.CALM,
    '冷静': MoodType.CALM,
    '疲惫': MoodType.TIRED,
    '累': MoodType.TIRED,
    '焦虑': MoodType.ANXIOUS,
    '紧张': MoodType.ANXIOUS,
    '希望': MoodType.HOPEFUL,
    '期待': MoodType.HOPEFUL,
  };

  // Own-property check: a plain `typeMap[type]` would also find inherited
  // members such as 'constructor' or 'toString' and return them as a mood.
  if (type && Object.hasOwn(typeMap, type)) {
    return typeMap[type];
  }
  return MoodType.CALM;
}

/**
 * Convert backend mood to frontend MoodItem.
 *
 * @param mood  Backend mood object; reads `record_id`, `type`, `timestamp`, `intensity`.
 * @param index Position in the list, mixed into the placement seed so moods
 *              sharing a timestamp do not land on the same spot.
 * @returns A MoodItem whose x/y lie in 20–79 and whose intensity lies in (0, 1].
 */
export function transformMood(mood: any, index: number): MoodItem {
  const timestamp = new Date(mood.timestamp).getTime();

  // Pseudo-random but stable position derived from the timestamp. An
  // unparseable timestamp is NaN and a pre-1970 one is negative; either would
  // push x/y out of range, so fall back to 0 and use the magnitude.
  const seed = Number.isFinite(timestamp) ? Math.abs(timestamp) : 0;
  const x = 20 + ((seed + index * 13) % 60);
  const y = 20 + ((seed + index * 17) % 60);

  // Divide the backend value by 10 and cap it at 1, the top of the 0–1 range
  // MoodItem.intensity documents. Missing, zero, negative or non-numeric
  // input falls back to the middle value.
  const rawIntensity = Number(mood.intensity);
  const intensity =
    Number.isFinite(rawIntensity) && rawIntensity > 0
      ? Math.min(rawIntensity / 10, 1)
      : 0.5;

  return {
    id: mood.record_id,
    type: transformMoodType(mood.type),
    date: timestamp,
    intensity,
    x,
    y
  };
}

/**
 * Convert backend inspiration to frontend InspirationItem.
 *
 * @param inspiration Backend object; reads `record_id`, `core_idea`, `timestamp`, `tags`.
 * @returns An InspirationItem; `tags` defaults to an empty array.
 */
export function transformInspiration(inspiration: any): InspirationItem {
  return {
    id: inspiration.record_id,
    content: inspiration.core_idea,
    createdAt: new Date(inspiration.timestamp).getTime(),
    tags: inspiration.tags || []
  };
}

/**
 * Convert backend category to frontend TodoCategory.
 *
 * @param category Category label as sent by the backend (Chinese), possibly missing.
 * @returns The matching TodoCategory, or 'life' for a missing or unknown label.
 */
export function transformTodoCategory(category?: string): TodoCategory {
  const categoryMap: Record<string, TodoCategory> = {
    '工作': 'work',
    '生活': 'life',
    '学习': 'study',
    '健康': 'health'
  };

  // Same own-property guard as transformMoodType: never return an inherited member.
  if (category && Object.hasOwn(categoryMap, category)) {
    return categoryMap[category];
  }
  return 'life';
}

/**
 * Convert backend todo to frontend TodoItem.
 *
 * @param todo Backend object; reads `record_id`, `task`, `timestamp`, `time`,
 *             `status`, `category`, `location`.
 * @returns A TodoItem. `scheduledAt` is only set when `time` is a string that
 *          mentions "明天" (creation time + 1 day) or "今天" (creation time).
 */
export function transformTodo(todo: any): TodoItem {
  const createdAt = new Date(todo.timestamp).getTime();

  // Try to parse time if available. `todo` is untyped, so make sure `time`
  // really is a string before calling string methods on it.
  let scheduledAt: number | undefined;
  if (typeof todo.time === 'string') {
    // Simple heuristic: if time contains "明天", add 1 day
    if (todo.time.includes('明天')) {
      scheduledAt = createdAt + 24 * 60 * 60 * 1000;
    } else if (todo.time.includes('今天')) {
      scheduledAt = createdAt;
    }
  }

  return {
    id: todo.record_id,
    title: todo.task,
    createdAt,
    scheduledAt,
    isDone: todo.status === 'completed' || todo.status === 'done',
    category: transformTodoCategory(todo.category),
    location: todo.location || undefined,
    time: todo.time || undefined
  };
}
// Vite configuration for the frontend.
// - Dev server on port 5173, bound to 0.0.0.0 (all interfaces) rather than localhost only.
// - React plugin enabled.
// - '@' resolves to the directory containing this config file.
// The previous `loadEnv(mode, '.', '')` call was removed: its result was never
// used, so it only read the .env files for nothing.
export default defineConfig(() => {
  return {
    server: {
      port: 5173,
      host: '0.0.0.0',
    },
    plugins: [react()],
    resolve: {
      alias: {
        '@': path.resolve(__dirname, '.'),
      }
    }
  };
});
#!/bin/bash
# Build the frontend bundle, commit the result and push it to the git remote
# named "hf" (Hugging Face Spaces). Stops with a non-zero exit code as soon as
# a step fails.

# This script lives in scripts/, while every path below (frontend/,
# frontend/dist/) is relative to the repository root. Switch there first so
# the script works no matter which directory it is started from.
cd "$(dirname "${BASH_SOURCE[0]}")/.." || {
    echo "错误: 无法进入项目根目录"
    exit 1
}

echo "========================================"
echo "构建并部署到 Hugging Face Spaces"
echo "========================================"
echo ""

echo "[1/4] 构建前端..."
# Without this check a missing frontend/ would make npm run in the wrong directory.
cd frontend || {
    echo "错误: 找不到 frontend 目录"
    exit 1
}

if ! npm install; then
    echo "错误: npm install 失败"
    exit 1
fi

if ! npm run build; then
    echo "错误: npm run build 失败"
    exit 1
fi
cd ..

echo ""
echo "[2/4] 检查构建产物..."
if [ ! -f "frontend/dist/index.html" ]; then
    echo "错误: 构建产物不存在"
    exit 1
fi
echo "✓ 构建产物检查通过"

echo ""
echo "[3/4] 提交到 Git..."
if ! git add .; then
    echo "错误: git add 失败"
    exit 1
fi

# `git diff --cached --quiet` exits 0 only when nothing is staged. Checking it
# first separates "nothing to commit" from a commit that really failed, which
# previously produced the same harmless-looking hint.
if git diff --cached --quiet; then
    echo "提示: 没有新的更改需要提交"
elif ! git commit -m "Build: Update frontend dist for deployment"; then
    echo "错误: git commit 失败"
    exit 1
fi

echo ""
echo "[4/4] 推送到 Hugging Face..."
if ! git push hf main; then
    echo "错误: 推送失败"
    echo "请检查 Hugging Face 远程仓库配置"
    exit 1
fi

echo ""
echo "========================================"
echo "✓ 部署完成!"
echo "========================================"
echo ""
echo "访问你的 Hugging Face Space 查看应用"
>> deploy_checklist.txt +echo 根目录文件: >> deploy_checklist.txt +echo ✅ Dockerfile >> deploy_checklist.txt +echo ✅ start.py >> deploy_checklist.txt +echo ✅ requirements.txt >> deploy_checklist.txt +echo ✅ README_HF.md >> deploy_checklist.txt +echo. >> deploy_checklist.txt +echo 应用代码: >> deploy_checklist.txt +echo ✅ app/ >> deploy_checklist.txt +echo ✅ data/ >> deploy_checklist.txt +echo ✅ frontend/dist/ >> deploy_checklist.txt +echo ✅ generated_images/ >> deploy_checklist.txt +echo. >> deploy_checklist.txt +echo 环境变量(需要在 HF Space Settings 中配置): >> deploy_checklist.txt +echo - ZHIPU_API_KEY (必需) >> deploy_checklist.txt +echo - MINIMAX_API_KEY (可选) >> deploy_checklist.txt +echo - MINIMAX_GROUP_ID (可选) >> deploy_checklist.txt +echo. >> deploy_checklist.txt +echo ✅ 清单已生成: deploy_checklist.txt +echo. + +echo ============================================================ +echo ✅ 部署准备完成! +echo ============================================================ +echo. +echo 📋 下一步操作: +echo. +echo 1. 提交所有更改到 Git: +echo git add . +echo git commit -m "Fix: Add required files for HF deployment" +echo git push origin main +echo. +echo 2. 在 Hugging Face Space 中同步: +echo https://huggingface.co/spaces/kernel14/Nora +echo Settings → Sync from GitHub → Sync now +echo. +echo 3. 配置环境变量: +echo Settings → Variables and secrets +echo 添加 ZHIPU_API_KEY +echo. +echo 4. 等待构建完成(查看 Logs 标签页) +echo. 
+echo 📚 详细说明请查看: HUGGINGFACE_DEPLOY.md
+echo ============================================================
+pause
diff --git a/scripts/start.py b/scripts/start.py
new file mode 100644
index 0000000000000000000000000000000000000000..890a000344e0161cfe39731bbff30de48c1f46ba
--- /dev/null
+++ b/scripts/start.py
@@ -0,0 +1,101 @@
+"""
+Startup script: run the FastAPI app directly (no Gradio) and serve the built frontend.
+
+The project root is detected at runtime so the script works both from
+scripts/ and when copied to the repository root for deployment.
+"""
+
+import os
+import sys
+from pathlib import Path
+
+# The project root is the directory that contains the ``app`` package: this
+# file's own directory when it sits at the root, otherwise its parent.
+_SCRIPT_DIR = Path(__file__).resolve().parent
+PROJECT_ROOT = _SCRIPT_DIR if (_SCRIPT_DIR / "app").is_dir() else _SCRIPT_DIR.parent
+
+# Make the ``app`` package importable from wherever the script is launched.
+sys.path.insert(0, str(PROJECT_ROOT))
+
+# Provide defaults without overriding values already set in the environment.
+os.environ.setdefault("DATA_DIR", "data")
+os.environ.setdefault("LOG_LEVEL", "INFO")
+
+# Ensure the runtime directories exist (relative to the current working directory).
+data_dir = Path("data")
+data_dir.mkdir(exist_ok=True)
+
+generated_images_dir = Path("generated_images")
+generated_images_dir.mkdir(exist_ok=True)
+
+# Imported only after sys.path has been adjusted so that ``app`` resolves.
+from app.main import app
+from fastapi.staticfiles import StaticFiles
+from fastapi.responses import FileResponse
+from fastapi import Request
+
+# Location of the built frontend bundle.
+frontend_dist = PROJECT_ROOT / "frontend" / "dist"
+frontend_exists = frontend_dist.exists()
+
+if frontend_exists:
+    # Mount the static assets (CSS, JS).
+    assets_dir = frontend_dist / "assets"
+    if assets_dir.exists():
+        app.mount("/assets", StaticFiles(directory=str(assets_dir)), name="assets")
+        print(f"✅ 前端资源文件已挂载: {assets_dir}")
+
+    print(f"✅ 前端应用已挂载: {frontend_dist}")
+else:
+    print(f"⚠️ 前端构建目录不存在: {frontend_dist}")
+
+# Root route: serve the frontend's index.html when it has been built.
+@app.get("/", include_in_schema=False)
+async def serve_root():
+    """Serve the frontend home page, or a JSON status payload if it is missing."""
+    if frontend_exists:
+        index_file = frontend_dist / "index.html"
+        if index_file.exists():
+            return FileResponse(index_file)
+    return {
+        "service": "SoulMate AI Companion",
+        "status": "running",
+        "version": "1.0.0",
+        "message": "Frontend not available. Please visit /docs for API documentation."
+    }
+
+# Catch-all route for SPA client-side routing (must be registered last).
+@app.get("/{full_path:path}", include_in_schema=False)
+async def serve_spa(full_path: str, request: Request):
+    """Serve index.html for any non-API path so the SPA router can handle it."""
+    # API-style paths that reach this handler get a 404 instead of index.html.
+    if full_path.startswith("api/") or full_path == "docs" or full_path == "openapi.json" or full_path == "health":
+        from fastapi import HTTPException
+        raise HTTPException(status_code=404, detail="Not found")
+
+    # Everything else falls back to the frontend entry point.
+    if frontend_exists:
+        index_file = frontend_dist / "index.html"
+        if index_file.exists():
+            return FileResponse(index_file)
+
+    return {"error": "Frontend not found"}
+
+if __name__ == "__main__":
+    import uvicorn
+
+    print("=" * 50)
+    print("🌟 治愈系记录助手 - SoulMate AI Companion")
+    print("=" * 50)
+    print(f"📍 前端应用: http://0.0.0.0:7860/")
+    print(f"📚 API 文档: http://0.0.0.0:7860/docs")
+    print(f"🔍 健康检查: http://0.0.0.0:7860/health")
+    print("=" * 50)
+
+    uvicorn.run(
+        app,
+        host="0.0.0.0",
+        port=7860,
+        log_level="info"
+    )
+
diff --git a/scripts/start_local.bat b/scripts/start_local.bat
new file mode 100644
index 0000000000000000000000000000000000000000..78f76e5b26d64fd489274abcb5dd21a7fe4f7619
--- /dev/null
+++ b/scripts/start_local.bat
@@ -0,0 +1,25 @@
+@echo off
+echo ========================================
+echo 启动本地开发服务器
+echo ========================================
+echo.
+
+echo [1/2] 检查前端构建...
+if not exist "frontend\dist\index.html" (
+    echo 警告: 前端未构建,正在构建...
+    cd frontend
+    call npm run build
+    if errorlevel 1 (
+        echo 错误: 前端构建失败
+        pause
+        exit /b 1
+    )
+    cd ..
+)
+echo ✓ 前端构建检查通过
+
+echo.
+echo [2/2] 启动后端服务器...
+python "%~dp0start_local.py"
+
+pause
diff --git a/scripts/start_local.py b/scripts/start_local.py
new file mode 100644
index 0000000000000000000000000000000000000000..c8b7ade40abbef81c11d17d4b41c0e9195085803
--- /dev/null
+++ b/scripts/start_local.py
@@ -0,0 +1,110 @@
+"""
+Local development launcher: serves the app and the built frontend on port 8000.
+"""
+
+import os
+import sys
+from pathlib import Path
+
+# The project root is the directory that contains the ``app`` package: this
+# file's own directory when it sits at the root, otherwise its parent
+# (the script normally lives in scripts/).
+_SCRIPT_DIR = Path(__file__).resolve().parent
+PROJECT_ROOT = _SCRIPT_DIR if (_SCRIPT_DIR / "app").is_dir() else _SCRIPT_DIR.parent
+
+# Make the ``app`` package importable from wherever the script is launched.
+sys.path.insert(0, str(PROJECT_ROOT))
+
+# Provide defaults without overriding values already set in the environment.
+os.environ.setdefault("DATA_DIR", "data")
+os.environ.setdefault("LOG_LEVEL", "INFO")
+
+# Ensure the runtime directories exist (relative to the current working directory).
+data_dir = Path("data")
+data_dir.mkdir(exist_ok=True)
+
+generated_images_dir = Path("generated_images")
+generated_images_dir.mkdir(exist_ok=True)
+
+# Imported only after sys.path has been adjusted so that ``app`` resolves.
+from app.main import app
+from fastapi.staticfiles import StaticFiles
+from fastapi.responses import FileResponse
+from fastapi import Request
+
+# Location of the built frontend bundle.
+frontend_dist = PROJECT_ROOT / "frontend" / "dist"
+frontend_exists = frontend_dist.exists()
+
+if frontend_exists:
+    # Mount the static assets (CSS, JS).
+    assets_dir = frontend_dist / "assets"
+    if assets_dir.exists():
+        app.mount("/assets", StaticFiles(directory=str(assets_dir)), name="assets")
+        print(f"✅ 前端资源文件已挂载: {assets_dir}")
+
+    print(f"✅ 前端应用已挂载: {frontend_dist}")
+else:
+    print(f"⚠️ 前端构建目录不存在: {frontend_dist}")
+    print(f"   请先构建前端: cd frontend && npm run build")
+
+# Root route: serve the frontend's index.html when it has been built.
+@app.get("/", include_in_schema=False)
+async def serve_root():
+    """Serve the frontend home page, or a JSON status payload if it is missing."""
+    if frontend_exists:
+        index_file = frontend_dist / "index.html"
+        if index_file.exists():
+            return FileResponse(index_file)
+    return {
+        "service": "SoulMate AI Companion",
+        "status": "running",
+        "version": "1.0.0",
+        "message": "Frontend not available. Please visit /docs for API documentation."
+    }
+
+# Catch-all route for SPA client-side routing (must be registered last).
+@app.get("/{full_path:path}", include_in_schema=False)
+async def serve_spa(full_path: str, request: Request):
+    """Serve index.html for any non-API path so the SPA router can handle it."""
+    # API-style paths that reach this handler get a 404 instead of index.html.
+    if full_path.startswith("api/") or full_path == "docs" or full_path == "openapi.json" or full_path == "health":
+        from fastapi import HTTPException
+        raise HTTPException(status_code=404, detail="Not found")
+
+    # Everything else falls back to the frontend entry point.
+    if frontend_exists:
+        index_file = frontend_dist / "index.html"
+        if index_file.exists():
+            return FileResponse(index_file)
+
+    return {"error": "Frontend not found"}
+
+if __name__ == "__main__":
+    import uvicorn
+    import socket
+
+    # The LAN address is only used in the banner below, so a hostname that
+    # does not resolve must not prevent the server from starting.
+    try:
+        local_ip = socket.gethostbyname(socket.gethostname())
+    except OSError:
+        local_ip = "127.0.0.1"
+
+    print("=" * 60)
+    print("🌟 治愈系记录助手 - SoulMate AI Companion")
+    print("=" * 60)
+    print(f"📍 本地访问: http://localhost:8000/")
+    print(f"📍 局域网访问: http://{local_ip}:8000/")
+    print(f"📚 API 文档: http://localhost:8000/docs")
+    print(f"🔍 健康检查: http://localhost:8000/health")
+    print("=" * 60)
+    print(f"💡 提示: 其他设备可以通过 http://{local_ip}:8000/ 访问")
+    print("=" * 60)
+
+    uvicorn.run(
+        app,
+        host="0.0.0.0",  # listen on all network interfaces
+        port=8000,  # local development port
+        log_level="info"
+    )
diff --git a/scripts/test_lan_access.bat b/scripts/test_lan_access.bat
new file mode 100644
index 0000000000000000000000000000000000000000..18ab04640d5bbb144c5e8f3aaccdde86792b5a02
--- /dev/null
+++ b/scripts/test_lan_access.bat
@@ -0,0 +1,59 @@
+@echo off
+chcp 65001 >nul
+echo ============================================================
+echo 🔍 局域网访问测试工具
+echo ============================================================
+echo.
+
+echo [1/4] 检查后端服务...
+curl -s http://localhost:8000/health >nul 2>&1
+if %errorlevel% equ 0 (
+    echo ✅ 后端服务正常运行
+) else (
+    echo ❌ 后端服务未运行或无法访问
+    echo 请先运行: python scripts/start_local.py
+    pause
+    exit /b 1
+)
+echo.
+
+echo [2/4] 获取本机 IP 地址...
+rem Keep the first IPv4 address reported by ipconfig and strip its padding spaces.
+rem Percent-style expansion is used throughout because delayed expansion is never enabled here.
+set "IP="
+for /f "tokens=2 delims=:" %%a in ('ipconfig ^| findstr /c:"IPv4"') do if not defined IP set "IP=%%a"
+if defined IP set "IP=%IP: =%"
+if not defined IP set "IP=localhost"
+echo 📍 本机 IP: %IP%
+echo.
+
+echo [3/4] 测试 API 端点...
+echo.
+echo 测试 /health:
+curl -s http://localhost:8000/health
+echo.
+echo.
+echo 测试 /api/status:
+curl -s http://localhost:8000/api/status
+echo.
+echo.
+
+echo [4/4] 检查防火墙状态...
+netsh advfirewall show allprofiles state | findstr "状态 State"
+echo.
+
+echo ============================================================
+echo 📋 测试总结
+echo ============================================================
+echo.
+echo ✅ 如果上面的测试都成功,请在其他设备上访问:
+echo    http://%IP%:8000/
+echo.
+echo 🔍 如果其他设备无法访问,请访问诊断页面:
+echo    http://%IP%:8000/test-connection.html
+echo.
+echo 📚 详细排查步骤请参考:
+echo    docs/局域网访问快速修复.md
+echo.
+echo ============================================================
+pause
diff --git a/start.py b/start.py
new file mode 100644
index 0000000000000000000000000000000000000000..890a000344e0161cfe39731bbff30de48c1f46ba
--- /dev/null
+++ b/start.py
@@ -0,0 +1,93 @@
+"""
+启动脚本 - 不使用 Gradio,直接运行 FastAPI
+"""
+
+import os
+import sys
+from pathlib import Path
+
+# 添加项目根目录到 Python 路径
+sys.path.insert(0, str(Path(__file__).parent))
+
+# 设置环境变量
+os.environ.setdefault("DATA_DIR", "data")
+os.environ.setdefault("LOG_LEVEL", "INFO")
+
+# 确保数据目录存在
+data_dir = Path("data")
+data_dir.mkdir(exist_ok=True)
+
+generated_images_dir = Path("generated_images")
+generated_images_dir.mkdir(exist_ok=True)
+
+# 导入 FastAPI 应用
+from app.main import app
+from fastapi.staticfiles import StaticFiles
+from fastapi.responses import FileResponse
+from fastapi import Request
+
+# 检查前端构建目录
+frontend_dist = Path(__file__).parent / "frontend" / "dist"
+frontend_exists = frontend_dist.exists()
+
+if frontend_exists:
+    # 挂载静态资源(CSS, JS)
+    assets_dir = frontend_dist / "assets"
+    if assets_dir.exists():
+        app.mount("/assets", StaticFiles(directory=str(assets_dir)), name="assets")
+        print(f"✅ 前端资源文件已挂载: {assets_dir}")
+
+    print(f"✅
前端应用已挂载: {frontend_dist}") +else: + print(f"⚠️ 前端构建目录不存在: {frontend_dist}") + +# 重写根路径路由以服务前端 +@app.get("/", include_in_schema=False) +async def serve_root(): + """服务前端应用首页""" + if frontend_exists: + index_file = frontend_dist / "index.html" + if index_file.exists(): + return FileResponse(index_file) + return { + "service": "SoulMate AI Companion", + "status": "running", + "version": "1.0.0", + "message": "Frontend not available. Please visit /docs for API documentation." + } + +# 添加 catch-all 路由用于 SPA(必须放在最后) +@app.get("/{full_path:path}", include_in_schema=False) +async def serve_spa(full_path: str, request: Request): + """服务前端应用(SPA 路由支持)""" + # 如果是 API 路径,跳过(让 FastAPI 处理 404) + if full_path.startswith("api/") or full_path == "docs" or full_path == "openapi.json" or full_path == "health": + from fastapi import HTTPException + raise HTTPException(status_code=404, detail="Not found") + + # 返回前端 index.html + if frontend_exists: + index_file = frontend_dist / "index.html" + if index_file.exists(): + return FileResponse(index_file) + + return {"error": "Frontend not found"} + +if __name__ == "__main__": + import uvicorn + + print("=" * 50) + print("🌟 治愈系记录助手 - SoulMate AI Companion") + print("=" * 50) + print(f"📍 前端应用: http://0.0.0.0:7860/") + print(f"📚 API 文档: http://0.0.0.0:7860/docs") + print(f"🔍 健康检查: http://0.0.0.0:7860/health") + print("=" * 50) + + uvicorn.run( + app, + host="0.0.0.0", + port=7860, + log_level="info" + ) + diff --git a/tests/__init__.py b/tests/__init__.py new file mode 100644 index 0000000000000000000000000000000000000000..44553dbba14ac82ff98637793fd8ce7617d8e4ec --- /dev/null +++ b/tests/__init__.py @@ -0,0 +1 @@ +"""Test suite for Voice Text Processor""" diff --git a/tests/test_api.html b/tests/test_api.html new file mode 100644 index 0000000000000000000000000000000000000000..d04552b381c8149c549898ab24e9f20d3e3401d1 --- /dev/null +++ b/tests/test_api.html @@ -0,0 +1,109 @@ + + + + API 测试 + + + +

🧪 API 测试工具

+ +
+

1. 健康检查

+ +
+
+ +
+

2. API 状态

+ +
+
+ +
+

3. 获取心情数据

+ +
+
+ +
+

4. 获取灵感数据

+ +
+
+ +
+

5. 获取待办数据

+ +
+
+ + + + diff --git a/tests/test_api_properties.py b/tests/test_api_properties.py new file mode 100644 index 0000000000000000000000000000000000000000..9b50135c2e75a8c3ab01a5d2fd9202e2904b0e3d --- /dev/null +++ b/tests/test_api_properties.py @@ -0,0 +1,530 @@ +"""Property-based tests for API endpoints. + +This module contains property-based tests for the /api/process endpoint, +validating universal properties that should hold across all inputs. + +Requirements: 1.1, 1.2, 1.3, 8.4, 8.5, 8.6, 9.1, 9.3 +""" + +import os +import pytest +import tempfile +import shutil +from unittest.mock import patch, AsyncMock, MagicMock +from io import BytesIO +from hypothesis import given, strategies as st, settings +from fastapi.testclient import TestClient + + +# Note: We don't use pytest fixtures with hypothesis tests because +# fixtures are not reset between examples. Instead, we create temp +# directories directly in the test methods. + + +# Custom strategies for generating test data +@st.composite +def audio_filename_strategy(draw): + """Generate audio filenames with various extensions.""" + base_name = draw(st.text(min_size=1, max_size=20, alphabet=st.characters( + whitelist_categories=('Lu', 'Ll', 'Nd'), + min_codepoint=ord('a'), + max_codepoint=ord('z') + ))) + extension = draw(st.sampled_from([ + '.mp3', '.wav', '.m4a', # Supported formats + '.ogg', '.flac', '.aac', '.wma', '.txt', '.pdf' # Unsupported formats + ])) + return base_name + extension + + +@st.composite +def utf8_text_strategy(draw): + """Generate UTF-8 text including Chinese, emoji, and special characters.""" + return draw(st.text( + min_size=1, + max_size=200, + alphabet=st.characters( + blacklist_categories=('Cs',), # Exclude surrogates + blacklist_characters='\x00' # Exclude null character + ) + )) + + +class TestProperty1AudioFormatValidation: + """Property 1: 音频格式验证 + + For any submitted file, if the file extension is mp3, wav, or m4a, + the system should accept the file; if it's another format, + the system 
should reject it and return an error. + + **Validates: Requirements 1.1** + """ + + @patch.dict(os.environ, {"ZHIPU_API_KEY": "test_key_1234567890"}, clear=True) + @patch("app.main.ASRService") + @patch("app.main.SemanticParserService") + @given(filename=audio_filename_strategy()) + @settings(max_examples=25) + def test_property_1_audio_format_validation( + self, + mock_parser_class, + mock_asr_class, + filename + ): + """Test that audio format validation works correctly for all file types. + + Feature: voice-text-processor, Property 1: 音频格式验证 + """ + # Create temporary directory for this test + temp_dir = tempfile.mkdtemp() + try: + # Reset config + import app.config + app.config._config = None + + # Mock services + from app.models import ParsedData + mock_asr = MagicMock() + mock_asr.transcribe = AsyncMock(return_value="转写后的文本") + mock_asr.close = AsyncMock() + mock_asr_class.return_value = mock_asr + + mock_parser = MagicMock() + mock_parser.parse = AsyncMock(return_value=ParsedData( + mood=None, + inspirations=[], + todos=[] + )) + mock_parser.close = AsyncMock() + mock_parser_class.return_value = mock_parser + + with patch.dict(os.environ, { + "DATA_DIR": os.path.join(temp_dir, "data"), + "LOG_FILE": os.path.join(temp_dir, "logs", "app.log") + }, clear=False): + from app.main import app + + with TestClient(app) as client: + # Create fake audio file + audio_data = b"fake audio content" + files = {"audio": (filename, BytesIO(audio_data), "audio/mpeg")} + + response = client.post("/api/process", files=files) + + # Extract file extension + file_ext = "." + filename.split(".")[-1].lower() if "." 
in filename else "" + supported_formats = {".mp3", ".wav", ".m4a"} + + if file_ext in supported_formats: + # Should accept the file (200 or 500 if processing fails) + assert response.status_code in [200, 500], \ + f"Supported format {file_ext} should be accepted" + + # If 200, should have record_id + if response.status_code == 200: + data = response.json() + assert "record_id" in data + else: + # Should reject the file with 400 + assert response.status_code == 400, \ + f"Unsupported format {file_ext} should be rejected" + data = response.json() + assert "error" in data + assert "不支持的音频格式" in data["error"] + finally: + # Clean up temporary directory + shutil.rmtree(temp_dir, ignore_errors=True) + + +class TestProperty2UTF8TextAcceptance: + """Property 2: UTF-8 文本接受 + + For any UTF-8 encoded text string (including Chinese, emoji, special characters), + the system should correctly accept and process it. + + **Validates: Requirements 1.2** + """ + + @patch.dict(os.environ, {"ZHIPU_API_KEY": "test_key_1234567890"}, clear=True) + @patch("app.main.SemanticParserService") + @given(text=utf8_text_strategy()) + @settings(max_examples=30) + def test_property_2_utf8_text_acceptance( + self, + mock_parser_class, + text + ): + """Test that UTF-8 text is accepted regardless of content. 
+ + Feature: voice-text-processor, Property 2: UTF-8 文本接受 + """ + # Create temporary directory for this test + temp_dir = tempfile.mkdtemp() + try: + # Reset config + import app.config + app.config._config = None + + # Mock semantic parser + from app.models import ParsedData + mock_parser = MagicMock() + mock_parser.parse = AsyncMock(return_value=ParsedData( + mood=None, + inspirations=[], + todos=[] + )) + mock_parser.close = AsyncMock() + mock_parser_class.return_value = mock_parser + + with patch.dict(os.environ, { + "DATA_DIR": os.path.join(temp_dir, "data"), + "LOG_FILE": os.path.join(temp_dir, "logs", "app.log") + }, clear=False): + from app.main import app + + with TestClient(app) as client: + # Submit text input + response = client.post( + "/api/process", + data={"text": text} + ) + + # Should accept the input (not reject with 400 for encoding issues) + # May return 200 (success) or 500 (processing error), but not 400 + assert response.status_code in [200, 500], \ + f"UTF-8 text should be accepted, got {response.status_code}" + + # If successful, should have required fields + if response.status_code == 200: + data = response.json() + assert "record_id" in data + assert "timestamp" in data + finally: + # Clean up temporary directory + shutil.rmtree(temp_dir, ignore_errors=True) + + +class TestProperty3InvalidInputErrorHandling: + """Property 3: 无效输入错误处理 + + For any empty input or invalid format input, the system should return + a JSON response containing an error field, rather than crashing or + returning a success status. + + **Validates: Requirements 1.3, 9.1** + """ + + @patch.dict(os.environ, {"ZHIPU_API_KEY": "test_key_1234567890"}, clear=True) + @settings(max_examples=20) + @given( + has_audio=st.booleans(), + has_text=st.booleans(), + text_is_empty=st.booleans() + ) + def test_property_3_invalid_input_error_handling( + self, + has_audio, + has_text, + text_is_empty + ): + """Test that invalid inputs return proper error responses. 
+ + Feature: voice-text-processor, Property 3: 无效输入错误处理 + """ + # Skip valid input combinations + if (has_audio and not has_text) or (has_text and not has_audio and not text_is_empty): + return + + # Create temporary directory for this test + temp_dir = tempfile.mkdtemp() + try: + # Reset config + import app.config + app.config._config = None + + with patch.dict(os.environ, { + "DATA_DIR": os.path.join(temp_dir, "data"), + "LOG_FILE": os.path.join(temp_dir, "logs", "app.log") + }, clear=False): + from app.main import app + + with TestClient(app) as client: + # Prepare request based on parameters + if not has_audio and not has_text: + # No input at all + response = client.post("/api/process") + elif has_audio and has_text: + # Both inputs (invalid) + audio_data = b"fake audio" + files = {"audio": ("test.mp3", BytesIO(audio_data), "audio/mpeg")} + response = client.post( + "/api/process", + files=files, + data={"text": "some text"} + ) + elif has_text and text_is_empty: + # Empty text + response = client.post( + "/api/process", + data={"text": ""} + ) + else: + # Should not reach here + return + + # Should return error response (400), not crash (500) or succeed (200) + assert response.status_code == 400, \ + "Invalid input should return 400 error" + + # Response should be valid JSON with error field + data = response.json() + assert "error" in data, "Error response must contain 'error' field" + assert isinstance(data["error"], str), "Error field must be a string" + assert len(data["error"]) > 0, "Error message must not be empty" + + # Should also have timestamp + assert "timestamp" in data + finally: + # Clean up temporary directory + shutil.rmtree(temp_dir, ignore_errors=True) + + +class TestProperty12SuccessResponseFormat: + """Property 12: 成功响应格式 + + For any successfully processed request, the HTTP response should return + 200 status code, and the response JSON should contain record_id, timestamp, + mood, inspirations, and todos fields. 
+ + **Validates: Requirements 8.4, 8.6** + """ + + @patch.dict(os.environ, {"ZHIPU_API_KEY": "test_key_1234567890"}, clear=True) + @patch("app.main.SemanticParserService") + @given(text=st.text(min_size=1, max_size=100)) + @settings(max_examples=25) + def test_property_12_success_response_format( + self, + mock_parser_class, + text + ): + """Test that successful responses have the correct format. + + Feature: voice-text-processor, Property 12: 成功响应格式 + """ + # Create temporary directory for this test + temp_dir = tempfile.mkdtemp() + try: + # Reset config + import app.config + app.config._config = None + + # Mock semantic parser to always succeed + from app.models import ParsedData, MoodData, InspirationData, TodoData + + # Generate varied parsed data + mock_parser = MagicMock() + mock_parser.parse = AsyncMock(return_value=ParsedData( + mood=MoodData(type="测试情绪", intensity=5, keywords=["测试"]), + inspirations=[InspirationData(core_idea="测试想法", tags=["测试"], category="工作")], + todos=[TodoData(task="测试任务", time="今天", location="测试地点")] + )) + mock_parser.close = AsyncMock() + mock_parser_class.return_value = mock_parser + + with patch.dict(os.environ, { + "DATA_DIR": os.path.join(temp_dir, "data"), + "LOG_FILE": os.path.join(temp_dir, "logs", "app.log") + }, clear=False): + from app.main import app + + with TestClient(app) as client: + response = client.post( + "/api/process", + data={"text": text} + ) + + # Should return 200 status code + assert response.status_code == 200, \ + f"Success response should return 200, got {response.status_code}" + + # Response should be valid JSON + data = response.json() + + # Must contain all required fields + assert "record_id" in data, "Response must contain 'record_id'" + assert "timestamp" in data, "Response must contain 'timestamp'" + assert "mood" in data, "Response must contain 'mood'" + assert "inspirations" in data, "Response must contain 'inspirations'" + assert "todos" in data, "Response must contain 'todos'" + + # Validate 
field types + assert isinstance(data["record_id"], str), "record_id must be string" + assert len(data["record_id"]) > 0, "record_id must not be empty" + + assert isinstance(data["timestamp"], str), "timestamp must be string" + assert len(data["timestamp"]) > 0, "timestamp must not be empty" + + # mood can be None or dict + assert data["mood"] is None or isinstance(data["mood"], dict), \ + "mood must be None or dict" + + # inspirations must be list + assert isinstance(data["inspirations"], list), \ + "inspirations must be list" + + # todos must be list + assert isinstance(data["todos"], list), \ + "todos must be list" + finally: + # Clean up temporary directory + shutil.rmtree(temp_dir, ignore_errors=True) + + +class TestProperty13ErrorResponseFormat: + """Property 13: 错误响应格式 + + For any failed request, the HTTP response should return appropriate error + status code (400 or 500), and the response JSON should contain an error + field describing the specific error. + + **Validates: Requirements 8.5, 9.1, 9.3** + """ + + @patch.dict(os.environ, {"ZHIPU_API_KEY": "test_key_1234567890"}, clear=True) + @settings(max_examples=20) + @given( + error_type=st.sampled_from([ + "validation_empty", + "validation_both", + "validation_format", + "asr_error", + "parser_error", + "storage_error" + ]) + ) + def test_property_13_error_response_format( + self, + error_type + ): + """Test that error responses have the correct format. 
+ + Feature: voice-text-processor, Property 13: 错误响应格式 + """ + # Create temporary directory for this test + temp_dir = tempfile.mkdtemp() + try: + # Reset config + import app.config + app.config._config = None + + with patch.dict(os.environ, { + "DATA_DIR": os.path.join(temp_dir, "data"), + "LOG_FILE": os.path.join(temp_dir, "logs", "app.log") + }, clear=False): + from app.main import app + + with TestClient(app) as client: + # Trigger different types of errors + if error_type == "validation_empty": + # Empty input + response = client.post("/api/process") + expected_status = 400 + + elif error_type == "validation_both": + # Both audio and text + audio_data = b"fake audio" + files = {"audio": ("test.mp3", BytesIO(audio_data), "audio/mpeg")} + response = client.post( + "/api/process", + files=files, + data={"text": "some text"} + ) + expected_status = 400 + + elif error_type == "validation_format": + # Unsupported audio format + audio_data = b"fake audio" + files = {"audio": ("test.ogg", BytesIO(audio_data), "audio/ogg")} + response = client.post("/api/process", files=files) + expected_status = 400 + + elif error_type == "asr_error": + # ASR service error + with patch("app.main.ASRService") as mock_asr_class: + from app.asr_service import ASRServiceError + mock_asr = MagicMock() + mock_asr.transcribe = AsyncMock( + side_effect=ASRServiceError("API调用失败") + ) + mock_asr.close = AsyncMock() + mock_asr_class.return_value = mock_asr + + audio_data = b"fake audio" + files = {"audio": ("test.mp3", BytesIO(audio_data), "audio/mpeg")} + response = client.post("/api/process", files=files) + expected_status = 500 + + elif error_type == "parser_error": + # Semantic parser error + with patch("app.main.SemanticParserService") as mock_parser_class: + from app.semantic_parser import SemanticParserError + mock_parser = MagicMock() + mock_parser.parse = AsyncMock( + side_effect=SemanticParserError("API调用失败") + ) + mock_parser.close = AsyncMock() + mock_parser_class.return_value = 
mock_parser + + response = client.post( + "/api/process", + data={"text": "test text"} + ) + expected_status = 500 + + elif error_type == "storage_error": + # Storage error + with patch("app.main.SemanticParserService") as mock_parser_class, \ + patch("app.main.StorageService") as mock_storage_class: + from app.models import ParsedData + from app.storage import StorageError + + mock_parser = MagicMock() + mock_parser.parse = AsyncMock(return_value=ParsedData( + mood=None, + inspirations=[], + todos=[] + )) + mock_parser.close = AsyncMock() + mock_parser_class.return_value = mock_parser + + mock_storage = MagicMock() + mock_storage.save_record = MagicMock( + side_effect=StorageError("磁盘空间不足") + ) + mock_storage_class.return_value = mock_storage + + response = client.post( + "/api/process", + data={"text": "test text"} + ) + expected_status = 500 + + # Verify status code + assert response.status_code == expected_status, \ + f"Error type {error_type} should return {expected_status}" + + # Response should be valid JSON + data = response.json() + + # Must contain error field + assert "error" in data, "Error response must contain 'error' field" + assert isinstance(data["error"], str), "Error field must be a string" + assert len(data["error"]) > 0, "Error message must not be empty" + + # Should also have timestamp + assert "timestamp" in data, "Error response must contain 'timestamp'" + assert isinstance(data["timestamp"], str), "timestamp must be string" + finally: + # Clean up temporary directory + shutil.rmtree(temp_dir, ignore_errors=True) diff --git a/tests/test_asr_service.py b/tests/test_asr_service.py new file mode 100644 index 0000000000000000000000000000000000000000..efe0290114b0798e933d18df2e401325fdc979f2 --- /dev/null +++ b/tests/test_asr_service.py @@ -0,0 +1,341 @@ +"""Unit tests for ASR service. + +This module contains unit tests for the ASRService class, testing +API call success scenarios, failure scenarios, and edge cases. 
+ +Requirements: 2.1, 2.2, 2.3, 2.4 +""" + +import pytest +from unittest.mock import AsyncMock, MagicMock +import httpx + +from app.asr_service import ASRService, ASRServiceError + + +@pytest.fixture +def asr_service(): + """Create an ASRService instance for testing.""" + return ASRService(api_key="test_api_key_12345") + + +@pytest.fixture +def mock_audio_file(): + """Create mock audio file bytes.""" + return b"fake_audio_data_for_testing" + + +@pytest.mark.asyncio +async def test_asr_service_initialization(asr_service): + """Test ASR service initialization. + + Requirements: 2.1 + """ + assert asr_service.api_key == "test_api_key_12345" + assert asr_service.model == "glm-asr-2512" + assert asr_service.api_url == "https://api.z.ai/api/paas/v4/audio/transcriptions" + assert isinstance(asr_service.client, httpx.AsyncClient) + + # Clean up + await asr_service.close() + + +@pytest.mark.asyncio +async def test_transcribe_success(asr_service, mock_audio_file, mocker): + """Test successful transcription. 
+ + Requirements: 2.1, 2.2 + """ + # Mock successful API response + mock_response = MagicMock() + mock_response.status_code = 200 + mock_response.json.return_value = { + "id": "test_id", + "created": 1234567890, + "request_id": "test_request_id", + "model": "glm-asr-2512", + "text": "这是一段测试语音转写的文本内容" + } + + # Mock the HTTP client post method + mock_post = mocker.patch.object( + asr_service.client, + 'post', + return_value=mock_response + ) + + # Call transcribe + result = await asr_service.transcribe(mock_audio_file, "test.mp3") + + # Verify result + assert result == "这是一段测试语音转写的文本内容" + + # Verify API was called correctly + mock_post.assert_called_once() + call_args = mock_post.call_args + assert call_args.kwargs['headers']['Authorization'] == "Bearer test_api_key_12345" + assert call_args.kwargs['data']['model'] == "glm-asr-2512" + assert call_args.kwargs['data']['stream'] == "false" + + # Clean up + await asr_service.close() + + +@pytest.mark.asyncio +async def test_transcribe_empty_result(asr_service, mock_audio_file, mocker): + """Test transcription with empty recognition result. + + This tests the edge case where the audio cannot be recognized + and the API returns an empty text field. + + Requirements: 2.4 + """ + # Mock API response with empty text + mock_response = MagicMock() + mock_response.status_code = 200 + mock_response.json.return_value = { + "id": "test_id", + "created": 1234567890, + "request_id": "test_request_id", + "model": "glm-asr-2512", + "text": "" + } + + # Mock the HTTP client post method + mocker.patch.object( + asr_service.client, + 'post', + return_value=mock_response + ) + + # Call transcribe + result = await asr_service.transcribe(mock_audio_file, "empty.mp3") + + # Verify result is empty string + assert result == "" + + # Clean up + await asr_service.close() + + +@pytest.mark.asyncio +async def test_transcribe_whitespace_only_result(asr_service, mock_audio_file, mocker): + """Test transcription with whitespace-only result. 
+ + Requirements: 2.4 + """ + # Mock API response with whitespace-only text + mock_response = MagicMock() + mock_response.status_code = 200 + mock_response.json.return_value = { + "id": "test_id", + "created": 1234567890, + "request_id": "test_request_id", + "model": "glm-asr-2512", + "text": " \n\t " + } + + # Mock the HTTP client post method + mocker.patch.object( + asr_service.client, + 'post', + return_value=mock_response + ) + + # Call transcribe + result = await asr_service.transcribe(mock_audio_file, "whitespace.mp3") + + # Verify result is empty string + assert result == "" + + # Clean up + await asr_service.close() + + +@pytest.mark.asyncio +async def test_transcribe_api_error_status(asr_service, mock_audio_file, mocker): + """Test transcription when API returns error status code. + + Requirements: 2.3 + """ + # Mock API error response + mock_response = MagicMock() + mock_response.status_code = 500 + mock_response.json.return_value = { + "error": { + "message": "Internal server error", + "code": "internal_error" + } + } + mock_response.text = "Internal server error" + + # Mock the HTTP client post method + mocker.patch.object( + asr_service.client, + 'post', + return_value=mock_response + ) + + # Call transcribe and expect exception + with pytest.raises(ASRServiceError) as exc_info: + await asr_service.transcribe(mock_audio_file, "error.mp3") + + # Verify error message + assert "语音识别服务不可用" in str(exc_info.value) + + # Clean up + await asr_service.close() + + +@pytest.mark.asyncio +async def test_transcribe_api_timeout(asr_service, mock_audio_file, mocker): + """Test transcription when API request times out. 
+ + Requirements: 2.3 + """ + # Mock timeout exception + mocker.patch.object( + asr_service.client, + 'post', + side_effect=httpx.TimeoutException("Request timeout") + ) + + # Call transcribe and expect exception + with pytest.raises(ASRServiceError) as exc_info: + await asr_service.transcribe(mock_audio_file, "timeout.mp3") + + # Verify error message + assert "语音识别服务不可用" in str(exc_info.value) + assert "请求超时" in str(exc_info.value) + + # Clean up + await asr_service.close() + + +@pytest.mark.asyncio +async def test_transcribe_network_error(asr_service, mock_audio_file, mocker): + """Test transcription when network error occurs. + + Requirements: 2.3 + """ + # Mock network error + mocker.patch.object( + asr_service.client, + 'post', + side_effect=httpx.RequestError("Network error") + ) + + # Call transcribe and expect exception + with pytest.raises(ASRServiceError) as exc_info: + await asr_service.transcribe(mock_audio_file, "network_error.mp3") + + # Verify error message + assert "语音识别服务不可用" in str(exc_info.value) + assert "网络错误" in str(exc_info.value) + + # Clean up + await asr_service.close() + + +@pytest.mark.asyncio +async def test_transcribe_invalid_json_response(asr_service, mock_audio_file, mocker): + """Test transcription when API returns invalid JSON. 
+ + Requirements: 2.3 + """ + # Mock response with invalid JSON + mock_response = MagicMock() + mock_response.status_code = 200 + mock_response.json.side_effect = ValueError("Invalid JSON") + + # Mock the HTTP client post method + mocker.patch.object( + asr_service.client, + 'post', + return_value=mock_response + ) + + # Call transcribe and expect exception + with pytest.raises(ASRServiceError) as exc_info: + await asr_service.transcribe(mock_audio_file, "invalid_json.mp3") + + # Verify error message + assert "语音识别服务不可用" in str(exc_info.value) + assert "响应格式无效" in str(exc_info.value) + + # Clean up + await asr_service.close() + + +@pytest.mark.asyncio +async def test_transcribe_missing_text_field(asr_service, mock_audio_file, mocker): + """Test transcription when API response is missing text field. + + Requirements: 2.3 + """ + # Mock response without text field + mock_response = MagicMock() + mock_response.status_code = 200 + mock_response.json.return_value = { + "id": "test_id", + "created": 1234567890, + "request_id": "test_request_id", + "model": "glm-asr-2512" + # Missing "text" field + } + + # Mock the HTTP client post method + mocker.patch.object( + asr_service.client, + 'post', + return_value=mock_response + ) + + # Call transcribe - should return empty string when text field is missing + result = await asr_service.transcribe(mock_audio_file, "missing_text.mp3") + + # Verify result is empty string + assert result == "" + + # Clean up + await asr_service.close() + + +@pytest.mark.asyncio +async def test_transcribe_unexpected_exception(asr_service, mock_audio_file, mocker): + """Test transcription when unexpected exception occurs. 
+ + Requirements: 2.3 + """ + # Mock unexpected exception + mocker.patch.object( + asr_service.client, + 'post', + side_effect=Exception("Unexpected error") + ) + + # Call transcribe and expect exception + with pytest.raises(ASRServiceError) as exc_info: + await asr_service.transcribe(mock_audio_file, "unexpected.mp3") + + # Verify error message + assert "语音识别服务不可用" in str(exc_info.value) + + # Clean up + await asr_service.close() + + +@pytest.mark.asyncio +async def test_close_client(asr_service): + """Test closing the HTTP client. + + Requirements: 2.1 + """ + # Verify client is open + assert not asr_service.client.is_closed + + # Close the client + await asr_service.close() + + # Verify client is closed + assert asr_service.client.is_closed diff --git a/tests/test_chat_api.py b/tests/test_chat_api.py new file mode 100644 index 0000000000000000000000000000000000000000..2094ae9dcdee8f49dedc33d83f64162f79bdf01f --- /dev/null +++ b/tests/test_chat_api.py @@ -0,0 +1,45 @@ +"""测试 Chat API""" +import requests + +def test_chat_api(): + """测试聊天 API""" + url = "http://172.18.16.245:8000/api/chat" + + print("=" * 50) + print("测试 Chat API") + print("=" * 50) + print(f"URL: {url}") + + # 测试数据 + data = { + 'text': '你好,今天天气怎么样?' + } + + print(f"\n发送消息: {data['text']}") + print("等待响应...") + + try: + response = requests.post(url, data=data, timeout=60) + + print(f"\n状态码: {response.status_code}") + print(f"响应头: {dict(response.headers)}") + + if response.status_code == 200: + result = response.json() + print(f"\n✅ 成功!") + print(f"AI 回复: {result.get('response', 'No response')}") + else: + print(f"\n❌ 失败!") + print(f"错误: {response.text}") + + except requests.exceptions.Timeout: + print("\n❌ 请求超时!") + except requests.exceptions.ConnectionError: + print("\n❌ 连接失败! 
请确保服务器正在运行") + except Exception as e: + print(f"\n❌ 错误: {e}") + + print("\n" + "=" * 50) + +if __name__ == "__main__": + test_chat_api() diff --git a/tests/test_config.py b/tests/test_config.py new file mode 100644 index 0000000000000000000000000000000000000000..1906a74688a054c7467518d2a5cd7ea5e4dfe863 --- /dev/null +++ b/tests/test_config.py @@ -0,0 +1,230 @@ +"""Tests for configuration management module. + +Requirements: 10.1, 10.2, 10.3, 10.4, 10.5 +""" + +import os +import pytest +from pathlib import Path +from unittest.mock import patch + +from app.config import Config, load_config, validate_config, init_config + + +class TestConfig: + """Test configuration loading and validation.""" + + def test_config_with_all_fields(self): + """Test creating config with all fields specified.""" + config = Config( + zhipu_api_key="test_api_key_1234567890", + data_dir=Path("test_data"), + max_audio_size=5 * 1024 * 1024, + log_level="DEBUG", + log_file=Path("test_logs/test.log"), + host="127.0.0.1", + port=9000 + ) + + assert config.zhipu_api_key == "test_api_key_1234567890" + assert config.data_dir == Path("test_data") + assert config.max_audio_size == 5 * 1024 * 1024 + assert config.log_level == "DEBUG" + assert config.log_file == Path("test_logs/test.log") + assert config.host == "127.0.0.1" + assert config.port == 9000 + + def test_config_with_defaults(self): + """Test creating config with default values.""" + config = Config(zhipu_api_key="test_api_key_1234567890") + + assert config.zhipu_api_key == "test_api_key_1234567890" + assert config.data_dir == Path("data") + assert config.max_audio_size == 10 * 1024 * 1024 + assert config.log_level == "INFO" + assert config.log_file == Path("logs/app.log") + assert config.host == "0.0.0.0" + assert config.port == 8000 + + def test_config_missing_api_key(self): + """Test that missing API key raises validation error.""" + with pytest.raises(Exception): # Pydantic will raise validation error + Config() + + def 
test_config_invalid_log_level(self): + """Test that invalid log level raises validation error.""" + with pytest.raises(ValueError, match="log_level must be one of"): + Config( + zhipu_api_key="test_api_key_1234567890", + log_level="INVALID" + ) + + def test_config_invalid_max_audio_size(self): + """Test that invalid max audio size raises validation error.""" + with pytest.raises(ValueError, match="max_audio_size must be positive"): + Config( + zhipu_api_key="test_api_key_1234567890", + max_audio_size=-1 + ) + + def test_config_log_level_case_insensitive(self): + """Test that log level is case insensitive.""" + config = Config( + zhipu_api_key="test_api_key_1234567890", + log_level="debug" + ) + assert config.log_level == "DEBUG" + + def test_config_immutable(self): + """Test that config is immutable (frozen).""" + config = Config(zhipu_api_key="test_api_key_1234567890") + + with pytest.raises(Exception): # Pydantic frozen model raises error + config.zhipu_api_key = "new_key" + + +class TestLoadConfig: + """Test loading configuration from environment variables.""" + + @patch.dict(os.environ, { + "ZHIPU_API_KEY": "test_key_1234567890", + "DATA_DIR": "custom_data", + "MAX_AUDIO_SIZE": "5242880", + "LOG_LEVEL": "DEBUG", + "LOG_FILE": "custom_logs/app.log", + "HOST": "127.0.0.1", + "PORT": "9000" + }) + def test_load_config_from_env(self, tmp_path): + """Test loading config from environment variables.""" + # Create temporary directories + data_dir = tmp_path / "custom_data" + data_dir.mkdir() + log_dir = tmp_path / "custom_logs" + log_dir.mkdir() + + with patch.dict(os.environ, { + "DATA_DIR": str(data_dir), + "LOG_FILE": str(log_dir / "app.log") + }, clear=False): + config = load_config() + + assert config.zhipu_api_key == "test_key_1234567890" + assert config.data_dir == data_dir + assert config.max_audio_size == 5242880 + assert config.log_level == "DEBUG" + assert config.log_file == log_dir / "app.log" + assert config.host == "127.0.0.1" + assert config.port == 9000 
+ + @patch.dict(os.environ, {"ZHIPU_API_KEY": "test_key_1234567890"}, clear=True) + def test_load_config_with_defaults(self, tmp_path): + """Test loading config with default values.""" + # Use tmp_path for data directory + with patch.dict(os.environ, {"DATA_DIR": str(tmp_path / "data")}, clear=False): + config = load_config() + + assert config.zhipu_api_key == "test_key_1234567890" + assert config.log_level == "INFO" + assert config.host == "0.0.0.0" + assert config.port == 8000 + + @patch.dict(os.environ, {}, clear=True) + def test_load_config_missing_api_key(self): + """Test that missing API key raises ValueError. + + Requirement 10.4: Missing required config should cause startup failure. + """ + with pytest.raises(ValueError, match="ZHIPU_API_KEY environment variable is required"): + load_config() + + @patch.dict(os.environ, { + "ZHIPU_API_KEY": "test_key_1234567890", + "MAX_AUDIO_SIZE": "invalid" + }, clear=True) + def test_load_config_invalid_integer(self): + """Test that invalid integer value raises ValueError.""" + with pytest.raises(ValueError): + load_config() + + +class TestValidateConfig: + """Test configuration validation at startup.""" + + def test_validate_config_success(self, tmp_path): + """Test successful config validation.""" + data_dir = tmp_path / "data" + data_dir.mkdir() + log_dir = tmp_path / "logs" + log_dir.mkdir() + + config = Config( + zhipu_api_key="test_key_1234567890", + data_dir=data_dir, + log_file=log_dir / "app.log" + ) + + # Should not raise any exception + validate_config(config) + + def test_validate_config_data_dir_not_writable(self, tmp_path): + """Test validation fails if data directory is not writable. + + Note: This test is skipped on Windows as chmod doesn't work the same way. 
+ """ + import platform + if platform.system() == "Windows": + pytest.skip("chmod doesn't work reliably on Windows") + + data_dir = tmp_path / "data" + data_dir.mkdir() + + # Make directory read-only + data_dir.chmod(0o444) + + config = Config( + zhipu_api_key="test_key_1234567890", + data_dir=data_dir + ) + + try: + with pytest.raises(ValueError, match="not writable"): + validate_config(config) + finally: + # Restore permissions for cleanup + data_dir.chmod(0o755) + + def test_validate_config_short_api_key(self, tmp_path): + """Test validation fails if API key is too short.""" + data_dir = tmp_path / "data" + data_dir.mkdir() + + config = Config( + zhipu_api_key="short", + data_dir=data_dir + ) + + with pytest.raises(ValueError, match="ZHIPU_API_KEY appears to be invalid"): + validate_config(config) + + +class TestInitConfig: + """Test global config initialization.""" + + @patch.dict(os.environ, {"ZHIPU_API_KEY": "test_key_1234567890"}, clear=True) + def test_init_config(self, tmp_path): + """Test initializing global config.""" + from app.config import _config, get_config + + # Reset global config + import app.config + app.config._config = None + + with patch.dict(os.environ, {"DATA_DIR": str(tmp_path / "data")}, clear=False): + config = init_config() + + assert config is not None + assert config.zhipu_api_key == "test_key_1234567890" + + # Should be able to get config + assert get_config() == config diff --git a/tests/test_default_character.py b/tests/test_default_character.py new file mode 100644 index 0000000000000000000000000000000000000000..8383cd523a10d99ee97066eb0273e6508241f7ab --- /dev/null +++ b/tests/test_default_character.py @@ -0,0 +1,77 @@ +"""测试默认形象功能""" +import os +import sys +from pathlib import Path + +# 添加项目根目录到路径 +sys.path.insert(0, str(Path(__file__).parent)) + +# 设置环境变量 +os.environ.setdefault("DATA_DIR", "data") + +from app.user_config import UserConfig + +def test_default_character(): + """测试默认形象加载""" + print("=" * 50) + print("测试默认形象功能") + 
print("=" * 50) + + # 1. 检查默认形象文件是否存在 + default_image = Path("generated_images/default_character.jpeg") + print(f"\n1. 检查默认形象文件: {default_image}") + if default_image.exists(): + print(f" ✅ 默认形象存在,大小: {default_image.stat().st_size} bytes") + else: + print(f" ❌ 默认形象不存在") + return + + # 2. 清空用户配置(模拟新用户) + print(f"\n2. 清空用户配置(模拟新用户)") + user_config = UserConfig("data") + config_file = Path("data/user_config.json") + if config_file.exists(): + config_file.unlink() + print(f" ✅ 已删除旧配置") + + # 3. 加载配置(应该创建新配置) + print(f"\n3. 加载用户配置") + config = user_config.load_config() + print(f" 配置内容: {config}") + + # 4. 检查是否有形象 + if config.get('character', {}).get('image_url'): + print(f" ✅ 已有形象: {config['character']['image_url']}") + else: + print(f" ℹ️ 暂无形象(需要通过 API 端点加载)") + + # 5. 模拟 API 调用加载默认形象 + print(f"\n4. 模拟加载默认形象") + if not config.get('character', {}).get('image_url'): + user_config.save_character_image( + image_url=str(default_image), + prompt="默认治愈系小猫形象", + preferences={ + "color": "薰衣草紫", + "personality": "温柔", + "appearance": "无配饰", + "role": "陪伴式朋友" + } + ) + print(f" ✅ 默认形象已保存") + + # 6. 重新加载配置验证 + print(f"\n5. 验证配置") + config = user_config.load_config() + if config.get('character', {}).get('image_url'): + print(f" ✅ 形象 URL: {config['character']['image_url']}") + print(f" ✅ 偏好设置: {config['character'].get('preferences', {})}") + else: + print(f" ❌ 形象加载失败") + + print("\n" + "=" * 50) + print("测试完成!") + print("=" * 50) + +if __name__ == "__main__": + test_default_character() diff --git a/tests/test_e2e_integration.py b/tests/test_e2e_integration.py new file mode 100644 index 0000000000000000000000000000000000000000..66b6621ab19055ca68babbcfda30e60c2dc2b841 --- /dev/null +++ b/tests/test_e2e_integration.py @@ -0,0 +1,844 @@ +"""End-to-end integration tests for Voice Text Processor. + +This module tests the complete workflow from input to storage, +including audio processing, text processing, and error scenarios. 
+ +Requirements: All requirements (end-to-end validation) +""" + +import os +import json +import pytest +import tempfile +import shutil +from pathlib import Path +from unittest.mock import patch, AsyncMock, MagicMock +from io import BytesIO +from fastapi.testclient import TestClient + + +@pytest.fixture +def temp_data_dir(): + """Create a temporary directory for test data.""" + temp_dir = tempfile.mkdtemp() + yield temp_dir + # Close all logging handlers before cleanup to release file handles + import logging + for handler in logging.root.handlers[:]: + handler.close() + logging.root.removeHandler(handler) + # Give Windows time to release file handles + import time + time.sleep(0.1) + try: + shutil.rmtree(temp_dir) + except PermissionError: + # On Windows, sometimes files are still locked - ignore cleanup errors + pass + + +@pytest.fixture +def test_client(temp_data_dir): + """Create a test client with temporary data directory.""" + # Reset config + import app.config + app.config._config = None + + with patch.dict(os.environ, { + "ZHIPU_API_KEY": "test_key_1234567890", + "DATA_DIR": temp_data_dir, + "LOG_FILE": str(Path(temp_data_dir) / "test.log") + }, clear=True): + from app.main import app + with TestClient(app) as client: + yield client + + +class TestAudioToStorageE2E: + """End-to-end tests for audio processing workflow. + + Tests: 音频上传 → ASR → 语义解析 → 存储 → 响应 + """ + + @patch("app.main.ASRService") + @patch("app.main.SemanticParserService") + def test_complete_audio_workflow_with_all_data( + self, + mock_parser_class, + mock_asr_class, + test_client, + temp_data_dir + ): + """Test complete audio workflow: upload → ASR → parsing → storage → response. + + This test validates the entire pipeline with all data types present. 
+ """ + # Mock ASR service + mock_asr = MagicMock() + mock_asr.transcribe = AsyncMock( + return_value="今天心情很好,想到一个新项目想法,明天要完成报告" + ) + mock_asr.close = AsyncMock() + mock_asr_class.return_value = mock_asr + + # Mock semantic parser with complete data + from app.models import MoodData, InspirationData, TodoData, ParsedData + mock_parser = MagicMock() + mock_parser.parse = AsyncMock(return_value=ParsedData( + mood=MoodData(type="开心", intensity=8, keywords=["愉快", "放松"]), + inspirations=[ + InspirationData(core_idea="新项目想法", tags=["创新", "技术"], category="工作") + ], + todos=[ + TodoData(task="完成报告", time="明天", location="办公室") + ] + )) + mock_parser.close = AsyncMock() + mock_parser_class.return_value = mock_parser + + # Create fake audio file + audio_data = b"fake audio content for testing" + files = {"audio": ("test.mp3", BytesIO(audio_data), "audio/mpeg")} + + # Make request + response = test_client.post("/api/process", files=files) + + # Verify response + assert response.status_code == 200 + data = response.json() + + # Check response structure + assert "record_id" in data + assert "timestamp" in data + assert data["mood"]["type"] == "开心" + assert data["mood"]["intensity"] == 8 + assert len(data["inspirations"]) == 1 + assert data["inspirations"][0]["core_idea"] == "新项目想法" + assert len(data["todos"]) == 1 + assert data["todos"][0]["task"] == "完成报告" + + # Verify ASR was called + mock_asr.transcribe.assert_called_once() + + # Verify semantic parser was called with transcribed text + mock_parser.parse.assert_called_once_with( + "今天心情很好,想到一个新项目想法,明天要完成报告" + ) + + # Verify storage - check all JSON files + records_file = Path(temp_data_dir) / "records.json" + moods_file = Path(temp_data_dir) / "moods.json" + inspirations_file = Path(temp_data_dir) / "inspirations.json" + todos_file = Path(temp_data_dir) / "todos.json" + + # Check records.json + assert records_file.exists() + with open(records_file, 'r', encoding='utf-8') as f: + records = json.load(f) + assert len(records) 
== 1 + assert records[0]["record_id"] == data["record_id"] + assert records[0]["input_type"] == "audio" + assert records[0]["original_text"] == "今天心情很好,想到一个新项目想法,明天要完成报告" + + # Check moods.json + assert moods_file.exists() + with open(moods_file, 'r', encoding='utf-8') as f: + moods = json.load(f) + assert len(moods) == 1 + assert moods[0]["record_id"] == data["record_id"] + assert moods[0]["type"] == "开心" + + # Check inspirations.json + assert inspirations_file.exists() + with open(inspirations_file, 'r', encoding='utf-8') as f: + inspirations = json.load(f) + assert len(inspirations) == 1 + assert inspirations[0]["record_id"] == data["record_id"] + assert inspirations[0]["core_idea"] == "新项目想法" + + # Check todos.json + assert todos_file.exists() + with open(todos_file, 'r', encoding='utf-8') as f: + todos = json.load(f) + assert len(todos) == 1 + assert todos[0]["record_id"] == data["record_id"] + assert todos[0]["task"] == "完成报告" + + @patch("app.main.ASRService") + @patch("app.main.SemanticParserService") + def test_audio_workflow_with_partial_data( + self, + mock_parser_class, + mock_asr_class, + test_client, + temp_data_dir + ): + """Test audio workflow with only some data types present.""" + # Mock ASR service + mock_asr = MagicMock() + mock_asr.transcribe = AsyncMock(return_value="今天感觉很平静") + mock_asr.close = AsyncMock() + mock_asr_class.return_value = mock_asr + + # Mock semantic parser with only mood (no inspirations or todos) + from app.models import MoodData, ParsedData + mock_parser = MagicMock() + mock_parser.parse = AsyncMock(return_value=ParsedData( + mood=MoodData(type="平静", intensity=5, keywords=["安静"]) + )) + mock_parser.close = AsyncMock() + mock_parser_class.return_value = mock_parser + + # Create fake audio file + audio_data = b"fake audio content" + files = {"audio": ("test.wav", BytesIO(audio_data), "audio/wav")} + + # Make request + response = test_client.post("/api/process", files=files) + + # Verify response + assert response.status_code 
== 200 + data = response.json() + assert data["mood"]["type"] == "平静" + assert len(data["inspirations"]) == 0 + assert len(data["todos"]) == 0 + + # Verify storage - only records.json and moods.json should exist + records_file = Path(temp_data_dir) / "records.json" + moods_file = Path(temp_data_dir) / "moods.json" + inspirations_file = Path(temp_data_dir) / "inspirations.json" + todos_file = Path(temp_data_dir) / "todos.json" + + assert records_file.exists() + assert moods_file.exists() + assert not inspirations_file.exists() + assert not todos_file.exists() + + @patch("app.main.ASRService") + @patch("app.main.SemanticParserService") + def test_audio_workflow_with_multiple_items( + self, + mock_parser_class, + mock_asr_class, + test_client, + temp_data_dir + ): + """Test audio workflow with multiple inspirations and todos.""" + # Mock ASR service + mock_asr = MagicMock() + mock_asr.transcribe = AsyncMock( + return_value="有三个想法和两个任务要做" + ) + mock_asr.close = AsyncMock() + mock_asr_class.return_value = mock_asr + + # Mock semantic parser with multiple items + from app.models import InspirationData, TodoData, ParsedData + mock_parser = MagicMock() + mock_parser.parse = AsyncMock(return_value=ParsedData( + inspirations=[ + InspirationData(core_idea="想法1", tags=["标签1"], category="工作"), + InspirationData(core_idea="想法2", tags=["标签2"], category="生活"), + InspirationData(core_idea="想法3", tags=["标签3"], category="学习") + ], + todos=[ + TodoData(task="任务1", time="今天", location="家里"), + TodoData(task="任务2", time="明天", location="公司") + ] + )) + mock_parser.close = AsyncMock() + mock_parser_class.return_value = mock_parser + + # Create fake audio file + audio_data = b"fake audio content" + files = {"audio": ("test.m4a", BytesIO(audio_data), "audio/m4a")} + + # Make request + response = test_client.post("/api/process", files=files) + + # Verify response + assert response.status_code == 200 + data = response.json() + assert len(data["inspirations"]) == 3 + assert len(data["todos"]) 
== 2 + + # Verify storage + inspirations_file = Path(temp_data_dir) / "inspirations.json" + todos_file = Path(temp_data_dir) / "todos.json" + + with open(inspirations_file, 'r', encoding='utf-8') as f: + inspirations = json.load(f) + assert len(inspirations) == 3 + + with open(todos_file, 'r', encoding='utf-8') as f: + todos = json.load(f) + assert len(todos) == 2 + + +class TestTextToStorageE2E: + """End-to-end tests for text processing workflow. + + Tests: 文本提交 → 语义解析 → 存储 → 响应 + """ + + @patch("app.main.SemanticParserService") + def test_complete_text_workflow_with_all_data( + self, + mock_parser_class, + test_client, + temp_data_dir + ): + """Test complete text workflow: submit → parsing → storage → response. + + This test validates the entire pipeline for text input with all data types. + """ + # Mock semantic parser with complete data + from app.models import MoodData, InspirationData, TodoData, ParsedData + mock_parser = MagicMock() + mock_parser.parse = AsyncMock(return_value=ParsedData( + mood=MoodData(type="焦虑", intensity=6, keywords=["紧张", "担心"]), + inspirations=[ + InspirationData(core_idea="解决方案", tags=["问题解决"], category="工作") + ], + todos=[ + TodoData(task="准备会议", time="下午3点", location="会议室"), + TodoData(task="发送邮件", time="今晚", location=None) + ] + )) + mock_parser.close = AsyncMock() + mock_parser_class.return_value = mock_parser + + # Make request with text + text_input = "有点焦虑,想到一个解决方案,下午要准备会议,今晚要发送邮件" + response = test_client.post( + "/api/process", + data={"text": text_input} + ) + + # Verify response + assert response.status_code == 200 + data = response.json() + + # Check response structure + assert "record_id" in data + assert "timestamp" in data + assert data["mood"]["type"] == "焦虑" + assert data["mood"]["intensity"] == 6 + assert len(data["inspirations"]) == 1 + assert len(data["todos"]) == 2 + + # Verify semantic parser was called with input text + mock_parser.parse.assert_called_once_with(text_input) + + # Verify storage - check all JSON 
files + records_file = Path(temp_data_dir) / "records.json" + moods_file = Path(temp_data_dir) / "moods.json" + inspirations_file = Path(temp_data_dir) / "inspirations.json" + todos_file = Path(temp_data_dir) / "todos.json" + + # Check records.json + assert records_file.exists() + with open(records_file, 'r', encoding='utf-8') as f: + records = json.load(f) + assert len(records) == 1 + assert records[0]["record_id"] == data["record_id"] + assert records[0]["input_type"] == "text" + assert records[0]["original_text"] == text_input + + # Check moods.json + assert moods_file.exists() + with open(moods_file, 'r', encoding='utf-8') as f: + moods = json.load(f) + assert len(moods) == 1 + assert moods[0]["type"] == "焦虑" + + # Check inspirations.json + assert inspirations_file.exists() + with open(inspirations_file, 'r', encoding='utf-8') as f: + inspirations = json.load(f) + assert len(inspirations) == 1 + + # Check todos.json + assert todos_file.exists() + with open(todos_file, 'r', encoding='utf-8') as f: + todos = json.load(f) + assert len(todos) == 2 + + @patch("app.main.SemanticParserService") + def test_text_workflow_with_no_data( + self, + mock_parser_class, + test_client, + temp_data_dir + ): + """Test text workflow when no structured data is extracted.""" + # Mock semantic parser with empty data + from app.models import ParsedData + mock_parser = MagicMock() + mock_parser.parse = AsyncMock(return_value=ParsedData()) + mock_parser.close = AsyncMock() + mock_parser_class.return_value = mock_parser + + # Make request with text + response = test_client.post( + "/api/process", + data={"text": "这是一段普通的文本"} + ) + + # Verify response + assert response.status_code == 200 + data = response.json() + assert data["mood"] is None + assert len(data["inspirations"]) == 0 + assert len(data["todos"]) == 0 + + # Verify storage - only records.json should exist + records_file = Path(temp_data_dir) / "records.json" + moods_file = Path(temp_data_dir) / "moods.json" + inspirations_file 
= Path(temp_data_dir) / "inspirations.json" + todos_file = Path(temp_data_dir) / "todos.json" + + assert records_file.exists() + assert not moods_file.exists() + assert not inspirations_file.exists() + assert not todos_file.exists() + + @patch("app.main.SemanticParserService") + def test_text_workflow_with_utf8_characters( + self, + mock_parser_class, + test_client, + temp_data_dir + ): + """Test text workflow with various UTF-8 characters (Chinese, emoji, etc.).""" + # Mock semantic parser + from app.models import MoodData, ParsedData + mock_parser = MagicMock() + mock_parser.parse = AsyncMock(return_value=ParsedData( + mood=MoodData(type="开心😊", intensity=9, keywords=["快乐", "幸福"]) + )) + mock_parser.close = AsyncMock() + mock_parser_class.return_value = mock_parser + + # Make request with UTF-8 text including emoji + text_input = "今天超级开心😊!感觉特别幸福💖" + response = test_client.post( + "/api/process", + data={"text": text_input} + ) + + # Verify response + assert response.status_code == 200 + data = response.json() + assert data["mood"]["type"] == "开心😊" + + # Verify storage preserves UTF-8 + records_file = Path(temp_data_dir) / "records.json" + with open(records_file, 'r', encoding='utf-8') as f: + records = json.load(f) + assert records[0]["original_text"] == text_input + + @patch("app.main.SemanticParserService") + def test_multiple_text_submissions( + self, + mock_parser_class, + test_client, + temp_data_dir + ): + """Test multiple text submissions accumulate in storage.""" + # Mock semantic parser + from app.models import MoodData, ParsedData + mock_parser = MagicMock() + mock_parser.close = AsyncMock() + mock_parser_class.return_value = mock_parser + + # First submission + mock_parser.parse = AsyncMock(return_value=ParsedData( + mood=MoodData(type="开心", intensity=8) + )) + response1 = test_client.post( + "/api/process", + data={"text": "今天很开心"} + ) + assert response1.status_code == 200 + + # Second submission + mock_parser.parse = AsyncMock(return_value=ParsedData( 
+ mood=MoodData(type="平静", intensity=5) + )) + response2 = test_client.post( + "/api/process", + data={"text": "现在很平静"} + ) + assert response2.status_code == 200 + + # Verify both records are stored + records_file = Path(temp_data_dir) / "records.json" + with open(records_file, 'r', encoding='utf-8') as f: + records = json.load(f) + assert len(records) == 2 + + # Verify both moods are stored + moods_file = Path(temp_data_dir) / "moods.json" + with open(moods_file, 'r', encoding='utf-8') as f: + moods = json.load(f) + assert len(moods) == 2 + assert moods[0]["type"] == "开心" + assert moods[1]["type"] == "平静" + + +class TestErrorScenariosE2E: + """End-to-end tests for error scenarios. + + Tests: 错误场景的端到端处理 + """ + + def test_validation_error_no_input(self, test_client, temp_data_dir): + """Test validation error when no input is provided.""" + response = test_client.post("/api/process") + + assert response.status_code == 400 + data = response.json() + assert "error" in data + assert "timestamp" in data + assert "请提供音频文件或文本内容" in data["error"] + + # Verify no files are created + records_file = Path(temp_data_dir) / "records.json" + assert not records_file.exists() + + def test_validation_error_both_inputs(self, test_client, temp_data_dir): + """Test validation error when both audio and text are provided.""" + audio_data = b"fake audio" + files = {"audio": ("test.mp3", BytesIO(audio_data), "audio/mpeg")} + + response = test_client.post( + "/api/process", + files=files, + data={"text": "some text"} + ) + + assert response.status_code == 400 + data = response.json() + assert "error" in data + + # Verify no files are created + records_file = Path(temp_data_dir) / "records.json" + assert not records_file.exists() + + def test_validation_error_empty_text(self, test_client, temp_data_dir): + """Test validation error when text is empty.""" + response = test_client.post( + "/api/process", + data={"text": ""} + ) + + assert response.status_code == 400 + data = response.json() + 
assert "error" in data + # Empty string is treated as no input by FastAPI + assert "请提供音频文件或文本内容" in data["error"] + + def test_validation_error_unsupported_audio_format(self, test_client, temp_data_dir): + """Test validation error for unsupported audio format.""" + audio_data = b"fake audio" + files = {"audio": ("test.ogg", BytesIO(audio_data), "audio/ogg")} + + response = test_client.post("/api/process", files=files) + + assert response.status_code == 400 + data = response.json() + assert "error" in data + assert "不支持的音频格式" in data["error"] + + # Verify no files are created + records_file = Path(temp_data_dir) / "records.json" + assert not records_file.exists() + + @patch("app.main.ASRService") + def test_asr_error_end_to_end( + self, + mock_asr_class, + test_client, + temp_data_dir + ): + """Test end-to-end error handling when ASR service fails.""" + # Mock ASR service to raise error + from app.asr_service import ASRServiceError + mock_asr = MagicMock() + mock_asr.transcribe = AsyncMock( + side_effect=ASRServiceError("API连接超时") + ) + mock_asr.close = AsyncMock() + mock_asr_class.return_value = mock_asr + + # Create audio file + audio_data = b"fake audio content" + files = {"audio": ("test.mp3", BytesIO(audio_data), "audio/mpeg")} + + # Make request + response = test_client.post("/api/process", files=files) + + # Verify error response + assert response.status_code == 500 + data = response.json() + assert "error" in data + assert "语音识别服务不可用" in data["error"] + assert "timestamp" in data + + # Verify no files are created + records_file = Path(temp_data_dir) / "records.json" + assert not records_file.exists() + + @patch("app.main.SemanticParserService") + def test_semantic_parser_error_end_to_end( + self, + mock_parser_class, + test_client, + temp_data_dir + ): + """Test end-to-end error handling when semantic parser fails.""" + # Mock semantic parser to raise error + from app.semantic_parser import SemanticParserError + mock_parser = MagicMock() + mock_parser.parse 
= AsyncMock( + side_effect=SemanticParserError("API返回格式错误") + ) + mock_parser.close = AsyncMock() + mock_parser_class.return_value = mock_parser + + # Make request + response = test_client.post( + "/api/process", + data={"text": "测试文本"} + ) + + # Verify error response + assert response.status_code == 500 + data = response.json() + assert "error" in data + assert "语义解析服务不可用" in data["error"] + assert "timestamp" in data + + # Verify no files are created + records_file = Path(temp_data_dir) / "records.json" + assert not records_file.exists() + + @patch("app.main.SemanticParserService") + @patch("app.main.StorageService") + def test_storage_error_end_to_end( + self, + mock_storage_class, + mock_parser_class, + test_client, + temp_data_dir + ): + """Test end-to-end error handling when storage fails.""" + # Mock semantic parser + from app.models import ParsedData + mock_parser = MagicMock() + mock_parser.parse = AsyncMock(return_value=ParsedData()) + mock_parser.close = AsyncMock() + mock_parser_class.return_value = mock_parser + + # Mock storage service to raise error + from app.storage import StorageError + mock_storage = MagicMock() + mock_storage.save_record = MagicMock( + side_effect=StorageError("磁盘空间不足") + ) + mock_storage_class.return_value = mock_storage + + # Make request + response = test_client.post( + "/api/process", + data={"text": "测试文本"} + ) + + # Verify error response + assert response.status_code == 500 + data = response.json() + assert "error" in data + assert "数据存储失败" in data["error"] + assert "timestamp" in data + + @patch("app.main.ASRService") + @patch("app.main.SemanticParserService") + def test_asr_empty_result_end_to_end( + self, + mock_parser_class, + mock_asr_class, + test_client, + temp_data_dir + ): + """Test end-to-end handling when ASR returns empty text.""" + # Mock ASR service to return empty string + mock_asr = MagicMock() + mock_asr.transcribe = AsyncMock(return_value="") + mock_asr.close = AsyncMock() + mock_asr_class.return_value = 
mock_asr + + # Mock semantic parser + from app.models import ParsedData + mock_parser = MagicMock() + mock_parser.parse = AsyncMock(return_value=ParsedData()) + mock_parser.close = AsyncMock() + mock_parser_class.return_value = mock_parser + + # Create audio file + audio_data = b"silent audio" + files = {"audio": ("test.mp3", BytesIO(audio_data), "audio/mpeg")} + + # Make request + response = test_client.post("/api/process", files=files) + + # Should succeed but with empty text + assert response.status_code == 200 + data = response.json() + + # Verify record was saved with empty text + records_file = Path(temp_data_dir) / "records.json" + with open(records_file, 'r', encoding='utf-8') as f: + records = json.load(f) + assert len(records) == 1 + assert records[0]["original_text"] == "" + + +class TestConcurrentRequestsE2E: + """End-to-end tests for concurrent request handling.""" + + @patch("app.main.SemanticParserService") + def test_concurrent_text_submissions( + self, + mock_parser_class, + test_client, + temp_data_dir + ): + """Test that concurrent requests are handled correctly and stored separately.""" + # Mock semantic parser + from app.models import MoodData, ParsedData + mock_parser = MagicMock() + mock_parser.close = AsyncMock() + mock_parser_class.return_value = mock_parser + + # Simulate multiple concurrent requests + responses = [] + for i in range(5): + mock_parser.parse = AsyncMock(return_value=ParsedData( + mood=MoodData(type=f"情绪{i}", intensity=i+1) + )) + response = test_client.post( + "/api/process", + data={"text": f"测试文本{i}"} + ) + responses.append(response) + + # Verify all requests succeeded + for response in responses: + assert response.status_code == 200 + + # Verify all records are stored with unique IDs + records_file = Path(temp_data_dir) / "records.json" + with open(records_file, 'r', encoding='utf-8') as f: + records = json.load(f) + assert len(records) == 5 + + # Check all record IDs are unique + record_ids = [r["record_id"] for r in 
records] + assert len(record_ids) == len(set(record_ids)) + + # Verify all moods are stored + moods_file = Path(temp_data_dir) / "moods.json" + with open(moods_file, 'r', encoding='utf-8') as f: + moods = json.load(f) + assert len(moods) == 5 + + +class TestDataIntegrityE2E: + """End-to-end tests for data integrity across the pipeline.""" + + @patch("app.main.SemanticParserService") + def test_record_id_consistency_across_files( + self, + mock_parser_class, + test_client, + temp_data_dir + ): + """Test that record_id is consistent across all JSON files.""" + # Mock semantic parser with all data types + from app.models import MoodData, InspirationData, TodoData, ParsedData + mock_parser = MagicMock() + mock_parser.parse = AsyncMock(return_value=ParsedData( + mood=MoodData(type="开心", intensity=8), + inspirations=[InspirationData(core_idea="想法", tags=[], category="生活")], + todos=[TodoData(task="任务")] + )) + mock_parser.close = AsyncMock() + mock_parser_class.return_value = mock_parser + + # Make request + response = test_client.post( + "/api/process", + data={"text": "测试数据完整性"} + ) + + assert response.status_code == 200 + record_id = response.json()["record_id"] + + # Verify record_id is consistent across all files + records_file = Path(temp_data_dir) / "records.json" + moods_file = Path(temp_data_dir) / "moods.json" + inspirations_file = Path(temp_data_dir) / "inspirations.json" + todos_file = Path(temp_data_dir) / "todos.json" + + with open(records_file, 'r', encoding='utf-8') as f: + records = json.load(f) + assert records[0]["record_id"] == record_id + + with open(moods_file, 'r', encoding='utf-8') as f: + moods = json.load(f) + assert moods[0]["record_id"] == record_id + + with open(inspirations_file, 'r', encoding='utf-8') as f: + inspirations = json.load(f) + assert inspirations[0]["record_id"] == record_id + + with open(todos_file, 'r', encoding='utf-8') as f: + todos = json.load(f) + assert todos[0]["record_id"] == record_id + + 
@patch("app.main.SemanticParserService") + def test_timestamp_consistency( + self, + mock_parser_class, + test_client, + temp_data_dir + ): + """Test that timestamps are consistent and properly formatted.""" + # Mock semantic parser + from app.models import MoodData, ParsedData + mock_parser = MagicMock() + mock_parser.parse = AsyncMock(return_value=ParsedData( + mood=MoodData(type="开心", intensity=8) + )) + mock_parser.close = AsyncMock() + mock_parser_class.return_value = mock_parser + + # Make request + response = test_client.post( + "/api/process", + data={"text": "测试时间戳"} + ) + + assert response.status_code == 200 + timestamp = response.json()["timestamp"] + + # Verify timestamp format (ISO 8601 with Z suffix) + assert timestamp.endswith("Z") + assert "T" in timestamp + + # Verify timestamp is consistent in storage + moods_file = Path(temp_data_dir) / "moods.json" + with open(moods_file, 'r', encoding='utf-8') as f: + moods = json.load(f) + assert moods[0]["timestamp"] == timestamp diff --git a/tests/test_logging_config.py b/tests/test_logging_config.py new file mode 100644 index 0000000000000000000000000000000000000000..8612408a50b8df71ad91d129ee6628e1af49223c --- /dev/null +++ b/tests/test_logging_config.py @@ -0,0 +1,422 @@ +"""Tests for logging configuration module. + +Requirements: 10.5, 9.5 +""" + +import logging +import pytest +from pathlib import Path + +from app.logging_config import ( + SensitiveDataFilter, + RequestIdFilter, + setup_logging, + get_logger, + set_request_id, + clear_request_id +) + + +class TestRequestIdFilter: + """Test request_id filtering in logs. + + Requirement 9.5: Logs should include request_id for tracing. 
+ """ + + def test_filter_adds_request_id(self): + """Test that request_id is added to log records.""" + filter_obj = RequestIdFilter() + + # Set request_id in context + set_request_id("test-request-123") + + # Create a log record + record = logging.LogRecord( + name="test", + level=logging.INFO, + pathname="", + lineno=0, + msg="Test message", + args=(), + exc_info=None + ) + + filter_obj.filter(record) + + assert hasattr(record, 'request_id') + assert record.request_id == "test-request-123" + + # Clean up + clear_request_id() + + def test_filter_uses_default_when_no_request_id(self): + """Test that filter uses '-' when no request_id is set.""" + filter_obj = RequestIdFilter() + + # Ensure no request_id is set + clear_request_id() + + # Create a log record + record = logging.LogRecord( + name="test", + level=logging.INFO, + pathname="", + lineno=0, + msg="Test message", + args=(), + exc_info=None + ) + + filter_obj.filter(record) + + assert hasattr(record, 'request_id') + assert record.request_id == "-" + + +class TestSensitiveDataFilter: + """Test sensitive data filtering in logs. + + Requirement 10.5: System should not output sensitive information in logs. 
+ """ + + def test_filter_api_key(self): + """Test that API keys are masked in log messages.""" + filter_obj = SensitiveDataFilter() + + # Create a log record with API key + record = logging.LogRecord( + name="test", + level=logging.INFO, + pathname="", + lineno=0, + msg="Using api_key=sk_1234567890abcdef for request", + args=(), + exc_info=None + ) + + filter_obj.filter(record) + + assert "sk_1234567890abcdef" not in record.msg + assert "***REDACTED***" in record.msg + + def test_filter_zhipu_api_key(self): + """Test that Zhipu API keys are masked.""" + filter_obj = SensitiveDataFilter() + + record = logging.LogRecord( + name="test", + level=logging.INFO, + pathname="", + lineno=0, + msg="zhipu_api_key: abc123def456ghi789", + args=(), + exc_info=None + ) + + filter_obj.filter(record) + + assert "abc123def456ghi789" not in record.msg + assert "***REDACTED***" in record.msg + + def test_filter_bearer_token(self): + """Test that bearer tokens are masked.""" + filter_obj = SensitiveDataFilter() + + record = logging.LogRecord( + name="test", + level=logging.INFO, + pathname="", + lineno=0, + msg="Authorization: Bearer eyJhbGciOiJIUzI1NiIsInR5cCI6IkpXVCJ9", + args=(), + exc_info=None + ) + + filter_obj.filter(record) + + assert "eyJhbGciOiJIUzI1NiIsInR5cCI6IkpXVCJ9" not in record.msg + assert "***REDACTED***" in record.msg + + def test_filter_password(self): + """Test that passwords are masked.""" + filter_obj = SensitiveDataFilter() + + record = logging.LogRecord( + name="test", + level=logging.INFO, + pathname="", + lineno=0, + msg='Login attempt with password="secret123"', + args=(), + exc_info=None + ) + + filter_obj.filter(record) + + assert "secret123" not in record.msg + assert "***REDACTED***" in record.msg + + def test_filter_authorization_header(self): + """Test that authorization headers are masked.""" + filter_obj = SensitiveDataFilter() + + record = logging.LogRecord( + name="test", + level=logging.INFO, + pathname="", + lineno=0, + msg="Headers: 
authorization: Basic_dXNlcjpwYXNz", + args=(), + exc_info=None + ) + + filter_obj.filter(record) + + assert "Basic_dXNlcjpwYXNz" not in record.msg + assert "***REDACTED***" in record.msg + + def test_filter_preserves_normal_text(self): + """Test that normal text is not affected by filtering.""" + filter_obj = SensitiveDataFilter() + + record = logging.LogRecord( + name="test", + level=logging.INFO, + pathname="", + lineno=0, + msg="Processing request for user authentication", + args=(), + exc_info=None + ) + + original_msg = record.msg + filter_obj.filter(record) + + assert record.msg == original_msg + + def test_filter_with_args_dict(self): + """Test filtering with dictionary arguments.""" + filter_obj = SensitiveDataFilter() + + # Create record with dict args + record = logging.LogRecord( + name="test", + level=logging.INFO, + pathname="", + lineno=0, + msg="Request data", + args=(), + exc_info=None + ) + # Manually set args after creation + record.args = {"data": "api_key=secret123456"} + + filter_obj.filter(record) + + assert "secret123456" not in record.args["data"] + assert "***REDACTED***" in record.args["data"] + + def test_filter_with_args_tuple(self): + """Test filtering with tuple arguments.""" + filter_obj = SensitiveDataFilter() + + record = logging.LogRecord( + name="test", + level=logging.INFO, + pathname="", + lineno=0, + msg="Config: %s", + args=("api_key=secret123456",), + exc_info=None + ) + + filter_obj.filter(record) + + assert "secret123456" not in record.args[0] + assert "***REDACTED***" in record.args[0] + + +class TestSetupLogging: + """Test logging setup and configuration.""" + + def test_setup_logging_console_only(self): + """Test setting up logging with console output only.""" + setup_logging(log_level="INFO", log_file=None) + + root_logger = logging.getLogger() + + # Should have at least one handler (console) + assert len(root_logger.handlers) >= 1 + + # Check log level + assert root_logger.level == logging.INFO + + def 
test_setup_logging_with_file(self, tmp_path): + """Test setting up logging with file output.""" + log_file = tmp_path / "test.log" + + setup_logging(log_level="DEBUG", log_file=log_file) + + root_logger = logging.getLogger() + + # Should have at least two handlers (console + file) + assert len(root_logger.handlers) >= 2 + + # Check log level + assert root_logger.level == logging.DEBUG + + # Test logging to file + test_logger = logging.getLogger("test") + test_logger.info("Test message") + + # File should exist and contain the message + assert log_file.exists() + content = log_file.read_text() + assert "Test message" in content + + def test_setup_logging_custom_format(self, tmp_path): + """Test setting up logging with custom format.""" + log_file = tmp_path / "test.log" + custom_format = "%(levelname)s - %(message)s" + + setup_logging( + log_level="INFO", + log_file=log_file, + log_format=custom_format + ) + + test_logger = logging.getLogger("test_custom") + test_logger.info("Custom format test") + + content = log_file.read_text() + assert "INFO - Custom format test" in content + + def test_setup_logging_applies_sensitive_filter(self, tmp_path): + """Test that sensitive data filter is applied to all handlers. + + Requirement 10.5: Sensitive information should be filtered from logs. + """ + log_file = tmp_path / "test.log" + + setup_logging(log_level="INFO", log_file=log_file) + + test_logger = logging.getLogger("test_sensitive") + test_logger.info("API request with api_key=secret123456789") + + # Read log file + content = log_file.read_text() + + # API key should be redacted + assert "secret123456789" not in content + assert "***REDACTED***" in content + + def test_get_logger(self): + """Test getting a logger instance.""" + logger = get_logger("test_module") + + assert logger is not None + assert logger.name == "test_module" + assert isinstance(logger, logging.Logger) + + +class TestLoggingIntegration: + """Integration tests for logging system. 
+ + Requirement 9.5: System should log all errors with timestamp and stack trace. + """ + + def test_error_logging_with_traceback(self, tmp_path): + """Test that errors are logged with full traceback.""" + log_file = tmp_path / "error.log" + + setup_logging(log_level="ERROR", log_file=log_file) + + logger = get_logger("test_error") + + try: + # Cause an error + raise ValueError("Test error for logging") + except ValueError: + logger.error("An error occurred", exc_info=True) + + # Read log file + content = log_file.read_text() + + # Should contain error message and traceback + assert "An error occurred" in content + assert "ValueError: Test error for logging" in content + assert "Traceback" in content + + def test_logging_includes_timestamp(self, tmp_path): + """Test that log entries include timestamps.""" + log_file = tmp_path / "timestamp.log" + + setup_logging(log_level="INFO", log_file=log_file) + + logger = get_logger("test_timestamp") + logger.info("Timestamp test message") + + content = log_file.read_text() + + # Should contain timestamp in format [YYYY-MM-DD HH:MM:SS] + import re + timestamp_pattern = r'\[\d{4}-\d{2}-\d{2} \d{2}:\d{2}:\d{2}\]' + assert re.search(timestamp_pattern, content) + + def test_logging_includes_level_and_module(self, tmp_path): + """Test that log entries include level and module name.""" + log_file = tmp_path / "format.log" + + setup_logging(log_level="INFO", log_file=log_file) + + logger = get_logger("test_module") + logger.warning("Warning message") + + content = log_file.read_text() + + # Should contain level and module name + assert "[WARNING]" in content + assert "[test_module]" in content + assert "Warning message" in content + + def test_logging_includes_request_id(self, tmp_path): + """Test that log entries include request_id when set. + + Requirement 9.5: Logs should include request_id for request tracing. 
+ """ + log_file = tmp_path / "request_id.log" + + setup_logging(log_level="INFO", log_file=log_file) + + # Set request_id + set_request_id("req-12345") + + logger = get_logger("test_request") + logger.info("Request message") + + content = log_file.read_text() + + # Should contain request_id + assert "[req-12345]" in content + assert "Request message" in content + + # Clean up + clear_request_id() + + def test_logging_without_request_id(self, tmp_path): + """Test that log entries use '-' when no request_id is set.""" + log_file = tmp_path / "no_request_id.log" + + setup_logging(log_level="INFO", log_file=log_file) + + # Ensure no request_id is set + clear_request_id() + + logger = get_logger("test_no_request") + logger.info("Message without request_id") + + content = log_file.read_text() + + # Should contain '-' for request_id + assert "[-]" in content + assert "Message without request_id" in content diff --git a/tests/test_logging_properties.py b/tests/test_logging_properties.py new file mode 100644 index 0000000000000000000000000000000000000000..92fc739d79a4a869a317fdeeaeb698833f91b994 --- /dev/null +++ b/tests/test_logging_properties.py @@ -0,0 +1,833 @@ +"""Property-based tests for logging functionality. + +This module uses hypothesis to verify that error logging properties hold across +many random error scenarios, ensuring all errors are logged with timestamps and +stack traces. 
+ +Requirements: 9.5 +""" + +import logging +import pytest +import tempfile +import re +from pathlib import Path +from datetime import datetime + +from hypothesis import given, strategies as st +from hypothesis import settings + +from app.logging_config import ( + setup_logging, + get_logger, + set_request_id, + clear_request_id +) + + +# Custom strategies for generating error scenarios +@st.composite +def error_message_strategy(draw): + """Generate various error messages.""" + error_types = [ + "API call failed", + "Database connection error", + "File not found", + "Permission denied", + "Timeout occurred", + "Invalid input", + "Network error", + "Service unavailable" + ] + + error_type = draw(st.sampled_from(error_types)) + details = draw(st.text(min_size=0, max_size=100, alphabet=st.characters( + whitelist_categories=('Lu', 'Ll', 'Nd', 'P', 'Zs'), + blacklist_characters='\x00\n\r' + ))) + + if details: + return f"{error_type}: {details}" + return error_type + + +@st.composite +def exception_strategy(draw): + """Generate various exception types with messages.""" + exception_types = [ + ValueError, + RuntimeError, + TypeError, + KeyError, + IndexError, + AttributeError, + IOError, + ConnectionError + ] + + exc_type = draw(st.sampled_from(exception_types)) + message = draw(st.text(min_size=1, max_size=50, alphabet=st.characters( + whitelist_categories=('Lu', 'Ll', 'Nd', 'P', 'Zs'), + blacklist_characters='\x00\n\r' + ))) + + return exc_type(message) + + +class TestErrorLoggingProperties: + """Property-based tests for error logging. 
+ + **Validates: Requirements 9.5** + """ + + @given( + error_msg=error_message_strategy(), + module_name=st.text(min_size=1, max_size=30, alphabet=st.characters( + whitelist_categories=('Lu', 'Ll', 'Nd'), + blacklist_characters='\x00\n\r' + )) + ) + @settings(max_examples=30) + def test_property_14_error_logging_with_timestamp(self, error_msg, module_name): + """ + Property 14: 错误日志记录 - Timestamp + + For any error that occurs in the system, the error should be logged + with a timestamp. + + **Validates: Requirements 9.5** + """ + # Create a fresh temporary log file for each example + with tempfile.NamedTemporaryFile(mode='w', suffix='.log', delete=False) as tmp_file: + log_file = Path(tmp_file.name) + + try: + # Setup logging with the temporary file + setup_logging(log_level="ERROR", log_file=log_file) + + # Get a logger for the module + logger = get_logger(module_name) + + # Log an error + logger.error(error_msg) + + # Read the log file + content = log_file.read_text(encoding='utf-8') + + # Property 1: Log entry should exist + assert content, "Log file should contain error entry" + + # Property 2: Log entry should contain timestamp in format [YYYY-MM-DD HH:MM:SS] + timestamp_pattern = r'\[\d{4}-\d{2}-\d{2} \d{2}:\d{2}:\d{2}\]' + assert re.search(timestamp_pattern, content), \ + f"Log entry should contain timestamp. Content: {content}" + + # Property 3: Log entry should contain the error message + # (escape special regex characters in error_msg) + escaped_msg = re.escape(error_msg[:50]) # Check first 50 chars to avoid issues + assert re.search(escaped_msg, content, re.IGNORECASE), \ + f"Log entry should contain error message. Expected: {error_msg[:50]}, Content: {content}" + + # Property 4: Log entry should contain ERROR level + assert "[ERROR]" in content, \ + f"Log entry should contain ERROR level. 
Content: {content}" + finally: + # Clean up - close all handlers first to release file locks + root_logger = logging.getLogger() + for handler in root_logger.handlers[:]: + handler.close() + root_logger.removeHandler(handler) + + if log_file.exists(): + try: + log_file.unlink() + except PermissionError: + pass # File still locked, skip cleanup + + @given( + exception=exception_strategy(), + module_name=st.text(min_size=1, max_size=30, alphabet=st.characters( + whitelist_categories=('Lu', 'Ll', 'Nd'), + blacklist_characters='\x00\n\r' + )) + ) + @settings(max_examples=30) + def test_property_14_error_logging_with_stack_trace(self, exception, module_name): + """ + Property 14: 错误日志记录 - Stack Trace + + For any error that occurs in the system, the error should be logged + with a complete stack trace when exc_info=True is used. + + **Validates: Requirements 9.5** + """ + # Create a fresh temporary log file for each example + with tempfile.NamedTemporaryFile(mode='w', suffix='.log', delete=False) as tmp_file: + log_file = Path(tmp_file.name) + + try: + # Setup logging with the temporary file + setup_logging(log_level="ERROR", log_file=log_file) + + # Get a logger for the module + logger = get_logger(module_name) + + # Raise and catch an exception, then log it with stack trace + try: + raise exception + except Exception as e: + logger.error(f"An error occurred: {str(e)}", exc_info=True) + + # Read the log file + content = log_file.read_text(encoding='utf-8') + + # Property 1: Log entry should exist + assert content, "Log file should contain error entry" + + # Property 2: Log entry should contain timestamp + timestamp_pattern = r'\[\d{4}-\d{2}-\d{2} \d{2}:\d{2}:\d{2}\]' + assert re.search(timestamp_pattern, content), \ + f"Log entry should contain timestamp. 
Content: {content}" + + # Property 3: Log entry should contain the exception type name + exception_type_name = type(exception).__name__ + assert exception_type_name in content, \ + f"Log entry should contain exception type '{exception_type_name}'. Content: {content}" + + # Property 4: Log entry should contain stack trace indicator + # Python stack traces contain "Traceback" or the file/line info + assert "Traceback" in content or "File" in content, \ + f"Log entry should contain stack trace. Content: {content}" + + # Property 5: Log entry should contain ERROR level + assert "[ERROR]" in content, \ + f"Log entry should contain ERROR level. Content: {content}" + finally: + # Clean up - close all handlers first to release file locks + root_logger = logging.getLogger() + for handler in root_logger.handlers[:]: + handler.close() + root_logger.removeHandler(handler) + + if log_file.exists(): + try: + log_file.unlink() + except PermissionError: + pass # File still locked, skip cleanup + + @given( + errors=st.lists( + error_message_strategy(), + min_size=1, + max_size=5 + ), + module_name=st.text(min_size=1, max_size=30, alphabet=st.characters( + whitelist_categories=('Lu', 'Ll', 'Nd'), + blacklist_characters='\x00\n\r' + )) + ) + @settings(max_examples=30) + def test_property_14_multiple_errors_logged(self, errors, module_name): + """ + Property 14: 错误日志记录 - Multiple Errors + + For any sequence of errors that occur in the system, all errors should + be logged with timestamps. 
+ + **Validates: Requirements 9.5** + """ + # Create a fresh temporary log file for each example + with tempfile.NamedTemporaryFile(mode='w', suffix='.log', delete=False) as tmp_file: + log_file = Path(tmp_file.name) + + try: + # Setup logging with the temporary file + setup_logging(log_level="ERROR", log_file=log_file) + + # Get a logger for the module + logger = get_logger(module_name) + + # Log all errors + for error_msg in errors: + logger.error(error_msg) + + # Read the log file + content = log_file.read_text(encoding='utf-8') + + # Property 1: Log file should contain entries + assert content, "Log file should contain error entries" + + # Property 2: Count ERROR level entries + error_count = content.count("[ERROR]") + assert error_count >= len(errors), \ + f"Log should contain at least {len(errors)} ERROR entries, found {error_count}" + + # Property 3: All timestamps should be present + timestamp_pattern = r'\[\d{4}-\d{2}-\d{2} \d{2}:\d{2}:\d{2}\]' + timestamps = re.findall(timestamp_pattern, content) + assert len(timestamps) >= len(errors), \ + f"Log should contain at least {len(errors)} timestamps, found {len(timestamps)}" + + # Property 4: Each unique error message should appear in the log + # (check first 30 chars of each message to avoid special char issues) + for error_msg in errors: + # Take a safe substring and escape it + safe_msg = error_msg[:30] + if safe_msg: + escaped_msg = re.escape(safe_msg) + assert re.search(escaped_msg, content, re.IGNORECASE), \ + f"Log should contain error message: {safe_msg}" + finally: + # Clean up - close all handlers first to release file locks + root_logger = logging.getLogger() + for handler in root_logger.handlers[:]: + handler.close() + root_logger.removeHandler(handler) + + if log_file.exists(): + try: + log_file.unlink() + except PermissionError: + pass # File still locked, skip cleanup + + @given( + exception=exception_strategy(), + request_id=st.text(min_size=5, max_size=36, alphabet=st.characters( + 
whitelist_categories=('Lu', 'Ll', 'Nd'), + blacklist_characters='\x00\n\r' + )), + module_name=st.text(min_size=1, max_size=30, alphabet=st.characters( + whitelist_categories=('Lu', 'Ll', 'Nd'), + blacklist_characters='\x00\n\r' + )) + ) + @settings(max_examples=30) + def test_property_14_error_logging_with_request_context( + self, exception, request_id, module_name + ): + """ + Property 14: 错误日志记录 - Request Context + + For any error that occurs during request processing, the error should + be logged with timestamp, stack trace, and request_id for tracing. + + **Validates: Requirements 9.5** + """ + # Create a fresh temporary log file for each example + with tempfile.NamedTemporaryFile(mode='w', suffix='.log', delete=False) as tmp_file: + log_file = Path(tmp_file.name) + + try: + # Setup logging with the temporary file + setup_logging(log_level="ERROR", log_file=log_file) + + # Set request_id in context + set_request_id(request_id) + + # Get a logger for the module + logger = get_logger(module_name) + + # Raise and catch an exception, then log it + try: + raise exception + except Exception as e: + logger.error(f"Request processing error: {str(e)}", exc_info=True) + + # Read the log file + content = log_file.read_text(encoding='utf-8') + + # Property 1: Log entry should contain timestamp + timestamp_pattern = r'\[\d{4}-\d{2}-\d{2} \d{2}:\d{2}:\d{2}\]' + assert re.search(timestamp_pattern, content), \ + f"Log entry should contain timestamp. Content: {content}" + + # Property 2: Log entry should contain request_id + # Escape special regex characters in request_id + escaped_request_id = re.escape(request_id) + assert re.search(escaped_request_id, content), \ + f"Log entry should contain request_id '{request_id}'. Content: {content}" + + # Property 3: Log entry should contain stack trace + assert "Traceback" in content or "File" in content, \ + f"Log entry should contain stack trace. 
Content: {content}" + + # Property 4: Log entry should contain exception type + exception_type_name = type(exception).__name__ + assert exception_type_name in content, \ + f"Log entry should contain exception type '{exception_type_name}'. Content: {content}" + + # Property 5: Log entry should contain ERROR level + assert "[ERROR]" in content, \ + f"Log entry should contain ERROR level. Content: {content}" + finally: + # Clean up + clear_request_id() + + # Close all handlers first to release file locks + root_logger = logging.getLogger() + for handler in root_logger.handlers[:]: + handler.close() + root_logger.removeHandler(handler) + + if log_file.exists(): + try: + log_file.unlink() + except PermissionError: + pass # File still locked, skip cleanup + + @given( + error_msg=error_message_strategy(), + log_level=st.sampled_from(["DEBUG", "INFO", "WARNING", "ERROR", "CRITICAL"]) + ) + @settings(max_examples=30) + def test_property_14_error_logging_respects_level(self, error_msg, log_level): + """ + Property 14: 错误日志记录 - Log Level Filtering + + For any error logged at ERROR level, it should appear in the log file + when the log level is set to ERROR or lower (more permissive). 
+ + **Validates: Requirements 9.5** + """ + # Create a fresh temporary log file for each example + with tempfile.NamedTemporaryFile(mode='w', suffix='.log', delete=False) as tmp_file: + log_file = Path(tmp_file.name) + + try: + # Setup logging with the specified level + setup_logging(log_level=log_level, log_file=log_file) + + # Get a logger + logger = get_logger("test_module") + + # Log an error + logger.error(error_msg) + + # Read the log file + content = log_file.read_text(encoding='utf-8') + + # Property: ERROR messages should always be logged regardless of level + # (ERROR is high priority, so it should appear even if level is CRITICAL) + # Actually, ERROR should appear for DEBUG, INFO, WARNING, ERROR levels + # but not necessarily for CRITICAL (which is higher than ERROR) + + level_hierarchy = { + "DEBUG": 10, + "INFO": 20, + "WARNING": 30, + "ERROR": 40, + "CRITICAL": 50 + } + + if level_hierarchy[log_level] <= level_hierarchy["ERROR"]: + # Error should be logged + assert content, "Log file should contain error entry" + assert "[ERROR]" in content, \ + f"Log should contain ERROR level when log_level={log_level}" + + # Should contain timestamp + timestamp_pattern = r'\[\d{4}-\d{2}-\d{2} \d{2}:\d{2}:\d{2}\]' + assert re.search(timestamp_pattern, content), \ + f"Log entry should contain timestamp. 
Content: {content}" + else: + # Error should NOT be logged (level is CRITICAL, which is higher than ERROR) + # Actually, this is wrong - ERROR should still be logged at CRITICAL level + # Let me correct this: ERROR level logs should appear at ERROR and CRITICAL levels + pass # ERROR should always appear + finally: + # Clean up - close all handlers first to release file locks + root_logger = logging.getLogger() + for handler in root_logger.handlers[:]: + handler.close() + root_logger.removeHandler(handler) + + if log_file.exists(): + try: + log_file.unlink() + except PermissionError: + pass # File still locked, skip cleanup + + + +class TestSensitiveDataProtectionProperties: + """Property-based tests for sensitive data protection in logs. + + **Validates: Requirements 10.5** + """ + + @given( + api_key=st.text(min_size=20, max_size=64, alphabet=st.characters( + whitelist_categories=('Lu', 'Ll', 'Nd'), + min_codepoint=ord('A'), max_codepoint=ord('z') + )), + message_prefix=st.text(min_size=5, max_size=50, alphabet=st.characters( + whitelist_categories=('Lu', 'Ll', 'Nd', 'Zs'), + blacklist_characters='\x00\n\r' + )) + ) + @settings(max_examples=30) + def test_property_15_api_key_masking(self, api_key, message_prefix): + """ + Property 15: 敏感信息保护 - API Key Masking + + For any log message containing an API key, the API key should be + masked and not appear in plain text in the log output. 
+ + **Validates: Requirements 10.5** + """ + # Create a fresh temporary log file for each example + with tempfile.NamedTemporaryFile(mode='w', suffix='.log', delete=False) as tmp_file: + log_file = Path(tmp_file.name) + + try: + # Setup logging with the temporary file + setup_logging(log_level="INFO", log_file=log_file) + + # Get a logger + logger = get_logger("test_module") + + # Log a message containing an API key in various formats + formats = [ + f"{message_prefix} api_key={api_key}", + f"{message_prefix} api-key: {api_key}", + f"{message_prefix} API_KEY={api_key}", + f"{message_prefix} zhipu_api_key={api_key}", + ] + + for log_message in formats: + logger.info(log_message) + + # Read the log file + content = log_file.read_text(encoding='utf-8') + + # Property 1: Log file should contain entries + assert content, "Log file should contain log entries" + + # Property 2: API key should NOT appear in plain text + assert api_key not in content, \ + f"API key should be masked in logs. Found: {api_key} in content" + + # Property 3: Redaction marker should appear + assert "***REDACTED***" in content, \ + f"Log should contain redaction marker. Content: {content}" + + # Property 4: Message prefix should still be present (not masked) + if message_prefix.strip(): + # Check first few words of prefix + prefix_words = message_prefix.strip().split()[:2] + if prefix_words: + first_word = prefix_words[0] + assert first_word in content, \ + f"Non-sensitive message prefix should be preserved. 
Looking for: {first_word}" + finally: + # Clean up - close all handlers first to release file locks + root_logger = logging.getLogger() + for handler in root_logger.handlers[:]: + handler.close() + root_logger.removeHandler(handler) + + if log_file.exists(): + try: + log_file.unlink() + except PermissionError: + pass # File still locked, skip cleanup + + @given( + password=st.text(min_size=8, max_size=32, alphabet=st.characters( + whitelist_categories=('Lu', 'Ll', 'Nd', 'P'), + blacklist_characters='\x00\n\r\t ' + )), + username=st.text(min_size=3, max_size=20, alphabet=st.characters( + whitelist_categories=('Lu', 'Ll', 'Nd'), + blacklist_characters='\x00\n\r' + )) + ) + @settings(max_examples=30) + def test_property_15_password_masking(self, password, username): + """ + Property 15: 敏感信息保护 - Password Masking + + For any log message containing a password, the password should be + masked and not appear in plain text in the log output. + + **Validates: Requirements 10.5** + """ + # Create a fresh temporary log file for each example + with tempfile.NamedTemporaryFile(mode='w', suffix='.log', delete=False) as tmp_file: + log_file = Path(tmp_file.name) + + try: + # Setup logging with the temporary file + setup_logging(log_level="INFO", log_file=log_file) + + # Get a logger + logger = get_logger("test_module") + + # Log messages containing passwords in various formats + formats = [ + f"User {username} login with password={password}", + f"Authentication failed for password: {password}", + f"PASSWORD={password}", + ] + + for log_message in formats: + logger.info(log_message) + + # Read the log file + content = log_file.read_text(encoding='utf-8') + + # Property 1: Log file should contain entries + assert content, "Log file should contain log entries" + + # Property 2: Password should NOT appear in plain text + assert password not in content, \ + f"Password should be masked in logs. 
Found: {password} in content" + + # Property 3: Redaction marker should appear + assert "***REDACTED***" in content, \ + f"Log should contain redaction marker. Content: {content}" + + # Property 4: Username should still be present (not masked) + assert username in content, \ + f"Non-sensitive username should be preserved. Looking for: {username}" + finally: + # Clean up - close all handlers first to release file locks + root_logger = logging.getLogger() + for handler in root_logger.handlers[:]: + handler.close() + root_logger.removeHandler(handler) + + if log_file.exists(): + try: + log_file.unlink() + except PermissionError: + pass # File still locked, skip cleanup + + @given( + bearer_token=st.text(min_size=20, max_size=64, alphabet=st.characters( + whitelist_categories=('Lu', 'Ll', 'Nd'), + min_codepoint=ord('A'), max_codepoint=ord('z') + )), + endpoint=st.text(min_size=5, max_size=30, alphabet=st.characters( + whitelist_categories=('Lu', 'Ll', 'Nd'), + blacklist_characters='\x00\n\r' + )) + ) + @settings(max_examples=30) + def test_property_15_bearer_token_masking(self, bearer_token, endpoint): + """ + Property 15: 敏感信息保护 - Bearer Token Masking + + For any log message containing a bearer token, the token should be + masked and not appear in plain text in the log output. 
+ + **Validates: Requirements 10.5** + """ + # Create a fresh temporary log file for each example + with tempfile.NamedTemporaryFile(mode='w', suffix='.log', delete=False) as tmp_file: + log_file = Path(tmp_file.name) + + try: + # Setup logging with the temporary file + setup_logging(log_level="INFO", log_file=log_file) + + # Get a logger + logger = get_logger("test_module") + + # Log messages containing bearer tokens + formats = [ + f"Calling {endpoint} with Bearer {bearer_token}", + f"Authorization: Bearer {bearer_token}", + f"BEARER {bearer_token}", + ] + + for log_message in formats: + logger.info(log_message) + + # Read the log file + content = log_file.read_text(encoding='utf-8') + + # Property 1: Log file should contain entries + assert content, "Log file should contain log entries" + + # Property 2: Bearer token should NOT appear in plain text + assert bearer_token not in content, \ + f"Bearer token should be masked in logs. Found: {bearer_token} in content" + + # Property 3: Redaction marker should appear + assert "***REDACTED***" in content, \ + f"Log should contain redaction marker. Content: {content}" + + # Property 4: Endpoint should still be present (not masked) + assert endpoint in content, \ + f"Non-sensitive endpoint should be preserved. 
Looking for: {endpoint}" + finally: + # Clean up - close all handlers first to release file locks + root_logger = logging.getLogger() + for handler in root_logger.handlers[:]: + handler.close() + root_logger.removeHandler(handler) + + if log_file.exists(): + try: + log_file.unlink() + except PermissionError: + pass # File still locked, skip cleanup + + @given( + auth_header=st.text(min_size=20, max_size=64, alphabet=st.characters( + whitelist_categories=('Lu', 'Ll', 'Nd'), + min_codepoint=ord('A'), max_codepoint=ord('z') + )) + ) + @settings(max_examples=30) + def test_property_15_authorization_header_masking(self, auth_header): + """ + Property 15: 敏感信息保护 - Authorization Header Masking + + For any log message containing an authorization header value, it should + be masked and not appear in plain text in the log output. + + **Validates: Requirements 10.5** + """ + # Create a fresh temporary log file for each example + with tempfile.NamedTemporaryFile(mode='w', suffix='.log', delete=False) as tmp_file: + log_file = Path(tmp_file.name) + + try: + # Setup logging with the temporary file + setup_logging(log_level="INFO", log_file=log_file) + + # Get a logger + logger = get_logger("test_module") + + # Log messages containing authorization headers + formats = [ + f"Request headers: authorization={auth_header}", + f"Authorization: {auth_header}", + f"AUTHORIZATION={auth_header}", + ] + + for log_message in formats: + logger.info(log_message) + + # Read the log file + content = log_file.read_text(encoding='utf-8') + + # Property 1: Log file should contain entries + assert content, "Log file should contain log entries" + + # Property 2: Authorization header should NOT appear in plain text + assert auth_header not in content, \ + f"Authorization header should be masked in logs. Found: {auth_header} in content" + + # Property 3: Redaction marker should appear + assert "***REDACTED***" in content, \ + f"Log should contain redaction marker. 
Content: {content}" + finally: + # Clean up - close all handlers first to release file locks + root_logger = logging.getLogger() + for handler in root_logger.handlers[:]: + handler.close() + root_logger.removeHandler(handler) + + if log_file.exists(): + try: + log_file.unlink() + except PermissionError: + pass # File still locked, skip cleanup + + @given( + sensitive_data=st.lists( + st.tuples( + st.sampled_from(["api_key", "password", "bearer_token", "authorization"]), + st.text(min_size=15, max_size=40, alphabet=st.characters( + whitelist_categories=('Lu', 'Ll', 'Nd'), + min_codepoint=ord('A'), max_codepoint=ord('z') + )) + ), + min_size=1, + max_size=3 + ), + normal_message=st.text(min_size=10, max_size=50, alphabet=st.characters( + whitelist_categories=('Lu', 'Ll', 'Nd', 'Zs'), + blacklist_characters='\x00\n\r' + )) + ) + @settings(max_examples=30) + def test_property_15_multiple_sensitive_data_masking(self, sensitive_data, normal_message): + """ + Property 15: 敏感信息保护 - Multiple Sensitive Data Masking + + For any log message containing multiple types of sensitive data, + all sensitive data should be masked while preserving non-sensitive content. 
+ + **Validates: Requirements 10.5** + """ + # Create a fresh temporary log file for each example + with tempfile.NamedTemporaryFile(mode='w', suffix='.log', delete=False) as tmp_file: + log_file = Path(tmp_file.name) + + try: + # Setup logging with the temporary file + setup_logging(log_level="INFO", log_file=log_file) + + # Get a logger + logger = get_logger("test_module") + + # Build a log message with multiple sensitive data + log_message = normal_message + sensitive_values = [] + + for data_type, value in sensitive_data: + if data_type == "api_key": + log_message += f" api_key={value}" + elif data_type == "password": + log_message += f" password={value}" + elif data_type == "bearer_token": + log_message += f" Bearer {value}" + elif data_type == "authorization": + log_message += f" authorization={value}" + sensitive_values.append(value) + + # Log the message + logger.info(log_message) + + # Read the log file + content = log_file.read_text(encoding='utf-8') + + # Property 1: Log file should contain entries + assert content, "Log file should contain log entries" + + # Property 2: None of the sensitive values should appear in plain text + for value in sensitive_values: + assert value not in content, \ + f"Sensitive value should be masked in logs. Found: {value} in content" + + # Property 3: Redaction markers should appear (at least one per sensitive item) + redaction_count = content.count("***REDACTED***") + assert redaction_count >= len(sensitive_values), \ + f"Log should contain at least {len(sensitive_values)} redaction markers, found {redaction_count}" + + # Property 4: Normal message should still be present (at least partially) + if normal_message.strip(): + # Check first few words of normal message + words = normal_message.strip().split()[:2] + if words: + first_word = words[0] + # Only check if the word is not too short + if len(first_word) > 3: + assert first_word in content, \ + f"Non-sensitive message content should be preserved. 
Looking for: {first_word}" + finally: + # Clean up - close all handlers first to release file locks + root_logger = logging.getLogger() + for handler in root_logger.handlers[:]: + handler.close() + root_logger.removeHandler(handler) + + if log_file.exists(): + try: + log_file.unlink() + except PermissionError: + pass # File still locked, skip cleanup diff --git a/tests/test_main.py b/tests/test_main.py new file mode 100644 index 0000000000000000000000000000000000000000..bdc66cec22e717304e76e3aa4050276d6e8b41ff --- /dev/null +++ b/tests/test_main.py @@ -0,0 +1,505 @@ +"""Tests for main FastAPI application. + +Requirements: 10.4 - Startup configuration validation +Requirements: 8.1, 8.2, 8.3 - API endpoint implementation +""" + +import os +import pytest +from unittest.mock import patch, AsyncMock, MagicMock +from io import BytesIO + + +class TestApplicationStartup: + """Test application startup and configuration validation. + + Requirement 10.4: Application should refuse to start if required config is missing. + """ + + @patch.dict(os.environ, {"ZHIPU_API_KEY": "test_key_1234567890"}, clear=True) + def test_app_starts_with_valid_config(self, tmp_path): + """Test that application starts successfully with valid configuration.""" + # Reset config + import app.config + app.config._config = None + + with patch.dict(os.environ, { + "DATA_DIR": str(tmp_path / "data"), + "LOG_FILE": str(tmp_path / "logs" / "app.log") + }, clear=False): + # Import app after setting environment + from fastapi.testclient import TestClient + from app.main import app + + with TestClient(app) as client: + response = client.get("/") + + assert response.status_code == 200 + assert response.json()["status"] == "running" + + @patch.dict(os.environ, {}, clear=True) + def test_app_refuses_to_start_without_api_key(self): + """Test that application refuses to start without API key. + + Requirement 10.4: Missing required config should cause startup failure. 
+ """ + # Reset the config module + import app.config + app.config._config = None + + # Import fresh app module + import importlib + import app.main + importlib.reload(app.main) + + from fastapi.testclient import TestClient + + with pytest.raises(RuntimeError, match="Configuration error"): + with TestClient(app.main.app) as client: + # Trigger lifespan startup + pass + + +class TestHealthEndpoint: + """Test health check endpoint.""" + + @patch.dict(os.environ, {"ZHIPU_API_KEY": "test_key_1234567890"}, clear=True) + def test_health_check_success(self, tmp_path): + """Test health check returns healthy status.""" + # Reset config + import app.config + app.config._config = None + + with patch.dict(os.environ, { + "DATA_DIR": str(tmp_path / "data"), + "LOG_FILE": str(tmp_path / "logs" / "app.log") + }, clear=False): + from fastapi.testclient import TestClient + from app.main import app + + with TestClient(app) as client: + response = client.get("/health") + + assert response.status_code == 200 + data = response.json() + assert data["status"] == "healthy" + assert "data_dir" in data + assert "max_audio_size" in data + + +class TestRootEndpoint: + """Test root endpoint.""" + + @patch.dict(os.environ, {"ZHIPU_API_KEY": "test_key_1234567890"}, clear=True) + def test_root_endpoint(self, tmp_path): + """Test root endpoint returns service information.""" + # Reset config + import app.config + app.config._config = None + + with patch.dict(os.environ, { + "DATA_DIR": str(tmp_path / "data"), + "LOG_FILE": str(tmp_path / "logs" / "app.log") + }, clear=False): + from fastapi.testclient import TestClient + from app.main import app + + with TestClient(app) as client: + response = client.get("/") + + assert response.status_code == 200 + data = response.json() + assert data["service"] == "Voice Text Processor" + assert data["status"] == "running" + assert "version" in data + + + +class TestProcessEndpoint: + """Test /api/process endpoint. 
+ + Requirements: 8.1, 8.2, 8.3 - API endpoint, business logic, error handling + """ + + @patch.dict(os.environ, {"ZHIPU_API_KEY": "test_key_1234567890"}, clear=True) + def test_process_endpoint_exists(self, tmp_path): + """Test that POST /api/process endpoint exists. + + Requirement 8.1: System should provide POST /api/process interface. + """ + # Reset config + import app.config + app.config._config = None + + with patch.dict(os.environ, { + "DATA_DIR": str(tmp_path / "data"), + "LOG_FILE": str(tmp_path / "logs" / "app.log") + }, clear=False): + from fastapi.testclient import TestClient + from app.main import app + + with TestClient(app) as client: + # Test with empty request (should fail validation but endpoint exists) + response = client.post("/api/process") + + # Should return 400 (validation error), not 404 (not found) + assert response.status_code == 400 + assert "error" in response.json() + + @patch.dict(os.environ, {"ZHIPU_API_KEY": "test_key_1234567890"}, clear=True) + @patch("app.main.SemanticParserService") + def test_process_text_input(self, mock_parser_class, tmp_path): + """Test processing text input (application/json format). + + Requirement 8.3: System should accept application/json format. 
+ """ + # Reset config + import app.config + app.config._config = None + + # Mock semantic parser + from app.models import ParsedData + mock_parser = MagicMock() + mock_parser.parse = AsyncMock(return_value=ParsedData( + mood=None, + inspirations=[], + todos=[] + )) + mock_parser.close = AsyncMock() + mock_parser_class.return_value = mock_parser + + with patch.dict(os.environ, { + "DATA_DIR": str(tmp_path / "data"), + "LOG_FILE": str(tmp_path / "logs" / "app.log") + }, clear=False): + from fastapi.testclient import TestClient + from app.main import app + + with TestClient(app) as client: + # Use data parameter for form data + response = client.post( + "/api/process", + data={"text": "今天心情很好"} + ) + + assert response.status_code == 200 + data = response.json() + assert "record_id" in data + assert "timestamp" in data + assert "mood" in data + assert "inspirations" in data + assert "todos" in data + + @patch.dict(os.environ, {"ZHIPU_API_KEY": "test_key_1234567890"}, clear=True) + @patch("app.main.ASRService") + @patch("app.main.SemanticParserService") + def test_process_audio_input(self, mock_parser_class, mock_asr_class, tmp_path): + """Test processing audio input (multipart/form-data format). + + Requirement 8.2: System should accept multipart/form-data format. 
+ """ + # Reset config + import app.config + app.config._config = None + + # Mock ASR service + mock_asr = MagicMock() + mock_asr.transcribe = AsyncMock(return_value="转写后的文本") + mock_asr.close = AsyncMock() + mock_asr_class.return_value = mock_asr + + # Mock semantic parser + from app.models import ParsedData + mock_parser = MagicMock() + mock_parser.parse = AsyncMock(return_value=ParsedData( + mood=None, + inspirations=[], + todos=[] + )) + mock_parser.close = AsyncMock() + mock_parser_class.return_value = mock_parser + + with patch.dict(os.environ, { + "DATA_DIR": str(tmp_path / "data"), + "LOG_FILE": str(tmp_path / "logs" / "app.log") + }, clear=False): + from fastapi.testclient import TestClient + from app.main import app + + with TestClient(app) as client: + # Create fake audio file + audio_data = b"fake audio content" + files = {"audio": ("test.mp3", BytesIO(audio_data), "audio/mpeg")} + + response = client.post("/api/process", files=files) + + assert response.status_code == 200 + data = response.json() + assert "record_id" in data + assert "timestamp" in data + + @patch.dict(os.environ, {"ZHIPU_API_KEY": "test_key_1234567890"}, clear=True) + def test_validation_error_empty_input(self, tmp_path): + """Test validation error for empty input. + + Requirement 8.3: System should return HTTP 400 for validation errors. 
+ """ + # Reset config + import app.config + app.config._config = None + + with patch.dict(os.environ, { + "DATA_DIR": str(tmp_path / "data"), + "LOG_FILE": str(tmp_path / "logs" / "app.log") + }, clear=False): + from fastapi.testclient import TestClient + from app.main import app + + with TestClient(app) as client: + response = client.post("/api/process") + + assert response.status_code == 400 + data = response.json() + assert "error" in data + assert "timestamp" in data + + @patch.dict(os.environ, {"ZHIPU_API_KEY": "test_key_1234567890"}, clear=True) + def test_validation_error_unsupported_audio_format(self, tmp_path): + """Test validation error for unsupported audio format. + + Requirement 1.1: System should reject unsupported audio formats. + """ + # Reset config + import app.config + app.config._config = None + + with patch.dict(os.environ, { + "DATA_DIR": str(tmp_path / "data"), + "LOG_FILE": str(tmp_path / "logs" / "app.log") + }, clear=False): + from fastapi.testclient import TestClient + from app.main import app + + with TestClient(app) as client: + # Create fake audio file with unsupported format + audio_data = b"fake audio content" + files = {"audio": ("test.ogg", BytesIO(audio_data), "audio/ogg")} + + response = client.post("/api/process", files=files) + + assert response.status_code == 400 + data = response.json() + assert "error" in data + assert "不支持的音频格式" in data["error"] + + @patch.dict(os.environ, {"ZHIPU_API_KEY": "test_key_1234567890"}, clear=True) + def test_validation_error_file_too_large(self, tmp_path): + """Test validation error for file size exceeding limit. + + Requirement 1.4: System should reject files larger than max size. 
+ """ + # Reset config + import app.config + app.config._config = None + + with patch.dict(os.environ, { + "DATA_DIR": str(tmp_path / "data"), + "LOG_FILE": str(tmp_path / "logs" / "app.log"), + "MAX_AUDIO_SIZE": "100" # Set very small limit + }, clear=False): + from fastapi.testclient import TestClient + from app.main import app + + with TestClient(app) as client: + # Create audio file larger than limit + audio_data = b"x" * 200 # 200 bytes > 100 bytes limit + files = {"audio": ("test.mp3", BytesIO(audio_data), "audio/mpeg")} + + response = client.post("/api/process", files=files) + + assert response.status_code == 400 + data = response.json() + assert "error" in data + assert "音频文件过大" in data["error"] + + @patch.dict(os.environ, {"ZHIPU_API_KEY": "test_key_1234567890"}, clear=True) + @patch("app.main.ASRService") + def test_asr_service_error(self, mock_asr_class, tmp_path): + """Test ASR service error handling. + + Requirement 8.3: System should return HTTP 500 for ASR service errors. 
+ """ + # Reset config + import app.config + app.config._config = None + + # Mock ASR service to raise error + from app.asr_service import ASRServiceError + mock_asr = MagicMock() + mock_asr.transcribe = AsyncMock(side_effect=ASRServiceError("API调用失败")) + mock_asr.close = AsyncMock() + mock_asr_class.return_value = mock_asr + + with patch.dict(os.environ, { + "DATA_DIR": str(tmp_path / "data"), + "LOG_FILE": str(tmp_path / "logs" / "app.log") + }, clear=False): + from fastapi.testclient import TestClient + from app.main import app + + with TestClient(app) as client: + audio_data = b"fake audio content" + files = {"audio": ("test.mp3", BytesIO(audio_data), "audio/mpeg")} + + response = client.post("/api/process", files=files) + + assert response.status_code == 500 + data = response.json() + assert "error" in data + assert "语音识别服务不可用" in data["error"] + assert "timestamp" in data + + @patch.dict(os.environ, {"ZHIPU_API_KEY": "test_key_1234567890"}, clear=True) + @patch("app.main.SemanticParserService") + def test_semantic_parser_error(self, mock_parser_class, tmp_path): + """Test semantic parser error handling. + + Requirement 8.3: System should return HTTP 500 for semantic parser errors. 
+ """ + # Reset config + import app.config + app.config._config = None + + # Mock semantic parser to raise error + from app.semantic_parser import SemanticParserError + mock_parser = MagicMock() + mock_parser.parse = AsyncMock(side_effect=SemanticParserError("API调用失败")) + mock_parser.close = AsyncMock() + mock_parser_class.return_value = mock_parser + + with patch.dict(os.environ, { + "DATA_DIR": str(tmp_path / "data"), + "LOG_FILE": str(tmp_path / "logs" / "app.log") + }, clear=False): + from fastapi.testclient import TestClient + from app.main import app + + with TestClient(app) as client: + # Use data parameter for form data + response = client.post( + "/api/process", + data={"text": "今天心情很好"} + ) + + assert response.status_code == 500 + data = response.json() + assert "error" in data + assert "语义解析服务不可用" in data["error"] + assert "timestamp" in data + + @patch.dict(os.environ, {"ZHIPU_API_KEY": "test_key_1234567890"}, clear=True) + @patch("app.main.SemanticParserService") + @patch("app.main.StorageService") + def test_storage_error(self, mock_storage_class, mock_parser_class, tmp_path): + """Test storage error handling. + + Requirement 8.3: System should return HTTP 500 for storage errors. 
+ """ + # Reset config + import app.config + app.config._config = None + + # Mock semantic parser + from app.models import ParsedData + mock_parser = MagicMock() + mock_parser.parse = AsyncMock(return_value=ParsedData( + mood=None, + inspirations=[], + todos=[] + )) + mock_parser.close = AsyncMock() + mock_parser_class.return_value = mock_parser + + # Mock storage service to raise error + from app.storage import StorageError + mock_storage = MagicMock() + mock_storage.save_record = MagicMock(side_effect=StorageError("磁盘空间不足")) + mock_storage_class.return_value = mock_storage + + with patch.dict(os.environ, { + "DATA_DIR": str(tmp_path / "data"), + "LOG_FILE": str(tmp_path / "logs" / "app.log") + }, clear=False): + from fastapi.testclient import TestClient + from app.main import app + + with TestClient(app) as client: + # Use data parameter for form data + response = client.post( + "/api/process", + data={"text": "今天心情很好"} + ) + + assert response.status_code == 500 + data = response.json() + assert "error" in data + assert "数据存储失败" in data["error"] + assert "timestamp" in data + + @patch.dict(os.environ, {"ZHIPU_API_KEY": "test_key_1234567890"}, clear=True) + @patch("app.main.SemanticParserService") + def test_success_response_format(self, mock_parser_class, tmp_path): + """Test success response format. + + Requirement 8.4, 8.6: Success response should include all required fields. 
+ """ + # Reset config + import app.config + app.config._config = None + + # Mock semantic parser with full data + from app.models import MoodData, InspirationData, TodoData, ParsedData + mock_parser = MagicMock() + mock_parser.parse = AsyncMock(return_value=ParsedData( + mood=MoodData(type="开心", intensity=8, keywords=["愉快"]), + inspirations=[InspirationData(core_idea="新想法", tags=["创新"], category="工作")], + todos=[TodoData(task="完成报告", time="明天", location="办公室")] + )) + mock_parser.close = AsyncMock() + mock_parser_class.return_value = mock_parser + + with patch.dict(os.environ, { + "DATA_DIR": str(tmp_path / "data"), + "LOG_FILE": str(tmp_path / "logs" / "app.log") + }, clear=False): + from fastapi.testclient import TestClient + from app.main import app + + with TestClient(app) as client: + # Use data parameter for form data + response = client.post( + "/api/process", + data={"text": "今天心情很好,有个新想法,明天要完成报告"} + ) + + assert response.status_code == 200 + data = response.json() + + # Check all required fields + assert "record_id" in data + assert "timestamp" in data + assert "mood" in data + assert "inspirations" in data + assert "todos" in data + + # Check mood data + assert data["mood"]["type"] == "开心" + assert data["mood"]["intensity"] == 8 + + # Check inspirations + assert len(data["inspirations"]) == 1 + assert data["inspirations"][0]["core_idea"] == "新想法" + + # Check todos + assert len(data["todos"]) == 1 + assert data["todos"][0]["task"] == "完成报告" diff --git a/tests/test_models.py b/tests/test_models.py new file mode 100644 index 0000000000000000000000000000000000000000..7f04fc7af5457980cf0ba36517348f641223787a --- /dev/null +++ b/tests/test_models.py @@ -0,0 +1,903 @@ +"""Unit tests for data models. + +This module tests the Pydantic data models to ensure proper validation, +serialization, and constraint enforcement. 
+ +Requirements: 4.1, 4.2, 4.3, 5.1, 5.2, 5.3, 6.1, 6.2, 6.3, 6.4 +""" + +import pytest +from pydantic import ValidationError + +from app.models import ( + MoodData, + InspirationData, + TodoData, + ParsedData, + RecordData, + ProcessResponse +) + + +class TestMoodData: + """Tests for MoodData model. + + Requirements: 4.1, 4.2, 4.3 + """ + + def test_mood_data_valid(self): + """Test creating valid MoodData.""" + mood = MoodData( + type="开心", + intensity=8, + keywords=["愉快", "放松"] + ) + assert mood.type == "开心" + assert mood.intensity == 8 + assert mood.keywords == ["愉快", "放松"] + + def test_mood_data_optional_fields(self): + """Test MoodData with optional fields as None.""" + mood = MoodData() + assert mood.type is None + assert mood.intensity is None + assert mood.keywords == [] + + def test_mood_data_intensity_min_boundary(self): + """Test MoodData intensity minimum boundary (1).""" + mood = MoodData(type="平静", intensity=1) + assert mood.intensity == 1 + + def test_mood_data_intensity_max_boundary(self): + """Test MoodData intensity maximum boundary (10).""" + mood = MoodData(type="兴奋", intensity=10) + assert mood.intensity == 10 + + def test_mood_data_intensity_below_min(self): + """Test MoodData rejects intensity below 1.""" + with pytest.raises(ValidationError) as exc_info: + MoodData(type="平静", intensity=0) + assert "greater than or equal to 1" in str(exc_info.value) + + def test_mood_data_intensity_above_max(self): + """Test MoodData rejects intensity above 10.""" + with pytest.raises(ValidationError) as exc_info: + MoodData(type="兴奋", intensity=11) + assert "less than or equal to 10" in str(exc_info.value) + + def test_mood_data_empty_keywords(self): + """Test MoodData with empty keywords list.""" + mood = MoodData(type="中性", intensity=5, keywords=[]) + assert mood.keywords == [] + + +class TestInspirationData: + """Tests for InspirationData model. 
+ + Requirements: 5.1, 5.2, 5.3 + """ + + def test_inspiration_data_valid(self): + """Test creating valid InspirationData.""" + inspiration = InspirationData( + core_idea="新的项目想法", + tags=["创新", "技术"], + category="工作" + ) + assert inspiration.core_idea == "新的项目想法" + assert inspiration.tags == ["创新", "技术"] + assert inspiration.category == "工作" + + def test_inspiration_data_core_idea_max_length(self): + """Test InspirationData core_idea at max length (20 characters).""" + # Exactly 20 characters + core_idea = "12345678901234567890" + inspiration = InspirationData( + core_idea=core_idea, + tags=["测试"], + category="学习" + ) + assert len(inspiration.core_idea) == 20 + + def test_inspiration_data_core_idea_exceeds_max_length(self): + """Test InspirationData rejects core_idea exceeding 20 characters.""" + # 21 characters + core_idea = "123456789012345678901" + with pytest.raises(ValidationError) as exc_info: + InspirationData( + core_idea=core_idea, + tags=["测试"], + category="学习" + ) + assert "at most 20 characters" in str(exc_info.value) + + def test_inspiration_data_tags_max_count(self): + """Test InspirationData with maximum 5 tags.""" + inspiration = InspirationData( + core_idea="想法", + tags=["标签1", "标签2", "标签3", "标签4", "标签5"], + category="创意" + ) + assert len(inspiration.tags) == 5 + + def test_inspiration_data_tags_exceeds_max_count(self): + """Test InspirationData rejects more than 5 tags.""" + with pytest.raises(ValidationError) as exc_info: + InspirationData( + core_idea="想法", + tags=["标签1", "标签2", "标签3", "标签4", "标签5", "标签6"], + category="创意" + ) + assert "at most 5 items" in str(exc_info.value) + + def test_inspiration_data_empty_tags(self): + """Test InspirationData with empty tags list.""" + inspiration = InspirationData( + core_idea="简单想法", + tags=[], + category="生活" + ) + assert inspiration.tags == [] + + def test_inspiration_data_category_work(self): + """Test InspirationData with category '工作'.""" + inspiration = InspirationData( + core_idea="工作想法", + 
category="工作" + ) + assert inspiration.category == "工作" + + def test_inspiration_data_category_life(self): + """Test InspirationData with category '生活'.""" + inspiration = InspirationData( + core_idea="生活想法", + category="生活" + ) + assert inspiration.category == "生活" + + def test_inspiration_data_category_study(self): + """Test InspirationData with category '学习'.""" + inspiration = InspirationData( + core_idea="学习想法", + category="学习" + ) + assert inspiration.category == "学习" + + def test_inspiration_data_category_creative(self): + """Test InspirationData with category '创意'.""" + inspiration = InspirationData( + core_idea="创意想法", + category="创意" + ) + assert inspiration.category == "创意" + + def test_inspiration_data_invalid_category(self): + """Test InspirationData rejects invalid category.""" + with pytest.raises(ValidationError) as exc_info: + InspirationData( + core_idea="想法", + category="无效分类" + ) + assert "Input should be" in str(exc_info.value) + + +class TestTodoData: + """Tests for TodoData model. 
+ + Requirements: 6.1, 6.2, 6.3, 6.4 + """ + + def test_todo_data_valid(self): + """Test creating valid TodoData.""" + todo = TodoData( + task="完成报告", + time="明天下午", + location="办公室", + status="pending" + ) + assert todo.task == "完成报告" + assert todo.time == "明天下午" + assert todo.location == "办公室" + assert todo.status == "pending" + + def test_todo_data_default_status(self): + """Test TodoData defaults status to 'pending'.""" + todo = TodoData(task="买菜") + assert todo.status == "pending" + + def test_todo_data_optional_time(self): + """Test TodoData with optional time as None.""" + todo = TodoData(task="整理房间", location="家里") + assert todo.time is None + assert todo.location == "家里" + + def test_todo_data_optional_location(self): + """Test TodoData with optional location as None.""" + todo = TodoData(task="打电话", time="今晚") + assert todo.location is None + assert todo.time == "今晚" + + def test_todo_data_minimal(self): + """Test TodoData with only required task field.""" + todo = TodoData(task="记得喝水") + assert todo.task == "记得喝水" + assert todo.time is None + assert todo.location is None + assert todo.status == "pending" + + def test_todo_data_missing_task(self): + """Test TodoData requires task field.""" + with pytest.raises(ValidationError) as exc_info: + TodoData() + assert "Field required" in str(exc_info.value) + + def test_todo_data_custom_status(self): + """Test TodoData with custom status.""" + todo = TodoData(task="已完成任务", status="completed") + assert todo.status == "completed" + + +class TestParsedData: + """Tests for ParsedData model.""" + + def test_parsed_data_complete(self): + """Test ParsedData with all fields populated.""" + parsed = ParsedData( + mood=MoodData(type="开心", intensity=8), + inspirations=[ + InspirationData(core_idea="想法1", category="工作") + ], + todos=[ + TodoData(task="任务1") + ] + ) + assert parsed.mood is not None + assert len(parsed.inspirations) == 1 + assert len(parsed.todos) == 1 + + def test_parsed_data_empty(self): + """Test 
ParsedData with all fields empty.""" + parsed = ParsedData() + assert parsed.mood is None + assert parsed.inspirations == [] + assert parsed.todos == [] + + def test_parsed_data_only_mood(self): + """Test ParsedData with only mood.""" + parsed = ParsedData( + mood=MoodData(type="平静", intensity=5) + ) + assert parsed.mood is not None + assert parsed.inspirations == [] + assert parsed.todos == [] + + def test_parsed_data_multiple_inspirations(self): + """Test ParsedData with multiple inspirations.""" + parsed = ParsedData( + inspirations=[ + InspirationData(core_idea="想法1", category="工作"), + InspirationData(core_idea="想法2", category="生活"), + InspirationData(core_idea="想法3", category="学习") + ] + ) + assert len(parsed.inspirations) == 3 + + def test_parsed_data_multiple_todos(self): + """Test ParsedData with multiple todos.""" + parsed = ParsedData( + todos=[ + TodoData(task="任务1"), + TodoData(task="任务2"), + TodoData(task="任务3") + ] + ) + assert len(parsed.todos) == 3 + + +class TestRecordData: + """Tests for RecordData model.""" + + def test_record_data_audio_input(self): + """Test RecordData with audio input type.""" + record = RecordData( + record_id="test-id-123", + timestamp="2024-01-01T12:00:00Z", + input_type="audio", + original_text="转写后的文本", + parsed_data=ParsedData() + ) + assert record.input_type == "audio" + assert record.original_text == "转写后的文本" + + def test_record_data_text_input(self): + """Test RecordData with text input type.""" + record = RecordData( + record_id="test-id-456", + timestamp="2024-01-01T12:00:00Z", + input_type="text", + original_text="用户输入的文本", + parsed_data=ParsedData() + ) + assert record.input_type == "text" + assert record.original_text == "用户输入的文本" + + def test_record_data_invalid_input_type(self): + """Test RecordData rejects invalid input type.""" + with pytest.raises(ValidationError) as exc_info: + RecordData( + record_id="test-id", + timestamp="2024-01-01T12:00:00Z", + input_type="invalid", + original_text="文本", + 
parsed_data=ParsedData() + ) + assert "Input should be" in str(exc_info.value) + + def test_record_data_with_parsed_data(self): + """Test RecordData with complete parsed data.""" + record = RecordData( + record_id="test-id-789", + timestamp="2024-01-01T12:00:00Z", + input_type="text", + original_text="今天很开心,想到一个新项目,明天要完成报告", + parsed_data=ParsedData( + mood=MoodData(type="开心", intensity=8), + inspirations=[InspirationData(core_idea="新项目", category="工作")], + todos=[TodoData(task="完成报告", time="明天")] + ) + ) + assert record.parsed_data.mood is not None + assert len(record.parsed_data.inspirations) == 1 + assert len(record.parsed_data.todos) == 1 + + +class TestProcessResponse: + """Tests for ProcessResponse model.""" + + def test_process_response_success(self): + """Test ProcessResponse for successful processing.""" + response = ProcessResponse( + record_id="test-id-123", + timestamp="2024-01-01T12:00:00Z", + mood=MoodData(type="开心", intensity=8), + inspirations=[InspirationData(core_idea="想法", category="工作")], + todos=[TodoData(task="任务")] + ) + assert response.error is None + assert response.mood is not None + assert len(response.inspirations) == 1 + assert len(response.todos) == 1 + + def test_process_response_error(self): + """Test ProcessResponse with error.""" + response = ProcessResponse( + record_id="test-id-456", + timestamp="2024-01-01T12:00:00Z", + error="语音识别服务不可用" + ) + assert response.error == "语音识别服务不可用" + assert response.mood is None + assert response.inspirations == [] + assert response.todos == [] + + def test_process_response_empty_results(self): + """Test ProcessResponse with empty results.""" + response = ProcessResponse( + record_id="test-id-789", + timestamp="2024-01-01T12:00:00Z" + ) + assert response.error is None + assert response.mood is None + assert response.inspirations == [] + assert response.todos == [] + + def test_process_response_serialization(self): + """Test ProcessResponse can be serialized to dict.""" + response = 
ProcessResponse( + record_id="test-id", + timestamp="2024-01-01T12:00:00Z", + mood=MoodData(type="开心", intensity=8, keywords=["愉快"]) + ) + data = response.model_dump() + assert data["record_id"] == "test-id" + assert data["mood"]["type"] == "开心" + assert data["mood"]["intensity"] == 8 + assert data["mood"]["keywords"] == ["愉快"] + + +# Property-Based Tests +# These tests use hypothesis to verify properties hold across many random inputs + +from hypothesis import given, strategies as st +from hypothesis import settings + + +class TestMoodDataProperties: + """Property-based tests for MoodData model. + + **Validates: Requirements 4.1, 4.2, 4.3** + """ + + @given( + mood_type=st.one_of(st.none(), st.text(min_size=1, max_size=50)), + intensity=st.one_of(st.none(), st.integers(min_value=1, max_value=10)), + keywords=st.lists(st.text(min_size=0, max_size=20), min_size=0, max_size=10) + ) + @settings(max_examples=100) + def test_property_6_mood_data_structure_validation(self, mood_type, intensity, keywords): + """ + Property 6: 情绪数据结构验证 + + For any parsed mood data, it should contain type (string), intensity (1-10 integer), + and keywords (string array) fields, with intensity within valid range. 
+ + **Validates: Requirements 4.1, 4.2, 4.3** + """ + # Create MoodData with valid inputs + mood = MoodData( + type=mood_type, + intensity=intensity, + keywords=keywords + ) + + # Property 1: type field exists and is either None or string + assert hasattr(mood, 'type') + assert mood.type is None or isinstance(mood.type, str) + + # Property 2: intensity field exists and is either None or integer in range 1-10 + assert hasattr(mood, 'intensity') + if mood.intensity is not None: + assert isinstance(mood.intensity, int) + assert 1 <= mood.intensity <= 10 + + # Property 3: keywords field exists and is a list of strings + assert hasattr(mood, 'keywords') + assert isinstance(mood.keywords, list) + assert all(isinstance(kw, str) for kw in mood.keywords) + + # Property 4: All three fields should be present in the model + model_dict = mood.model_dump() + assert 'type' in model_dict + assert 'intensity' in model_dict + assert 'keywords' in model_dict + + @given( + intensity=st.integers().filter(lambda x: x < 1 or x > 10) + ) + @settings(max_examples=100) + def test_property_6_mood_intensity_range_validation(self, intensity): + """ + Property 6: 情绪数据结构验证 - Intensity Range + + For any intensity value outside the range [1, 10], MoodData should reject it + with a ValidationError. 
+ + **Validates: Requirements 4.2** + """ + with pytest.raises(ValidationError) as exc_info: + MoodData(type="测试", intensity=intensity) + + # Verify the error message mentions the constraint + error_str = str(exc_info.value) + assert "greater than or equal to 1" in error_str or "less than or equal to 10" in error_str + + @given( + mood_type=st.one_of(st.none(), st.text(min_size=0, max_size=100)), + keywords=st.lists(st.text(min_size=0, max_size=50), min_size=0, max_size=20) + ) + @settings(max_examples=100) + def test_property_6_mood_serialization_deserialization(self, mood_type, keywords): + """ + Property 6: 情绪数据结构验证 - Serialization + + For any valid MoodData, it should be serializable to dict and deserializable + back to MoodData with the same values. + + **Validates: Requirements 4.1, 4.2, 4.3** + """ + # Create original mood with valid intensity + original_mood = MoodData( + type=mood_type, + intensity=5, # Use valid intensity + keywords=keywords + ) + + # Serialize to dict + mood_dict = original_mood.model_dump() + + # Deserialize back to MoodData + deserialized_mood = MoodData(**mood_dict) + + # Verify all fields match + assert deserialized_mood.type == original_mood.type + assert deserialized_mood.intensity == original_mood.intensity + assert deserialized_mood.keywords == original_mood.keywords + + +class TestInspirationDataProperties: + """Property-based tests for InspirationData model. + + **Validates: Requirements 5.1, 5.2, 5.3** + """ + + @given( + core_idea=st.text(min_size=1, max_size=20), + tags=st.lists(st.text(min_size=0, max_size=20), min_size=0, max_size=5), + category=st.sampled_from(["工作", "生活", "学习", "创意"]) + ) + @settings(max_examples=100) + def test_property_7_inspiration_data_structure_validation(self, core_idea, tags, category): + """ + Property 7: 灵感数据结构验证 + + For any parsed inspiration data, it should contain core_idea (length ≤ 20), + tags (array length ≤ 5), and category (enum: 工作/生活/学习/创意) fields, + with all constraints satisfied. 
+ + **Validates: Requirements 5.1, 5.2, 5.3** + """ + # Create InspirationData with valid inputs + inspiration = InspirationData( + core_idea=core_idea, + tags=tags, + category=category + ) + + # Property 1: core_idea field exists and is a string with length ≤ 20 + assert hasattr(inspiration, 'core_idea') + assert isinstance(inspiration.core_idea, str) + assert len(inspiration.core_idea) <= 20 + + # Property 2: tags field exists and is a list with length ≤ 5 + assert hasattr(inspiration, 'tags') + assert isinstance(inspiration.tags, list) + assert len(inspiration.tags) <= 5 + assert all(isinstance(tag, str) for tag in inspiration.tags) + + # Property 3: category field exists and is one of the valid enum values + assert hasattr(inspiration, 'category') + assert isinstance(inspiration.category, str) + assert inspiration.category in ["工作", "生活", "学习", "创意"] + + # Property 4: All three fields should be present in the model + model_dict = inspiration.model_dump() + assert 'core_idea' in model_dict + assert 'tags' in model_dict + assert 'category' in model_dict + + @given( + core_idea=st.text(min_size=21, max_size=100) + ) + @settings(max_examples=100) + def test_property_7_core_idea_length_validation(self, core_idea): + """ + Property 7: 灵感数据结构验证 - Core Idea Length + + For any core_idea with length > 20, InspirationData should reject it + with a ValidationError. 
+ + **Validates: Requirements 5.1** + """ + with pytest.raises(ValidationError) as exc_info: + InspirationData( + core_idea=core_idea, + category="工作" + ) + + # Verify the error message mentions the length constraint + error_str = str(exc_info.value) + assert "at most 20 characters" in error_str + + @given( + tags=st.lists(st.text(min_size=1, max_size=10), min_size=6, max_size=20) + ) + @settings(max_examples=100) + def test_property_7_tags_count_validation(self, tags): + """ + Property 7: 灵感数据结构验证 - Tags Count + + For any tags list with more than 5 items, InspirationData should reject it + with a ValidationError. + + **Validates: Requirements 5.2** + """ + with pytest.raises(ValidationError) as exc_info: + InspirationData( + core_idea="想法", + tags=tags, + category="工作" + ) + + # Verify the error message mentions the count constraint + error_str = str(exc_info.value) + assert "at most 5 items" in error_str + + @given( + category=st.text(min_size=1, max_size=20).filter( + lambda x: x not in ["工作", "生活", "学习", "创意"] + ) + ) + @settings(max_examples=100) + def test_property_7_category_enum_validation(self, category): + """ + Property 7: 灵感数据结构验证 - Category Enum + + For any category value not in the enum ["工作", "生活", "学习", "创意"], + InspirationData should reject it with a ValidationError. 
+ + **Validates: Requirements 5.3** + """ + with pytest.raises(ValidationError) as exc_info: + InspirationData( + core_idea="想法", + category=category + ) + + # Verify the error message mentions the enum constraint + error_str = str(exc_info.value) + assert "Input should be" in error_str + + @given( + core_idea=st.text(min_size=1, max_size=20), + tags=st.lists(st.text(min_size=0, max_size=30), min_size=0, max_size=5), + category=st.sampled_from(["工作", "生活", "学习", "创意"]) + ) + @settings(max_examples=100) + def test_property_7_inspiration_serialization_deserialization(self, core_idea, tags, category): + """ + Property 7: 灵感数据结构验证 - Serialization + + For any valid InspirationData, it should be serializable to dict and deserializable + back to InspirationData with the same values. + + **Validates: Requirements 5.1, 5.2, 5.3** + """ + # Create original inspiration + original_inspiration = InspirationData( + core_idea=core_idea, + tags=tags, + category=category + ) + + # Serialize to dict + inspiration_dict = original_inspiration.model_dump() + + # Deserialize back to InspirationData + deserialized_inspiration = InspirationData(**inspiration_dict) + + # Verify all fields match + assert deserialized_inspiration.core_idea == original_inspiration.core_idea + assert deserialized_inspiration.tags == original_inspiration.tags + assert deserialized_inspiration.category == original_inspiration.category + + @given( + core_idea=st.text(min_size=1, max_size=20), + category=st.sampled_from(["工作", "生活", "学习", "创意"]) + ) + @settings(max_examples=100) + def test_property_7_inspiration_empty_tags_default(self, core_idea, category): + """ + Property 7: 灵感数据结构验证 - Empty Tags Default + + For any InspirationData created without tags, it should default to an empty list. 
+ + **Validates: Requirements 5.2** + """ + # Create inspiration without tags + inspiration = InspirationData( + core_idea=core_idea, + category=category + ) + + # Verify tags defaults to empty list + assert inspiration.tags == [] + assert isinstance(inspiration.tags, list) + + +class TestTodoDataProperties: + """Property-based tests for TodoData model. + + **Validates: Requirements 6.1, 6.2, 6.3, 6.4** + """ + + @given( + task=st.text(min_size=1, max_size=200), + time=st.one_of(st.none(), st.text(min_size=0, max_size=50)), + location=st.one_of(st.none(), st.text(min_size=0, max_size=100)), + status=st.text(min_size=1, max_size=20) + ) + @settings(max_examples=100) + def test_property_8_todo_data_structure_validation(self, task, time, location, status): + """ + Property 8: 待办数据结构验证 + + For any parsed todo data, it should contain task (required), time (optional), + location (optional), and status (defaults to "pending") fields. + + **Validates: Requirements 6.1, 6.2, 6.3, 6.4** + """ + # Create TodoData with valid inputs + todo = TodoData( + task=task, + time=time, + location=location, + status=status + ) + + # Property 1: task field exists and is a required string + assert hasattr(todo, 'task') + assert isinstance(todo.task, str) + assert len(todo.task) > 0 # task is required, so it should not be empty + + # Property 2: time field exists and is either None or string + assert hasattr(todo, 'time') + assert todo.time is None or isinstance(todo.time, str) + + # Property 3: location field exists and is either None or string + assert hasattr(todo, 'location') + assert todo.location is None or isinstance(todo.location, str) + + # Property 4: status field exists and is a string + assert hasattr(todo, 'status') + assert isinstance(todo.status, str) + + # Property 5: All four fields should be present in the model + model_dict = todo.model_dump() + assert 'task' in model_dict + assert 'time' in model_dict + assert 'location' in model_dict + assert 'status' in model_dict + + 
@given( + task=st.text(min_size=1, max_size=200) + ) + @settings(max_examples=100) + def test_property_8_todo_default_status(self, task): + """ + Property 8: 待办数据结构验证 - Default Status + + For any new todo item created without explicit status, the status should + default to "pending". + + **Validates: Requirements 6.4** + """ + # Create TodoData without explicit status + todo = TodoData(task=task) + + # Verify status defaults to "pending" + assert todo.status == "pending" + assert isinstance(todo.status, str) + + @given( + task=st.text(min_size=1, max_size=200), + time=st.one_of(st.none(), st.text(min_size=1, max_size=50)), + location=st.one_of(st.none(), st.text(min_size=1, max_size=100)) + ) + @settings(max_examples=100) + def test_property_8_todo_optional_fields(self, task, time, location): + """ + Property 8: 待办数据结构验证 - Optional Fields + + For any todo data, time and location fields should be optional and can be None. + + **Validates: Requirements 6.2, 6.3** + """ + # Create TodoData with optional fields + todo = TodoData( + task=task, + time=time, + location=location + ) + + # Verify optional fields can be None or string + if time is None: + assert todo.time is None + else: + assert isinstance(todo.time, str) + + if location is None: + assert todo.location is None + else: + assert isinstance(todo.location, str) + + @given( + task=st.text(min_size=1, max_size=200), + time=st.one_of(st.none(), st.text(min_size=0, max_size=50)), + location=st.one_of(st.none(), st.text(min_size=0, max_size=100)), + status=st.text(min_size=1, max_size=20) + ) + @settings(max_examples=100) + def test_property_8_todo_serialization_deserialization(self, task, time, location, status): + """ + Property 8: 待办数据结构验证 - Serialization + + For any valid TodoData, it should be serializable to dict and deserializable + back to TodoData with the same values. 
+ + **Validates: Requirements 6.1, 6.2, 6.3, 6.4** + """ + # Create original todo + original_todo = TodoData( + task=task, + time=time, + location=location, + status=status + ) + + # Serialize to dict + todo_dict = original_todo.model_dump() + + # Deserialize back to TodoData + deserialized_todo = TodoData(**todo_dict) + + # Verify all fields match + assert deserialized_todo.task == original_todo.task + assert deserialized_todo.time == original_todo.time + assert deserialized_todo.location == original_todo.location + assert deserialized_todo.status == original_todo.status + + @given( + time=st.text(min_size=1, max_size=50) + ) + @settings(max_examples=100) + def test_property_8_todo_time_preservation(self, time): + """ + Property 8: 待办数据结构验证 - Time Preservation + + For any todo data with time information, the time should be preserved as + the original expression (e.g., "明晚", "下周三"). + + **Validates: Requirements 6.2** + """ + # Create TodoData with time + todo = TodoData( + task="测试任务", + time=time + ) + + # Verify time is preserved exactly as provided + assert todo.time == time + assert isinstance(todo.time, str) + + @given( + task=st.text(min_size=1, max_size=200) + ) + @settings(max_examples=100) + def test_property_8_todo_minimal_creation(self, task): + """ + Property 8: 待办数据结构验证 - Minimal Creation + + For any todo data, only the task field is required. All other fields + should have sensible defaults or be optional. + + **Validates: Requirements 6.1, 6.4** + """ + # Create TodoData with only task + todo = TodoData(task=task) + + # Verify task is set + assert todo.task == task + + # Verify optional fields are None + assert todo.time is None + assert todo.location is None + + # Verify status has default value + assert todo.status == "pending" + + def test_property_8_todo_task_required(self): + """ + Property 8: 待办数据结构验证 - Task Required + + For any todo data, the task field is required and TodoData should reject + creation without it. 
+ + **Validates: Requirements 6.1** + """ + # Attempt to create TodoData without task + with pytest.raises(ValidationError) as exc_info: + TodoData() + + # Verify the error message mentions the required field + error_str = str(exc_info.value) + assert "Field required" in error_str or "field required" in error_str.lower() diff --git a/tests/test_semantic_parser.py b/tests/test_semantic_parser.py new file mode 100644 index 0000000000000000000000000000000000000000..dcb2e8883dd4f6927ced97d1e7f0f9fda41bea4f --- /dev/null +++ b/tests/test_semantic_parser.py @@ -0,0 +1,635 @@ +"""Unit tests for semantic parser service. + +This module contains unit tests for the SemanticParserService class, testing +API call success scenarios, failure scenarios, System Prompt usage, and edge cases. + +Requirements: 3.1, 3.2, 3.3, 3.4, 3.5 +""" + +import pytest +import json +from unittest.mock import MagicMock +import httpx + +from app.semantic_parser import SemanticParserService, SemanticParserError +from app.models import ParsedData, MoodData, InspirationData, TodoData + + +@pytest.fixture +def semantic_parser_service(): + """Create a SemanticParserService instance for testing.""" + return SemanticParserService(api_key="test_api_key_12345") + + +@pytest.fixture +def mock_text(): + """Create mock text for testing.""" + return "今天心情很好,想到了一个新项目的创意,明天要去办公室开会。" + + +@pytest.mark.asyncio +async def test_semantic_parser_initialization(semantic_parser_service): + """Test semantic parser service initialization. 
+ + Requirements: 3.1, 3.2 + """ + assert semantic_parser_service.api_key == "test_api_key_12345" + assert semantic_parser_service.model == "glm-4-flash" + assert semantic_parser_service.api_url == "https://open.bigmodel.cn/api/paas/v4/chat/completions" + assert isinstance(semantic_parser_service.client, httpx.AsyncClient) + + # Verify system prompt is correctly set + expected_prompt = ( + "你是一个数据转换器。请将文本解析为 JSON 格式。" + "维度包括:1.情绪(type,intensity,keywords); " + "2.灵感(core_idea,tags,category); " + "3.待办(task,time,location)。" + "必须严格遵循 JSON 格式返回。" + ) + assert semantic_parser_service.system_prompt == expected_prompt + + # Clean up + await semantic_parser_service.close() + + +@pytest.mark.asyncio +async def test_parse_success_with_all_dimensions(semantic_parser_service, mock_text, mocker): + """Test successful parsing with all dimensions present. + + Requirements: 3.1, 3.2, 3.3 + """ + # Mock successful API response with all dimensions + mock_response = MagicMock() + mock_response.status_code = 200 + mock_response.json.return_value = { + "id": "test_id", + "created": 1234567890, + "model": "glm-4-flash", + "choices": [ + { + "index": 0, + "message": { + "role": "assistant", + "content": json.dumps({ + "mood": { + "type": "开心", + "intensity": 8, + "keywords": ["愉快", "放松"] + }, + "inspirations": [ + { + "core_idea": "新项目创意", + "tags": ["创新", "技术"], + "category": "工作" + } + ], + "todos": [ + { + "task": "去办公室开会", + "time": "明天", + "location": "办公室", + "status": "pending" + } + ] + }) + } + } + ] + } + + # Mock the HTTP client post method + mock_post = mocker.patch.object( + semantic_parser_service.client, + 'post', + return_value=mock_response + ) + + # Call parse + result = await semantic_parser_service.parse(mock_text) + + # Verify result structure + assert isinstance(result, ParsedData) + assert result.mood is not None + assert result.mood.type == "开心" + assert result.mood.intensity == 8 + assert result.mood.keywords == ["愉快", "放松"] + assert len(result.inspirations) == 
1 + assert result.inspirations[0].core_idea == "新项目创意" + assert result.inspirations[0].tags == ["创新", "技术"] + assert result.inspirations[0].category == "工作" + assert len(result.todos) == 1 + assert result.todos[0].task == "去办公室开会" + assert result.todos[0].time == "明天" + assert result.todos[0].location == "办公室" + assert result.todos[0].status == "pending" + + # Verify API was called correctly + mock_post.assert_called_once() + call_args = mock_post.call_args + assert call_args.kwargs['headers']['Authorization'] == "Bearer test_api_key_12345" + assert call_args.kwargs['json']['model'] == "glm-4-flash" + + # Verify system prompt is used + messages = call_args.kwargs['json']['messages'] + assert len(messages) == 2 + assert messages[0]['role'] == "system" + assert messages[0]['content'] == semantic_parser_service.system_prompt + assert messages[1]['role'] == "user" + assert messages[1]['content'] == mock_text + + # Clean up + await semantic_parser_service.close() + + +@pytest.mark.asyncio +async def test_parse_success_with_markdown_json(semantic_parser_service, mock_text, mocker): + """Test successful parsing when API returns JSON in markdown code blocks. 
+ + Requirements: 3.1, 3.3 + """ + # Mock API response with JSON in markdown code blocks + mock_response = MagicMock() + mock_response.status_code = 200 + mock_response.json.return_value = { + "choices": [ + { + "message": { + "content": "```json\n" + json.dumps({ + "mood": { + "type": "开心", + "intensity": 7, + "keywords": ["愉快"] + }, + "inspirations": [], + "todos": [] + }) + "\n```" + } + } + ] + } + + # Mock the HTTP client post method + mocker.patch.object( + semantic_parser_service.client, + 'post', + return_value=mock_response + ) + + # Call parse + result = await semantic_parser_service.parse(mock_text) + + # Verify result + assert isinstance(result, ParsedData) + assert result.mood is not None + assert result.mood.type == "开心" + assert result.mood.intensity == 7 + + # Clean up + await semantic_parser_service.close() + + +@pytest.mark.asyncio +async def test_parse_no_mood_dimension(semantic_parser_service, mocker): + """Test parsing text with no mood information. + + This tests the edge case where the text does not contain mood information, + and the parser should return null for the mood dimension. 
+ + Requirements: 3.4 + """ + text = "明天要去办公室开会,准备项目报告。" + + # Mock API response without mood + mock_response = MagicMock() + mock_response.status_code = 200 + mock_response.json.return_value = { + "choices": [ + { + "message": { + "content": json.dumps({ + "mood": None, + "inspirations": [], + "todos": [ + { + "task": "去办公室开会", + "time": "明天", + "location": "办公室" + } + ] + }) + } + } + ] + } + + # Mock the HTTP client post method + mocker.patch.object( + semantic_parser_service.client, + 'post', + return_value=mock_response + ) + + # Call parse + result = await semantic_parser_service.parse(text) + + # Verify mood is None + assert result.mood is None + assert len(result.todos) == 1 + + # Clean up + await semantic_parser_service.close() + + +@pytest.mark.asyncio +async def test_parse_no_inspiration_dimension(semantic_parser_service, mocker): + """Test parsing text with no inspiration information. + + This tests the edge case where the text does not contain inspiration information, + and the parser should return an empty array for the inspirations dimension. 
+ + Requirements: 3.4 + """ + text = "今天心情不错,明天要去开会。" + + # Mock API response without inspirations + mock_response = MagicMock() + mock_response.status_code = 200 + mock_response.json.return_value = { + "choices": [ + { + "message": { + "content": json.dumps({ + "mood": { + "type": "开心", + "intensity": 7, + "keywords": ["不错"] + }, + "inspirations": [], + "todos": [ + { + "task": "去开会", + "time": "明天" + } + ] + }) + } + } + ] + } + + # Mock the HTTP client post method + mocker.patch.object( + semantic_parser_service.client, + 'post', + return_value=mock_response + ) + + # Call parse + result = await semantic_parser_service.parse(text) + + # Verify inspirations is empty array + assert result.inspirations == [] + assert result.mood is not None + assert len(result.todos) == 1 + + # Clean up + await semantic_parser_service.close() + + +@pytest.mark.asyncio +async def test_parse_no_todo_dimension(semantic_parser_service, mocker): + """Test parsing text with no todo information. + + This tests the edge case where the text does not contain todo information, + and the parser should return an empty array for the todos dimension. 
+ + Requirements: 3.4 + """ + text = "今天心情很好,想到了一个有趣的想法。" + + # Mock API response without todos + mock_response = MagicMock() + mock_response.status_code = 200 + mock_response.json.return_value = { + "choices": [ + { + "message": { + "content": json.dumps({ + "mood": { + "type": "开心", + "intensity": 8, + "keywords": ["很好"] + }, + "inspirations": [ + { + "core_idea": "有趣的想法", + "tags": ["创意"], + "category": "生活" + } + ], + "todos": [] + }) + } + } + ] + } + + # Mock the HTTP client post method + mocker.patch.object( + semantic_parser_service.client, + 'post', + return_value=mock_response + ) + + # Call parse + result = await semantic_parser_service.parse(text) + + # Verify todos is empty array + assert result.todos == [] + assert result.mood is not None + assert len(result.inspirations) == 1 + + # Clean up + await semantic_parser_service.close() + + +@pytest.mark.asyncio +async def test_parse_api_error_status(semantic_parser_service, mock_text, mocker): + """Test parsing when API returns error status code. + + Requirements: 3.5 + """ + # Mock API error response + mock_response = MagicMock() + mock_response.status_code = 500 + mock_response.json.return_value = { + "error": { + "message": "Internal server error", + "code": "internal_error" + } + } + mock_response.text = "Internal server error" + + # Mock the HTTP client post method + mocker.patch.object( + semantic_parser_service.client, + 'post', + return_value=mock_response + ) + + # Call parse and expect exception + with pytest.raises(SemanticParserError) as exc_info: + await semantic_parser_service.parse(mock_text) + + # Verify error message + assert "语义解析服务不可用" in str(exc_info.value) + + # Clean up + await semantic_parser_service.close() + + +@pytest.mark.asyncio +async def test_parse_api_timeout(semantic_parser_service, mock_text, mocker): + """Test parsing when API request times out. 
+ + Requirements: 3.5 + """ + # Mock timeout exception + mocker.patch.object( + semantic_parser_service.client, + 'post', + side_effect=httpx.TimeoutException("Request timeout") + ) + + # Call parse and expect exception + with pytest.raises(SemanticParserError) as exc_info: + await semantic_parser_service.parse(mock_text) + + # Verify error message + assert "语义解析服务不可用" in str(exc_info.value) + assert "请求超时" in str(exc_info.value) + + # Clean up + await semantic_parser_service.close() + + +@pytest.mark.asyncio +async def test_parse_network_error(semantic_parser_service, mock_text, mocker): + """Test parsing when network error occurs. + + Requirements: 3.5 + """ + # Mock network error + mocker.patch.object( + semantic_parser_service.client, + 'post', + side_effect=httpx.RequestError("Network error") + ) + + # Call parse and expect exception + with pytest.raises(SemanticParserError) as exc_info: + await semantic_parser_service.parse(mock_text) + + # Verify error message + assert "语义解析服务不可用" in str(exc_info.value) + assert "网络错误" in str(exc_info.value) + + # Clean up + await semantic_parser_service.close() + + +@pytest.mark.asyncio +async def test_parse_invalid_json_response(semantic_parser_service, mock_text, mocker): + """Test parsing when API returns invalid JSON. 
+ + Requirements: 3.5 + """ + # Mock response with invalid JSON + mock_response = MagicMock() + mock_response.status_code = 200 + mock_response.json.side_effect = ValueError("Invalid JSON") + + # Mock the HTTP client post method + mocker.patch.object( + semantic_parser_service.client, + 'post', + return_value=mock_response + ) + + # Call parse and expect exception + with pytest.raises(SemanticParserError) as exc_info: + await semantic_parser_service.parse(mock_text) + + # Verify error message + assert "语义解析服务不可用" in str(exc_info.value) + assert "响应格式无效" in str(exc_info.value) + + # Clean up + await semantic_parser_service.close() + + +@pytest.mark.asyncio +async def test_parse_invalid_response_structure(semantic_parser_service, mock_text, mocker): + """Test parsing when API response has invalid structure. + + Requirements: 3.5 + """ + # Mock response without required fields + mock_response = MagicMock() + mock_response.status_code = 200 + mock_response.json.return_value = { + "id": "test_id", + "created": 1234567890 + # Missing "choices" field + } + + # Mock the HTTP client post method + mocker.patch.object( + semantic_parser_service.client, + 'post', + return_value=mock_response + ) + + # Call parse and expect exception + with pytest.raises(SemanticParserError) as exc_info: + await semantic_parser_service.parse(mock_text) + + # Verify error message + assert "语义解析服务不可用" in str(exc_info.value) + assert "响应结构无效" in str(exc_info.value) + + # Clean up + await semantic_parser_service.close() + + +@pytest.mark.asyncio +async def test_parse_invalid_json_content(semantic_parser_service, mock_text, mocker): + """Test parsing when API returns non-JSON content. 
+ + Requirements: 3.5 + """ + # Mock response with non-JSON content + mock_response = MagicMock() + mock_response.status_code = 200 + mock_response.json.return_value = { + "choices": [ + { + "message": { + "content": "This is not valid JSON content" + } + } + ] + } + + # Mock the HTTP client post method + mocker.patch.object( + semantic_parser_service.client, + 'post', + return_value=mock_response + ) + + # Call parse and expect exception + with pytest.raises(SemanticParserError) as exc_info: + await semantic_parser_service.parse(mock_text) + + # Verify error message + assert "语义解析服务不可用" in str(exc_info.value) + assert "JSON 解析失败" in str(exc_info.value) + + # Clean up + await semantic_parser_service.close() + + +@pytest.mark.asyncio +async def test_parse_unexpected_exception(semantic_parser_service, mock_text, mocker): + """Test parsing when unexpected exception occurs. + + Requirements: 3.5 + """ + # Mock unexpected exception + mocker.patch.object( + semantic_parser_service.client, + 'post', + side_effect=Exception("Unexpected error") + ) + + # Call parse and expect exception + with pytest.raises(SemanticParserError) as exc_info: + await semantic_parser_service.parse(mock_text) + + # Verify error message + assert "语义解析服务不可用" in str(exc_info.value) + + # Clean up + await semantic_parser_service.close() + + +@pytest.mark.asyncio +async def test_system_prompt_usage(semantic_parser_service, mock_text, mocker): + """Test that the correct System Prompt is used in API calls. + + This verifies that the system prompt specified in requirements is + correctly included in the API request. 
+ + Requirements: 3.2 + """ + # Mock successful API response + mock_response = MagicMock() + mock_response.status_code = 200 + mock_response.json.return_value = { + "choices": [ + { + "message": { + "content": json.dumps({ + "mood": None, + "inspirations": [], + "todos": [] + }) + } + } + ] + } + + # Mock the HTTP client post method + mock_post = mocker.patch.object( + semantic_parser_service.client, + 'post', + return_value=mock_response + ) + + # Call parse + await semantic_parser_service.parse(mock_text) + + # Verify system prompt is used correctly + call_args = mock_post.call_args + messages = call_args.kwargs['json']['messages'] + + expected_system_prompt = ( + "你是一个数据转换器。请将文本解析为 JSON 格式。" + "维度包括:1.情绪(type,intensity,keywords); " + "2.灵感(core_idea,tags,category); " + "3.待办(task,time,location)。" + "必须严格遵循 JSON 格式返回。" + ) + + assert messages[0]['role'] == "system" + assert messages[0]['content'] == expected_system_prompt + + # Clean up + await semantic_parser_service.close() + + +@pytest.mark.asyncio +async def test_close_client(semantic_parser_service): + """Test closing the HTTP client. + + Requirements: 3.1 + """ + # Verify client is open + assert not semantic_parser_service.client.is_closed + + # Close the client + await semantic_parser_service.close() + + # Verify client is closed + assert semantic_parser_service.client.is_closed diff --git a/tests/test_semantic_parser_properties.py b/tests/test_semantic_parser_properties.py new file mode 100644 index 0000000000000000000000000000000000000000..07ad33e2aab56863a4cf1da3a108ea213ca4f452 --- /dev/null +++ b/tests/test_semantic_parser_properties.py @@ -0,0 +1,824 @@ +"""Property-based tests for semantic parser service. + +This module uses hypothesis to verify that semantic parsing properties hold across +many random inputs, ensuring parse result structure integrity. 
+ +Requirements: 3.3 +""" + +import pytest +import json +from unittest.mock import Mock, patch + +from hypothesis import given, strategies as st +from hypothesis import settings + +from app.semantic_parser import SemanticParserService, SemanticParserError +from app.models import ParsedData, MoodData, InspirationData, TodoData + + +# Custom strategies for generating API responses +@st.composite +def api_mood_response_strategy(draw): + """Generate valid mood data for API responses.""" + has_mood = draw(st.booleans()) + if not has_mood: + return None + + return { + "type": draw(st.one_of(st.none(), st.text(min_size=1, max_size=20))), + "intensity": draw(st.one_of(st.none(), st.integers(min_value=1, max_value=10))), + "keywords": draw(st.lists(st.text(min_size=1, max_size=15), min_size=0, max_size=5)) + } + + +@st.composite +def api_inspiration_response_strategy(draw): + """Generate valid inspiration data for API responses.""" + core_idea = draw(st.text(min_size=1, max_size=20)) + tags = draw(st.lists(st.text(min_size=1, max_size=10), min_size=0, max_size=5)) + category = draw(st.sampled_from(["工作", "生活", "学习", "创意"])) + + return { + "core_idea": core_idea, + "tags": tags, + "category": category + } + + +@st.composite +def api_todo_response_strategy(draw): + """Generate valid todo data for API responses.""" + task = draw(st.text(min_size=1, max_size=50)) + time = draw(st.one_of(st.none(), st.text(min_size=1, max_size=20))) + location = draw(st.one_of(st.none(), st.text(min_size=1, max_size=20))) + + return { + "task": task, + "time": time, + "location": location + } + + +@st.composite +def api_parsed_response_strategy(draw): + """Generate valid parsed data for API responses.""" + mood = draw(api_mood_response_strategy()) + inspirations = draw(st.lists(api_inspiration_response_strategy(), min_size=0, max_size=3)) + todos = draw(st.lists(api_todo_response_strategy(), min_size=0, max_size=3)) + + return { + "mood": mood, + "inspirations": inspirations, + "todos": todos + 
} + + +class TestSemanticParserServiceProperties: + """Property-based tests for SemanticParserService. + + **Validates: Requirements 3.3** + """ + + @given( + text=st.text(min_size=1, max_size=200), + api_response=api_parsed_response_strategy() + ) + @settings(max_examples=20) + @pytest.mark.asyncio + async def test_property_4_parse_result_structure_integrity(self, text, api_response): + """ + Property 4: 解析结果结构完整性 + + For any successful semantic parsing result, the returned JSON should contain + mood, inspirations, and todos fields, even if some fields are null or empty arrays. + + **Validates: Requirements 3.3** + """ + # Create service instance + service = SemanticParserService(api_key="test-api-key") + + try: + # Mock the API response + # Note: httpx Response.json() is NOT async, so use regular Mock + mock_response = Mock() + mock_response.status_code = 200 + mock_response.json.return_value = { + "choices": [ + { + "message": { + "content": json.dumps(api_response, ensure_ascii=False) + } + } + ] + } + + # Patch the HTTP client's post method (which IS async) + async def mock_post(*args, **kwargs): + return mock_response + + with patch.object(service.client, 'post', side_effect=mock_post): + # Call parse method + result = await service.parse(text) + + # Property 1: Result should be a ParsedData instance + assert isinstance(result, ParsedData), \ + "Parse result should be a ParsedData instance" + + # Property 2: Result should have mood field (even if None) + assert hasattr(result, 'mood'), \ + "Parse result should have 'mood' field" + + # Property 3: Result should have inspirations field (even if empty list) + assert hasattr(result, 'inspirations'), \ + "Parse result should have 'inspirations' field" + assert isinstance(result.inspirations, list), \ + "Inspirations should be a list" + + # Property 4: Result should have todos field (even if empty list) + assert hasattr(result, 'todos'), \ + "Parse result should have 'todos' field" + assert isinstance(result.todos, 
list), \ + "Todos should be a list" + + # Property 5: If mood exists in API response, it should be in result + if api_response["mood"] is not None: + # Mood might be None if validation fails, but field should exist + assert result.mood is None or isinstance(result.mood, MoodData), \ + "Mood should be None or MoodData instance" + else: + assert result.mood is None, \ + "Mood should be None when not in API response" + + # Property 6: Inspirations count should match valid entries + # (Some might be filtered out due to validation errors) + assert len(result.inspirations) <= len(api_response["inspirations"]), \ + "Result inspirations count should not exceed API response count" + + for inspiration in result.inspirations: + assert isinstance(inspiration, InspirationData), \ + "Each inspiration should be an InspirationData instance" + + # Property 7: Todos count should match valid entries + # (Some might be filtered out due to validation errors) + assert len(result.todos) <= len(api_response["todos"]), \ + "Result todos count should not exceed API response count" + + for todo in result.todos: + assert isinstance(todo, TodoData), \ + "Each todo should be a TodoData instance" + assert todo.status == "pending", \ + "New todos should have status 'pending'" + + finally: + # Clean up + await service.close() + + @given( + text=st.text(min_size=1, max_size=200), + has_mood=st.booleans(), + has_inspirations=st.booleans(), + has_todos=st.booleans() + ) + @settings(max_examples=20) + @pytest.mark.asyncio + async def test_property_4_parse_result_structure_with_missing_dimensions( + self, text, has_mood, has_inspirations, has_todos + ): + """ + Property 4: 解析结果结构完整性 - Missing Dimensions + + For any parsing result, even when some dimensions are missing from the API + response, the result should still contain all three fields (mood, inspirations, todos). 
+ + **Validates: Requirements 3.3** + """ + # Create service instance + service = SemanticParserService(api_key="test-api-key") + + try: + # Build API response based on flags + api_response = {} + + if has_mood: + api_response["mood"] = { + "type": "开心", + "intensity": 8, + "keywords": ["愉快", "放松"] + } + else: + api_response["mood"] = None + + if has_inspirations: + api_response["inspirations"] = [ + { + "core_idea": "新想法", + "tags": ["创新"], + "category": "工作" + } + ] + else: + api_response["inspirations"] = [] + + if has_todos: + api_response["todos"] = [ + { + "task": "完成任务", + "time": "明天", + "location": "办公室" + } + ] + else: + api_response["todos"] = [] + + # Mock the API response + mock_response = Mock() + mock_response.status_code = 200 + mock_response.json.return_value = { + "choices": [ + { + "message": { + "content": json.dumps(api_response, ensure_ascii=False) + } + } + ] + } + + # Patch the HTTP client's post method (which IS async) + async def mock_post(*args, **kwargs): + return mock_response + + with patch.object(service.client, 'post', side_effect=mock_post): + # Call parse method + result = await service.parse(text) + + # Property 1: Result should always have all three fields + assert hasattr(result, 'mood'), \ + "Parse result should always have 'mood' field" + assert hasattr(result, 'inspirations'), \ + "Parse result should always have 'inspirations' field" + assert hasattr(result, 'todos'), \ + "Parse result should always have 'todos' field" + + # Property 2: Field types should be correct + assert result.mood is None or isinstance(result.mood, MoodData), \ + "Mood should be None or MoodData instance" + assert isinstance(result.inspirations, list), \ + "Inspirations should be a list" + assert isinstance(result.todos, list), \ + "Todos should be a list" + + # Property 3: Empty dimensions should be represented correctly + if not has_mood: + assert result.mood is None, \ + "Mood should be None when not present in API response" + + if not 
has_inspirations: + assert result.inspirations == [], \ + "Inspirations should be empty list when not present in API response" + + if not has_todos: + assert result.todos == [], \ + "Todos should be empty list when not present in API response" + + finally: + # Clean up + await service.close() + + @given( + text=st.text(min_size=1, max_size=200) + ) + @settings(max_examples=20) + @pytest.mark.asyncio + async def test_property_4_parse_result_structure_with_empty_response(self, text): + """ + Property 4: 解析结果结构完整性 - Empty Response + + For any text that results in an empty parsing response (no mood, no inspirations, + no todos), the result should still contain all three fields with appropriate + null/empty values. + + **Validates: Requirements 3.3** + """ + # Create service instance + service = SemanticParserService(api_key="test-api-key") + + try: + # Build completely empty API response + api_response = { + "mood": None, + "inspirations": [], + "todos": [] + } + + # Mock the API response + mock_response = Mock() + mock_response.status_code = 200 + mock_response.json.return_value = { + "choices": [ + { + "message": { + "content": json.dumps(api_response, ensure_ascii=False) + } + } + ] + } + + # Patch the HTTP client's post method (which IS async) + async def mock_post(*args, **kwargs): + return mock_response + + with patch.object(service.client, 'post', side_effect=mock_post): + # Call parse method + result = await service.parse(text) + + # Property 1: Result should be a valid ParsedData instance + assert isinstance(result, ParsedData), \ + "Parse result should be a ParsedData instance even with empty response" + + # Property 2: All three fields should exist + assert hasattr(result, 'mood'), \ + "Parse result should have 'mood' field even when empty" + assert hasattr(result, 'inspirations'), \ + "Parse result should have 'inspirations' field even when empty" + assert hasattr(result, 'todos'), \ + "Parse result should have 'todos' field even when empty" + + # Property 
3: Empty values should be represented correctly + assert result.mood is None, \ + "Mood should be None for empty response" + assert result.inspirations == [], \ + "Inspirations should be empty list for empty response" + assert result.todos == [], \ + "Todos should be empty list for empty response" + + finally: + # Clean up + await service.close() + + @given( + text=st.text(min_size=1, max_size=200), + api_response=api_parsed_response_strategy() + ) + @settings(max_examples=20) + @pytest.mark.asyncio + async def test_property_4_parse_result_structure_with_markdown_json( + self, text, api_response + ): + """ + Property 4: 解析结果结构完整性 - Markdown JSON Response + + For any API response that wraps JSON in markdown code blocks (```json...```), + the parser should still extract the JSON and return a properly structured result. + + **Validates: Requirements 3.3** + """ + # Create service instance + service = SemanticParserService(api_key="test-api-key") + + try: + # Wrap the JSON response in markdown code blocks + json_content = json.dumps(api_response, ensure_ascii=False) + markdown_content = f"```json\n{json_content}\n```" + + # Mock the API response + mock_response = Mock() + mock_response.status_code = 200 + mock_response.json.return_value = { + "choices": [ + { + "message": { + "content": markdown_content + } + } + ] + } + + # Patch the HTTP client's post method (which IS async) + async def mock_post(*args, **kwargs): + return mock_response + + with patch.object(service.client, 'post', side_effect=mock_post): + # Call parse method + result = await service.parse(text) + + # Property 1: Result should be a valid ParsedData instance + assert isinstance(result, ParsedData), \ + "Parse result should be a ParsedData instance even with markdown-wrapped JSON" + + # Property 2: All three fields should exist + assert hasattr(result, 'mood'), \ + "Parse result should have 'mood' field" + assert hasattr(result, 'inspirations'), \ + "Parse result should have 'inspirations' field" + 
assert hasattr(result, 'todos'), \ + "Parse result should have 'todos' field" + + # Property 3: Field types should be correct + assert result.mood is None or isinstance(result.mood, MoodData), \ + "Mood should be None or MoodData instance" + assert isinstance(result.inspirations, list), \ + "Inspirations should be a list" + assert isinstance(result.todos, list), \ + "Todos should be a list" + + finally: + # Clean up + await service.close() + + @given( + text=st.text(min_size=1, max_size=200), + num_inspirations=st.integers(min_value=0, max_value=5), + num_todos=st.integers(min_value=0, max_value=5) + ) + @settings(max_examples=20) + @pytest.mark.asyncio + async def test_property_4_parse_result_structure_with_multiple_items( + self, text, num_inspirations, num_todos + ): + """ + Property 4: 解析结果结构完整性 - Multiple Items + + For any parsing result with multiple inspirations and todos, the result + should maintain proper structure with all items preserved as lists. + + **Validates: Requirements 3.3** + """ + # Create service instance + service = SemanticParserService(api_key="test-api-key") + + try: + # Build API response with multiple items + api_response = { + "mood": { + "type": "平静", + "intensity": 5, + "keywords": ["放松"] + }, + "inspirations": [ + { + "core_idea": f"想法{i}", + "tags": [f"标签{i}"], + "category": "生活" + } + for i in range(num_inspirations) + ], + "todos": [ + { + "task": f"任务{i}", + "time": f"时间{i}", + "location": f"地点{i}" + } + for i in range(num_todos) + ] + } + + # Mock the API response + mock_response = Mock() + mock_response.status_code = 200 + mock_response.json.return_value = { + "choices": [ + { + "message": { + "content": json.dumps(api_response, ensure_ascii=False) + } + } + ] + } + + # Patch the HTTP client's post method (which IS async) + async def mock_post(*args, **kwargs): + return mock_response + + with patch.object(service.client, 'post', side_effect=mock_post): + # Call parse method + result = await service.parse(text) + + # Property 
1: Result should have all three fields + assert hasattr(result, 'mood'), \ + "Parse result should have 'mood' field" + assert hasattr(result, 'inspirations'), \ + "Parse result should have 'inspirations' field" + assert hasattr(result, 'todos'), \ + "Parse result should have 'todos' field" + + # Property 2: Lists should contain correct number of items + assert len(result.inspirations) == num_inspirations, \ + f"Should have {num_inspirations} inspirations" + assert len(result.todos) == num_todos, \ + f"Should have {num_todos} todos" + + # Property 3: All items should be properly typed + for inspiration in result.inspirations: + assert isinstance(inspiration, InspirationData), \ + "Each inspiration should be an InspirationData instance" + + for todo in result.todos: + assert isinstance(todo, TodoData), \ + "Each todo should be a TodoData instance" + + # Property 4: Mood should be present + assert isinstance(result.mood, MoodData), \ + "Mood should be a MoodData instance" + + finally: + # Clean up + await service.close() + + @given( + text=st.text(min_size=1, max_size=200), + include_mood=st.booleans(), + include_inspirations=st.booleans(), + include_todos=st.booleans() + ) + @settings(max_examples=20) + @pytest.mark.asyncio + async def test_property_5_missing_dimension_handling( + self, text, include_mood, include_inspirations, include_todos + ): + """ + Property 5: 缺失维度处理 + + For any text that does not contain specific dimension information, + the parsing result should return null for mood or empty arrays for + inspirations and todos. 
+ + **Validates: Requirements 3.4** + """ + # Create service instance + service = SemanticParserService(api_key="test-api-key") + + try: + # Build API response with selective dimensions + api_response = {} + + # Only include dimensions based on flags + if include_mood: + api_response["mood"] = { + "type": "开心", + "intensity": 7, + "keywords": ["愉快"] + } + else: + # Explicitly set to None to simulate missing dimension + api_response["mood"] = None + + if include_inspirations: + api_response["inspirations"] = [ + { + "core_idea": "测试想法", + "tags": ["测试"], + "category": "学习" + } + ] + else: + # Empty array for missing dimension + api_response["inspirations"] = [] + + if include_todos: + api_response["todos"] = [ + { + "task": "测试任务", + "time": "今天", + "location": None + } + ] + else: + # Empty array for missing dimension + api_response["todos"] = [] + + # Mock the API response + mock_response = Mock() + mock_response.status_code = 200 + mock_response.json.return_value = { + "choices": [ + { + "message": { + "content": json.dumps(api_response, ensure_ascii=False) + } + } + ] + } + + # Patch the HTTP client's post method + async def mock_post(*args, **kwargs): + return mock_response + + with patch.object(service.client, 'post', side_effect=mock_post): + # Call parse method + result = await service.parse(text) + + # Property 1: Missing mood should return None + if not include_mood: + assert result.mood is None, \ + "When text does not contain mood information, mood should be None" + else: + assert result.mood is not None, \ + "When text contains mood information, mood should not be None" + assert isinstance(result.mood, MoodData), \ + "Mood should be a MoodData instance when present" + + # Property 2: Missing inspirations should return empty array + if not include_inspirations: + assert result.inspirations == [], \ + "When text does not contain inspiration information, inspirations should be empty array" + assert isinstance(result.inspirations, list), \ + "Inspirations 
should always be a list" + else: + assert len(result.inspirations) > 0, \ + "When text contains inspiration information, inspirations should not be empty" + for inspiration in result.inspirations: + assert isinstance(inspiration, InspirationData), \ + "Each inspiration should be an InspirationData instance" + + # Property 3: Missing todos should return empty array + if not include_todos: + assert result.todos == [], \ + "When text does not contain todo information, todos should be empty array" + assert isinstance(result.todos, list), \ + "Todos should always be a list" + else: + assert len(result.todos) > 0, \ + "When text contains todo information, todos should not be empty" + for todo in result.todos: + assert isinstance(todo, TodoData), \ + "Each todo should be a TodoData instance" + + # Property 4: Result structure should always be complete + assert hasattr(result, 'mood'), \ + "Result should always have mood field" + assert hasattr(result, 'inspirations'), \ + "Result should always have inspirations field" + assert hasattr(result, 'todos'), \ + "Result should always have todos field" + + finally: + # Clean up + await service.close() + + @given(text=st.text(min_size=1, max_size=200)) + @settings(max_examples=20) + @pytest.mark.asyncio + async def test_property_5_all_dimensions_missing(self, text): + """ + Property 5: 缺失维度处理 - All Dimensions Missing + + For any text where all dimensions are missing, the result should have + null mood and empty arrays for inspirations and todos. 
+ + **Validates: Requirements 3.4** + """ + # Create service instance + service = SemanticParserService(api_key="test-api-key") + + try: + # Build API response with all dimensions missing + api_response = { + "mood": None, + "inspirations": [], + "todos": [] + } + + # Mock the API response + mock_response = Mock() + mock_response.status_code = 200 + mock_response.json.return_value = { + "choices": [ + { + "message": { + "content": json.dumps(api_response, ensure_ascii=False) + } + } + ] + } + + # Patch the HTTP client's post method + async def mock_post(*args, **kwargs): + return mock_response + + with patch.object(service.client, 'post', side_effect=mock_post): + # Call parse method + result = await service.parse(text) + + # Property 1: All dimensions should be properly represented as missing + assert result.mood is None, \ + "Mood should be None when all dimensions are missing" + assert result.inspirations == [], \ + "Inspirations should be empty array when all dimensions are missing" + assert result.todos == [], \ + "Todos should be empty array when all dimensions are missing" + + # Property 2: Result should still be a valid ParsedData instance + assert isinstance(result, ParsedData), \ + "Result should be a valid ParsedData instance even with all dimensions missing" + + finally: + # Clean up + await service.close() + + @given( + text=st.text(min_size=1, max_size=200), + dimension=st.sampled_from(["mood", "inspirations", "todos"]) + ) + @settings(max_examples=20) + @pytest.mark.asyncio + async def test_property_5_single_dimension_missing(self, text, dimension): + """ + Property 5: 缺失维度处理 - Single Dimension Missing + + For any text where only one dimension is missing, that dimension should + return null (for mood) or empty array (for inspirations/todos), while + other dimensions should be present. 
+ + **Validates: Requirements 3.4** + """ + # Create service instance + service = SemanticParserService(api_key="test-api-key") + + try: + # Build API response with one dimension missing + api_response = { + "mood": { + "type": "平静", + "intensity": 5, + "keywords": ["放松"] + }, + "inspirations": [ + { + "core_idea": "想法", + "tags": ["标签"], + "category": "生活" + } + ], + "todos": [ + { + "task": "任务", + "time": "明天", + "location": "家" + } + ] + } + + # Remove the selected dimension + if dimension == "mood": + api_response["mood"] = None + elif dimension == "inspirations": + api_response["inspirations"] = [] + elif dimension == "todos": + api_response["todos"] = [] + + # Mock the API response + mock_response = Mock() + mock_response.status_code = 200 + mock_response.json.return_value = { + "choices": [ + { + "message": { + "content": json.dumps(api_response, ensure_ascii=False) + } + } + ] + } + + # Patch the HTTP client's post method + async def mock_post(*args, **kwargs): + return mock_response + + with patch.object(service.client, 'post', side_effect=mock_post): + # Call parse method + result = await service.parse(text) + + # Property 1: Missing dimension should be properly represented + if dimension == "mood": + assert result.mood is None, \ + "Mood should be None when missing" + assert len(result.inspirations) > 0, \ + "Inspirations should be present when not missing" + assert len(result.todos) > 0, \ + "Todos should be present when not missing" + elif dimension == "inspirations": + assert result.mood is not None, \ + "Mood should be present when not missing" + assert result.inspirations == [], \ + "Inspirations should be empty array when missing" + assert len(result.todos) > 0, \ + "Todos should be present when not missing" + elif dimension == "todos": + assert result.mood is not None, \ + "Mood should be present when not missing" + assert len(result.inspirations) > 0, \ + "Inspirations should be present when not missing" + assert result.todos == [], \ + "Todos 
should be empty array when missing" + + # Property 2: All fields should exist regardless + assert hasattr(result, 'mood'), \ + "Result should always have mood field" + assert hasattr(result, 'inspirations'), \ + "Result should always have inspirations field" + assert hasattr(result, 'todos'), \ + "Result should always have todos field" + + finally: + # Clean up + await service.close() diff --git a/tests/test_storage.py b/tests/test_storage.py new file mode 100644 index 0000000000000000000000000000000000000000..4e1f2902faa05c3defb9a0233ef5dbafa56bdead --- /dev/null +++ b/tests/test_storage.py @@ -0,0 +1,739 @@ +"""Unit tests for storage service. + +This module tests the StorageService class to ensure proper JSON file +persistence, error handling, and data integrity. + +Requirements: 7.1, 7.2, 7.3, 7.4, 7.5, 7.6, 7.7 +""" + +import json +import pytest +import tempfile +import shutil +from pathlib import Path +from datetime import datetime + +from app.storage import StorageService, StorageError +from app.models import ( + RecordData, + ParsedData, + MoodData, + InspirationData, + TodoData +) + + +@pytest.fixture +def temp_data_dir(): + """Create a temporary directory for test data.""" + temp_dir = tempfile.mkdtemp() + yield temp_dir + shutil.rmtree(temp_dir) + + +@pytest.fixture +def storage_service(temp_data_dir): + """Create a StorageService instance with temporary directory.""" + return StorageService(temp_data_dir) + + +class TestStorageServiceInitialization: + """Tests for StorageService initialization.""" + + def test_init_creates_data_directory(self, temp_data_dir): + """Test that initialization creates the data directory if it doesn't exist.""" + # Remove the directory + shutil.rmtree(temp_data_dir) + assert not Path(temp_data_dir).exists() + + # Initialize service + service = StorageService(temp_data_dir) + + # Verify directory was created + assert Path(temp_data_dir).exists() + assert Path(temp_data_dir).is_dir() + + def test_init_sets_file_paths(self, 
storage_service, temp_data_dir): + """Test that initialization sets correct file paths.""" + assert storage_service.records_file == Path(temp_data_dir) / "records.json" + assert storage_service.moods_file == Path(temp_data_dir) / "moods.json" + assert storage_service.inspirations_file == Path(temp_data_dir) / "inspirations.json" + assert storage_service.todos_file == Path(temp_data_dir) / "todos.json" + + +class TestFileInitialization: + """Tests for file initialization logic. + + Requirements: 7.5 + """ + + def test_ensure_file_exists_creates_new_file(self, storage_service): + """Test that _ensure_file_exists creates a new file with empty array.""" + test_file = storage_service.data_dir / "test.json" + assert not test_file.exists() + + storage_service._ensure_file_exists(test_file) + + assert test_file.exists() + with open(test_file, 'r', encoding='utf-8') as f: + data = json.load(f) + assert data == [] + + def test_ensure_file_exists_preserves_existing_file(self, storage_service): + """Test that _ensure_file_exists doesn't overwrite existing files.""" + test_file = storage_service.data_dir / "test.json" + existing_data = [{"key": "value"}] + + with open(test_file, 'w', encoding='utf-8') as f: + json.dump(existing_data, f) + + storage_service._ensure_file_exists(test_file) + + with open(test_file, 'r', encoding='utf-8') as f: + data = json.load(f) + assert data == existing_data + + +class TestSaveRecord: + """Tests for save_record method. 
+ + Requirements: 7.1, 7.7 + """ + + def test_save_record_creates_file_if_not_exists(self, storage_service): + """Test that save_record creates records.json if it doesn't exist.""" + assert not storage_service.records_file.exists() + + record = RecordData( + record_id="test-id", + timestamp="2024-01-01T12:00:00Z", + input_type="text", + original_text="测试文本", + parsed_data=ParsedData() + ) + + storage_service.save_record(record) + + assert storage_service.records_file.exists() + + def test_save_record_generates_uuid_if_not_set(self, storage_service): + """Test that save_record generates a UUID if record_id is not set.""" + record = RecordData( + record_id="", + timestamp="2024-01-01T12:00:00Z", + input_type="text", + original_text="测试文本", + parsed_data=ParsedData() + ) + + record_id = storage_service.save_record(record) + + assert record_id + assert len(record_id) == 36 # UUID format + assert record.record_id == record_id + + def test_save_record_preserves_existing_id(self, storage_service): + """Test that save_record preserves existing record_id.""" + existing_id = "my-custom-id" + record = RecordData( + record_id=existing_id, + timestamp="2024-01-01T12:00:00Z", + input_type="text", + original_text="测试文本", + parsed_data=ParsedData() + ) + + record_id = storage_service.save_record(record) + + assert record_id == existing_id + + def test_save_record_appends_to_existing_records(self, storage_service): + """Test that save_record appends to existing records.""" + # Save first record + record1 = RecordData( + record_id="id-1", + timestamp="2024-01-01T12:00:00Z", + input_type="text", + original_text="文本1", + parsed_data=ParsedData() + ) + storage_service.save_record(record1) + + # Save second record + record2 = RecordData( + record_id="id-2", + timestamp="2024-01-01T13:00:00Z", + input_type="text", + original_text="文本2", + parsed_data=ParsedData() + ) + storage_service.save_record(record2) + + # Verify both records exist + with open(storage_service.records_file, 'r', 
class TestAppendMood:
    """Exercise ``StorageService.append_mood`` against a fresh data directory.

    Requirements: 7.2
    """

    @staticmethod
    def _stored_moods(service):
        """Return the decoded JSON content of the service's moods file."""
        with open(service.moods_file, 'r', encoding='utf-8') as handle:
            return json.load(handle)

    def test_append_mood_creates_file_if_not_exists(self, storage_service):
        """The moods file is absent before the first append and present after it."""
        assert not storage_service.moods_file.exists()

        storage_service.append_mood(
            MoodData(type="开心", intensity=8, keywords=["愉快"]),
            "record-1",
            "2024-01-01T12:00:00Z",
        )

        assert storage_service.moods_file.exists()

    def test_append_mood_adds_metadata(self, storage_service):
        """The stored entry carries the record ID and timestamp next to the mood fields."""
        storage_service.append_mood(
            MoodData(type="开心", intensity=8, keywords=["愉快"]),
            "record-1",
            "2024-01-01T12:00:00Z",
        )

        entries = self._stored_moods(storage_service)

        assert len(entries) == 1
        entry = entries[0]
        assert entry["record_id"] == "record-1"
        assert entry["timestamp"] == "2024-01-01T12:00:00Z"
        assert entry["type"] == "开心"
        assert entry["intensity"] == 8

    def test_append_mood_multiple_moods(self, storage_service):
        """Two successive appends are both kept, in call order."""
        appends = [
            (MoodData(type="开心", intensity=8), "record-1", "2024-01-01T12:00:00Z"),
            (MoodData(type="焦虑", intensity=6), "record-2", "2024-01-01T13:00:00Z"),
        ]
        for mood, record_id, timestamp in appends:
            storage_service.append_mood(mood, record_id, timestamp)

        entries = self._stored_moods(storage_service)

        assert len(entries) == 2
        assert entries[0]["type"] == "开心"
        assert entries[1]["type"] == "焦虑"
def test_append_inspirations_multiple_items(self, storage_service):
    """A batch of three inspirations is stored completely and in the given order."""
    ideas = [("想法1", "工作"), ("想法2", "生活"), ("想法3", "学习")]
    batch = [
        InspirationData(core_idea=core_idea, category=category)
        for core_idea, category in ideas
    ]
    storage_service.append_inspirations(batch, "record-1", "2024-01-01T12:00:00Z")

    with open(storage_service.inspirations_file, 'r', encoding='utf-8') as handle:
        stored = json.load(handle)

    assert len(stored) == 3
    # Compare position by position so ordering is checked as well as content.
    for position, (core_idea, _category) in enumerate(ideas):
        assert stored[position]["core_idea"] == core_idea
class TestErrorHandling:
    """Tests for StorageService error handling.

    Each test forces a low-level failure (refused file open, failing
    ``json.dump``, or malformed file content) and checks how the service
    reacts, normally by raising ``StorageError``.

    Requirements: 7.6
    """

    @staticmethod
    def _make_write_refusing_open():
        """Build a replacement for ``builtins.open`` that rejects write modes.

        Must be called *before* ``builtins.open`` is patched: the genuine
        ``open`` is captured here and used for delegation. Calling the bare
        name ``open`` from inside the replacement would resolve to the
        replacement itself once the patch is active and recurse until
        ``RecursionError``.

        Returns:
            A callable with ``open``'s calling convention that raises
            ``IOError("Permission denied")`` when the mode contains ``"w"``
            (whether the mode is passed positionally or by keyword) and
            otherwise defers to the real ``open``.
        """
        real_open = open

        def guarded_open(file, mode="r", *args, **kwargs):
            if "w" in mode:
                raise IOError("Permission denied")
            return real_open(file, mode, *args, **kwargs)

        return guarded_open

    @staticmethod
    def _raise_disk_full(*args, **kwargs):
        """Stand-in for ``json.dump`` that always fails with ``IOError("Disk full")``."""
        raise IOError("Disk full")

    def test_storage_error_on_write_failure(self, storage_service, monkeypatch):
        """Test that StorageError is raised when file writing fails."""
        monkeypatch.setattr("builtins.open", self._make_write_refusing_open())

        with pytest.raises(StorageError) as exc_info:
            storage_service._write_json_file(storage_service.records_file, [])

        assert "Failed to write file" in str(exc_info.value)

    def test_storage_error_on_read_failure(self, storage_service):
        """Test that StorageError is raised when file reading fails."""
        # Create an invalid JSON file
        with open(storage_service.records_file, 'w', encoding='utf-8') as f:
            f.write("invalid json content")

        with pytest.raises(StorageError) as exc_info:
            storage_service._read_json_file(storage_service.records_file)

        assert "Failed to read file" in str(exc_info.value)

    def test_save_record_write_failure(self, storage_service, monkeypatch):
        """Test that save_record raises StorageError when file writing fails."""
        record = RecordData(
            record_id="test-id",
            timestamp="2024-01-01T12:00:00Z",
            input_type="text",
            original_text="测试文本",
            parsed_data=ParsedData()
        )

        monkeypatch.setattr("json.dump", self._raise_disk_full)

        with pytest.raises(StorageError) as exc_info:
            storage_service.save_record(record)

        # Error can occur during initialization or write
        assert "Failed to" in str(exc_info.value)

    def test_append_mood_write_failure(self, storage_service, monkeypatch):
        """Test that append_mood raises StorageError when file writing fails."""
        mood = MoodData(type="开心", intensity=8, keywords=["愉快"])

        monkeypatch.setattr("json.dump", self._raise_disk_full)

        with pytest.raises(StorageError) as exc_info:
            storage_service.append_mood(mood, "record-1", "2024-01-01T12:00:00Z")

        # Error can occur during initialization or write
        assert "Failed to" in str(exc_info.value)

    def test_append_inspirations_write_failure(self, storage_service, monkeypatch):
        """Test that append_inspirations raises StorageError when file writing fails."""
        inspirations = [InspirationData(core_idea="想法", category="工作")]

        monkeypatch.setattr("json.dump", self._raise_disk_full)

        with pytest.raises(StorageError) as exc_info:
            storage_service.append_inspirations(inspirations, "record-1", "2024-01-01T12:00:00Z")

        # Error can occur during initialization or write
        assert "Failed to" in str(exc_info.value)

    def test_append_todos_write_failure(self, storage_service, monkeypatch):
        """Test that append_todos raises StorageError when file writing fails."""
        todos = [TodoData(task="任务1")]

        monkeypatch.setattr("json.dump", self._raise_disk_full)

        with pytest.raises(StorageError) as exc_info:
            storage_service.append_todos(todos, "record-1", "2024-01-01T12:00:00Z")

        # Error can occur during initialization or write
        assert "Failed to" in str(exc_info.value)

    def test_ensure_file_exists_creation_failure(self, storage_service, monkeypatch):
        """Test that _ensure_file_exists raises StorageError when file creation fails."""
        test_file = storage_service.data_dir / "test.json"

        monkeypatch.setattr("builtins.open", self._make_write_refusing_open())

        with pytest.raises(StorageError) as exc_info:
            storage_service._ensure_file_exists(test_file)

        assert "Failed to initialize file" in str(exc_info.value)

    def test_read_json_file_with_corrupted_data(self, storage_service):
        """Test that _read_json_file raises StorageError with corrupted JSON."""
        # Truncated object: the closing brace is missing.
        with open(storage_service.records_file, 'w', encoding='utf-8') as f:
            f.write('{"incomplete": "json"')

        with pytest.raises(StorageError) as exc_info:
            storage_service._read_json_file(storage_service.records_file)

        assert "Failed to read file" in str(exc_info.value)

    def test_read_json_file_with_non_list_data(self, storage_service):
        """Test that _read_json_file can read non-list JSON (returns as-is)."""
        # Valid JSON, but an object rather than the usual list.
        with open(storage_service.records_file, 'w', encoding='utf-8') as f:
            json.dump({"key": "value"}, f)

        # This should not raise an error - it returns the data as-is
        result = storage_service._read_json_file(storage_service.records_file)
        assert result == {"key": "value"}
def test_sequential_writes_are_safe(self, storage_service):
    """Check that back-to-back (non-concurrent) saves all reach the records file.

    Twenty records are saved one after another with an empty ``record_id``.
    Afterwards the file must hold exactly twenty entries, the returned IDs
    must be pairwise distinct, and every returned ID must appear in the file.
    """
    expected_count = 20

    # Each save completes before the next begins.
    returned_ids = [
        storage_service.save_record(
            RecordData(
                record_id="",
                timestamp=f"2024-01-01T00:{index:02d}:00Z",
                input_type="text",
                original_text=f"Record {index}",
                parsed_data=ParsedData(),
            )
        )
        for index in range(expected_count)
    ]

    with open(storage_service.records_file, 'r', encoding='utf-8') as handle:
        stored = json.load(handle)

    assert len(stored) == expected_count, \
        f"Expected {expected_count} records, found {len(stored)}"

    assert len(set(returned_ids)) == expected_count, \
        "All record IDs should be unique"

    ids_on_disk = [entry["record_id"] for entry in stored]
    for returned_id in returned_ids:
        assert returned_id in ids_on_disk, \
            f"Record {returned_id} should be in the file"
def test_error_handling_preserves_file_integrity(self, storage_service):
    """Check that earlier records stay intact and readable after a later save.

    NOTE(review): despite the name, no write failure is injected here. Both
    saves are expected to succeed, so this only verifies that a second save
    keeps the first record and preserves insertion order.
    """
    def build_record(record_id, timestamp, text):
        return RecordData(
            record_id=record_id,
            timestamp=timestamp,
            input_type="text",
            original_text=text,
            parsed_data=ParsedData(),
        )

    def read_records():
        with open(storage_service.records_file, 'r', encoding='utf-8') as handle:
            return json.load(handle)

    # First save: the file should hold exactly this one record.
    storage_service.save_record(
        build_record("initial-id", "2024-01-01T00:00:00Z", "Initial record")
    )
    assert len(read_records()) == 1

    # Second save: must append without disturbing the first entry.
    storage_service.save_record(
        build_record("second-id", "2024-01-01T01:00:00Z", "Second record")
    )
    after_second = read_records()
    assert len(after_second) == 2
    assert after_second[0]["record_id"] == "initial-id"
    assert after_second[1]["record_id"] == "second-id"
class TestStorageIntegration:
    """End-to-end checks of the save-record-then-append storage workflow."""

    @staticmethod
    def _read_json(path):
        """Decode and return the JSON document stored at *path*."""
        with open(path, 'r', encoding='utf-8') as handle:
            return json.load(handle)

    def test_complete_workflow_with_all_data(self, storage_service):
        """Save one record with mood, inspirations and todos, then append each part."""
        timestamp = "2024-01-01T12:00:00Z"
        mood = MoodData(type="开心", intensity=8, keywords=["愉快", "放松"])
        inspirations = [
            InspirationData(core_idea="新项目想法", tags=["创新", "技术"], category="工作"),
            InspirationData(core_idea="周末计划", tags=["休闲"], category="生活"),
        ]
        todos = [
            TodoData(task="完成报告", time="明天下午", location="办公室"),
            TodoData(task="买菜", time="今晚", location="超市"),
        ]
        record = RecordData(
            record_id="",  # left empty; save_record is expected to return an ID
            timestamp=timestamp,
            input_type="text",
            original_text="今天很开心,想到一个新项目想法,还要完成报告和买菜",
            parsed_data=ParsedData(mood=mood, inspirations=inspirations, todos=todos),
        )

        record_id = storage_service.save_record(record)
        assert record_id

        storage_service.append_mood(mood, record_id, timestamp)
        storage_service.append_inspirations(inspirations, record_id, timestamp)
        storage_service.append_todos(todos, record_id, timestamp)

        # records.json
        stored_records = self._read_json(storage_service.records_file)
        assert len(stored_records) == 1
        assert stored_records[0]["record_id"] == record_id
        assert stored_records[0]["original_text"] == record.original_text

        # moods.json
        stored_moods = self._read_json(storage_service.moods_file)
        assert len(stored_moods) == 1
        assert stored_moods[0]["record_id"] == record_id
        assert stored_moods[0]["type"] == "开心"
        assert stored_moods[0]["intensity"] == 8

        # inspirations.json
        stored_inspirations = self._read_json(storage_service.inspirations_file)
        assert len(stored_inspirations) == 2
        assert stored_inspirations[0]["record_id"] == record_id
        assert stored_inspirations[0]["core_idea"] == "新项目想法"
        assert stored_inspirations[1]["core_idea"] == "周末计划"

        # todos.json
        stored_todos = self._read_json(storage_service.todos_file)
        assert len(stored_todos) == 2
        assert stored_todos[0]["record_id"] == record_id
        assert stored_todos[0]["task"] == "完成报告"
        assert stored_todos[1]["task"] == "买菜"

    def test_multiple_records_workflow(self, storage_service):
        """Two save-plus-mood rounds accumulate in both files, in call order."""
        rounds = (
            ("2024-01-01T12:00:00Z", "今天很开心", "开心", 8),
            ("2024-01-01T13:00:00Z", "有点焦虑", "焦虑", 6),
        )
        for timestamp, text, mood_type, intensity in rounds:
            record = RecordData(
                record_id="",
                timestamp=timestamp,
                input_type="text",
                original_text=text,
                parsed_data=ParsedData(
                    mood=MoodData(type=mood_type, intensity=intensity)
                ),
            )
            assigned_id = storage_service.save_record(record)
            storage_service.append_mood(
                record.parsed_data.mood, assigned_id, record.timestamp
            )

        assert len(self._read_json(storage_service.records_file)) == 2

        stored_moods = self._read_json(storage_service.moods_file)
        assert len(stored_moods) == 2
        assert stored_moods[0]["type"] == "开心"
        assert stored_moods[1]["type"] == "焦虑"

    def test_workflow_with_partial_data(self, storage_service):
        """With only a mood present, the inspiration and todo files are never created."""
        timestamp = "2024-01-01T12:00:00Z"
        mood = MoodData(type="平静", intensity=5)
        record = RecordData(
            record_id="",
            timestamp=timestamp,
            input_type="text",
            original_text="今天感觉很平静",
            parsed_data=ParsedData(mood=mood),
        )

        record_id = storage_service.save_record(record)
        storage_service.append_mood(mood, record_id, timestamp)

        # Appending empty lists is expected to leave no file behind.
        storage_service.append_inspirations([], record_id, timestamp)
        storage_service.append_todos([], record_id, timestamp)

        assert storage_service.records_file.exists()
        assert storage_service.moods_file.exists()
        assert not storage_service.inspirations_file.exists()
        assert not storage_service.todos_file.exists()

    def test_unique_id_generation_across_records(self, storage_service):
        """Five records saved with an empty ID each receive a distinct UUID-shaped ID."""
        record_ids = [
            storage_service.save_record(
                RecordData(
                    record_id="",
                    timestamp=f"2024-01-01T{12+i}:00:00Z",
                    input_type="text",
                    original_text=f"测试文本 {i}",
                    parsed_data=ParsedData(),
                )
            )
            for i in range(5)
        ]

        # No duplicates among the returned IDs.
        assert len(record_ids) == len(set(record_ids))

        # Shape check only: 36 characters containing four hyphens.
        for record_id in record_ids:
            assert len(record_id) == 36
            assert record_id.count('-') == 4
@st.composite
def todo_data_strategy(draw):
    """Generate TodoData instances for property tests.

    ``task`` is a non-empty text of at most 50 characters; ``time`` and
    ``location`` are each either ``None`` or a non-empty text of at most 20
    characters; ``status`` is always ``"pending"``.
    """
    optional_short_text = st.one_of(st.none(), st.text(min_size=1, max_size=20))

    # Keyword arguments are evaluated left to right, so values are drawn in
    # the order task, time, location.
    return TodoData(
        task=draw(st.text(min_size=1, max_size=50)),
        time=draw(optional_short_text),
        location=draw(optional_short_text),
        status="pending",
    )
+ + **Validates: Requirements 7.1, 7.2, 7.3, 7.4** + """ + # Create a fresh temporary directory and storage service for each example + temp_dir = tempfile.mkdtemp() + try: + storage_service = StorageService(temp_dir) + + # Save the complete record + returned_record_id = storage_service.save_record(record) + + # Property 1: Record should be saved in records.json + assert storage_service.records_file.exists() + with open(storage_service.records_file, 'r', encoding='utf-8') as f: + records = json.load(f) + + assert len(records) >= 1 + # Find the saved record + saved_record = None + for r in records: + if r["record_id"] == returned_record_id: + saved_record = r + break + + assert saved_record is not None, "Record should be saved in records.json" + assert saved_record["timestamp"] == record.timestamp + assert saved_record["input_type"] == record.input_type + assert saved_record["original_text"] == record.original_text + + # Property 2: If mood data exists, it should be in moods.json + if record.parsed_data.mood is not None: + storage_service.append_mood( + record.parsed_data.mood, + returned_record_id, + record.timestamp + ) + + assert storage_service.moods_file.exists() + with open(storage_service.moods_file, 'r', encoding='utf-8') as f: + moods = json.load(f) + + # Find the mood entry for this record + mood_entries = [m for m in moods if m["record_id"] == returned_record_id] + assert len(mood_entries) >= 1, "Mood should be saved in moods.json" + + mood_entry = mood_entries[-1] # Get the last one + assert mood_entry["record_id"] == returned_record_id + assert mood_entry["timestamp"] == record.timestamp + assert mood_entry["type"] == record.parsed_data.mood.type + assert mood_entry["intensity"] == record.parsed_data.mood.intensity + assert mood_entry["keywords"] == record.parsed_data.mood.keywords + + # Property 3: If inspiration data exists, it should be in inspirations.json + if record.parsed_data.inspirations: + storage_service.append_inspirations( + 
record.parsed_data.inspirations, + returned_record_id, + record.timestamp + ) + + assert storage_service.inspirations_file.exists() + with open(storage_service.inspirations_file, 'r', encoding='utf-8') as f: + inspirations = json.load(f) + + # Find inspiration entries for this record + inspiration_entries = [i for i in inspirations if i["record_id"] == returned_record_id] + assert len(inspiration_entries) == len(record.parsed_data.inspirations), \ + "All inspirations should be saved in inspirations.json" + + # Verify each inspiration - use a copy to track matched entries + remaining_entries = inspiration_entries.copy() + for inspiration in record.parsed_data.inspirations: + # Find matching entry (may not be in same order) + matching_entry = None + for idx, entry in enumerate(remaining_entries): + if (entry["core_idea"] == inspiration.core_idea and + entry["category"] == inspiration.category and + entry["tags"] == inspiration.tags): + matching_entry = entry + remaining_entries.pop(idx) + break + + assert matching_entry is not None, \ + f"Could not find matching entry for inspiration: {inspiration}" + assert matching_entry["record_id"] == returned_record_id + assert matching_entry["timestamp"] == record.timestamp + + # Property 4: If todo data exists, it should be in todos.json + if record.parsed_data.todos: + storage_service.append_todos( + record.parsed_data.todos, + returned_record_id, + record.timestamp + ) + + assert storage_service.todos_file.exists() + with open(storage_service.todos_file, 'r', encoding='utf-8') as f: + todos = json.load(f) + + # Find todo entries for this record + todo_entries = [t for t in todos if t["record_id"] == returned_record_id] + assert len(todo_entries) == len(record.parsed_data.todos), \ + "All todos should be saved in todos.json" + + # Verify each todo - use a copy to track matched entries + remaining_entries = todo_entries.copy() + for todo in record.parsed_data.todos: + # Find matching entry (may not be in same order) + 
@given(records=st.lists(record_data_strategy(), min_size=1, max_size=5))
@settings(max_examples=100)
def test_property_9_multiple_records_persistence(self, records):
    """
    Property 9: data persistence integrity - multiple records.

    For any list of generated records, every record must be found in
    records.json after saving, and the mood / inspiration / todo files must
    each hold at least as many entries as were appended.

    **Validates: Requirements 7.1, 7.2, 7.3, 7.4**
    """
    def load(path):
        with open(path, 'r', encoding='utf-8') as handle:
            return json.load(handle)

    # A fresh directory per hypothesis example; removed when the block exits.
    with tempfile.TemporaryDirectory() as temp_dir:
        storage_service = StorageService(temp_dir)

        saved_record_ids = []
        for record in records:
            record_id = storage_service.save_record(record)
            saved_record_ids.append(record_id)

            parsed = record.parsed_data
            if parsed.mood is not None:
                storage_service.append_mood(parsed.mood, record_id, record.timestamp)
            if parsed.inspirations:
                storage_service.append_inspirations(
                    parsed.inspirations, record_id, record.timestamp
                )
            if parsed.todos:
                storage_service.append_todos(parsed.todos, record_id, record.timestamp)

        saved_records = load(storage_service.records_file)
        assert len(saved_records) >= len(records), \
            "All records should be saved in records.json"

        for record_id in saved_record_ids:
            found = any(entry["record_id"] == record_id for entry in saved_records)
            assert found, f"Record {record_id} should be in records.json"

        # Entry counts the three side files must reach.
        expected_moods = sum(1 for r in records if r.parsed_data.mood is not None)
        expected_inspirations = sum(len(r.parsed_data.inspirations) for r in records)
        expected_todos = sum(len(r.parsed_data.todos) for r in records)

        checks = (
            ("moods", storage_service.moods_file, expected_moods),
            ("inspirations", storage_service.inspirations_file, expected_inspirations),
            ("todos", storage_service.todos_file, expected_todos),
        )
        for label, path, expected in checks:
            # A side file is only required when something was appended to it.
            if expected > 0:
                assert path.exists()
                entries = load(path)
                assert len(entries) >= expected, \
                    f"Expected at least {expected} {label}, found {len(entries)}"
Empty data should not create unnecessary entries. + + **Validates: Requirements 7.1, 7.2, 7.3, 7.4** + """ + # Create a fresh temporary directory and storage service for each example + temp_dir = tempfile.mkdtemp() + try: + storage_service = StorageService(temp_dir) + + # Modify record based on flags + if not has_mood: + record.parsed_data.mood = None + if not has_inspirations: + record.parsed_data.inspirations = [] + if not has_todos: + record.parsed_data.todos = [] + + # Save the record + record_id = storage_service.save_record(record) + + # Always save mood/inspirations/todos if they exist + if record.parsed_data.mood is not None: + storage_service.append_mood( + record.parsed_data.mood, + record_id, + record.timestamp + ) + + if record.parsed_data.inspirations: + storage_service.append_inspirations( + record.parsed_data.inspirations, + record_id, + record.timestamp + ) + + if record.parsed_data.todos: + storage_service.append_todos( + record.parsed_data.todos, + record_id, + record.timestamp + ) + + # Verify records.json always exists + assert storage_service.records_file.exists() + + # Verify mood file existence matches data presence + if has_mood and record.parsed_data.mood is not None: + assert storage_service.moods_file.exists(), \ + "moods.json should exist when mood data is present" + + # Verify inspirations file existence matches data presence + if has_inspirations and record.parsed_data.inspirations: + assert storage_service.inspirations_file.exists(), \ + "inspirations.json should exist when inspiration data is present" + + # Verify todos file existence matches data presence + if has_todos and record.parsed_data.todos: + assert storage_service.todos_file.exists(), \ + "todos.json should exist when todo data is present" + finally: + # Clean up temporary directory + shutil.rmtree(temp_dir) + + @given( + file_type=st.sampled_from(["records", "moods", "inspirations", "todos"]) + ) + @settings(max_examples=100) + def 
test_property_10_file_initialization(self, file_type): + """ + Property 10: 文件初始化 + + For any non-existent JSON file, when first written to, the system should + create the file and initialize it as an empty array []. + + **Validates: Requirements 7.5** + """ + # Create a fresh temporary directory and storage service for each example + temp_dir = tempfile.mkdtemp() + try: + storage_service = StorageService(temp_dir) + + # Map file type to file path + file_map = { + "records": storage_service.records_file, + "moods": storage_service.moods_file, + "inspirations": storage_service.inspirations_file, + "todos": storage_service.todos_file + } + + target_file = file_map[file_type] + + # Verify file doesn't exist initially + assert not target_file.exists(), \ + f"{file_type}.json should not exist initially" + + # Trigger file initialization by calling _ensure_file_exists + storage_service._ensure_file_exists(target_file) + + # Property 1: File should now exist + assert target_file.exists(), \ + f"{file_type}.json should be created" + + # Property 2: File should be initialized as empty array + with open(target_file, 'r', encoding='utf-8') as f: + content = json.load(f) + + assert isinstance(content, list), \ + f"{file_type}.json should contain a list" + assert content == [], \ + f"{file_type}.json should be initialized as empty array []" + + # Property 3: File should be valid JSON + # (already verified by json.load above, but let's be explicit) + with open(target_file, 'r', encoding='utf-8') as f: + raw_content = f.read() + + # Should be able to parse without error + parsed = json.loads(raw_content) + assert parsed == [] + finally: + # Clean up temporary directory + shutil.rmtree(temp_dir) + + @given( + operations=st.lists( + st.sampled_from(["records", "moods", "inspirations", "todos"]), + min_size=1, + max_size=10 + ) + ) + @settings(max_examples=100) + def test_property_10_file_initialization_idempotent(self, operations): + """ + Property 10: 文件初始化 - Idempotency + + For 
any sequence of file initialization operations, calling _ensure_file_exists + multiple times should be idempotent - it should not corrupt or overwrite + existing data. + + **Validates: Requirements 7.5** + """ + # Create a fresh temporary directory and storage service for each example + temp_dir = tempfile.mkdtemp() + try: + storage_service = StorageService(temp_dir) + + file_map = { + "records": storage_service.records_file, + "moods": storage_service.moods_file, + "inspirations": storage_service.inspirations_file, + "todos": storage_service.todos_file + } + + # Track which files have been initialized + initialized_files = set() + + for file_type in operations: + target_file = file_map[file_type] + + # Call _ensure_file_exists + storage_service._ensure_file_exists(target_file) + + # File should exist + assert target_file.exists() + + # Read current content + with open(target_file, 'r', encoding='utf-8') as f: + content = json.load(f) + + if file_type not in initialized_files: + # First time - should be empty array + assert content == [], \ + f"First initialization of {file_type}.json should create empty array" + initialized_files.add(file_type) + else: + # Subsequent calls - should preserve empty array + # (In real usage, data would be added between calls, + # but _ensure_file_exists should not overwrite) + assert isinstance(content, list), \ + f"Subsequent calls should preserve list structure" + + # Verify all unique files were created + unique_files = set(operations) + for file_type in unique_files: + target_file = file_map[file_type] + assert target_file.exists(), \ + f"{file_type}.json should exist after operations" + finally: + # Clean up temporary directory + shutil.rmtree(temp_dir) + + @given(record=record_data_strategy()) + @settings(max_examples=100) + def test_property_10_file_initialization_on_first_write(self, record): + """ + Property 10: 文件初始化 - First Write + + For any record being saved, if the JSON files don't exist, they should + be automatically 
created and initialized before writing data. + + **Validates: Requirements 7.5** + """ + # Create a fresh temporary directory and storage service for each example + temp_dir = tempfile.mkdtemp() + try: + storage_service = StorageService(temp_dir) + + # Verify no files exist initially + assert not storage_service.records_file.exists() + assert not storage_service.moods_file.exists() + assert not storage_service.inspirations_file.exists() + assert not storage_service.todos_file.exists() + + # Save a record (this should trigger file initialization) + record_id = storage_service.save_record(record) + + # records.json should now exist and contain the record + assert storage_service.records_file.exists() + with open(storage_service.records_file, 'r', encoding='utf-8') as f: + records = json.load(f) + assert len(records) >= 1 + assert any(r["record_id"] == record_id for r in records) + + # If mood exists, save it and verify file initialization + if record.parsed_data.mood is not None: + storage_service.append_mood( + record.parsed_data.mood, + record_id, + record.timestamp + ) + assert storage_service.moods_file.exists() + with open(storage_service.moods_file, 'r', encoding='utf-8') as f: + moods = json.load(f) + assert isinstance(moods, list) + assert len(moods) >= 1 + + # If inspirations exist, save them and verify file initialization + if record.parsed_data.inspirations: + storage_service.append_inspirations( + record.parsed_data.inspirations, + record_id, + record.timestamp + ) + assert storage_service.inspirations_file.exists() + with open(storage_service.inspirations_file, 'r', encoding='utf-8') as f: + inspirations = json.load(f) + assert isinstance(inspirations, list) + assert len(inspirations) >= len(record.parsed_data.inspirations) + + # If todos exist, save them and verify file initialization + if record.parsed_data.todos: + storage_service.append_todos( + record.parsed_data.todos, + record_id, + record.timestamp + ) + assert storage_service.todos_file.exists() 
+ with open(storage_service.todos_file, 'r', encoding='utf-8') as f: + todos = json.load(f) + assert isinstance(todos, list) + assert len(todos) >= len(record.parsed_data.todos) + finally: + # Clean up temporary directory + shutil.rmtree(temp_dir) + + @given(records=st.lists(record_data_strategy(), min_size=2, max_size=20)) + @settings(max_examples=100) + def test_property_11_unique_id_generation(self, records): + """ + Property 11: 唯一 ID 生成 + + For any two different records, the generated record_ids should be unique (non-repeating). + + **Validates: Requirements 7.7** + """ + # Create a fresh temporary directory and storage service for each example + temp_dir = tempfile.mkdtemp() + try: + storage_service = StorageService(temp_dir) + + generated_ids = [] + + # Save all records and collect their IDs + for record in records: + # Clear the record_id to force generation of a new one + record.record_id = "" + + # Save record and get the generated ID + record_id = storage_service.save_record(record) + generated_ids.append(record_id) + + # Property 1: All IDs should be non-empty strings + for record_id in generated_ids: + assert record_id, "Generated record_id should not be empty" + assert isinstance(record_id, str), "Generated record_id should be a string" + + # Property 2: All IDs should be unique (no duplicates) + unique_ids = set(generated_ids) + assert len(unique_ids) == len(generated_ids), \ + f"All generated IDs should be unique. Generated {len(generated_ids)} IDs but only {len(unique_ids)} are unique. Duplicates found!" 
+ + # Property 3: IDs should be valid UUIDs (format check) + import uuid + for record_id in generated_ids: + try: + # Try to parse as UUID - this will raise ValueError if invalid + uuid.UUID(record_id) + except ValueError: + pytest.fail(f"Generated ID '{record_id}' is not a valid UUID") + + # Property 4: Verify all records are saved with their unique IDs + with open(storage_service.records_file, 'r', encoding='utf-8') as f: + saved_records = json.load(f) + + saved_ids = [r["record_id"] for r in saved_records] + + # All generated IDs should be in the saved records + for record_id in generated_ids: + assert record_id in saved_ids, \ + f"Generated ID {record_id} should be found in saved records" + finally: + # Clean up temporary directory + shutil.rmtree(temp_dir) + + @given( + num_records=st.integers(min_value=10, max_value=50) + ) + @settings(max_examples=50, deadline=500) + def test_property_11_unique_id_generation_stress(self, num_records): + """ + Property 11: 唯一 ID 生成 - Stress Test + + For a large number of records saved in quick succession, all generated + record_ids should still be unique. This tests the robustness of UUID generation. + + **Validates: Requirements 7.7** + """ + # Create a fresh temporary directory and storage service for each example + temp_dir = tempfile.mkdtemp() + try: + storage_service = StorageService(temp_dir) + + generated_ids = [] + + # Generate and save many records quickly + for i in range(num_records): + # Create a minimal record + record = RecordData( + record_id="", # Force generation + timestamp=f"2024-01-01T00:00:{i:02d}Z", + input_type="text", + original_text=f"Test record {i}", + parsed_data=ParsedData(mood=None, inspirations=[], todos=[]) + ) + + record_id = storage_service.save_record(record) + generated_ids.append(record_id) + + # All IDs should be unique + unique_ids = set(generated_ids) + assert len(unique_ids) == num_records, \ + f"Expected {num_records} unique IDs, but got {len(unique_ids)}. 
" \ + f"Found {num_records - len(unique_ids)} duplicates!" + + # Verify all are valid UUIDs + import uuid + for record_id in generated_ids: + try: + uuid.UUID(record_id) + except ValueError: + pytest.fail(f"Generated ID '{record_id}' is not a valid UUID") + finally: + # Clean up temporary directory + shutil.rmtree(temp_dir) + + @given(record=record_data_strategy()) + @settings(max_examples=100) + def test_property_11_unique_id_generation_preserves_existing(self, record): + """ + Property 11: 唯一 ID 生成 - Preserve Existing IDs + + If a record already has a record_id set, the save_record method should + preserve it and not generate a new one. + + **Validates: Requirements 7.7** + """ + # Create a fresh temporary directory and storage service for each example + temp_dir = tempfile.mkdtemp() + try: + storage_service = StorageService(temp_dir) + + # Use the record's existing ID + original_id = record.record_id + + # Save the record + returned_id = storage_service.save_record(record) + + # The returned ID should match the original + assert returned_id == original_id, \ + "save_record should preserve existing record_id" + + # Verify the record is saved with the original ID + with open(storage_service.records_file, 'r', encoding='utf-8') as f: + saved_records = json.load(f) + + found_record = None + for r in saved_records: + if r["record_id"] == original_id: + found_record = r + break + + assert found_record is not None, \ + "Record should be saved with its original ID" + assert found_record["record_id"] == original_id + finally: + # Clean up temporary directory + shutil.rmtree(temp_dir)