diff --git a/.dockerignore b/.dockerignore new file mode 100644 index 0000000000000000000000000000000000000000..be2d170289a3f4a5842b226f42d3bc8e03bc00eb --- /dev/null +++ b/.dockerignore @@ -0,0 +1,75 @@ +# Git +.git +.gitignore +.gitattributes + +# Python +__pycache__ +*.py[cod] +*$py.class +*.so +.Python +env/ +venv/ +ENV/ +.venv + +# Testing +.pytest_cache +.hypothesis +.coverage +htmlcov/ +*.log + +# IDE +.vscode +.idea +*.swp +*.swo +*~ + +# Documentation (keep only essential) +docs/ +PRD.md +PROJECT_STRUCTURE.md +局域网访问修复完成.md + +# Deployment files (not needed in container) +deployment/ +scripts/start_local.py +scripts/start_local.bat +scripts/test_lan_access.bat +scripts/build_and_deploy.sh +scripts/build_and_deploy.bat + +# Frontend source (only need dist) +frontend/node_modules +frontend/src +frontend/components +frontend/services +frontend/utils +frontend/.env.local +frontend/package.json +frontend/package-lock.json +frontend/tsconfig.json +frontend/vite.config.ts +frontend/index.tsx +frontend/index.css +frontend/types.ts +frontend/App.tsx +frontend/test-*.html + +# Tests +tests/ + +# Logs +logs/ + +# OS +.DS_Store +Thumbs.db + +# Temporary files +*.tmp +*.bak +*.swp diff --git a/.env.example b/.env.example new file mode 100644 index 0000000000000000000000000000000000000000..05e398b08fdb65add586ea8793611d4867812e2d --- /dev/null +++ b/.env.example @@ -0,0 +1,33 @@ +# Voice Text Processor Configuration +# Copy this file to .env and fill in your values + +# Required: Zhipu AI API Key (for semantic parsing) +# 获取方式: https://open.bigmodel.cn/ -> API Keys +ZHIPU_API_KEY=your_zhipu_api_key_here + +# Required: MiniMax API Key (for image generation) +# 获取方式: https://platform.minimax.io/ -> API Keys +# 格式示例: sk-xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx +MINIMAX_API_KEY=your_minimax_api_key_here + +# Optional: MiniMax Group ID (已废弃,保留用于兼容性) +MINIMAX_GROUP_ID=your_group_id_here + +# Optional: Data storage directory (default: data/) +DATA_DIR=data + +# Optional: Maximum audio 
file size in bytes (default: 10485760 = 10MB) +MAX_AUDIO_SIZE=10485760 + +# Optional: Logging level (default: INFO) +# Valid values: DEBUG, INFO, WARNING, ERROR, CRITICAL +LOG_LEVEL=INFO + +# Optional: Log file path (default: logs/app.log) +LOG_FILE=logs/app.log + +# Optional: Server host (default: 0.0.0.0) +HOST=0.0.0.0 + +# Optional: Server port (default: 8000) +PORT=8000 diff --git a/.gitattributes b/.gitattributes new file mode 100644 index 0000000000000000000000000000000000000000..a6344aac8c09253b3b630fb776ae94478aa0275b --- /dev/null +++ b/.gitattributes @@ -0,0 +1,35 @@ +*.7z filter=lfs diff=lfs merge=lfs -text +*.arrow filter=lfs diff=lfs merge=lfs -text +*.bin filter=lfs diff=lfs merge=lfs -text +*.bz2 filter=lfs diff=lfs merge=lfs -text +*.ckpt filter=lfs diff=lfs merge=lfs -text +*.ftz filter=lfs diff=lfs merge=lfs -text +*.gz filter=lfs diff=lfs merge=lfs -text +*.h5 filter=lfs diff=lfs merge=lfs -text +*.joblib filter=lfs diff=lfs merge=lfs -text +*.lfs.* filter=lfs diff=lfs merge=lfs -text +*.mlmodel filter=lfs diff=lfs merge=lfs -text +*.model filter=lfs diff=lfs merge=lfs -text +*.msgpack filter=lfs diff=lfs merge=lfs -text +*.npy filter=lfs diff=lfs merge=lfs -text +*.npz filter=lfs diff=lfs merge=lfs -text +*.onnx filter=lfs diff=lfs merge=lfs -text +*.ot filter=lfs diff=lfs merge=lfs -text +*.parquet filter=lfs diff=lfs merge=lfs -text +*.pb filter=lfs diff=lfs merge=lfs -text +*.pickle filter=lfs diff=lfs merge=lfs -text +*.pkl filter=lfs diff=lfs merge=lfs -text +*.pt filter=lfs diff=lfs merge=lfs -text +*.pth filter=lfs diff=lfs merge=lfs -text +*.rar filter=lfs diff=lfs merge=lfs -text +*.safetensors filter=lfs diff=lfs merge=lfs -text +saved_model/**/* filter=lfs diff=lfs merge=lfs -text +*.tar.* filter=lfs diff=lfs merge=lfs -text +*.tar filter=lfs diff=lfs merge=lfs -text +*.tflite filter=lfs diff=lfs merge=lfs -text +*.tgz filter=lfs diff=lfs merge=lfs -text +*.wasm filter=lfs diff=lfs merge=lfs -text +*.xz filter=lfs diff=lfs merge=lfs 
-text +*.zip filter=lfs diff=lfs merge=lfs -text +*.zst filter=lfs diff=lfs merge=lfs -text +*tfevents* filter=lfs diff=lfs merge=lfs -text diff --git a/.github/workflows/sync.yml b/.github/workflows/sync.yml new file mode 100644 index 0000000000000000000000000000000000000000..db1068fe204f4c6f8bd4c67e52a4f2f127e37b84 --- /dev/null +++ b/.github/workflows/sync.yml @@ -0,0 +1,38 @@ +name: Sync to Hugging Face hub +on: + push: + branches: [main] + workflow_dispatch: + +jobs: + sync-to-hub: + runs-on: ubuntu-latest + steps: + - uses: actions/checkout@v3 + with: + fetch-depth: 0 + lfs: true + - name: Push to hub + env: + HF_TOKEN: ${{ secrets.HF_TOKEN }} + run: | + # 1. 配置身份 + git config --global user.email "bot@github.com" + git config --global user.name "GitHub Action" + + # 2. 彻底移除二进制文件及其索引 + rm -rf generated_images + git rm -r --cached generated_images || echo "Already removed" + + # 3. 创建一个全新的、没有历史记录的临时分支 + git checkout --orphan temp-branch + + # 4. 只添加当前的代码文件 + git add . + git commit -m "Deploy clean version of Nora" + + # 5. 强制推送到 Hugging Face 的 main 分支 + # 注意:这会覆盖 HF 上的所有历史,非常适合解决当前死锁 + git push --force https://kernel14:$HF_TOKEN@huggingface.co/spaces/kernel14/Nora temp-branch:main + + diff --git a/.gitignore b/.gitignore new file mode 100644 index 0000000000000000000000000000000000000000..dd6c6de8ca7154568123b346e0bc75063e5bb0c1 --- /dev/null +++ b/.gitignore @@ -0,0 +1,61 @@ +# Python +__pycache__/ +*.py[cod] +*$py.class +*.so +.Python +build/ +develop-eggs/ +downloads/ +eggs/ +.eggs/ +lib/ +lib64/ +parts/ +sdist/ +var/ +wheels/ +*.egg-info/ +.installed.cfg +*.egg + +# Virtual environments +venv/ +env/ +ENV/ +.venv + +# IDE +.vscode/ +.idea/ +*.swp +*.swo +*~ + +# Environment variables +.env + +# Logs +logs/ +*.log + +# Data files +data/*.json + +# Testing +.pytest_cache/ +.coverage +htmlcov/ +.hypothesis/ + +# OS +.DS_Store +Thumbs.db + +# Frontend (开发时忽略,但部署时需要 dist) +frontend/node_modules/ +# 注意:frontend/dist/ 不要忽略,部署需要它! 
+ +# Docker(不要忽略 Dockerfile) +# Dockerfile 需要提交 + diff --git a/.kiro/specs/voice-text-processor/design.md b/.kiro/specs/voice-text-processor/design.md new file mode 100644 index 0000000000000000000000000000000000000000..402fe645840c18c84624bfe7cc456dd6f12bad62 --- /dev/null +++ b/.kiro/specs/voice-text-processor/design.md @@ -0,0 +1,514 @@ +# Design Document: Voice Text Processor + +## Overview + +本系统是一个基于 FastAPI 的 REST API 服务,用于处理用户的语音录音或文字输入,通过智谱 API 进行语音识别和语义解析,提取情绪、灵感和待办事项等结构化数据,并持久化到本地 JSON 文件。 + +系统采用分层架构设计: +- **API 层**:FastAPI 路由和请求处理 +- **服务层**:业务逻辑处理(ASR、语义解析) +- **存储层**:JSON 文件持久化 + +核心工作流程: +1. 接收用户输入(音频文件或文本) +2. 如果是音频,调用智谱 ASR API 转写为文本 +3. 调用 GLM-4-Flash API 进行语义解析 +4. 提取情绪、灵感、待办数据 +5. 持久化到对应的 JSON 文件 +6. 返回结构化响应 + +## Architecture + +系统采用三层架构: + +``` +┌─────────────────────────────────────┐ +│ API Layer (FastAPI) │ +│ - POST /api/process │ +│ - Request validation │ +│ - Response formatting │ +└──────────────┬──────────────────────┘ + │ +┌──────────────▼──────────────────────┐ +│ Service Layer │ +│ - ASRService │ +│ - SemanticParserService │ +│ - StorageService │ +└──────────────┬──────────────────────┘ + │ +┌──────────────▼──────────────────────┐ +│ External Services │ +│ - Zhipu ASR API │ +│ - GLM-4-Flash API │ +│ - Local JSON Files │ +└─────────────────────────────────────┘ +``` + +### 模块职责 + +**API Layer**: +- 处理 HTTP 请求和响应 +- 输入验证(文件格式、大小、文本编码) +- 错误处理和状态码映射 +- 请求日志记录 + +**Service Layer**: +- `ASRService`: 封装智谱 ASR API 调用,处理音频转文字 +- `SemanticParserService`: 封装 GLM-4-Flash API 调用,执行语义解析 +- `StorageService`: 管理 JSON 文件读写,生成唯一 ID 和时间戳 + +**Configuration**: +- 环境变量管理(API 密钥、文件路径、大小限制) +- 启动时配置验证 + +## Components and Interfaces + +### 1. 
API Endpoint + +```python +@app.post("/api/process") +async def process_input( + audio: Optional[UploadFile] = File(None), + text: Optional[str] = Body(None) +) -> ProcessResponse +``` + +**输入**: +- `audio`: 音频文件(multipart/form-data),支持 mp3, wav, m4a +- `text`: 文本内容(application/json),UTF-8 编码 + +**输出**: +```python +class ProcessResponse(BaseModel): + record_id: str + timestamp: str + mood: Optional[MoodData] + inspirations: List[InspirationData] + todos: List[TodoData] + error: Optional[str] +``` + +### 2. ASRService + +```python +class ASRService: + def __init__(self, api_key: str): + self.api_key = api_key + self.client = httpx.AsyncClient() + + async def transcribe(self, audio_file: bytes) -> str: + """ + 调用智谱 ASR API 进行语音识别 + + 参数: + audio_file: 音频文件字节流 + + 返回: + 转写后的文本内容 + + 异常: + ASRServiceError: API 调用失败或识别失败 + """ +``` + +### 3. SemanticParserService + +```python +class SemanticParserService: + def __init__(self, api_key: str): + self.api_key = api_key + self.client = httpx.AsyncClient() + self.system_prompt = ( + "你是一个数据转换器。请将文本解析为 JSON 格式。" + "维度包括:1.情绪(type,intensity,keywords); " + "2.灵感(core_idea,tags,category); " + "3.待办(task,time,location)。" + "必须严格遵循 JSON 格式返回。" + ) + + async def parse(self, text: str) -> ParsedData: + """ + 调用 GLM-4-Flash API 进行语义解析 + + 参数: + text: 待解析的文本内容 + + 返回: + ParsedData 对象,包含 mood, inspirations, todos + + 异常: + SemanticParserError: API 调用失败或解析失败 + """ +``` + +### 4. 
StorageService + +```python +class StorageService: + def __init__(self, data_dir: str): + self.data_dir = Path(data_dir) + self.records_file = self.data_dir / "records.json" + self.moods_file = self.data_dir / "moods.json" + self.inspirations_file = self.data_dir / "inspirations.json" + self.todos_file = self.data_dir / "todos.json" + + def save_record(self, record: RecordData) -> str: + """ + 保存完整记录到 records.json + + 参数: + record: 记录数据对象 + + 返回: + 生成的唯一 record_id + + 异常: + StorageError: 文件写入失败 + """ + + def append_mood(self, mood: MoodData, record_id: str) -> None: + """追加情绪数据到 moods.json""" + + def append_inspirations(self, inspirations: List[InspirationData], record_id: str) -> None: + """追加灵感数据到 inspirations.json""" + + def append_todos(self, todos: List[TodoData], record_id: str) -> None: + """追加待办数据到 todos.json""" +``` + +## Data Models + +### 核心数据结构 + +```python +class MoodData(BaseModel): + type: Optional[str] = None + intensity: Optional[int] = Field(None, ge=1, le=10) + keywords: List[str] = [] + +class InspirationData(BaseModel): + core_idea: str = Field(..., max_length=20) + tags: List[str] = Field(default_factory=list, max_items=5) + category: Literal["工作", "生活", "学习", "创意"] + +class TodoData(BaseModel): + task: str + time: Optional[str] = None + location: Optional[str] = None + status: str = "pending" + +class ParsedData(BaseModel): + mood: Optional[MoodData] = None + inspirations: List[InspirationData] = [] + todos: List[TodoData] = [] + +class RecordData(BaseModel): + record_id: str + timestamp: str + input_type: Literal["audio", "text"] + original_text: str + parsed_data: ParsedData +``` + +### 存储格式 + +**records.json**: +```json +[ + { + "record_id": "uuid-string", + "timestamp": "2024-01-01T12:00:00Z", + "input_type": "audio", + "original_text": "转写后的文本", + "parsed_data": { + "mood": {...}, + "inspirations": [...], + "todos": [...] 
+ } + } +] +``` + +**moods.json**: +```json +[ + { + "record_id": "uuid-string", + "timestamp": "2024-01-01T12:00:00Z", + "type": "开心", + "intensity": 8, + "keywords": ["愉快", "放松"] + } +] +``` + +**inspirations.json**: +```json +[ + { + "record_id": "uuid-string", + "timestamp": "2024-01-01T12:00:00Z", + "core_idea": "新的项目想法", + "tags": ["创新", "技术"], + "category": "工作" + } +] +``` + +**todos.json**: +```json +[ + { + "record_id": "uuid-string", + "timestamp": "2024-01-01T12:00:00Z", + "task": "完成报告", + "time": "明天下午", + "location": "办公室", + "status": "pending" + } +] +``` + + +## Correctness Properties + +属性(Property)是关于系统行为的特征或规则,应该在所有有效执行中保持为真。属性是人类可读规范和机器可验证正确性保证之间的桥梁。 + +### Property 1: 音频格式验证 +*For any* 提交的文件,如果文件扩展名是 mp3、wav 或 m4a,系统应该接受该文件;如果是其他格式,系统应该拒绝并返回错误。 +**Validates: Requirements 1.1** + +### Property 2: UTF-8 文本接受 +*For any* UTF-8 编码的文本字符串(包括中文、emoji、特殊字符),系统应该正确接受并处理。 +**Validates: Requirements 1.2** + +### Property 3: 无效输入错误处理 +*For any* 空输入或格式无效的输入,系统应该返回包含 error 字段的 JSON 响应,而不是崩溃或返回成功状态。 +**Validates: Requirements 1.3, 9.1** + +### Property 4: 解析结果结构完整性 +*For any* 成功的语义解析结果,返回的 JSON 应该包含 mood、inspirations、todos 三个字段,即使某些字段为空值或空数组。 +**Validates: Requirements 3.3** + +### Property 5: 缺失维度处理 +*For any* 不包含特定维度信息的文本,解析结果中该维度应该返回 null(对于 mood)或空数组(对于 inspirations 和 todos)。 +**Validates: Requirements 3.4** + +### Property 6: 情绪数据结构验证 +*For any* 解析出的情绪数据,应该包含 type(字符串)、intensity(1-10 的整数)、keywords(字符串数组)三个字段,且 intensity 必须在有效范围内。 +**Validates: Requirements 4.1, 4.2, 4.3** + +### Property 7: 灵感数据结构验证 +*For any* 解析出的灵感数据,应该包含 core_idea(长度 ≤ 20)、tags(数组长度 ≤ 5)、category(枚举值:工作/生活/学习/创意)三个字段,且所有约束都被满足。 +**Validates: Requirements 5.1, 5.2, 5.3** + +### Property 8: 待办数据结构验证 +*For any* 解析出的待办数据,应该包含 task(必需)、time(可选)、location(可选)、status(默认为 "pending")四个字段。 +**Validates: Requirements 6.1, 6.2, 6.3, 6.4** + +### Property 9: 数据持久化完整性 +*For any* 成功处理的记录,应该在 records.json 中保存完整记录,并且如果包含情绪/灵感/待办数据,应该同时追加到对应的 moods.json、inspirations.json、todos.json 文件中。 +**Validates: 
Requirements 7.1, 7.2, 7.3, 7.4** + +### Property 10: 文件初始化 +*For any* 不存在的 JSON 文件,当首次写入时,系统应该创建该文件并初始化为空数组 `[]`。 +**Validates: Requirements 7.5** + +### Property 11: 唯一 ID 生成 +*For any* 两条不同的记录,生成的 record_id 应该是唯一的(不重复)。 +**Validates: Requirements 7.7** + +### Property 12: 成功响应格式 +*For any* 成功处理的请求,HTTP 响应应该返回 200 状态码,并且响应 JSON 包含 record_id、timestamp、mood、inspirations、todos 字段。 +**Validates: Requirements 8.4, 8.6** + +### Property 13: 错误响应格式 +*For any* 处理失败的请求,HTTP 响应应该返回适当的错误状态码(400 或 500),并且响应 JSON 包含 error 字段,描述具体错误信息。 +**Validates: Requirements 8.5, 9.1, 9.3** + +### Property 14: 错误日志记录 +*For any* 系统发生的错误,应该在日志文件中记录该错误,包含时间戳和错误堆栈信息。 +**Validates: Requirements 9.5** + +### Property 15: 敏感信息保护 +*For any* 日志输出,不应该包含敏感信息(如 API 密钥、用户密码等)。 +**Validates: Requirements 10.5** + +## Error Handling + +### 错误分类 + +**1. 输入验证错误(HTTP 400)**: +- 音频文件格式不支持 +- 音频文件大小超过限制 +- 文本内容为空 +- 请求格式错误(既没有 audio 也没有 text) + +**2. 外部服务错误(HTTP 500)**: +- 智谱 ASR API 调用失败 +- GLM-4-Flash API 调用失败 +- API 返回非预期格式 + +**3. 存储错误(HTTP 500)**: +- JSON 文件写入失败 +- 磁盘空间不足 +- 文件权限错误 + +**4. 
配置错误(启动时失败)**: +- API 密钥缺失 +- 数据目录不可访问 +- 必需配置项缺失 + +### 错误处理策略 + +```python +class APIError(Exception): + """API 层错误基类""" + def __init__(self, message: str, status_code: int): + self.message = message + self.status_code = status_code + +class ASRServiceError(APIError): + """ASR 服务错误""" + def __init__(self, message: str = "语音识别服务不可用"): + super().__init__(message, 500) + +class SemanticParserError(APIError): + """语义解析服务错误""" + def __init__(self, message: str = "语义解析服务不可用"): + super().__init__(message, 500) + +class StorageError(APIError): + """存储错误""" + def __init__(self, message: str = "数据存储失败"): + super().__init__(message, 500) + +class ValidationError(APIError): + """输入验证错误""" + def __init__(self, message: str): + super().__init__(message, 400) +``` + +### 错误响应格式 + +```json +{ + "error": "具体错误描述", + "detail": "详细错误信息(可选)", + "timestamp": "2024-01-01T12:00:00Z" +} +``` + +### 日志记录 + +使用 Python logging 模块: +- **INFO**: 正常请求处理流程 +- **WARNING**: 可恢复的异常情况(如 API 重试) +- **ERROR**: 错误情况,包含完整堆栈信息 +- **DEBUG**: 详细调试信息(开发环境) + +日志格式: +``` +[2024-01-01 12:00:00] [ERROR] [request_id: xxx] ASR API call failed: Connection timeout +Traceback: ... 
+``` + +## Testing Strategy + +本系统采用双重测试策略:单元测试和基于属性的测试(Property-Based Testing)。 + +### 单元测试 + +单元测试用于验证特定示例、边缘情况和错误条件: + +**测试范围**: +- API 端点的请求/响应处理 +- 各服务类的 mock 测试(模拟外部 API) +- 数据模型的验证逻辑 +- 错误处理流程 +- 配置加载和验证 + +**示例测试用例**: +- 测试 POST /api/process 端点存在 +- 测试接受 multipart/form-data 格式 +- 测试接受 application/json 格式 +- 测试 ASR API 调用失败时的错误处理 +- 测试 GLM-4-Flash API 调用失败时的错误处理 +- 测试文件写入失败时的错误处理 +- 测试配置缺失时启动失败 +- 测试空音频识别的边缘情况 +- 测试无情绪信息文本的边缘情况 +- 测试无灵感信息文本的边缘情况 +- 测试无待办信息文本的边缘情况 + +### 基于属性的测试(Property-Based Testing) + +基于属性的测试用于验证通用属性在所有输入下都成立。 + +**测试库**: 使用 `hypothesis` 库(Python 的 PBT 框架) + +**配置**: +- 每个属性测试运行最少 100 次迭代 +- 每个测试必须引用设计文档中的属性编号 +- 标签格式:`# Feature: voice-text-processor, Property N: [property text]` + +**属性测试覆盖**: +- Property 1: 音频格式验证 +- Property 2: UTF-8 文本接受 +- Property 3: 无效输入错误处理 +- Property 4: 解析结果结构完整性 +- Property 5: 缺失维度处理 +- Property 6: 情绪数据结构验证 +- Property 7: 灵感数据结构验证 +- Property 8: 待办数据结构验证 +- Property 9: 数据持久化完整性 +- Property 10: 文件初始化 +- Property 11: 唯一 ID 生成 +- Property 12: 成功响应格式 +- Property 13: 错误响应格式 +- Property 14: 错误日志记录 +- Property 15: 敏感信息保护 + +**测试策略**: +- 使用 hypothesis 生成随机输入(文件名、文本、数据结构) +- 使用 pytest-mock 模拟外部 API 调用 +- 使用临时文件系统进行存储测试 +- 验证所有属性在随机输入下都成立 + +**示例属性测试**: +```python +from hypothesis import given, strategies as st +import pytest + +@given(st.text(min_size=1)) +def test_property_2_utf8_text_acceptance(text): + """ + Feature: voice-text-processor, Property 2: UTF-8 文本接受 + For any UTF-8 encoded text string, the system should accept and process it. + """ + response = client.post("/api/process", json={"text": text}) + assert response.status_code in [200, 500] # 接受输入,可能解析失败但不应拒绝 + +@given(st.lists(st.text(), min_size=1, max_size=10)) +def test_property_11_unique_id_generation(texts): + """ + Feature: voice-text-processor, Property 11: 唯一 ID 生成 + For any two different records, the generated record_ids should be unique. 
+ """ + record_ids = [] + for text in texts: + response = client.post("/api/process", json={"text": text}) + if response.status_code == 200: + record_ids.append(response.json()["record_id"]) + + # 所有 ID 应该唯一 + assert len(record_ids) == len(set(record_ids)) +``` + +### 测试覆盖目标 + +- 代码覆盖率:≥ 80% +- 属性测试:覆盖所有 15 个正确性属性 +- 单元测试:覆盖所有边缘情况和错误路径 +- 集成测试:端到端流程测试(音频 → 转写 → 解析 → 存储) + diff --git a/.kiro/specs/voice-text-processor/requirements.md b/.kiro/specs/voice-text-processor/requirements.md new file mode 100644 index 0000000000000000000000000000000000000000..1a0e71db610517e628aef53e6310ab8e612e64ae --- /dev/null +++ b/.kiro/specs/voice-text-processor/requirements.md @@ -0,0 +1,139 @@ +# Requirements Document + +## Introduction + +这是一个治愈系记录助手的后端核心模块。系统接收语音录音或文字输入,通过智谱 API 进行语音转写和语义解析,输出包含情绪、灵感、待办的结构化 JSON 数据,并持久化到本地文件系统。 + +## Glossary + +- **System**: 治愈系记录助手后端系统 +- **ASR_Service**: 智谱 API 语音识别服务 +- **Semantic_Parser**: GLM-4-Flash 语义解析引擎 +- **Storage_Manager**: 本地 JSON 文件存储管理器 +- **Record**: 用户输入的单次记录(语音或文字) +- **Mood**: 情绪数据结构(type, intensity, keywords) +- **Inspiration**: 灵感数据结构(core_idea, tags, category) +- **Todo**: 待办事项数据结构(task, time, location, status) + +## Requirements + +### Requirement 1: 接收用户输入 + +**User Story:** 作为用户,我想要提交语音录音或文字内容,以便系统能够处理我的记录。 + +#### Acceptance Criteria + +1. WHEN 用户提交音频文件,THE System SHALL 接受常见音频格式(mp3, wav, m4a) +2. WHEN 用户提交文字内容,THE System SHALL 接受 UTF-8 编码的文本字符串 +3. WHEN 输入数据为空或格式无效,THE System SHALL 返回明确的错误信息 +4. WHEN 音频文件大小超过 10MB,THE System SHALL 拒绝处理并返回文件过大错误 + +### Requirement 2: 语音转文字 + +**User Story:** 作为用户,我想要系统将我的语音录音转换为文字,以便进行后续的语义分析。 + +#### Acceptance Criteria + +1. WHEN 接收到音频文件,THE ASR_Service SHALL 调用智谱 ASR API 进行语音识别 +2. WHEN 语音识别成功,THE ASR_Service SHALL 返回转写后的文本内容 +3. IF 智谱 API 调用失败,THEN THE System SHALL 记录错误日志并返回转写失败错误 +4. WHEN 音频内容无法识别,THE ASR_Service SHALL 返回空文本并标记为识别失败 + +### Requirement 3: 语义解析 + +**User Story:** 作为用户,我想要系统从我的文本中提取情绪、灵感和待办事项,以便获得结构化的记录数据。 + +#### Acceptance Criteria + +1. 
WHEN 接收到文本内容,THE Semantic_Parser SHALL 调用 GLM-4-Flash API 进行语义解析 +2. WHEN 调用 GLM-4-Flash,THE System SHALL 使用指定的 System Prompt:"你是一个数据转换器。请将文本解析为 JSON 格式。维度包括:1.情绪(type,intensity,keywords); 2.灵感(core_idea,tags,category); 3.待办(task,time,location)。必须严格遵循 JSON 格式返回。" +3. WHEN 解析成功,THE Semantic_Parser SHALL 返回包含 mood、inspirations、todos 的 JSON 结构 +4. WHEN 文本中不包含某个维度的信息,THE Semantic_Parser SHALL 返回该维度的空值或空数组 +5. IF GLM-4-Flash API 调用失败,THEN THE System SHALL 记录错误日志并返回解析失败错误 + +### Requirement 4: 情绪数据提取 + +**User Story:** 作为用户,我想要系统识别我的情绪状态,以便追踪我的情绪变化。 + +#### Acceptance Criteria + +1. WHEN 解析情绪数据,THE Semantic_Parser SHALL 提取情绪类型(type) +2. WHEN 解析情绪数据,THE Semantic_Parser SHALL 提取情绪强度(intensity),范围为 1-10 的整数 +3. WHEN 解析情绪数据,THE Semantic_Parser SHALL 提取情绪关键词(keywords),以字符串数组形式返回 +4. WHEN 文本中不包含明确的情绪信息,THE Semantic_Parser SHALL 返回 null 或默认值 + +### Requirement 5: 灵感数据提取 + +**User Story:** 作为用户,我想要系统捕捉我的灵感想法,以便日后回顾和整理。 + +#### Acceptance Criteria + +1. WHEN 解析灵感数据,THE Semantic_Parser SHALL 提取核心观点(core_idea),长度不超过 20 个字符 +2. WHEN 解析灵感数据,THE Semantic_Parser SHALL 提取标签(tags),以字符串数组形式返回,最多 5 个标签 +3. WHEN 解析灵感数据,THE Semantic_Parser SHALL 提取分类(category),值为"工作"、"生活"、"学习"或"创意"之一 +4. WHEN 文本中包含多个灵感,THE Semantic_Parser SHALL 返回灵感数组 +5. WHEN 文本中不包含灵感信息,THE Semantic_Parser SHALL 返回空数组 + +### Requirement 6: 待办事项提取 + +**User Story:** 作为用户,我想要系统识别我提到的待办事项,以便自动创建任务清单。 + +#### Acceptance Criteria + +1. WHEN 解析待办数据,THE Semantic_Parser SHALL 提取任务描述(task) +2. WHEN 解析待办数据,THE Semantic_Parser SHALL 提取时间信息(time),保留原始表达(如"明晚"、"下周三") +3. WHEN 解析待办数据,THE Semantic_Parser SHALL 提取地点信息(location) +4. WHEN 创建新待办事项,THE System SHALL 设置状态(status)为"pending" +5. WHEN 文本中包含多个待办事项,THE Semantic_Parser SHALL 返回待办数组 +6. WHEN 文本中不包含待办信息,THE Semantic_Parser SHALL 返回空数组 + +### Requirement 7: 数据持久化 + +**User Story:** 作为用户,我想要系统保存我的记录数据,以便日后查询和分析。 + +#### Acceptance Criteria + +1. WHEN 解析完成后,THE Storage_Manager SHALL 将完整记录保存到 records.json 文件 +2. WHEN 提取到情绪数据,THE Storage_Manager SHALL 将情绪信息追加到 moods.json 文件 +3. 
WHEN 提取到灵感数据,THE Storage_Manager SHALL 将灵感信息追加到 inspirations.json 文件 +4. WHEN 提取到待办数据,THE Storage_Manager SHALL 将待办信息追加到 todos.json 文件 +5. WHEN JSON 文件不存在,THE Storage_Manager SHALL 创建新文件并初始化为空数组 +6. WHEN 写入文件失败,THE System SHALL 记录错误日志并返回存储失败错误 +7. WHEN 保存记录时,THE System SHALL 为每条记录生成唯一 ID 和时间戳 + +### Requirement 8: API 接口设计 + +**User Story:** 作为前端开发者,我想要调用清晰的 REST API,以便集成后端功能。 + +#### Acceptance Criteria + +1. THE System SHALL 提供 POST /api/process 接口接收用户输入 +2. WHEN 请求包含音频文件,THE System SHALL 接受 multipart/form-data 格式 +3. WHEN 请求包含文字内容,THE System SHALL 接受 application/json 格式 +4. WHEN 处理成功,THE System SHALL 返回 HTTP 200 状态码和结构化 JSON 响应 +5. WHEN 处理失败,THE System SHALL 返回适当的 HTTP 错误状态码(400/500)和错误信息 +6. THE System SHALL 在响应中包含 record_id 和 timestamp 字段 + +### Requirement 9: 错误处理 + +**User Story:** 作为用户,我想要在系统出错时获得清晰的错误提示,以便了解问题所在。 + +#### Acceptance Criteria + +1. WHEN 任何步骤发生错误,THE System SHALL 返回包含 error 字段的 JSON 响应 +2. WHEN 智谱 API 调用失败,THE System SHALL 返回"语音识别服务不可用"或"语义解析服务不可用"错误 +3. WHEN 输入验证失败,THE System SHALL 返回具体的验证错误信息 +4. WHEN 文件操作失败,THE System SHALL 返回"数据存储失败"错误 +5. THE System SHALL 记录所有错误到日志文件,包含时间戳和错误堆栈 + +### Requirement 10: 配置管理 + +**User Story:** 作为系统管理员,我想要配置 API 密钥和系统参数,以便灵活部署系统。 + +#### Acceptance Criteria + +1. THE System SHALL 从环境变量或配置文件读取智谱 API 密钥 +2. THE System SHALL 支持配置数据文件存储路径 +3. THE System SHALL 支持配置音频文件大小限制 +4. WHEN 必需的配置项缺失,THE System SHALL 在启动时报错并拒绝启动 +5. THE System SHALL 不在日志中输出敏感信息(如 API 密钥) diff --git a/.kiro/specs/voice-text-processor/tasks.md b/.kiro/specs/voice-text-processor/tasks.md new file mode 100644 index 0000000000000000000000000000000000000000..168f4653a812cfe7cccff2006f6cd995abb33c19 --- /dev/null +++ b/.kiro/specs/voice-text-processor/tasks.md @@ -0,0 +1,204 @@ +# Implementation Plan: Voice Text Processor + +## Overview + +本实现计划将语音文本处理系统分解为离散的编码步骤。实现顺序遵循从核心基础设施到业务逻辑,再到集成测试的渐进式方法。每个任务都引用具体的需求条款,确保完整的需求覆盖。 + +## Tasks + +- [x] 1. 
设置项目结构和核心配置 + - 创建项目目录结构(app/, tests/, data/) + - 设置 FastAPI 应用和基础配置 + - 实现配置管理模块(从环境变量读取 API 密钥、数据路径、文件大小限制) + - 配置日志系统(格式、级别、文件输出) + - 添加启动时配置验证(缺失必需配置时拒绝启动) + - _Requirements: 10.1, 10.2, 10.3, 10.4, 10.5_ + +- [x] 2. 实现数据模型和验证 + - [x] 2.1 创建 Pydantic 数据模型 + - 实现 MoodData 模型(type, intensity 1-10, keywords) + - 实现 InspirationData 模型(core_idea ≤20 字符, tags ≤5, category 枚举) + - 实现 TodoData 模型(task, time, location, status 默认 "pending") + - 实现 ParsedData 模型(mood, inspirations, todos) + - 实现 RecordData 模型(record_id, timestamp, input_type, original_text, parsed_data) + - 实现 ProcessResponse 模型(record_id, timestamp, mood, inspirations, todos, error) + - _Requirements: 4.1, 4.2, 4.3, 5.1, 5.2, 5.3, 6.1, 6.2, 6.3, 6.4_ + + - [x] 2.2 编写数据模型属性测试 + - **Property 6: 情绪数据结构验证** + - **Validates: Requirements 4.1, 4.2, 4.3** + + - [x] 2.3 编写数据模型属性测试 + - **Property 7: 灵感数据结构验证** + - **Validates: Requirements 5.1, 5.2, 5.3** + + - [x] 2.4 编写数据模型属性测试 + - **Property 8: 待办数据结构验证** + - **Validates: Requirements 6.1, 6.2, 6.3, 6.4** + +- [x] 3. 实现存储服务(StorageService) + - [x] 3.1 实现 JSON 文件存储管理器 + - 实现 save_record 方法(保存到 records.json,生成唯一 UUID) + - 实现 append_mood 方法(追加到 moods.json) + - 实现 append_inspirations 方法(追加到 inspirations.json) + - 实现 append_todos 方法(追加到 todos.json) + - 实现文件初始化逻辑(不存在时创建并初始化为空数组) + - 实现错误处理(文件写入失败时抛出 StorageError) + - _Requirements: 7.1, 7.2, 7.3, 7.4, 7.5, 7.6, 7.7_ + + - [x] 3.2 编写存储服务属性测试 + - **Property 9: 数据持久化完整性** + - **Validates: Requirements 7.1, 7.2, 7.3, 7.4** + + - [x] 3.3 编写存储服务属性测试 + - **Property 10: 文件初始化** + - **Validates: Requirements 7.5** + + - [x] 3.4 编写存储服务属性测试 + - **Property 11: 唯一 ID 生成** + - **Validates: Requirements 7.7** + + - [x] 3.5 编写存储服务单元测试 + - 测试文件写入失败的错误处理 + - 测试并发写入的安全性 + - _Requirements: 7.6_ + +- [x] 4. 检查点 - 确保存储层测试通过 + - 确保所有测试通过,如有问题请询问用户。 + +- [x] 5. 
实现 ASR 服务(ASRService) + - [x] 5.1 实现语音识别服务 + - 创建 ASRService 类,初始化 httpx.AsyncClient + - 实现 transcribe 方法(调用智谱 ASR API) + - 处理 API 响应,提取转写文本 + - 实现错误处理(API 调用失败时抛出 ASRServiceError) + - 处理空识别结果(返回空字符串并标记) + - 记录错误日志(包含时间戳和堆栈) + - _Requirements: 2.1, 2.2, 2.3, 2.4, 9.2, 9.5_ + + - [x] 5.2 编写 ASR 服务单元测试 + - 测试 API 调用成功场景(使用 mock) + - 测试 API 调用失败场景(使用 mock) + - 测试空识别结果的边缘情况 + - _Requirements: 2.1, 2.2, 2.3, 2.4_ + +- [x] 6. 实现语义解析服务(SemanticParserService) + - [x] 6.1 实现语义解析服务 + - 创建 SemanticParserService 类,初始化 httpx.AsyncClient + - 配置 System Prompt(数据转换器提示词) + - 实现 parse 方法(调用 GLM-4-Flash API) + - 解析 API 返回的 JSON 结构 + - 处理缺失维度(返回 null 或空数组) + - 实现错误处理(API 调用失败时抛出 SemanticParserError) + - 记录错误日志(包含时间戳和堆栈) + - _Requirements: 3.1, 3.2, 3.3, 3.4, 3.5, 9.2, 9.5_ + + - [x] 6.2 编写语义解析服务属性测试 + - **Property 4: 解析结果结构完整性** + - **Validates: Requirements 3.3** + + - [x] 6.3 编写语义解析服务属性测试 + - **Property 5: 缺失维度处理** + - **Validates: Requirements 3.4** + + - [x] 6.4 编写语义解析服务单元测试 + - 测试 API 调用成功场景(使用 mock) + - 测试 API 调用失败场景(使用 mock) + - 测试 System Prompt 正确使用 + - 测试无情绪信息文本的边缘情况 + - 测试无灵感信息文本的边缘情况 + - 测试无待办信息文本的边缘情况 + - _Requirements: 3.1, 3.2, 3.3, 3.4, 3.5_ + +- [x] 7. 检查点 - 确保服务层测试通过 + - 确保所有测试通过,如有问题请询问用户。 + +- [x] 8. 
实现 API 端点和请求处理 + - [x] 8.1 实现 POST /api/process 端点 + - 创建 FastAPI 路由处理器 + - 实现输入验证(音频格式、文件大小、文本编码) + - 处理 multipart/form-data 格式(音频文件) + - 处理 application/json 格式(文本内容) + - 实现请求日志记录 + - _Requirements: 1.1, 1.2, 8.1, 8.2, 8.3_ + + - [x] 8.2 实现业务逻辑编排 + - 如果是音频输入,调用 ASRService.transcribe + - 调用 SemanticParserService.parse 进行语义解析 + - 生成 record_id 和 timestamp + - 调用 StorageService 保存数据 + - 构建成功响应(HTTP 200,包含 record_id, timestamp, mood, inspirations, todos) + - _Requirements: 7.7, 8.4, 8.6_ + + - [x] 8.3 实现错误处理和响应 + - 捕获 ValidationError,返回 HTTP 400 和错误信息 + - 捕获 ASRServiceError,返回 HTTP 500 和"语音识别服务不可用" + - 捕获 SemanticParserError,返回 HTTP 500 和"语义解析服务不可用" + - 捕获 StorageError,返回 HTTP 500 和"数据存储失败" + - 所有错误响应包含 error 字段和 timestamp + - 记录所有错误到日志文件 + - _Requirements: 1.3, 8.5, 9.1, 9.2, 9.3, 9.4, 9.5_ + + - [x] 8.4 编写 API 端点属性测试 + - **Property 1: 音频格式验证** + - **Validates: Requirements 1.1** + + - [x] 8.5 编写 API 端点属性测试 + - **Property 2: UTF-8 文本接受** + - **Validates: Requirements 1.2** + + - [x] 8.6 编写 API 端点属性测试 + - **Property 3: 无效输入错误处理** + - **Validates: Requirements 1.3, 9.1** + + - [x] 8.7 编写 API 端点属性测试 + - **Property 12: 成功响应格式** + - **Validates: Requirements 8.4, 8.6** + + - [x] 8.8 编写 API 端点属性测试 + - **Property 13: 错误响应格式** + - **Validates: Requirements 8.5, 9.1, 9.3** + + - [x] 8.9 编写 API 端点单元测试 + - 测试 POST /api/process 端点存在 + - 测试接受 multipart/form-data 格式 + - 测试接受 application/json 格式 + - _Requirements: 8.1, 8.2, 8.3_ + +- [x] 9. 实现日志安全性和错误日志 + - [x] 9.1 实现日志过滤器 + - 创建日志过滤器,屏蔽敏感信息(API 密钥、密码等) + - 配置日志格式(包含 request_id, timestamp, level, message) + - 确保错误日志包含完整堆栈信息 + - _Requirements: 9.5, 10.5_ + + - [x] 9.2 编写日志属性测试 + - **Property 14: 错误日志记录** + - **Validates: Requirements 9.5** + + - [-] 9.3 编写日志属性测试 + - **Property 15: 敏感信息保护** + - **Validates: Requirements 10.5** + +- [x] 10. 检查点 - 确保所有测试通过 + - 确保所有测试通过,如有问题请询问用户。 + +- [x] 11. 
集成测试 + - [x] 11.1 编写端到端集成测试 + - 测试完整流程:音频上传 → ASR → 语义解析 → 存储 → 响应 + - 测试完整流程:文本提交 → 语义解析 → 存储 → 响应 + - 测试错误场景的端到端处理 + - _Requirements: 所有需求_ + +- [x] 12. 最终检查点 + - 确保所有测试通过,代码覆盖率达到 80% 以上,如有问题请询问用户。 + +## Notes + +- 所有任务均为必需任务,确保全面的测试覆盖 +- 每个任务都引用了具体的需求条款,确保可追溯性 +- 检查点任务确保增量验证 +- 属性测试验证通用正确性属性(使用 hypothesis 库,最少 100 次迭代) +- 单元测试验证特定示例和边缘情况 +- 所有外部 API 调用使用 mock 进行测试 + diff --git a/Dockerfile b/Dockerfile new file mode 100644 index 0000000000000000000000000000000000000000..a6c74154b4712507c904f564e8a64096c770d135 --- /dev/null +++ b/Dockerfile @@ -0,0 +1,31 @@ +FROM python:3.11-slim + +WORKDIR /app + +# 安装系统依赖 +RUN apt-get update && apt-get install -y \ + build-essential \ + && rm -rf /var/lib/apt/lists/* + +# 复制依赖文件 +COPY requirements.txt . + +# 安装 Python 依赖 +RUN pip install --no-cache-dir -r requirements.txt + +# 复制应用代码 +COPY app/ ./app/ +COPY data/ ./data/ +COPY frontend/dist/ ./frontend/dist/ + +# 复制启动脚本 +COPY start.py . + +# 创建必要的目录 +RUN mkdir -p generated_images logs + +# 暴露端口 +EXPOSE 7860 + +# 启动应用 +CMD ["python", "start.py"] diff --git a/HOTFIX_DOCKER_BUILD.md b/HOTFIX_DOCKER_BUILD.md new file mode 100644 index 0000000000000000000000000000000000000000..8f72d4cfb3e3027848477d982f0cc3513ab1a285 --- /dev/null +++ b/HOTFIX_DOCKER_BUILD.md @@ -0,0 +1,123 @@ +# 🔧 紧急修复:Docker 构建失败 + +## 🐛 问题描述 +Hugging Face Space 构建失败: +``` +ERROR: failed to calculate checksum of ref: "/generated_images": not found +``` + +## 🔍 问题原因 +1. `Dockerfile` 尝试复制 `generated_images/` 目录 +2. 但该目录在同步到 Hugging Face 之前已被 `.github/workflows/sync.yml` 删除(`rm -rf generated_images`),因此 Space 仓库中不存在该目录 +3. Docker 构建时找不到该目录,导致失败 + +## ✅ 已修复 + +### 1. 简化 Dockerfile +**文件**:`Dockerfile` + +**修改前**: +```dockerfile +COPY generated_images/ ./generated_images/ +``` + +**修改后**: +```dockerfile +# 只创建空目录,不复制文件 +RUN mkdir -p generated_images logs +``` + +### 2. 
修改默认配置 +**文件**:`app/user_config.py` 和 `app/storage.py` + +**修改前**: +```python +"image_url": "generated_images/default_character.jpeg", +``` + +**修改后**: +```python +"image_url": "", # 空字符串,前端会显示占位符 +``` + +**原因**: +- 不依赖 Git 仓库中的图片文件 +- 用户首次使用时可以生成自己的 AI 形象 +- 或者前端显示一个默认占位符 + +## 🚀 部署步骤 + +### 1. 提交修复 +```bash +git add Dockerfile app/user_config.py app/storage.py +git commit -m "Fix: Remove dependency on generated_images directory in Docker build" +git push origin main +``` + +### 2. 同步到 Hugging Face +1. 访问:https://huggingface.co/spaces/kernel14/Nora +2. Settings → Sync from GitHub → **Sync now** + +### 3. 等待重新构建 +- 查看 **Logs** 标签页 +- 应该能看到构建成功 + +## ✅ 验证修复 + +构建成功后,访问: +``` +https://kernel14-nora.hf.space/ +``` + +应该能看到: +- ✅ 前端正常加载 +- ✅ AI 形象位置显示占位符(或默认图标) +- ✅ 可以点击 ✨ 按钮生成自定义形象 +- ✅ 所有功能正常工作 + +## 📝 技术说明 + +### 为什么不在 Docker 镜像中包含默认图片? + +1. **Git 仓库限制**: + - 图片文件较大(几百 KB) + - 会增加仓库体积 + - 被 `.github/workflows/sync.yml` 清理 + +2. **更好的方案**: + - 用户首次使用时生成个性化形象 + - 或者使用 CDN 托管的默认图片 + - 或者前端显示 SVG 占位符 + +3. **运行时生成**: + - 用户可以随时生成新形象 + - 图片保存在容器的 `generated_images/` 目录 + - 重启容器后会丢失(可以接受) + +### 未来改进方向 + +1. **使用对象存储**: + - 将生成的图片上传到 S3/OSS + - 持久化存储,不会丢失 + - 支持多实例共享 + +2. **内嵌默认图片**: + - 将默认图片转为 Base64 + - 直接写在代码中 + - 或者使用 SVG 矢量图 + +3. **CDN 托管**: + - 将默认图片放在 CDN + - 配置 URL 指向 CDN + - 加载更快 + +## 🎉 修复完成 + +修复后,Docker 构建应该能成功,Space 可以正常运行。 + +--- + +**修复时间**:2026-01-18 +**影响范围**:Hugging Face Space Docker 构建 +**严重程度**:高(导致构建失败) +**修复状态**:✅ 已完成 diff --git a/HOTFIX_NULL_ERROR.md b/HOTFIX_NULL_ERROR.md new file mode 100644 index 0000000000000000000000000000000000000000..19b5889b00c8c64aaffe8319b8b934888ec42a71 --- /dev/null +++ b/HOTFIX_NULL_ERROR.md @@ -0,0 +1,129 @@ +# 🔧 紧急修复:Python null 错误 + +## 🐛 问题描述 +Hugging Face Space 部署后出现错误: +``` +NameError: name 'null' is not defined +``` + +## 🔍 问题原因 +在 `app/storage.py` 中使用了 JavaScript 语法的 `null`,但 Python 中应该使用 `None`。 + +## ✅ 已修复 + +### 1. 
修复 storage.py 中的 null +**文件**:`app/storage.py` + +**修改位置**: +- 第 173-175 行:`_get_default_records()` 方法 +- 第 315-317 行:`_get_default_todos()` 方法 + +**修改内容**: +```python +# 错误 ❌ +"time": null, +"location": null, + +# 正确 ✅ +"time": None, +"location": None, +``` + +### 2. 修复 Dockerfile +**文件**:`Dockerfile` + +**问题**:未复制 `generated_images/` 目录,导致默认角色图片 404 + +**修改**: +```dockerfile +# 添加这行 +COPY generated_images/ ./generated_images/ +``` + +## 🚀 部署步骤 + +### 1. 提交修复 +```bash +git add app/storage.py Dockerfile +git commit -m "Fix: Replace null with None in Python code" +git push origin main +``` + +### 2. 同步到 Hugging Face +1. 访问:https://huggingface.co/spaces/kernel14/Nora +2. Settings → Sync from GitHub → **Sync now** + +### 3. 等待重新构建 +- 查看 **Logs** 标签页 +- 等待构建完成 + +## ✅ 验证修复 + +访问以下 API 端点,应该都能正常返回: + +1. **健康检查**: + ``` + https://kernel14-nora.hf.space/health + ``` + +2. **获取记录**: + ``` + https://kernel14-nora.hf.space/api/records + ``` + +3. **获取心情**: + ``` + https://kernel14-nora.hf.space/api/moods + ``` + +4. **获取待办**: + ``` + https://kernel14-nora.hf.space/api/todos + ``` + +5. **默认角色图片**: + ``` + https://kernel14-nora.hf.space/generated_images/default_character.jpeg + ``` + +## 📝 技术说明 + +### Python vs JavaScript 的 null/None + +| 语言 | 空值表示 | +|------|---------| +| JavaScript | `null` | +| Python | `None` | +| JSON | `null` | + +在 Python 代码中: +- ✅ 使用 `None` +- ❌ 不要使用 `null` + +在 JSON 文本中(如写在字符串里的 AI 提示示例): +- ✅ 使用不带引号的字面量 `null`(带引号的 `"null"` 是普通字符串,不是空值) +- ✅ 这是正确的,因为这段文本是 JSON 格式,而不是 Python 代码 + +### 为什么会出现这个错误? + +1. **复制粘贴错误**:可能从 JSON 示例中复制了代码 +2. **语言混淆**:在多语言项目中容易混淆语法 +3. **IDE 未检测**:某些 IDE 可能不会立即标记这个错误 + +### 如何避免? + +1. **使用 Linter**:配置 pylint 或 flake8 +2. **类型检查**:使用 mypy 进行类型检查 +3. **单元测试**:编写测试覆盖默认数据生成 +4. 
**代码审查**:提交前仔细检查 + +## 🎉 修复完成 + +修复后,Space 应该能正常运行,所有 API 端点都能正常响应。 + +--- + +**修复时间**:2026-01-18 +**影响范围**:Hugging Face Space 部署 +**严重程度**:高(导致服务无法启动) +**修复状态**:✅ 已完成 diff --git a/HUGGINGFACE_DEPLOY.md b/HUGGINGFACE_DEPLOY.md new file mode 100644 index 0000000000000000000000000000000000000000..d741e10313493a48fd3c872b89a3cd8448222a39 --- /dev/null +++ b/HUGGINGFACE_DEPLOY.md @@ -0,0 +1,176 @@ +# 🚀 Hugging Face Spaces 部署指南 + +## ✅ 部署前检查清单 + +### 1. 根目录必需文件 + +确保以下文件在**根目录**(不是子目录): + +- ✅ `Dockerfile` - Docker 构建配置 +- ✅ `start.py` - 应用启动脚本 +- ✅ `requirements.txt` - Python 依赖 +- ✅ `README_HF.md` - Hugging Face 专用 README(带 frontmatter) + +### 2. 前端构建文件 + +确保前端已构建: + +```bash +cd frontend +npm install +npm run build +``` + +检查 `frontend/dist/` 目录是否存在且包含: +- ✅ `index.html` +- ✅ `assets/` 目录(包含 JS 和 CSS 文件) + +### 3. 环境变量配置 + +在 Hugging Face Space 的 **Settings → Variables and secrets** 中配置: + +**必需:** +- `ZHIPU_API_KEY` - 智谱 AI API 密钥 + +**可选:** +- `MINIMAX_API_KEY` - MiniMax API 密钥 +- `MINIMAX_GROUP_ID` - MiniMax Group ID + +### 4. README 配置 + +确保 `README_HF.md` 包含正确的 frontmatter: + +```yaml +--- +title: Nora - 治愈系记录助手 +emoji: 🌟 +colorFrom: purple +colorTo: pink +sdk: docker +pinned: false +license: mit +--- +``` + +## 🔧 部署步骤 + +### 方法 1:通过 GitHub 同步(推荐) + +1. **提交所有更改到 GitHub**: + ```bash + git add . + git commit -m "Fix: Add required files to root directory for HF deployment" + git push origin main + ``` + +2. **在 Hugging Face Space 中同步**: + - 进入你的 Space:https://huggingface.co/spaces/kernel14/Nora + - 点击 **Settings** + - 找到 **Sync from GitHub** 部分 + - 点击 **Sync now** + +3. **等待构建完成**: + - 查看 **Logs** 标签页 + - 等待 Docker 构建完成(可能需要 5-10 分钟) + +### 方法 2:直接上传文件 + +1. **在 Hugging Face Space 中上传文件**: + - 进入 **Files** 标签页 + - 上传以下文件到根目录: + - `Dockerfile` + - `start.py` + - `requirements.txt` + - `README_HF.md`(重命名为 `README.md`) + +2. **上传应用代码**: + - 上传 `app/` 目录 + - 上传 `data/` 目录 + - 上传 `frontend/dist/` 目录 + +3. 
**触发重新构建**: + - 点击 **Factory reboot** + +## 🐛 常见问题 + +### 问题 1:Space 显示 "Missing app file" + +**原因**:根目录缺少 `Dockerfile` 或 `start.py` + +**解决方案**: +1. 确认根目录有 `Dockerfile` 和 `start.py` +2. 如果使用 GitHub 同步,确保这些文件已提交并推送 +3. Factory reboot 重启 Space + +### 问题 2:Docker 构建失败 + +**原因**:依赖安装失败或文件路径错误 + +**解决方案**: +1. 查看 **Logs** 标签页的详细错误信息 +2. 检查 `requirements.txt` 是否正确 +3. 检查 `Dockerfile` 中的路径是否正确 + +### 问题 3:前端无法加载 + +**原因**:`frontend/dist/` 目录不存在或未包含在 Docker 镜像中 + +**解决方案**: +1. 本地运行 `cd frontend && npm run build` +2. 确认 `frontend/dist/` 目录存在 +3. 提交并推送到 GitHub +4. 重新同步 Space + +### 问题 4:API 调用失败 + +**原因**:未配置环境变量 + +**解决方案**: +1. 在 Space Settings 中配置 `ZHIPU_API_KEY` +2. Factory reboot 重启 Space +3. 检查 Logs 确认环境变量已加载 + +## 📊 验证部署 + +部署成功后,访问你的 Space URL,应该能看到: + +1. ✅ 前端页面正常加载 +2. ✅ AI 角色形象显示 +3. ✅ 可以进行文本输入 +4. ✅ 可以查看心情、灵感、待办数据 + +测试 API 端点: +- `https://你的space.hf.space/health` - 应该返回健康状态 +- `https://你的space.hf.space/docs` - 应该显示 API 文档 + +## 🔄 更新部署 + +当你更新代码后: + +1. **提交到 GitHub**: + ```bash + git add . + git commit -m "Update: 描述你的更改" + git push origin main + ``` + +2. **同步到 Hugging Face**: + - 在 Space Settings 中点击 **Sync now** + - 或者等待自动同步(如果已配置) + +3. **重启 Space**(如果需要): + - 点击 **Factory reboot** + +## 📚 相关文档 + +- [Hugging Face Spaces 文档](https://huggingface.co/docs/hub/spaces) +- [Docker SDK 文档](https://huggingface.co/docs/hub/spaces-sdks-docker) +- [项目完整文档](README.md) + +## 🆘 需要帮助? + +如果遇到问题: + +1. 查看 Space 的 **Logs** 标签页 +2. 检查 **Community** 标签页的讨论 +3. 在 GitHub 仓库提 Issue diff --git a/HUGGINGFACE_FIX_SUMMARY.md b/HUGGINGFACE_FIX_SUMMARY.md new file mode 100644 index 0000000000000000000000000000000000000000..0ec4b1d9de2257d8a948596cc7382d2d0f310a13 --- /dev/null +++ b/HUGGINGFACE_FIX_SUMMARY.md @@ -0,0 +1,223 @@ +# ✅ Hugging Face Spaces 部署问题已修复 + +## 🎯 问题描述 +Hugging Face Space 显示错误: +``` +This Space is missing an app file. An app file is required for the Space to build and run properly. 
+``` + +## 🔍 问题原因 +之前为了整理项目结构,将部署文件移到了 `deployment/` 目录,但 Hugging Face Spaces 要求关键文件必须在**根目录**。 + +## 🔧 已完成的修复 + +### 1. 复制关键文件到根目录 +- ✅ `Dockerfile` - 从 `deployment/Dockerfile` 复制到根目录 +- ✅ `start.py` - 从 `scripts/start.py` 复制到根目录 +- ✅ `README_HF.md` - 创建了带 frontmatter 的 Hugging Face 专用 README + +### 2. 创建部署工具 +- ✅ `.dockerignore` - 优化 Docker 构建,排除不必要的文件 +- ✅ `HUGGINGFACE_DEPLOY.md` - 完整的部署指南 +- ✅ `scripts/prepare_hf_deploy.bat` - 自动化部署准备脚本 + +### 3. 验证文件结构 +根目录现在包含所有必需文件: +``` +项目根目录/ +├── Dockerfile ✅ Docker 构建配置 +├── start.py ✅ 应用启动脚本 +├── requirements.txt ✅ Python 依赖 +├── README_HF.md ✅ HF 专用 README(带 frontmatter) +├── app/ ✅ 应用代码 +├── data/ ✅ 数据目录 +├── frontend/dist/ ✅ 前端构建文件 +└── generated_images/ ✅ 图片目录 +``` + +## 🚀 立即部署 + +### 方法 1:使用自动化脚本(推荐) + +运行准备脚本: +```bash +scripts\prepare_hf_deploy.bat +``` + +这会自动: +- ✅ 检查所有必需文件 +- ✅ 构建前端(如果需要) +- ✅ 生成部署清单 +- ✅ 显示下一步操作 + +### 方法 2:手动操作 + +#### 步骤 1:确认文件存在 +```bash +# 检查根目录文件 +dir Dockerfile +dir start.py +dir requirements.txt +dir README_HF.md + +# 检查前端构建 +dir frontend\dist\index.html +``` + +#### 步骤 2:提交到 GitHub +```bash +git add . +git commit -m "Fix: Add required files to root directory for HF deployment" +git push origin main +``` + +#### 步骤 3:同步到 Hugging Face +1. 访问:https://huggingface.co/spaces/kernel14/Nora +2. 点击 **Settings** 标签 +3. 找到 **Sync from GitHub** 部分 +4. 点击 **Sync now** 按钮 + +#### 步骤 4:配置环境变量 +1. 在 Settings 中找到 **Variables and secrets** +2. 添加环境变量: + - `ZHIPU_API_KEY` - 智谱 AI API 密钥(必需) + - `MINIMAX_API_KEY` - MiniMax API 密钥(可选) + - `MINIMAX_GROUP_ID` - MiniMax Group ID(可选) +3. 点击 **Factory reboot** 重启 Space + +#### 步骤 5:等待构建完成 +1. 切换到 **Logs** 标签页 +2. 观察 Docker 构建过程 +3. 等待显示 "Running on http://0.0.0.0:7860" + +## ✅ 验证部署 + +部署成功后,测试以下功能: + +### 1. 访问主页 +``` +https://kernel14-nora.hf.space/ +``` +应该看到: +- ✅ 前端页面正常加载 +- ✅ AI 角色形象显示 +- ✅ 输入框可用 + +### 2. 
测试 API +``` +https://kernel14-nora.hf.space/health +``` +应该返回: +```json +{ + "status": "healthy", + "data_dir": "data", + "max_audio_size": 10485760 +} +``` + +### 3. 查看 API 文档 +``` +https://kernel14-nora.hf.space/docs +``` +应该显示完整的 API 文档 + +### 4. 测试功能 +- ✅ 文本输入和处理 +- ✅ 查看心情、灵感、待办 +- ✅ AI 对话功能 +- ✅ 心情气泡池 + +## 🐛 故障排查 + +### 问题 1:仍然显示 "Missing app file" + +**可能原因**: +- 文件未正确提交到 GitHub +- GitHub 同步未完成 + +**解决方案**: +1. 检查 GitHub 仓库根目录是否有 `Dockerfile` 和 `start.py` +2. 在 HF Space 中手动触发同步 +3. 查看 Logs 标签页的详细错误 + +### 问题 2:Docker 构建失败 + +**可能原因**: +- 依赖安装失败 +- 文件路径错误 + +**解决方案**: +1. 查看 Logs 标签页的详细错误信息 +2. 检查 `requirements.txt` 是否正确 +3. 确认 `frontend/dist/` 目录存在 + +### 问题 3:前端无法加载 + +**可能原因**: +- `frontend/dist/` 目录不存在或为空 +- 前端构建文件未提交 + +**解决方案**: +1. 本地运行:`cd frontend && npm run build` +2. 确认 `frontend/dist/` 包含 `index.html` 和 `assets/` +3. 提交并推送到 GitHub +4. 重新同步 Space + +### 问题 4:API 调用失败 + +**可能原因**: +- 未配置 `ZHIPU_API_KEY` +- API 密钥无效或配额不足 + +**解决方案**: +1. 在 Space Settings 中配置环境变量 +2. 访问 https://open.bigmodel.cn/ 检查 API 密钥和配额 +3. Factory reboot 重启 Space + +## 📊 部署状态检查 + +运行以下命令检查本地准备情况: +```bash +scripts\prepare_hf_deploy.bat +``` + +查看生成的 `deploy_checklist.txt` 文件。 + +## 📚 相关文档 + +- [HUGGINGFACE_DEPLOY.md](HUGGINGFACE_DEPLOY.md) - 完整部署指南 +- [README_HF.md](README_HF.md) - Hugging Face Space 的 README +- [deployment/DEPLOYMENT.md](deployment/DEPLOYMENT.md) - 通用部署文档 + +## 🎉 成功标志 + +当看到以下内容时,说明部署成功: + +1. ✅ Space 状态显示为 "Running" +2. ✅ 可以访问主页并看到 UI +3. ✅ API 端点正常响应 +4. ✅ 可以进行文本输入和查看数据 +5. ✅ Logs 中没有错误信息 + +--- + +## 📝 技术说明 + +### 为什么需要文件在根目录? + +Hugging Face Spaces 的构建系统会在根目录查找以下文件: + +1. **Dockerfile** - 用于 Docker SDK 的 Space +2. **app.py** - 用于 Gradio/Streamlit SDK 的 Space +3. 
**README.md** - 带 frontmatter 的配置文件 + +如果这些文件不在根目录,构建系统会报错 "Missing app file"。 + +### 我们的解决方案 + +- 保留 `deployment/` 目录用于备份和文档 +- 在根目录创建必需文件的副本 +- 使用 `.dockerignore` 优化构建,避免包含不必要的文件 + +这样既保持了项目结构的整洁,又满足了 Hugging Face 的要求。 diff --git a/PRD.md b/PRD.md new file mode 100644 index 0000000000000000000000000000000000000000..c36fcbd64cb343a69bcde8fa8f3a7d9309dab30e --- /dev/null +++ b/PRD.md @@ -0,0 +1,155 @@ +--- + +# 产品概述 + +一款通过 **iOS 原生 (SwiftUI)** 构建,结合 **BLE 蓝牙硬件** 震动提醒与 **AI 语义解析** 的治愈系记录助手。用户通过 APP 或配套硬件录音,系统自动将内容拆解为灵感、心情与待办,并通过 RAG 技术实现历史记忆的回溯。 + +# 核心交互逻辑 + +## 硬件交互:蓝牙协议 + +由于使用 iOS 原生开发,手机充当“网关”角色,负责硬件与云端的中转。 + +- **连接流程 (Local Only)**: + - **无需 API 接口**。iOS APP 使用 `CBCentralManager` 扫描硬件 UUID。 + - 硬件作为外设 (Peripheral) 被手机连接。 +- **指令交互**: + - **录音阶段**:硬件按下录音键,通过蓝牙特征值 (Characteristic) 将音频数据包流式传输或发送结束信号至 iOS。 + - **震动反馈**: + - **轻微短振(心跳感)**:iOS 检测到录音启动,向蓝牙写入 `0x01` 指令。 + - **急促振动(提醒感)**:iOS 的待办逻辑触发,向蓝牙写入 `0x02` 指令。 + +## AI:调用智谱原生 API + +- **语音转写**:iOS 使用 `URLSession` 调用智谱 **ASR API** 上传音频,实时获取转写文字。 +- **语义理解**:iOS 调用 **GLM-4-Flash API**,通过 Prompt 约束 AI 返回标准 JSON(包含情绪、灵感、待办)。 +- **形象定制**:登录时调用 **CogView API** 生成固定形象,图片下载后由 iOS 进行本地持久化存储。 + +# **技术架构 (iOS Native)** + +## **前端:SwiftUI** + +- **状态管理**:使用 `@Observable` (iOS 17+) 实时同步 AI 解析出的心情颜色和形象气泡。 +- **持久化**:使用 **SwiftData** 存储本地 JSON 结构的记录(`records`, `moods`, `todos`, `inspirations`)。 +- **安全性**:智谱 API Key 存储在 **Keychain** 中,避免硬编码。 + +## **AI 引擎 (智谱 API 集成)** + +| **模块** | **API 模型** | **职责** | +| --- | --- | --- | +| **ASR** | 智谱语音识别 | 硬件原始音频转文字 | +| **NLP** | GLM-4-Flash | 解析 JSON 结构、RAG 历史回溯对话 | +| **图像** | CogView-3 | 登录时一次性生成固定猫咪形象 | + +# AI 形象生成 + +## 设置 + +- **初始化生成**:用户注册/首次登录时,系统引导用户输入关键词(或默认随机),调用 **GLM-Image (CogView)** 生成 1-3 张插画。 +- **持久化存储**:生成的图片 URL 存储在用户配置中,不再随每次录音改变。 +- **按需修改**:在“设置”提供修改接口,用户可以消耗积分或次数重新生成。 + +## 生成逻辑 + +为了保证品牌统一性,系统预设为“**治愈系插画猫咪**”,通过映射逻辑处理用户输入。 + +- **提示词生成逻辑 (Prompt Engineering)** + +| **用户输入维度** | **映射逻辑 (Internal Tags)** | **示例** | +| --- | --- | --- | +| **颜色** | 主色调 & 环境色 | 
温暖粉 -> `soft pastel pink fur, rose-colored aesthetic` | +| **性格** | 构图 & 眼神光 | 活泼 -> `big curious eyes, dynamic paw gesture, energetic aura` | +| **形象** | 配饰 & 特征 | 戴眼镜 -> `wearing tiny round glasses, scholarly look` | + +可选角色类型:【陪伴式朋友】【温柔照顾型长辈】【引导型老师】 + +**系统底座提示词 (System Base Prompt):** + +> "A masterpiece cute stylized cat illustration, [Color] theme, [Personality] facial expression and posture, [Description]. Japanese watercolor style, clean minimalist background, high quality, soft studio lighting, 4k." +> + +## 技术架构 + +### 前端:iOS Native (SwiftUI) + +- **UI 渲染**:利用 `SwiftUI` 实现毛玻璃效果与治愈系猫咪插画的流畅加载。 +- **状态管理**:使用 `Combine` 或 `Observation` 框架同步心情颜色变化。 +- **硬件接口**:`CoreBluetooth`。 + +### 后端:FastAPI (Python) + +- **API 核心**:处理 ASR、NLP、RAG 和 Image Generation。 +- **存储**:本地 JSON 文件系统(`records.json`, `moods.json`, `todos.json`, `inspirations.json`)。 + +### AI 引擎 (智谱全家桶) + +- **ASR**:语音转文字。 +- **GLM-4-Flash**:语义解析与 RAG 问答。 +- **GLM-Image (CogView)**:基于情绪映射生成的静态形象。 + +# 核心功能模块 + +### 首页 - 录音与实时处理 + +- **功能描述:** + - 支持语音录音(5-30 秒)或文字直接输入。 + - **静态形象展示**:页面中心展示常驻形象。 + - 实时处理:完成录音后自动触发后端 ASR 与 NLP 流程。 + - **结果速览**:展示最近一次分析的**原文及摘要**(提取出的情绪、灵感标签或待办任务)。 +- **数据存储:** + - 音频文件:`data/audio/{timestamp}.wav` + - 完整记录索引:`data/records.json`(包含关联的 JSON ID 和音频路径)。 + +### 灵感看板页面 + +- **功能描述:** + - **瀑布流展示**:以卡片形式展示所有灵感。 + - **核心要素**:显示 AI 总结的核心观点、自动生成的标签、所属分类(工作/生活/学习/创意)。 + - **筛选排序**:支持按分类筛选及时间顺序/倒序排列。 +- **数据结构:** `inspirations.json` 存储核心观点、关键字及原文引用。 + +### 心情日记页面 + +- **功能描述:** + - **情绪可视化**:展示情绪分布柱状图(如:本周 60% 平静,20% 喜悦)。 + - **记录列表**:显示每条记录的情绪类型、强度(1-10)及当时的心情关键词。 + - **筛选**:可单独查看“喜”或“哀”等特定情绪的历史。 +- **数据结构:** `moods.json` 记录 `type`, `intensity`, `keywords` 等字段。 + +### 待办清单页面 + +- **功能描述:** + - **任务管理**:从输入中自动提取出的任务(包含时间、地点、内容)。 + - **状态切换**:支持手动勾选“已完成”。 + - **统计**:显示待办/已完成的数量对比。 +- **数据结构:** `todos.json` 包含任务描述、时间实体及完成状态。 + +### AI 对话页面 + +- **功能描述:** + - **智能检索**:用户询问“我上周关于论文有什么灵感?”时,系统通过 RAG 技术检索 `records.json` 并回答。 + - **快捷指令**:提供“总结今日心情”、“还有哪些待办”等快捷按钮。 +- **技术实现:** 基于 **GLM-4-Flash** 
进行上下文理解与 RAG 检索。 + +--- + +# 业务流程与数据流 + +iOS 端在请求 GLM-4 时,使用以下 System Prompt 确保数据可被解析: + +> "你是一个数据转换器。请将文本解析为 JSON 格式。维度包括:1.情绪(type,intensity); 2.灵感(core_idea,tags); 3.待办(task,time,location)。必须严格遵循 JSON 格式返回。" +> + +### NLP 语义解析策略 + +| **提取维度** | **逻辑** | **去向** | +| --- | --- | --- | +| **情绪** | 识别情感极性与 1-10 的强度值 | `moods.json` | +| **灵感** | 提炼 20 字以内的核心观点 + 3个标签 | `inspirations.json` | +| **待办** | 识别时间词(如“明晚”)、地点与动词短语 | `todos.json` | + +# 技术栈总结 + +- **开发语言**:Swift 6.0 / SwiftUI +- **核心框架**:CoreBluetooth (硬件), SwiftData (存储), CoreHaptics (震动) +- **AI 接口**:智谱 API (HTTP/HTTPS 请求) +- **数据存储**:iOS Local SandBox (音频文件 + 结构化数据) \ No newline at end of file diff --git a/PROJECT_STRUCTURE.md b/PROJECT_STRUCTURE.md new file mode 100644 index 0000000000000000000000000000000000000000..807d1a821c394d7a1d6b2cf220d2f584a9a5ff32 --- /dev/null +++ b/PROJECT_STRUCTURE.md @@ -0,0 +1,155 @@ +# 项目目录结构 + +``` +Inspiration-Record-APP/ +├── app/ # 后端应用代码 +│ ├── __init__.py +│ ├── main.py # FastAPI 主应用 +│ ├── config.py # 配置管理 +│ ├── models.py # 数据模型 +│ ├── storage.py # 数据存储 +│ ├── asr_service.py # 语音识别服务 +│ ├── semantic_parser.py # 语义解析服务 +│ ├── image_service.py # 图像生成服务 +│ ├── user_config.py # 用户配置管理 +│ └── logging_config.py # 日志配置 +│ +├── frontend/ # 前端应用 +│ ├── components/ # React 组件 +│ ├── services/ # API 服务 +│ ├── utils/ # 工具函数 +│ ├── dist/ # 构建产物(部署需要) +│ ├── App.tsx # 主应用组件 +│ ├── index.tsx # 入口文件 +│ ├── types.ts # TypeScript 类型定义 +│ ├── package.json # 前端依赖 +│ └── vite.config.ts # Vite 配置 +│ +├── data/ # 数据存储目录 +│ ├── moods.json # 心情数据 +│ ├── inspirations.json # 灵感数据 +│ ├── todos.json # 待办数据 +│ ├── records.json # 记录数据 +│ └── user_config.json # 用户配置 +│ +├── generated_images/ # AI 生成的图片 +│ └── default_character.jpeg # 默认形象 +│ +├── logs/ # 日志文件 +│ └── app.log +│ +├── tests/ # 测试文件 +│ ├── test_*.py # 单元测试 +│ ├── test_api.html # API 测试页面 +│ ├── test_chat_api.py # 聊天 API 测试 +│ └── test_default_character.py # 默认形象测试 +│ +├── scripts/ # 脚本文件 +│ ├── start_local.py # 本地启动脚本(8000端口) +│ ├── 
start_local.bat # Windows 启动脚本 +│ ├── start.py # 通用启动脚本(7860端口) +│ ├── build_and_deploy.bat # 构建并部署脚本 +│ └── build_and_deploy.sh # Linux/Mac 部署脚本 +│ +├── deployment/ # 部署配置文件 +│ ├── Dockerfile # Docker 配置 +│ ├── app_modelscope.py # ModelScope 入口 +│ ├── configuration.json # ModelScope 配置 +│ ├── ms_deploy.json # ModelScope 部署配置 +│ ├── requirements_hf.txt # Hugging Face 依赖 +│ ├── requirements_modelscope.txt # ModelScope 依赖 +│ ├── README_HF.md # Hugging Face 说明 +│ ├── README_MODELSCOPE.md # ModelScope 说明 +│ ├── DEPLOY_CHECKLIST.md # 部署检查清单 +│ ├── DEPLOYMENT.md # 部署指南 +│ ├── deploy_to_hf.bat # 部署到 HF 脚本 +│ └── deploy_to_hf.sh # 部署到 HF 脚本 +│ +├── docs/ # 文档目录 +│ ├── README.md # 项目文档 +│ ├── FEATURE_SUMMARY.md # 功能总结 +│ ├── API_配置说明.md # API 配置说明 +│ ├── 局域网访问指南.md # 局域网访问指南 +│ ├── 功能架构图.md # 架构图 +│ ├── 后端启动问题排查.md # 故障排查 +│ ├── 心情气泡池功能说明.md +│ ├── 心情气泡池快速开始.md +│ └── 语音录制问题排查.md +│ +├── .github/ # GitHub 配置 +│ └── workflows/ +│ └── sync.yml # 自动同步工作流 +│ +├── .env # 环境变量(本地) +├── .env.example # 环境变量示例 +├── .gitignore # Git 忽略文件 +├── requirements.txt # Python 依赖(开发环境) +├── pytest.ini # Pytest 配置 +├── PRD.md # 产品需求文档 +└── README.md # 项目说明 +``` + +## 目录说明 + +### 核心目录 + +- **app/** - 后端 FastAPI 应用,包含所有业务逻辑 +- **frontend/** - 前端 React 应用,使用 TypeScript + Vite +- **data/** - 运行时数据存储,JSON 格式 +- **generated_images/** - AI 生成的角色图片 + +### 开发目录 + +- **tests/** - 所有测试文件,包括单元测试和集成测试 +- **scripts/** - 开发和部署脚本 +- **logs/** - 应用日志文件 + +### 部署目录 + +- **deployment/** - 所有部署相关的配置文件 + - Hugging Face Spaces 部署 + - ModelScope 部署 + - Docker 部署 + +### 文档目录 + +- **docs/** - 项目文档和使用指南 + +## 快速开始 + +### 本地开发 + +```bash +# 1. 安装依赖 +pip install -r requirements.txt +cd frontend && npm install && cd .. + +# 2. 构建前端 +cd frontend && npm run build && cd .. + +# 3. 
启动服务器 +python scripts/start_local.py +``` + +### 部署 + +**Hugging Face:** +```bash +cd deployment +./deploy_to_hf.sh +``` + +**ModelScope:** +- 上传所有文件到 ModelScope +- 确保 `ms_deploy.json` 在根目录 + +## 文件清理说明 + +已删除的冗余文件: +- `app_gradio_old.py.bak` - 旧的 Gradio 备份文件 +- `packages.txt` - 不再使用的包列表 + +已整理的文件: +- 脚本文件 → `scripts/` +- 部署文件 → `deployment/` +- 测试文件 → `tests/` diff --git a/README.md b/README.md new file mode 100644 index 0000000000000000000000000000000000000000..5b44a3e0bb38bb0266e3f33d95804146644ebf54 --- /dev/null +++ b/README.md @@ -0,0 +1,175 @@ +--- +title: Nora - 治愈系记录助手 +emoji: 🌟 +colorFrom: purple +colorTo: pink +sdk: docker +pinned: false +license: mit +--- + +# 🌟 治愈系记录助手 - SoulMate AI Companion + +一个温暖、治愈的 AI 陪伴应用,帮助你记录心情、捕捉灵感、管理待办。 + +目前已上线 Hugging Face,体验链接:https://huggingface.co/spaces/kernel14/Nora + +## ✨ 核心特性 + +- 🎤 **语音/文字快速记录** - 自动分类保存 +- 🤖 **AI 语义解析** - 智能提取情绪、灵感和待办 +- 💬 **AI 对话陪伴(RAG)** - 基于历史记录的个性化对话 +- 🖼️ **AI 形象定制** - 生成专属治愈系角色(720 种组合) +- 🫧 **物理引擎心情池** - 基于 Matter.js 的动态气泡可视化 + +## 🚀 快速开始 + +### 在线使用 + +直接访问本 Space 即可使用完整功能! + +### 配置 API 密钥 + +在 Space 的 **Settings → Variables and secrets** 中配置: + +**必需:** +- `ZHIPU_API_KEY` - 智谱 AI API 密钥 + - 获取地址:https://open.bigmodel.cn/ + - 用途:语音识别、语义解析、AI 对话 + +**可选:** +- `MINIMAX_API_KEY` - MiniMax API 密钥 +- `MINIMAX_GROUP_ID` - MiniMax Group ID + - 获取地址:https://platform.minimaxi.com/ + - 用途:AI 形象生成 + +## 📖 使用说明 + +1. **首页快速记录** + - 点击麦克风录音或在输入框输入文字 + - AI 自动分析并分类保存 + +2. **查看分类数据** + - 点击顶部心情、灵感、待办图标 + - 查看不同类型的记录 + +3. **与 AI 对话** + - 点击 AI 形象显示问候对话框 + - 点击对话框中的聊天图标进入完整对话 + - AI 基于你的历史记录提供个性化回复 + +4. **定制 AI 形象** + - 点击右下角 ✨ 按钮 + - 选择颜色、性格、外观、角色 + - 生成专属形象(需要 MiniMax API) + +5. 
**心情气泡池** + - 点击顶部心情图标 + - 左右滑动查看不同日期的心情卡片 + - 点击卡片展开查看当天的气泡池 + - 可以拖拽气泡,感受物理引擎效果 + +## 📊 API 端点 + +- `POST /api/process` - 处理文本/语音输入 +- `POST /api/chat` - 与 AI 对话(RAG) +- `GET /api/records` - 获取所有记录 +- `GET /api/moods` - 获取情绪数据 +- `GET /api/inspirations` - 获取灵感 +- `GET /api/todos` - 获取待办事项 +- `POST /api/character/generate` - 生成角色形象 +- `GET /health` - 健康检查 +- `GET /docs` - API 文档 + +## 🔗 相关链接 + +- [GitHub 仓库](https://github.com/kernel-14/Nora) +- [详细文档](https://github.com/kernel-14/Nora/blob/main/README.md) +- [智谱 AI](https://open.bigmodel.cn/) +- [MiniMax](https://platform.minimaxi.com/) +- [Huggingface](https://huggingface.co/spaces/kernel14/Nora) + +## 📝 技术栈 + +- **后端**: FastAPI + Python 3.11 +- **前端**: React + TypeScript + Vite +- **物理引擎**: Matter.js +- **AI 服务**: 智谱 AI (GLM-4) + MiniMax +- **部署**: Hugging Face Spaces (Docker) + +## 🔧 本地开发 + +### 启动后端服务 + +```bash +# 安装依赖 +pip install -r requirements.txt + +# 配置环境变量(复制 .env.example 为 .env 并填写) +cp .env.example .env + +# 启动服务(端口 8000) +python scripts/start_local.py +``` + +### 构建前端 + +```bash +cd frontend +npm install +npm run build +``` + +### 局域网访问 + +1. 启动后端后,会显示局域网访问地址(如 `http://192.168.1.100:8000/`) +2. 其他设备连接同一 WiFi 后,使用该地址访问 +3. 如果无法访问,请参考 [局域网访问快速修复指南](docs/局域网访问快速修复.md) + +**快速诊断**: +```bash +# Windows +scripts\test_lan_access.bat + +# 或访问诊断页面 +http://你的IP:8000/test-connection.html +``` + +## 🐛 故障排查 + +### 问题:其他设备访问显示 "Load failed" + +**原因**:防火墙阻止、网络隔离或 API 地址配置错误 + +**解决方案**: +1. 运行诊断工具:`scripts\test_lan_access.bat` +2. 访问诊断页面:`http://你的IP:8000/test-connection.html` +3. 查看详细指南:[局域网访问快速修复](docs/局域网访问快速修复.md) + +### 问题:语音识别失败 + +**原因**:未配置 ZHIPU_API_KEY 或 API 配额不足 + +**解决方案**: +1. 检查 `.env` 文件中的 `ZHIPU_API_KEY` +2. 访问 https://open.bigmodel.cn/ 检查配额 + +### 问题:AI 形象生成失败 + +**原因**:未配置 MINIMAX_API_KEY 或 API 配额不足 + +**解决方案**: +1. 检查 `.env` 文件中的 `MINIMAX_API_KEY` 和 `MINIMAX_GROUP_ID` +2. 
访问 https://platform.minimaxi.com/ 检查配额 + +## 📚 文档 + +- [功能架构图](docs/功能架构图.md) +- [API 配置说明](docs/API_配置说明.md) +- [局域网访问指南](docs/局域网访问指南.md) +- [局域网访问快速修复](docs/局域网访问快速修复.md) +- [心情气泡池功能说明](docs/心情气泡池功能说明.md) + +## 📄 License + +MIT License diff --git a/README_HF.md b/README_HF.md new file mode 100644 index 0000000000000000000000000000000000000000..cded59053da30a978f8f4886259a40693fde247a --- /dev/null +++ b/README_HF.md @@ -0,0 +1,131 @@ +--- +title: Nora - 治愈系记录助手 +emoji: 🌟 +colorFrom: purple +colorTo: pink +sdk: docker +pinned: false +license: mit +--- + +# 🌟 治愈系记录助手 - SoulMate AI Companion + +一个温暖、治愈的 AI 陪伴应用,帮助你记录心情、捕捉灵感、管理待办。 + +## ✨ 核心特性 + +- 🎤 **语音/文字快速记录** - 自动分类保存 +- 🤖 **AI 语义解析** - 智能提取情绪、灵感和待办 +- 💬 **AI 对话陪伴(RAG)** - 基于历史记录的个性化对话 +- 🖼️ **AI 形象定制** - 生成专属治愈系角色(720 种组合) +- 🫧 **物理引擎心情池** - 基于 Matter.js 的动态气泡可视化 + +## 🚀 快速开始 + +### 在线使用 + +直接访问本 Space 即可使用完整功能! + +### ⚙️ 配置 API 密钥 + +在 Space 的 **Settings → Variables and secrets** 中配置: + +**必需:** +- `ZHIPU_API_KEY` - 智谱 AI API 密钥 + - 获取地址:https://open.bigmodel.cn/ + - 用途:语音识别、语义解析、AI 对话 + +**可选:** +- `MINIMAX_API_KEY` - MiniMax API 密钥 +- `MINIMAX_GROUP_ID` - MiniMax Group ID + - 获取地址:https://platform.minimaxi.com/ + - 用途:AI 形象生成 + +配置后,点击 **Factory reboot** 重启 Space 使配置生效。 + +## 📖 使用说明 + +1. **首页快速记录** + - 点击麦克风录音或在输入框输入文字 + - AI 自动分析并分类保存 + +2. **查看分类数据** + - 点击顶部心情、灵感、待办图标 + - 查看不同类型的记录 + +3. **与 AI 对话** + - 点击 AI 形象显示问候对话框 + - 点击对话框中的聊天图标进入完整对话 + - AI 基于你的历史记录提供个性化回复 + +4. **定制 AI 形象** + - 点击右下角 ✨ 按钮 + - 选择颜色、性格、外观、角色 + - 生成专属形象(需要 MiniMax API) + +5. 
**心情气泡池** + - 点击顶部心情图标 + - 左右滑动查看不同日期的心情卡片 + - 点击卡片展开查看当天的气泡池 + - 可以拖拽气泡,感受物理引擎效果 + +## 📊 API 端点 + +- `POST /api/process` - 处理文本/语音输入 +- `POST /api/chat` - 与 AI 对话(RAG) +- `GET /api/records` - 获取所有记录 +- `GET /api/moods` - 获取情绪数据 +- `GET /api/inspirations` - 获取灵感 +- `GET /api/todos` - 获取待办事项 +- `POST /api/character/generate` - 生成角色形象 +- `GET /health` - 健康检查 +- `GET /docs` - API 文档 + +## 🔗 相关链接 + +- [GitHub 仓库](https://github.com/kernel-14/Nora) +- [完整文档](https://github.com/kernel-14/Nora/blob/main/README.md) +- [智谱 AI](https://open.bigmodel.cn/) +- [MiniMax](https://platform.minimaxi.com/) + +## 📝 技术栈 + +- **后端**: FastAPI + Python 3.11 +- **前端**: React + TypeScript + Vite +- **物理引擎**: Matter.js +- **AI 服务**: 智谱 AI (GLM-4) + MiniMax +- **部署**: Hugging Face Spaces (Docker) + +## 🐛 故障排查 + +### 问题:语音识别失败 + +**原因**:未配置 ZHIPU_API_KEY 或 API 配额不足 + +**解决方案**: +1. 在 Space Settings 中配置 `ZHIPU_API_KEY` +2. 访问 https://open.bigmodel.cn/ 检查配额 +3. Factory reboot 重启 Space + +### 问题:AI 形象生成失败 + +**原因**:未配置 MINIMAX_API_KEY 或 API 配额不足 + +**解决方案**: +1. 在 Space Settings 中配置 `MINIMAX_API_KEY` 和 `MINIMAX_GROUP_ID` +2. 访问 https://platform.minimaxi.com/ 检查配额 +3. Factory reboot 重启 Space + +### 问题:Space 构建失败 + +**原因**:缺少必要的文件或配置 + +**检查清单**: +- ✅ 根目录有 `Dockerfile` +- ✅ 根目录有 `start.py` +- ✅ 根目录有 `requirements.txt` +- ✅ `frontend/dist/` 目录存在且包含构建文件 + +## 📄 License + +MIT License diff --git a/app/__init__.py b/app/__init__.py new file mode 100644 index 0000000000000000000000000000000000000000..f3d05711f1fa89afd7b476d5c6c6370e4cb1a7a4 --- /dev/null +++ b/app/__init__.py @@ -0,0 +1 @@ +"""Voice Text Processor Application""" diff --git a/app/asr_service.py b/app/asr_service.py new file mode 100644 index 0000000000000000000000000000000000000000..4e85ff40ca7ce99857945e3670a2bf2b52cc2fbf --- /dev/null +++ b/app/asr_service.py @@ -0,0 +1,202 @@ +"""ASR (Automatic Speech Recognition) service for Voice Text Processor. 
+ +This module implements the ASRService class for transcribing audio files +to text using the Zhipu AI GLM-ASR-2512 API. + +Requirements: 2.1, 2.2, 2.3, 2.4, 9.2, 9.5 +""" + +import logging +from typing import Optional +import httpx + + +logger = logging.getLogger(__name__) + + +class ASRServiceError(Exception): + """Exception raised when ASR service operations fail. + + This exception is raised when the Zhipu ASR API call fails, + such as due to network issues, API errors, or invalid responses. + The message is also kept on the ``message`` attribute. + + Requirements: 2.3 + """ + + def __init__(self, message: str = "语音识别服务不可用"): + """Initialize ASRServiceError. + + Args: + message: Error message describing the failure + """ + super().__init__(message) + self.message = message + + +class ASRService: + """Service for transcribing audio files using Zhipu AI ASR API. + + This service handles audio file transcription by calling the Zhipu AI + GLM-ASR-2512 API. It manages API authentication, request formatting, + response parsing, and error handling. + + Attributes: + api_key: Zhipu AI API key for authentication + client: Async HTTP client for making API requests + api_url: Zhipu AI ASR API endpoint URL + model: ASR model identifier + + Requirements: 2.1, 2.2, 2.3, 2.4, 9.2, 9.5 + """ + + def __init__(self, api_key: str): + """Initialize the ASR service. + + Creates an httpx.AsyncClient with a 30-second timeout; call close() + when the service is no longer needed to release it. + + Args: + api_key: Zhipu AI API key for authentication + """ + self.api_key = api_key + self.client = httpx.AsyncClient(timeout=30.0) + self.api_url = "https://api.z.ai/api/paas/v4/audio/transcriptions" + self.model = "glm-asr-2512" + + async def close(self): + """Close the HTTP client. + + This should be called when the service is no longer needed + to properly clean up resources. + """ + await self.client.aclose() + + async def transcribe(self, audio_file: bytes, filename: str = "audio.mp3") -> str: + """Transcribe audio file to text using Zhipu ASR API. + + This method sends the audio file to the Zhipu AI ASR API and returns + the transcribed text. 
It handles API errors and empty recognition results, + and logs every failure before raising it as ASRServiceError. + + Args: + audio_file: Audio file content as bytes + filename: Name of the audio file (for API request) + + Returns: + Transcribed text content. Returns empty string if audio cannot + be recognized (empty recognition result). + + Raises: + ASRServiceError: If API call fails or returns invalid response + + Requirements: 2.1, 2.2, 2.3, 2.4, 9.2, 9.5 + """ + try: + # Prepare request headers + headers = { + "Authorization": f"Bearer {self.api_key}" + } + + # Prepare multipart form data; the file part is always labelled audio/mpeg, + # whatever extension the filename carries + files = { + "file": (filename, audio_file, "audio/mpeg") + } + + data = { + "model": self.model, + "stream": "false" + } + + logger.info(f"Calling Zhipu ASR API for file: {filename}") + + # Make API request + response = await self.client.post( + self.api_url, + headers=headers, + files=files, + data=data + ) + + # Check response status + if response.status_code != 200: + error_msg = f"ASR API returned status {response.status_code}" + try: + error_detail = response.json() + error_msg += f": {error_detail}" + except Exception: + error_msg += f": {response.text}" + + # The "timestamp" extra is read from a throwaway LogRecord built only for + # its creation time. + # NOTE(review): no exception is being handled at this point in the method, + # so exc_info=True has no traceback of ours to attach here. + logger.error( + f"ASR API call failed: {error_msg}", + exc_info=True, + extra={"timestamp": logger.makeRecord( + logger.name, logging.ERROR, "", 0, error_msg, (), None + ).created} + ) + raise ASRServiceError(f"语音识别服务不可用: {error_msg}") + + # Parse response + try: + result = response.json() + except Exception as e: + error_msg = f"Failed to parse ASR API response: {str(e)}" + logger.error( + error_msg, + exc_info=True, + extra={"timestamp": logger.makeRecord( + logger.name, logging.ERROR, "", 0, error_msg, (), None + ).created} + ) + raise ASRServiceError(f"语音识别服务不可用: 响应格式无效") + + # Extract transcribed text + text = result.get("text", "") + + # Handle empty recognition result + if not text or text.strip() == "": + logger.warning( + f"ASR returned empty text for file: {filename}. " + "Audio content may be unrecognizable." 
+ ) + return "" + + logger.info( + f"ASR transcription successful for {filename}. " + f"Text length: {len(text)} characters" + ) + + return text + + except ASRServiceError: + # Already an ASRServiceError (raised and logged above); re-raise it unchanged + raise + + except httpx.TimeoutException as e: + # Listed before the httpx.RequestError clause so timeouts get their own message + error_msg = f"ASR API request timeout: {str(e)}" + logger.error( + error_msg, + exc_info=True, + extra={"timestamp": logger.makeRecord( + logger.name, logging.ERROR, "", 0, error_msg, (), None + ).created} + ) + raise ASRServiceError("语音识别服务不可用: 请求超时") + + except httpx.RequestError as e: + error_msg = f"ASR API request failed: {str(e)}" + logger.error( + error_msg, + exc_info=True, + extra={"timestamp": logger.makeRecord( + logger.name, logging.ERROR, "", 0, error_msg, (), None + ).created} + ) + raise ASRServiceError(f"语音识别服务不可用: 网络错误") + + except Exception as e: + error_msg = f"Unexpected error in ASR service: {str(e)}" + logger.error( + error_msg, + exc_info=True, + extra={"timestamp": logger.makeRecord( + logger.name, logging.ERROR, "", 0, error_msg, (), None + ).created} + ) + raise ASRServiceError(f"语音识别服务不可用: {str(e)}") diff --git a/app/config.py b/app/config.py new file mode 100644 index 0000000000000000000000000000000000000000..9c0c0263d89bb8a7f95afdf3ecc9c5da34a08c6c --- /dev/null +++ b/app/config.py @@ -0,0 +1,226 @@ +"""Configuration management module for Voice Text Processor. + +This module handles loading configuration from environment variables, +validating required settings, and providing configuration access throughout +the application. 
+ +Requirements: 10.1, 10.2, 10.3, 10.4, 10.5 +""" + +import os +from pathlib import Path +from typing import Optional +from pydantic import BaseModel, Field, field_validator +from dotenv import load_dotenv + + +class Config(BaseModel): + """Application configuration loaded from environment variables.""" + + # API Keys + zhipu_api_key: str = Field( + ..., + description="Zhipu AI API key for ASR and GLM-4-Flash services" + ) + + minimax_api_key: Optional[str] = Field( + default=None, + description="MiniMax API key for image generation (optional)" + ) + + minimax_group_id: Optional[str] = Field( + default=None, + description="MiniMax Group ID (optional)" + ) + + # Data storage paths + data_dir: Path = Field( + default=Path("data"), + description="Directory for storing JSON data files" + ) + + # File size limits (in bytes) + max_audio_size: int = Field( + default=10 * 1024 * 1024, # 10 MB default + description="Maximum audio file size in bytes" + ) + + # Logging configuration + log_level: str = Field( + default="INFO", + description="Logging level (DEBUG, INFO, WARNING, ERROR, CRITICAL)" + ) + + log_file: Optional[Path] = Field( + default=Path("logs/app.log"), + description="Log file path" + ) + + # Server configuration + host: str = Field( + default="0.0.0.0", + description="Server host" + ) + + port: int = Field( + default=8000, + description="Server port" + ) + + @field_validator("log_level") + @classmethod + def validate_log_level(cls, v: str) -> str: + """Validate log level is one of the standard levels.""" + valid_levels = ["DEBUG", "INFO", "WARNING", "ERROR", "CRITICAL"] + v_upper = v.upper() + if v_upper not in valid_levels: + raise ValueError(f"log_level must be one of {valid_levels}") + return v_upper + + @field_validator("max_audio_size") + @classmethod + def validate_max_audio_size(cls, v: int) -> int: + """Validate max audio size is positive.""" + if v <= 0: + raise ValueError("max_audio_size must be positive") + return v + + @field_validator("data_dir", 
"log_file") + @classmethod + def convert_to_path(cls, v) -> Path: + """Convert string paths to Path objects.""" + if isinstance(v, str): + return Path(v) + return v + + class Config: + """Pydantic configuration.""" + frozen = True # Make config immutable + + +def load_config() -> Config: + """Load configuration from environment variables. + + Returns: + Config: Validated configuration object + + Raises: + ValueError: If required configuration is missing or invalid + + Environment Variables: + ZHIPU_API_KEY: Required. API key for Zhipu AI services + MINIMAX_API_KEY: Optional. API key for MiniMax image generation + MINIMAX_GROUP_ID: Optional. MiniMax Group ID + DATA_DIR: Optional. Directory for data storage (default: data/) + MAX_AUDIO_SIZE: Optional. Max audio file size in bytes (default: 10MB) + LOG_LEVEL: Optional. Logging level (default: INFO) + LOG_FILE: Optional. Log file path (default: logs/app.log) + HOST: Optional. Server host (default: 0.0.0.0) + PORT: Optional. Server port (default: 8000) + """ + # Load environment variables from .env file + load_dotenv() + + # Load from environment variables + config_dict = { + "zhipu_api_key": os.getenv("ZHIPU_API_KEY"), + "minimax_api_key": os.getenv("MINIMAX_API_KEY"), + "minimax_group_id": os.getenv("MINIMAX_GROUP_ID"), + "data_dir": os.getenv("DATA_DIR", "data"), + "max_audio_size": int(os.getenv("MAX_AUDIO_SIZE", str(10 * 1024 * 1024))), + "log_level": os.getenv("LOG_LEVEL", "INFO"), + "log_file": os.getenv("LOG_FILE", "logs/app.log"), + "host": os.getenv("HOST", "0.0.0.0"), + "port": int(os.getenv("PORT", "8000")), + } + + # Validate required fields + if not config_dict["zhipu_api_key"]: + raise ValueError( + "ZHIPU_API_KEY environment variable is required. " + "Please set it before starting the application." 
+ ) + + # Create and validate config + try: + config = Config(**config_dict) + except Exception as e: + raise ValueError(f"Configuration validation failed: {e}") + + # Ensure data directory exists + config.data_dir.mkdir(parents=True, exist_ok=True) + + # Ensure log directory exists + if config.log_file: + config.log_file.parent.mkdir(parents=True, exist_ok=True) + + return config + + +def validate_config(config: Config) -> None: + """Validate configuration at startup. + + Args: + config: Configuration object to validate + + Raises: + ValueError: If configuration is invalid or required resources are unavailable + """ + # Check data directory is writable + if not os.access(config.data_dir, os.W_OK): + raise ValueError( + f"Data directory {config.data_dir} is not writable. " + "Please check permissions." + ) + + # Check log directory is writable + if config.log_file and not os.access(config.log_file.parent, os.W_OK): + raise ValueError( + f"Log directory {config.log_file.parent} is not writable. " + "Please check permissions." + ) + + # Validate API key format (basic check) + if len(config.zhipu_api_key) < 10: + raise ValueError( + "ZHIPU_API_KEY appears to be invalid (too short). " + "Please check your API key." + ) + + +# Global config instance (loaded on import) +_config: Optional[Config] = None + + +def get_config() -> Config: + """Get the global configuration instance. + + Returns: + Config: The application configuration + + Raises: + RuntimeError: If configuration has not been initialized + """ + global _config + if _config is None: + raise RuntimeError( + "Configuration not initialized. Call init_config() first." + ) + return _config + + +def init_config() -> Config: + """Initialize the global configuration. + + This should be called once at application startup. 
class ImageGenerationService:
    """Service for generating cat character images using the MiniMax API.

    The service turns user preferences (color, personality, appearance, role)
    into an English prompt, posts it to the MiniMax image generation endpoint
    and returns the generated image(s) as URLs or base64 data.

    Attributes:
        api_key: MiniMax API key for authentication
        group_id: MiniMax group ID (stored for compatibility, not sent)
        client: Async HTTP client for making API requests
        api_url: MiniMax API endpoint URL
        model: Model identifier sent in the request payload ("image-01")

    Requirements: PRD - AI形象生成模块
    """

    # Color preference -> prompt fragment
    COLOR_MAPPING = {
        "温暖粉": "soft pastel pink fur, rose-colored aesthetic",
        "天空蓝": "light sky blue fur, serene blue atmosphere",
        "薄荷绿": "mint green fur, fresh green ambiance",
        "奶油黄": "cream yellow fur, warm golden glow",
        "薰衣草紫": "lavender purple fur, gentle purple tones",
        "珊瑚橙": "coral orange fur, warm peachy atmosphere",
        "纯白": "pure white fur, clean minimalist aesthetic",
        "浅灰": "light gray fur, soft neutral tones"
    }

    # Personality trait -> prompt fragment
    PERSONALITY_MAPPING = {
        "活泼": "big curious eyes, dynamic paw gesture, energetic aura, playful expression",
        "温柔": "soft gentle eyes, calm posture, peaceful expression, caring demeanor",
        "聪明": "intelligent eyes, thoughtful expression, wise appearance, attentive look",
        "慵懒": "relaxed eyes, lounging posture, comfortable expression, laid-back vibe",
        "勇敢": "confident eyes, strong posture, determined expression, courageous stance",
        "害羞": "shy eyes, timid posture, gentle expression, reserved demeanor"
    }

    # Appearance feature -> prompt fragment
    APPEARANCE_MAPPING = {
        "戴眼镜": "wearing tiny round glasses, scholarly look",
        "戴帽子": "wearing a cute small hat, fashionable style",
        "戴围巾": "wearing a cozy scarf, warm appearance",
        "戴蝴蝶结": "wearing a cute bow tie, elegant look",
        "无配饰": "natural appearance, simple and pure"
    }

    # Character role -> prompt fragment
    ROLE_MAPPING = {
        "陪伴式朋友": "friendly companion, approachable and warm",
        "温柔照顾型长辈": "caring elder figure, nurturing and protective",
        "引导型老师": "wise teacher figure, knowledgeable and patient"
    }

    # Base prompt template shared by every request
    BASE_PROMPT = (
        "A masterpiece cute stylized cat illustration, {color} theme, "
        "{personality} facial expression and posture, {appearance}. "
        "{role}. Japanese watercolor style, clean minimalist background, "
        "high quality, soft studio lighting, 4k, healing aesthetic, "
        "adorable and heartwarming"
    )

    # Values accepted for the ``response_format`` request field
    SUPPORTED_RESPONSE_FORMATS = ("url", "base64")

    def __init__(self, api_key: str, group_id: Optional[str] = None):
        """Initialize the image generation service.

        Args:
            api_key: MiniMax API key for authentication
            group_id: MiniMax group ID (optional, for compatibility)
        """
        self.api_key = api_key
        self.group_id = group_id  # kept for compatibility, not used
        self.client = httpx.AsyncClient(timeout=120.0)  # image generation is slow
        self.api_url = "https://api.minimaxi.com/v1/image_generation"
        self.model = "image-01"

    async def close(self):
        """Close the HTTP client.

        This should be called when the service is no longer needed
        to properly clean up resources.
        """
        await self.client.aclose()

    @staticmethod
    def _as_list(value) -> list:
        """Normalize an API field that may be a single value or a sequence."""
        if value is None:
            return []
        # A bare string must stay whole; indexing it would yield one character.
        if isinstance(value, (str, bytes)):
            return [value]
        return list(value)

    async def download_image(self, url: str, save_path: str) -> str:
        """Download image from URL and save to local file.

        Args:
            url: Image URL to download
            save_path: Local file path to save the image

        Returns:
            Absolute path to the saved image file

        Raises:
            ImageGenerationError: If download fails
        """
        try:
            logger.info(f"Downloading image from: {url}")

            # Create the target directory if it does not exist yet
            save_path_obj = Path(save_path)
            save_path_obj.parent.mkdir(parents=True, exist_ok=True)

            response = await self.client.get(url, timeout=60.0)

            if response.status_code != 200:
                error_msg = f"Failed to download image: HTTP {response.status_code}"
                logger.error(error_msg)
                raise ImageGenerationError(error_msg)

            save_path_obj.write_bytes(response.content)

            abs_path = str(save_path_obj.absolute())
            logger.info(f"Image saved to: {abs_path}")

            return abs_path

        except ImageGenerationError:
            raise
        except Exception as e:
            error_msg = f"Failed to download image: {str(e)}"
            logger.error(error_msg)
            raise ImageGenerationError(error_msg) from e

    def build_prompt(
        self,
        color: str = "温暖粉",
        personality: str = "温柔",
        appearance: str = "无配饰",
        role: str = "陪伴式朋友"
    ) -> str:
        """Build the complete prompt for image generation.

        Unknown values fall back to the default entry of the corresponding
        mapping instead of raising.

        Args:
            color: Color preference (a key of COLOR_MAPPING)
            personality: Personality trait (a key of PERSONALITY_MAPPING)
            appearance: Appearance feature (a key of APPEARANCE_MAPPING)
            role: Character role (a key of ROLE_MAPPING)

        Returns:
            Complete prompt string for the image generation API
        """
        color_desc = self.COLOR_MAPPING.get(color, self.COLOR_MAPPING["温暖粉"])
        personality_desc = self.PERSONALITY_MAPPING.get(
            personality,
            self.PERSONALITY_MAPPING["温柔"]
        )
        appearance_desc = self.APPEARANCE_MAPPING.get(
            appearance,
            self.APPEARANCE_MAPPING["无配饰"]
        )
        role_desc = self.ROLE_MAPPING.get(
            role,
            self.ROLE_MAPPING["陪伴式朋友"]
        )

        prompt = self.BASE_PROMPT.format(
            color=color_desc,
            personality=personality_desc,
            appearance=appearance_desc,
            role=role_desc
        )

        logger.info(f"Generated prompt: {prompt[:100]}...")
        return prompt

    async def generate_image(
        self,
        color: str = "温暖粉",
        personality: str = "温柔",
        appearance: str = "无配饰",
        role: str = "陪伴式朋友",
        aspect_ratio: str = "1:1",
        n: int = 1,
        response_format: str = "url"
    ) -> Dict[str, str]:
        """Generate a cat character image using MiniMax API.

        Sends the constructed prompt to the MiniMax API and returns the
        generated image URL(s) or base64 data.

        Args:
            color: Color preference
            personality: Personality trait
            appearance: Appearance feature
            role: Character role
            aspect_ratio: Image aspect ratio (1:1, 16:9, 9:16, 4:3, 3:4)
            n: Number of images to generate (1-4)
            response_format: Response format ("url" or "base64")

        Returns:
            Dictionary containing:
                - url: Image URL, or list of URLs when n > 1
                  (if response_format="url")
                - data: Base64 image data, or list when n > 1
                  (if response_format="base64")
                - prompt: Used prompt
                - task_id: Task ID from MiniMax
                - metadata: The response "metadata" object ({} if absent)

        Raises:
            ValueError: If response_format is not "url" or "base64"
            ImageGenerationError: If API call fails or returns invalid response
        """
        if response_format not in self.SUPPORTED_RESPONSE_FORMATS:
            raise ValueError(
                f"response_format must be one of {self.SUPPORTED_RESPONSE_FORMATS}, "
                f"got {response_format!r}"
            )
        wants_base64 = response_format == "base64"

        try:
            prompt = self.build_prompt(color, personality, appearance, role)

            headers = {
                "Authorization": f"Bearer {self.api_key.strip()}",
                "Content-Type": "application/json"
            }

            payload = {
                "model": self.model,
                "prompt": prompt,
                "aspect_ratio": aspect_ratio,
                "response_format": response_format,
                "n": n,
                "prompt_optimizer": True
            }

            logger.info(
                f"Calling MiniMax API for image generation. "
                f"Aspect ratio: {aspect_ratio}, Count: {n}"
            )
            logger.debug(f"API URL: {self.api_url}")
            # The API key is deliberately never logged, not even a prefix.
            logger.debug(f"Payload: {json.dumps(payload, ensure_ascii=False)}")

            response = await self.client.post(
                self.api_url,
                headers=headers,
                json=payload
            )

            if response.status_code != 200:
                error_msg = f"MiniMax API returned status {response.status_code}"
                try:
                    error_detail = response.json()
                    error_msg += f": {json.dumps(error_detail, ensure_ascii=False)}"
                except Exception:
                    error_msg += f": {response.text}"

                logger.error(f"Image generation API call failed: {error_msg}")
                logger.error(f"Request URL: {self.api_url}")
                logger.error(f"Request payload: {json.dumps(payload, ensure_ascii=False)}")
                raise ImageGenerationError(f"图像生成服务不可用: {error_msg}")

            try:
                result = response.json()
            except Exception as e:
                error_msg = f"Failed to parse MiniMax API response: {str(e)}"
                logger.error(error_msg)
                logger.error(f"Raw response text: {response.text}")
                raise ImageGenerationError("图像生成服务不可用: 响应格式无效") from e

            if wants_base64:
                # Do not dump the encoded image bytes into the log.
                logger.info(
                    f"API Response keys: "
                    f"{list(result.keys()) if isinstance(result, dict) else 'not a dict'}"
                )
            else:
                logger.info(f"API Response (full): {json.dumps(result, indent=2, ensure_ascii=False)}")

            try:
                # Observed MiniMax response shape:
                # {
                #   "id": "task_id",
                #   "data": {"image_urls": [...]},
                #   "metadata": {...},
                #   "base_resp": {"status_code": 0, "status_msg": "success"}
                # }
                if "base_resp" in result:
                    base_resp = result.get("base_resp", {})
                    status_code = base_resp.get("status_code", -1)
                    error_msg = base_resp.get("status_msg", "Unknown error")

                    # status_code == 0 means success
                    if status_code != 0:
                        logger.error(f"MiniMax API error: {status_code} - {error_msg}")
                        raise ImageGenerationError(f"图像生成失败: {error_msg}")

                    logger.info(f"MiniMax API success: {status_code} - {error_msg}")

                # The task id may be reported as "id" or "task_id"
                task_id = result.get("id") or result.get("task_id", "")

                images = []
                if "data" in result:
                    data = result["data"]
                    logger.info(f"Data field keys: {list(data.keys()) if isinstance(data, dict) else 'not a dict'}")

                    if isinstance(data, dict):
                        # NOTE(review): "image_base64" is the field name expected for
                        # base64 responses - confirm against the MiniMax API reference.
                        fields = ("image_base64",) if wants_base64 else ("image_urls", "url")
                        for field in fields:
                            if field in data:
                                images = self._as_list(data[field])
                                logger.info(f"Found {field} field")
                                break

                if images:
                    # A single requested image is returned bare, several as a list
                    image = images[0] if n == 1 else images
                    if wants_base64:
                        logger.info(f"Image generation successful. Images: {len(images)}")
                    else:
                        logger.info(f"Image generation successful. URLs: {images}")

                    return {
                        ("data" if wants_base64 else "url"): image,
                        "prompt": prompt,
                        "task_id": task_id,
                        "metadata": result.get("metadata", {})
                    }

                # Reaching this point means the response had no usable image field
                if wants_base64:
                    logger.error("Could not extract image data from response")
                else:
                    logger.error(f"Could not extract image URLs from response: {json.dumps(result, ensure_ascii=False)}")
                raise ImageGenerationError("API 响应格式错误: 无法提取图像 URL")

            except (KeyError, IndexError) as e:
                error_msg = f"Invalid API response structure: {str(e)}, Response: {json.dumps(result, ensure_ascii=False)}"
                logger.error(error_msg)
                raise ImageGenerationError("图像生成服务不可用: 响应结构无效") from e

        except ImageGenerationError:
            # Re-raise ImageGenerationError as-is
            raise

        except httpx.TimeoutException as e:
            error_msg = f"MiniMax API request timeout: {str(e)}"
            logger.error(error_msg)
            raise ImageGenerationError("图像生成服务不可用: 请求超时") from e

        except httpx.RequestError as e:
            error_msg = f"MiniMax API request failed: {str(e)}"
            logger.error(error_msg)
            raise ImageGenerationError("图像生成服务不可用: 网络错误") from e

        except Exception as e:
            error_msg = f"Unexpected error in image generation service: {str(e)}"
            logger.error(error_msg, exc_info=True)
            raise ImageGenerationError(f"图像生成服务不可用: {str(e)}") from e

    async def generate_multiple_images(
        self,
        color: str = "温暖粉",
        personality: str = "温柔",
        appearance: str = "无配饰",
        role: str = "陪伴式朋友",
        count: int = 3,
        aspect_ratio: str = "1:1"
    ) -> List[Dict[str, str]]:
        """Generate multiple cat character images.

        All images are requested in one API call with the same parameters,
        allowing users to choose their favorite one.

        Args:
            color: Color preference
            personality: Personality trait
            appearance: Appearance feature
            role: Character role
            count: Number of images to generate (1-4)
            aspect_ratio: Image aspect ratio

        Returns:
            List of dictionaries, each containing url, prompt, task_id and index

        Raises:
            ValueError: If count is outside 1-4
            ImageGenerationError: If the API call fails
        """
        if count < 1 or count > 4:
            raise ValueError("Count must be between 1 and 4")

        try:
            # MiniMax can produce several images in a single request
            result = await self.generate_image(
                color=color,
                personality=personality,
                appearance=appearance,
                role=role,
                aspect_ratio=aspect_ratio,
                n=count
            )

            # generate_image returns a bare URL when only one image was requested
            urls = result['url'] if isinstance(result['url'], list) else [result['url']]

            return [
                {
                    "url": url,
                    "prompt": result['prompt'],
                    "task_id": result['task_id'],
                    "index": i
                }
                for i, url in enumerate(urls)
            ]

        except ImageGenerationError as e:
            logger.error(f"Failed to generate images: {e.message}")
            raise
class SensitiveDataFilter(logging.Filter):
    """Filter to remove sensitive information from log records.

    This filter masks API keys, bearer tokens, passwords and authorization
    header values in the log message and in string arguments, so they do not
    appear in the formatted output.

    Requirements: 10.5
    """

    # Characters allowed inside a key/token. "." is included so that dotted
    # credentials (for example "<id>.<secret>" keys or JWTs) are masked as a
    # whole instead of leaking everything after the first dot.
    _KEY_TOKEN = r'([a-zA-Z0-9_.\-]{10,})'
    _BEARER_TOKEN = r'([a-zA-Z0-9_.~+/\-]{10,}=*)'

    # Patterns to detect and mask sensitive data
    SENSITIVE_PATTERNS = [
        # API keys (various formats)
        (re.compile(r'(api[_-]?key["\s:=]+)' + _KEY_TOKEN, re.IGNORECASE), r'\1***REDACTED***'),
        (re.compile(r'(zhipu[_-]?api[_-]?key["\s:=]+)' + _KEY_TOKEN, re.IGNORECASE), r'\1***REDACTED***'),
        # Bearer tokens
        (re.compile(r'(bearer\s+)' + _BEARER_TOKEN, re.IGNORECASE), r'\1***REDACTED***'),
        # Passwords
        (re.compile(r'(password["\s:=]+)([^\s"]+)', re.IGNORECASE), r'\1***REDACTED***'),
        # Authorization headers (capture the whole value)
        (re.compile(r'(authorization["\s:=]+)([^\s"]+)', re.IGNORECASE), r'\1***REDACTED***'),
    ]

    def filter(self, record: logging.LogRecord) -> bool:
        """Filter log record to mask sensitive data.

        Args:
            record: Log record to filter

        Returns:
            bool: Always True (we modify but don't reject records)
        """
        # Mask sensitive data in the message template
        if isinstance(record.msg, str):
            record.msg = self._mask_sensitive_data(record.msg)

        # Mask sensitive data in string arguments; other argument types are
        # passed through untouched so %d / %r formatting keeps working.
        args = record.args
        if args:
            if isinstance(args, dict):
                record.args = {key: self._mask_argument(value) for key, value in args.items()}
            elif isinstance(args, tuple):
                record.args = tuple(self._mask_argument(value) for value in args)

        return True

    def _mask_argument(self, value):
        """Return ``value`` masked when it is a string, unchanged otherwise."""
        return self._mask_sensitive_data(value) if isinstance(value, str) else value

    def _mask_sensitive_data(self, text: str) -> str:
        """Mask sensitive data in text using regex patterns.

        Args:
            text: Text to mask

        Returns:
            str: Text with sensitive data masked
        """
        for pattern, replacement in self.SENSITIVE_PATTERNS:
            text = pattern.sub(replacement, text)
        return text
+ log_format: Optional custom log format string + + Requirements: 10.5, 9.5 + """ + # Default log format with request_id, timestamp, level, and message + if log_format is None: + log_format = "[%(asctime)s] [%(levelname)s] [%(request_id)s] [%(name)s] %(message)s" + + # Date format + date_format = "%Y-%m-%d %H:%M:%S" + + # Create formatter + formatter = logging.Formatter(log_format, datefmt=date_format) + + # Get root logger + root_logger = logging.getLogger() + root_logger.setLevel(getattr(logging, log_level.upper())) + + # Remove existing handlers + root_logger.handlers.clear() + + # Add filters + request_id_filter = RequestIdFilter() + sensitive_filter = SensitiveDataFilter() + + # Console handler + console_handler = logging.StreamHandler() + console_handler.setFormatter(formatter) + console_handler.addFilter(request_id_filter) + console_handler.addFilter(sensitive_filter) + root_logger.addHandler(console_handler) + + # File handler (if log file specified) + if log_file: + file_handler = logging.FileHandler(log_file, encoding="utf-8") + file_handler.setFormatter(formatter) + file_handler.addFilter(request_id_filter) + file_handler.addFilter(sensitive_filter) + root_logger.addHandler(file_handler) + + # Log startup message + logger = logging.getLogger(__name__) + logger.info(f"Logging initialized at level {log_level}") + if log_file: + logger.info(f"Logging to file: {log_file}") + + +def get_logger(name: str) -> logging.Logger: + """Get a logger instance for a module. + + Args: + name: Logger name (typically __name__) + + Returns: + logging.Logger: Logger instance + """ + return logging.getLogger(name) + + +def set_request_id(request_id: str) -> None: + """Set the request_id in the current context. + + This should be called at the beginning of each request to ensure + all log messages include the request_id. 
+ + Args: + request_id: Unique identifier for the request + + Requirements: 9.5 + """ + request_id_var.set(request_id) + + +def clear_request_id() -> None: + """Clear the request_id from the current context. + + This should be called at the end of each request to clean up. + """ + request_id_var.set(None) diff --git a/app/main.py b/app/main.py new file mode 100644 index 0000000000000000000000000000000000000000..4f9fd51ca8544824b0dd479371f1748484722875 --- /dev/null +++ b/app/main.py @@ -0,0 +1,1132 @@ +"""Main FastAPI application for Voice Text Processor. + +This module initializes the FastAPI application, sets up configuration, +logging, and defines the application lifecycle. + +Requirements: 10.1, 10.2, 10.3, 10.4, 10.5 +""" + +import logging +import uuid +from contextlib import asynccontextmanager +from datetime import datetime +from typing import Optional +from fastapi import FastAPI, File, UploadFile, Form, HTTPException +from fastapi.responses import JSONResponse +from fastapi.middleware.cors import CORSMiddleware +from fastapi.staticfiles import StaticFiles + +from app.config import init_config, get_config +from app.logging_config import setup_logging, set_request_id, clear_request_id +from app.models import ProcessResponse, RecordData, ParsedData +from app.storage import StorageService, StorageError +from app.asr_service import ASRService, ASRServiceError +from app.semantic_parser import SemanticParserService, SemanticParserError + + +logger = logging.getLogger(__name__) + + +@asynccontextmanager +async def lifespan(app: FastAPI): + """Application lifespan manager. + + This handles startup and shutdown events for the application. + On startup, it initializes configuration and logging. 
+ + Requirements: 10.4 - Startup configuration validation + """ + # Startup + logger.info("Starting Voice Text Processor application...") + + try: + # Initialize configuration (will raise ValueError if invalid) + config = init_config() + logger.info("Configuration loaded and validated successfully") + + # Setup logging with config values + setup_logging( + log_level=config.log_level, + log_file=config.log_file + ) + logger.info("Logging system configured") + + # Log configuration (without sensitive data) + logger.info(f"Data directory: {config.data_dir}") + logger.info(f"Max audio size: {config.max_audio_size} bytes") + logger.info(f"Log level: {config.log_level}") + + except ValueError as e: + # Configuration validation failed - refuse to start + logger.error(f"Configuration validation failed: {e}") + logger.error("Application startup aborted due to configuration errors") + raise RuntimeError(f"Configuration error: {e}") from e + except Exception as e: + logger.error(f"Unexpected error during startup: {e}", exc_info=True) + raise RuntimeError(f"Startup error: {e}") from e + + logger.info("Application startup complete") + + yield + + # Shutdown + logger.info("Shutting down Voice Text Processor application...") + logger.info("Application shutdown complete") + + +# Create FastAPI application +app = FastAPI( + title="Voice Text Processor", + description="治愈系记录助手后端核心模块 - 语音和文本处理服务", + version="1.0.0", + lifespan=lifespan +) + +# Add CORS middleware +app.add_middleware( + CORSMiddleware, + allow_origins=[ + "http://localhost:5173", + "http://localhost:3000", + "http://172.18.16.245:5173", # 允许从电脑 IP 访问 + "*" # 开发环境允许所有来源(生产环境应该限制) + ], + allow_credentials=True, + allow_methods=["*"], + allow_headers=["*"], +) + +# Mount static files for generated images +from pathlib import Path +from fastapi import Request + +generated_images_dir = Path("generated_images") +generated_images_dir.mkdir(exist_ok=True) +app.mount("/generated_images", 
def _error_response(
    status_code: int,
    error: str,
    timestamp: str,
    detail: Optional[str] = None
) -> JSONResponse:
    """Build the JSON error payload shared by every failure path of process_input."""
    content = {"error": error}
    if detail is not None:
        content["detail"] = detail
    content["timestamp"] = timestamp
    return JSONResponse(status_code=status_code, content=content)


@app.post("/api/process", response_model=ProcessResponse)
async def process_input(
    audio: Optional[UploadFile] = File(None),
    text: Optional[str] = Form(None)
) -> ProcessResponse:
    """Process user input (audio or text) and extract structured data.

    Exactly one of ``audio`` or ``text`` must be supplied. Audio is first
    transcribed, then the text is semantically parsed and the record, mood,
    inspirations and todos are written to storage.

    Args:
        audio: Audio file (multipart/form-data); the extension must be one of
            SUPPORTED_AUDIO_FORMATS
        text: Text content submitted as a form field

    Returns:
        ProcessResponse containing record_id, timestamp, mood, inspirations, todos.
        Failures are returned as a JSONResponse with "error", optional "detail"
        and "timestamp": HTTP 400 for validation errors, HTTP 500 for ASR,
        parser, storage and unexpected errors.

    Requirements: 1.1, 1.2, 1.3, 7.7, 8.1, 8.2, 8.3, 8.4, 8.5, 8.6, 9.1, 9.2, 9.3, 9.4, 9.5
    """
    request_id = str(uuid.uuid4())
    timestamp = datetime.utcnow().isoformat() + "Z"

    # Set request_id in logging context; cleared once in the outer ``finally``
    set_request_id(request_id)

    logger.info(f"Processing request - audio: {audio is not None}, text: {text is not None}")

    try:
        # Input validation: exactly one input kind is allowed
        if audio is None and text is None:
            raise ValidationError("请提供音频文件或文本内容")

        if audio is not None and text is not None:
            raise ValidationError("请只提供音频文件或文本内容中的一种")

        config = get_config()

        storage_service = StorageService(str(config.data_dir))
        asr_service = ASRService(config.zhipu_api_key)
        parser_service = SemanticParserService(config.zhipu_api_key)

        original_text = ""
        input_type = "text"

        try:
            if audio is not None:
                input_type = "audio"

                # Validate audio format by file extension
                filename = audio.filename or "audio"
                file_ext = "." + filename.split(".")[-1].lower() if "." in filename else ""

                if file_ext not in SUPPORTED_AUDIO_FORMATS:
                    # sorted() keeps the message stable; set order is arbitrary
                    raise ValidationError(
                        f"不支持的音频格式: {file_ext}. "
                        f"支持的格式: {', '.join(sorted(SUPPORTED_AUDIO_FORMATS))}"
                    )

                audio_content = await audio.read()

                if len(audio_content) > config.max_audio_size:
                    raise ValidationError(
                        f"音频文件过大: {len(audio_content)} bytes. "
                        f"最大允许: {config.max_audio_size} bytes"
                    )

                logger.info(
                    f"Audio file received: {filename}, "
                    f"size: {len(audio_content)} bytes"
                )

                # Transcribe audio to text; ASRServiceError is handled below
                original_text = await asr_service.transcribe(audio_content, filename)
                logger.info(
                    f"ASR transcription successful. "
                    f"Text length: {len(original_text)}"
                )

            else:
                # Whitespace-only text is accepted; None or empty string is not
                if text is None or text == "":
                    raise ValidationError("文本内容不能为空")

                original_text = text
                logger.info(
                    f"Text input received. "
                    f"Length: {len(original_text)}"
                )

            # Semantic parsing; SemanticParserError is handled below
            parsed_data = await parser_service.parse(original_text)
            logger.info(
                f"Semantic parsing successful. "
                f"Mood: {'present' if parsed_data.mood else 'none'}, "
                f"Inspirations: {len(parsed_data.inspirations)}, "
                f"Todos: {len(parsed_data.todos)}"
            )

            # Generate record ID and timestamp
            record_id = str(uuid.uuid4())
            record_timestamp = datetime.utcnow().isoformat() + "Z"

            record = RecordData(
                record_id=record_id,
                timestamp=record_timestamp,
                input_type=input_type,
                original_text=original_text,
                parsed_data=parsed_data
            )

            # Persist the record and its parts; StorageError is handled below
            storage_service.save_record(record)
            logger.info(f"Record saved: {record_id}")

            if parsed_data.mood:
                storage_service.append_mood(
                    parsed_data.mood,
                    record_id,
                    record_timestamp
                )
                logger.info(f"Mood data saved")

            if parsed_data.inspirations:
                storage_service.append_inspirations(
                    parsed_data.inspirations,
                    record_id,
                    record_timestamp
                )
                logger.info(
                    f"{len(parsed_data.inspirations)} "
                    f"inspiration(s) saved"
                )

            if parsed_data.todos:
                storage_service.append_todos(
                    parsed_data.todos,
                    record_id,
                    record_timestamp
                )
                logger.info(
                    f"{len(parsed_data.todos)} "
                    f"todo(s) saved"
                )

            response = ProcessResponse(
                record_id=record_id,
                timestamp=record_timestamp,
                mood=parsed_data.mood,
                inspirations=parsed_data.inspirations,
                todos=parsed_data.todos
            )

            logger.info(f"Request processed successfully")

            return response

        finally:
            # Close both service clients even when the first close() fails
            try:
                await asr_service.close()
            finally:
                await parser_service.close()

    except ValidationError as e:
        # Input validation error - HTTP 400
        logger.warning(
            f"Validation error: {e.message}",
            exc_info=True
        )
        return _error_response(400, e.message, timestamp)

    except ASRServiceError as e:
        # ASR service error - HTTP 500
        logger.error(
            f"ASR service unavailable: {e.message}",
            exc_info=True
        )
        return _error_response(500, "语音识别服务不可用", timestamp, e.message)

    except SemanticParserError as e:
        # Semantic parser error - HTTP 500
        logger.error(
            f"Semantic parser unavailable: {e.message}",
            exc_info=True
        )
        return _error_response(500, "语义解析服务不可用", timestamp, e.message)

    except StorageError as e:
        # Storage error - HTTP 500
        logger.error(
            f"Storage error: {str(e)}",
            exc_info=True
        )
        return _error_response(500, "数据存储失败", timestamp, str(e))

    except Exception as e:
        # Unexpected error - HTTP 500
        # NOTE(review): "detail" exposes the raw exception text to the client.
        logger.error(
            f"Unexpected error: {str(e)}",
            exc_info=True
        )
        return _error_response(500, "服务器内部错误", timestamp, str(e))

    finally:
        # Single cleanup point for the logging context, whatever the outcome
        clear_request_id()
合并两个来源的心情数据(去重,优先使用 records 中的数据) + # 同时需要补充 moods.json 中缺失的 original_text + mood_dict = {} + + # 先添加 moods.json 中的数据 + for mood in moods_from_file: + mood_dict[mood["record_id"]] = mood + # 如果没有 original_text,设置为空字符串 + if "original_text" not in mood: + mood["original_text"] = "" + + # 再添加/覆盖 records.json 中的数据(包含 original_text) + for mood in moods_from_records: + mood_dict[mood["record_id"]] = mood + + # 转换为列表并按时间排序(最新的在前) + all_moods = list(mood_dict.values()) + all_moods.sort(key=lambda x: x["timestamp"], reverse=True) + + logger.info(f"Total unique moods: {len(all_moods)}") + + return {"moods": all_moods} + except Exception as e: + logger.error(f"Failed to get moods: {e}", exc_info=True) + return JSONResponse( + status_code=500, + content={"error": str(e)} + ) + + +@app.get("/api/inspirations") +async def get_inspirations(): + """Get all inspirations.""" + try: + config = get_config() + storage_service = StorageService(str(config.data_dir)) + inspirations = storage_service._read_json_file(storage_service.inspirations_file) + return {"inspirations": inspirations} + except Exception as e: + logger.error(f"Failed to get inspirations: {e}") + return JSONResponse( + status_code=500, + content={"error": str(e)} + ) + + +@app.get("/api/todos") +async def get_todos(): + """Get all todos.""" + try: + config = get_config() + storage_service = StorageService(str(config.data_dir)) + todos = storage_service._read_json_file(storage_service.todos_file) + return {"todos": todos} + except Exception as e: + logger.error(f"Failed to get todos: {e}") + return JSONResponse( + status_code=500, + content={"error": str(e)} + ) + + +@app.patch("/api/todos/{todo_id}") +async def update_todo(todo_id: str, status: str = Form(...)): + """Update todo status.""" + try: + config = get_config() + storage_service = StorageService(str(config.data_dir)) + todos = storage_service._read_json_file(storage_service.todos_file) + + # Find and update todo + updated = False + for todo in todos: + if 
def _build_chat_context(records):
    """Render the ten most recent records as the history part of the prompt.

    Each record contributes its timestamp and original text, followed by
    optional mood, inspiration and todo lines. Null or malformed parsed data
    is tolerated: the affected line is simply omitted.

    Args:
        records: Rows loaded from records.json, oldest first.

    Returns:
        The joined context text, or the "no history" placeholder when there is
        nothing to show.
    """
    context_parts = []

    for record in records[-10:]:
        if not isinstance(record, dict):
            continue

        original_text = record.get('original_text', '')
        timestamp = record.get('timestamp', '')

        # Keys may be present with null values, hence ``or`` instead of defaults.
        parsed_data = record.get('parsed_data') or {}
        mood = parsed_data.get('mood')
        inspirations = parsed_data.get('inspirations') or []
        todos = parsed_data.get('todos') or []

        context_entry = f"[{timestamp}] 用户说: {original_text}"

        if isinstance(mood, dict) and mood:
            context_entry += f"\n情绪: {mood.get('type')} (强度: {mood.get('intensity')})"

        # Drop entries without text so that ``join`` never sees ``None``.
        ideas = [
            str(insp.get('core_idea'))
            for insp in inspirations
            if isinstance(insp, dict) and insp.get('core_idea')
        ]
        if ideas:
            context_entry += f"\n灵感: {', '.join(ideas)}"

        tasks = [
            str(todo.get('task'))
            for todo in todos
            if isinstance(todo, dict) and todo.get('task')
        ]
        if tasks:
            context_entry += f"\n待办: {', '.join(tasks)}"

        context_parts.append(context_entry)

    return "\n\n".join(context_parts) if context_parts else "暂无历史记录"


@app.post("/api/chat")
async def chat_with_ai(text: str = Form(...)):
    """Chat with AI assistant using RAG with records.json as knowledge base.

    The last ten records are rendered into the system prompt so the model can
    refer to the user's earlier moods, inspirations and todos.

    Args:
        text: The user's chat message (form field).

    Returns:
        ``{"response": <reply>}``. Failures never surface as HTTP errors: a
        friendly fallback sentence is returned instead and the cause is logged.
    """
    fallback_reply = "抱歉,我现在有点累了,稍后再聊好吗?"

    try:
        import httpx

        config = get_config()
        storage_service = StorageService(str(config.data_dir))

        # Load user's records as RAG knowledge base
        records = storage_service._read_json_file(storage_service.records_file)
        context_text = _build_chat_context(records)

        system_prompt = f"""你是一个温柔、善解人意的AI陪伴助手。你的名字叫小喵。
你会用温暖、治愈的语气和用户聊天,给予他们情感支持和陪伴。
回复要简短、自然、有温度。

你可以参考用户的历史记录来提供更贴心的回复:

{context_text}

请基于这些背景信息,用温暖、理解的语气回复用户。如果用户提到之前的事情,你可以自然地关联起来。"""

        try:
            # Generous timeout: the completion call can take a while.
            async with httpx.AsyncClient(timeout=60.0) as client:
                response = await client.post(
                    "https://open.bigmodel.cn/api/paas/v4/chat/completions",
                    headers={
                        "Authorization": f"Bearer {config.zhipu_api_key}",
                        "Content-Type": "application/json"
                    },
                    json={
                        "model": "glm-4-flash",
                        "messages": [
                            {
                                "role": "system",
                                "content": system_prompt
                            },
                            {
                                "role": "user",
                                "content": text
                            }
                        ],
                        "temperature": 0.8,
                        "top_p": 0.9
                    }
                )

            if response.status_code != 200:
                logger.error(f"AI chat failed: {response.status_code} {response.text}")
                return {"response": fallback_reply}

            # ``choices`` may be missing, null or empty; treat all as "no reply".
            choices = response.json().get("choices") or [{}]
            ai_response = (choices[0].get("message") or {}).get("content") or ""
            if not ai_response:
                logger.error("AI chat returned an empty reply")
                return {"response": fallback_reply}

            logger.info("AI chat successful with RAG context")
            return {"response": ai_response}

        except httpx.TimeoutException:
            logger.error("AI API timeout")
            return {"response": "抱歉,网络有点慢,请稍后再试~"}
        except httpx.ConnectError:
            logger.error("AI API connection error")
            return {"response": "抱歉,无法连接到AI服务,请检查网络连接~"}
        except Exception as e:
            logger.error(f"AI API call error: {e}")
            return {"response": fallback_reply}

    except Exception as e:
        logger.error(f"Chat error: {e}")
        return {"response": fallback_reply}
Path("generated_images") + default_image = generated_images_dir / "default_character.jpeg" + + # 优先使用默认形象 + if default_image.exists(): + logger.info("Loading default character image") + user_config.save_character_image( + image_url=str(default_image), + prompt="默认治愈系小猫形象", + preferences={ + "color": "薰衣草紫", + "personality": "温柔", + "appearance": "无配饰", + "role": "陪伴式朋友" + } + ) + user_data = user_config.load_config() + logger.info("Default character image loaded successfully") + + # 如果没有默认形象,尝试加载最新的本地图片 + elif generated_images_dir.exists(): + # 获取所有图片文件 + image_files = list(generated_images_dir.glob("character_*.jpeg")) + if image_files: + # 按修改时间排序,获取最新的 + latest_image = max(image_files, key=lambda p: p.stat().st_mtime) + + # 构建 URL 路径(使用动态 base_url) + image_url = f"{base_url}/generated_images/{latest_image.name}" + + # 从文件名提取偏好设置 + # 格式: character_颜色_性格_时间戳.jpeg + parts = latest_image.stem.split('_') + if len(parts) >= 3: + color = parts[1] + personality = parts[2] + + # 更新配置 + user_config.save_character_image( + image_url=str(latest_image), + prompt=f"Character with {color} and {personality}", + preferences={ + "color": color, + "personality": personality, + "appearance": "无配饰", + "role": "陪伴式朋友" + } + ) + + # 重新加载配置 + user_data = user_config.load_config() + + logger.info(f"Loaded latest local image: {latest_image.name}") + + # 如果 image_url 是本地路径,转换为 URL + image_url = user_data.get('character', {}).get('image_url') + if image_url and not image_url.startswith('http'): + # 本地路径,转换为 URL(处理 Windows 和 Unix 路径) + image_path = Path(image_url) + if image_path.exists(): + # 使用正斜杠构建 URL(使用动态 base_url) + user_data['character']['image_url'] = f"{base_url}/generated_images/{image_path.name}" + else: + # 如果路径不存在,尝试只使用文件名 + filename = image_path.name + full_path = Path("generated_images") / filename + if full_path.exists(): + user_data['character']['image_url'] = f"{base_url}/generated_images/{filename}" + logger.info(f"Converted path to URL: {filename}") + + return user_data 
+ except Exception as e: + logger.error(f"Failed to get user config: {e}") + return JSONResponse( + status_code=500, + content={"error": str(e)} + ) + + +@app.post("/api/character/generate") +async def generate_character( + request: Request, + color: str = Form(...), + personality: str = Form(...), + appearance: str = Form(...), + role: str = Form(...) +): + """Generate AI character image based on preferences. + + Args: + color: Color preference (温暖粉/天空蓝/薄荷绿等) + personality: Personality trait (活泼/温柔/聪明等) + appearance: Appearance feature (戴眼镜/戴帽子等) + role: Character role (陪伴式朋友/温柔照顾型长辈等) + + Returns: + JSON with image_url, prompt, and preferences + """ + try: + from app.image_service import ImageGenerationService, ImageGenerationError + from app.user_config import UserConfig + from datetime import datetime + from pathlib import Path + import httpx + + config = get_config() + + # 检查是否配置了 MiniMax API + minimax_api_key = getattr(config, 'minimax_api_key', None) + + if not minimax_api_key: + logger.warning("MiniMax API key not configured") + return JSONResponse( + status_code=400, + content={ + "error": "MiniMax API 未配置", + "detail": "请在 .env 文件中配置 MINIMAX_API_KEY。访问 https://platform.minimaxi.com/ 获取 API 密钥。" + } + ) + + # 初始化服务 + image_service = ImageGenerationService( + api_key=minimax_api_key, + group_id=getattr(config, 'minimax_group_id', None) + ) + user_config = UserConfig(str(config.data_dir)) + + try: + logger.info( + f"Generating character image: " + f"color={color}, personality={personality}, " + f"appearance={appearance}, role={role}" + ) + + # 生成图像 + result = await image_service.generate_image( + color=color, + personality=personality, + appearance=appearance, + role=role, + aspect_ratio="1:1", + n=1 + ) + + # 下载图片到本地 + generated_images_dir = Path("generated_images") + generated_images_dir.mkdir(exist_ok=True) + + # 生成文件名:character_颜色_性格_时间戳.jpeg + timestamp = datetime.now().strftime("%Y%m%d_%H%M%S") + filename = 
f"character_{color}_{personality}_{timestamp}.jpeg" + local_path = generated_images_dir / filename + + logger.info(f"Downloading image to: {local_path}") + + # 下载图片 + async with httpx.AsyncClient(timeout=60.0) as client: + response = await client.get(result['url']) + if response.status_code == 200: + with open(local_path, 'wb') as f: + f.write(response.content) + logger.info(f"Image saved to: {local_path}") + else: + logger.error(f"Failed to download image: HTTP {response.status_code}") + # 如果下载失败,仍然使用远程 URL + local_path = None + + # 保存到用户配置 + preferences = { + "color": color, + "personality": personality, + "appearance": appearance, + "role": role + } + + # 使用本地路径(如果下载成功) + image_url = str(local_path) if local_path else result['url'] + + user_config.save_character_image( + image_url=image_url, + prompt=result['prompt'], + revised_prompt=result.get('metadata', {}).get('revised_prompt'), + preferences=preferences + ) + + logger.info(f"Character image generated and saved: {image_url}") + + # 返回 HTTP URL(使用动态 base_url) + base_url = get_base_url(request) + if local_path: + http_url = f"{base_url}/generated_images/{local_path.name}" + else: + http_url = image_url + + return { + "success": True, + "image_url": http_url, + "prompt": result['prompt'], + "preferences": preferences, + "task_id": result.get('task_id') + } + + finally: + await image_service.close() + + except ImageGenerationError as e: + logger.error(f"Image generation error: {e.message}") + + # 提供更友好的错误信息 + error_detail = e.message + if "invalid api key" in e.message.lower(): + error_detail = "API 密钥无效,请检查 MINIMAX_API_KEY 配置是否正确" + elif "quota" in e.message.lower() or "配额" in e.message: + error_detail = "API 配额不足,请充值或等待配额恢复" + elif "timeout" in e.message.lower() or "超时" in e.message: + error_detail = "请求超时,请检查网络连接后重试" + + return JSONResponse( + status_code=500, + content={ + "error": "图像生成失败", + "detail": error_detail + } + ) + + except Exception as e: + logger.error(f"Failed to generate character: {e}", 
@app.post("/api/character/select")
async def select_character(
    request: Request,
    filename: str = Form(...)
):
    """Select a historical character image as current.

    Args:
        request: Incoming request, used to derive the public base URL.
        filename: Bare filename of an image inside ``generated_images``.

    Returns:
        JSON with success status, image URL, filename and the preferences
        recovered from the filename.

    Raises:
        HTTPException: 400 if ``filename`` is not a bare filename, 404 if the
            image does not exist, 500 on any other failure.
    """
    try:
        from app.user_config import UserConfig
        from pathlib import Path

        # ``filename`` is untrusted form input. Reject anything that is not a
        # plain name so it cannot escape the images directory.
        if (
            not filename
            or filename in (".", "..")
            or "/" in filename
            or "\\" in filename
            or Path(filename).name != filename
        ):
            raise HTTPException(status_code=400, detail="无效的文件名")

        config = get_config()
        user_config = UserConfig(str(config.data_dir))

        # The target must be an existing regular file (a directory is not enough).
        image_path = Path("generated_images") / filename
        if not image_path.is_file():
            raise HTTPException(status_code=404, detail="图片文件不存在")

        # Filenames follow character_<color>_<personality>_<timestamp>.jpeg;
        # appearance and role are not encoded, so they are stored as unknown.
        parts = filename.replace(".jpeg", "").split("_")
        if len(parts) >= 4:
            preferences = {
                "color": parts[1],
                "personality": parts[2],
                "appearance": "未知",
                "role": "未知"
            }
        else:
            preferences = {}

        # Persist the local path; it is converted to a URL when served.
        image_url = str(image_path)
        user_config.save_character_image(
            image_url=image_url,
            prompt=f"历史形象: {filename}",
            preferences=preferences
        )

        logger.info(f"Selected historical character: {filename}")

        # Return an HTTP URL built from the request's base URL.
        base_url = get_base_url(request)
        http_url = f"{base_url}/generated_images/{filename}"

        return {
            "success": True,
            "image_url": http_url,
            "filename": filename,
            "preferences": preferences
        }

    except HTTPException:
        # Deliberate HTTP errors (400/404) pass through unchanged.
        raise
    except Exception as e:
        logger.error(f"Error selecting character: {e}", exc_info=True)
        raise HTTPException(status_code=500, detail=str(e))
if __name__ == "__main__":
    # Script entry point: load configuration, set up logging and start the
    # uvicorn server. Any startup failure is reported and the process exits
    # with status 1.
    import sys

    import uvicorn

    try:
        # Load config for server settings
        config = init_config()
        setup_logging(log_level=config.log_level, log_file=config.log_file)

        # Run server
        uvicorn.run(
            "app.main:app",
            host=config.host,
            port=config.port,
            reload=False,
            log_level=config.log_level.lower()
        )
    except Exception as e:
        # sys.exit is used instead of the builtin exit(), which is injected by
        # the site module and is not guaranteed to exist in every interpreter.
        print(f"Failed to start application: {e}", file=sys.stderr)
        sys.exit(1)
class InspirationData(BaseModel):
    """Inspiration data structure.

    Represents an idea or inspiration extracted from user input.

    Attributes:
        core_idea: The core idea/concept (max 20 characters)
        tags: List of tags for categorization (max 5 tags)
        category: Category of the inspiration; must be exactly one of
            "工作", "生活", "学习", "创意"

    Requirements: 5.1, 5.2, 5.3
    """
    # Required; strings longer than 20 characters are rejected.
    core_idea: str = Field(..., max_length=20)
    # Defaults to a fresh empty list per instance.
    # NOTE(review): relies on pydantic applying max_length to the number of
    # list items — confirm against the installed pydantic version.
    tags: List[str] = Field(default_factory=list, max_length=5)
    # Required; has no default, so callers must always supply a category.
    category: Literal["工作", "生活", "学习", "创意"]
class ProcessResponse(BaseModel):
    """API response model for /api/process endpoint.

    Represents the response returned to clients after processing input.
    Only ``record_id`` and ``timestamp`` are required; every other field has
    a default so a response can be built from partial parsing results.

    Attributes:
        record_id: Unique identifier for the processed record
        timestamp: ISO 8601 timestamp of when processing completed
        mood: Extracted mood data (optional)
        inspirations: List of extracted inspirations
        todos: List of extracted todo items
        error: Error message if processing failed (optional)
    """
    # Required identifiers.
    record_id: str
    timestamp: str
    # Parsed dimensions: mood defaults to None, the lists default to a fresh
    # empty list per instance.
    mood: Optional[MoodData] = None
    inspirations: List[InspirationData] = Field(default_factory=list)
    todos: List[TodoData] = Field(default_factory=list)
    # None unless an error message is supplied.
    error: Optional[str] = None
class SemanticParserService:
    """Service for parsing text into structured data using GLM-4-Flash API.

    This service handles semantic parsing by calling the GLM-4-Flash API
    to extract mood, inspirations, and todos from text. It manages API
    authentication, request formatting, response parsing, and error handling.

    Attributes:
        api_key: Zhipu AI API key for authentication
        client: Async HTTP client for making API requests
        api_url: GLM-4-Flash API endpoint URL
        model: Model identifier
        system_prompt: System prompt for data conversion

    Requirements: 3.1, 3.2, 3.3, 3.4, 3.5, 9.2, 9.5
    """

    def __init__(self, api_key: str):
        """Initialize the semantic parser service.

        Args:
            api_key: Zhipu AI API key for authentication

        Requirements: 3.1, 3.2
        """
        self.api_key = api_key
        self.client = httpx.AsyncClient(timeout=30.0)
        self.api_url = "https://open.bigmodel.cn/api/paas/v4/chat/completions"
        self.model = "glm-4-flash"

        # System prompt as specified in requirements
        self.system_prompt = (
            "你是一个专业的文本语义分析助手。请将用户输入的文本解析为结构化的 JSON 数据。\n\n"
            "你需要提取以下三个维度的信息:\n\n"
            "1. **情绪 (mood)**:\n"
            " - type: 情绪类型(如:喜悦、焦虑、平静、忧虑、兴奋、悲伤等中文词汇)\n"
            " - intensity: 情绪强度(1-10的整数,10表示最强烈)\n"
            " - keywords: 情绪关键词列表(3-5个中文词)\n\n"
            "2. **灵感 (inspirations)**:数组,每个元素包含:\n"
            " - core_idea: 核心观点或想法(20字以内的中文)\n"
            " - tags: 相关标签列表(3-5个中文词)\n"
            " - category: 所属分类(必须是:工作、生活、学习、创意 之一)\n\n"
            "3. **待办 (todos)**:数组,每个元素包含:\n"
            " - task: 任务描述(中文)\n"
            " - time: 时间信息(如:明天、下周、周五等,如果没有则为null)\n"
            " - location: 地点信息(如果没有则为null)\n"
            " - status: 状态(默认为\"pending\")\n\n"
            "**重要规则**:\n"
            "- 如果文本中没有某个维度的信息,mood 返回 null,inspirations 和 todos 返回空数组 []\n"
            "- 必须返回有效的 JSON 格式,不要添加任何其他说明文字\n"
            "- 所有字段名使用英文,内容使用中文\n"
            "- 直接返回 JSON,不要用 markdown 代码块包裹\n\n"
            "返回格式示例:\n"
            "{\n"
            " \"mood\": {\"type\": \"焦虑\", \"intensity\": 7, \"keywords\": [\"压力\", \"疲惫\", \"放松\"]},\n"
            " \"inspirations\": [{\"core_idea\": \"晚霞可以缓解压力\", \"tags\": [\"自然\", \"治愈\"], \"category\": \"生活\"}],\n"
            " \"todos\": [{\"task\": \"整理文档\", \"time\": \"明天\", \"location\": null, \"status\": \"pending\"}]\n"
            "}"
        )

    async def close(self):
        """Close the HTTP client.

        This should be called when the service is no longer needed
        to properly clean up resources.
        """
        await self.client.aclose()

    @staticmethod
    def _log_error(message: str, with_traceback: bool = True) -> None:
        """Log an error together with the ``timestamp`` extra field.

        Args:
            message: Text to log at ERROR level.
            with_traceback: Attach the active exception's traceback. Pass
                False when logging outside an ``except`` block, where there is
                no exception to attach.
        """
        logger.error(
            message,
            exc_info=with_traceback,
            extra={"timestamp": logger.makeRecord(
                logger.name, logging.ERROR, "", 0, message, (), None
            ).created}
        )

    @staticmethod
    def _extract_json_text(content: str) -> str:
        """Return the JSON payload of a model reply, unwrapping code fences.

        A fenced block labelled json is preferred over an unlabelled one. If
        the closing fence is missing, everything after the opening fence is
        used. Content without any fence is returned unchanged.

        Args:
            content: Raw message content returned by the model.

        Returns:
            The text that should be handed to ``json.loads``.
        """
        for opening in ("```json", "```"):
            start = content.find(opening)
            if start == -1:
                continue
            start += len(opening)
            end = content.find("```", start)
            if end == -1:
                # Unterminated fence: keep the whole remainder rather than
                # slicing with -1, which would drop the last character.
                end = len(content)
            return content[start:end].strip()
        return content

    async def parse(self, text: str) -> ParsedData:
        """Parse text into structured data using GLM-4-Flash API.

        This method sends the text to the GLM-4-Flash API with the configured
        system prompt and returns structured data containing mood, inspirations,
        and todos. Individual mood/inspiration/todo entries that fail model
        validation are logged and skipped rather than failing the whole parse.

        Args:
            text: Text content to parse

        Returns:
            ParsedData object containing mood (optional), inspirations (list),
            and todos (list). Missing dimensions return null or empty arrays.

        Raises:
            SemanticParserError: If API call fails or returns invalid response

        Requirements: 3.1, 3.2, 3.3, 3.4, 3.5, 9.2, 9.5
        """
        try:
            # Prepare request headers
            headers = {
                "Authorization": f"Bearer {self.api_key}",
                "Content-Type": "application/json"
            }

            # Prepare request payload
            payload = {
                "model": self.model,
                "messages": [
                    {
                        "role": "system",
                        "content": self.system_prompt
                    },
                    {
                        "role": "user",
                        "content": text
                    }
                ],
                "temperature": 0.7,
                "top_p": 0.9
            }

            logger.info(f"Calling GLM-4-Flash API for semantic parsing. Text length: {len(text)}")

            # Make API request
            response = await self.client.post(
                self.api_url,
                headers=headers,
                json=payload
            )

            # Check response status
            if response.status_code != 200:
                error_msg = f"GLM-4-Flash API returned status {response.status_code}"
                try:
                    error_detail = response.json()
                    error_msg += f": {error_detail}"
                except Exception:
                    error_msg += f": {response.text}"

                # No exception is active here, so no traceback is attached.
                self._log_error(
                    f"Semantic parsing API call failed: {error_msg}",
                    with_traceback=False
                )
                raise SemanticParserError(f"语义解析服务不可用: {error_msg}")

            # Parse response
            try:
                result = response.json()
            except Exception as e:
                self._log_error(f"Failed to parse GLM-4-Flash API response: {str(e)}")
                raise SemanticParserError("语义解析服务不可用: 响应格式无效") from e

            # Extract content from response
            try:
                content = result["choices"][0]["message"]["content"]
            except (KeyError, IndexError, TypeError) as e:
                self._log_error(f"Invalid API response structure: {str(e)}")
                raise SemanticParserError("语义解析服务不可用: 响应结构无效") from e

            if not isinstance(content, str):
                # e.g. a null content field; nothing to parse.
                self._log_error(
                    f"Invalid API response structure: content is {type(content).__name__}",
                    with_traceback=False
                )
                raise SemanticParserError("语义解析服务不可用: 响应结构无效")

            # Parse JSON from content (tolerating markdown code fences)
            try:
                parsed_json = json.loads(self._extract_json_text(content))
            except json.JSONDecodeError as e:
                self._log_error(f"Failed to parse JSON from API response: {str(e)}")
                raise SemanticParserError("语义解析服务不可用: JSON 解析失败") from e

            # Extract and validate mood data
            mood = None
            if "mood" in parsed_json and parsed_json["mood"]:
                try:
                    mood_data = parsed_json["mood"]
                    if isinstance(mood_data, dict):
                        mood = MoodData(
                            type=mood_data.get("type"),
                            intensity=mood_data.get("intensity"),
                            keywords=mood_data.get("keywords", [])
                        )
                except Exception as e:
                    logger.warning(f"Failed to parse mood data: {str(e)}")
                    mood = None

            # Extract and validate inspirations
            inspirations = []
            if "inspirations" in parsed_json and parsed_json["inspirations"]:
                for insp_data in parsed_json["inspirations"]:
                    try:
                        if isinstance(insp_data, dict):
                            inspirations.append(InspirationData(
                                core_idea=insp_data.get("core_idea", ""),
                                tags=insp_data.get("tags", []),
                                category=insp_data.get("category", "生活")
                            ))
                    except Exception as e:
                        logger.warning(f"Failed to parse inspiration data: {str(e)}")
                        continue

            # Extract and validate todos
            todos = []
            if "todos" in parsed_json and parsed_json["todos"]:
                for todo_data in parsed_json["todos"]:
                    try:
                        if isinstance(todo_data, dict):
                            todos.append(TodoData(
                                task=todo_data.get("task", ""),
                                time=todo_data.get("time"),
                                location=todo_data.get("location"),
                                status=todo_data.get("status", "pending")
                            ))
                    except Exception as e:
                        logger.warning(f"Failed to parse todo data: {str(e)}")
                        continue

            logger.info(
                f"Semantic parsing successful. "
                f"Mood: {'present' if mood else 'none'}, "
                f"Inspirations: {len(inspirations)}, "
                f"Todos: {len(todos)}"
            )

            return ParsedData(
                mood=mood,
                inspirations=inspirations,
                todos=todos
            )

        except SemanticParserError:
            # Re-raise SemanticParserError as-is
            raise

        except httpx.TimeoutException as e:
            self._log_error(f"GLM-4-Flash API request timeout: {str(e)}")
            raise SemanticParserError("语义解析服务不可用: 请求超时") from e

        except httpx.RequestError as e:
            self._log_error(f"GLM-4-Flash API request failed: {str(e)}")
            raise SemanticParserError("语义解析服务不可用: 网络错误") from e

        except Exception as e:
            self._log_error(f"Unexpected error in semantic parser service: {str(e)}")
            raise SemanticParserError(f"语义解析服务不可用: {str(e)}") from e
+ +Requirements: 7.1, 7.2, 7.3, 7.4, 7.5, 7.6, 7.7 +""" + +import json +import uuid +from pathlib import Path +from typing import List, Optional +from datetime import datetime + +from app.models import RecordData, MoodData, InspirationData, TodoData + + +class StorageError(Exception): + """Exception raised when storage operations fail. + + This exception is raised when file operations (read/write) fail, + such as due to permission issues, disk space, or I/O errors. + + Requirements: 7.6 + """ + pass + + +class StorageService: + """Service for managing JSON file storage. + + This service handles persistence of records, moods, inspirations, and todos + to separate JSON files. It ensures file initialization, generates unique IDs, + and handles errors appropriately. + + Attributes: + data_dir: Directory path for storing JSON files + records_file: Path to records.json + moods_file: Path to moods.json + inspirations_file: Path to inspirations.json + todos_file: Path to todos.json + + Requirements: 7.1, 7.2, 7.3, 7.4, 7.5, 7.6, 7.7 + """ + + def __init__(self, data_dir: str): + """Initialize the storage service. + + Args: + data_dir: Directory path for storing JSON files + """ + self.data_dir = Path(data_dir) + self.records_file = self.data_dir / "records.json" + self.moods_file = self.data_dir / "moods.json" + self.inspirations_file = self.data_dir / "inspirations.json" + self.todos_file = self.data_dir / "todos.json" + + # Ensure data directory exists + self.data_dir.mkdir(parents=True, exist_ok=True) + + def _ensure_file_exists(self, file_path: Path) -> None: + """Ensure a JSON file exists and is initialized with default data. + + If the file doesn't exist, creates it with sample Chinese data. 
+ + Args: + file_path: Path to the JSON file + + Raises: + StorageError: If file creation fails + + Requirements: 7.5 + """ + if not file_path.exists(): + try: + # 根据文件类型提供不同的默认数据 + default_data = [] + + if file_path.name == 'records.json': + default_data = self._get_default_records() + elif file_path.name == 'moods.json': + default_data = self._get_default_moods() + elif file_path.name == 'inspirations.json': + default_data = self._get_default_inspirations() + elif file_path.name == 'todos.json': + default_data = self._get_default_todos() + elif file_path.name == 'user_config.json': + default_data = self._get_default_user_config() + + with open(file_path, 'w', encoding='utf-8') as f: + json.dump(default_data, f, ensure_ascii=False, indent=2) + except Exception as e: + raise StorageError( + f"Failed to initialize file {file_path}: {str(e)}" + ) + + def _get_default_records(self) -> list: + """获取默认的记录数据""" + from datetime import datetime, timedelta + now = datetime.now() + + return [ + { + "record_id": "welcome-1", + "timestamp": (now - timedelta(hours=2)).isoformat() + "Z", + "input_type": "text", + "original_text": "今天天气真好,阳光洒在窗台上,心情也跟着明朗起来。决定下午去公园散散步,感受一下大自然的美好。", + "parsed_data": { + "mood": { + "type": "喜悦", + "intensity": 8, + "keywords": ["阳光", "明朗", "美好"] + }, + "inspirations": [ + { + "core_idea": "享受自然的美好时光", + "tags": ["自然", "散步", "放松"], + "category": "生活" + } + ], + "todos": [ + { + "task": "去公园散步", + "time": "下午", + "location": "公园", + "status": "pending" + } + ] + } + }, + { + "record_id": "welcome-2", + "timestamp": (now - timedelta(hours=5)).isoformat() + "Z", + "input_type": "text", + "original_text": "刚看完一本很棒的书,书中的一句话让我印象深刻:'生活不是等待暴风雨过去,而是学会在雨中跳舞。'这句话给了我很多启发。", + "parsed_data": { + "mood": { + "type": "平静", + "intensity": 7, + "keywords": ["启发", "思考", "感悟"] + }, + "inspirations": [ + { + "core_idea": "学会在困难中保持积极", + "tags": ["人生哲理", "积极心态", "成长"], + "category": "学习" + } + ], + "todos": [] + } + }, + { + "record_id": "welcome-3", + "timestamp": (now 
- timedelta(days=1, hours=3)).isoformat() + "Z", + "input_type": "text", + "original_text": "和好朋友聊了很久,她分享了最近的生活和工作。虽然大家都很忙,但能抽时间见面真的很珍贵。友谊需要用心维护。", + "parsed_data": { + "mood": { + "type": "温暖", + "intensity": 9, + "keywords": ["友谊", "珍贵", "陪伴"] + }, + "inspirations": [ + { + "core_idea": "珍惜身边的朋友", + "tags": ["友情", "陪伴", "珍惜"], + "category": "生活" + } + ], + "todos": [ + { + "task": "定期和朋友联系", + "time": None, + "location": None, + "status": "pending" + } + ] + } + }, + { + "record_id": "welcome-4", + "timestamp": (now - timedelta(days=2)).isoformat() + "Z", + "input_type": "text", + "original_text": "今天完成了一个困扰我很久的项目,虽然过程很辛苦,但看到成果的那一刻,所有的付出都值得了。成就感满满!", + "parsed_data": { + "mood": { + "type": "兴奋", + "intensity": 10, + "keywords": ["成就感", "完成", "满足"] + }, + "inspirations": [], + "todos": [] + } + }, + { + "record_id": "welcome-5", + "timestamp": (now - timedelta(days=3)).isoformat() + "Z", + "input_type": "text", + "original_text": "最近工作压力有点大,总是担心做不好。但转念一想,每个人都会遇到困难,重要的是保持积极的心态,一步一步来。", + "parsed_data": { + "mood": { + "type": "焦虑", + "intensity": 6, + "keywords": ["压力", "担心", "积极"] + }, + "inspirations": [ + { + "core_idea": "保持积极心态面对压力", + "tags": ["心态", "压力管理", "成长"], + "category": "工作" + } + ], + "todos": [] + } + } + ] + + def _get_default_moods(self) -> list: + """获取默认的心情数据""" + from datetime import datetime, timedelta + now = datetime.now() + + return [ + { + "record_id": "welcome-1", + "timestamp": (now - timedelta(hours=2)).isoformat() + "Z", + "type": "喜悦", + "intensity": 8, + "keywords": ["阳光", "明朗", "美好"] + }, + { + "record_id": "welcome-2", + "timestamp": (now - timedelta(hours=5)).isoformat() + "Z", + "type": "平静", + "intensity": 7, + "keywords": ["启发", "思考", "感悟"] + }, + { + "record_id": "welcome-3", + "timestamp": (now - timedelta(days=1, hours=3)).isoformat() + "Z", + "type": "温暖", + "intensity": 9, + "keywords": ["友谊", "珍贵", "陪伴"] + }, + { + "record_id": "welcome-4", + "timestamp": (now - timedelta(days=2)).isoformat() + "Z", + "type": "兴奋", + 
"intensity": 10, + "keywords": ["成就感", "完成", "满足"] + }, + { + "record_id": "welcome-5", + "timestamp": (now - timedelta(days=3)).isoformat() + "Z", + "type": "焦虑", + "intensity": 6, + "keywords": ["压力", "担心", "积极"] + } + ] + + def _get_default_inspirations(self) -> list: + """获取默认的灵感数据""" + from datetime import datetime, timedelta + now = datetime.now() + + return [ + { + "record_id": "welcome-1", + "timestamp": (now - timedelta(hours=2)).isoformat() + "Z", + "core_idea": "享受自然的美好时光", + "tags": ["自然", "散步", "放松"], + "category": "生活" + }, + { + "record_id": "welcome-2", + "timestamp": (now - timedelta(hours=5)).isoformat() + "Z", + "core_idea": "学会在困难中保持积极", + "tags": ["人生哲理", "积极心态", "成长"], + "category": "学习" + }, + { + "record_id": "welcome-3", + "timestamp": (now - timedelta(days=1, hours=3)).isoformat() + "Z", + "core_idea": "珍惜身边的朋友", + "tags": ["友情", "陪伴", "珍惜"], + "category": "生活" + }, + { + "record_id": "welcome-5", + "timestamp": (now - timedelta(days=3)).isoformat() + "Z", + "core_idea": "保持积极心态面对压力", + "tags": ["心态", "压力管理", "成长"], + "category": "工作" + } + ] + + def _get_default_todos(self) -> list: + """获取默认的待办数据""" + from datetime import datetime, timedelta + now = datetime.now() + + return [ + { + "record_id": "welcome-1", + "timestamp": (now - timedelta(hours=2)).isoformat() + "Z", + "task": "去公园散步", + "time": "下午", + "location": "公园", + "status": "pending" + }, + { + "record_id": "welcome-3", + "timestamp": (now - timedelta(days=1, hours=3)).isoformat() + "Z", + "task": "定期和朋友联系", + "time": None, + "location": None, + "status": "pending" + } + ] + + def _get_default_user_config(self) -> dict: + """获取默认的用户配置""" + return { + "character": { + "image_url": "", # 空字符串,前端会显示占位符 + "prompt": "默认形象:薰衣草紫色温柔猫咪", + "preferences": { + "color": "薰衣草紫", + "personality": "温柔", + "appearance": "无配饰", + "role": "陪伴式朋友" + } + } + } + + def _read_json_file(self, file_path: Path) -> List: + """Read and parse a JSON file. 
+ + Args: + file_path: Path to the JSON file + + Returns: + List of records from the JSON file + + Raises: + StorageError: If file reading or parsing fails + """ + self._ensure_file_exists(file_path) + try: + with open(file_path, 'r', encoding='utf-8') as f: + return json.load(f) + except Exception as e: + raise StorageError( + f"Failed to read file {file_path}: {str(e)}" + ) + + def _write_json_file(self, file_path: Path, data: List) -> None: + """Write data to a JSON file. + + Args: + file_path: Path to the JSON file + data: List of records to write + + Raises: + StorageError: If file writing fails + + Requirements: 7.6 + """ + try: + with open(file_path, 'w', encoding='utf-8') as f: + json.dump(data, f, ensure_ascii=False, indent=2) + except Exception as e: + raise StorageError( + f"Failed to write file {file_path}: {str(e)}" + ) + + def save_record(self, record: RecordData) -> str: + """Save a complete record to records.json. + + Generates a unique UUID for the record if not already set, + and appends the record to the records.json file. + + Args: + record: RecordData object to save + + Returns: + The unique record_id (UUID string) + + Raises: + StorageError: If file writing fails + + Requirements: 7.1, 7.7 + """ + # Generate unique UUID if not set + if not record.record_id: + record.record_id = str(uuid.uuid4()) + + # Read existing records + records = self._read_json_file(self.records_file) + + # Append new record + records.append(record.model_dump()) + + # Write back to file + self._write_json_file(self.records_file, records) + + return record.record_id + + def append_mood(self, mood: MoodData, record_id: str, timestamp: str) -> None: + """Append mood data to moods.json. 
+ + Args: + mood: MoodData object to append + record_id: Associated record ID + timestamp: ISO 8601 timestamp + + Raises: + StorageError: If file writing fails + + Requirements: 7.2 + """ + # Read existing moods + moods = self._read_json_file(self.moods_file) + + # Create mood entry with metadata + mood_entry = { + "record_id": record_id, + "timestamp": timestamp, + **mood.model_dump() + } + + # Append new mood + moods.append(mood_entry) + + # Write back to file + self._write_json_file(self.moods_file, moods) + + def append_inspirations( + self, + inspirations: List[InspirationData], + record_id: str, + timestamp: str + ) -> None: + """Append inspiration data to inspirations.json. + + Args: + inspirations: List of InspirationData objects to append + record_id: Associated record ID + timestamp: ISO 8601 timestamp + + Raises: + StorageError: If file writing fails + + Requirements: 7.3 + """ + if not inspirations: + return + + # Read existing inspirations + all_inspirations = self._read_json_file(self.inspirations_file) + + # Create inspiration entries with metadata + for inspiration in inspirations: + inspiration_entry = { + "record_id": record_id, + "timestamp": timestamp, + **inspiration.model_dump() + } + all_inspirations.append(inspiration_entry) + + # Write back to file + self._write_json_file(self.inspirations_file, all_inspirations) + + def append_todos( + self, + todos: List[TodoData], + record_id: str, + timestamp: str + ) -> None: + """Append todo data to todos.json. 
"""User configuration management for Voice Text Processor.

This module handles user-specific configurations, including
the generated cat character image settings.

Requirements: PRD - AI形象生成模块
"""

import json
import os
from typing import Optional, Dict, List
from datetime import datetime, timezone
import logging

logger = logging.getLogger(__name__)


class UserConfig:
    """User configuration manager.

    This class manages user-specific settings, particularly
    the generated cat character image configuration. All state lives in a
    single JSON file that is re-read on every access.

    Attributes:
        config_dir: Directory for storing user configurations
        config_file: Path to the user config JSON file
    """

    def __init__(self, config_dir: str = "data"):
        """Initialize user configuration manager.

        Creates ``config_dir`` and a default config file when they are missing.

        Args:
            config_dir: Directory for storing configurations
        """
        self.config_dir = config_dir
        self.config_file = os.path.join(config_dir, "user_config.json")

        # Make sure the directory exists before touching the file.
        os.makedirs(config_dir, exist_ok=True)

        # Seed the config file on first use.
        if not os.path.exists(self.config_file):
            self._init_config_file()

    @staticmethod
    def _utc_timestamp() -> str:
        """Return the current UTC time as ``YYYY-MM-DDTHH:MM:SS[.ffffff]Z``."""
        # tzinfo is dropped so isoformat() does not emit "+00:00" before "Z".
        return datetime.now(timezone.utc).replace(tzinfo=None).isoformat() + "Z"

    @staticmethod
    def _default_preferences() -> Dict:
        """Return a fresh copy of the default character preferences."""
        return {
            "color": "薰衣草紫",
            "personality": "温柔",
            "appearance": "无配饰",
            "role": "陪伴式朋友"
        }

    def _read_config(self) -> Dict:
        """Read and parse the config file, rejecting non-object JSON."""
        with open(self.config_file, 'r', encoding='utf-8') as f:
            config = json.load(f)
        if not isinstance(config, dict):
            raise ValueError("user config must be a JSON object")
        return config

    def _init_config_file(self):
        """Initialize the configuration file with default values."""
        now = self._utc_timestamp()
        default_config = {
            "user_id": "default_user",
            "created_at": now,
            "character": {
                # Left empty on purpose: the frontend renders a placeholder.
                "image_url": "",
                "prompt": "默认治愈系小猫形象",
                "revised_prompt": "一只薰衣草紫色的温柔猫咪,治愈系风格,温暖的陪伴者",
                "preferences": self._default_preferences(),
                "generated_at": now,
                "generation_count": 0
            },
            "settings": {
                "theme": "light",
                "language": "zh-CN"
            }
        }

        with open(self.config_file, 'w', encoding='utf-8') as f:
            json.dump(default_config, f, ensure_ascii=False, indent=2)

        logger.info(f"Initialized user config file: {self.config_file}")

    def load_config(self) -> Dict:
        """Load user configuration from file.

        An unreadable or malformed file is replaced with the defaults and
        read once more. A second failure propagates to the caller instead of
        recursing without bound.

        Returns:
            Dictionary containing user configuration
        """
        try:
            return self._read_config()
        except Exception as e:
            logger.error(f"Failed to load user config: {str(e)}")

        # Fall back to the default configuration, retrying exactly once.
        self._init_config_file()
        return self._read_config()

    def save_config(self, config: Dict):
        """Save user configuration to file.

        Args:
            config: Configuration dictionary to save

        Raises:
            Exception: Re-raises whatever the write or serialization raised.
        """
        try:
            with open(self.config_file, 'w', encoding='utf-8') as f:
                json.dump(config, f, ensure_ascii=False, indent=2)
            logger.info("User config saved successfully")
        except Exception as e:
            logger.error(f"Failed to save user config: {str(e)}")
            raise

    def get_character_config(self) -> Dict:
        """Get character configuration.

        Returns:
            Dictionary containing character settings (empty if absent)
        """
        config = self.load_config()
        return config.get("character", {})

    def save_character_image(
        self,
        image_url: str,
        prompt: str,
        revised_prompt: Optional[str] = None,
        preferences: Optional[Dict] = None
    ):
        """Save generated character image configuration.

        Args:
            image_url: URL of the generated image
            prompt: Prompt used for generation
            revised_prompt: AI-revised prompt (optional, defaults to ``prompt``)
            preferences: User preferences used (optional)
        """
        config = self.load_config()

        # Config files written by other components may lack the "character"
        # section or its counter, so create/default them instead of assuming.
        character = config.setdefault("character", {})
        character["image_url"] = image_url
        character["prompt"] = prompt
        character["revised_prompt"] = revised_prompt or prompt
        character["generated_at"] = self._utc_timestamp()
        character["generation_count"] = character.get("generation_count", 0) + 1

        if preferences:
            character["preferences"] = preferences

        self.save_config(config)
        logger.info(f"Character image saved: {image_url[:50]}...")

    def get_character_image_url(self) -> Optional[str]:
        """Get the current character image URL.

        Returns:
            The stored URL; an empty string while only the default placeholder
            is configured, or None when the key is missing altogether
        """
        character = self.get_character_config()
        return character.get("image_url")

    def get_character_preferences(self) -> Dict:
        """Get character generation preferences.

        Returns:
            Dictionary containing color, personality, appearance, role. Falls
            back to the same defaults used when the config file is created.
        """
        character = self.get_character_config()
        return character.get("preferences", self._default_preferences())

    def update_character_preferences(
        self,
        color: Optional[str] = None,
        personality: Optional[str] = None,
        appearance: Optional[str] = None,
        role: Optional[str] = None
    ):
        """Update character generation preferences.

        Only truthy arguments are applied; omitted or empty values leave the
        stored preference unchanged.

        Args:
            color: Color preference (optional)
            personality: Personality trait (optional)
            appearance: Appearance feature (optional)
            role: Character role (optional)
        """
        config = self.load_config()
        # Tolerate configs that lack the character/preferences sections.
        preferences = config.setdefault("character", {}).setdefault("preferences", {})

        if color:
            preferences["color"] = color
        if personality:
            preferences["personality"] = personality
        if appearance:
            preferences["appearance"] = appearance
        if role:
            preferences["role"] = role

        self.save_config(config)
        logger.info("Character preferences updated")

    def get_generation_count(self) -> int:
        """Get the number of times character has been generated.

        Returns:
            Generation count (0 when never generated)
        """
        character = self.get_character_config()
        return character.get("generation_count", 0)

    def has_character_image(self) -> bool:
        """Check if user has a character image set.

        Returns:
            True if a non-empty image URL is stored, False otherwise
        """
        # The default config stores "", so "is not None" would always be True.
        return bool(self.get_character_image_url())
+ git commit -m "Deploy: Update frontend build" + ``` + +3. **推送到 Hugging Face** + ```bash + git push hf main + ``` + +### 配置 Hugging Face Secrets + +在 Space 的 Settings → Repository secrets 中添加: + +**必需:** +- `ZHIPU_API_KEY` - 智谱 AI API 密钥 + - 获取:https://open.bigmodel.cn/ + +**可选:** +- `MINIMAX_API_KEY` - MiniMax API 密钥 +- `MINIMAX_GROUP_ID` - MiniMax Group ID + - 获取:https://platform.minimaxi.com/ + +### 访问应用 + +部署成功后,访问: +- **前端应用**: `https://your-space.hf.space/app` +- **Gradio 界面**: `https://your-space.hf.space/gradio` +- **API 文档**: `https://your-space.hf.space/docs` + +### 文件结构 + +``` +. +├── app.py # Hugging Face 入口文件 +├── app/ # FastAPI 后端 +│ ├── main.py # 主应用 +│ └── ... +├── frontend/ +│ ├── dist/ # 构建产物(需要提交) +│ │ ├── index.html +│ │ └── assets/ +│ └── ... +├── requirements_hf.txt # Python 依赖 +└── README_HF.md # Hugging Face 说明 +``` + +### 故障排查 + +**问题:前端 404** +- 检查 `frontend/dist/` 是否存在 +- 确认已运行 `npm run build` +- 查看 Space 日志确认文件已上传 + +**问题:API 调用失败** +- 检查 Secrets 是否正确配置 +- 查看 Space 日志中的错误信息 +- 确认 API 密钥有效 + +**问题:静态资源加载失败** +- 检查 `frontend/dist/assets/` 是否存在 +- 确认 CSS 和 JS 文件已生成 +- 查看浏览器控制台的网络请求 + +### 本地测试 + +在部署前本地测试: + +```bash +# 构建前端 +cd frontend && npm run build && cd .. + +# 运行应用 +python app.py +``` + +访问 `http://localhost:7860/app` 测试前端应用。 + +### 更新部署 + +每次修改前端代码后: + +1. 重新构建:`cd frontend && npm run build && cd ..` +2. 提交更改:`git add . && git commit -m "Update"` +3. 推送:`git push hf main` + +### 注意事项 + +- ✅ `frontend/dist/` 必须提交到 Git(不要在 .gitignore 中忽略) +- ✅ 每次修改前端代码都需要重新构建 +- ✅ Hugging Face Spaces 会自动重启应用 +- ⚠️ 首次部署可能需要 5-10 分钟 +- ⚠️ 免费 Space 可能会在不活跃时休眠 diff --git a/deployment/DEPLOY_CHECKLIST.md b/deployment/DEPLOY_CHECKLIST.md new file mode 100644 index 0000000000000000000000000000000000000000..5109fd2ec5e66f68e2b74224876f6aab2df58614 --- /dev/null +++ b/deployment/DEPLOY_CHECKLIST.md @@ -0,0 +1,137 @@ +# Hugging Face Spaces 部署检查清单 + +## 📋 部署前检查 + +### 1. 
依赖版本确认 +- [ ] `requirements_hf.txt` 中 `huggingface-hub==0.23.5` +- [ ] `requirements_hf.txt` 中 `gradio==4.44.0` +- [ ] `README_HF.md` frontmatter 中 `sdk_version: "4.44.0"` + +### 2. 文件结构确认 +- [ ] `app.py` 存在且正确 +- [ ] `frontend/dist/` 已构建(运行 `cd frontend && npm run build`) +- [ ] `data/` 目录存在 +- [ ] `generated_images/` 目录存在 + +### 3. 环境变量配置 +在 Space Settings → Repository secrets 中配置: +- [ ] `ZHIPU_API_KEY` - 必需 +- [ ] `MINIMAX_API_KEY` - 可选 +- [ ] `MINIMAX_GROUP_ID` - 可选 + +## 🚀 部署步骤 + +### 方法 1: 使用 deploy_to_hf.sh (推荐) + +```bash +# 1. 确保脚本可执行 +chmod +x deploy_to_hf.sh + +# 2. 运行部署脚本 +./deploy_to_hf.sh +``` + +### 方法 2: 手动部署 + +```bash +# 1. 构建前端 +cd frontend +npm install +npm run build +cd .. + +# 2. 提交到 Git +git add . +git commit -m "Deploy to Hugging Face Spaces" + +# 3. 推送到 Hugging Face +git push hf main +``` + +## 🐛 常见问题 + +### ImportError: cannot import name 'HfFolder' + +**原因:** `gradio` 和 `huggingface_hub` 版本不兼容 + +**解决方法:** +1. 确认 `requirements_hf.txt` 版本正确 +2. 在 Space Settings 中点击 "Factory reboot" +3. 查看 Container logs 确认安装的版本 + +### 前端 404 错误 + +**原因:** 前端未构建或未正确挂载 + +**解决方法:** +1. 本地运行 `cd frontend && npm run build` +2. 确认 `frontend/dist/` 目录存在且有内容 +3. 提交并推送 `frontend/dist/` 到仓库 + +### API 调用失败 + +**原因:** 环境变量未配置 + +**解决方法:** +1. 在 Space Settings → Repository secrets 添加 `ZHIPU_API_KEY` +2. 重启 Space +3. 查看 Logs 确认 API 密钥已加载 + +## 📊 部署后验证 + +### 1. 健康检查 +访问 `https://your-space.hf.space/health` 应返回: +```json +{ + "status": "healthy", + "timestamp": "..." +} +``` + +### 2. API 文档 +访问 `https://your-space.hf.space/docs` 查看 API 文档 + +### 3. 前端访问 +访问 `https://your-space.hf.space/` 应显示应用界面 + +### 4. 功能测试 +- [ ] 首页输入框可以输入文字 +- [ ] 点击麦克风可以录音(需要浏览器权限) +- [ ] 点击 AI 形象显示对话框 +- [ ] 底部导航可以切换页面 + +## 🔄 更新部署 + +### 代码更新 +```bash +git add . +git commit -m "Update: description" +git push hf main +``` + +### 强制重建 +如果遇到缓存问题: +1. 进入 Space Settings +2. 点击 "Factory reboot" +3. 
等待重新构建完成 + +## 📝 版本兼容性 + +### 已测试的稳定组合 + +| gradio | huggingface-hub | Python | 状态 | +|--------|----------------|--------|------| +| 4.44.0 | 0.23.5 | 3.11 | ✅ 推荐 | +| 4.36.1 | 0.23.0 | 3.11 | ✅ 可用 | +| 5.x | latest | 3.11 | ❌ 不兼容 | + +### 不兼容的组合 + +- `gradio==4.x` + `huggingface-hub>=0.24.0` → HfFolder 错误 +- `gradio==5.x` + `huggingface-hub<0.24.0` → 版本冲突 + +## 🔗 相关资源 + +- [Hugging Face Spaces 文档](https://huggingface.co/docs/hub/spaces) +- [Gradio 文档](https://www.gradio.app/docs) +- [项目 README](./README.md) diff --git a/deployment/Dockerfile b/deployment/Dockerfile new file mode 100644 index 0000000000000000000000000000000000000000..a6c74154b4712507c904f564e8a64096c770d135 --- /dev/null +++ b/deployment/Dockerfile @@ -0,0 +1,31 @@ +FROM python:3.11-slim + +WORKDIR /app + +# 安装系统依赖 +RUN apt-get update && apt-get install -y \ + build-essential \ + && rm -rf /var/lib/apt/lists/* + +# 复制依赖文件 +COPY requirements.txt . + +# 安装 Python 依赖 +RUN pip install --no-cache-dir -r requirements.txt + +# 复制应用代码 +COPY app/ ./app/ +COPY data/ ./data/ +COPY frontend/dist/ ./frontend/dist/ + +# 复制启动脚本 +COPY start.py . + +# 创建必要的目录 +RUN mkdir -p generated_images logs + +# 暴露端口 +EXPOSE 7860 + +# 启动应用 +CMD ["python", "start.py"] diff --git a/deployment/README_HF.md b/deployment/README_HF.md new file mode 100644 index 0000000000000000000000000000000000000000..c4b32f74240524f0cf171d1f937ac2f811c8ed4e --- /dev/null +++ b/deployment/README_HF.md @@ -0,0 +1,99 @@ +--- +title: Nora - 治愈系记录助手 +emoji: 🌟 +colorFrom: purple +colorTo: pink +sdk: docker +pinned: false +license: mit +--- + +# 🌟 治愈系记录助手 - SoulMate AI Companion + +一个温暖、治愈的 AI 陪伴应用,帮助你记录心情、捕捉灵感、管理待办。 + +## ✨ 核心特性 + +- 🎤 **语音/文字快速记录** - 自动分类保存 +- 🤖 **AI 语义解析** - 智能提取情绪、灵感和待办 +- 💬 **AI 对话陪伴(RAG)** - 基于历史记录的个性化对话 +- 🖼️ **AI 形象定制** - 生成专属治愈系角色(720 种组合) +- 🫧 **物理引擎心情池** - 基于 Matter.js 的动态气泡可视化 + +## 🚀 快速开始 + +### 在线使用 + +直接访问本 Space 即可使用完整功能! 
+ +### 配置 API 密钥 + +在 Space 的 **Settings → Repository secrets** 中配置: + +**必需:** +- `ZHIPU_API_KEY` - 智谱 AI API 密钥 + - 获取地址:https://open.bigmodel.cn/ + - 用途:语音识别、语义解析、AI 对话 + +**可选:** +- `MINIMAX_API_KEY` - MiniMax API 密钥 +- `MINIMAX_GROUP_ID` - MiniMax Group ID + - 获取地址:https://platform.minimaxi.com/ + - 用途:AI 形象生成 + +## 📖 使用说明 + +1. **首页快速记录** + - 点击麦克风录音或在输入框输入文字 + - AI 自动分析并分类保存 + +2. **查看分类数据** + - 点击顶部心情、灵感、待办图标 + - 查看不同类型的记录 + +3. **与 AI 对话** + - 点击 AI 形象显示问候对话框 + - 点击对话框中的聊天图标进入完整对话 + - AI 基于你的历史记录提供个性化回复 + +4. **定制 AI 形象** + - 点击右下角 ✨ 按钮 + - 选择颜色、性格、外观、角色 + - 生成专属形象(需要 MiniMax API) + +5. **心情气泡池** + - 点击顶部心情图标 + - 左右滑动查看不同日期的心情卡片 + - 点击卡片展开查看当天的气泡池 + - 可以拖拽气泡,感受物理引擎效果 + +## 📊 API 端点 + +- `POST /api/process` - 处理文本/语音输入 +- `POST /api/chat` - 与 AI 对话(RAG) +- `GET /api/records` - 获取所有记录 +- `GET /api/moods` - 获取情绪数据 +- `GET /api/inspirations` - 获取灵感 +- `GET /api/todos` - 获取待办事项 +- `POST /api/character/generate` - 生成角色形象 +- `GET /health` - 健康检查 +- `GET /docs` - API 文档 + +## 🔗 相关链接 + +- [GitHub 仓库](https://github.com/kernel-14/Nora) +- [详细文档](https://github.com/kernel-14/Nora/blob/main/README.md) +- [智谱 AI](https://open.bigmodel.cn/) +- [MiniMax](https://platform.minimaxi.com/) + +## 📝 技术栈 + +- **后端**: FastAPI + Python 3.11 +- **前端**: React + TypeScript + Vite +- **物理引擎**: Matter.js +- **AI 服务**: 智谱 AI (GLM-4) + MiniMax +- **部署**: Hugging Face Spaces (Docker) + +## 📄 License + +MIT License diff --git a/deployment/README_MODELSCOPE.md b/deployment/README_MODELSCOPE.md new file mode 100644 index 0000000000000000000000000000000000000000..4622b622566c7b95956ac346faf7c7311873302f --- /dev/null +++ b/deployment/README_MODELSCOPE.md @@ -0,0 +1,126 @@ +# 🌟 治愈系记录助手 - SoulMate AI Companion + +一个温暖、治愈的 AI 陪伴应用,帮助你记录心情、捕捉灵感、管理待办。 + +## ✨ 核心特性 + +- 🎤 **语音/文字快速记录** - 自动分类保存 +- 🤖 **AI 语义解析** - 智能提取情绪、灵感和待办 +- 💬 **AI 对话陪伴(RAG)** - 基于历史记录的个性化对话 +- 🖼️ **AI 形象定制** - 生成专属治愈系角色(720 种组合) +- 🫧 **物理引擎心情池** - 基于 Matter.js 的动态气泡可视化 + +## 🚀 快速开始 + +### 在线使用 + +直接访问本应用即可使用完整功能! 
+ +### 配置 API 密钥 + +在 ModelScope 的环境变量中配置: + +**必需:** +- `ZHIPU_API_KEY` - 智谱 AI API 密钥 + - 获取地址:https://open.bigmodel.cn/ + - 用途:语音识别、语义解析、AI 对话 + +**可选:** +- `MINIMAX_API_KEY` - MiniMax API 密钥 +- `MINIMAX_GROUP_ID` - MiniMax Group ID + - 获取地址:https://platform.minimaxi.com/ + - 用途:AI 形象生成 + +## 📖 使用说明 + +1. **首页快速记录** + - 点击麦克风录音或在输入框输入文字 + - AI 自动分析并分类保存 + +2. **查看分类数据** + - 点击顶部心情、灵感、待办图标 + - 查看不同类型的记录 + +3. **与 AI 对话** + - 点击 AI 形象显示问候对话框 + - 点击对话框中的聊天图标进入完整对话 + - AI 基于你的历史记录提供个性化回复 + +4. **定制 AI 形象** + - 点击右下角 ✨ 按钮 + - 选择颜色、性格、外观、角色 + - 生成专属形象(需要 MiniMax API) + +5. **心情气泡池** + - 点击顶部心情图标 + - 左右滑动查看不同日期的心情卡片 + - 点击卡片展开查看当天的气泡池 + - 可以拖拽气泡,感受物理引擎效果 + +## 📊 API 端点 + +- `POST /api/process` - 处理文本/语音输入 +- `POST /api/chat` - 与 AI 对话(RAG) +- `GET /api/records` - 获取所有记录 +- `GET /api/moods` - 获取情绪数据 +- `GET /api/inspirations` - 获取灵感 +- `GET /api/todos` - 获取待办事项 +- `POST /api/character/generate` - 生成角色形象 +- `GET /health` - 健康检查 +- `GET /docs` - API 文档 + +## 🔗 相关链接 + +- [GitHub 仓库](https://github.com/kernel-14/Nora) +- [详细文档](https://github.com/kernel-14/Nora/blob/main/README.md) +- [智谱 AI](https://open.bigmodel.cn/) +- [MiniMax](https://platform.minimaxi.com/) + +## 📝 技术栈 + +- **后端**: FastAPI + Python 3.11 +- **前端**: React + TypeScript + Vite +- **物理引擎**: Matter.js +- **AI 服务**: 智谱 AI (GLM-4) + MiniMax +- **部署**: ModelScope (Gradio) + +## 📄 License + +MIT License + +--- + +## 🚀 部署到 ModelScope + +### 方法一:通过 Git 导入 + +1. 在 ModelScope 创建新的应用空间 +2. 选择 "从 Git 导入" +3. 输入仓库地址:`https://github.com/kernel-14/Nora.git` +4. 选择 Gradio SDK +5. 配置环境变量(见上方配置说明) +6. 点击创建 + +### 方法二:手动上传 + +1. 克隆本仓库到本地 +2. 在 ModelScope 创建新的应用空间 +3. 上传所有文件 +4. 确保 `configuration.json` 和 `app_modelscope.py` 在根目录 +5. 配置环境变量 +6. 
启动应用 + +### 文件说明 + +- `app_modelscope.py` - ModelScope 入口文件 +- `configuration.json` - ModelScope 配置文件 +- `requirements_modelscope.txt` - Python 依赖(使用兼容的 Gradio 版本) +- `app/` - FastAPI 后端代码 +- `frontend/dist/` - 前端构建产物 +- `data/` - 数据存储目录 + +### 注意事项 + +- 确保 `frontend/dist/` 目录已包含构建好的前端文件 +- 环境变量必须正确配置才能使用 AI 功能 +- ModelScope 使用 Gradio 4.44.1 版本以避免依赖冲突 diff --git a/deployment/app_modelscope.py b/deployment/app_modelscope.py new file mode 100644 index 0000000000000000000000000000000000000000..6f89cc73bc031bf110c330c3fde975c04487e3dc --- /dev/null +++ b/deployment/app_modelscope.py @@ -0,0 +1,187 @@ +""" +ModelScope 部署入口文件 +使用 Gradio 包装 FastAPI 应用 +""" + +import os +import sys +from pathlib import Path +import gradio as gr + +# 添加项目根目录到 Python 路径 +sys.path.insert(0, str(Path(__file__).parent)) + +# 设置环境变量 +os.environ.setdefault("DATA_DIR", "data") +os.environ.setdefault("LOG_LEVEL", "INFO") + +# 确保数据目录存在 +data_dir = Path("data") +data_dir.mkdir(exist_ok=True) + +generated_images_dir = Path("generated_images") +generated_images_dir.mkdir(exist_ok=True) + +# 导入 FastAPI 应用 +from app.main import app as fastapi_app +from fastapi.staticfiles import StaticFiles +from fastapi.responses import FileResponse + +# 挂载前端静态文件 +frontend_dist = Path(__file__).parent / "frontend" / "dist" +if frontend_dist.exists(): + # 挂载静态资源(CSS, JS) + assets_dir = frontend_dist / "assets" + if assets_dir.exists(): + fastapi_app.mount("/assets", StaticFiles(directory=str(assets_dir)), name="assets") + print(f"✅ 前端资源文件已挂载: {assets_dir}") + + print(f"✅ 前端应用已挂载: {frontend_dist}") +else: + print(f"⚠️ 前端构建目录不存在: {frontend_dist}") + +# 重写根路由以服务前端 +@fastapi_app.get("/", include_in_schema=False) +async def serve_root(): + """服务前端应用首页""" + if frontend_dist.exists(): + index_file = frontend_dist / "index.html" + if index_file.exists(): + return FileResponse(index_file) + return { + "service": "SoulMate AI Companion", + "status": "running", + "version": "1.0.0", + "message": "Welcome! 
Visit /docs for API documentation." + } + +# 添加 catch-all 路由用于 SPA +@fastapi_app.get("/{full_path:path}", include_in_schema=False) +async def serve_spa(full_path: str): + """服务前端应用(SPA 路由支持)""" + # 如果是 API 路径,跳过 + if full_path.startswith("api/") or full_path == "docs" or full_path == "openapi.json" or full_path == "health": + from fastapi import HTTPException + raise HTTPException(status_code=404, detail="Not found") + + # 返回前端 index.html + if frontend_dist.exists(): + index_file = frontend_dist / "index.html" + if index_file.exists(): + return FileResponse(index_file) + + return {"error": "Frontend not found"} + +# 创建 Gradio 界面(用于 ModelScope 的展示) +with gr.Blocks( + title="治愈系记录助手 - SoulMate AI Companion", + theme=gr.themes.Soft( + primary_hue="purple", + secondary_hue="pink", + ), +) as demo: + + gr.Markdown(""" + # 🌟 治愈系记录助手 - SoulMate AI Companion + + 一个温暖、治愈的 AI 陪伴应用,帮助你记录心情、捕捉灵感、管理待办。 + + ### ✨ 核心特性 + - 🎤 **语音/文字快速记录** - 自动分类保存 + - 🤖 **AI 语义解析** - 智能提取情绪、灵感和待办 + - 💬 **AI 对话陪伴(RAG)** - 基于历史记录的个性化对话 + - 🖼️ **AI 形象定制** - 生成专属治愈系角色(720 种组合) + - 🫧 **物理引擎心情池** - 基于 Matter.js 的动态气泡可视化 + + --- + + ### 🚀 开始使用 + + **🎯 前端应用地址:** 点击上方的 "App" 标签页访问完整应用 + + **📚 API 文档:** [FastAPI Swagger Docs →](/docs) + + --- + + ### 📖 使用说明 + + 1. **首页快速记录** + - 点击麦克风录音或在输入框输入文字 + - AI 自动分析并分类保存 + + 2. **查看分类数据** + - 点击顶部心情、灵感、待办图标 + - 查看不同类型的记录 + + 3. **与 AI 对话** + - 点击 AI 形象显示问候对话框 + - 点击对话框中的聊天图标进入完整对话 + - AI 基于你的历史记录提供个性化回复 + + 4. **定制 AI 形象** + - 点击右下角 ✨ 按钮 + - 选择颜色、性格、外观、角色 + - 生成专属形象(需要 MiniMax API) + + 5. 
**心情气泡池** + - 点击顶部心情图标 + - 左右滑动查看不同日期的心情卡片 + - 点击卡片展开查看当天的气泡池 + - 可以拖拽气泡,感受物理引擎效果 + + --- + + ### ⚙️ 配置说明 + + 需要在 ModelScope 的环境变量中配置: + + **必需:** + - `ZHIPU_API_KEY` - 智谱 AI API 密钥 + - 获取地址:https://open.bigmodel.cn/ + - 用途:语音识别、语义解析、AI 对话 + + **可选:** + - `MINIMAX_API_KEY` - MiniMax API 密钥 + - `MINIMAX_GROUP_ID` - MiniMax Group ID + - 获取地址:https://platform.minimaxi.com/ + - 用途:AI 形象生成 + + --- + + ### 🔗 相关链接 + - [GitHub 仓库](https://github.com/kernel-14/Nora) + - [详细文档](https://github.com/kernel-14/Nora/blob/main/README.md) + - [智谱 AI](https://open.bigmodel.cn/) + - [MiniMax](https://platform.minimaxi.com/) + + --- + + ### 📊 API 端点 + + - `POST /api/process` - 处理文本/语音输入 + - `POST /api/chat` - 与 AI 对话(RAG) + - `GET /api/records` - 获取所有记录 + - `GET /api/moods` - 获取情绪数据 + - `GET /api/inspirations` - 获取灵感 + - `GET /api/todos` - 获取待办事项 + - `POST /api/character/generate` - 生成角色形象 + - `GET /health` - 健康检查 + - `GET /docs` - API 文档 + """) + +# 挂载 FastAPI 到 Gradio +app = gr.mount_gradio_app(fastapi_app, demo, path="/gradio") + +# 如果直接运行此文件 +if __name__ == "__main__": + import uvicorn + print("=" * 50) + print("🌟 治愈系记录助手 - SoulMate AI Companion") + print("=" * 50) + print(f"📍 前端应用: http://0.0.0.0:7860/") + print(f"📚 Gradio 界面: http://0.0.0.0:7860/gradio") + print(f"📖 API 文档: http://0.0.0.0:7860/docs") + print(f"🔍 健康检查: http://0.0.0.0:7860/health") + print("=" * 50) + + uvicorn.run(app, host="0.0.0.0", port=7860) diff --git a/deployment/configuration.json b/deployment/configuration.json new file mode 100644 index 0000000000000000000000000000000000000000..5b5c1319418e6fec0afb3e699e6527ff2de9d597 --- /dev/null +++ b/deployment/configuration.json @@ -0,0 +1,5 @@ +{ + "framework": "Gradio", + "task": "chat", + "allow_remote_code": true +} diff --git a/deployment/deploy_to_hf.bat b/deployment/deploy_to_hf.bat new file mode 100644 index 0000000000000000000000000000000000000000..305d99bfd80eb1fad31356e0f687910b20bee455 --- /dev/null +++ b/deployment/deploy_to_hf.bat @@ -0,0 +1,109 @@ 
+@echo off +chcp 65001 >nul +echo 🚀 开始部署到 Hugging Face Spaces... +echo. + +REM 检查是否已登录 +huggingface-cli whoami >nul 2>&1 +if errorlevel 1 ( + echo ❌ 请先登录 Hugging Face CLI + echo 运行: huggingface-cli login + pause + exit /b 1 +) + +REM 获取用户名 +for /f "tokens=2" %%i in ('huggingface-cli whoami ^| findstr "username:"') do set USERNAME=%%i +echo ✅ 已登录为: %USERNAME% +echo. + +REM 询问 Space 名称 +set /p SPACE_NAME="请输入 Space 名称 (默认: soulmate-ai-companion): " +if "%SPACE_NAME%"=="" set SPACE_NAME=soulmate-ai-companion + +echo. +echo 📦 准备文件... + +REM 构建前端 +echo 🔨 构建前端... +cd frontend +call npm install +call npm run build +cd .. + +if not exist "frontend\dist" ( + echo ❌ 前端构建失败 + pause + exit /b 1 +) + +echo ✅ 前端构建完成 +echo. + +REM 创建临时目录 +set TEMP_DIR=temp_hf_deploy +if exist %TEMP_DIR% rmdir /s /q %TEMP_DIR% +mkdir %TEMP_DIR% + +REM 复制文件 +echo 📋 复制文件... +copy app.py %TEMP_DIR%\ +copy requirements_hf.txt %TEMP_DIR%\requirements.txt +copy README_HF.md %TEMP_DIR%\README.md +copy .gitattributes %TEMP_DIR%\ +xcopy /E /I /Y app %TEMP_DIR%\app +xcopy /E /I /Y frontend\dist %TEMP_DIR%\frontend +mkdir %TEMP_DIR%\data +mkdir %TEMP_DIR%\generated_images + +REM 创建或克隆 Space +echo 🌐 准备 Space... +set SPACE_URL=https://huggingface.co/spaces/%USERNAME%/%SPACE_NAME% + +huggingface-cli repo info spaces/%USERNAME%/%SPACE_NAME% >nul 2>&1 +if errorlevel 1 ( + echo 🆕 创建新 Space... + huggingface-cli repo create %SPACE_NAME% --type space --space_sdk gradio +) else ( + echo ✅ Space 已存在 +) + +cd %TEMP_DIR% +git clone %SPACE_URL% . + +REM 复制文件到仓库 +echo 📤 准备上传... +copy ..\app.py . +copy ..\requirements_hf.txt requirements.txt +copy ..\README_HF.md README.md +copy ..\.gitattributes . +xcopy /E /I /Y ..\app app +xcopy /E /I /Y ..\frontend\dist frontend +if not exist data mkdir data +if not exist generated_images mkdir generated_images + +REM 提交并推送 +echo 🚀 上传到 Hugging Face... +git add . +git commit -m "Deploy to Hugging Face Spaces" +git push + +cd .. +rmdir /s /q %TEMP_DIR% + +echo. +echo ✅ 部署完成! +echo. 
#!/bin/bash
#
# Quick deployment script for Hugging Face Spaces.
#
# Builds the frontend, clones (or first creates) the target Space into a
# temporary directory, copies the deployable files into that clone and pushes.
# Must be run from the directory that contains app.py, requirements_hf.txt,
# README_HF.md, .gitattributes, app/ and frontend/.

# Abort on the first failing command, unset variable or broken pipeline so a
# failed build or clone can never end in a push.
set -euo pipefail

ROOT_DIR="$(pwd)"
TEMP_DIR="temp_hf_deploy"

# Always remove the temporary clone, whether the script succeeds or fails.
trap 'rm -rf "$ROOT_DIR/$TEMP_DIR"' EXIT

echo "🚀 开始部署到 Hugging Face Spaces..."

# Require an authenticated Hugging Face CLI session.
if ! huggingface-cli whoami &> /dev/null; then
    echo "❌ 请先登录 Hugging Face CLI"
    echo "运行: huggingface-cli login"
    exit 1
fi

# Extract the user name; "|| true" keeps errexit from firing when grep finds
# nothing, so the explicit check below can report the problem.
USERNAME=$(huggingface-cli whoami | grep "username:" | awk '{print $2}' || true)
if [ -z "$USERNAME" ]; then
    echo "❌ 无法获取 Hugging Face 用户名"
    exit 1
fi
echo "✅ 已登录为: $USERNAME"

# Ask for the Space name, falling back to the default on empty input or EOF.
SPACE_NAME=""
read -r -p "请输入 Space 名称 (默认: soulmate-ai-companion): " SPACE_NAME || true
SPACE_NAME=${SPACE_NAME:-soulmate-ai-companion}

echo "📦 准备文件..."

# Fail early, with a clear message, when a required input is missing.
for required in app.py requirements_hf.txt README_HF.md .gitattributes app frontend; do
    if [ ! -e "$required" ]; then
        echo "❌ 缺少文件: $required"
        exit 1
    fi
done

# Build the frontend in a subshell so the working directory is not changed.
echo "🔨 构建前端..."
if ! (cd frontend && npm install && npm run build) || [ ! -d "frontend/dist" ]; then
    echo "❌ 前端构建失败"
    exit 1
fi

echo "✅ 前端构建完成"

# Create the Space when it does not exist yet.
echo "🌐 准备 Space..."
SPACE_URL="https://huggingface.co/spaces/$USERNAME/$SPACE_NAME"

if huggingface-cli repo info "spaces/$USERNAME/$SPACE_NAME" &> /dev/null; then
    echo "✅ Space 已存在,克隆中..."
else
    echo "🆕 创建新 Space..."
    huggingface-cli repo create "$SPACE_NAME" --type space --space_sdk gradio
fi

# Clone into a fresh directory: git refuses to clone into a non-empty one,
# so the deployable files are copied in only after the clone.
rm -rf "$TEMP_DIR"
git clone "$SPACE_URL" "$TEMP_DIR"

# Copy the deployable files into the clone.
echo "📋 复制文件..."
cp app.py "$TEMP_DIR/"
cp requirements_hf.txt "$TEMP_DIR/requirements.txt"
cp README_HF.md "$TEMP_DIR/README.md"
cp .gitattributes "$TEMP_DIR/"

# Drop stale copies so files deleted locally also disappear from the Space.
rm -rf "$TEMP_DIR/app" "$TEMP_DIR/frontend/dist"
cp -r app "$TEMP_DIR/"

# frontend/ must exist before "cp -r frontend/dist" can place dist/ inside it.
mkdir -p "$TEMP_DIR/frontend" "$TEMP_DIR/data" "$TEMP_DIR/generated_images"
cp -r frontend/dist "$TEMP_DIR/frontend/"

echo "📤 准备上传..."
cd "$TEMP_DIR"

# Commit and push, skipping both when nothing changed (an empty commit would
# otherwise fail and abort the script).
echo "🚀 上传到 Hugging Face..."
git add .
if git diff --cached --quiet; then
    echo "ℹ️ 没有需要提交的更改"
else
    git commit -m "Deploy to Hugging Face Spaces"
    git push
fi

cd "$ROOT_DIR"
rm -rf "$TEMP_DIR"

echo ""
echo "✅ 部署完成!"
echo ""
echo "📍 Space URL: $SPACE_URL"
echo ""
echo "⚙️ 下一步:"
echo "1. 访问 $SPACE_URL"
echo "2. 点击 Settings → Repository secrets"
echo "3. 添加环境变量:"
echo "   - ZHIPU_API_KEY (必需)"
echo "   - MINIMAX_API_KEY (可选)"
echo "   - MINIMAX_GROUP_ID (可选)"
echo ""
echo "🎉 完成后即可使用!"
+python-multipart==0.0.12 +python-dotenv==1.0.1 + +# Additional dependencies +aiofiles==24.1.0 diff --git a/deployment/requirements_modelscope.txt b/deployment/requirements_modelscope.txt new file mode 100644 index 0000000000000000000000000000000000000000..6abd4359fd9ed65f41f34ac3343618139170ca28 --- /dev/null +++ b/deployment/requirements_modelscope.txt @@ -0,0 +1,17 @@ +# ModelScope 部署依赖 +# 使用兼容的 Gradio 版本 + +# Gradio - 使用稳定版本 +gradio==4.44.1 + +# Core dependencies (compatible with Python 3.11+) +fastapi==0.115.0 +uvicorn[standard]==0.32.0 +pydantic==2.10.0 +pydantic-settings==2.6.0 +httpx==0.27.0 +python-multipart==0.0.12 +python-dotenv==1.0.1 + +# Additional dependencies +aiofiles==24.1.0 diff --git "a/docs/API_\351\205\215\347\275\256\350\257\264\346\230\216.md" "b/docs/API_\351\205\215\347\275\256\350\257\264\346\230\216.md" new file mode 100644 index 0000000000000000000000000000000000000000..cdd457e3204e2e58db4d9d24147e2e379a71cd33 --- /dev/null +++ "b/docs/API_\351\205\215\347\275\256\350\257\264\346\230\216.md" @@ -0,0 +1,113 @@ +# API 配置说明 + +## 自动检测 API 地址 + +前端应用会自动检测运行环境并配置正确的 API 地址。 + +### 支持的环境 + +#### 1. 生产环境(自动检测) + +**Hugging Face Spaces:** +- 域名包含:`hf.space`, `huggingface.co`, `gradio.live` +- API 地址:使用相同的协议和域名 +- 示例:`https://huggingface.co/spaces/kernel14/Nora` + - 前端:`https://huggingface.co/spaces/kernel14/Nora` + - API:`https://huggingface.co/spaces/kernel14/Nora/api/...` + +**ModelScope:** +- 域名包含:`modelscope.cn` +- API 地址:使用相同的协议和域名 +- 示例:`https://modelscope.cn/studios/xxx/yyy` + - 前端:`https://modelscope.cn/studios/xxx/yyy` + - API:`https://modelscope.cn/studios/xxx/yyy/api/...` + +#### 2. 局域网访问 + +**通过 IP 地址访问:** +- 前端:`http://192.168.1.100:5173` +- API:`http://192.168.1.100:8000` + +**通过主机名访问:** +- 前端:`http://mycomputer.local:5173` +- API:`http://mycomputer.local:8000` + +#### 3. 
本地开发 + +**默认配置:** +- 前端:`http://localhost:5173` +- API:`http://localhost:8000` + +### 环境变量配置(可选) + +如果需要手动指定 API 地址,可以在前端项目中创建 `.env.local` 文件: + +```env +VITE_API_URL=https://your-custom-api-url.com +``` + +### 检测逻辑 + +```typescript +const getApiBaseUrl = () => { + // 1. 优先使用环境变量 + if (import.meta.env.VITE_API_URL) { + return import.meta.env.VITE_API_URL; + } + + // 2. 检测生产环境(Hugging Face, ModelScope) + if (hostname.includes('hf.space') || + hostname.includes('huggingface.co') || + hostname.includes('modelscope.cn')) { + return `${protocol}//${hostname}`; + } + + // 3. 检测局域网访问 + if (hostname !== 'localhost' && hostname !== '127.0.0.1') { + return `${protocol}//${hostname}:8000`; + } + + // 4. 默认本地开发 + return 'http://localhost:8000'; +}; +``` + +### 调试 + +打开浏览器控制台,查看 API 地址: + +``` +🔗 API Base URL: https://huggingface.co/spaces/kernel14/Nora +``` + +### 常见问题 + +**Q: 为什么其他设备无法访问?** + +A: 确保: +1. 后端服务器绑定到 `0.0.0.0` 而不是 `127.0.0.1` +2. 防火墙允许端口 8000 +3. 使用正确的 IP 地址访问 + +**Q: Hugging Face 上 API 调用失败?** + +A: 检查: +1. 浏览器控制台的 API 地址是否正确 +2. 是否配置了必需的环境变量(`ZHIPU_API_KEY`) +3. 查看 Space 的日志是否有错误 + +**Q: 如何测试 API 连接?** + +A: 访问以下地址: +- 健康检查:`/health` +- API 文档:`/docs` +- 测试页面:`/test_api.html` + +### 部署检查清单 + +- [ ] 前端已重新构建(`npm run build`) +- [ ] `frontend/dist/` 已提交到 Git +- [ ] 环境变量已配置(Hugging Face Secrets / ModelScope 环境变量) +- [ ] Space 已重启 +- [ ] 浏览器控制台显示正确的 API 地址 +- [ ] 测试 API 调用是否成功 diff --git a/docs/FEATURE_SUMMARY.md b/docs/FEATURE_SUMMARY.md new file mode 100644 index 0000000000000000000000000000000000000000..8926b0fb7f01573888cfe799346179cc33ca7b94 --- /dev/null +++ b/docs/FEATURE_SUMMARY.md @@ -0,0 +1,368 @@ +# Home Interaction Feature - Implementation Summary + +## Overview + +This document summarizes the implementation of the home page interaction feature for the SoulMate AI Companion application. The feature includes two complementary functionalities: + +1. **Quick Recording** - Fast capture of thoughts, inspirations, and todos +2. 
**AI Chat (RAG-Enhanced)** - Intelligent conversation with context awareness + +## Key Features + +### 1. Home Page Quick Recording + +**Purpose:** Enable users to quickly record their thoughts through voice or text input. + +**Workflow:** +``` +User Input (Voice/Text) + ↓ +Call /api/process + ↓ +AI Semantic Analysis + ↓ +Save to records.json + ↓ +Auto-split to: + - moods.json (emotions) + - inspirations.json (ideas) + - todos.json (tasks) +``` + +**Characteristics:** +- ✅ One-time processing +- ✅ Automatic categorization +- ✅ Structured data output +- ✅ No conversation context needed + +### 2. AI Chat with RAG Enhancement + +**Purpose:** Provide intelligent, warm companionship through context-aware conversations. + +**Workflow:** +``` +User Message + ↓ +Call /api/chat + ↓ +Load Recent Records (last 10) + ↓ +Build RAG Context + ↓ +AI Generates Personalized Response + ↓ +Return to User +``` + +**Characteristics:** +- ✅ Each message calls API +- ✅ Uses RAG (Retrieval-Augmented Generation) +- ✅ Context from records.json +- ✅ Personalized, warm responses +- ✅ Conversation not saved + +## Technical Implementation + +### Backend Changes + +#### File: `app/main.py` + +**Updated `/api/chat` endpoint with RAG:** + +```python +@app.post("/api/chat") +async def chat_with_ai(text: str = Form(...)): + # Load user's records as RAG knowledge base + records = storage_service._read_json_file(storage_service.records_file) + recent_records = records[-10:] # Last 10 records + + # Build context from records + context_parts = [] + for record in recent_records: + context_entry = f"[{timestamp}] User said: {original_text}" + if mood: + context_entry += f"\nMood: {mood['type']}" + if inspirations: + context_entry += f"\nInspirations: {ideas}" + if todos: + context_entry += f"\nTodos: {tasks}" + context_parts.append(context_entry) + + # Build system prompt with context + system_prompt = f"""You are a warm, empathetic AI companion. 
+ You can reference the user's history to provide more caring responses: + + {context_text} + + Please respond with warmth and understanding based on this background.""" + + # Call AI API with context + response = await client.post( + "https://open.bigmodel.cn/api/paas/v4/chat/completions", + json={ + "model": "glm-4-flash", + "messages": [ + {"role": "system", "content": system_prompt}, + {"role": "user", "content": text} + ] + } + ) +``` + +### Frontend Changes + +#### New Component: `frontend/components/HomeInput.tsx` + +**Features:** +- Large circular microphone button with gradient +- Text input field +- Real-time processing status +- Success/error animations +- Auto-refresh data on completion + +**Key Functions:** + +```typescript +// Voice recording +const startRecording = async () => { + const stream = await navigator.mediaDevices.getUserMedia({ audio: true }); + const mediaRecorder = new MediaRecorder(stream); + // Recording logic... +}; + +// Process audio +const processAudio = async (audioBlob: Blob) => { + const file = new File([audioBlob], 'recording.webm'); + await apiService.processInput(file); + setShowSuccess(true); + onRecordComplete(); +}; + +// Process text +const processText = async () => { + await apiService.processInput(undefined, textInput); + setTextInput(''); + setShowSuccess(true); + onRecordComplete(); +}; +``` + +#### Updated: `frontend/App.tsx` + +Integrated HomeInput component into the home page: + +```typescript +
+ {greeting} +
+还没有历史形象
+ +当前形象
+🎨 {preferences.color}
+😊 {preferences.personality}
+✨ {preferences.appearance}
+🎭 {preferences.role}
+{error}
++ {mode === 'history' + ? '点击历史形象即可切换,或创建新形象' + : isGenerating + ? '正在生成你的专属 AI 形象,请稍候(约 30-60 秒)...' + : currentImageUrl + ? '修改选项后点击"重新生成"更新形象' + : '选择你喜欢的风格,生成专属的 AI 陪伴形象' + } +
++ 在线 · 随时陪你聊天 +
++ {message.content} +
++ 按 Enter 发送,Shift + Enter 换行 +
++ {post.content} +
+
+ 说出或写下你的想法、灵感、待办事项
+ 我会帮你整理和记录
+
+ {item.content} +
++ {nodes.find((n: Node) => n.id === hoveredNode)?.type === 'tag' + ? nodes.find((n: Node) => n.id === hoveredNode)?.label + : nodes.find((n: Node) => n.id === hoveredNode)?.data?.content.substring(0, 35) + '...' + } +
+没有匹配的灵感
+ + > + ) : ( + <> +等待灵感的火花...
+ > + )} ++ "每一种情绪都值得被记录和珍惜" +
+暂无心情记录
++ 左右滑动或点击箭头切换日期 +
++ 左键查看详情 · 右键取消 · 拖动互动 +
++ {formatDate(selectedMood.timestamp)} +
+记录原文
++ {selectedMood.originalText} +
+关键词
++ "每一种情绪都值得被记录和珍惜" +
++ {subtitle} +
+ )} ++ Born on {profile.birthday} +
+ + {/* Mood Status Bubble */} ++ {profile.moodStatus} + 🍃 +
++ {item.content} +
++ {item.title} +
+ + {/* Meta Info (Time, Location & Category) */} +访问地址:
+主机名:
+协议:
+端口:
+检测到的 API 地址:
+Inspiration Knowledge Graph
+ +基于 Matter.js 的动态气泡交互演示
+ +