File size: 13,794 Bytes
59bd45e
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
"""Semantic Parser service for Voice Text Processor.

This module implements the SemanticParserService class for parsing text
into structured data (mood, inspirations, todos) using the GLM-4-Flash API.

Requirements: 3.1, 3.2, 3.3, 3.4, 3.5, 9.2, 9.5
"""

import logging
import json
from typing import Optional
import httpx

from app.models import ParsedData, MoodData, InspirationData, TodoData


# Module-level logger named after this module; every log call in this file
# goes through it.
logger = logging.getLogger(__name__)


class SemanticParserError(Exception):
    """Error signalling that a semantic parsing operation did not succeed.

    Raised when the GLM-4-Flash API call fails, for example because of
    network issues, API errors, or invalid responses.

    Requirements: 3.5
    """

    def __init__(self, message: str = "语义解析服务不可用"):
        """Store the failure description and forward it to ``Exception``.

        Args:
            message: Human-readable description of what went wrong. It is
                kept on ``self.message`` and also becomes ``args[0]``.
        """
        # Keep the text on a named attribute so handlers do not have to
        # reach into ``args``.
        self.message = message
        super().__init__(message)


class SemanticParserService:
    """Service for parsing text into structured data using GLM-4-Flash API.

    The service posts the user's text together with a fixed system prompt to
    the GLM-4-Flash chat-completions endpoint, decodes the JSON document the
    model returns and converts it into ``ParsedData`` (mood, inspirations,
    todos). Every failure of the remote call or of the response decoding is
    logged and surfaced to the caller as ``SemanticParserError``.

    Attributes:
        api_key: Zhipu AI API key for authentication
        client: Async HTTP client for making API requests
        api_url: GLM-4-Flash API endpoint URL
        model: Model identifier
        system_prompt: System prompt for data conversion

    Requirements: 3.1, 3.2, 3.3, 3.4, 3.5, 9.2, 9.5
    """

    def __init__(self, api_key: str):
        """Initialize the semantic parser service.

        Args:
            api_key: Zhipu AI API key for authentication

        Requirements: 3.1, 3.2
        """
        self.api_key = api_key
        # One shared client with a 30 second timeout; release it with close().
        self.client = httpx.AsyncClient(timeout=30.0)
        self.api_url = "https://open.bigmodel.cn/api/paas/v4/chat/completions"
        self.model = "glm-4-flash"

        # System prompt as specified in requirements. It is sent verbatim to
        # the model, so its text is runtime behavior and must not be edited
        # casually.
        self.system_prompt = (
            "你是一个专业的文本语义分析助手。请将用户输入的文本解析为结构化的 JSON 数据。\n\n"
            "你需要提取以下三个维度的信息:\n\n"
            "1. **情绪 (mood)**:\n"
            "   - type: 情绪类型(如:喜悦、焦虑、平静、忧虑、兴奋、悲伤等中文词汇)\n"
            "   - intensity: 情绪强度(1-10的整数,10表示最强烈)\n"
            "   - keywords: 情绪关键词列表(3-5个中文词)\n\n"
            "2. **灵感 (inspirations)**:数组,每个元素包含:\n"
            "   - core_idea: 核心观点或想法(20字以内的中文)\n"
            "   - tags: 相关标签列表(3-5个中文词)\n"
            "   - category: 所属分类(必须是:工作、生活、学习、创意 之一)\n\n"
            "3. **待办 (todos)**:数组,每个元素包含:\n"
            "   - task: 任务描述(中文)\n"
            "   - time: 时间信息(如:明天、下周、周五等,如果没有则为null)\n"
            "   - location: 地点信息(如果没有则为null)\n"
            "   - status: 状态(默认为\"pending\")\n\n"
            "**重要规则**:\n"
            "- 如果文本中没有某个维度的信息,mood 返回 null,inspirations 和 todos 返回空数组 []\n"
            "- 必须返回有效的 JSON 格式,不要添加任何其他说明文字\n"
            "- 所有字段名使用英文,内容使用中文\n"
            "- 直接返回 JSON,不要用 markdown 代码块包裹\n\n"
            "返回格式示例:\n"
            "{\n"
            "  \"mood\": {\"type\": \"焦虑\", \"intensity\": 7, \"keywords\": [\"压力\", \"疲惫\", \"放松\"]},\n"
            "  \"inspirations\": [{\"core_idea\": \"晚霞可以缓解压力\", \"tags\": [\"自然\", \"治愈\"], \"category\": \"生活\"}],\n"
            "  \"todos\": [{\"task\": \"整理文档\", \"time\": \"明天\", \"location\": null, \"status\": \"pending\"}]\n"
            "}"
        )

    async def close(self):
        """Close the HTTP client.

        This should be called when the service is no longer needed
        to properly clean up resources.
        """
        await self.client.aclose()

    async def parse(self, text: str) -> ParsedData:
        """Parse text into structured data using GLM-4-Flash API.

        Sends ``text`` with the configured system prompt to the API, decodes
        the JSON object found in the model's reply and builds the result
        models from it. Individual mood / inspiration / todo entries that the
        models reject are skipped with a warning instead of failing the whole
        call.

        Args:
            text: Text content to parse

        Returns:
            ParsedData object containing mood (optional), inspirations (list),
            and todos (list). Missing dimensions return null or empty arrays.

        Raises:
            SemanticParserError: If the API call fails (timeout, network
                error, non-200 status) or the response cannot be decoded
                into a JSON object.

        Requirements: 3.1, 3.2, 3.3, 3.4, 3.5, 9.2, 9.5
        """
        try:
            result = await self._request_completion(text)
            content = self._extract_content(result)
            parsed_json = self._decode_payload(content)

            mood = self._build_mood(parsed_json.get("mood"))
            inspirations = self._build_inspirations(parsed_json.get("inspirations"))
            todos = self._build_todos(parsed_json.get("todos"))

            logger.info(
                "Semantic parsing successful. Mood: %s, Inspirations: %d, Todos: %d",
                "present" if mood else "none",
                len(inspirations),
                len(todos),
            )

            return ParsedData(
                mood=mood,
                inspirations=inspirations,
                todos=todos
            )

        except SemanticParserError:
            # Already logged and translated by a helper; pass through as-is.
            raise

        except httpx.TimeoutException as e:
            # Must be handled before RequestError so timeouts keep their own
            # message.
            self._log_error(f"GLM-4-Flash API request timeout: {str(e)}")
            raise SemanticParserError("语义解析服务不可用: 请求超时") from e

        except httpx.RequestError as e:
            self._log_error(f"GLM-4-Flash API request failed: {str(e)}")
            raise SemanticParserError("语义解析服务不可用: 网络错误") from e

        except Exception as e:
            # Service boundary: callers only ever see SemanticParserError.
            self._log_error(f"Unexpected error in semantic parser service: {str(e)}")
            raise SemanticParserError(f"语义解析服务不可用: {str(e)}") from e

    @staticmethod
    def _log_error(message: str, *, exc_info: bool = True) -> None:
        """Log ``message`` at ERROR level with a ``timestamp`` extra field.

        Args:
            message: Text to log.
            exc_info: Attach the active exception's traceback. Pass ``False``
                when no exception is being handled, otherwise the logging
                module appends a meaningless ``NoneType: None`` traceback.
        """
        import time  # local import: keeps this block self-contained

        logger.error(message, exc_info=exc_info, extra={"timestamp": time.time()})

    async def _request_completion(self, text: str):
        """POST the chat-completion request and return the decoded JSON body.

        Raises:
            SemanticParserError: On a non-200 status or a body that is not
                valid JSON. Transport errors from httpx propagate to
                ``parse``, which translates them.
        """
        headers = {
            "Authorization": f"Bearer {self.api_key}",
            "Content-Type": "application/json"
        }
        payload = {
            "model": self.model,
            "messages": [
                {"role": "system", "content": self.system_prompt},
                {"role": "user", "content": text},
            ],
            "temperature": 0.7,
            "top_p": 0.9
        }

        logger.info(
            "Calling GLM-4-Flash API for semantic parsing. Text length: %d",
            len(text),
        )

        response = await self.client.post(
            self.api_url,
            headers=headers,
            json=payload
        )

        if response.status_code != 200:
            error_msg = f"GLM-4-Flash API returned status {response.status_code}"
            # Best effort: prefer the structured error body, fall back to the
            # raw text when the body is not JSON.
            try:
                error_msg += f": {response.json()}"
            except Exception:
                error_msg += f": {response.text}"

            # No exception is active here, so do not request a traceback.
            self._log_error(
                f"Semantic parsing API call failed: {error_msg}",
                exc_info=False,
            )
            raise SemanticParserError(f"语义解析服务不可用: {error_msg}")

        try:
            return response.json()
        except Exception as e:
            self._log_error(f"Failed to parse GLM-4-Flash API response: {str(e)}")
            raise SemanticParserError("语义解析服务不可用: 响应格式无效") from e

    @staticmethod
    def _extract_content(result) -> str:
        """Return the assistant message text from a chat-completion body.

        Raises:
            SemanticParserError: If ``choices[0].message.content`` is missing
                or is not a string.
        """
        try:
            content = result["choices"][0]["message"]["content"]
        except (KeyError, IndexError, TypeError) as e:
            # TypeError covers bodies whose shape is wrong altogether, e.g. a
            # top-level list or ``"choices": null``.
            SemanticParserService._log_error(
                f"Invalid API response structure: {str(e)}"
            )
            raise SemanticParserError("语义解析服务不可用: 响应结构无效") from e

        if not isinstance(content, str):
            SemanticParserService._log_error(
                "Invalid API response structure: content is "
                f"{type(content).__name__}, expected str",
                exc_info=False,
            )
            raise SemanticParserError("语义解析服务不可用: 响应结构无效")
        return content

    @staticmethod
    def _strip_code_fence(content: str) -> str:
        """Return the text inside the first markdown code fence, if any.

        A `` ```json `` fence takes precedence over a bare `` ``` `` fence.
        When the closing fence is missing, everything after the opening
        fence is returned; slicing with the -1 that ``str.find`` reports
        would silently drop the final character of the JSON document.
        Content without any fence is returned unchanged.
        """
        fence = "```"
        for opener in (fence + "json", fence):
            start = content.find(opener)
            if start == -1:
                continue
            body_start = start + len(opener)
            body_end = content.find(fence, body_start)
            if body_end == -1:
                body_end = len(content)
            return content[body_start:body_end].strip()
        return content

    def _decode_payload(self, content: str) -> dict:
        """Decode the model's reply into a JSON object.

        Raises:
            SemanticParserError: If the text is not valid JSON or the decoded
                value is not a JSON object.
        """
        try:
            parsed = json.loads(self._strip_code_fence(content))
        except json.JSONDecodeError as e:
            self._log_error(f"Failed to parse JSON from API response: {str(e)}")
            raise SemanticParserError("语义解析服务不可用: JSON 解析失败") from e

        if not isinstance(parsed, dict):
            # The prompt asks for an object; a list/string/number/null cannot
            # carry the three expected keys.
            self._log_error(
                "Failed to parse JSON from API response: expected an object, "
                f"got {type(parsed).__name__}",
                exc_info=False,
            )
            raise SemanticParserError("语义解析服务不可用: JSON 解析失败")
        return parsed

    @staticmethod
    def _build_mood(mood_data) -> Optional[MoodData]:
        """Build ``MoodData`` from the ``mood`` entry, or ``None`` if unusable."""
        if not mood_data or not isinstance(mood_data, dict):
            return None
        try:
            return MoodData(
                type=mood_data.get("type"),
                intensity=mood_data.get("intensity"),
                keywords=mood_data.get("keywords", [])
            )
        except Exception as e:
            # Presumably a validation error raised by the model class; the
            # mood is optional, so degrade to "no mood" rather than fail.
            logger.warning("Failed to parse mood data: %s", e)
            return None

    @staticmethod
    def _build_inspirations(items) -> list:
        """Build ``InspirationData`` objects, skipping entries that fail."""
        inspirations = []
        if not isinstance(items, list):
            return inspirations
        for insp_data in items:
            if not isinstance(insp_data, dict):
                continue
            try:
                inspirations.append(
                    InspirationData(
                        core_idea=insp_data.get("core_idea", ""),
                        tags=insp_data.get("tags", []),
                        category=insp_data.get("category", "生活")
                    )
                )
            except Exception as e:
                logger.warning("Failed to parse inspiration data: %s", e)
        return inspirations

    @staticmethod
    def _build_todos(items) -> list:
        """Build ``TodoData`` objects, skipping entries that fail."""
        todos = []
        if not isinstance(items, list):
            return todos
        for todo_data in items:
            if not isinstance(todo_data, dict):
                continue
            try:
                todos.append(
                    TodoData(
                        task=todo_data.get("task", ""),
                        time=todo_data.get("time"),
                        location=todo_data.get("location"),
                        status=todo_data.get("status", "pending")
                    )
                )
            except Exception as e:
                logger.warning("Failed to parse todo data: %s", e)
        return todos