"""Flask application for recording and exporting incident postmortems.

Postmortems are persisted as a JSON list in ``data/postmortems.json`` next to
this file. The module exposes a small REST API (list/create/update/delete)
plus an endpoint that renders a single postmortem as Markdown.
"""
import json
import logging
import os
import uuid

from flask import Flask, render_template, request, jsonify, send_from_directory

logger = logging.getLogger(__name__)

app = Flask(__name__)
app.config['MAX_CONTENT_LENGTH'] = 16 * 1024 * 1024  # 16MB limit for safety

# Configuration
DATA_DIR = os.path.join(os.path.dirname(__file__), 'data')
DATA_FILE = os.path.join(DATA_DIR, 'postmortems.json')

# Make sure the data directory exists
os.makedirs(DATA_DIR, exist_ok=True)


def _default_data():
    """Return the demo postmortems used to seed a fresh data file."""
    return [
        {
            "id": "demo-001",
            "title": "生产环境数据库连接池耗尽导致服务不可用",
            "date": "2023-10-24",
            "severity": "P0",
            "owner": "张三",
            "created_at": "2023-10-24T10:00:00Z",
            "summary": "在双十一预热活动期间,主数据库连接数突然飙升至 100%,导致订单服务无法连接数据库,所有下单请求失败。持续时间约 15 分钟。",
            "impact": "造成约 5000 单交易失败,预估 GMV 损失 100 万。用户投诉激增。",
            "timeline": [
                {"time": "20:00", "description": "流量开始激增,达到平时峰值的 3 倍", "type": "detection"},
                {"time": "20:05", "description": "监控系统报警,DB CPU 飙升至 90%", "type": "detection"},
                {"time": "20:08", "description": "DBA 介入排查,发现活动连接数打满", "type": "investigation"},
                {"time": "20:12", "description": "紧急扩容 Read Replica,并临时调大 Max Connections", "type": "fix"},
                {"time": "20:15", "description": "服务逐步恢复正常", "type": "verification"},
            ],
            "root_cause": {
                "whys": [
                    "数据库连接数打满,拒绝新连接",
                    "后端服务实例扩容后,每个实例的连接池配置未做相应调整",
                    "微服务配置中心下发的连接池参数是静态的,未随实例数动态计算",
                    "在进行压力测试时,只测试了单实例性能,未测试全链路大规模并发下的 DB 瓶颈",
                    "缺乏对数据库总连接数的全局管控机制",
                ],
                "conclusion": "缺乏全局的数据库连接治理机制,且压测覆盖不全。",
            },
            "action_items": [
                {"task": "实施数据库连接代理 (Proxy) 层", "owner": "李四", "deadline": "2023-11-01", "status": "in_progress"},
                {"task": "优化全链路压测模型,包含 DB 极限场景", "owner": "王五", "deadline": "2023-11-15", "status": "pending"},
            ],
        },
        {
            "id": "demo-002",
            "title": "支付回调接口验签逻辑缺陷",
            "date": "2023-09-15",
            "severity": "P2",
            "owner": "赵六",
            "created_at": "2023-09-15T14:00:00Z",
            "summary": "收到用户反馈支付成功但订单状态未更新。排查发现部分特殊字符导致签名验证失败。",
            "impact": "约 20 笔订单卡单,需人工补单。",
            "timeline": [
                {"time": "14:00", "description": "客服收到用户反馈", "type": "detection"},
                {"time": "14:30", "description": "定位到日志中存在大量验签失败错误", "type": "investigation"},
                {"time": "15:00", "description": "修复验签逻辑中的字符编码处理问题", "type": "fix"},
            ],
            "root_cause": {
                "whys": [
                    "签名验证失败",
                    "第三方支付回调参数中包含特殊 Emoji 字符",
                    "验签逻辑未正确处理 UTF-8 编码",
                    "开发测试阶段未覆盖特殊字符场景",
                    "",
                ],
                "conclusion": "编码规范执行不到位,测试用例缺失。",
            },
            "action_items": [
                {"task": "修复代码并补充单元测试", "owner": "赵六", "deadline": "2023-09-16", "status": "done"},
            ],
        },
    ]


def _json_object_body():
    """Return the request body as a dict, or None if it is not a JSON object.

    ``silent=True`` makes a missing/invalid JSON body yield None instead of
    raising, so the endpoints can answer with a uniform 400 response.
    """
    body = request.get_json(silent=True)
    return body if isinstance(body, dict) else None


def load_data():
    """Load all postmortems from DATA_FILE.

    If the file does not exist yet it is created with the demo records and
    those records are returned. If the file cannot be read or parsed, or does
    not contain a JSON list, the problem is logged and an empty list is
    returned.
    """
    if not os.path.exists(DATA_FILE):
        # Seed the store with the default demo data
        default_data = _default_data()
        save_data(default_data)
        return default_data

    try:
        with open(DATA_FILE, 'r', encoding='utf-8') as f:
            data = json.load(f)
    except (OSError, ValueError):
        # ValueError covers json.JSONDecodeError and UnicodeDecodeError.
        logger.exception("Failed to load postmortems from %s", DATA_FILE)
        return []

    if not isinstance(data, list):
        logger.error("Expected a JSON list in %s, got %s", DATA_FILE, type(data).__name__)
        return []
    return data


def save_data(data):
    """Write *data* to DATA_FILE as pretty-printed UTF-8 JSON.

    The content is written to a sibling temporary file first and then moved
    into place, so an interrupted write does not leave a truncated data file.
    """
    tmp_path = DATA_FILE + '.tmp'
    with open(tmp_path, 'w', encoding='utf-8') as f:
        json.dump(data, f, ensure_ascii=False, indent=2)
    os.replace(tmp_path, DATA_FILE)


@app.route('/')
def index():
    """Serve the single-page front end."""
    return render_template('index.html')


@app.route('/api/postmortems', methods=['GET'])
def get_postmortems():
    """Return every postmortem as JSON, most recently appended first."""
    data = load_data()
    # Intended to sort by last-modified/created time; for simplicity the
    # stored list order is just reversed.
    return jsonify(data[::-1])


@app.route('/api/postmortems', methods=['POST'])
def create_postmortem():
    """Append a new postmortem taken from the JSON request body.

    Returns the stored record with status 201, or a 400 error when the body
    is not a JSON object.
    """
    new_item = _json_object_body()
    if new_item is None:
        return jsonify({'error': 'Request body must be a JSON object'}), 400

    data = load_data()
    # Basic validation and default values
    if not new_item.get('id'):
        new_item['id'] = str(uuid.uuid4())
    new_item.setdefault('created_at', '')
    new_item.setdefault('updated_at', '')
    data.append(new_item)
    save_data(data)
    return jsonify(new_item), 201


@app.route('/api/postmortems/<item_id>', methods=['PUT'])
def update_postmortem(item_id):
    """Replace the postmortem identified by *item_id* with the request body.

    Returns the stored record, a 400 error when the body is not a JSON
    object, or a 404 error when no record has that id.
    """
    updated_item = _json_object_body()
    if updated_item is None:
        return jsonify({'error': 'Request body must be a JSON object'}), 400

    data = load_data()
    for i, item in enumerate(data):
        if item.get('id') == item_id:
            # Pin the id from the URL so the record stays addressable even if
            # the body omits the id or carries a different one.
            updated_item['id'] = item_id
            data[i] = updated_item
            save_data(data)
            return jsonify(updated_item)
    return jsonify({'error': 'Not found'}), 404


@app.route('/api/postmortems/<item_id>', methods=['DELETE'])
def delete_postmortem(item_id):
    """Delete the postmortem identified by *item_id*.

    Reports success even when no record matched the id.
    """
    data = load_data()
    data = [item for item in data if item.get('id') != item_id]
    save_data(data)
    return jsonify({'success': True})


@app.route('/api/export/<item_id>', methods=['GET'])
def export_markdown(item_id):
    """Render the postmortem identified by *item_id* as a Markdown document.

    Returns ``{"markdown": <text>}`` as JSON, or a plain 404 response when no
    record has that id.
    """
    data = load_data()
    item = next((i for i in data if i.get('id') == item_id), None)
    if not item:
        return "Not Found", 404

    # A stored null root_cause is treated the same as a missing one.
    root_cause = item.get('root_cause') or {}

    # Build the Markdown content
    parts = [
        f"# 事故复盘: {item.get('title', '无标题')}\n\n",
        f"**日期**: {item.get('date', '')} | **级别**: {item.get('severity', '')} | **负责人**: {item.get('owner', '')}\n\n",
        "## 1. 事故摘要 (Summary)\n",
        f"{item.get('summary', '无')}\n\n",
        "## 2. 影响范围 (Impact)\n",
        f"{item.get('impact', '无')}\n\n",
        "## 3. 时间线 (Timeline)\n",
    ]
    for t in item.get('timeline', []):
        parts.append(f"- **{t.get('time', '')}**: {t.get('description', '')} ({t.get('type', '')})\n")
    parts.append("\n")

    parts.append("## 4. 根因分析 (5 Whys)\n")
    for idx, why in enumerate(root_cause.get('whys', [])):
        # Blank entries are skipped but keep their position in the numbering.
        if why:
            parts.append(f"{idx+1}. Why? {why}\n")
    parts.append(f"\n**结论**: {root_cause.get('conclusion', '')}\n\n")

    parts.append("## 5. 改进措施 (Action Items)\n")
    parts.append("| 任务 | 负责人 | 截止日期 | 状态 |\n")
    parts.append("| --- | --- | --- | --- |\n")
    for action in item.get('action_items', []):
        parts.append(
            f"| {action.get('task', '')} | {action.get('owner', '')} | {action.get('deadline', '')} | {action.get('status', '')} |\n"
        )

    return jsonify({'markdown': "".join(parts)})


if __name__ == '__main__':
    # The server listens on all interfaces, so the interactive debugger must
    # not be on by default: it allows arbitrary code execution for anyone who
    # can reach the port. Opt in explicitly with FLASK_DEBUG=1 for local work.
    debug_enabled = os.environ.get('FLASK_DEBUG', '').strip().lower() in ('1', 'true', 'yes')
    app.run(host='0.0.0.0', port=7860, debug=debug_enabled)