Trae Assistant committed on
Commit
3de8de2
·
1 Parent(s): 5e01a23
Files changed (3) hide show
  1. Dockerfile +3 -1
  2. app.py +15 -2
  3. sops_default.json +113 -0
Dockerfile CHANGED
@@ -12,7 +12,9 @@ COPY . .
12
  # Create a non-root user for security (required by HF Spaces)
13
  RUN useradd -m -u 1000 user
14
  RUN chown -R user:user /app
15
- RUN chmod -R 777 /app/data
 
 
16
 
17
  USER user
18
 
 
12
  # Create a non-root user for security (required by HF Spaces)
13
  RUN useradd -m -u 1000 user
14
  RUN chown -R user:user /app
15
+
16
+ # Ensure data directory exists and has correct permissions
17
+ RUN mkdir -p /app/data && chown -R user:user /app/data && chmod -R 777 /app/data
18
 
19
  USER user
20
 
app.py CHANGED
@@ -2,6 +2,7 @@ from flask import Flask, render_template, request, jsonify, send_file
2
  import os
3
  import json
4
  import uuid
 
5
  from datetime import datetime
6
 
7
  app = Flask(__name__)
@@ -10,12 +11,24 @@ app.config['MAX_CONTENT_LENGTH'] = 5 * 1024 * 1024 # 5MB Limit
10
  # Configuration
11
  DATA_DIR = os.path.join(os.path.dirname(__file__), 'data')
12
  SOP_FILE = os.path.join(DATA_DIR, 'sops.json')
 
 
13
  os.makedirs(DATA_DIR, exist_ok=True)
14
 
15
  # Initialize data file if not exists
16
  if not os.path.exists(SOP_FILE):
17
- with open(SOP_FILE, 'w', encoding='utf-8') as f:
18
- json.dump([], f)
 
 
 
 
 
 
 
 
 
 
19
 
20
  # Error Handlers
21
  @app.errorhandler(404)
 
2
  import os
3
  import json
4
  import uuid
5
+ import shutil
6
  from datetime import datetime
7
 
8
  app = Flask(__name__)
 
11
  # Configuration
12
  DATA_DIR = os.path.join(os.path.dirname(__file__), 'data')
13
  SOP_FILE = os.path.join(DATA_DIR, 'sops.json')
14
+ DEFAULT_SOP_FILE = os.path.join(os.path.dirname(__file__), 'sops_default.json')
15
+
16
  os.makedirs(DATA_DIR, exist_ok=True)
17
 
18
  # Initialize data file if not exists
19
  if not os.path.exists(SOP_FILE):
20
+ if os.path.exists(DEFAULT_SOP_FILE):
21
+ try:
22
+ shutil.copy(DEFAULT_SOP_FILE, SOP_FILE)
23
+ print(f"Initialized {SOP_FILE} from default data.")
24
+ except Exception as e:
25
+ print(f"Error copying default data: {e}")
26
+ # Fallback to empty list
27
+ with open(SOP_FILE, 'w', encoding='utf-8') as f:
28
+ json.dump([], f)
29
+ else:
30
+ with open(SOP_FILE, 'w', encoding='utf-8') as f:
31
+ json.dump([], f)
32
 
33
  # Error Handlers
34
  @app.errorhandler(404)
sops_default.json ADDED
@@ -0,0 +1,113 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ [
2
+ {
3
+ "id": "sop-001",
4
+ "title": "服务器故障应急响应流程",
5
+ "category": "运维",
6
+ "version": "1.2",
7
+ "status": "published",
8
+ "created_at": "2023-10-01T09:00:00",
9
+ "updated_at": "2023-11-15T14:30:00",
10
+ "content": {
11
+ "purpose": "规范服务器故障时的应急处理步骤,最小化业务中断时间。",
12
+ "scope": "所有生产环境 Linux 服务器。",
13
+ "prerequisites": [
14
+ "拥有服务器 root 权限或 sudo 权限",
15
+ "已配置监控告警系统 (Prometheus/Grafana)",
16
+ "已备份关键数据"
17
+ ],
18
+ "roles": [
19
+ "运维工程师",
20
+ "开发负责人",
21
+ "CTO"
22
+ ],
23
+ "steps": [
24
+ {
25
+ "title": "收到告警与确认",
26
+ "description": "收到监控系统告警后,立即登录监控面板确认故障范围(单机/集群)和类型(CPU/内存/磁盘/网络)。",
27
+ "role": "运维工程师"
28
+ },
29
+ {
30
+ "title": "初步诊断",
31
+ "description": "登录故障服务器,使用 top, htop, iotop, netstat 等命令检查系统资源使用情况。查看 /var/log/messages 和应用日志。",
32
+ "role": "运维工程师"
33
+ },
34
+ {
35
+ "title": "通报故障",
36
+ "description": "如果预计修复时间超过 10 分钟,需在内部群通报故障情况,并通知开发负责人。",
37
+ "role": "运维工程师"
38
+ },
39
+ {
40
+ "title": "服务重启/回滚",
41
+ "description": "尝试重启服务。如果是最近更新导致的问题,立即执行版本回滚。",
42
+ "role": "运维工程师"
43
+ },
44
+ {
45
+ "title": "故障升级",
46
+ "description": "如果 30 分钟内无法解决,升级故障至 CTO,并请求高级技术支持。",
47
+ "role": "开发负责人"
48
+ }
49
+ ],
50
+ "flowchart": "graph TD\n A[收到告警] --> B{确认故障真实性}\n B -- 是 --> C[初步诊断]\n B -- 否 --> Z[标记误报]\n C --> D{预计修复>10min?}\n D -- 是 --> E[通报故障]\n D -- 否 --> F[尝试重启/修复]\n E --> F\n F --> G{修复成功?}\n G -- 是 --> H[撰写事故报告]\n G -- 否 --> I[回滚版本]\n I --> J{回滚成功?}\n J -- 否 --> K[升级故障至CTO]",
51
+ "troubleshooting": [
52
+ {
53
+ "issue": "SSH 无法连接",
54
+ "solution": "通过云服务商 VNC 控制台登录;检查安全组规则;检查 SSHD 服务状态。"
55
+ },
56
+ {
57
+ "issue": "磁盘空间满",
58
+ "solution": "使用 du -sh * 查找大文件;清理 /tmp 或日志文件;扩容磁盘。"
59
+ }
60
+ ]
61
+ }
62
+ },
63
+ {
64
+ "id": "sop-002",
65
+ "title": "新员工入职技术配置",
66
+ "category": "人事/IT",
67
+ "version": "2.0",
68
+ "status": "published",
69
+ "created_at": "2024-01-10T10:00:00",
70
+ "updated_at": "2024-01-10T10:00:00",
71
+ "content": {
72
+ "purpose": "确保新入职员工在第一天能够获得所有必要的账号和权限。",
73
+ "scope": "技术部所有新员工。",
74
+ "prerequisites": [
75
+ "HR 已发送入职通知",
76
+ "企业邮箱已创建"
77
+ ],
78
+ "roles": [
79
+ "IT 管理员",
80
+ "部门主管"
81
+ ],
82
+ "steps": [
83
+ {
84
+ "title": "开通 GitLab/GitHub 账号",
85
+ "description": "根据部门需求,将员工账号添加到对应的 Group,并分配 Developer 权限。",
86
+ "role": "IT 管理员"
87
+ },
88
+ {
89
+ "title": "配置 VPN/内网访问",
90
+ "description": "创建 VPN 账号,发送配置文件和连接手册给新员工。",
91
+ "role": "IT 管理员"
92
+ },
93
+ {
94
+ "title": "分配 JIRA/Confluence 权限",
95
+ "description": "开通项目管理工具账号,拉入对应项目板。",
96
+ "role": "部门主管"
97
+ },
98
+ {
99
+ "title": "硬件发放与登记",
100
+ "description": "发放笔记本电脑、显示器等,并在资产管理系统中登记。",
101
+ "role": "IT 管理员"
102
+ }
103
+ ],
104
+ "flowchart": "graph TD\n A[收到入职通知] --> B[创建域账号/邮箱]\n B --> C[配置VPN]\n C --> D[开通代码仓库权限]\n D --> E[硬件发放]\n E --> F[入职指引培训]",
105
+ "troubleshooting": [
106
+ {
107
+ "issue": "VPN 连接失败",
108
+ "solution": "检查证书是否过期;检查客户端版本;确认网络端口未被封锁。"
109
+ }
110
+ ]
111
+ }
112
+ }
113
+ ]