Gabriel00A committed on
Commit
d612fd0
·
verified ·
1 Parent(s): ffa044a

Update process_report.py

Browse files
Files changed (1) hide show
  1. process_report.py +354 -95
process_report.py CHANGED
@@ -1,48 +1,130 @@
1
  #!/usr/bin/env python3
 
2
  """
3
  process_report.py
4
- 采购执行表 → 邮件通知
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
5
  """
6
 
 
 
 
 
 
 
 
 
 
 
7
  import pandas as pd
8
- import datetime as dt
9
- from pathlib import Path
10
 
 
 
 
 
 
11
 
12
- def _find_header_row(path: str) -> int:
13
- """
14
- 找到 Excel 中的表头行(假设含“物料名称”那一行就是表头)。
15
- """
16
- for i in range(5):
17
- row = pd.read_excel(path, header=i, nrows=1)
18
- if "物料名称" in row.columns:
19
- return i
20
- return 0
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
21
 
22
 
23
  def _normalize_columns(df: pd.DataFrame) -> pd.DataFrame:
24
  """
25
- 列名统一化
 
26
  """
27
- rename_map = {
28
- "物料名称": "物料名称",
29
- "需求日期": "需求日期",
30
- "请购日期": "请购日期",
31
- "计划到货日期": "计划到货日期",
32
- "到货日期": "到货日期",
33
- "入库日期": "入库日期",
34
- "主数量": "主数量",
35
- "到货主数量": "到货主数量",
36
- "入库主数量": "入库主数量",
37
- }
38
- df = df.rename(columns=lambda x: str(x).strip())
39
- df = df.rename(columns=rename_map)
40
  return df
41
 
42
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
43
  def read_system_export(path: str) -> pd.DataFrame:
44
  """
45
- 读取“系统导出格式”Excel,并做清洗。
46
  """
47
  header_row = _find_header_row(path)
48
  try:
@@ -52,96 +134,273 @@ def read_system_export(path: str) -> pd.DataFrame:
52
 
53
  # 丢掉全空列
54
  df = df.dropna(axis=1, how="all")
55
- # 列名统一
56
  df = _normalize_columns(df)
57
 
58
  # 转日期
59
- for c in ["请购日期", "需求日期", "到货日期", "入库日期", "计划到货日期"]:
60
  if c in df.columns:
61
  df[c] = pd.to_datetime(df[c], errors="coerce")
62
 
63
  # 转数字
64
- for c in ["主数量", "到货主数量", "入库主数量"]:
65
  if c in df.columns:
66
  df[c] = pd.to_numeric(df[c], errors="coerce").fillna(0)
67
 
68
  # 去掉全空行
69
  df = df.dropna(how="all")
 
70
 
71
- # ========= 新增过滤逻辑 =========
72
- if "物料名称" in df.columns:
73
- mask_remove = df["物料名称"].str.contains("鹅|鸭|华住", na=False)
74
- mask_keep = df["物料名称"].str.contains("华住专用", na=False)
75
- df = df[~mask_remove | mask_keep]
76
- # ===============================
77
 
78
- return df
 
 
 
 
79
 
80
 
81
- def analyze_report(df: pd.DataFrame) -> pd.DataFrame:
82
  """
83
- 根据需求,计算“目前进度”一列。
 
 
 
 
 
 
84
  """
85
- today = dt.datetime.now().date()
86
-
87
- # 汇总数量
88
- grouped = df.groupby("物料名称", as_index=False).agg({
89
- "主数量": "sum",
90
- "到货主数量": "sum",
91
- "入库主数量": "sum",
92
- "需求日期": "max",
93
- "到货日期": "max",
94
- })
95
-
96
- def calc_progress(row):
97
- demand = row["主数量"]
98
- received = row["到货主数量"]
99
- due_date = row["需求日期"]
100
- arrival_date = row["到货日期"]
101
-
102
- if received >= demand and demand > 0:
103
- return "完全到货"
104
-
105
- # 部分到货
106
- if 0 < received < demand:
107
- missing = demand - received
108
- return f"部分到货,缺 {missing:.0f} 米"
109
-
110
- # 未到货
111
- if received == 0:
112
- if pd.isna(due_date):
113
- return "未到货"
114
- days_diff = (today - due_date.date()).days
115
- if days_diff > 0:
116
- return f"逾期 {days_diff} 天未到货"
117
- elif 0 <= (due_date.date() - today).days <= 7:
118
- return "未来7天要到货"
119
- else:
120
- return "未到货"
121
-
122
- return "未到货"
123
-
124
- grouped["目前进度"] = grouped.apply(calc_progress, axis=1)
125
-
126
- return grouped
127
-
128
-
129
- def save_to_excel(df: pd.DataFrame, out_path: str):
130
  """
131
- 结果保存到 Excel
 
 
 
 
 
 
132
  """
133
- Path(out_path).parent.mkdir(parents=True, exist_ok=True)
134
- df.to_excel(out_path, index=False)
 
 
 
 
 
 
 
 
 
 
 
135
 
 
 
 
 
 
 
 
 
136
 
137
- def main():
138
- src = "系统导出格式.xlsx"
139
- out = "输出结果.xlsx"
140
- df = read_system_export(src)
141
- result = analyze_report(df)
142
- save_to_excel(result, out)
143
- print(f"已生成:{out}")
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
144
 
145
 
146
  if __name__ == "__main__":
147
- main()
 
 
 
 
1
  #!/usr/bin/env python3
2
+ # -*- coding: utf-8 -*-
3
  """
4
  process_report.py
5
+ 读取最新或指定的“系统导出格式”Excel,按业务口径聚合为“邮件发送的格式”,
6
+ 在“目前进度”列写入:
7
+ - 完全到货
8
+ - 部分到货 缺货X米(X=主数量-到货主数量汇总)
9
+ - 未到货(含逾期天数)
10
+ - 未来7天到货(依据 计划到货日期)
11
+ 并通过 Resend 发送邮件(HTML表格 + Excel附件)。
12
+
13
+ 环境变量(Hugging Face → Settings → Variables and secrets → Repository secrets):
14
+ RESEND_API_KEY : Resend 的 API Key(必须)
15
+ FROM_EMAIL : 发件人,例如 "采购机器人 <bot@your-domain.com>"(必须)
16
+ TO_EMAIL : 收件人,例如 "you@your-company.com"(必须)
17
+ INPUT_DIR : 输入目录(默认 /tmp/uploads)
18
+ OUTPUT_DIR : 输出目录(默认 /tmp/outputs)
19
+ TZ : 时区(默认 Asia/Shanghai)
20
+
21
+ 使用方式:
22
+ 1) 上传文件到 INPUT_DIR 后,调用 main(trigger_file=该文件路径)
23
+ 2) 或命令行:python process_report.py [可选:具体文件路径]
24
  """
25
 
26
+ import os
27
+ import sys
28
+ import glob
29
+ import json
30
+ import base64
31
+ from io import BytesIO
32
+ from typing import Optional, Tuple, List
33
+
34
+ from datetime import datetime, date, timedelta
35
+
36
  import pandas as pd
37
+ import requests
 
38
 
39
+ # ====== Directories & environment ======
40
+ INPUT_DIR = os.environ.get("INPUT_DIR", "/tmp/uploads")
41
+ OUTPUT_DIR = os.environ.get("OUTPUT_DIR", "/tmp/outputs")
42
+ os.makedirs(INPUT_DIR, exist_ok=True)
43
+ os.makedirs(OUTPUT_DIR, exist_ok=True)
44
 
45
+ RESEND_API_KEY = os.environ.get("RESEND_API_KEY")
46
+ FROM_EMAIL = os.environ.get("FROM_EMAIL")
47
+ TO_EMAIL = os.environ.get("TO_EMAIL")
48
+ TIMEZONE = os.environ.get("TZ", "Asia/Shanghai")
49
+
50
+ # ====== Business columns (tries to tolerate full-width / half-width bracket differences) ======
51
+ # Exports from some systems name their columns slightly differently; this table maps each standard column name to its accepted synonyms
52
+ ALIASES = {
53
+ "请购日期": ["请购日期", "请购日", "申请日期"],
54
+ "请购单号": ["请购单号", "请购单编号", "申请单号"],
55
+ "物料编码": ["物料编码", "物料号", "物料代码"],
56
+ "物料名称": ["物料名称", "品名", "名称"],
57
+ "纱支密度": ["纱支密度", "纱支/密度", "纱支 密度"],
58
+ "门幅(CM)": ["门幅(CM)", "门幅(CM)", "门幅cm", "门幅"],
59
+ "颜色": ["颜色", "色号/颜色", "色号"],
60
+ "主单位": ["主单位", "单位"],
61
+ "主数量": ["主数量", "数量", "请购数量"],
62
+ "需求日期": ["需求日期", "需求日", "交期", "要求到货日期"],
63
+ "供应商": ["供应商", "供货商", "供应商名称"],
64
+ "到货日期": ["到货日期", "实到日期", "收货日期"],
65
+ "到货主数量": ["到货主数量", "到货数量", "实到数量"],
66
+ "入库日期": ["入库日期", "入库日"],
67
+ "入库主数量": ["入库主数量", "入库数量"],
68
+ "计划到货日期": ["计划到货日期", "预计到货日期", "承诺到货日期", "计划到货日"],
69
+ }
70
+
71
+ # Column order of the e-mail layout; when a template workbook is found, its header order takes precedence
72
+ EMAIL_COLS_DEFAULT = [
73
+ "请购日期","请购单号","物料编码","物料名称","纱支密度","门幅(CM)","颜色","主单位",
74
+ "主数量","需求日期","供应商","到货日期","到货主数量","入库日期","入库主数量","目前进度"
75
+ ]
76
+
77
+ TEMPLATE_CANDIDATES = [
78
+ # A template workbook placed in the repository root or in a templates directory is picked up automatically
79
+ "/workspace/邮件发送的格式.xlsx",
80
+ "/workspace/templates/邮件发送的格式.xlsx",
81
+ "/app/邮件发送的格式.xlsx",
82
+ "/app/templates/邮件发送的格式.xlsx",
83
+ ]
84
+
85
+
86
+ # ====== 工具函数 ======
87
+ def _today() -> date:
88
+ # 用本地系统日期即可(Space容器时区一般是UTC;你可在 Space 里设 TZ 环境变量 + tzdata 以保证正确)
89
+ return datetime.now().date()
90
 
91
 
92
  def _normalize_columns(df: pd.DataFrame) -> pd.DataFrame:
93
  """
94
+ 将 DataFrame 的列名映射到标准中文名(按 ALIASES)。
95
+ 例如:'门幅(CM)' -> '门幅(CM)'
96
  """
97
+ mapped = {}
98
+ for std_name, variants in ALIASES.items():
99
+ for v in df.columns:
100
+ v_clean = str(v).strip()
101
+ if v_clean in variants:
102
+ mapped[v] = std_name
103
+ break
104
+ df = df.rename(columns=mapped)
 
 
 
 
 
105
  return df
106
 
107
 
108
+ def _find_header_row(path: str, must_have: List[str] = None, try_rows: int = 10) -> int:
109
+ """
110
+ 尝试在前 try_rows 行中找到包含关键列(如“物料编码”、“主数量”)的表头行。
111
+ 找不到则返回 0。
112
+ """
113
+ must_have = must_have or ["物料编码", "主数量"]
114
+ for r in range(try_rows):
115
+ try:
116
+ df_try = pd.read_excel(path, header=r, nrows=1)
117
+ except Exception:
118
+ continue
119
+ cols = [str(c).strip() for c in df_try.columns]
120
+ if all(any(m in c for c in cols) or m in cols for m in must_have):
121
+ return r
122
+ return 0
123
+
124
+
125
  def read_system_export(path: str) -> pd.DataFrame:
126
  """
127
+ 读取“系统导出格式”Excel,并做列名标准化、空列丢弃、日期/数字类型转换。
128
  """
129
  header_row = _find_header_row(path)
130
  try:
 
134
 
135
  # 丢掉全空列
136
  df = df.dropna(axis=1, how="all")
137
+ # 标准化列名
138
  df = _normalize_columns(df)
139
 
140
  # 转日期
141
+ for c in ["请购日期","需求日期","到货日期","入库日期","计划到货日期"]:
142
  if c in df.columns:
143
  df[c] = pd.to_datetime(df[c], errors="coerce")
144
 
145
  # 转数字
146
+ for c in ["主数量","到货主数量","入库主数量"]:
147
  if c in df.columns:
148
  df[c] = pd.to_numeric(df[c], errors="coerce").fillna(0)
149
 
150
  # 去掉全空行
151
  df = df.dropna(how="all")
152
+ return df
153
 
 
 
 
 
 
 
154
 
155
+ def _first_nonnull(series: pd.Series):
156
+ for v in series:
157
+ if pd.notna(v):
158
+ return v
159
+ return None
160
 
161
 
162
def aggregate_for_email(df: pd.DataFrame) -> pd.DataFrame:
    """Aggregate the cleaned system export into the e-mail layout.

    Rules:
      * Group keys (whichever are present): 请购单号, 物料编码, 物料名称,
        纱支密度, 门幅(CM), 颜色, 主单位, 供应商. Quantities and dates are
        deliberately never part of the key.
      * 主数量 is not summed; the first non-null value in the group is kept
        (the lines of one group are assumed to repeat the same quantity).
      * 请购日期 / 需求日期 keep the first non-null value.
      * 到货日期 / 入库日期 / 计划到货日期 take the latest value.
      * 到货主数量 / 入库主数量 are summed.

    A ``目前进度`` column is then computed per group and the columns are
    ordered like the e-mail template.

    Args:
        df: Frame returned by ``read_system_export``.

    Returns:
        One row per group, including ``目前进度``.

    Raises:
        RuntimeError: If none of the group-key columns is present.
    """
    key_candidates = [
        "请购单号", "物料编码", "物料名称", "纱支密度", "门幅(CM)", "颜色", "主单位", "供应商",
    ]
    group_keys = [k for k in key_candidates if k in df.columns]
    if not group_keys:
        raise RuntimeError("找不到用于分组的关键字段(如 请购单号/物料编码 等),请检查导入的表头。")

    # Insertion order fixes the column order of the aggregated frame.
    agg_plan = {
        "主数量": _first_nonnull,
        "请购日期": _first_nonnull,
        "需求日期": _first_nonnull,
        "到货日期": "max",
        "入库日期": "max",
        "计划到货日期": "max",
        "到货主数量": "sum",
        "入库主数量": "sum",
    }
    agg_map = {col: how for col, how in agg_plan.items() if col in df.columns}

    groups = df.groupby(group_keys, dropna=False)
    if agg_map:
        grouped = groups.agg(agg_map).reset_index()
    else:
        # Nothing to aggregate: .agg({}) would raise, so just list the
        # distinct key combinations in the same (sorted) group order.
        grouped = groups.size().reset_index()[group_keys].copy()

    # Built row by row instead of DataFrame.apply(axis=1): on a frame with
    # zero rows apply() yields a DataFrame, which cannot be assigned to a
    # single column.
    grouped["目前进度"] = [_calc_progress_row(row) for _, row in grouped.iterrows()]

    # Final column order: template first, default order otherwise.
    return _order_like_template(grouped)
212
+
213
+
214
+ def _calc_progress_row(row: pd.Series) -> str:
 
 
215
  """
216
+ 目前进度的业务口径:
217
+ 1) 到货主数量 ≥ 主数量 => “完全到货”
218
+ 2) 否则:
219
+ - 若到货主数量 > 0 => “部分到货 缺货X米”
220
+ - 若到货主数量 == 0 且到货日期为空 => “未到货”
221
+ - 逾期:需求日期存在且 today > 需求日期,追加 “已逾期Y天”
222
+ - 未来7天到货:计划到货日期在 [today, today+7] 区间,追加 “未来7天到货(计划YYYY-MM-DD)”
223
  """
224
+ today = pd.Timestamp(_today())
225
+ main_qty = float(row.get("主数量", 0) or 0)
226
+ arr_qty = float(row.get("到货主数量", 0) or 0)
227
+
228
+ demand_date = row.get("需求日期", pd.NaT)
229
+ arrival_date = row.get("到货日期", pd.NaT)
230
+ plan_arrival = row.get("计划到货日期", pd.NaT)
231
+
232
+ # 完全到货
233
+ if main_qty > 0 and arr_qty >= main_qty:
234
+ return "完全到货"
235
+
236
+ parts: List[str] = []
237
 
238
+ # 部分/未到
239
+ shortage = max(0.0, main_qty - arr_qty)
240
+ if arr_qty > 0:
241
+ parts.append(f"部分到货 缺货{shortage:g}米")
242
+ else:
243
+ # 到货日期为空或数量为0都视为未到货
244
+ if pd.isna(arrival_date) or arr_qty == 0:
245
+ parts.append("未到货")
246
 
247
+ # 逾期天数
248
+ if (pd.isna(arrival_date) or arr_qty < main_qty) and pd.notna(demand_date):
249
+ overdue_days = (today - pd.Timestamp(demand_date.date())).days
250
+ if overdue_days > 0:
251
+ parts.append(f"已逾期{overdue_days}天")
252
+
253
+ # 未来7天到货(计划)
254
+ if pd.notna(plan_arrival):
255
+ days_ahead = (pd.Timestamp(plan_arrival.date()) - today).days
256
+ if 0 <= days_ahead <= 7:
257
+ parts.append(f"未来7天到货(计划{str(plan_arrival.date())})")
258
+
259
+ # 如果什么都没有匹配,给一个保底描述
260
+ if not parts:
261
+ # 例如:需求未到期且无计划到货
262
+ if pd.notna(demand_date) and today <= pd.Timestamp(demand_date.date()):
263
+ return "未到货(未到期)"
264
+ return "处理中"
265
+
266
+ return ";".join(parts)
267
+
268
+
269
def _order_like_template(df: pd.DataFrame) -> pd.DataFrame:
    """Reorder the columns of *df* to follow the e-mail template.

    The header of the first readable workbook in ``TEMPLATE_CANDIDATES``
    defines the preferred order; without one, ``EMAIL_COLS_DEFAULT`` is
    used. Preferred columns that exist in *df* come first, in template
    order, followed by the remaining columns of *df* in their current
    order. A copy is returned.
    """

    def _template_header():
        # First candidate that exists and can be parsed wins.
        for candidate in TEMPLATE_CANDIDATES:
            if not os.path.exists(candidate):
                continue
            try:
                header_only = pd.read_excel(candidate, nrows=0)
                return [str(label) for label in header_only.columns]
            except Exception:
                continue
        return None

    preferred = _template_header()
    if preferred is None:
        preferred = EMAIL_COLS_DEFAULT

    leading = [name for name in preferred if name in df.columns]
    trailing = [name for name in df.columns if name not in leading]
    return df[leading + trailing].copy()
293
+
294
+
295
+ def _find_latest_input(input_dir: str) -> Optional[str]:
296
+ files = []
297
+ for pat in ("*.xlsx", "*.xls"):
298
+ files.extend(glob.glob(os.path.join(input_dir, pat)))
299
+ if not files:
300
+ return None
301
+ files.sort(key=os.path.getmtime, reverse=True)
302
+ return files[0]
303
+
304
+
305
+ def _df_to_excel_bytes(df: pd.DataFrame) -> bytes:
306
+ bio = BytesIO()
307
+ df.to_excel(bio, index=False)
308
+ bio.seek(0)
309
+ return bio.read()
310
+
311
+
312
+ def _build_html_body(df: pd.DataFrame, title: str) -> str:
313
+ table_html = df.to_html(index=False, escape=False)
314
+ html = f"""<html>
315
+ <head>
316
+ <meta charset="utf-8" />
317
+ <style>
318
+ table {{ border-collapse: collapse; font-size: 13px; }}
319
+ table, th, td {{ border: 1px solid #ccc; padding: 6px; }}
320
+ th {{ background:#f6f6f6; }}
321
+ </style>
322
+ </head>
323
+ <body>
324
+ <h3>{title}</h3>
325
+ {table_html}
326
+ <p style="color:#666;">备注:此邮件由自动化系统生成。</p>
327
+ </body>
328
+ </html>"""
329
+ return html
330
+
331
+
332
def _send_email_via_resend(subject: str, html_body: str,
                           attachment_bytes: Optional[bytes],
                           attachment_name: str,
                           timeout: float = 30.0) -> Tuple[bool, str]:
    """Send one HTML e-mail, optionally with an attachment, through Resend.

    Args:
        subject: Mail subject.
        html_body: HTML content of the mail.
        attachment_bytes: Raw attachment content, or ``None`` for no
            attachment. It is sent base64-encoded.
        attachment_name: File name shown for the attachment.
        timeout: Seconds to wait for the HTTP request. Without a timeout a
            stalled connection would block the caller indefinitely.

    Returns:
        ``(True, response_text)`` on success, otherwise ``(False, reason)``.
        Missing configuration, transport errors and HTTP error statuses are
        all reported through the return value, never raised.
    """
    if not (RESEND_API_KEY and FROM_EMAIL and TO_EMAIL):
        return False, "缺少 Resend 配置(RESEND_API_KEY / FROM_EMAIL / TO_EMAIL)"

    url = "https://api.resend.com/emails"
    headers = {
        "Authorization": f"Bearer {RESEND_API_KEY}",
        "Content-Type": "application/json",
    }
    payload = {
        "from": FROM_EMAIL,
        "to": [TO_EMAIL],
        "subject": subject,
        "html": html_body,
    }
    if attachment_bytes is not None:
        payload["attachments"] = [{
            "filename": attachment_name,
            "content": base64.b64encode(attachment_bytes).decode("utf-8"),
        }]

    try:
        resp = requests.post(url, headers=headers, data=json.dumps(payload), timeout=timeout)
    except requests.RequestException as exc:
        # DNS failure, refused connection, timeout, ...: honour the
        # (ok, message) contract instead of letting the exception escape.
        return False, f"请求 Resend 失败:{exc}"

    if resp.ok:
        return True, resp.text
    return False, f"HTTP {resp.status_code}: {resp.text}"
359
+
360
+
361
+ # ====== 主流程 ======
362
def run_once(file_path: Optional[str] = None) -> dict:
    """Run one processing cycle: read Excel -> aggregate -> save -> e-mail.

    Args:
        file_path: Workbook to process. When ``None`` the newest workbook
            in ``INPUT_DIR`` is used.

    Returns:
        A dict for the caller (e.g. a web front end) with ``ok`` and
        ``msg``; when a workbook was processed it also holds ``input``,
        ``output`` (path of the saved report) and ``rows``.
    """
    if file_path is None:
        file_path = _find_latest_input(INPUT_DIR)
    if not file_path:
        return {"ok": False, "msg": f"未在 {INPUT_DIR} 找到Excel输入文件"}

    raw = read_system_export(file_path)
    final = aggregate_for_email(raw)

    # One date for file name, subject and heading, taken from the same
    # helper the progress calculation uses, so they cannot disagree when a
    # run crosses midnight.
    report_date = _today()

    out_name = f"邮件发送的格式_{report_date.strftime('%Y%m%d')}.xlsx"
    out_path = os.path.join(OUTPUT_DIR, out_name)
    os.makedirs(OUTPUT_DIR, exist_ok=True)

    # Serialise the workbook once; the saved file and the attachment are
    # the same bytes.
    attach = _df_to_excel_bytes(final)
    with open(out_path, "wb") as out_file:
        out_file.write(attach)

    subject = f"采购执行表自动推送 {report_date}"
    html = _build_html_body(final, title=f"采购执行表({report_date})")
    ok, info = _send_email_via_resend(
        subject, html, attachment_bytes=attach, attachment_name=out_name
    )

    return {
        "ok": ok,
        "msg": "邮件发送成功" if ok else f"邮件发送失败:{info}",
        "input": file_path,
        "output": out_path,
        "rows": len(final),
    }
392
+
393
+
394
def main(trigger_file: Optional[str] = None):
    """Process one file, print the result as JSON and exit with 1 on failure.

    The non-zero exit status lets cron jobs or health checks detect a
    failed run.
    """
    outcome = run_once(trigger_file)
    print(json.dumps(outcome, ensure_ascii=False, indent=2))
    if outcome.get("ok"):
        return
    sys.exit(1)


if __name__ == "__main__":
    # An explicit workbook path may be passed as the first CLI argument.
    main(sys.argv[1] if len(sys.argv) > 1 else None)
406
+