Gabriel00A committed on
Commit
ffa044a
·
verified ·
1 Parent(s): a8706e1

Update process_report.py

Browse files
Files changed (1) hide show
  1. process_report.py +95 -353
process_report.py CHANGED
@@ -1,130 +1,48 @@
1
  #!/usr/bin/env python3
2
- # -*- coding: utf-8 -*-
3
  """
4
  process_report.py
5
- 读取最新或指定的“系统导出格式”Excel,按业务口径聚合为“邮件发送的格式”,
6
- 在“目前进度”列写入:
7
- - 完全到货
8
- - 部分到货 缺货X米(X=主数量-到货主数量汇总)
9
- - 未到货(含逾期天数)
10
- - 未来7天到货(依据 计划到货日期)
11
- 并通过 Resend 发送邮件(HTML表格 + Excel附件)。
12
-
13
- 环境变量(Hugging Face → Settings → Variables and secrets → Repository secrets):
14
- RESEND_API_KEY : Resend 的 API Key(必须)
15
- FROM_EMAIL : 发件人,例如 "采购机器人 <bot@your-domain.com>"(必须)
16
- TO_EMAIL : 收件人,例如 "you@your-company.com"(必须)
17
- INPUT_DIR : 输入目录(默认 /tmp/uploads)
18
- OUTPUT_DIR : 输出目录(默认 /tmp/outputs)
19
- TZ : 时区(默认 Asia/Shanghai)
20
-
21
- 使用方式:
22
- 1) 上传文件到 INPUT_DIR 后,调用 main(trigger_file=该文件路径)
23
- 2) 或命令行:python process_report.py [可选:具体文件路径]
24
  """
25
 
26
- import os
27
- import sys
28
- import glob
29
- import json
30
- import base64
31
- from io import BytesIO
32
- from typing import Optional, Tuple, List
33
-
34
- from datetime import datetime, date, timedelta
35
-
36
  import pandas as pd
37
- import requests
38
-
39
- # ====== 目录 & 环境 ======
40
- INPUT_DIR = os.environ.get("INPUT_DIR", "/tmp/uploads")
41
- OUTPUT_DIR = os.environ.get("OUTPUT_DIR", "/tmp/outputs")
42
- os.makedirs(INPUT_DIR, exist_ok=True)
43
- os.makedirs(OUTPUT_DIR, exist_ok=True)
44
-
45
- RESEND_API_KEY = os.environ.get("RESEND_API_KEY")
46
- FROM_EMAIL = os.environ.get("FROM_EMAIL")
47
- TO_EMAIL = os.environ.get("TO_EMAIL")
48
- TIMEZONE = os.environ.get("TZ", "Asia/Shanghai")
49
-
50
- # ====== 业务相关字段(尽量兼容括号全角/半角差异)======
51
- # 一些系统表可能列名略有差异,这里做一个“同义列名”匹配表
52
- ALIASES = {
53
- "请购日期": ["请购日期", "请购日", "申请日期"],
54
- "请购单号": ["请购单号", "请购单编号", "申请单号"],
55
- "物料编码": ["物料编码", "物料号", "物料代码"],
56
- "物料名称": ["物料名称", "品名", "名称"],
57
- "纱支密度": ["纱支密度", "纱支/密度", "纱支 密度"],
58
- "门幅(CM)": ["门幅(CM)", "门幅(CM)", "门幅cm", "门幅"],
59
- "颜色": ["颜色", "色号/颜色", "色号"],
60
- "主单位": ["主单位", "单位"],
61
- "主数量": ["主数量", "数量", "请购数量"],
62
- "需求日期": ["需求日期", "需求日", "交期", "要求到货日期"],
63
- "供应商": ["供应商", "供货商", "供应商名称"],
64
- "到货日期": ["到货日期", "实到日期", "收货日期"],
65
- "到货主数量": ["到货主数量", "到货数量", "实到数量"],
66
- "入库日期": ["入库日期", "入库日"],
67
- "入库主数量": ["入库主数量", "入库数量"],
68
- "计划到货日期": ["计划到货日期", "预计到货日期", "承诺到货日期", "计划到货日"],
69
- }
70
-
71
- # “邮件发送的格式”列顺序,如果检测到模板文件,会按模板优先排序
72
- EMAIL_COLS_DEFAULT = [
73
- "请购日期","请购单号","物料编码","物料名称","纱支密度","门幅(CM)","颜色","主单位",
74
- "主数量","需求日期","供应商","到货日期","到货主数量","入库日期","入库主数量","目前进度"
75
- ]
76
-
77
- TEMPLATE_CANDIDATES = [
78
- # 若你把模板Excel放进仓库根目录或 templates 目录,可被自动识别
79
- "/workspace/邮件发送的格式.xlsx",
80
- "/workspace/templates/邮件发送的格式.xlsx",
81
- "/app/邮件发送的格式.xlsx",
82
- "/app/templates/邮件发送的格式.xlsx",
83
- ]
84
 
85
 
86
- # ====== 工具函数 ======
87
- def _today() -> date:
88
- # 用本地系统日期即可(Space容器时区一般是UTC;你可在 Space 里设 TZ 环境变量 + tzdata 以保证正确)
89
- return datetime.now().date()
90
-
91
-
92
- def _normalize_columns(df: pd.DataFrame) -> pd.DataFrame:
93
  """
94
- DataFrame 的列名映射到标准中文名(按 ALIASES)。
95
- 例如:'门幅(CM)' -> '门幅(CM)'
96
  """
97
- mapped = {}
98
- for std_name, variants in ALIASES.items():
99
- for v in df.columns:
100
- v_clean = str(v).strip()
101
- if v_clean in variants:
102
- mapped[v] = std_name
103
- break
104
- df = df.rename(columns=mapped)
105
- return df
106
 
107
 
108
- def _find_header_row(path: str, must_have: List[str] = None, try_rows: int = 10) -> int:
109
  """
110
- 尝试在前 try_rows 行中找到包含关键列(如“物料编码”、“主数量”)的表头行。
111
- 找不到则返回 0。
112
  """
113
- must_have = must_have or ["物料编码", "主数量"]
114
- for r in range(try_rows):
115
- try:
116
- df_try = pd.read_excel(path, header=r, nrows=1)
117
- except Exception:
118
- continue
119
- cols = [str(c).strip() for c in df_try.columns]
120
- if all(any(m in c for c in cols) or m in cols for m in must_have):
121
- return r
122
- return 0
 
 
 
 
123
 
124
 
125
  def read_system_export(path: str) -> pd.DataFrame:
126
  """
127
- 读取“系统导出格式”Excel,并做列名标准化、空列丢弃、日期/数字类型转换。
128
  """
129
  header_row = _find_header_row(path)
130
  try:
@@ -134,272 +52,96 @@ def read_system_export(path: str) -> pd.DataFrame:
134
 
135
  # 丢掉全空列
136
  df = df.dropna(axis=1, how="all")
137
- # 标准化列名
138
  df = _normalize_columns(df)
139
 
140
  # 转日期
141
- for c in ["请购日期","需求日期","到货日期","入库日期","计划到货日期"]:
142
  if c in df.columns:
143
  df[c] = pd.to_datetime(df[c], errors="coerce")
144
 
145
  # 转数字
146
- for c in ["主数量","到货主数量","入库主数量"]:
147
  if c in df.columns:
148
  df[c] = pd.to_numeric(df[c], errors="coerce").fillna(0)
149
 
150
  # 去掉全空行
151
  df = df.dropna(how="all")
152
- return df
153
-
154
-
155
- def _first_nonnull(series: pd.Series):
156
- for v in series:
157
- if pd.notna(v):
158
- return v
159
- return None
160
-
161
-
162
- def aggregate_for_email(df: pd.DataFrame) -> pd.DataFrame:
163
- """
164
- 业务汇总规则:
165
- - “主数量”不做分类汇总(保留原单的主数量),
166
- - “到货主数量”、“入库主数量”需要汇总求和,
167
- - “到货日期/入库日期/计划到货日期”取最大(最近),
168
- - 分组键:不包含数量列,常用维度如下(尽量稳定,避免将数量/日期放入分组键):
169
- 请购单号、物料编码、物料名称、纱支密度、门幅(CM)、颜色、主单位、供应商
170
- - “请购日期/需求日期”保留“首个非空”
171
- """
172
- group_keys = [k for k in [
173
- "请购单号","物料编码","物料名称","纱支密度","门幅(CM)","颜色","主单位","供应商"
174
- ] if k in df.columns]
175
-
176
- if not group_keys:
177
- raise RuntimeError("找不到用于分组的关键字段(如 请购单号/物料编码 等),请检查导入的表头。")
178
-
179
- agg_map = {}
180
-
181
- # 不汇总主数量:取首个非空(假设同一分组合并后主数量一致)
182
- if "主数量" in df.columns:
183
- agg_map["主数量"] = _first_nonnull
184
-
185
- # 日期字段
186
- if "请购日期" in df.columns:
187
- agg_map["请购日期"] = _first_nonnull
188
- if "需求日期" in df.columns:
189
- agg_map["需求日期"] = _first_nonnull
190
-
191
- if "到货日期" in df.columns:
192
- agg_map["到货日期"] = "max"
193
- if "入库日期" in df.columns:
194
- agg_map["入库日期"] = "max"
195
- if "计划到货日期" in df.columns:
196
- agg_map["计划到货日期"] = "max"
197
-
198
- # 数量汇总
199
- if "到货主数量" in df.columns:
200
- agg_map["到货主数量"] = "sum"
201
- if "入库主数量" in df.columns:
202
- agg_map["入库主数量"] = "sum"
203
-
204
- grouped = df.groupby(group_keys, dropna=False).agg(agg_map).reset_index()
205
-
206
- # 计算 “目前进度”
207
- grouped["目前进度"] = grouped.apply(_calc_progress_row, axis=1)
208
 
209
- # 最终列顺序:优先按模板,其次按默认顺序
210
- final = _order_like_template(grouped)
211
- return final
 
 
 
212
 
213
-
214
- def _calc_progress_row(row: pd.Series) -> str:
215
- """
216
- 目前进度的业务口径:
217
- 1) 到货主数量 ≥ 主数量 => “完全到货”
218
- 2) 否则:
219
- - 若到货主数量 > 0 => “部分到货 缺货X米”
220
- - 若到货主数量 == 0 且到货日期为空 => “未到货”
221
- - 逾期:需求日期存在且 today > 需求日期,追加 “已逾期Y天”
222
- - 未来7天到货:计划到货日期在 [today, today+7] 区间,追加 “未来7天到货(计划YYYY-MM-DD)”
223
- """
224
- today = pd.Timestamp(_today())
225
- main_qty = float(row.get("主数量", 0) or 0)
226
- arr_qty = float(row.get("到货主数量", 0) or 0)
227
-
228
- demand_date = row.get("需求日期", pd.NaT)
229
- arrival_date = row.get("到货日期", pd.NaT)
230
- plan_arrival = row.get("计划到货日期", pd.NaT)
231
-
232
- # 完全到货
233
- if main_qty > 0 and arr_qty >= main_qty:
234
- return "完全到货"
235
-
236
- parts: List[str] = []
237
-
238
- # 部分/未到
239
- shortage = max(0.0, main_qty - arr_qty)
240
- if arr_qty > 0:
241
- parts.append(f"部分到货 缺货{shortage:g}米")
242
- else:
243
- # 到货日期为空或数量为0都视为未到货
244
- if pd.isna(arrival_date) or arr_qty == 0:
245
- parts.append("未到货")
246
-
247
- # 逾期天数
248
- if (pd.isna(arrival_date) or arr_qty < main_qty) and pd.notna(demand_date):
249
- overdue_days = (today - pd.Timestamp(demand_date.date())).days
250
- if overdue_days > 0:
251
- parts.append(f"已逾期{overdue_days}天")
252
-
253
- # 未来7天到货(计划)
254
- if pd.notna(plan_arrival):
255
- days_ahead = (pd.Timestamp(plan_arrival.date()) - today).days
256
- if 0 <= days_ahead <= 7:
257
- parts.append(f"未来7天到货(计划{str(plan_arrival.date())})")
258
-
259
- # 如果什么都没有匹配,给一个保底描述
260
- if not parts:
261
- # 例如:需求未到期且无计划到货
262
- if pd.notna(demand_date) and today <= pd.Timestamp(demand_date.date()):
263
- return "未到货(未到期)"
264
- return "处理中"
265
-
266
- return ";".join(parts)
267
 
268
 
269
- def _order_like_template(df: pd.DataFrame) -> pd.DataFrame:
270
  """
271
- 若能找到“邮件发送的格式.xlsx”,按其表头顺序输出;否则用 EMAIL_COLS_DEFAULT。
272
- 模板里缺的列会自动从 df 里补;df 有但模板没有的列会追加在后面。
273
  """
274
- template_cols = None
275
- for p in TEMPLATE_CANDIDATES:
276
- if os.path.exists(p):
277
- try:
278
- tdf = pd.read_excel(p, nrows=0)
279
- template_cols = list(map(str, tdf.columns))
280
- break
281
- except Exception:
282
- continue
283
-
284
- if template_cols is None:
285
- template_cols = EMAIL_COLS_DEFAULT
286
-
287
- # 先取交集按顺序
288
- front = [c for c in template_cols if c in df.columns]
289
- # 再把 df 里其余列追加在后
290
- tail = [c for c in df.columns if c not in front]
291
- cols = front + tail
292
- return df[cols].copy()
293
-
294
-
295
- def _find_latest_input(input_dir: str) -> Optional[str]:
296
- files = []
297
- for pat in ("*.xlsx", "*.xls"):
298
- files.extend(glob.glob(os.path.join(input_dir, pat)))
299
- if not files:
300
- return None
301
- files.sort(key=os.path.getmtime, reverse=True)
302
- return files[0]
303
-
304
-
305
- def _df_to_excel_bytes(df: pd.DataFrame) -> bytes:
306
- bio = BytesIO()
307
- df.to_excel(bio, index=False)
308
- bio.seek(0)
309
- return bio.read()
310
-
311
-
312
- def _build_html_body(df: pd.DataFrame, title: str) -> str:
313
- table_html = df.to_html(index=False, escape=False)
314
- html = f"""<html>
315
- <head>
316
- <meta charset="utf-8" />
317
- <style>
318
- table {{ border-collapse: collapse; font-size: 13px; }}
319
- table, th, td {{ border: 1px solid #ccc; padding: 6px; }}
320
- th {{ background:#f6f6f6; }}
321
- </style>
322
- </head>
323
- <body>
324
- <h3>{title}</h3>
325
- {table_html}
326
- <p style="color:#666;">备注:此邮件由自动化系统生成。</p>
327
- </body>
328
- </html>"""
329
- return html
330
-
331
-
332
- def _send_email_via_resend(subject: str, html_body: str,
333
- attachment_bytes: Optional[bytes],
334
- attachment_name: str) -> Tuple[bool, str]:
335
- if not (RESEND_API_KEY and FROM_EMAIL and TO_EMAIL):
336
- return False, "缺少 Resend 配置(RESEND_API_KEY / FROM_EMAIL / TO_EMAIL)"
337
-
338
- url = "https://api.resend.com/emails"
339
- headers = {
340
- "Authorization": f"Bearer {RESEND_API_KEY}",
341
- "Content-Type": "application/json",
342
- }
343
- payload = {
344
- "from": FROM_EMAIL,
345
- "to": [TO_EMAIL],
346
- "subject": subject,
347
- "html": html_body,
348
- }
349
- if attachment_bytes is not None:
350
- payload["attachments"] = [{
351
- "filename": attachment_name,
352
- "content": base64.b64encode(attachment_bytes).decode("utf-8"),
353
- }]
354
-
355
- resp = requests.post(url, headers=headers, data=json.dumps(payload))
356
- if resp.ok:
357
- return True, resp.text
358
- return False, f"HTTP {resp.status_code}: {resp.text}"
359
-
360
-
361
- # ====== 主流程 ======
362
- def run_once(file_path: Optional[str] = None) -> dict:
363
  """
364
- 单次处理:读入Excel -> 汇总 -> 生成输出 -> 发邮件。
365
- 返回一个 dict 给上层(便于 app.py 返回给前端)。
366
  """
367
- if file_path is None:
368
- file_path = _find_latest_input(INPUT_DIR)
369
- if not file_path:
370
- return {"ok": False, "msg": f"未在 {INPUT_DIR} 找到Excel输入文件"}
371
-
372
- raw = read_system_export(file_path)
373
- final = aggregate_for_email(raw)
374
-
375
- out_name = f"邮件发送的格式_{datetime.now().strftime('%Y%m%d')}.xlsx"
376
- out_path = os.path.join(OUTPUT_DIR, out_name)
377
- os.makedirs(OUTPUT_DIR, exist_ok=True)
378
- final.to_excel(out_path, index=False)
379
-
380
- subject = f"采购执行表自动推送 {datetime.now().date()}"
381
- html = _build_html_body(final, title=f"采购执行表({datetime.now().date()})")
382
- attach = _df_to_excel_bytes(final)
383
- ok, info = _send_email_via_resend(subject, html, attachment_bytes=attach, attachment_name=out_name)
384
-
385
- return {
386
- "ok": ok,
387
- "msg": "邮件发送成功" if ok else f"邮件发送失败:{info}",
388
- "input": file_path,
389
- "output": out_path,
390
- "rows": len(final),
391
- }
392
 
393
 
394
- def main(trigger_file: Optional[str] = None):
395
- result = run_once(trigger_file)
396
- print(json.dumps(result, ensure_ascii=False, indent=2))
397
- # 返回码:成功0,失败1(便于将来做cron/健康检查)
398
- if not result.get("ok"):
399
- sys.exit(1)
 
400
 
401
 
402
  if __name__ == "__main__":
403
- # 允许命令行传入具体文件路径
404
- arg_file = sys.argv[1] if len(sys.argv) > 1 else None
405
- main(arg_file)
 
1
  #!/usr/bin/env python3
 
2
  """
3
  process_report.py
4
+ 采购执行表 → 邮件通知
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
5
  """
6
 
 
 
 
 
 
 
 
 
 
 
7
  import pandas as pd
8
+ import datetime as dt
9
+ from pathlib import Path
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
10
 
11
 
12
def _find_header_row(path: str, marker: str = "物料名称", max_rows: int = 5) -> int:
    """Return the 0-based index of the worksheet row that holds the header.

    The first row (among the first ``max_rows`` rows) that contains a cell
    equal to ``marker`` is taken to be the header row.

    Args:
        path: Location of the Excel workbook; passed to ``pd.read_excel``.
        marker: Column title that identifies the header row.
        max_rows: How many leading rows to inspect.

    Returns:
        The index of the matching row, or 0 when no inspected row contains
        ``marker`` (including when the sheet is shorter than ``max_rows``).
    """
    # Read the leading rows once, without a header, instead of re-parsing the
    # workbook once per candidate row.
    preview = pd.read_excel(path, header=None, nrows=max_rows)
    return _locate_header_row(preview.itertuples(index=False, name=None), marker)


def _locate_header_row(rows, marker: str) -> int:
    """Return the index of the first row containing ``marker``, else 0."""
    for index, cells in enumerate(rows):
        # Compare stripped text so a title padded with whitespace still counts
        # as the header; column labels are stripped later on anyway.
        if any(str(cell).strip() == marker for cell in cells):
            return index
    return 0
 
 
 
 
21
 
22
 
23
# Standard column name -> alternative spellings that are renamed to it.
# The alternatives are the synonyms accepted by the earlier revision of this
# file for the columns this module works with.
_COLUMN_ALIASES = {
    "物料名称": ("品名", "名称"),
    "需求日期": ("需求日", "交期", "要求到货日期"),
    "请购日期": ("请购日", "申请日期"),
    "计划到货日期": ("预计到货日期", "承诺到货日期", "计划到货日"),
    "到货日期": ("实到日期", "收货日期"),
    "入库日期": ("入库日",),
    "主数量": ("数量", "请购数量"),
    "到货主数量": ("到货数量", "实到数量"),
    "入库主数量": ("入库数量",),
}


def _normalize_columns(df: pd.DataFrame) -> pd.DataFrame:
    """Return a copy of ``df`` whose column labels use the standard names.

    Every label is converted to ``str`` and stripped of surrounding
    whitespace. Then, for each standard name that is not already present, the
    first known alias found among the columns is renamed to that standard
    name. Columns that match neither a standard name nor an alias keep their
    (stripped) label.

    Args:
        df: Frame as read from the workbook.

    Returns:
        A new frame; ``df`` itself is not modified.
    """
    cleaned = df.rename(columns=lambda label: str(label).strip())
    present = set(cleaned.columns)

    rename_map = {}
    for standard, aliases in _COLUMN_ALIASES.items():
        if standard in present:
            # Never create a duplicate of a column that already exists.
            continue
        alias = next(
            (name for name in aliases if name in present and name not in rename_map),
            None,
        )
        if alias is not None:
            rename_map[alias] = standard

    return cleaned.rename(columns=rename_map)
41
 
42
 
43
  def read_system_export(path: str) -> pd.DataFrame:
44
  """
45
+ 读取“系统导出格式”Excel,并做清洗。
46
  """
47
  header_row = _find_header_row(path)
48
  try:
 
52
 
53
  # 丢掉全空列
54
  df = df.dropna(axis=1, how="all")
55
+ # 列名统一
56
  df = _normalize_columns(df)
57
 
58
  # 转日期
59
+ for c in ["请购日期", "需求日期", "到货日期", "入库日期", "计划到货日期"]:
60
  if c in df.columns:
61
  df[c] = pd.to_datetime(df[c], errors="coerce")
62
 
63
  # 转数字
64
+ for c in ["主数量", "到货主数量", "入库主数量"]:
65
  if c in df.columns:
66
  df[c] = pd.to_numeric(df[c], errors="coerce").fillna(0)
67
 
68
  # 去掉全空行
69
  df = df.dropna(how="all")
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
70
 
71
+ # ========= 新增过滤逻辑 =========
72
+ if "物料名称" in df.columns:
73
+ mask_remove = df["物料名称"].str.contains("鹅|鸭|华住", na=False)
74
+ mask_keep = df["物料名称"].str.contains("华住专用", na=False)
75
+ df = df[~mask_remove | mask_keep]
76
+ # ===============================
77
 
78
+ return df
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
79
 
80
 
81
def analyze_report(df: pd.DataFrame, today: "dt.date | None" = None) -> pd.DataFrame:
    """Aggregate the report per material and add the "目前进度" status column.

    Rows are grouped by "物料名称". Quantity columns are summed and date
    columns take their maximum. The status of each group is derived from the
    summed "主数量" and "到货主数量" and, for groups with nothing received, from
    the aggregated "需求日期".

    Args:
        df: Cleaned report. Must contain "物料名称", "主数量" and "到货主数量".
            "入库主数量", "需求日期" and "到货日期" are aggregated when present.
        today: Reference date for overdue / upcoming checks. Defaults to the
            current local date.

    Returns:
        One row per material with the aggregated columns plus "目前进度".

    Raises:
        KeyError: If a required column is missing.
    """
    if today is None:
        today = dt.datetime.now().date()

    required = ("物料名称", "主数量", "到货主数量")
    absent = [name for name in required if name not in df.columns]
    if absent:
        raise KeyError(f"analyze_report: missing required column(s): {absent}")

    aggregations = {
        "主数量": "sum",
        "到货主数量": "sum",
        "入库主数量": "sum",
        "需求日期": "max",
        "到货日期": "max",
    }
    # Only aggregate what the sheet actually provides; the optional columns
    # are not needed to compute the status.
    present = {name: how for name, how in aggregations.items() if name in df.columns}
    grouped = df.groupby("物料名称", as_index=False).agg(present)

    if "需求日期" in grouped.columns:
        due_dates = grouped["需求日期"]
    else:
        due_dates = [pd.NaT] * len(grouped)

    grouped["目前进度"] = [
        _progress_label(demand, received, due_date, today)
        for demand, received, due_date in zip(
            grouped["主数量"], grouped["到货主数量"], due_dates
        )
    ]
    return grouped


def _progress_label(demand, received, due_date, today) -> str:
    """Return the status text for one aggregated material row."""
    if demand > 0 and received >= demand:
        return "完全到货"

    if 0 < received < demand:
        return f"部分到货,缺 {demand - received:.0f} "

    if received == 0 and not pd.isna(due_date):
        # pd.Timestamp accepts Timestamp, datetime, date and ISO strings alike.
        days_until_due = (pd.Timestamp(due_date).date() - today).days
        if days_until_due < 0:
            return f"逾期 {-days_until_due} 天未到货"
        if days_until_due <= 7:
            return "未来7天要到货"

    # Nothing received and no usable due date, a due date more than a week
    # away, or a quantity combination not covered by the branches above.
    return "未到货"
127
+
128
+
129
def save_to_excel(df: pd.DataFrame, out_path: str):
    """Write ``df`` to an Excel file at ``out_path`` without the index.

    The parent directory of ``out_path`` is created first (including any
    missing intermediate directories) if it does not exist yet.
    """
    target_dir = Path(out_path).parent
    target_dir.mkdir(parents=True, exist_ok=True)
    df.to_excel(out_path, index=False)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
135
 
136
 
137
def main(src: str = "系统导出格式.xlsx", out: str = "输出结果.xlsx") -> None:
    """Read the exported workbook, compute the progress report and save it.

    Args:
        src: Path of the system-export workbook to read.
        out: Path of the Excel file to write.
    """
    df = read_system_export(src)
    result = analyze_report(df)
    save_to_excel(result, out)
    print(f"已生成:{out}")


if __name__ == "__main__":
    # Imported here so the module's top-level import list stays untouched.
    import sys

    # Optional positional arguments: [input workbook] [output workbook].
    # With no arguments the defaults of main() apply.
    main(*sys.argv[1:3])