Corin1998 committed on
Commit
c547356
·
verified ·
1 Parent(s): 1f951ea

Update data.py

Browse files
Files changed (1) hide show
  1. data.py +34 -21
data.py CHANGED
@@ -5,36 +5,47 @@ from typing import Optional, Dict, Any
5
  from datetime import datetime
6
  import pandas as pd
7
 
8
- DATA_DIR = os.environ.get("DATA_DIR", "./data")
 
 
 
 
 
9
  LOG_PATH = os.path.join(DATA_DIR, "events.csv")
10
  META_PATH = os.path.join(DATA_DIR, "meta.json")
11
 
12
  SCHEMA = [
13
- "ts", # ISO timestamp
14
- "data", # YYYY-MM-DD(便宜)
15
- "medium", # 媒体名(例:FB,GDN)
16
- "creative", # クリエイティブID/名前(例:A1)
17
- "is_control" # 0/1(コントロール群)
18
- "impressions", # 表示数
19
- "clicks", # クリック数(または目的コンバージョン)
20
- "conversions", # 追加のCV(任意:0でもOK)
21
- "cost", # コスト(任意)
22
- "features_json" # クリエイティブ特徴量(dictをJSON文字列で)
23
  ]
24
 
25
- os.makedirs(DATA_DIR, exist_ok=True)
26
- if not os.path.exists(LOG_PATH):
27
- pd.DataFrame(columns=SCHEMA).to_csv(LOG_PATH, index=False)
28
- if not os.path.exists(META_PATH):
29
- with open(META_PATH, "w" , encoding="utf-8") as f:
30
- json.dump({"created_at": datetime.utcnow().isoformat()}, f)
 
 
 
 
 
31
 
32
  def read_events() -> pd.DataFrame:
 
33
  df = pd.read_csv(LOG_PATH)
34
  if df.empty:
35
  return df
36
  # 型整備
37
- df["data"] = pd.to_datetime(df["date"]).dt.date.astype(str)
38
  df["is_control"] = df["is_control"].fillna(0).astype(int)
39
  for col in ["impressions", "clicks", "conversions"]:
40
  df[col] = df[col].fillna(0).astype(int)
@@ -43,7 +54,8 @@ def read_events() -> pd.DataFrame:
43
  return df
44
 
45
  def append_events(rows: pd.DataFrame) -> None:
46
- # 必須列チェック& 補完
 
47
  for c in SCHEMA:
48
  if c not in rows.columns:
49
  if c == "features_json":
@@ -62,13 +74,14 @@ def append_events(rows: pd.DataFrame) -> None:
62
  rows.to_csv(LOG_PATH, mode="a", header=False, index=False)
63
 
64
  def aggregate(levels=("medium", "creative")) -> pd.DataFrame:
 
65
  df = read_events()
66
  if df.empty:
67
- return pd.DataFrame(columns=[*levels, "is_control" , "impressions", "clicks", "conversions", "cost"])
68
  g = df.groupby([*levels, "is_control"], dropna=False).agg(
69
  impressions=("impressions", "sum"),
70
  clicks=("clicks", "sum"),
71
  conversions=("conversions", "sum"),
72
  cost=("cost", "sum"),
73
  ).reset_index()
74
- return g
 
5
  from datetime import datetime
6
  import pandas as pd
7
 
8
+ # 書き込み可能な場所をデフォルトにする
9
+ # - 既定: /tmp/adcopy_data(ephemeral)
10
+ # - 環境変数 DATA_DIR を設定すると、例: /data/adcopy_mab(HF Spaces の Persistent Storage)
11
+ DEFAULT_WRITABLE_DIR = "/tmp/adcopy_data"
12
+ DATA_DIR = os.environ.get("DATA_DIR", DEFAULT_WRITABLE_DIR)
13
+
14
  LOG_PATH = os.path.join(DATA_DIR, "events.csv")
15
  META_PATH = os.path.join(DATA_DIR, "meta.json")
16
 
17
  SCHEMA = [
18
+ "ts", # ISO timestamp
19
+ "date", # YYYY-MM-DD (便宜)
20
+ "medium", # 媒体名 (例: FB, GDN)
21
+ "creative", # クリエイティブID/名前 (例: A1)
22
+ "is_control", # 0/1(コントロール群)
23
+ "impressions", # 表示数
24
+ "clicks", # クリック数(または目的コンバージョン)
25
+ "conversions", # 追加のCV(任意: 0 でもOK)
26
+ "cost", # コスト(任意)
27
+ "features_json" # クリエイティブ特徴量(dict をJSON文字列で)
28
  ]
29
 
30
+ def _ensure_storage():
31
+ """初回起動時に保存先と空ファイルを準備。"""
32
+ os.makedirs(DATA_DIR, exist_ok=True)
33
+ if not os.path.exists(LOG_PATH):
34
+ pd.DataFrame(columns=SCHEMA).to_csv(LOG_PATH, index=False)
35
+ if not os.path.exists(META_PATH):
36
+ with open(META_PATH, "w", encoding="utf-8") as f:
37
+ json.dump({"created_at": datetime.utcnow().isoformat()}, f)
38
+
39
+ # インポート時に準備(書き込み可能ディレクトリなのでOK)
40
+ _ensure_storage()
41
 
42
  def read_events() -> pd.DataFrame:
43
+ _ensure_storage()
44
  df = pd.read_csv(LOG_PATH)
45
  if df.empty:
46
  return df
47
  # 型整備
48
+ df["date"] = pd.to_datetime(df["date"]).dt.date.astype(str)
49
  df["is_control"] = df["is_control"].fillna(0).astype(int)
50
  for col in ["impressions", "clicks", "conversions"]:
51
  df[col] = df[col].fillna(0).astype(int)
 
54
  return df
55
 
56
  def append_events(rows: pd.DataFrame) -> None:
57
+ _ensure_storage()
58
+ # 必須列チェック & 補完
59
  for c in SCHEMA:
60
  if c not in rows.columns:
61
  if c == "features_json":
 
74
  rows.to_csv(LOG_PATH, mode="a", header=False, index=False)
75
 
76
  def aggregate(levels=("medium", "creative")) -> pd.DataFrame:
77
+ _ensure_storage()
78
  df = read_events()
79
  if df.empty:
80
+ return pd.DataFrame(columns=[*levels, "is_control", "impressions", "clicks", "conversions", "cost"])
81
  g = df.groupby([*levels, "is_control"], dropna=False).agg(
82
  impressions=("impressions", "sum"),
83
  clicks=("clicks", "sum"),
84
  conversions=("conversions", "sum"),
85
  cost=("cost", "sum"),
86
  ).reset_index()
87
+ return g