File size: 3,430 Bytes
6b1a839
 
 
ab49d10
6b1a839
ab49d10
6b1a839
 
 
 
 
 
 
 
 
 
 
 
 
 
 
ab49d10
 
6b1a839
 
ab49d10
6b1a839
 
ab49d10
 
 
6b1a839
ab49d10
 
6b1a839
ab49d10
 
6b1a839
 
 
ab49d10
 
 
 
 
 
 
6b1a839
 
 
 
 
ab49d10
 
 
 
 
 
 
 
 
 
6b1a839
ab49d10
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
6b1a839
 
 
ab49d10
 
 
6b1a839
 
 
 
 
ab49d10
6b1a839
ab49d10
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
from __future__ import annotations

import logging

import numpy as np
import pandas as pd

from . import storage

# Use Prophet / NeuralProphet when they are available (fall back otherwise).
# Each name is bound to None when its import fails, so the rest of the module
# can test availability with ``is not None``.
try:
    from prophet import Prophet
except Exception:
    # Broad catch: any import-time failure is treated as "not installed".
    Prophet = None

try:
    from neuralprophet import NeuralProphet
except Exception:
    # Same policy as above for the second optional backend.
    NeuralProphet = None


class SeasonalityModel:
    """Hourly CTR seasonality model for a single campaign.

    ``fit`` builds an hourly click-through-rate (CTR) series from the
    campaign's stored events and trains Prophet (preferred) or NeuralProphet
    when one of them could be imported. ``expected_ctr`` returns a CTR
    estimate for an hour of the day and uses a fixed time-of-day heuristic
    around ``global_mean`` whenever no usable model prediction is available.

    Attributes:
        campaign_id: Campaign whose events are read by ``fit``.
        model: Trained Prophet / NeuralProphet instance, or ``None``.
        model_type: ``"prophet"``, ``"neuralprophet"`` or ``"none"``.
        global_mean: Mean hourly CTR from the last aggregation that produced
            at least one valid hour; starts at ``DEFAULT_CTR``.
    """

    DEFAULT_CTR = 0.05  # CTR assumed when there is too little data
    MIN_CTR = 0.01  # every estimate is clamped to [MIN_CTR, MAX_CTR]
    MAX_CTR = 0.99
    DEFAULT_HOUR = 12  # hour used when the context does not provide one

    # (hours of day, multiplier applied to global_mean) for the heuristic.
    _PEAK_MULTIPLIERS = (
        (range(11, 14), 1.1),
        (range(20, 24), 1.15),
    )

    def __init__(self, campaign_id: str):
        self.campaign_id = campaign_id
        self.model = None
        self.model_type = "none"
        self.global_mean = self.DEFAULT_CTR

    def fit(self) -> None:
        """Train the seasonality model from the campaign's stored events.

        Reads ``ts`` / ``event_type`` rows for ``campaign_id``, aggregates
        them into an hourly CTR series (clicks / impressions) and fits the
        first available backend. On missing data, a missing backend or a
        training failure the instance stays in ``model_type == "none"`` so
        that ``expected_ctr`` uses its heuristic.
        """
        # Start from a clean slate so a failed refit never leaves a stale model.
        self.model = None
        self.model_type = "none"

        with storage.get_conn() as con:
            events = pd.read_sql_query(
                "SELECT ts, event_type FROM events WHERE campaign_id=?",
                con,
                params=(self.campaign_id,),
            )
        if events.empty:
            return

        # Normalise to timezone-naive UTC: Prophet rejects tz-aware "ds"
        # values, and mixed UTC offsets would otherwise not yield a datetime
        # column at all. Naive inputs are left unchanged by this round trip.
        events["ts"] = pd.to_datetime(
            events["ts"], errors="coerce", utc=True
        ).dt.tz_localize(None)
        events = events.dropna(subset=["ts"])
        if events.empty:
            # Every timestamp was unparseable; nothing to aggregate.
            return

        hourly = (
            events.assign(hour=events["ts"].dt.floor("h"))
            .pivot_table(
                index="hour", columns="event_type", values="ts", aggfunc="count"
            )
            .fillna(0)
        )
        for column in ("impression", "click"):
            if column not in hourly:
                hourly[column] = 0

        # Hours without impressions have no defined CTR -> NaN.
        impressions = hourly["impression"]
        ctr = (hourly["click"] / impressions.where(impressions > 0)).to_numpy(
            dtype=float
        )
        if np.isnan(ctr).all():
            return

        self.global_mean = float(np.nanmean(ctr))

        # Training frame in the ds / y layout used by both backends; undefined
        # hours are filled with the global mean.
        train = pd.DataFrame(
            {
                "ds": hourly.index,
                "y": np.where(np.isnan(ctr), self.global_mean, ctr),
            }
        )

        try:
            if Prophet is not None:
                model = Prophet(weekly_seasonality=True, daily_seasonality=True)
                model.fit(train)
                model_type = "prophet"
            elif NeuralProphet is not None:
                model = NeuralProphet(
                    weekly_seasonality=True, daily_seasonality=True
                )
                model.fit(train, freq="H")
                model_type = "neuralprophet"
            else:
                return
        except Exception:
            # Best-effort training: keep the heuristic fallback, but leave a
            # trace instead of swallowing the failure silently.
            logging.getLogger(__name__).warning(
                "Seasonality model training failed for campaign %s; "
                "falling back to the heuristic",
                self.campaign_id,
                exc_info=True,
            )
            return

        self.model = model
        self.model_type = model_type

    def expected_ctr(self, context: dict) -> float:
        """Return the expected CTR for the hour given in ``context``.

        Args:
            context: Mapping that may contain ``"hour"`` (hour of day,
                anything ``int()`` accepts). A missing or ``None`` hour
                defaults to ``DEFAULT_HOUR``.

        Returns:
            A CTR estimate clamped to ``[MIN_CTR, MAX_CTR]``. With a trained
            model this is the one-point prediction for today (UTC) at that
            hour; otherwise, or when the prediction fails or is NaN, it is
            the time-of-day heuristic around ``global_mean``.
        """
        raw_hour = context.get("hour")
        hour = self.DEFAULT_HOUR if raw_hour is None else int(raw_hour)

        if self.model is None or self.model_type in {None, "none"}:
            return self._heuristic_ctr(hour)

        # The timestamp must be timezone-naive: Prophet raises on tz-aware
        # "ds" columns. fit() trains on naive UTC, so predict in naive UTC.
        today = pd.Timestamp.now(tz="UTC").tz_localize(None).floor("D")
        frame = pd.DataFrame({"ds": [today + pd.Timedelta(hours=hour)]})
        column = "yhat" if self.model_type == "prophet" else "yhat1"

        try:
            yhat = float(self.model.predict(frame)[column].iloc[0])
        except Exception:
            # Mirror fit(): a backend failure degrades to the heuristic.
            logging.getLogger(__name__).warning(
                "Seasonality prediction failed for campaign %s; "
                "falling back to the heuristic",
                self.campaign_id,
                exc_info=True,
            )
            return self._heuristic_ctr(hour)

        if np.isnan(yhat):
            # min()/max() would silently turn NaN into MAX_CTR.
            return self._heuristic_ctr(hour)
        return self._clamp(yhat)

    def _heuristic_ctr(self, hour: int) -> float:
        """Return ``global_mean`` scaled by a fixed time-of-day multiplier."""
        multiplier = 1.0
        for hours, factor in self._PEAK_MULTIPLIERS:
            if hour in hours:
                multiplier = factor
                break
        return self._clamp(self.global_mean * multiplier)

    def _clamp(self, value: float) -> float:
        """Clamp ``value`` to the ``[MIN_CTR, MAX_CTR]`` range."""
        return max(self.MIN_CTR, min(self.MAX_CTR, value))