File size: 17,909 Bytes
d413faf
0599c34
 
 
 
 
d413faf
 
 
 
 
 
 
 
 
 
 
0599c34
 
 
 
 
 
 
 
 
 
 
 
 
 
 
d413faf
 
 
 
 
 
 
 
0599c34
d413faf
 
0599c34
d413faf
 
 
 
 
 
 
 
 
 
0599c34
 
d413faf
 
0599c34
 
d413faf
 
 
 
0599c34
d413faf
 
 
 
 
 
 
 
 
 
 
 
 
 
0599c34
2aa98b4
 
 
 
466d122
0599c34
 
d413faf
466d122
0599c34
 
466d122
 
0599c34
466d122
0599c34
d413faf
0599c34
466d122
d413faf
 
 
 
2aa98b4
 
d413faf
2aa98b4
d413faf
0599c34
466d122
d413faf
 
 
 
0599c34
 
466d122
d413faf
 
466d122
 
d413faf
2aa98b4
0599c34
2aa98b4
d413faf
 
 
 
466d122
 
d413faf
2aa98b4
d413faf
 
 
 
2aa98b4
466d122
2aa98b4
466d122
 
 
 
 
 
 
2aa98b4
466d122
 
 
 
 
 
2aa98b4
466d122
 
 
 
d413faf
466d122
 
2aa98b4
d413faf
466d122
 
d413faf
466d122
d413faf
 
741c22e
d413faf
 
 
741c22e
d413faf
 
 
 
 
0599c34
741c22e
d413faf
 
 
 
0599c34
741c22e
 
 
 
 
 
d413faf
 
 
 
 
 
 
 
 
 
 
741c22e
d413faf
 
 
 
 
 
741c22e
d413faf
 
 
 
 
 
 
 
 
 
 
 
 
 
 
741c22e
 
d413faf
 
 
 
 
 
 
3831e4d
d413faf
 
 
3831e4d
d413faf
 
 
3831e4d
 
 
 
d413faf
 
 
 
 
 
 
3831e4d
d413faf
 
 
 
 
 
 
 
3831e4d
 
 
 
d413faf
 
3831e4d
d413faf
3831e4d
 
 
 
 
 
 
 
 
 
 
 
 
 
 
d413faf
 
0599c34
d413faf
 
 
 
 
 
 
 
 
3831e4d
d413faf
 
 
 
3831e4d
 
d413faf
 
3831e4d
d413faf
 
 
0599c34
d413faf
 
 
 
 
 
 
 
3831e4d
 
d413faf
3831e4d
d413faf
3831e4d
d413faf
3831e4d
d413faf
3831e4d
d413faf
 
 
 
 
 
3831e4d
 
d413faf
 
 
 
 
 
 
 
 
0599c34
 
d413faf
 
 
 
 
0599c34
 
 
d413faf
 
 
 
d098a1b
d413faf
 
d098a1b
d413faf
 
d098a1b
d413faf
 
 
d098a1b
d413faf
0599c34
 
d413faf
 
 
 
741c22e
 
d413faf
 
 
 
741c22e
d413faf
 
741c22e
d413faf
 
0599c34
 
d413faf
 
 
3831e4d
 
d413faf
 
 
 
3831e4d
d413faf
 
3831e4d
d413faf
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
# 統合版 Gradio アプリ (閾値診断 + 傾向検出 + 予兆解析)
import gradio as gr
import pandas as pd
import numpy as np
import json
import os
from sklearn.linear_model import LinearRegression

# --- 共通ユーティリティ ---
def convert_value(v):
    """Coerce numpy scalars to native Python scalars so they JSON-serialize cleanly."""
    # numpy scalar types expose .item(), which unwraps to the Python equivalent.
    if hasattr(v, "item"):
        return v.item()
    for family, cast in ((np.integer, int), (np.floating, float)):
        if isinstance(v, (family, cast)):
            return cast(v)
    return v

def normalize(s):
    """Normalize a header cell: full-width space -> ASCII space, drop CR/LF, trim ends."""
    text = str(s)
    for old, new in (("\u3000", " "), ("\n", ""), ("\r", "")):
        text = text.replace(old, new)
    return text.strip()

def find_matching_column(df, col_id, item_name, process_name):
    """Return the first column of *df* whose name contains *col_id* and
    *process_name* verbatim and the normalized *item_name*; None if absent."""
    target_item = normalize(item_name)
    for column in df.columns:
        if not isinstance(column, str):
            continue
        if col_id not in column or process_name not in column:
            continue
        if target_item in normalize(column):
            return column
    return None

# --- グローバル変数(全タブで共有) ---
# Shared state: populated once by load_files() and read by all three tab handlers.
df = None             # measurement CSV, flattened to string columns; first column is "timestamp"
thresholds_df = None  # per-item LL/L/H/HH limits plus boolean "Important" flag (from Excel)
lag_matrix = None     # process-to-process lag minutes, indexed by process name (from Excel)

# --- ファイル読み込み ---
def load_files(csv_file, excel_file, lag_file):
    global df, thresholds_df, lag_matrix
    try:
        df = pd.read_csv(csv_file.name, header=[0, 1, 2])
        timestamp_col = pd.to_datetime(df.iloc[:, 0], errors="coerce")
        df = df.drop(df.columns[0], axis=1)
        df.insert(0, "timestamp", timestamp_col)

        # MultiIndex → 文字列化
        def col_to_str(col):
            return "_".join([str(c) for c in col if c]) if isinstance(col, tuple) else str(col)

        df.columns = [
            "timestamp" if (isinstance(c, str) and c == "timestamp") else col_to_str(c)
            for c in df.columns
        ]

        thresholds_df = pd.read_excel(excel_file.name)
        thresholds_df["Important"] = thresholds_df["Important"].astype(str).str.upper().map({"TRUE": True, "FALSE": False})
        for col in ["LL", "L", "H", "HH"]:
            if col in thresholds_df.columns:
                thresholds_df[col] = pd.to_numeric(thresholds_df[col], errors="coerce")

        lag_matrix = pd.read_excel(lag_file.name, index_col=0)

        return "✅ ファイル読み込み成功"
    except Exception as e:
        return f"❌ ファイル読み込み失敗: {e}"

# --- Tab1: 閾値診断 ---
def judge_status(value, ll, l, h, hh):
    """Classify *value* against the LL/L/H/HH limits.

    Checks run in severity order (LL, L, HH, H); a NaN/None limit is skipped.
    Returns one of "LOW-LOW", "LOW", "HIGH-HIGH", "HIGH", or "OK".
    """
    bands = (
        (ll, "LOW-LOW", True),
        (l, "LOW", True),
        (hh, "HIGH-HIGH", False),
        (h, "HIGH", False),
    )
    for limit, label, is_lower in bands:
        if not pd.notna(limit):
            continue
        breached = value < limit if is_lower else value > limit
        if breached:
            return label
    return "OK"

def diagnose_process_range(process_name, datetime_str, window_minutes):
    """Tab1: classify every configured item of *process_name* against its
    LL/L/H/HH thresholds over the *window_minutes* leading up to *datetime_str*.

    Returns a 5-tuple:
      (all-items status DataFrame,
       important-items status DataFrame,
       per-important-item breakdown DataFrame,
       human-readable summary string,
       JSON string of the aggregated ratios).
    On validation failure the DataFrames/JSON are None and only the message
    element describes the problem.
    """
    global df, thresholds_df
    if df is None or thresholds_df is None:
        return None, None, None, "⚠ ファイル未読み込み", None

    try:
        target_time = pd.to_datetime(datetime_str)
    except Exception:
        return None, None, None, f"⚠ 入力した日時 {datetime_str} が無効です。", None

    # Inclusive window [target - window_minutes, target].
    start_time = target_time - pd.Timedelta(minutes=window_minutes)
    end_time = target_time
    df_window = df[(df["timestamp"] >= start_time) & (df["timestamp"] <= end_time)]
    if df_window.empty:
        return None, None, None, "⚠ 指定した時間幅にデータが見つかりません。", None

    proc_thresholds = thresholds_df[thresholds_df["ProcessNo_ProcessName"] == process_name]
    if proc_thresholds.empty:
        return None, None, None, f"⚠ プロセス {process_name} の閾値が設定されていません。", None

    # One judgement record per (timestamped row, threshold item) pair.
    # Column names were flattened to "ColumnID_ItemName_ProcessNo_ProcessName"
    # at load time, so an exact reconstruction is used for lookup here.
    all_results = []
    for _, row in df_window.iterrows():
        for _, thr in proc_thresholds.iterrows():
            col_name = f"{thr['ColumnID']}_{thr['ItemName']}_{thr['ProcessNo_ProcessName']}"
            if col_name not in df.columns:
                continue
            value = row[col_name]
            status = judge_status(value, thr.get("LL"), thr.get("L"), thr.get("H"), thr.get("HH"))
            all_results.append({
                "ColumnID": thr["ColumnID"],
                "ItemName": thr["ItemName"],
                "判定": status,
                "重要項目": bool(thr.get("Important", False)),
                "時刻": str(row["timestamp"])
            })

    # --- Aggregate over all items ---
    # NOTE(review): if no threshold column matched, total is 0 and the ratio
    # division yields NaN rather than an explicit error — confirm intended.
    total = len(all_results)
    status_counts = pd.Series([r["判定"] for r in all_results]).value_counts().reindex(
        ["LOW-LOW", "LOW", "OK", "HIGH", "HIGH-HIGH"], fill_value=0
    )
    status_ratio = (status_counts / total * 100).round(1)
    result_df_all = pd.DataFrame({"状態": status_counts.index, "件数": status_counts.values, "割合(%)": status_ratio.values})

    # --- Aggregate over important items only ---
    important_results = [r for r in all_results if r["重要項目"]]
    if important_results:
        total_imp = len(important_results)
        status_counts_imp = pd.Series([r["判定"] for r in important_results]).value_counts().reindex(
            ["LOW-LOW", "LOW", "OK", "HIGH", "HIGH-HIGH"], fill_value=0
        )
        status_ratio_imp = (status_counts_imp / total_imp * 100).round(1)
        result_df_imp = pd.DataFrame({"状態": status_counts_imp.index, "件数": status_counts_imp.values, "割合(%)": status_ratio_imp.values})
    else:
        result_df_imp = pd.DataFrame(columns=["状態", "件数", "割合(%)"])
        status_ratio_imp = pd.Series(dtype=float)

    # --- Per-important-item breakdown ---
    result_per_item = []
    for item in set([r["ItemName"] for r in important_results]):
        item_results = [r for r in important_results if r["ItemName"] == item]
        total_item = len(item_results)
        status_counts_item = pd.Series([r["判定"] for r in item_results]).value_counts().reindex(
            ["LOW-LOW", "LOW", "OK", "HIGH", "HIGH-HIGH"], fill_value=0
        )
        status_ratio_item = (status_counts_item / total_item * 100).round(1)
        for s, c, r in zip(status_counts_item.index, status_counts_item.values, status_ratio_item.values):
            result_per_item.append({"ItemName": item, "状態": s, "件数": int(c), "割合(%)": float(r)})
    result_df_imp_items = pd.DataFrame(result_per_item)

    # --- Human-readable summary ---
    summary = (
        f"✅ {process_name} の診断完了({start_time}{end_time})\n"
        + "[全項目] " + " / ".join([f"{s}:{r:.1f}%" for s, r in status_ratio.items()]) + "\n"
        + "[重要項目全体] " + (" / ".join([f"{s}:{r:.1f}%" for s, r in status_ratio_imp.items()]) if not result_df_imp.empty else "対象データなし")
    )

    # --- JSON payload mirroring the three aggregations ---
    json_data = {
        "集計結果": {
            "全項目割合": {k: float(v) for k, v in status_ratio.to_dict().items()},
            "重要項目全体割合": {k: float(v) for k, v in status_ratio_imp.to_dict().items()} if not result_df_imp.empty else {},
            "重要項目ごと割合": [dict(row) for _, row in result_df_imp_items.iterrows()]
        }
    }
    result_json = json.dumps(json_data, ensure_ascii=False, indent=2)

    return result_df_all, result_df_imp, result_df_imp_items, summary, result_json

# --- Tab2: 傾向検出 ---
def detect_trends_with_forecast(process_name, datetime_str, window_minutes, forecast_minutes, downsample_factor):
    """Tab2: fit a linear trend per important item of *process_name* over the
    past *window_minutes* and extrapolate *forecast_minutes* ahead.

    Returns (result DataFrame, status message, JSON string); the first and
    last elements are None on any validation failure.
    """
    global df, thresholds_df
    if df is None or thresholds_df is None:
        return None, "⚠ ファイル未読み込み", None

    # Consistent with Tab1: reject unparsable datetimes instead of raising.
    try:
        target_time = pd.to_datetime(datetime_str)
    except Exception:
        return None, f"⚠ 入力した日時 {datetime_str} が無効です。", None

    start_time = target_time - pd.Timedelta(minutes=window_minutes)
    df_window = df[(df["timestamp"] >= start_time) & (df["timestamp"] <= target_time)]
    if df_window.empty:
        return None, "⚠ データなし", None

    # Estimate the raw sampling interval from the median timestamp delta.
    # A zero interval (duplicated timestamps) would divide by zero below,
    # so it is rejected together with the NaN case.
    interval = df_window["timestamp"].diff().median()
    if pd.isna(interval) or interval.total_seconds() <= 0:
        return None, "⚠ サンプリング間隔検出失敗", None
    interval_minutes = interval.total_seconds() / 60

    # --- Downsampling: keep every step-th row; clamp the factor to >= 1 ---
    step = max(int(downsample_factor), 1)
    df_window = df_window.iloc[::step, :].reset_index(drop=True)
    effective_interval = interval_minutes * step

    proc_thresholds = thresholds_df[(thresholds_df["ProcessNo_ProcessName"] == process_name) & 
                                    (thresholds_df["Important"] == True)]
    if proc_thresholds.empty:
        return None, f"⚠ {process_name} の重要項目なし", None

    results = []
    for _, thr in proc_thresholds.iterrows():
        # Column names were flattened to "ColumnID_ItemName_Process" at load time.
        col_name = f"{thr['ColumnID']}_{thr['ItemName']}_{thr['ProcessNo_ProcessName']}"
        if col_name not in df.columns:
            continue
        series = df_window[col_name].dropna()
        if len(series) < 3:
            # Too few points for a meaningful linear fit.
            continue

        # Simple least-squares trend over sample index.
        x = np.arange(len(series)).reshape(-1, 1)
        y = series.values.reshape(-1, 1)
        model = LinearRegression().fit(x, y)
        slope = model.coef_[0][0]
        last_val = series.iloc[-1]

        # Extrapolate to the index corresponding to forecast_minutes ahead.
        forecast_steps = int(forecast_minutes / effective_interval)
        forecast_val = model.predict(np.array([[len(series) + forecast_steps]]))[0][0]
        forecast_time = target_time + pd.Timedelta(minutes=forecast_minutes)

        # Only the hard limits (LL/HH) are used for the forecast risk label.
        risk = "安定"
        if pd.notna(thr.get("LL")) and forecast_val <= thr["LL"]:
            risk = "LL逸脱予測"
        elif pd.notna(thr.get("HH")) and forecast_val >= thr["HH"]:
            risk = "HH逸脱予測"

        results.append({
            "ItemName": thr["ItemName"],
            "傾き": round(float(slope), 4),
            "最終値": round(float(last_val), 3),
            "予測値": round(float(forecast_val), 3),
            "予測時刻": str(forecast_time),
            "予測リスク": risk,
            "粗化間隔(分)": round(effective_interval, 2)
        })

    result_df = pd.DataFrame(results)
    result_json = json.dumps(results, ensure_ascii=False, indent=2)
    return result_df, "✅ 傾向検出完了", result_json

# --- Tab3: 予兆解析 ---
def forecast_process_with_lag(process_name, datetime_str, forecast_minutes, downsample_factor=3):
    """Tab3: predict each important item of *process_name* at *forecast_minutes*
    ahead from lagged upstream-process signals via multiple linear regression.

    The lag-matrix row for the process gives, per upstream process, the delay
    in minutes with which its signals propagate downstream; only positive lags
    are used.  Returns (result DataFrame, status message, JSON string); the
    first and last elements are None on any validation failure.
    """
    global df, thresholds_df, lag_matrix
    if df is None or thresholds_df is None or lag_matrix is None:
        return None, "⚠ ファイル未読み込み", None

    # Consistent with Tab1: reject unparsable datetimes instead of raising.
    try:
        target_time = pd.to_datetime(datetime_str)
    except Exception:
        return None, f"⚠ 入力した日時 {datetime_str} が無効です。", None
    forecast_time = target_time + pd.Timedelta(minutes=forecast_minutes)

    proc_thresholds = thresholds_df[
        (thresholds_df["ProcessNo_ProcessName"] == process_name) & 
        (thresholds_df["Important"] == True)
    ]
    if proc_thresholds.empty:
        return None, f"⚠ {process_name} の重要項目なし", None

    if process_name not in lag_matrix.index:
        return None, f"⚠ {process_name} のラグ行なし", None

    lag_row = lag_matrix.loc[process_name].dropna()
    lag_row = lag_row[lag_row > 0]  # keep positive lags only
    if lag_row.empty:
        return None, f"⚠ {process_name} に正のラグなし", None

    results = []
    for _, thr in proc_thresholds.iterrows():
        y_col = find_matching_column(df, thr["ColumnID"], thr["ItemName"], thr["ProcessNo_ProcessName"])
        if y_col is None:
            continue

        # Training window: the most recent 24 hours up to the target time.
        df_window = df[(df["timestamp"] >= target_time - pd.Timedelta(hours=24)) &
                       (df["timestamp"] <= target_time)].copy()
        if df_window.empty:
            continue

        base_df = df_window[["timestamp", y_col]].rename(columns={y_col: "y"})

        # === Downsampling: resample the target onto a coarser regular grid ===
        dt = df_window["timestamp"].diff().median()
        if pd.notna(dt):
            base_min = max(int(dt.total_seconds() // 60), 1)
        else:
            base_min = 1
        new_interval = max(base_min * int(downsample_factor), 1)

        # "min" is the supported alias; "T" is deprecated since pandas 2.2.
        df_down = (base_df.set_index("timestamp")
                           .resample(f"{new_interval}min").mean()
                           .dropna()
                           .reset_index())

        # Attach every upstream signal, time-shifted forward by its process
        # lag, via nearest-timestamp asof joins onto the downsampled target.
        merged_df = df_down.copy()
        for up_proc, lag_min in lag_row.items():
            up_cols = [c for c in df.columns if isinstance(c, str) and up_proc in c]
            for x_col in up_cols:
                shifted = df_window.loc[:, ["timestamp", x_col]].copy()
                shifted["timestamp"] = shifted["timestamp"] + pd.Timedelta(minutes=lag_min)
                shifted = shifted.rename(columns={x_col: f"{x_col}_lag{lag_min}"})
                merged_df = pd.merge_asof(
                    merged_df.sort_values("timestamp"),
                    shifted.sort_values("timestamp"),
                    on="timestamp",
                    direction="nearest"
                )

        X_all = merged_df.drop(columns=["timestamp", "y"], errors="ignore").values
        Y_all = merged_df["y"].values
        if X_all.shape[1] == 0 or len(Y_all) < 5:
            # Not enough features or samples for a stable fit.
            continue

        # Multiple linear regression: y ~ lagged upstream signals.
        model = LinearRegression().fit(X_all, Y_all)

        # Build the single future feature vector: for each upstream signal
        # take the sample nearest to (forecast_time - lag).
        X_pred = []
        for up_proc, lag_min in lag_row.items():
            up_cols = [c for c in df.columns if isinstance(c, str) and up_proc in c]
            for x_col in up_cols:
                ref_time = forecast_time - pd.Timedelta(minutes=lag_min)
                idx = (df["timestamp"] - ref_time).abs().idxmin()
                X_pred.append(df.loc[idx, x_col])
        if not X_pred:
            continue

        pred_val = model.predict(np.array([X_pred]))[0]

        # Threshold risk classification for the predicted value.
        ll, l, h, hh = thr.get("LL"), thr.get("L"), thr.get("H"), thr.get("HH")
        risk = "OK"
        if pd.notna(ll) and pred_val <= ll:
            risk = "LOW-LOW"
        elif pd.notna(l) and pred_val <= l:
            risk = "LOW"
        elif pd.notna(hh) and pred_val >= hh:
            risk = "HIGH-HIGH"
        elif pd.notna(h) and pred_val >= h:
            risk = "HIGH"

        results.append({
            "ItemName": thr["ItemName"],
            "予測値": round(float(pred_val), 3),
            "予測時刻": str(forecast_time),
            "予測リスク": risk,
            "粗化間隔(分)": new_interval
        })

    result_df = pd.DataFrame(results)
    result_json = json.dumps(results, ensure_ascii=False, indent=2)
    return result_df, f"✅ {process_name} の予兆解析完了", result_json

# --- Gradio UI ---
# Top-level UI definition: three tabs sharing one set of uploaded files.
with gr.Blocks(css=".gradio-container {overflow: auto !important;}") as demo:
    gr.Markdown("## 統合トレンド解析アプリ (MCP対応)")

    # Shared inputs: measurement CSV, threshold Excel, lag-matrix Excel.
    # NOTE(review): with type="filepath" the handlers presumably receive plain
    # string paths — confirm load_files accepts str, not only objects with .name.
    with gr.Row():
        csv_input = gr.File(label="CSVファイル", file_types=[".csv"], type="filepath")
        excel_input = gr.File(label="Excel閾値ファイル", file_types=[".xlsx"], type="filepath")
        lag_input = gr.File(label="ラグファイル", file_types=[".xlsx"], type="filepath")
        load_btn = gr.Button("ファイル読み込み")
        load_status = gr.Textbox(label="読み込み結果")

    with gr.Tabs():
        # Tab1: threshold diagnosis over a backward-looking time window.
        with gr.Tab("閾値診断"):
            process_name1 = gr.Textbox(label="プロセス名")
            datetime_str1 = gr.Textbox(label="診断基準日時")
            window_minutes1 = gr.Number(label="さかのぼる時間幅(分)", value=60)
            run_btn1 = gr.Button("診断実行")
            
            result_df_all = gr.Dataframe(label="全項目の状態集計")
            result_df_imp = gr.Dataframe(label="重要項目全体の状態集計")
            result_df_imp_items = gr.Dataframe(label="重要項目ごとの状態集計")
            summary_output = gr.Textbox(label="サマリー")
            json_output = gr.Json(label="JSON集計結果")
            
            run_btn1.click(
                diagnose_process_range,
                inputs=[process_name1, datetime_str1, window_minutes1],
                outputs=[result_df_all, result_df_imp, result_df_imp_items, summary_output, json_output]
            )

        # Tab2: linear-trend detection with extrapolated forecast.
        with gr.Tab("傾向検出"):
            process_name2 = gr.Textbox(label="プロセス名")
            datetime_str2 = gr.Textbox(label="基準日時")
            window_minutes2 = gr.Number(label="過去の時間幅(分)", value=60)
            forecast_minutes2 = gr.Number(label="未来予測時間幅(分)", value=60)
            downsample_factor2 = gr.Slider(label="粗化倍率", minimum=1, maximum=10, step=1, value=3)
            
            run_btn2 = gr.Button("傾向検出実行")
            result_df2 = gr.Dataframe(label="傾向+予測結果")
            summary_output2 = gr.Textbox(label="サマリー")
            json_output2 = gr.Json(label="JSON結果")
            
            run_btn2.click(
                detect_trends_with_forecast,
                inputs=[process_name2, datetime_str2, window_minutes2, forecast_minutes2, downsample_factor2],
                outputs=[result_df2, summary_output2, json_output2]
            )

        # Tab3: lag-based predictive analysis from upstream processes.
        with gr.Tab("予兆解析"):
            process_name3 = gr.Textbox(label="プロセス名")
            datetime_str3 = gr.Textbox(label="基準日時")
            forecast_minutes3 = gr.Number(label="未来予測時間幅(分)", value=60)
            downsample_factor3 = gr.Slider(1, 10, value=3, step=1, label="粗化倍率(サンプリング間引き)")
            
            run_btn3 = gr.Button("予兆解析実行")
            result_df3 = gr.Dataframe(label="予兆解析結果")
            summary_output3 = gr.Textbox(label="サマリー")
            json_output3 = gr.Json(label="JSON結果")
            
            run_btn3.click(
                forecast_process_with_lag,
                inputs=[process_name3, datetime_str3, forecast_minutes3, downsample_factor3],
                outputs=[result_df3, summary_output3, json_output3]
            )

    # File loading is wired outside the tabs so it serves all three.
    load_btn.click(load_files, inputs=[csv_input, excel_input, lag_input], outputs=[load_status])

if __name__ == "__main__":
    # USE_MCP=1 launches the app as an MCP server; otherwise serve the plain
    # web UI on all interfaces without a public share link.
    if os.getenv("USE_MCP", "0") == "1":
        demo.launch(mcp_server=True)
    else:
        demo.launch(server_name="0.0.0.0", share=False)