Ken-INOUE committed on
Commit
7b56cc8
·
1 Parent(s): 8ad4b11

Implement initial project structure and setup

Browse files
Files changed (2) hide show
  1. app.py +221 -0
  2. requirements.txt +13 -0
app.py ADDED
@@ -0,0 +1,221 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # 変動解析アプリ(単独 Gradio 版・粗化なし)
2
+ import gradio as gr
3
+ import pandas as pd
4
+ import numpy as np
5
+ import json
6
+ import os
7
+ import time
8
+ from typing import Dict, Optional
9
+
10
+ # ---------- ユーティリティ ----------
11
+ def _np_to_py(x):
12
+ if hasattr(x, "item"):
13
+ try:
14
+ return x.item()
15
+ except Exception:
16
+ pass
17
+ if isinstance(x, (np.integer,)):
18
+ return int(x)
19
+ if isinstance(x, (np.floating,)):
20
+ return float(x)
21
+ return x
22
+
23
def robust_mad(x: pd.Series) -> float:
    """Robust scale estimate of a (difference) series: 1.4826 x MAD.

    MAD is the median of the absolute deviations from the median of ``x``.
    Returns NaN when ``x`` is empty.
    """
    if not len(x):
        return np.nan
    center = np.median(x)
    abs_deviation = np.abs(x - center)
    return 1.4826 * np.median(abs_deviation)
30
+
31
def load_thresholds(excel_path: Optional[str]) -> Dict[tuple, bool]:
    """Build an "is important" lookup from the thresholds Excel workbook.

    Args:
        excel_path: Path to the workbook, or a falsy value when none was given.

    Returns:
        A dict keyed by ``(ColumnID, ItemName, ProcessNo_ProcessName)`` whose
        value is True only when the row's ``Important`` cell reads "TRUE"
        (case-insensitive, surrounding whitespace ignored). An empty dict is
        returned when no path is given, when a key column is missing, or when
        the workbook cannot be read.

    This loader is best-effort by design: it never raises, because the
    importance flag is optional decoration on the analysis result. Failures
    are logged instead of being silently discarded.
    """
    if not excel_path:
        return {}

    import logging  # local import: keeps this block self-contained

    key_columns = ["ColumnID", "ItemName", "ProcessNo_ProcessName"]
    try:
        thresholds_df = pd.read_excel(excel_path)
        if not set(key_columns).issubset(thresholds_df.columns):
            return {}

        if "Important" in thresholds_df.columns:
            # Compare against "TRUE" explicitly. Mapping unknown/blank cells to
            # NaN and then calling bool() on them would yield True, because
            # bool(float("nan")) is True.
            flags = (
                thresholds_df["Important"]
                .astype(str)
                .str.strip()
                .str.upper()
                .eq("TRUE")
            )
        else:
            flags = pd.Series(False, index=thresholds_df.index)

        keys = zip(*(thresholds_df[name] for name in key_columns))
        # Later duplicate keys overwrite earlier ones.
        return {key: bool(flag) for key, flag in zip(keys, flags)}
    except Exception as exc:  # best-effort boundary: report, then fall back
        logging.getLogger(__name__).warning(
            "Could not load thresholds from %r: %s", excel_path, exc
        )
        return {}
52
+
53
+ # ---------- 変動解析ロジック ----------
54
def _json_safe(value):
    """Return ``value`` as a JSON-friendly scalar; non-finite floats become None."""
    value = _np_to_py(value)
    if isinstance(value, float) and not np.isfinite(value):
        # json.dumps would emit NaN/Infinity, which is not valid JSON.
        return None
    return value


def _summarize_column(col, s, important_lookup, cv_threshold_pct, jump_pct_threshold, mad_sigma):
    """Compute one result row for column ``col`` from its NaN-free samples ``s``."""
    mean = float(np.mean(s))
    std = float(np.std(s, ddof=1))
    # CV is undefined for a zero mean.
    cv_pct = np.nan if mean == 0 else abs(std / mean) * 100.0

    diffs = s.diff().dropna()
    step_sizes = diffs.abs()
    mad_scale = robust_mad(diffs)
    # Relative jumps are measured against the median level; the floor avoids
    # division by zero.
    ref = max(1e-9, abs(float(np.median(s))))
    rel_jump = step_sizes / ref * 100.0

    # A zero/NaN MAD gives no usable absolute threshold, so that criterion is
    # disabled (infinite threshold) and only the relative one can fire.
    mad_usable = not np.isnan(mad_scale) and mad_scale > 0
    abs_thr = mad_sigma * mad_scale if mad_usable else np.inf
    spike_mask = (step_sizes > abs_thr) | (rel_jump >= jump_pct_threshold)

    spike_count = int(spike_mask.sum())
    spike_up_count = int((diffs[spike_mask] > 0).sum())
    max_step = float(step_sizes.max()) if len(diffs) else np.nan

    # Only 3-level column labels carry (ColumnID, ItemName, Process); any
    # other label is reported by its string form instead of failing to unpack.
    if isinstance(col, tuple) and len(col) == 3:
        colid, itemname, proc = col
        important = important_lookup.get(col, False)
    else:
        colid, itemname, proc = "", str(col), ""
        important = False

    unstable = (not np.isnan(cv_pct) and cv_pct >= cv_threshold_pct) or spike_count > 0

    return {
        "ColumnID": colid,
        "ItemName": itemname,
        "Process": proc,
        "サンプル数": len(s),
        "平均": round(mean, 6),
        "標準偏差": round(std, 6),
        "CV(%)": None if np.isnan(cv_pct) else float(round(cv_pct, 3)),
        "スパイク数": spike_count,
        "スパイク上昇数": spike_up_count,
        "スパイク下降数": spike_count - spike_up_count,
        "最大|ステップ|": None if np.isnan(max_step) else float(round(max_step, 6)),
        "最初の値": round(float(s.iloc[0]), 6),
        "最後の値": round(float(s.iloc[-1]), 6),
        "重要項目": bool(important),
        "不安定判定": bool(unstable),
    }


def analyze_variability_core(
    df: pd.DataFrame,
    important_lookup: Dict[tuple, bool],
    datetime_str: str,
    window_minutes: int,
    cv_threshold_pct: float = 10.0,
    jump_pct_threshold: float = 10.0,
    mad_sigma: float = 3.0,
):
    """Analyse the variability of every numeric column inside a time window.

    The window is ``[datetime_str - window_minutes, datetime_str]`` (inclusive)
    on the ``timestamp`` column of ``df``. For each numeric column with at
    least three non-NaN samples in the window, the coefficient of variation
    and step "spikes" are computed. A step is a spike when its absolute size
    exceeds ``mad_sigma`` x (1.4826 x MAD of the steps) or when its size
    relative to the column median is at least ``jump_pct_threshold`` percent.
    A column is flagged unstable when its CV reaches ``cv_threshold_pct`` or
    it has at least one spike.

    Args:
        df: Data with a ``timestamp`` column plus measurement columns.
        important_lookup: Maps 3-tuple column labels to an importance flag.
        datetime_str: Reference (end) time of the window, parsed by pandas.
        window_minutes: How many minutes to look back from the reference time.
        cv_threshold_pct: CV (%) at or above which a column is unstable.
        jump_pct_threshold: Relative one-step change (%) that counts as a spike.
        mad_sigma: Multiplier applied to the MAD scale for the absolute spike test.

    Returns:
        ``(result_df, summary, json_obj, json_text)``. When the reference time
        cannot be parsed or the window holds no rows, the tuple is
        ``(None, message, None, None)``.
    """
    try:
        target_time = pd.to_datetime(datetime_str)
    except (ValueError, TypeError) as exc:
        return None, f"⚠ 基準日時を解釈できません: {datetime_str!r}({exc})", None, None
    if pd.isna(target_time):
        return None, f"⚠ 基準日時を解釈できません: {datetime_str!r}", None, None

    start_time = target_time - pd.Timedelta(minutes=window_minutes)
    end_time = target_time

    stamps = df["timestamp"]
    dfw = df[(stamps >= start_time) & (stamps <= end_time)]
    if dfw.empty:
        return None, f"⚠ 指定時間幅({start_time}~{end_time})にデータが見つかりません。", None, None

    # Step differences only make sense in chronological order, so sort the
    # window by time (stable, positional: works for MultiIndex columns too).
    dfw = dfw.iloc[np.argsort(dfw["timestamp"].to_numpy(), kind="stable")]

    data_cols = [
        c for c in dfw.columns
        if c != "timestamp" and pd.api.types.is_numeric_dtype(dfw[c])
    ]

    important_lookup = important_lookup or {}
    results = []
    for col in data_cols:
        s = dfw[col].dropna()
        if len(s) < 3:
            # Too few samples for a sample std and meaningful step statistics.
            continue
        results.append(
            _summarize_column(
                col, s, important_lookup, cv_threshold_pct, jump_pct_threshold, mad_sigma
            )
        )
    unstable_count = sum(1 for row in results if row["不安定判定"])

    result_df = pd.DataFrame(results)
    if not result_df.empty:
        result_df = result_df.sort_values(
            by=["不安定判定", "CV(%)", "スパイク数"],
            ascending=[False, False, False],
            na_position="last",
        ).reset_index(drop=True)

    total_cols = len(results)
    summary = (
        f"✅ 変動解析完了({start_time} ~ {end_time})\n"
        f"- 対象項目数: {total_cols}\n"
        f"- 不安定と判定: {unstable_count} 項目(CV≥{cv_threshold_pct:.1f}% または スパイクあり)\n"
        f"- スパイク条件: |diff| > {mad_sigma:.1f}×MAD または 1ステップ相対変化 ≥ {jump_pct_threshold:.1f}%"
    )

    # The DataFrame round-trip turns None into NaN; convert back so that both
    # the JSON preview object and the JSON text are standards-compliant.
    json_obj = [
        {key: _json_safe(value) for key, value in row.items()}
        for row in result_df.to_dict(orient="records")
    ]
    json_text = json.dumps(json_obj, ensure_ascii=False, indent=2)

    return result_df, summary, json_obj, json_text
156
+
157
+ # ---------- Gradio ラッパ ----------
158
def _uploaded_path(file_obj):
    """Return the filesystem path of an uploaded file, or None when absent.

    Accepts a plain path (``str``/``os.PathLike``) as well as an object that
    exposes the path through a ``.name`` attribute.
    """
    if file_obj is None:
        return None
    if isinstance(file_obj, (str, os.PathLike)):
        return os.fspath(file_obj)
    return getattr(file_obj, "name", None)


def run_variability(csv_file, excel_file, datetime_str, window_minutes, cv_threshold_pct, jump_pct_threshold, mad_sigma):
    """Gradio callback: load the uploads, run the analysis, and write the JSON file.

    Args:
        csv_file: Uploaded CSV with a three-row header; the first column holds
            the timestamps. May be a path or an object with ``.name``.
        excel_file: Optional thresholds workbook (same accepted forms).
        datetime_str: Reference time of the analysis window.
        window_minutes: Look-back width in minutes.
        cv_threshold_pct: CV (%) threshold.
        jump_pct_threshold: Relative one-step jump (%) threshold.
        mad_sigma: MAD multiplier used for the spike threshold.

    Returns:
        ``(result_df, summary, json_obj, json_path)`` for the four output
        components. On any failure the tuple is ``(None, message, None, None)``.
    """
    import tempfile  # local import: keeps this block self-contained

    csv_path = _uploaded_path(csv_file)
    if not csv_path:
        return None, "❌ CSV 読み込み失敗: CSVファイルが指定されていません。", None, None

    try:
        df = pd.read_csv(csv_path, header=[0, 1, 2])
        # Unparseable timestamps become NaT and therefore never fall inside
        # the analysis window.
        timestamp_col = pd.to_datetime(df.iloc[:, 0], errors="coerce")
        df = df.drop(df.columns[0], axis=1)
        df.insert(0, "timestamp", timestamp_col)
    except Exception as e:
        return None, f"❌ CSV 読み込み失敗: {e}", None, None

    excel_path = _uploaded_path(excel_file)
    important_lookup = load_thresholds(excel_path) if excel_path else {}

    try:
        # A cleared numeric field arrives as None; report it instead of crashing.
        window = int(window_minutes)
        cv_thr = float(cv_threshold_pct)
        jump_thr = float(jump_pct_threshold)
        sigma = float(mad_sigma)
    except (TypeError, ValueError) as e:
        return None, f"❌ パラメータが不正です: {e}", None, None

    result_df, summary, json_obj, json_text = analyze_variability_core(
        df=df,
        important_lookup=important_lookup,
        datetime_str=datetime_str,
        window_minutes=window,
        cv_threshold_pct=cv_thr,
        jump_pct_threshold=jump_thr,
        mad_sigma=sigma,
    )

    if result_df is None:
        return None, summary, None, None

    # A unique temp file avoids name collisions between requests arriving in
    # the same second and keeps the working directory clean.
    with tempfile.NamedTemporaryFile(
        "w",
        encoding="utf-8",
        prefix="variability_result_",
        suffix=".json",
        delete=False,
    ) as f:
        f.write(json_text)
        fname = f.name

    return result_df, summary, json_obj, fname
189
+
190
+ # ---------- Gradio UI ----------
191
# Declarative Gradio layout: uploads, analysis parameters, run button, and the
# four outputs filled by ``run_variability``. Components are created in the
# order they appear on the page.
with gr.Blocks(css=".gradio-container {overflow: auto !important;}") as demo:
    gr.Markdown("## 変動解析アプリ(単独 / Hugging Face 対応)")

    # Row 1: input files.
    with gr.Row():
        csv_input = gr.File(
            label="CSVファイル(3行ヘッダー)",
            file_types=[".csv"],
            type="filepath",
        )
        excel_input = gr.File(
            label="Excel(任意: Important参照)",
            file_types=[".xlsx"],
            type="filepath",
        )

    # Row 2: analysis window.
    with gr.Row():
        datetime_str = gr.Textbox(label="基準日時", value="2025/8/1 1:05")
        window_minutes = gr.Number(label="さかのぼる時間幅(分)", value=60)

    # Row 3: detection thresholds.
    with gr.Row():
        cv_threshold_pct = gr.Number(label="CV(%) しきい値", value=10.0)
        jump_pct_threshold = gr.Number(label="1ステップ相対ジャンプ率しきい値(%)", value=10.0)
        mad_sigma = gr.Number(label="MAD倍率(スパイク閾値)", value=3.0)

    run_btn = gr.Button("変動解析を実行")

    # Outputs, in the order returned by ``run_variability``.
    result_table = gr.Dataframe(label="変動解析結果")
    summary_out = gr.Textbox(label="サマリー", lines=6)
    json_out = gr.Json(label="JSONプレビュー")
    json_file = gr.File(label="JSONダウンロード", type="filepath")

    run_btn.click(
        fn=run_variability,
        inputs=[
            csv_input,
            excel_input,
            datetime_str,
            window_minutes,
            cv_threshold_pct,
            jump_pct_threshold,
            mad_sigma,
        ],
        outputs=[
            result_table,
            summary_out,
            json_out,
            json_file,
        ],
    )

# Start the server only when executed as a script; listen on all interfaces.
if __name__ == "__main__":
    demo.launch(server_name="0.0.0.0", share=False)
requirements.txt ADDED
@@ -0,0 +1,13 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # Webアプリ/UI
2
+ gradio>=4.44.0
3
+
4
+ # データ処理
5
+ pandas>=2.2.0
6
+ numpy>=1.26.0
7
+ openpyxl>=3.1.2
8
+
9
+ # 機械学習(回帰や予兆解析で利用)
10
+ scikit-learn>=1.5.0
11
+
12
+ # 可視化(将来グラフ表示を追加する可能性を考慮)
13
+ matplotlib>=3.8.0