Spaces:

AI-Agent-Exercise-2025
/

TrendAnalyticsTool4

Sleeping

App Files Files Community

Ken-INOUE committed on Sep 18, 2025

Commit

7b56cc8

1 Parent(s): 8ad4b11

Implement initial project structure and setup

Browse files

Files changed (2) hide show

app.py +221 -0
requirements.txt +13 -0

app.py ADDED Viewed

	@@ -0,0 +1,221 @@

+# 変動解析アプリ（単独 Gradio 版・粗化なし）
+import gradio as gr
+import pandas as pd
+import numpy as np
+import json
+import os
+import time
+from typing import Dict, Optional
+# ---------- ユーティリティ ----------
+def _np_to_py(x):
+    if hasattr(x, "item"):
+        try:
+            return x.item()
+        except Exception:
+            pass
+    if isinstance(x, (np.integer,)):
+        return int(x)
+    if isinstance(x, (np.floating,)):
+        return float(x)
+    return x
def robust_mad(x: pd.Series) -> float:
    """Return a robust scale estimate of *x*: 1.4826 x the median absolute deviation.

    An empty input yields ``np.nan``.
    """
    if not len(x):
        return np.nan
    center = np.median(x)
    abs_deviation = np.abs(x - center)
    return 1.4826 * np.median(abs_deviation)
def load_thresholds(excel_path: Optional[str]) -> Dict[tuple, bool]:
    """Build an ``Important``-flag lookup from a thresholds Excel workbook.

    The workbook must provide the columns ``ColumnID``, ``ItemName`` and
    ``ProcessNo_ProcessName``. The ``Important`` column is optional; when it
    is absent every row is treated as not important.

    Args:
        excel_path: Path of the workbook, or ``None``/empty to skip loading.

    Returns:
        Mapping of ``(ColumnID, ItemName, ProcessNo_ProcessName)`` to a bool.
        The mapping is empty when no path is given, when the workbook cannot
        be read, or when a required key column is missing.
    """
    if not excel_path:
        return {}
    key_columns = ["ColumnID", "ItemName", "ProcessNo_ProcessName"]
    try:
        thresholds_df = pd.read_excel(excel_path)
        if not set(key_columns).issubset(thresholds_df.columns):
            return {}
        if "Important" in thresholds_df.columns:
            # Only an explicit TRUE counts as important. Comparing (instead of
            # mapping TRUE/FALSE) keeps blank or unrecognised cells from
            # turning into NaN, which bool() would evaluate as True.
            flags = (
                thresholds_df["Important"].astype(str).str.strip().str.upper().eq("TRUE")
            )
        else:
            flags = pd.Series(False, index=thresholds_df.index)
        keys = zip(*(thresholds_df[name] for name in key_columns))
        return {key: bool(flag) for key, flag in zip(keys, flags)}
    except Exception:
        # Deliberate best effort: the workbook is optional input, so any
        # read/parse failure degrades to "no important items" rather than
        # aborting the analysis.
        return {}
+# ---------- 変動解析ロジック ----------
def _json_safe(value):
    """Return *value* as a JSON-representable scalar (non-finite floats become ``None``)."""
    value = _np_to_py(value)
    if isinstance(value, float) and not np.isfinite(value):
        return None
    return value


def analyze_variability_core(
    df: pd.DataFrame,
    important_lookup: Dict[tuple, bool],
    datetime_str: str,
    window_minutes: int,
    cv_threshold_pct: float = 10.0,
    jump_pct_threshold: float = 10.0,
    mad_sigma: float = 3.0,
):
    """Compute per-column variability statistics over a look-back window.

    Rows whose ``timestamp`` lies in ``[target - window_minutes, target]`` are
    selected, and every numeric column with at least three non-null samples
    is analysed. A column is flagged unstable when its coefficient of
    variation reaches ``cv_threshold_pct`` or when it has at least one spike.
    A step (first difference) is a spike when its magnitude exceeds
    ``mad_sigma`` x the robust MAD scale of the steps, or when it is at least
    ``jump_pct_threshold`` percent of the column's absolute median.

    Args:
        df: Data with a ``timestamp`` column plus measurement columns.
        important_lookup: Flags keyed by 3-tuple column labels.
        datetime_str: Reference (end) time of the window.
        window_minutes: Window length in minutes, counted back from the
            reference time.
        cv_threshold_pct: CV threshold in percent.
        jump_pct_threshold: Relative one-step change threshold in percent.
        mad_sigma: Multiplier applied to the MAD scale of the steps.

    Returns:
        ``(result_df, summary, records, json_text)``. When the reference time
        cannot be parsed or the window holds no rows, the result is
        ``(None, message, None, None)``.
    """
    try:
        target_time = pd.to_datetime(datetime_str)
    except (ValueError, TypeError) as exc:
        return None, f"⚠ 基準日時を解釈できません: {datetime_str}（{exc}）", None, None
    if pd.isna(target_time):
        return None, f"⚠ 基準日時を解釈できません: {datetime_str}", None, None

    start_time = target_time - pd.Timedelta(minutes=window_minutes)
    end_time = target_time
    in_window = (df["timestamp"] >= start_time) & (df["timestamp"] <= end_time)
    dfw = df[in_window].copy()
    if dfw.empty:
        return None, f"⚠ 指定時間幅（{start_time}～{end_time}）にデータが見つかりません。", None, None

    data_cols = [
        c for c in dfw.columns
        if c != "timestamp" and pd.api.types.is_numeric_dtype(dfw[c])
    ]

    results = []
    unstable_count = 0
    for col in data_cols:
        # NOTE(review): samples are used in row order; first/last values and
        # step sizes presumably assume ascending timestamps - confirm upstream.
        s = dfw[col].dropna()
        n = len(s)
        if n < 3:
            continue

        mean = float(s.mean())
        std = float(s.std(ddof=1))
        cv_pct = np.nan if mean == 0 else abs(std / mean) * 100.0

        diffs = s.diff().dropna()
        abs_diffs = diffs.abs()
        mad_scale = robust_mad(diffs)
        # Floor the reference so a zero median cannot divide by zero.
        ref = max(1e-9, abs(float(np.median(s))))
        rel_jump = abs_diffs / ref * 100.0
        # A zero/undefined MAD would flag every step, so disable that test.
        mad_usable = not np.isnan(mad_scale) and mad_scale > 0
        abs_thr = mad_sigma * mad_scale if mad_usable else np.inf
        spike_mask = (abs_diffs > abs_thr) | (rel_jump >= jump_pct_threshold)

        spike_count = int(spike_mask.sum())
        spike_up_count = int((diffs[spike_mask] > 0).sum())
        spike_down_count = spike_count - spike_up_count
        max_step = float(abs_diffs.max())

        is_triple = isinstance(col, tuple) and len(col) == 3
        important = important_lookup.get(col, False) if is_triple else False
        colid, itemname, proc = col if is_triple else ("", str(col), "")

        unstable = (not np.isnan(cv_pct) and cv_pct >= cv_threshold_pct) or spike_count > 0
        if unstable:
            unstable_count += 1

        results.append({
            "ColumnID": colid,
            "ItemName": itemname,
            "Process": proc,
            "サンプル数": n,
            "平均": round(mean, 6),
            "標準偏差": round(std, 6),
            "CV(%)": None if np.isnan(cv_pct) else float(round(cv_pct, 3)),
            "スパイク数": spike_count,
            "スパイク上昇数": spike_up_count,
            "スパイク下降数": spike_down_count,
            "最大|ステップ|": None if np.isnan(max_step) else float(round(max_step, 6)),
            "最初の値": round(float(s.iloc[0]), 6),
            "最後の値": round(float(s.iloc[-1]), 6),
            "重要項目": bool(important),
            "不安定判定": bool(unstable),
        })

    result_df = pd.DataFrame(results)
    if not result_df.empty:
        result_df = result_df.sort_values(
            by=["不安定判定", "CV(%)", "スパイク数"],
            ascending=[False, False, False],
            na_position="last",
        ).reset_index(drop=True)

    total_cols = len(results)
    summary = (
        f"✅ 変動解析完了（{start_time} ～ {end_time}）\n"
        f"- 対象項目数: {total_cols}\n"
        f"- 不安定と判定: {unstable_count} 項目（CV≥{cv_threshold_pct:.1f}% または スパイクあり）\n"
        f"- スパイク条件: |diff| > {mad_sigma:.1f}×MAD  または  1ステップ相対変化 ≥ {jump_pct_threshold:.1f}%"
    )

    # pandas stores missing numeric cells as NaN, which json.dumps would emit
    # as the non-standard token ``NaN``; map non-finite floats to null instead.
    records = [
        {key: _json_safe(value) for key, value in row.items()}
        for row in result_df.to_dict(orient="records")
    ]
    json_text = json.dumps(records, ensure_ascii=False, indent=2)
    return result_df, summary, records, json_text
+# ---------- Gradio ラッパ ----------
def run_variability(csv_file, excel_file, datetime_str, window_minutes, cv_threshold_pct, jump_pct_threshold, mad_sigma):
    """Gradio callback: load the uploads, run the variability analysis, export JSON.

    Args:
        csv_file: Uploaded CSV with a three-row header; its first column holds
            the timestamps. May be a path (``str``/``os.PathLike``) or an
            object exposing the path as ``.name``.
        excel_file: Optional thresholds workbook, in the same forms as
            ``csv_file``, or ``None``.
        datetime_str: Reference (end) time of the analysis window.
        window_minutes: Look-back window length in minutes.
        cv_threshold_pct: CV threshold in percent.
        jump_pct_threshold: Relative one-step change threshold in percent.
        mad_sigma: Multiplier for the MAD-based spike threshold.

    Returns:
        ``(result_df, summary, records, json_path)``; on any failure
        ``(None, message, None, None)``.
    """
    import tempfile

    def _as_path(upload):
        # The UI declares its inputs as gr.File(type="filepath"), which hands
        # over a plain path string; other configurations pass an object whose
        # ``.name`` is the path. Accept both. (``Path.name`` would be just the
        # basename, hence the explicit PathLike check.)
        if isinstance(upload, (str, os.PathLike)):
            return upload
        return upload.name

    if csv_file is None:
        return None, "❌ CSV 読み込み失敗: ファイルが指定されていません。", None, None
    try:
        df = pd.read_csv(_as_path(csv_file), header=[0, 1, 2])
        timestamp_col = pd.to_datetime(df.iloc[:, 0], errors="coerce")
        df = df.drop(df.columns[0], axis=1)
        df.insert(0, "timestamp", timestamp_col)
    except Exception as e:
        # UI boundary: report any read/parse problem instead of crashing.
        return None, f"❌ CSV 読み込み失敗: {e}", None, None

    important_lookup = {}
    if excel_file is not None:
        important_lookup = load_thresholds(_as_path(excel_file))

    try:
        result_df, summary, json_obj, json_text = analyze_variability_core(
            df=df,
            important_lookup=important_lookup,
            datetime_str=datetime_str,
            window_minutes=int(window_minutes),
            cv_threshold_pct=float(cv_threshold_pct),
            jump_pct_threshold=float(jump_pct_threshold),
            mad_sigma=float(mad_sigma),
        )
    except (TypeError, ValueError, KeyError) as e:
        # Covers empty/non-numeric form fields and malformed input tables.
        return None, f"❌ 変動解析失敗: {e}", None, None
    if result_df is None:
        return None, summary, None, None

    # A unique temp file avoids name collisions between runs started within
    # the same second and keeps the working directory clean.
    with tempfile.NamedTemporaryFile(
        "w",
        encoding="utf-8",
        prefix="variability_result_",
        suffix=".json",
        delete=False,
    ) as f:
        f.write(json_text)
        fname = f.name
    return result_df, summary, json_obj, fname
+# ---------- Gradio UI ----------
with gr.Blocks(css=".gradio-container {overflow: auto !important;}") as demo:
    gr.Markdown("## 変動解析アプリ（単独 / Hugging Face 対応）")

    # Uploads: the measurement CSV and the optional thresholds workbook.
    with gr.Row():
        csv_input = gr.File(
            label="CSVファイル（3行ヘッダー）",
            file_types=[".csv"],
            type="filepath",
        )
        excel_input = gr.File(
            label="Excel（任意: Important参照）",
            file_types=[".xlsx"],
            type="filepath",
        )

    # Analysis window: reference time and look-back length in minutes.
    with gr.Row():
        datetime_str = gr.Textbox(label="基準日時", value="2025/8/1 1:05")
        window_minutes = gr.Number(label="さかのぼる時間幅（分）", value=60)

    # Instability / spike thresholds.
    with gr.Row():
        cv_threshold_pct = gr.Number(label="CV(%) しきい値", value=10.0)
        jump_pct_threshold = gr.Number(
            label="1ステップ相対ジャンプ率しきい値(%)",
            value=10.0,
        )
        mad_sigma = gr.Number(label="MAD倍率（スパイク閾値）", value=3.0)

    run_btn = gr.Button("変動解析を実行")

    # Outputs: result table, text summary, JSON preview and downloadable file.
    result_table = gr.Dataframe(label="変動解析結果")
    summary_out = gr.Textbox(label="サマリー", lines=6)
    json_out = gr.Json(label="JSONプレビュー")
    json_file = gr.File(label="JSONダウンロード", type="filepath")

    analysis_inputs = [
        csv_input,
        excel_input,
        datetime_str,
        window_minutes,
        cv_threshold_pct,
        jump_pct_threshold,
        mad_sigma,
    ]
    analysis_outputs = [result_table, summary_out, json_out, json_file]
    run_btn.click(run_variability, inputs=analysis_inputs, outputs=analysis_outputs)

if __name__ == "__main__":
    # Bind to all interfaces; no public share link is created.
    demo.launch(share=False, server_name="0.0.0.0")

requirements.txt ADDED Viewed

	@@ -0,0 +1,13 @@

+# Webアプリ/UI
+gradio>=4.44.0
+# データ処理
+pandas>=2.2.0
+numpy>=1.26.0
+openpyxl>=3.1.2
+# 機械学習（回帰や予兆解析で利用）
+scikit-learn>=1.5.0
+# 可視化（将来グラフ表示を追加する可能性を考慮）
+matplotlib>=3.8.0