| | |
| | import gradio as gr |
| | import pandas as pd |
| | import numpy as np |
| | import json |
| | import os |
| | import time |
| | from typing import Dict, Optional |
| |
|
| | |
| | def _np_to_py(x): |
| | if hasattr(x, "item"): |
| | try: |
| | return x.item() |
| | except Exception: |
| | pass |
| | if isinstance(x, (np.integer,)): |
| | return int(x) |
| | if isinstance(x, (np.floating,)): |
| | return float(x) |
| | return x |
| |
|
def robust_mad(x: pd.Series) -> float:
    """Robust scale estimate of a series: 1.4826 x median absolute deviation.

    The 1.4826 factor makes the MAD consistent with the standard deviation
    under a normal distribution. Returns NaN for an empty input.
    """
    if len(x) == 0:
        return np.nan
    center = np.median(x)
    deviations = np.abs(x - center)
    return 1.4826 * np.median(deviations)
|
def load_thresholds(excel_path: Optional[str]) -> Dict[tuple, bool]:
    """Load per-item "Important" flags from a thresholds Excel workbook.

    Returns a mapping (ColumnID, ItemName, ProcessNo_ProcessName) -> bool.
    Loading is deliberately best-effort: a falsy path, an unreadable file,
    or missing required columns all yield an empty dict so the caller can
    proceed without importance metadata.
    """
    if not excel_path:
        return {}
    try:
        thresholds_df = pd.read_excel(excel_path)
        if "Important" in thresholds_df.columns:
            # Normalize TRUE/FALSE text to booleans. Unrecognized or missing
            # cells map to NaN — and bool(NaN) is True — so fill with False:
            # only an explicit TRUE marks an item as important.
            thresholds_df["Important"] = (
                thresholds_df["Important"]
                .astype(str)
                .str.upper()
                .map({"TRUE": True, "FALSE": False})
                .fillna(False)
            )
        else:
            thresholds_df["Important"] = False
        need = {"ColumnID", "ItemName", "ProcessNo_ProcessName", "Important"}
        if not need.issubset(set(thresholds_df.columns)):
            return {}
        return {
            (row["ColumnID"], row["ItemName"], row["ProcessNo_ProcessName"]): bool(row["Important"])
            for _, row in thresholds_df.iterrows()
        }
    except Exception:
        # Best-effort by design: importance flags are optional metadata.
        return {}
|
| | |
def _analyze_column(
    s: pd.Series,
    col,
    important_lookup: Dict[tuple, bool],
    cv_threshold_pct: float,
    jump_pct_threshold: float,
    mad_sigma: float,
):
    """Build one result record for a single numeric column.

    Returns (record, unstable); record is None when fewer than 3 samples
    are available, in which case the column is skipped.
    """
    n = len(s)
    if n < 3:
        return None, False

    mean = float(np.mean(s))
    std = float(np.std(s, ddof=1))  # sample std; n >= 3 is guaranteed here
    # CV is undefined for a zero mean; reported as missing (None) downstream.
    cv_pct = np.nan if mean == 0 else abs(std / mean) * 100.0

    diffs = s.diff().dropna()
    mad_scale = robust_mad(diffs)
    # Relative jump uses |median| as reference, floored to avoid divide-by-zero.
    ref = max(1e-9, abs(float(np.median(s))))
    rel_jump = diffs.abs() / ref * 100.0

    # Spike: |step| beyond mad_sigma x MAD of the diffs, OR a one-step change
    # of at least jump_pct_threshold percent of the reference level. When the
    # MAD is zero/NaN the absolute criterion is disabled (threshold = inf).
    abs_thr = (mad_sigma * mad_scale) if (not np.isnan(mad_scale) and mad_scale > 0) else np.inf
    spike_mask = (diffs.abs() > abs_thr) | (rel_jump >= jump_pct_threshold)

    spike_count = int(spike_mask.sum())
    spike_up_count = int((diffs[spike_mask] > 0).sum())
    spike_down_count = spike_count - spike_up_count
    max_step = float(diffs.abs().max()) if len(diffs) else np.nan
    first_val = float(s.iloc[0])
    last_val = float(s.iloc[-1])

    # Important flags only resolve for 3-level MultiIndex column keys.
    important = False
    if isinstance(col, tuple) and len(col) == 3:
        important = important_lookup.get(col, False)

    unstable = (not np.isnan(cv_pct) and cv_pct >= cv_threshold_pct) or (spike_count > 0)

    colid, itemname, proc = (col if isinstance(col, tuple) else ("", str(col), ""))

    record = {
        "ColumnID": colid,
        "ItemName": itemname,
        "Process": proc,
        "サンプル数": n,
        "平均": round(mean, 6),
        "標準偏差": round(std, 6),
        "CV(%)": None if np.isnan(cv_pct) else float(round(cv_pct, 3)),
        "スパイク数": spike_count,
        "スパイク上昇数": spike_up_count,
        "スパイク下降数": spike_down_count,
        "最大|ステップ|": None if np.isnan(max_step) else float(round(max_step, 6)),
        "最初の値": round(first_val, 6),
        "最後の値": round(last_val, 6),
        "重要項目": bool(important),
        "不安定判定": bool(unstable),
    }
    return record, unstable


def analyze_variability_core(
    df: pd.DataFrame,
    important_lookup: Dict[tuple, bool],
    datetime_str: str,
    window_minutes: int,
    cv_threshold_pct: float = 10.0,
    jump_pct_threshold: float = 10.0,
    mad_sigma: float = 3.0,
):
    """Analyze the variability of every numeric column in a lookback window.

    Parameters
    ----------
    df : DataFrame with a "timestamp" column plus numeric data columns
        (column keys may be 3-tuples from a 3-row MultiIndex header).
    important_lookup : (ColumnID, ItemName, Process) -> Important flag.
    datetime_str : window end time, any pandas-parsable datetime string.
    window_minutes : minutes to look back from the end time.
    cv_threshold_pct : CV(%) at or above which a column is flagged unstable.
    jump_pct_threshold : one-step relative change (%) that counts as a spike.
    mad_sigma : multiple of the diff MAD that counts as a spike.

    Returns
    -------
    (result_df, summary_text, json_object, json_text). When the window
    contains no rows, result_df/json_object/json_text are None and the
    summary carries a warning message.
    """
    end_time = pd.to_datetime(datetime_str)
    start_time = end_time - pd.Timedelta(minutes=window_minutes)

    dfw = df[(df["timestamp"] >= start_time) & (df["timestamp"] <= end_time)].copy()
    if dfw.empty:
        return None, f"⚠ 指定時間幅({start_time}~{end_time})にデータが見つかりません。", None, None

    data_cols = [
        c for c in dfw.columns
        if c != "timestamp" and pd.api.types.is_numeric_dtype(dfw[c])
    ]

    results = []
    unstable_count = 0
    for col in data_cols:
        record, unstable = _analyze_column(
            dfw[col].dropna(), col, important_lookup,
            cv_threshold_pct, jump_pct_threshold, mad_sigma,
        )
        if record is None:
            continue
        results.append(record)
        if unstable:
            unstable_count += 1

    result_df = pd.DataFrame(results)
    if not result_df.empty:
        # Most problematic items first: unstable flag, then CV, then spikes.
        result_df = result_df.sort_values(
            by=["不安定判定", "CV(%)", "スパイク数"],
            ascending=[False, False, False],
            na_position="last"
        ).reset_index(drop=True)

    summary = (
        f"✅ 変動解析完了({start_time} ~ {end_time})\n"
        f"- 対象項目数: {len(results)}\n"
        f"- 不安定と判定: {unstable_count} 項目(CV≥{cv_threshold_pct:.1f}% または スパイクあり)\n"
        f"- スパイク条件: |diff| > {mad_sigma:.1f}×MAD または 1ステップ相対変化 ≥ {jump_pct_threshold:.1f}%"
    )

    # Sanitize NumPy scalars so the payload is plain-JSON serializable.
    records = [
        {k: _np_to_py(v) for k, v in row.items()}
        for row in result_df.to_dict(orient="records")
    ]
    json_text = json.dumps(records, ensure_ascii=False, indent=2)

    return result_df, summary, records, json_text
| |
|
| | |
| | def run_variability(csv_file, excel_file, datetime_str, window_minutes, cv_threshold_pct, jump_pct_threshold, mad_sigma): |
| | try: |
| | df = pd.read_csv(csv_file.name, header=[0, 1, 2]) |
| | timestamp_col = pd.to_datetime(df.iloc[:, 0], errors="coerce") |
| | df = df.drop(df.columns[0], axis=1) |
| | df.insert(0, "timestamp", timestamp_col) |
| | except Exception as e: |
| | return None, f"❌ CSV 読み込み失敗: {e}", None, None |
| |
|
| | important_lookup = {} |
| | if excel_file is not None: |
| | important_lookup = load_thresholds(excel_file.name) |
| |
|
| | result_df, summary, json_obj, json_text = analyze_variability_core( |
| | df=df, |
| | important_lookup=important_lookup, |
| | datetime_str=datetime_str, |
| | window_minutes=int(window_minutes), |
| | cv_threshold_pct=float(cv_threshold_pct), |
| | jump_pct_threshold=float(jump_pct_threshold), |
| | mad_sigma=float(mad_sigma), |
| | ) |
| |
|
| | if result_df is None: |
| | return None, summary, None, None |
| |
|
| | fname = f"variability_result_{int(time.time())}.json" |
| | with open(fname, "w", encoding="utf-8") as f: |
| | f.write(json_text) |
| |
|
| | return result_df, summary, json_obj, fname |
| |
|
| | |
# --- Gradio UI: wires the file inputs and tuning knobs to run_variability ---
with gr.Blocks(css=".gradio-container {overflow: auto !important;}") as demo:
    # Application title.
    gr.Markdown("## 変動解析アプリ(単独 / Hugging Face 対応)")

    # Input files: a 3-row-header CSV (required) and an optional Excel sheet
    # carrying per-item "Important" flags.
    with gr.Row():
        csv_input = gr.File(label="CSVファイル(3行ヘッダー)", file_types=[".csv"], type="filepath")
        excel_input = gr.File(label="Excel(任意: Important参照)", file_types=[".xlsx"], type="filepath")

    # Analysis window: end time and how many minutes to look back from it.
    with gr.Row():
        datetime_str = gr.Textbox(label="基準日時", value="2025/8/1 1:05")
        window_minutes = gr.Number(label="さかのぼる時間幅(分)", value=60)

    # Tuning knobs for the instability / spike detection.
    with gr.Row():
        cv_threshold_pct = gr.Number(label="CV(%) しきい値", value=10.0)
        jump_pct_threshold = gr.Number(label="1ステップ相対ジャンプ率しきい値(%)", value=10.0)
        mad_sigma = gr.Number(label="MAD倍率(スパイク閾値)", value=3.0)

    run_btn = gr.Button("変動解析を実行")

    # Outputs: result table, text summary, JSON preview, downloadable JSON file.
    result_table = gr.Dataframe(label="変動解析結果")
    summary_out = gr.Textbox(label="サマリー", lines=6)
    json_out = gr.Json(label="JSONプレビュー")
    json_file = gr.File(label="JSONダウンロード", type="filepath")

    run_btn.click(
        run_variability,
        inputs=[csv_input, excel_input, datetime_str, window_minutes, cv_threshold_pct, jump_pct_threshold, mad_sigma],
        outputs=[result_table, summary_out, json_out, json_file]
    )

if __name__ == "__main__":
    # Bind to all interfaces so the app is reachable from inside a container
    # (e.g. Hugging Face Spaces); public share link disabled.
    demo.launch(server_name="0.0.0.0", share=False)
| |
|