Spaces:
Sleeping
Sleeping
Update src/streamlit_app.py
Browse files - src/streamlit_app.py +124 -66
src/streamlit_app.py
CHANGED
|
@@ -4,36 +4,77 @@ import numpy as np
|
|
| 4 |
import matplotlib.pyplot as plt
|
| 5 |
|
| 6 |
# =========================
|
| 7 |
-
# Page
|
| 8 |
# =========================
|
| 9 |
st.set_page_config(
|
| 10 |
-
page_title="COPOD Demo",
|
|
|
|
| 11 |
layout="wide"
|
| 12 |
)
|
| 13 |
|
| 14 |
# =========================
|
| 15 |
-
#
|
| 16 |
# =========================
|
| 17 |
-
st.title("🔍 COPOD – Copula-Based Outlier Detection")
|
| 18 |
st.markdown("""
|
| 19 |
-
|
| 20 |
-
|
| 21 |
-
-
|
| 22 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 23 |
|
| 24 |
# =========================
|
| 25 |
-
# Sidebar
|
| 26 |
# =========================
|
| 27 |
-
st.sidebar.header("⚙️
|
| 28 |
|
| 29 |
uploaded_file = st.sidebar.file_uploader(
|
| 30 |
-
"Upload CSV file",
|
| 31 |
type=["csv"]
|
| 32 |
)
|
| 33 |
|
| 34 |
run_copod = st.sidebar.button("▶️ Run COPOD")
|
| 35 |
show_outlier_graph = st.sidebar.button("📊 Show Outlier Graph")
|
| 36 |
-
|
| 37 |
|
| 38 |
# =========================
|
| 39 |
# Session State
|
|
@@ -45,99 +86,116 @@ if "scores" not in st.session_state:
|
|
| 45 |
st.session_state.scores = None
|
| 46 |
|
| 47 |
# =========================
|
| 48 |
-
#
|
| 49 |
# =========================
|
|
|
|
|
|
|
|
|
|
| 50 |
if uploaded_file is not None:
|
| 51 |
df = pd.read_csv(uploaded_file)
|
| 52 |
st.session_state.df = df
|
| 53 |
|
| 54 |
-
st.
|
| 55 |
st.dataframe(df.head())
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 56 |
|
| 57 |
# =========================
|
| 58 |
-
# Run COPOD
|
| 59 |
# =========================
|
|
|
|
|
|
|
|
|
|
| 60 |
if run_copod:
|
| 61 |
if st.session_state.df is None:
|
| 62 |
-
st.warning("
|
| 63 |
else:
|
| 64 |
df = st.session_state.df
|
| 65 |
-
|
| 66 |
-
# Chỉ lấy cột numeric
|
| 67 |
X = df.select_dtypes(include=[np.number])
|
| 68 |
|
| 69 |
if X.shape[1] == 0:
|
| 70 |
-
st.error("
|
| 71 |
else:
|
| 72 |
-
#
|
| 73 |
-
|
| 74 |
-
df["outlier_score"] =
|
| 75 |
|
| 76 |
st.session_state.df = df
|
| 77 |
-
st.session_state.scores =
|
| 78 |
|
| 79 |
-
st.success("
|
| 80 |
|
| 81 |
-
st.
|
| 82 |
st.dataframe(
|
| 83 |
df.sort_values("outlier_score", ascending=False).head(10)
|
| 84 |
)
|
| 85 |
|
|
|
|
|
|
|
| 86 |
# =========================
|
| 87 |
-
#
|
| 88 |
# =========================
|
| 89 |
-
|
| 90 |
-
|
| 91 |
-
st.warning("⚠️ Run COPOD first.")
|
| 92 |
-
else:
|
| 93 |
-
st.subheader("📊 Outlier Score Distribution")
|
| 94 |
|
| 95 |
-
|
| 96 |
-
ax.hist(st.session_state.scores, bins=30)
|
| 97 |
-
ax.set_xlabel("Outlier Score")
|
| 98 |
-
ax.set_ylabel("Count")
|
| 99 |
|
| 100 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 101 |
|
| 102 |
-
|
| 103 |
-
|
| 104 |
-
|
| 105 |
-
|
| 106 |
-
""")
|
| 107 |
|
| 108 |
-
|
| 109 |
-
# Show Correlation Failure (Demo Logic)
|
| 110 |
-
# =========================
|
| 111 |
-
if show_correlation_failure:
|
| 112 |
-
if st.session_state.df is None:
|
| 113 |
-
st.warning("⚠️ Upload dữ liệu trước.")
|
| 114 |
-
else:
|
| 115 |
-
df = st.session_state.df
|
| 116 |
-
numeric_cols = df.select_dtypes(include=[np.number]).columns
|
| 117 |
|
| 118 |
-
|
| 119 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 120 |
else:
|
| 121 |
-
|
|
|
|
| 122 |
|
| 123 |
-
|
|
|
|
|
|
|
|
|
|
| 124 |
|
| 125 |
-
|
| 126 |
-
ax.scatter(df[x_col], df[y_col], alpha=0.6)
|
| 127 |
-
ax.set_xlabel(x_col)
|
| 128 |
-
ax.set_ylabel(y_col)
|
| 129 |
|
| 130 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 131 |
|
| 132 |
-
|
| 133 |
-
**Ý tưởng minh họa**:
|
| 134 |
-
- Điểm phá vỡ mối quan hệ (correlation outlier)
|
| 135 |
-
- COPOD có thể **không gán score cao**
|
| 136 |
-
- Vì từng chiều vẫn "bình thường"
|
| 137 |
-
""")
|
| 138 |
|
| 139 |
# =========================
|
| 140 |
# Footer
|
| 141 |
# =========================
|
| 142 |
st.markdown("---")
|
| 143 |
-
st.caption("COPOD Demo
|
|
|
|
| 4 |
import matplotlib.pyplot as plt
|
| 5 |
|
| 6 |
# =========================
|
| 7 |
+
# Page Config
|
| 8 |
# =========================
|
| 9 |
st.set_page_config(
|
| 10 |
+
page_title="COPOD Interactive Demo",
|
| 11 |
+
page_icon="🔍",
|
| 12 |
layout="wide"
|
| 13 |
)
|
| 14 |
|
| 15 |
# =========================
|
| 16 |
+
# Custom CSS
|
| 17 |
# =========================
|
|
|
|
| 18 |
st.markdown("""
|
| 19 |
+
<style>
|
| 20 |
+
.main {
|
| 21 |
+
background-color: #f9fafc;
|
| 22 |
+
}
|
| 23 |
+
h1, h2, h3 {
|
| 24 |
+
color: #2c3e50;
|
| 25 |
+
}
|
| 26 |
+
.step-box {
|
| 27 |
+
background-color: #ffffff;
|
| 28 |
+
padding: 1.5rem;
|
| 29 |
+
border-radius: 12px;
|
| 30 |
+
box-shadow: 0 4px 12px rgba(0,0,0,0.05);
|
| 31 |
+
margin-bottom: 1.5rem;
|
| 32 |
+
}
|
| 33 |
+
.info-box {
|
| 34 |
+
background-color: #eef4ff;
|
| 35 |
+
padding: 1rem;
|
| 36 |
+
border-left: 6px solid #4c6ef5;
|
| 37 |
+
border-radius: 6px;
|
| 38 |
+
}
|
| 39 |
+
.warning-box {
|
| 40 |
+
background-color: #fff4e6;
|
| 41 |
+
padding: 1rem;
|
| 42 |
+
border-left: 6px solid #f08c00;
|
| 43 |
+
border-radius: 6px;
|
| 44 |
+
}
|
| 45 |
+
</style>
|
| 46 |
+
""", unsafe_allow_html=True)
|
| 47 |
+
|
| 48 |
+
# =========================
|
| 49 |
+
# Title Section
|
| 50 |
+
# =========================
|
| 51 |
+
st.title("🔍 COPOD – Interactive Outlier Detection Demo")
|
| 52 |
+
|
| 53 |
+
st.markdown("""
|
| 54 |
+
<div class="info-box">
|
| 55 |
+
<b>COPOD</b> (Copula-Based Outlier Detection) là thuật toán:
|
| 56 |
+
<ul>
|
| 57 |
+
<li>Không cần hyperparameter</li>
|
| 58 |
+
<li>Nhanh</li>
|
| 59 |
+
<li>Có khả năng giải thích theo từng chiều</li>
|
| 60 |
+
</ul>
|
| 61 |
+
Ứng dụng này minh họa cả <b>điểm mạnh</b> và <b>điểm yếu</b> của COPOD.
|
| 62 |
+
</div>
|
| 63 |
+
""", unsafe_allow_html=True)
|
| 64 |
|
| 65 |
# =========================
|
| 66 |
+
# Sidebar
|
| 67 |
# =========================
|
| 68 |
+
st.sidebar.header("⚙️ Control Panel")
|
| 69 |
|
| 70 |
uploaded_file = st.sidebar.file_uploader(
|
| 71 |
+
"📂 Upload CSV file",
|
| 72 |
type=["csv"]
|
| 73 |
)
|
| 74 |
|
| 75 |
run_copod = st.sidebar.button("▶️ Run COPOD")
|
| 76 |
show_outlier_graph = st.sidebar.button("📊 Show Outlier Graph")
|
| 77 |
+
show_corr_failure = st.sidebar.button("⚠️ Show Correlation Failure")
|
| 78 |
|
| 79 |
# =========================
|
| 80 |
# Session State
|
|
|
|
| 86 |
st.session_state.scores = None
|
| 87 |
|
| 88 |
# =========================
|
| 89 |
+
# STEP 1 – Upload Data
|
| 90 |
# =========================
|
| 91 |
+
st.markdown("<div class='step-box'>", unsafe_allow_html=True)
|
| 92 |
+
st.subheader("🟢 Step 1: Upload Dataset")
|
| 93 |
+
|
| 94 |
if uploaded_file is not None:
|
| 95 |
df = pd.read_csv(uploaded_file)
|
| 96 |
st.session_state.df = df
|
| 97 |
|
| 98 |
+
st.success("Dataset loaded successfully!")
|
| 99 |
st.dataframe(df.head())
|
| 100 |
+
else:
|
| 101 |
+
st.markdown("""
|
| 102 |
+
<div class="warning-box">
|
| 103 |
+
Please upload a CSV file to begin.
|
| 104 |
+
</div>
|
| 105 |
+
""", unsafe_allow_html=True)
|
| 106 |
+
|
| 107 |
+
st.markdown("</div>", unsafe_allow_html=True)
|
| 108 |
|
| 109 |
# =========================
|
| 110 |
+
# STEP 2 – Run COPOD
|
| 111 |
# =========================
|
| 112 |
+
st.markdown("<div class='step-box'>", unsafe_allow_html=True)
|
| 113 |
+
st.subheader("🔵 Step 2: Run COPOD")
|
| 114 |
+
|
| 115 |
if run_copod:
|
| 116 |
if st.session_state.df is None:
|
| 117 |
+
st.warning("Upload data first.")
|
| 118 |
else:
|
| 119 |
df = st.session_state.df
|
|
|
|
|
|
|
| 120 |
X = df.select_dtypes(include=[np.number])
|
| 121 |
|
| 122 |
if X.shape[1] == 0:
|
| 123 |
+
st.error("Dataset has no numeric columns.")
|
| 124 |
else:
|
| 125 |
+
# PLACEHOLDER SCORES
|
| 126 |
+
scores = np.random.rand(len(X)) * 10
|
| 127 |
+
df["outlier_score"] = scores
|
| 128 |
|
| 129 |
st.session_state.df = df
|
| 130 |
+
st.session_state.scores = scores
|
| 131 |
|
| 132 |
+
st.success("COPOD completed (placeholder).")
|
| 133 |
|
| 134 |
+
st.markdown("**Top potential outliers:**")
|
| 135 |
st.dataframe(
|
| 136 |
df.sort_values("outlier_score", ascending=False).head(10)
|
| 137 |
)
|
| 138 |
|
| 139 |
+
st.markdown("</div>", unsafe_allow_html=True)
|
| 140 |
+
|
| 141 |
# =========================
|
| 142 |
+
# STEP 3 – Visual Analysis
|
| 143 |
# =========================
|
| 144 |
+
st.markdown("<div class='step-box'>", unsafe_allow_html=True)
|
| 145 |
+
st.subheader("🟣 Step 3: Visual Analysis")
|
|
|
|
|
|
|
|
|
|
| 146 |
|
| 147 |
+
col1, col2 = st.columns(2)
|
|
|
|
|
|
|
|
|
|
| 148 |
|
| 149 |
+
# --- Outlier Graph ---
|
| 150 |
+
with col1:
|
| 151 |
+
if show_outlier_graph:
|
| 152 |
+
if st.session_state.scores is None:
|
| 153 |
+
st.warning("Run COPOD first.")
|
| 154 |
+
else:
|
| 155 |
+
st.markdown("**📊 Outlier Score Distribution**")
|
| 156 |
|
| 157 |
+
fig, ax = plt.subplots()
|
| 158 |
+
ax.hist(st.session_state.scores, bins=30)
|
| 159 |
+
ax.set_xlabel("Outlier Score")
|
| 160 |
+
ax.set_ylabel("Count")
|
|
|
|
| 161 |
|
| 162 |
+
st.pyplot(fig)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 163 |
|
| 164 |
+
st.caption(
|
| 165 |
+
"Higher score → more likely to be an outlier (tail probability)."
|
| 166 |
+
)
|
| 167 |
+
|
| 168 |
+
# --- Correlation Failure ---
|
| 169 |
+
with col2:
|
| 170 |
+
if show_corr_failure:
|
| 171 |
+
if st.session_state.df is None:
|
| 172 |
+
st.warning("Upload data first.")
|
| 173 |
else:
|
| 174 |
+
df = st.session_state.df
|
| 175 |
+
num_cols = df.select_dtypes(include=[np.number]).columns
|
| 176 |
|
| 177 |
+
if len(num_cols) < 2:
|
| 178 |
+
st.error("Need at least 2 numeric columns.")
|
| 179 |
+
else:
|
| 180 |
+
x, y = num_cols[:2]
|
| 181 |
|
| 182 |
+
st.markdown("**⚠️ Correlation Failure Illustration**")
|
|
|
|
|
|
|
|
|
|
| 183 |
|
| 184 |
+
fig, ax = plt.subplots()
|
| 185 |
+
ax.scatter(df[x], df[y], alpha=0.6)
|
| 186 |
+
ax.set_xlabel(x)
|
| 187 |
+
ax.set_ylabel(y)
|
| 188 |
+
|
| 189 |
+
st.pyplot(fig)
|
| 190 |
+
|
| 191 |
+
st.caption(
|
| 192 |
+
"COPOD may miss outliers that break correlations but are marginally normal."
|
| 193 |
+
)
|
| 194 |
|
| 195 |
+
st.markdown("</div>", unsafe_allow_html=True)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 196 |
|
| 197 |
# =========================
|
| 198 |
# Footer
|
| 199 |
# =========================
|
| 200 |
st.markdown("---")
|
| 201 |
+
st.caption("COPOD Demo • Integrator View • Streamlit + Hugging Face")
|