Gumball2k5 committed on
Commit
ac6c128
·
verified ·
1 Parent(s): 1bcc527

Update src/streamlit_app.py

Browse files
Files changed (1) hide show
  1. src/streamlit_app.py +124 -66
src/streamlit_app.py CHANGED
@@ -4,36 +4,77 @@ import numpy as np
4
  import matplotlib.pyplot as plt
5
 
6
  # =========================
7
- # Page config
8
  # =========================
9
  st.set_page_config(
10
- page_title="COPOD Demo",
 
11
  layout="wide"
12
  )
13
 
14
  # =========================
15
- # Title
16
  # =========================
17
- st.title("🔍 COPOD – Copula-Based Outlier Detection")
18
  st.markdown("""
19
- Demo tương tác để minh họa:
20
- - Khi COPOD hoạt động tốt
21
- - Khi COPOD thất bại do bỏ qua tương quan
22
- """)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
23
 
24
  # =========================
25
- # Sidebar – Controls
26
  # =========================
27
- st.sidebar.header("⚙️ Controls")
28
 
29
  uploaded_file = st.sidebar.file_uploader(
30
- "Upload CSV file",
31
  type=["csv"]
32
  )
33
 
34
  run_copod = st.sidebar.button("▶️ Run COPOD")
35
  show_outlier_graph = st.sidebar.button("📊 Show Outlier Graph")
36
- show_correlation_failure = st.sidebar.button("⚠️ Show Correlation Failure")
37
 
38
  # =========================
39
  # Session State
@@ -45,99 +86,116 @@ if "scores" not in st.session_state:
45
  st.session_state.scores = None
46
 
47
  # =========================
48
- # Load Data
49
  # =========================
 
 
 
50
  if uploaded_file is not None:
51
  df = pd.read_csv(uploaded_file)
52
  st.session_state.df = df
53
 
54
- st.subheader("📄 Data Preview")
55
  st.dataframe(df.head())
 
 
 
 
 
 
 
 
56
 
57
  # =========================
58
- # Run COPOD (Placeholder)
59
  # =========================
 
 
 
60
  if run_copod:
61
  if st.session_state.df is None:
62
- st.warning("⚠️ Please upload a CSV file first.")
63
  else:
64
  df = st.session_state.df
65
-
66
- # Chỉ lấy cột numeric
67
  X = df.select_dtypes(include=[np.number])
68
 
69
  if X.shape[1] == 0:
70
- st.error("Dataset không cột numeric.")
71
  else:
72
- # ===== PLACEHOLDER COPOD =====
73
- fake_scores = np.random.rand(len(X)) * 10
74
- df["outlier_score"] = fake_scores
75
 
76
  st.session_state.df = df
77
- st.session_state.scores = fake_scores
78
 
79
- st.success("COPOD finished (placeholder).")
80
 
81
- st.subheader("📌 Top Outliers")
82
  st.dataframe(
83
  df.sort_values("outlier_score", ascending=False).head(10)
84
  )
85
 
 
 
86
  # =========================
87
- # Show Outlier Graph
88
  # =========================
89
- if show_outlier_graph:
90
- if st.session_state.scores is None:
91
- st.warning("⚠️ Run COPOD first.")
92
- else:
93
- st.subheader("📊 Outlier Score Distribution")
94
 
95
- fig, ax = plt.subplots()
96
- ax.hist(st.session_state.scores, bins=30)
97
- ax.set_xlabel("Outlier Score")
98
- ax.set_ylabel("Count")
99
 
100
- st.pyplot(fig)
 
 
 
 
 
 
101
 
102
- st.markdown("""
103
- **Giải thích**:
104
- - Điểm càng cao → xác suất rơi vào tail càng nhỏ
105
- - COPOD đánh giá từng chiều độc lập
106
- """)
107
 
108
- # =========================
109
- # Show Correlation Failure (Demo Logic)
110
- # =========================
111
- if show_correlation_failure:
112
- if st.session_state.df is None:
113
- st.warning("⚠️ Upload dữ liệu trước.")
114
- else:
115
- df = st.session_state.df
116
- numeric_cols = df.select_dtypes(include=[np.number]).columns
117
 
118
- if len(numeric_cols) < 2:
119
- st.error(" Cần ít nhất 2 biến numeric để minh họa tương quan.")
 
 
 
 
 
 
 
120
  else:
121
- x_col, y_col = numeric_cols[:2]
 
122
 
123
- st.subheader("⚠️ Correlation Failure Illustration")
 
 
 
124
 
125
- fig, ax = plt.subplots()
126
- ax.scatter(df[x_col], df[y_col], alpha=0.6)
127
- ax.set_xlabel(x_col)
128
- ax.set_ylabel(y_col)
129
 
130
- st.pyplot(fig)
 
 
 
 
 
 
 
 
 
131
 
132
- st.markdown("""
133
- **Ý tưởng minh họa**:
134
- - Điểm phá vỡ mối quan hệ (correlation outlier)
135
- - COPOD có thể **không gán score cao**
136
- - Vì từng chiều vẫn "bình thường"
137
- """)
138
 
139
  # =========================
140
  # Footer
141
  # =========================
142
  st.markdown("---")
143
- st.caption("COPOD Demo Integrator View")
 
4
  import matplotlib.pyplot as plt
5
 
6
  # =========================
7
# Page Config
# =========================
# Browser-tab title and icon, plus a full-width page layout.
st.set_page_config(
    layout="wide",
    page_icon="🔍",
    page_title="COPOD Interactive Demo",
)
14
 
15
  # =========================
16
# Custom CSS
# =========================

# Stylesheet for the page background, heading colour and the three box
# classes (step-box, info-box, warning-box) referenced by later sections.
_custom_css = """
<style>
.main {
background-color: #f9fafc;
}
h1, h2, h3 {
color: #2c3e50;
}
.step-box {
background-color: #ffffff;
padding: 1.5rem;
border-radius: 12px;
box-shadow: 0 4px 12px rgba(0,0,0,0.05);
margin-bottom: 1.5rem;
}
.info-box {
background-color: #eef4ff;
padding: 1rem;
border-left: 6px solid #4c6ef5;
border-radius: 6px;
}
.warning-box {
background-color: #fff4e6;
padding: 1rem;
border-left: 6px solid #f08c00;
border-radius: 6px;
}
</style>
"""

# unsafe_allow_html is required, otherwise the <style> tag is escaped.
st.markdown(_custom_css, unsafe_allow_html=True)
47
+
48
+ # =========================
49
# Title Section
# =========================
st.title("🔍 COPOD – Interactive Outlier Detection Demo")

# Introductory info box (user-facing text is Vietnamese): a short summary of
# COPOD and a note that the app shows both its strengths and its weaknesses.
_intro_html = """
<div class="info-box">
<b>COPOD</b> (Copula-Based Outlier Detection) là thuật toán:
<ul>
<li>Không cần hyperparameter</li>
<li>Nhanh</li>
<li>Có khả năng giải thích theo từng chiều</li>
</ul>
Ứng dụng này minh họa cả <b>điểm mạnh</b> và <b>điểm yếu</b> của COPOD.
</div>
"""
st.markdown(_intro_html, unsafe_allow_html=True)
64
 
65
  # =========================
66
# Sidebar
# =========================
# All controls live in the sidebar; widgets are created in the same order as
# before so the layout is unchanged.
with st.sidebar:
    st.header("⚙️ Control Panel")

    # CSV upload; None until the user picks a file.
    uploaded_file = st.file_uploader("📂 Upload CSV file", type=["csv"])

    # Each button is True only on the rerun triggered by its own click.
    run_copod = st.button("▶️ Run COPOD")
    show_outlier_graph = st.button("📊 Show Outlier Graph")
    show_corr_failure = st.button("⚠️ Show Correlation Failure")
78
 
79
  # =========================
80
  # Session State
 
86
  st.session_state.scores = None
87
 
88
  # =========================
89
# STEP 1 – Upload Data
# =========================
# Reads the uploaded CSV into st.session_state.df and previews its first rows.
# NOTE(review): each st.markdown call is emitted as its own element, so this
# opening <div> presumably does not wrap the widgets created below — confirm
# in the browser; st.container() may be the intended tool.
st.markdown("<div class='step-box'>", unsafe_allow_html=True)
st.subheader("🟢 Step 1: Upload Dataset")

if uploaded_file is not None:
    # Cheap identity for the current upload, used to detect a file change.
    upload_key = (uploaded_file.name, uploaded_file.size)
    try:
        df = pd.read_csv(uploaded_file)
    except (
        pd.errors.EmptyDataError,
        pd.errors.ParserError,
        UnicodeDecodeError,
    ) as exc:
        # An unreadable file must not crash the app, and must not leave a
        # previous dataset or its scores looking like the current one.
        st.session_state.df = None
        st.session_state.scores = None
        st.error(f"Could not read the uploaded CSV: {exc}")
    else:
        if st.session_state.get("upload_key") != upload_key:
            # A different file was uploaded: scores computed for the previous
            # dataset no longer apply, so force Step 2 to be run again.
            st.session_state.upload_key = upload_key
            st.session_state.scores = None

        st.session_state.df = df

        st.success("Dataset loaded successfully!")
        st.dataframe(df.head())
else:
    # HTML is kept flush-left inside the literal, as in the original string.
    st.markdown("""
<div class="warning-box">
Please upload a CSV file to begin.
</div>
""", unsafe_allow_html=True)

st.markdown("</div>", unsafe_allow_html=True)
108
 
109
  # =========================
110
# STEP 2 – Run COPOD
# =========================
# On a click of "Run COPOD": scores every row of the session DataFrame, stores
# the scores in st.session_state.scores and lists the ten highest-scoring rows.
st.markdown("<div class='step-box'>", unsafe_allow_html=True)
st.subheader("🔵 Step 2: Run COPOD")

if run_copod:
    if st.session_state.df is None:
        st.warning("Upload data first.")
    else:
        df = st.session_state.df

        # Use only the dataset's own numeric features. A score column left
        # behind by an earlier run is dropped so it is never fed back in as
        # an input (it is overwritten below anyway).
        X = df.drop(columns=["outlier_score"], errors="ignore").select_dtypes(
            include=[np.number]
        )

        if X.shape[1] == 0:
            st.error("Dataset has no numeric columns.")
        else:
            # PLACEHOLDER SCORES: uniform random values in [0, 10), one per
            # row; no COPOD model is fitted here.
            scores = np.random.rand(len(X)) * 10
            df["outlier_score"] = scores

            st.session_state.df = df
            st.session_state.scores = scores

            st.success("COPOD completed (placeholder).")

            st.markdown("**Top potential outliers:**")
            st.dataframe(
                df.sort_values("outlier_score", ascending=False).head(10)
            )

st.markdown("</div>", unsafe_allow_html=True)
140
+
141
  # =========================
142
# STEP 3 – Visual Analysis
# =========================
# Two side-by-side panels, each drawn only on the rerun triggered by its
# sidebar button: a histogram of the stored scores, and a scatter plot of the
# first two numeric columns.
st.markdown("<div class='step-box'>", unsafe_allow_html=True)
st.subheader("🟣 Step 3: Visual Analysis")

col1, col2 = st.columns(2)

# --- Outlier Graph ---
with col1:
    if show_outlier_graph:
        if st.session_state.scores is None:
            st.warning("Run COPOD first.")
        else:
            st.markdown("**📊 Outlier Score Distribution**")

            fig, ax = plt.subplots()
            ax.hist(st.session_state.scores, bins=30)
            ax.set_xlabel("Outlier Score")
            ax.set_ylabel("Count")

            st.pyplot(fig)
            # pyplot keeps every figure registered until it is closed; the
            # script reruns on each interaction, so close it once rendered.
            plt.close(fig)

            st.caption(
                "Higher score more likely to be an outlier (tail probability)."
            )

# --- Correlation Failure ---
with col2:
    if show_corr_failure:
        if st.session_state.df is None:
            st.warning("Upload data first.")
        else:
            df = st.session_state.df
            num_cols = df.select_dtypes(include=[np.number]).columns

            if len(num_cols) < 2:
                st.error("Need at least 2 numeric columns.")
            else:
                # Plot the first two numeric columns against each other.
                x, y = num_cols[:2]

                st.markdown("**⚠️ Correlation Failure Illustration**")

                fig, ax = plt.subplots()
                ax.scatter(df[x], df[y], alpha=0.6)
                ax.set_xlabel(x)
                ax.set_ylabel(y)

                st.pyplot(fig)
                # Same reason as above: release the figure after rendering.
                plt.close(fig)

                st.caption(
                    "COPOD may miss outliers that break correlations but are marginally normal."
                )

st.markdown("</div>", unsafe_allow_html=True)
 
 
 
 
 
196
 
197
  # =========================
198
  # Footer
199
  # =========================
200
  st.markdown("---")
201
+ st.caption("COPOD Demo Integrator View • Streamlit + Hugging Face")