Fancy-yousa committed on
Commit
b5567db
·
verified ·
1 Parent(s): 7363a4a

Upload 78 files

Browse files
.gitattributes CHANGED
@@ -33,3 +33,33 @@ saved_model/**/* filter=lfs diff=lfs merge=lfs -text
33
  *.zip filter=lfs diff=lfs merge=lfs -text
34
  *.zst filter=lfs diff=lfs merge=lfs -text
35
  *tfevents* filter=lfs diff=lfs merge=lfs -text
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
33
  *.zip filter=lfs diff=lfs merge=lfs -text
34
  *.zst filter=lfs diff=lfs merge=lfs -text
35
  *tfevents* filter=lfs diff=lfs merge=lfs -text
36
+ data/Authorship.mat filter=lfs diff=lfs merge=lfs -text
37
+ data/Dermatology.mat filter=lfs diff=lfs merge=lfs -text
38
+ data/dna.mat filter=lfs diff=lfs merge=lfs -text
39
+ data/Factors.mat filter=lfs diff=lfs merge=lfs -text
40
+ data/madelon.mat filter=lfs diff=lfs merge=lfs -text
41
+ data/Movement_libras.mat filter=lfs diff=lfs merge=lfs -text
42
+ data/Musk1.mat filter=lfs diff=lfs merge=lfs -text
43
+ data/results.db filter=lfs diff=lfs merge=lfs -text
44
+ data/spambase.mat filter=lfs diff=lfs merge=lfs -text
45
+ data/splice.mat filter=lfs diff=lfs merge=lfs -text
46
+ data/Synthetic_control.mat filter=lfs diff=lfs merge=lfs -text
47
+ data/Waveform.mat filter=lfs diff=lfs merge=lfs -text
48
+ data/Wdbc.mat filter=lfs diff=lfs merge=lfs -text
49
+ pdf/CFR.pdf filter=lfs diff=lfs merge=lfs -text
50
+ pdf/CIFE.pdf filter=lfs diff=lfs merge=lfs -text
51
+ pdf/CMIFS.pdf filter=lfs diff=lfs merge=lfs -text
52
+ pdf/CMIM.pdf filter=lfs diff=lfs merge=lfs -text
53
+ pdf/CSMDCCMR.pdf filter=lfs diff=lfs merge=lfs -text
54
+ pdf/CSMI.pdf filter=lfs diff=lfs merge=lfs -text
55
+ pdf/DCSF.pdf filter=lfs diff=lfs merge=lfs -text
56
+ pdf/DISR.pdf filter=lfs diff=lfs merge=lfs -text
57
+ pdf/DWFS.pdf filter=lfs diff=lfs merge=lfs -text
58
+ pdf/IWFS.pdf filter=lfs diff=lfs merge=lfs -text
59
+ pdf/JMI.pdf filter=lfs diff=lfs merge=lfs -text
60
+ pdf/JMIM.pdf filter=lfs diff=lfs merge=lfs -text
61
+ pdf/MIM.pdf filter=lfs diff=lfs merge=lfs -text
62
+ pdf/MRI.pdf filter=lfs diff=lfs merge=lfs -text
63
+ pdf/MRMD.pdf filter=lfs diff=lfs merge=lfs -text
64
+ pdf/MRMR.pdf filter=lfs diff=lfs merge=lfs -text
65
+ pdf/UCRFS.pdf filter=lfs diff=lfs merge=lfs -text
Webapp/app.py CHANGED
@@ -2,7 +2,8 @@ import os
2
  import sys
3
  import pickle
4
  import json
5
- from flask import Flask, jsonify, request, render_template
 
6
 
7
  # Add project root to sys.path to import leaderboard
8
  sys.path.append(os.path.abspath(os.path.join(os.path.dirname(__file__), "..")))
@@ -13,6 +14,7 @@ from leaderboard import rank_results
13
  PROJECT_ROOT = os.path.abspath(os.path.join(os.path.dirname(__file__), ".."))
14
  RESULT_DIR = os.path.join(PROJECT_ROOT, "results")
15
  DATASET_DIR = os.path.join(PROJECT_ROOT, "datasets")
 
16
 
17
  os.makedirs(RESULT_DIR, exist_ok=True)
18
  os.makedirs(DATASET_DIR, exist_ok=True)
@@ -179,7 +181,26 @@ def get_results():
179
  # ===============================
180
  @app.route("/api/datasets")
181
  def api_datasets():
182
- return jsonify(list_available_datasets())
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
183
 
184
 
185
  @app.route("/api/global_stats")
@@ -256,6 +277,11 @@ def get_global_stats():
256
  return jsonify(final_list)
257
 
258
 
 
 
 
 
 
259
  if __name__ == "__main__":
260
  port = int(os.environ.get("PORT", 7860))
261
  app.run(host="0.0.0.0", port=port, debug=False)
 
2
  import sys
3
  import pickle
4
  import json
5
+ import datetime
6
+ from flask import Flask, jsonify, request, render_template, send_from_directory
7
 
8
  # Add project root to sys.path to import leaderboard
9
  sys.path.append(os.path.abspath(os.path.join(os.path.dirname(__file__), "..")))
 
14
  PROJECT_ROOT = os.path.abspath(os.path.join(os.path.dirname(__file__), ".."))
15
  RESULT_DIR = os.path.join(PROJECT_ROOT, "results")
16
  DATASET_DIR = os.path.join(PROJECT_ROOT, "datasets")
17
+ PDF_DIR = os.path.join(PROJECT_ROOT, "pdf")
18
 
19
  os.makedirs(RESULT_DIR, exist_ok=True)
20
  os.makedirs(DATASET_DIR, exist_ok=True)
 
181
  # ===============================
182
  @app.route("/api/datasets")
183
  def api_datasets():
184
+ try:
185
+ datasets = []
186
+ ds_names = list_available_datasets()
187
+ for name in ds_names:
188
+ # Get modification time of the result file
189
+ result_path = os.path.join(RESULT_DIR, f"{name}.json")
190
+ last_updated = "Unknown"
191
+ if os.path.exists(result_path):
192
+ mtime = os.path.getmtime(result_path)
193
+ last_updated = datetime.datetime.fromtimestamp(mtime).strftime('%Y-%m-%d')
194
+
195
+ datasets.append({
196
+ "name": name,
197
+ "last_updated": last_updated
198
+ })
199
+ return jsonify(datasets)
200
+ except Exception as e:
201
+ import traceback
202
+ traceback.print_exc()
203
+ return jsonify({"error": str(e)}), 500
204
 
205
 
206
  @app.route("/api/global_stats")
 
277
  return jsonify(final_list)
278
 
279
 
280
@app.route("/pdfs/<path:filename>")
def serve_pdf(filename):
    """Serve the requested file from the project's PDF directory.

    ``filename`` is the path captured from the URL after ``/pdfs/``; it is
    resolved relative to ``PDF_DIR`` by ``send_from_directory``.
    """
    pdf_response = send_from_directory(PDF_DIR, filename)
    return pdf_response
283
+
284
+
285
  if __name__ == "__main__":
286
  port = int(os.environ.get("PORT", 7860))
287
  app.run(host="0.0.0.0", port=port, debug=False)
Webapp/templates/global.html CHANGED
@@ -60,6 +60,24 @@
60
  background-color: #34495e;
61
  }
62
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
63
  .weights-control {
64
  background-color: #f1f1f1;
65
  padding: 15px;
@@ -89,16 +107,21 @@
89
 
90
  .slider-group label {
91
  font-weight: bold;
92
- min-width: 80px;
93
  }
94
 
95
  input[type="number"] {
96
- width: 70px;
97
  padding: 5px;
98
  border: 1px solid #ccc;
99
  border-radius: 4px;
100
  }
101
 
 
 
 
 
 
102
  button.recalc-btn {
103
  background-color: var(--primary-color);
104
  color: white;
@@ -162,11 +185,16 @@
162
  background-color: var(--primary-color);
163
  }
164
 
165
- .warning-text {
166
- color: #e74c3c;
167
- font-size: 0.9em;
 
 
 
 
 
 
168
  margin-top: 5px;
169
- display: none;
170
  }
171
  </style>
172
  </head>
@@ -174,36 +202,46 @@
174
 
175
  <div class="container">
176
  <header>
177
- <h1>🌍 Global Algorithm Rankings</h1>
 
 
 
178
  <a href="/" class="nav-link">← Back to Dataset View</a>
179
  </header>
180
 
 
 
 
 
 
 
 
 
 
181
  <div class="weights-control">
182
- <h3>🏆 Scoring Formula: S = a·F1 + b·AUC + c·TimeScore</h3>
183
  <p style="font-size: 0.9em; color: #666; margin-bottom: 10px;">
184
- Note: TimeScore is calculated as normalized efficiency (1 = fastest, 0 = slowest).
185
- <br>Constraint: a + b + c = 1.
186
  </p>
187
 
188
  <div class="sliders-container">
189
  <div class="slider-group">
190
- <label for="weight-a">a (F1):</label>
191
- <input type="number" id="weight-a" value="0.4" step="0.1" min="0" max="1">
192
  </div>
193
 
194
  <div class="slider-group">
195
- <label for="weight-b">b (AUC):</label>
196
- <input type="number" id="weight-b" value="0.4" step="0.1" min="0" max="1">
197
  </div>
198
 
199
  <div class="slider-group">
200
- <label for="weight-c">c (Time):</label>
201
- <input type="number" id="weight-c" value="0.2" step="0.1" min="0" max="1">
202
  </div>
203
 
204
  <button class="recalc-btn" onclick="calculateAndRender()">Recalculate Rankings</button>
205
  </div>
206
- <div id="weight-warning" class="warning-text">⚠️ Weights must sum to 1.0</div>
207
  </div>
208
 
209
  <div id="loading-indicator" style="text-align: center; color: #666;">Loading global stats...</div>
@@ -215,7 +253,7 @@
215
  <th data-key="algorithm">Algorithm <span class="arrow">↕</span></th>
216
  <th data-key="mean_f1_global">Global F1 <span class="arrow">↕</span></th>
217
  <th data-key="mean_auc_global">Global AUC <span class="arrow">↕</span></th>
218
- <th data-key="mean_time_global">Avg Time (s) <span class="arrow">↕</span></th>
219
  <th data-key="final_score">Final Score <span class="arrow">↕</span></th>
220
  </tr>
221
  </thead>
@@ -235,7 +273,27 @@
235
  const weightA = document.getElementById("weight-a");
236
  const weightB = document.getElementById("weight-b");
237
  const weightC = document.getElementById("weight-c");
238
- const warningText = document.getElementById("weight-warning");
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
239
 
240
  function fetchGlobalStats() {
241
  fetch("/api/global_stats")
@@ -251,20 +309,50 @@
251
  });
252
  }
253
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
254
  function calculateAndRender() {
255
  const a = parseFloat(weightA.value) || 0;
256
  const b = parseFloat(weightB.value) || 0;
257
  const c = parseFloat(weightC.value) || 0;
258
 
259
- // Validation
260
- const sum = a + b + c;
261
- if (Math.abs(sum - 1.0) > 0.01) {
262
- warningText.style.display = 'block';
263
- warningText.textContent = `⚠️ Weights sum to ${sum.toFixed(2)}. They should sum to 1.0.`;
264
- } else {
265
- warningText.style.display = 'none';
266
- }
267
-
268
  // Find min/max time for normalization
269
  let minTime = Infinity;
270
  let maxTime = -Infinity;
@@ -273,14 +361,11 @@
273
  if (d.mean_time_global > maxTime) maxTime = d.mean_time_global;
274
  });
275
 
276
- // Prevent division by zero if all times are same
277
  const timeRange = maxTime - minTime;
278
 
279
  // Process data
280
  processedData = rawData.map(d => {
281
  // Time Score: 1 if fast, 0 if slow
282
- // Formula: 1 - (time - min) / (max - min)
283
- // If range is 0, score is 1 (all same speed)
284
  let timeScore = 1.0;
285
  if (timeRange > 0.0001) {
286
  timeScore = 1.0 - ((d.mean_time_global - minTime) / timeRange);
@@ -338,8 +423,8 @@
338
  <td>${safeFixed(row.mean_f1_global)}</td>
339
  <td>${safeFixed(row.mean_auc_global)}</td>
340
  <td>
341
- ${safeFixed(row.mean_time_global, 2)}s
342
- <div style="font-size:0.7em; color:#888;">Score: ${safeFixed(row.time_score_norm, 2)}</div>
343
  </td>
344
  <td>
345
  <strong>${safeFixed(row.final_score)}</strong>
@@ -358,7 +443,6 @@
358
  if (activeHeader) activeHeader.textContent = sortDirection === 1 ? '↑' : '↓';
359
  }
360
 
361
- // Sort handlers
362
  document.querySelectorAll('th[data-key]').forEach(th => {
363
  th.addEventListener('click', () => {
364
  const key = th.dataset.key;
@@ -368,19 +452,19 @@
368
  sortKey = key;
369
  sortDirection = (key === 'rank' || key === 'mean_time_global') ? 1 : -1;
370
  }
371
- // For rank, it's just index, but let's assume we sort by score desc if rank is clicked
372
  if (key === 'rank') {
373
  sortKey = 'final_score';
374
  sortDirection = -1;
375
  }
376
-
377
  sortData();
378
  renderTable();
379
  });
380
  });
381
 
382
- // Initial load
383
- document.addEventListener("DOMContentLoaded", fetchGlobalStats);
 
 
384
 
385
  </script>
386
 
 
60
  background-color: #34495e;
61
  }
62
 
63
+ .description-box {
64
+ background-color: #e8f4fd;
65
+ border-left: 4px solid #3498db;
66
+ padding: 15px;
67
+ margin-bottom: 20px;
68
+ border-radius: 4px;
69
+ }
70
+
71
+ .description-box h3 {
72
+ margin-top: 0;
73
+ color: #2980b9;
74
+ }
75
+
76
+ .description-box p {
77
+ margin: 5px 0;
78
+ line-height: 1.5;
79
+ }
80
+
81
  .weights-control {
82
  background-color: #f1f1f1;
83
  padding: 15px;
 
107
 
108
  .slider-group label {
109
  font-weight: bold;
110
+ min-width: 60px;
111
  }
112
 
113
  input[type="number"] {
114
+ width: 80px;
115
  padding: 5px;
116
  border: 1px solid #ccc;
117
  border-radius: 4px;
118
  }
119
 
120
+ input[readonly] {
121
+ background-color: #e9ecef;
122
+ color: #666;
123
+ }
124
+
125
  button.recalc-btn {
126
  background-color: var(--primary-color);
127
  color: white;
 
185
  background-color: var(--primary-color);
186
  }
187
 
188
+ .time-detail {
189
+ font-size: 0.8em;
190
+ color: #666;
191
+ margin-top: 2px;
192
+ }
193
+
194
+ .version-tag {
195
+ font-size: 0.8em;
196
+ color: #7f8c8d;
197
  margin-top: 5px;
 
198
  }
199
  </style>
200
  </head>
 
202
 
203
  <div class="container">
204
  <header>
205
+ <div>
206
+ <h1>🌍 Global Algorithm Rankings</h1>
207
+ <div id="last-updated" class="version-tag">Data Last Updated: Loading...</div>
208
+ </div>
209
  <a href="/" class="nav-link">← Back to Dataset View</a>
210
  </header>
211
 
212
+ <div class="description-box">
213
+ <h3>About Global Rankings</h3>
214
+ <p>
215
+ This page provides a comprehensive evaluation of feature selection algorithms across all available datasets.
216
+ Algorithms are ranked based on a weighted score combining <strong>Accuracy (F1)</strong>, <strong>Robustness (AUC)</strong>, and <strong>Efficiency (Time)</strong>.
217
+ You can adjust the importance of each factor below to customize the ranking criteria.
218
+ </p>
219
+ </div>
220
+
221
  <div class="weights-control">
222
+ <h3>🏆 Scoring Formula: S = α·F1 + β·AUC + γ·TimeScore</h3>
223
  <p style="font-size: 0.9em; color: #666; margin-bottom: 10px;">
224
+ Constraint: α + β + γ = 1. TimeScore is normalized (1 = fastest).
 
225
  </p>
226
 
227
  <div class="sliders-container">
228
  <div class="slider-group">
229
+ <label for="weight-a">F1 (α):</label>
230
+ <input type="number" id="weight-a" value="0.4" step="0.05" min="0" max="1">
231
  </div>
232
 
233
  <div class="slider-group">
234
+ <label for="weight-b">AUC (β):</label>
235
+ <input type="number" id="weight-b" value="0.4" step="0.05" min="0" max="1">
236
  </div>
237
 
238
  <div class="slider-group">
239
+ <label for="weight-c">Time (γ):</label>
240
+ <input type="number" id="weight-c" value="0.2" readonly title="Auto-calculated: 1 - α - β">
241
  </div>
242
 
243
  <button class="recalc-btn" onclick="calculateAndRender()">Recalculate Rankings</button>
244
  </div>
 
245
  </div>
246
 
247
  <div id="loading-indicator" style="text-align: center; color: #666;">Loading global stats...</div>
 
253
  <th data-key="algorithm">Algorithm <span class="arrow">↕</span></th>
254
  <th data-key="mean_f1_global">Global F1 <span class="arrow">↕</span></th>
255
  <th data-key="mean_auc_global">Global AUC <span class="arrow">↕</span></th>
256
+ <th data-key="mean_time_global">Efficiency (Time) <span class="arrow">↕</span></th>
257
  <th data-key="final_score">Final Score <span class="arrow">↕</span></th>
258
  </tr>
259
  </thead>
 
273
  const weightA = document.getElementById("weight-a");
274
  const weightB = document.getElementById("weight-b");
275
  const weightC = document.getElementById("weight-c");
276
+ const lastUpdatedDiv = document.getElementById("last-updated");
277
+
278
+ // Fetch datasets info to get latest date
279
/**
 * Show the most recent "last updated" date across all datasets.
 *
 * Reads /api/datasets, which returns a list of {name, last_updated} objects
 * where last_updated is either a YYYY-MM-DD string or "Unknown". The label
 * falls back to "Unknown" when no date is available or the request fails,
 * so it never stays stuck on "Loading...".
 */
function fetchLastUpdated() {
    const showDate = (text) => {
        lastUpdatedDiv.textContent = `Data Last Updated: ${text}`;
    };

    fetch("/api/datasets")
        .then(res => {
            if (!res.ok) throw new Error(`HTTP ${res.status}`);
            return res.json();
        })
        .then(data => {
            // An error payload is an object, not a list; treat it as "no data".
            const entries = Array.isArray(data) ? data : [];
            // YYYY-MM-DD strings sort chronologically as plain strings,
            // so the last element after sorting is the newest date.
            const dates = entries
                .map(d => d && d.last_updated)
                .filter(d => d && d !== 'Unknown')
                .sort();
            showDate(dates.length > 0 ? dates[dates.length - 1] : 'Unknown');
        })
        .catch(err => {
            console.error("Error fetching last-updated date:", err);
            showDate('Unknown');
        });
}
297
 
298
  function fetchGlobalStats() {
299
  fetch("/api/global_stats")
 
309
  });
310
  }
311
 
312
+ // Weight auto-adjustment logic
313
/**
 * Keep the three weights consistent after the user edits α or β.
 *
 * γ is always derived as 1 - α - β. When the edited value pushes α + β
 * above 1, the other editable weight is reduced so the sum stays at 1
 * and γ becomes 0. Calls with any argument other than 'a' or 'b' leave
 * the inputs untouched.
 *
 * @param {string} changedInput - 'a' when α was edited, 'b' when β was.
 */
function updateWeights(changedInput) {
    const editedAlpha = changedInput === 'a';
    const editedBeta = changedInput === 'b';
    if (!editedAlpha && !editedBeta) {
        return;
    }

    const clampToUnit = (v) => Math.min(1, Math.max(0, v));
    const roundTwo = (v) => parseFloat(v.toFixed(2));

    let alpha = clampToUnit(parseFloat(weightA.value) || 0);
    let beta = clampToUnit(parseFloat(weightB.value) || 0);
    let gamma = 1 - alpha - beta;

    if (gamma < 0) {
        // The edited weight wins; the other one absorbs the overflow.
        if (editedAlpha) {
            beta = Math.max(0, 1 - alpha);
        } else {
            alpha = Math.max(0, 1 - beta);
        }
        gamma = 0;
    }

    weightA.value = roundTwo(alpha);
    weightB.value = roundTwo(beta);
    weightC.value = roundTwo(gamma);
}
347
+
348
+ weightA.addEventListener('input', () => updateWeights('a'));
349
+ weightB.addEventListener('input', () => updateWeights('b'));
350
+
351
  function calculateAndRender() {
352
  const a = parseFloat(weightA.value) || 0;
353
  const b = parseFloat(weightB.value) || 0;
354
  const c = parseFloat(weightC.value) || 0;
355
 
 
 
 
 
 
 
 
 
 
356
  // Find min/max time for normalization
357
  let minTime = Infinity;
358
  let maxTime = -Infinity;
 
361
  if (d.mean_time_global > maxTime) maxTime = d.mean_time_global;
362
  });
363
 
 
364
  const timeRange = maxTime - minTime;
365
 
366
  // Process data
367
  processedData = rawData.map(d => {
368
  // Time Score: 1 if fast, 0 if slow
 
 
369
  let timeScore = 1.0;
370
  if (timeRange > 0.0001) {
371
  timeScore = 1.0 - ((d.mean_time_global - minTime) / timeRange);
 
423
  <td>${safeFixed(row.mean_f1_global)}</td>
424
  <td>${safeFixed(row.mean_auc_global)}</td>
425
  <td>
426
+ <strong>${safeFixed(row.time_score_norm)}</strong>
427
+ <div class="time-detail">${safeFixed(row.mean_time_global, 2)}s</div>
428
  </td>
429
  <td>
430
  <strong>${safeFixed(row.final_score)}</strong>
 
443
  if (activeHeader) activeHeader.textContent = sortDirection === 1 ? '↑' : '↓';
444
  }
445
 
 
446
  document.querySelectorAll('th[data-key]').forEach(th => {
447
  th.addEventListener('click', () => {
448
  const key = th.dataset.key;
 
452
  sortKey = key;
453
  sortDirection = (key === 'rank' || key === 'mean_time_global') ? 1 : -1;
454
  }
 
455
  if (key === 'rank') {
456
  sortKey = 'final_score';
457
  sortDirection = -1;
458
  }
 
459
  sortData();
460
  renderTable();
461
  });
462
  });
463
 
464
+ document.addEventListener("DOMContentLoaded", () => {
465
+ fetchLastUpdated();
466
+ fetchGlobalStats();
467
+ });
468
 
469
  </script>
470
 
Webapp/templates/index.html CHANGED
@@ -13,6 +13,7 @@
13
  --text-color: #333;
14
  --border-color: #dee2e6;
15
  --hover-color: #f1f1f1;
 
16
  }
17
 
18
  body {
@@ -59,6 +60,76 @@
59
  font-size: 14px;
60
  }
61
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
62
  table {
63
  width: 100%;
64
  border-collapse: collapse;
@@ -122,6 +193,25 @@
122
  color: var(--primary-color);
123
  }
124
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
125
  /* Modal styles */
126
  .modal {
127
  display: none;
@@ -171,13 +261,64 @@
171
  padding: 20px;
172
  color: #666;
173
  }
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
174
  </style>
175
  </head>
176
  <body>
177
 
178
  <div class="container">
179
  <header>
180
- <h1>🏆 AutoFS Leaderboard</h1>
 
 
 
181
  <div style="display:flex; gap:15px; align-items:center;">
182
  <a href="/global" style="text-decoration:none; color:white; background-color:#8e44ad; padding:8px 15px; border-radius:4px; font-size:0.9em;">🌍 Global Rankings</a>
183
  <div class="controls">
@@ -189,29 +330,51 @@
189
  </div>
190
  </header>
191
 
192
- <div id="loading-indicator" class="loading" style="display: none;">Loading data...</div>
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
193
 
194
- <div class="chart-controls" style="text-align:center; margin-top: 20px; margin-bottom: 15px;">
195
- <label style="margin-right:15px; font-weight:bold;">View Mode:</label>
196
- <input type="radio" id="view-overall" name="chart-view" value="overall" checked onchange="updateView()">
197
- <label for="view-overall" style="margin-right:10px;">Overall (Mean)</label>
198
-
199
- <input type="radio" id="view-classifiers-f1" name="chart-view" value="classifiers-f1" onchange="updateView()">
200
- <label for="view-classifiers-f1" style="margin-right:10px;">F1 by Classifier</label>
201
 
202
- <input type="radio" id="view-classifiers-auc" name="chart-view" value="classifiers-auc" onchange="updateView()">
203
- <label for="view-classifiers-auc">AUC by Classifier</label>
204
- </div>
 
 
205
 
206
- <div class="charts-container" style="display: flex; gap: 20px; margin-bottom: 20px;">
207
- <div style="flex: 1; background: white; padding: 15px; border-radius: 8px; box-shadow: 0 2px 4px rgba(0,0,0,0.1);">
208
- <canvas id="scoreChart"></canvas>
 
209
  </div>
210
- <div style="flex: 1; background: white; padding: 15px; border-radius: 8px; box-shadow: 0 2px 4px rgba(0,0,0,0.1);">
211
- <canvas id="timeChart"></canvas>
 
 
 
 
 
 
212
  </div>
213
  </div>
214
 
 
 
215
  <table id="result-table">
216
  <thead>
217
  <!-- Headers generated dynamically -->
@@ -220,6 +383,19 @@
220
  <!-- Data rows will be populated here -->
221
  </tbody>
222
  </table>
 
 
 
 
 
 
 
 
 
 
 
 
 
223
  </div>
224
 
225
  <!-- Modal for details -->
@@ -231,11 +407,30 @@
231
  </div>
232
  </div>
233
 
 
 
 
 
 
 
 
 
 
 
 
234
  <script>
235
  let currentResults = [];
 
 
236
  let sortDirection = 1; // 1 for asc, -1 for desc
237
  let lastSortKey = '';
238
 
 
 
 
 
 
 
239
  const VIEW_CONFIG = {
240
  'overall': [
241
  { key: 'mean_f1', label: 'Mean F1' },
@@ -259,6 +454,12 @@
259
  const loadingIndicator = document.getElementById("loading-indicator");
260
  const modal = document.getElementById("details-modal");
261
  const closeModal = document.querySelector(".close");
 
 
 
 
 
 
262
 
263
  // Close modal
264
  closeModal.onclick = () => modal.style.display = "none";
@@ -266,217 +467,185 @@
266
  if (event.target == modal) modal.style.display = "none";
267
  }
268
 
269
- // Global chart instances
270
  let scoreChartInstance = null;
271
- let timeChartInstance = null;
272
 
273
- function updateCharts(results) {
274
- if (!Array.isArray(results) || results.length === 0) return;
 
 
 
 
 
 
 
 
 
 
 
275
 
276
- // Limit to top 15 for readability
277
- const topResults = results.slice(0, 15);
 
 
 
 
 
 
 
 
 
 
 
278
  const labels = topResults.map(r => r.algorithm || 'Unknown');
279
- const times = topResults.map(r => r.time || 0);
280
 
281
- const viewMode = document.querySelector('input[name="chart-view"]:checked').value;
282
  let datasets = [];
283
 
284
  if (viewMode === 'overall') {
285
- const f1Scores = topResults.map(r => r.mean_f1 || 0);
286
- const aucScores = topResults.map(r => r.mean_auc || 0);
287
  datasets = [
288
  {
289
  label: 'Mean F1',
290
- data: f1Scores,
291
  backgroundColor: 'rgba(52, 152, 219, 0.7)',
292
  borderColor: 'rgba(52, 152, 219, 1)',
293
  borderWidth: 1
294
  },
295
  {
296
  label: 'Mean AUC',
297
- data: aucScores,
298
  backgroundColor: 'rgba(46, 204, 113, 0.7)',
299
  borderColor: 'rgba(46, 204, 113, 1)',
300
  borderWidth: 1
301
  }
302
  ];
303
  } else if (viewMode === 'classifiers-f1') {
304
- const classifiers = ['nb', 'svm', 'rf'];
305
- const colors = ['rgba(255, 206, 86, 0.5)', 'rgba(75, 192, 192, 0.5)', 'rgba(153, 102, 255, 0.5)'];
306
- const borderColors = ['rgba(255, 206, 86, 1)', 'rgba(75, 192, 192, 1)', 'rgba(153, 102, 255, 1)'];
307
-
308
- datasets = classifiers.map((cls, idx) => ({
309
- label: cls.toUpperCase() + ' F1',
310
- data: topResults.map(r => (r.metrics && r.metrics[cls]) ? r.metrics[cls].f1 : 0),
311
- backgroundColor: colors[idx],
312
- borderColor: borderColors[idx],
313
  borderWidth: 1
314
  }));
315
- } else if (viewMode === 'classifiers-auc') {
316
- const classifiers = ['nb', 'svm', 'rf'];
317
- const colors = ['rgba(255, 206, 86, 0.5)', 'rgba(75, 192, 192, 0.5)', 'rgba(153, 102, 255, 0.5)'];
318
- const borderColors = ['rgba(255, 206, 86, 1)', 'rgba(75, 192, 192, 1)', 'rgba(153, 102, 255, 1)'];
319
-
320
- datasets = classifiers.map((cls, idx) => ({
321
- label: cls.toUpperCase() + ' AUC',
322
- data: topResults.map(r => (r.metrics && r.metrics[cls]) ? r.metrics[cls].auc : 0),
323
- backgroundColor: colors[idx],
324
- borderColor: borderColors[idx],
325
  borderWidth: 1
326
  }));
327
  }
328
 
329
- // Score Chart
330
- const scoreCtx = document.getElementById('scoreChart').getContext('2d');
331
  if (scoreChartInstance) scoreChartInstance.destroy();
332
 
333
- scoreChartInstance = new Chart(scoreCtx, {
334
  type: 'bar',
335
- data: {
336
- labels: labels,
337
- datasets: datasets
338
- },
339
  options: {
 
340
  responsive: true,
341
  maintainAspectRatio: false,
342
- plugins: {
343
- title: {
344
- display: true,
345
- text: viewMode === 'overall' ? 'Top Algorithms Performance (Mean)' :
346
- (viewMode === 'classifiers-f1' ? 'F1-Score by Classifier' : 'AUC by Classifier')
347
- }
348
- },
349
  scales: {
350
- y: {
351
- beginAtZero: false,
352
- // min: 0.8
353
- }
354
  }
355
  }
356
  });
357
 
358
- // Time Chart
359
- const timeCtx = document.getElementById('timeChart').getContext('2d');
360
- if (timeChartInstance) timeChartInstance.destroy();
 
 
 
 
361
 
362
- timeChartInstance = new Chart(timeCtx, {
363
- type: 'line',
 
 
 
364
  data: {
365
- labels: labels,
366
  datasets: [{
367
- label: 'Time (s)',
368
- data: times,
369
- backgroundColor: 'rgba(231, 76, 60, 0.2)',
370
- borderColor: 'rgba(231, 76, 60, 1)',
371
- borderWidth: 2,
372
- tension: 0.3,
373
- fill: true
374
  }]
375
  },
376
  options: {
377
  responsive: true,
378
  maintainAspectRatio: false,
379
- plugins: {
380
- title: { display: true, text: 'Execution Time' }
381
- },
382
  scales: {
383
- y: { beginAtZero: true }
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
384
  }
385
  }
386
  });
387
  }
388
 
389
/**
 * Fill the details modal for one result row and display it.
 *
 * Values taken from the result object are HTML-escaped before being placed
 * into innerHTML, and missing or non-numeric fields are shown as "N/A"
 * instead of throwing.
 *
 * @param {Object} result - A result record (algorithm, time, num_features,
 *     selected_features, metrics).
 */
function showDetails(result) {
    const title = document.getElementById("modal-title");
    const body = document.getElementById("modal-body");

    const HTML_ESCAPES = { '&': '&amp;', '<': '&lt;', '>': '&gt;', '"': '&quot;', "'": '&#39;' };
    const escapeHtml = (text) => String(text).replace(/[&<>"']/g, ch => HTML_ESCAPES[ch]);
    const formatNumber = (value) => {
        const num = Number(value);
        return (value === null || value === undefined || !Number.isFinite(num))
            ? 'N/A'
            : num.toFixed(4);
    };

    const features = Array.isArray(result.selected_features) ? result.selected_features : [];

    // textContent does not parse markup, so no escaping is needed here.
    title.textContent = `${result.algorithm} Details`;

    const featuresHtml = features.map(f =>
        `<span class="feature-tag">${escapeHtml(f)}</span>`
    ).join('');

    let metricsHtml = '<div style="margin-top: 15px;"><h3>Metrics Breakdown</h3>';
    for (const [clf, m] of Object.entries(result.metrics || {})) {
        const scores = m || {};
        metricsHtml += `
            <div style="margin-bottom: 10px;">
                <strong>${escapeHtml(clf.toUpperCase())}:</strong>
                F1: ${formatNumber(scores.f1)}, AUC: ${formatNumber(scores.auc)}
            </div>`;
    }
    metricsHtml += '</div>';

    const time = formatNumber(result.time);
    body.innerHTML = `
        <p><strong>Time:</strong> ${time === 'N/A' ? time : time + 's'}</p>
        <p><strong>Num Features:</strong> ${escapeHtml(result.num_features)}</p>
        <p><strong>Selected Features (${features.length}):</strong></p>
        <div>${featuresHtml}</div>
        ${metricsHtml}
    `;

    modal.style.display = "block";
}
419
-
420
/**
 * Look up a nested property by a dot-separated path.
 *
 * @param {*} obj - Object to read from.
 * @param {string} path - Path such as "metrics.nb.f1".
 * @returns {*} The value at the path, or undefined when the path is empty,
 *     a segment is missing, or an intermediate value is falsy.
 */
function getValue(obj, path) {
    if (!path) {
        return undefined;
    }
    let cursor = obj;
    for (const segment of path.split('.')) {
        if (!cursor || cursor[segment] === undefined) {
            return undefined;
        }
        cursor = cursor[segment];
    }
    return cursor;
}
424
-
425
/**
 * Format a value as a fixed-point string for display.
 *
 * @param {*} value - Value to format; numeric strings are accepted.
 * @param {number} [digits=4] - Number of digits after the decimal point.
 * @returns {string} The formatted number, or 'N/A' when the value is
 *     missing or cannot be read as a finite number.
 */
function safeFixed(value, digits=4) {
    if (value === undefined || value === null) return 'N/A';
    const num = Number(value);
    // Without this check a non-numeric value would be displayed as "NaN".
    if (!Number.isFinite(num)) return 'N/A';
    return num.toFixed(digits);
}
429
 
430
  function renderTableHeader() {
431
- const viewMode = document.querySelector('input[name="chart-view"]:checked').value;
432
- const dynamicCols = VIEW_CONFIG[viewMode] || VIEW_CONFIG['overall'];
433
-
434
- let html = '<tr>';
435
- html += '<th data-key="rank" style="width: 60px;">#</th>';
436
- html += '<th data-key="algorithm">Algorithm <span class="arrow">↕</span></th>';
437
 
438
- dynamicCols.forEach(col => {
439
- html += `<th data-key="${col.key}">${col.label} <span class="arrow">↕</span></th>`;
440
- });
441
-
442
- html += '<th data-key="time">Time (s) <span class="arrow">↕</span></th>';
443
- html += '<th data-key="selected_features">Selected Features</th>';
444
- html += '</tr>';
445
-
446
- tableHead.innerHTML = html;
447
-
448
- // Re-attach sort listeners
449
- tableHead.querySelectorAll('th[data-key]').forEach(th => {
450
- th.addEventListener('click', () => sortTable(th.dataset.key));
451
  });
 
 
 
 
 
 
 
 
452
  }
453
 
454
- function updateTable(results) {
455
  tableBody.innerHTML = "";
456
 
457
  // Robust data handling
458
- if (!results) {
459
- results = [];
460
- } else if (!Array.isArray(results)) {
461
- // Try to handle wrapped data or single object
462
- if (results.data && Array.isArray(results.data)) {
463
- results = results.data;
464
- } else if (results.results && Array.isArray(results.results)) {
465
- results = results.results;
466
- } else if (typeof results === 'object') {
467
- // Assume it's a single record? Or convert object values to array?
468
- // For now, wrap in array if it looks like a record (has algorithm)
469
- if (results.algorithm) {
470
- results = [results];
471
- } else {
472
- console.error("Invalid data format:", results);
473
- tableBody.innerHTML = '<tr><td colspan="10" style="text-align:center; color:red;">Error: Invalid data format. Check console for details.</td></tr>';
474
- return;
475
- }
476
- } else {
477
- tableBody.innerHTML = '<tr><td colspan="10" style="text-align:center; color:red;">Error: Invalid data format</td></tr>';
478
- return;
479
- }
480
  }
481
 
482
  if (results.length === 0) {
@@ -484,39 +653,47 @@
484
  return;
485
  }
486
 
487
- const viewMode = document.querySelector('input[name="chart-view"]:checked').value;
488
- const dynamicCols = VIEW_CONFIG[viewMode] || VIEW_CONFIG['overall'];
489
 
490
- results.forEach((r, idx) => {
491
- const row = document.createElement("tr");
492
-
493
- // Format features for preview
494
- const featurePreview = (r.selected_features && Array.isArray(r.selected_features))
495
- ? r.selected_features.slice(0, 5).join(', ') + (r.selected_features.length > 5 ? '...' : '')
496
- : 'N/A';
497
 
498
- let html = `<td>${idx + 1}</td>`;
499
- html += `<td><strong>${r.algorithm || 'Unknown'}</strong></td>`;
500
-
501
- dynamicCols.forEach(col => {
502
- const val = getValue(r, col.key);
503
- const score = val !== undefined ? val : 0;
504
- html += `
505
- <td>
506
- ${safeFixed(val)}
507
- <div class="score-bar"><div class="score-fill" style="width: ${Math.min(score * 100, 100)}%"></div></div>
508
- </td>`;
509
  });
510
-
511
- const time = r.time || 0;
512
- html += `<td>${safeFixed(time, 2)}</td>`;
513
- html += `
514
- <td class="features-cell" onclick="showDetails(currentResults[${idx}])" title="Click for details">
515
- ${featurePreview} <span style="font-size:0.8em; color:#999;">(Click for details)</span>
516
- </td>`;
517
 
518
- row.innerHTML = html;
519
- tableBody.appendChild(row);
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
520
  });
521
  }
522
 
@@ -524,21 +701,22 @@
524
  if (lastSortKey === key) {
525
  sortDirection *= -1;
526
  } else {
527
- sortDirection = key === 'time' || key === 'rank' ? 1 : -1;
528
  lastSortKey = key;
529
  }
530
 
531
- // We don't call renderTableHeader here because it resets the sort indicators if we rebuild entirely.
532
- // Instead, we just update the arrows.
533
- document.querySelectorAll('th .arrow').forEach(span => span.textContent = '↕');
534
- const activeHeader = document.querySelector(`th[data-key="${key}"] .arrow`);
535
- if (activeHeader) activeHeader.textContent = sortDirection === 1 ? '↑' : '↓';
536
 
537
- const sorted = [...currentResults].sort((a, b) => {
538
- let valA = getValue(a, key);
539
- let valB = getValue(b, key);
540
 
541
- if (key === 'rank') return 0;
 
 
 
 
542
 
543
  if (valA === undefined) valA = -Infinity;
544
  if (valB === undefined) valB = -Infinity;
@@ -547,79 +725,127 @@
547
  if (valA > valB) return 1 * sortDirection;
548
  return 0;
549
  });
550
-
551
- // Don't update currentResults global if it breaks things, but here it's fine.
552
- // Actually, let's keep currentResults as the master list?
553
- // No, currentResults should be the sorted list for consistent subsequent sorts.
554
- currentResults = sorted;
555
- updateTable(sorted);
556
- }
557
 
558
- function updateView() {
559
- renderTableHeader();
560
- updateTable(currentResults);
561
- updateCharts(currentResults);
562
  }
563
-
564
- function fetchResults(dataset) {
565
- loadingIndicator.style.display = 'block';
566
- tableBody.innerHTML = '';
567
-
568
- console.log("Fetching results for:", dataset);
569
- fetch(`/api/results?dataset=${dataset}`)
570
- .then(res => {
571
- if (!res.ok) throw new Error("Network response was not ok");
572
- return res.json();
573
- })
574
- .then(data => {
575
- console.log("Data received:", data);
576
- currentResults = data;
577
- updateView();
578
- loadingIndicator.style.display = 'none';
579
- })
580
- .catch(err => {
581
- console.error("Error fetching results:", err);
582
- loadingIndicator.textContent = "Error loading data. Make sure the server is running.";
583
- });
584
  }
585
 
586
- // Initialize
587
- document.addEventListener("DOMContentLoaded", () => {
588
- // Setup sort listeners
589
- document.querySelectorAll('th[data-key]').forEach(th => {
590
- th.addEventListener('click', () => sortTable(th.dataset.key));
591
- });
 
 
 
 
592
 
593
- // Load datasets
594
  fetch("/api/datasets")
595
  .then(res => res.json())
596
- .then(datasets => {
 
597
  datasetSelect.innerHTML = "";
598
- datasets.forEach(ds => {
 
 
 
 
 
 
 
 
 
599
  const option = document.createElement("option");
600
- option.value = ds;
601
- option.textContent = ds;
602
  datasetSelect.appendChild(option);
603
  });
604
 
605
- if (datasets.includes("Authorship")) {
606
- datasetSelect.value = "Authorship";
607
- fetchResults("Authorship");
608
- } else if (datasets.length > 0) {
609
- datasetSelect.value = datasets[0];
610
- fetchResults(datasets[0]);
611
  }
612
  })
613
  .catch(err => {
614
- console.error("Error fetching datasets:", err);
615
- datasetSelect.innerHTML = "<option>Error loading datasets</option>";
616
  });
 
617
 
618
- datasetSelect.addEventListener('change', (e) => {
619
- fetchResults(e.target.value);
620
- });
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
621
  });
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
622
  </script>
623
 
624
  </body>
625
- </html>
 
13
  --text-color: #333;
14
  --border-color: #dee2e6;
15
  --hover-color: #f1f1f1;
16
+ --accent-color: #e67e22;
17
  }
18
 
19
  body {
 
60
  font-size: 14px;
61
  }
62
 
63
+ /* Info Boxes */
64
+ .info-section {
65
+ display: flex;
66
+ gap: 20px;
67
+ margin-bottom: 20px;
68
+ flex-wrap: wrap;
69
+ }
70
+
71
+ .description-box, .metadata-box {
72
+ flex: 1;
73
+ background-color: #e8f4fd;
74
+ border-left: 4px solid #3498db;
75
+ padding: 15px;
76
+ border-radius: 4px;
77
+ min-width: 300px;
78
+ }
79
+
80
+ .metadata-box {
81
+ background-color: #fef9e7;
82
+ border-left-color: #f1c40f;
83
+ }
84
+
85
+ h3 {
86
+ margin-top: 0;
87
+ margin-bottom: 10px;
88
+ font-size: 1.1em;
89
+ color: var(--secondary-color);
90
+ }
91
+
92
+ p {
93
+ margin: 5px 0;
94
+ line-height: 1.5;
95
+ font-size: 0.95em;
96
+ }
97
+
98
+ .version-tag {
99
+ font-size: 0.8em;
100
+ color: #7f8c8d;
101
+ margin-top: 5px;
102
+ }
103
+
104
+ /* Filters */
105
+ .filters-box {
106
+ background-color: #f1f1f1;
107
+ padding: 15px;
108
+ border-radius: 8px;
109
+ margin-bottom: 20px;
110
+ border: 1px solid #ddd;
111
+ display: flex;
112
+ gap: 20px;
113
+ align-items: center;
114
+ flex-wrap: wrap;
115
+ }
116
+
117
+ .filter-group {
118
+ display: flex;
119
+ align-items: center;
120
+ gap: 10px;
121
+ }
122
+
123
+ input[type="range"] {
124
+ width: 120px;
125
+ }
126
+
127
+ .filter-val {
128
+ font-weight: bold;
129
+ min-width: 40px;
130
+ }
131
+
132
+ /* Table */
133
  table {
134
  width: 100%;
135
  border-collapse: collapse;
 
193
  color: var(--primary-color);
194
  }
195
 
196
+ /* Charts */
197
+ .charts-section {
198
+ margin-top: 30px;
199
+ display: flex;
200
+ flex-direction: column;
201
+ gap: 20px;
202
+ }
203
+
204
+ .chart-container {
205
+ background: white;
206
+ padding: 15px;
207
+ border-radius: 8px;
208
+ box-shadow: 0 2px 4px rgba(0,0,0,0.1);
209
+ border: 1px solid #eee;
210
+ position: relative;
211
+ height: 400px;
212
+ width: 100%;
213
+ }
214
+
215
  /* Modal styles */
216
  .modal {
217
  display: none;
 
261
  padding: 20px;
262
  color: #666;
263
  }
264
+
265
+ /* Sidebar for PDF */
266
+ .pdf-sidebar {
267
+ position: fixed;
268
+ top: 0;
269
+ right: -50%; /* Hidden by default */
270
+ width: 50%;
271
+ height: 100%;
272
+ background: white;
273
+ box-shadow: -2px 0 5px rgba(0,0,0,0.2);
274
+ z-index: 2000;
275
+ transition: right 0.3s ease-in-out;
276
+ display: flex;
277
+ flex-direction: column;
278
+ }
279
+
280
+ .pdf-sidebar.open {
281
+ right: 0;
282
+ }
283
+
284
+ .sidebar-header {
285
+ padding: 10px 20px;
286
+ background: var(--primary-color);
287
+ color: white;
288
+ display: flex;
289
+ justify-content: space-between;
290
+ align-items: center;
291
+ }
292
+
293
+ .sidebar-content {
294
+ flex: 1;
295
+ padding: 0;
296
+ }
297
+
298
+ .sidebar-content iframe {
299
+ width: 100%;
300
+ height: 100%;
301
+ border: none;
302
+ }
303
+
304
+ .algo-link {
305
+ color: var(--primary-color);
306
+ cursor: pointer;
307
+ font-weight: bold;
308
+ }
309
+ .algo-link:hover {
310
+ text-decoration: underline;
311
+ }
312
  </style>
313
  </head>
314
  <body>
315
 
316
  <div class="container">
317
  <header>
318
+ <div>
319
+ <h1>🏆 AutoFS Leaderboard</h1>
320
+ <div id="last-updated" class="version-tag">Data Last Updated: Loading...</div>
321
+ </div>
322
  <div style="display:flex; gap:15px; align-items:center;">
323
  <a href="/global" style="text-decoration:none; color:white; background-color:#8e44ad; padding:8px 15px; border-radius:4px; font-size:0.9em;">🌍 Global Rankings</a>
324
  <div class="controls">
 
330
  </div>
331
  </header>
332
 
333
+ <div class="info-section">
334
+ <div class="description-box">
335
+ <h3>About This Dataset</h3>
336
+ <p>
337
+ This dashboard displays the performance of various feature selection algorithms on the
338
+ <strong><span id="desc-dataset-name">Selected</span></strong> dataset.
339
+ Compare algorithms based on accuracy (F1), stability (AUC), and computational efficiency.
340
+ </p>
341
+ </div>
342
+ <div class="metadata-box">
343
+ <h3>Dataset Metadata</h3>
344
+ <p><strong>Name:</strong> <span id="meta-name">-</span></p>
345
+ <p><strong>Last Updated:</strong> <span id="meta-updated">-</span></p>
346
+ <!-- Placeholder for future metadata -->
347
+ <p style="color:#888; font-size:0.8em;">(Additional metadata like samples/features not available)</p>
348
+ </div>
349
+ </div>
350
 
351
+ <div class="filters-box">
352
+ <h3>🔍 Filters</h3>
 
 
 
 
 
353
 
354
+ <div class="filter-group">
355
+ <label>Min F1 Score:</label>
356
+ <input type="range" id="filter-f1" min="0" max="1" step="0.05" value="0">
357
+ <span id="val-f1" class="filter-val">0.00</span>
358
+ </div>
359
 
360
+ <div class="filter-group">
361
+ <label>Max Time (s):</label>
362
+ <input type="range" id="filter-time" min="1" max="500" step="10" value="500">
363
+ <span id="val-time" class="filter-val">500+</span>
364
  </div>
365
+
366
+ <div style="margin-left: auto;">
367
+ <label style="margin-right:10px; font-weight:bold;">Chart View:</label>
368
+ <select id="chart-view-mode" onchange="updateView()">
369
+ <option value="overall">Overall (Mean)</option>
370
+ <option value="classifiers-f1">F1 by Classifier</option>
371
+ <option value="classifiers-auc">AUC by Classifier</option>
372
+ </select>
373
  </div>
374
  </div>
375
 
376
+ <div id="loading-indicator" class="loading" style="display: none;">Loading data...</div>
377
+
378
  <table id="result-table">
379
  <thead>
380
  <!-- Headers generated dynamically -->
 
383
  <!-- Data rows will be populated here -->
384
  </tbody>
385
  </table>
386
+
387
+ <div class="charts-section">
388
+ <div class="chart-container">
389
+ <h3>📊 Performance Comparison</h3>
390
+ <canvas id="scoreChart"></canvas>
391
+ </div>
392
+
393
+ <div class="chart-container">
394
+ <h3>📉 Pareto Frontier (Trade-off)</h3>
395
+ <p style="font-size:0.9em; color:#666; margin-top:-10px;">X: Number of Selected Features (Lower is better) vs Y: F1 Score (Higher is better). Optimal: Top-Left.</p>
396
+ <canvas id="paretoChart"></canvas>
397
+ </div>
398
+ </div>
399
  </div>
400
 
401
  <!-- Modal for details -->
 
407
  </div>
408
  </div>
409
 
410
+ <!-- PDF Sidebar -->
411
+ <div id="pdf-sidebar" class="pdf-sidebar">
412
+ <div class="sidebar-header">
413
+ <h3 id="sidebar-title" style="margin:0; color:white;">Paper Preview</h3>
414
+ <span class="close" onclick="closeSidebar()" style="color:white; opacity:0.8; font-size: 28px; cursor: pointer;">&times;</span>
415
+ </div>
416
+ <div class="sidebar-content">
417
+ <iframe id="pdf-frame" src=""></iframe>
418
+ </div>
419
+ </div>
420
+
421
  <script>
422
  let currentResults = [];
423
+ let filteredResults = [];
424
+ let allDatasets = [];
425
  let sortDirection = 1; // 1 for asc, -1 for desc
426
  let lastSortKey = '';
427
 
428
+ // Filter Elements
429
+ const filterF1 = document.getElementById('filter-f1');
430
+ const filterTime = document.getElementById('filter-time');
431
+ const valF1 = document.getElementById('val-f1');
432
+ const valTime = document.getElementById('val-time');
433
+
434
  const VIEW_CONFIG = {
435
  'overall': [
436
  { key: 'mean_f1', label: 'Mean F1' },
 
454
  const loadingIndicator = document.getElementById("loading-indicator");
455
  const modal = document.getElementById("details-modal");
456
  const closeModal = document.querySelector(".close");
457
+
458
+ // Metadata elements
459
+ const metaName = document.getElementById('meta-name');
460
+ const metaUpdated = document.getElementById('meta-updated');
461
+ const descName = document.getElementById('desc-dataset-name');
462
+ const globalUpdated = document.getElementById('last-updated');
463
 
464
  // Close modal
465
  closeModal.onclick = () => modal.style.display = "none";
 
467
  if (event.target == modal) modal.style.display = "none";
468
  }
469
 
470
+ // Chart instances
471
  let scoreChartInstance = null;
472
+ let paretoChartInstance = null;
473
 
474
+ // Filter Logic
475
/**
 * Re-filter `currentResults` with the "Min F1" and "Max Time" sliders, refresh
 * the slider read-outs, and redraw the table and charts from the filtered rows.
 *
 * Reads:  filterF1, filterTime, currentResults
 * Writes: valF1, valTime, filteredResults
 */
function applyFilters() {
    const minF1 = parseFloat(filterF1.value);
    const maxTime = parseFloat(filterTime.value);

    // The right-most slider stop means "no time limit". A range input snaps its
    // value to min + k*step, so with min="1" step="10" max="500" the highest
    // value the browser ever reports is 491 and a literal `>= 500` test can
    // never succeed. Detect "the next step would pass max" instead, which is
    // correct for any min/step/max combination.
    const sliderMax = parseFloat(filterTime.max) || 500;
    const sliderStep = parseFloat(filterTime.step) || 1;
    const noTimeLimit = maxTime + sliderStep > sliderMax;

    valF1.textContent = minF1.toFixed(2);
    valTime.textContent = noTimeLimit ? `${sliderMax}+` : maxTime + "s";

    filteredResults = currentResults.filter(r => {
        // Missing metrics are treated as 0 so such rows are only hidden by a
        // non-zero F1 threshold and never by the time limit.
        const f1 = r.mean_f1 || 0;
        const time = r.time || 0;
        return f1 >= minF1 && (noTimeLimit || time <= maxTime);
    });

    renderTable(filteredResults);
    updateCharts(filteredResults);
}

// Re-apply the filters live while either slider is being dragged.
filterF1.addEventListener('input', applyFilters);
filterTime.addEventListener('input', applyFilters);
494
+
495
/**
 * Redraw both charts from `results`.
 *
 * - Bar chart (#scoreChart): horizontal bars for the first 20 rows; the series
 *   shown depend on the #chart-view-mode selector.
 * - Scatter chart (#paretoChart): every row, feature count (x) vs mean F1 (y).
 *
 * Any previously created Chart instance is destroyed before its canvas is
 * reused. Does nothing when `results` is not an array.
 */
function updateCharts(results) {
    if (!Array.isArray(results)) {
        return;
    }

    // Only the first 20 rows go into the bar chart to keep it readable.
    const barRows = results.slice(0, 20);
    const barLabels = barRows.map(row => row.algorithm || 'Unknown');
    const mode = document.getElementById('chart-view-mode').value;

    // One bar series per classifier for the given metric ('f1' or 'auc');
    // hues are spaced 40 degrees apart starting at `baseHue`.
    const perClassifierSeries = (metric, suffix, baseHue) =>
        ['nb', 'svm', 'rf'].map((clf, i) => {
            const hue = baseHue + i*40;
            return {
                label: clf.toUpperCase() + suffix,
                data: barRows.map(row => row.metrics?.[clf]?.[metric] || 0),
                backgroundColor: `hsla(${hue}, 70%, 60%, 0.7)`,
                borderColor: `hsla(${hue}, 70%, 60%, 1)`,
                borderWidth: 1
            };
        });

    let barSeries;
    switch (mode) {
        case 'overall':
            barSeries = [
                {
                    label: 'Mean F1',
                    data: barRows.map(row => row.mean_f1 || 0),
                    backgroundColor: 'rgba(52, 152, 219, 0.7)',
                    borderColor: 'rgba(52, 152, 219, 1)',
                    borderWidth: 1
                },
                {
                    label: 'Mean AUC',
                    data: barRows.map(row => row.mean_auc || 0),
                    backgroundColor: 'rgba(46, 204, 113, 0.7)',
                    borderColor: 'rgba(46, 204, 113, 1)',
                    borderWidth: 1
                }
            ];
            break;
        case 'classifiers-f1':
            barSeries = perClassifierSeries('f1', ' F1', 200);
            break;
        default:
            // Any other mode falls through to the per-classifier AUC view.
            barSeries = perClassifierSeries('auc', ' AUC', 30);
            break;
    }

    // 1. Performance chart (horizontal bars).
    const scoreCtx = document.getElementById('scoreChart').getContext('2d');
    if (scoreChartInstance) {
        scoreChartInstance.destroy();
    }
    scoreChartInstance = new Chart(scoreCtx, {
        type: 'bar',
        data: { labels: barLabels, datasets: barSeries },
        options: {
            indexAxis: 'y',
            responsive: true,
            maintainAspectRatio: false,
            scales: {
                x: { beginAtZero: true, max: 1.0 },
                y: { ticks: { autoSkip: false } }
            }
        }
    });

    // 2. Pareto scatter: number of selected features vs mean F1, all rows.
    const scatterPoints = results.map(row => {
        const fallbackCount = row.selected_features ? row.selected_features.length : 0;
        return {
            x: row.num_features || fallbackCount,
            y: row.mean_f1 || 0,
            algorithm: row.algorithm
        };
    });

    const paretoCtx = document.getElementById('paretoChart').getContext('2d');
    if (paretoChartInstance) {
        paretoChartInstance.destroy();
    }
    paretoChartInstance = new Chart(paretoCtx, {
        type: 'scatter',
        data: {
            datasets: [{
                label: 'Algorithm Performance',
                data: scatterPoints,
                backgroundColor: 'rgba(230, 126, 34, 0.7)',
                borderColor: 'rgba(230, 126, 34, 1)',
                pointRadius: 6,
                pointHoverRadius: 8
            }]
        },
        options: {
            responsive: true,
            maintainAspectRatio: false,
            scales: {
                x: {
                    type: 'linear',
                    position: 'bottom',
                    title: { display: true, text: 'Number of Selected Features' }
                },
                y: {
                    title: { display: true, text: 'Mean F1 Score' },
                    min: 0, max: 1
                }
            },
            plugins: {
                tooltip: {
                    callbacks: {
                        // Tooltip text: algorithm name plus both coordinates.
                        label: (context) => {
                            const point = context.raw;
                            return `${point.algorithm}: F1=${point.y.toFixed(4)}, Feats=${point.x}`;
                        }
                    }
                }
            }
        }
    });
}
609
 
610
/**
 * Rebuild everything that depends on the selected view mode: the table header,
 * then the table body and the charts, all from the current `filteredResults`.
 */
function updateView() {
    const rows = filteredResults;
    renderTableHeader();
    renderTable(rows);
    updateCharts(rows);
}
615
 
616
/**
 * Rebuild the table header row for the view mode chosen in #chart-view-mode.
 *
 * Fixed columns are Rank, Algorithm, Time (s) and Selected Features; the metric
 * columns in between come from VIEW_CONFIG[viewMode]. Every column except Rank
 * sorts the table when clicked.
 */
function renderTableHeader() {
    const mode = document.getElementById('chart-view-mode').value;
    const columns = VIEW_CONFIG[mode];

    // One sortable <th> per metric column configured for this mode.
    const metricCells = columns
        .map(col => `<th onclick="sortTable('${col.key}')">${col.label} <span class="arrow"></span></th>`)
        .join('');

    tableHead.innerHTML = `
        <tr>
            <th>Rank</th>
            <th onclick="sortTable('algorithm')">Algorithm <span class="arrow"></span></th>
            ${metricCells}
            <th onclick="sortTable('time')">Time (s) <span class="arrow"></span></th>
            <th onclick="sortTable('selected_features')">Selected Features <span class="arrow"></span></th>
        </tr>
    `;
}
638
 
639
+ function renderTable(results) {
640
  tableBody.innerHTML = "";
641
 
642
  // Robust data handling
643
+ if (!results) { results = []; }
644
+ else if (!Array.isArray(results)) {
645
+ if (results.data && Array.isArray(results.data)) results = results.data;
646
+ else if (results.results && Array.isArray(results.results)) results = results.results;
647
+ else if (results.algorithm) results = [results];
648
+ else results = [];
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
649
  }
650
 
651
  if (results.length === 0) {
 
653
  return;
654
  }
655
 
656
+ const viewMode = document.getElementById('chart-view-mode').value;
657
+ const config = VIEW_CONFIG[viewMode];
658
 
659
+ results.forEach((row, index) => {
660
+ const tr = document.createElement("tr");
 
 
 
 
 
661
 
662
+ // Helper to get nested property safely
663
+ const getVal = (obj, path) => {
664
+ return path.split('.').reduce((acc, part) => acc && acc[part], obj);
665
+ };
666
+
667
+ let metricsHTML = '';
668
+ config.forEach(col => {
669
+ const val = getVal(row, col.key);
670
+ const numVal = (val !== undefined && val !== null) ? Number(val).toFixed(4) : 'N/A';
671
+ metricsHTML += `<td>${numVal}</td>`;
 
672
  });
 
 
 
 
 
 
 
673
 
674
+ // Features
675
+ let featCount = row.num_features;
676
+ if (featCount === undefined && row.selected_features) featCount = row.selected_features.length;
677
+
678
+ let featText = "";
679
+ if (Array.isArray(row.selected_features)) {
680
+ featText = row.selected_features.join(", ");
681
+ } else {
682
+ featText = "N/A";
683
+ }
684
+
685
+ const rank = index + 1;
686
+
687
+ tr.innerHTML = `
688
+ <td>${rank}</td>
689
+ <td class="algo-link" onclick="openPdf('${row.algorithm}')" title="Click to view paper">${row.algorithm || 'Unknown'}</td>
690
+ ${metricsHTML}
691
+ <td>${row.time ? Number(row.time).toFixed(4) : 'N/A'}</td>
692
+ <td class="features-cell" onclick="showDetails('${row.algorithm}', '${featText}')" title="${featText}">
693
+ ${featText}
694
+ </td>
695
+ `;
696
+ tableBody.appendChild(tr);
697
  });
698
  }
699
 
 
701
  if (lastSortKey === key) {
702
  sortDirection *= -1;
703
  } else {
704
+ sortDirection = 1;
705
  lastSortKey = key;
706
  }
707
 
708
+ // Helper to get nested value
709
+ const getVal = (obj, path) => path.split('.').reduce((acc, part) => acc && acc[part], obj);
 
 
 
710
 
711
+ filteredResults.sort((a, b) => {
712
+ let valA = getVal(a, key);
713
+ let valB = getVal(b, key);
714
 
715
+ // Handle array length for selected_features sort
716
+ if (key === 'selected_features') {
717
+ valA = Array.isArray(valA) ? valA.length : 0;
718
+ valB = Array.isArray(valB) ? valB.length : 0;
719
+ }
720
 
721
  if (valA === undefined) valA = -Infinity;
722
  if (valB === undefined) valB = -Infinity;
 
725
  if (valA > valB) return 1 * sortDirection;
726
  return 0;
727
  });
 
 
 
 
 
 
 
728
 
729
+ renderTable(filteredResults);
730
+ updateSortArrows(key);
 
 
731
  }
732
+
733
/**
 * Show the sort-direction arrow on the header cell for `activeKey` and clear
 * the arrows on all other header cells.
 *
 * Header cells are matched through their inline `onclick="sortTable('<key>')"`
 * attribute. The direction comes from the global `sortDirection`
 * (1 = ascending, anything else = descending).
 *
 * @param {string} activeKey - Sort key of the column that is currently sorted.
 */
function updateSortArrows(activeKey) {
    document.querySelectorAll('th .arrow').forEach(span => span.textContent = '');

    // Only consider headers that actually carry an onclick attribute: the
    // "Rank" column has none, and getAttribute() would return null for it,
    // which used to throw before any arrow could be drawn.
    document.querySelectorAll('th[onclick]').forEach(th => {
        const handler = th.getAttribute('onclick') || '';
        if (!handler.includes(`'${activeKey}'`)) return;

        const arrow = th.querySelector('.arrow');
        if (arrow) {
            arrow.textContent = sortDirection === 1 ? '↑' : '↓';
        }
    });
}
743
 
744
/**
 * Open the details modal listing the features selected by one algorithm.
 *
 * @param {string} algo - Algorithm name shown in the modal title.
 * @param {string|string[]} features - Feature names, either as an array or as
 *     a single ", "-separated string (the form the table cells pass in).
 */
function showDetails(algo, features) {
    document.getElementById("modal-title").innerText = `${algo} - Selected Features`;

    // Normalise to a clean list. The "N/A" placeholder and empty strings are
    // dropped so a row without features reports 0 instead of 1.
    const rawList = Array.isArray(features)
        ? features.map(String)
        : String(features ?? "").split(", ");
    const featArray = rawList
        .map(f => f.trim())
        .filter(f => f !== "" && f !== "N/A");

    // Build the content with DOM nodes and textContent rather than an HTML
    // string, so feature names containing markup characters are shown
    // literally instead of being parsed as HTML.
    const body = document.getElementById("modal-body");
    body.innerHTML = "";

    const total = document.createElement("p");
    const totalLabel = document.createElement("strong");
    totalLabel.textContent = "Total Selected:";
    total.append(totalLabel, ` ${featArray.length}`);

    const tags = document.createElement("div");
    tags.style.marginTop = "10px";
    tags.style.lineHeight = "1.6";
    featArray.forEach((feature, i) => {
        if (i > 0) tags.append(" ");
        const tag = document.createElement("span");
        tag.className = "feature-tag";
        tag.textContent = feature;
        tags.append(tag);
    });

    body.append(total, tags);
    modal.style.display = "block";
}
754
 
755
/**
 * Load the dataset list from /api/datasets, fill the dataset <select>, update
 * the global "Data Last Updated" banner, and load the first dataset.
 *
 * Each list entry is read for its `name` and `last_updated` properties. On any
 * failure the selector shows a single disabled "Error loading" option.
 */
function fetchDatasets() {
    fetch("/api/datasets")
        .then(res => {
            // fetch() only rejects on network errors; surface HTTP errors too.
            if (!res.ok) throw new Error(`HTTP ${res.status} while loading datasets`);
            return res.json();
        })
        .then(data => {
            if (!Array.isArray(data)) {
                throw new Error("Unexpected /api/datasets payload (expected an array)");
            }
            allDatasets = data;
            datasetSelect.innerHTML = "";

            // Newest timestamp across all datasets. Entries with a missing or
            // 'Unknown' timestamp are ignored so they cannot end up displayed
            // as "undefined".
            // NOTE(review): plain string sort - assumes timestamps sort
            // chronologically as text (e.g. ISO-8601); confirm with the API.
            const dates = data
                .map(d => d.last_updated)
                .filter(d => d && d !== 'Unknown')
                .sort()
                .reverse();
            globalUpdated.textContent = dates.length > 0
                ? `Data Last Updated: ${dates[0]}`
                : `Data Last Updated: Unknown`;

            data.forEach(ds => {
                const option = document.createElement("option");
                option.value = ds.name;
                option.textContent = ds.name;
                datasetSelect.appendChild(option);
            });

            // Default selection: the first dataset returned by the API.
            if (data.length > 0) {
                loadDataset(data[0].name);
            }
        })
        .catch(err => {
            console.error("Error loading datasets:", err);
            datasetSelect.innerHTML = '<option disabled>Error loading</option>';
        });
}
787
 
788
/**
 * Select dataset `name`, update the metadata boxes, fetch its results from
 * /api/results and redraw the table and charts with the current filters.
 *
 * @param {string} name - Dataset name as listed in `allDatasets`.
 */
function loadDataset(name) {
    // Sequence number for this request. If the user switches datasets again
    // before this response arrives, the stale response is ignored so it cannot
    // overwrite the newer selection.
    const requestId = (loadDataset.latestRequestId = (loadDataset.latestRequestId || 0) + 1);
    const isStale = () => requestId !== loadDataset.latestRequestId;

    datasetSelect.value = name;
    loadingIndicator.style.display = "block";
    tableBody.innerHTML = "";

    // Update the metadata box from the cached dataset list.
    const dsInfo = allDatasets.find(d => d.name === name);
    if (dsInfo) {
        metaName.textContent = dsInfo.name;
        metaUpdated.textContent = dsInfo.last_updated;
        descName.textContent = dsInfo.name;
    }

    // Encode the name so characters such as '&', '#' or spaces cannot corrupt
    // the query string.
    fetch(`/api/results?dataset=${encodeURIComponent(name)}`)
        .then(res => {
            // fetch() only rejects on network errors; surface HTTP errors too.
            if (!res.ok) throw new Error(`HTTP ${res.status} while loading results`);
            return res.json();
        })
        .then(data => {
            if (isStale()) return;
            loadingIndicator.style.display = "none";
            currentResults = data;

            // Keep the user's current filter settings when switching datasets.
            applyFilters();
            renderTableHeader(); // Ensure headers match view mode
        })
        .catch(err => {
            if (isStale()) return;
            loadingIndicator.style.display = "none";
            console.error("Error:", err);
            tableBody.innerHTML = '<tr><td colspan="10" style="color:red; text-align:center;">Error loading results</td></tr>';
        });
}
819
+
820
// Load the newly chosen dataset whenever the selector changes.
datasetSelect.addEventListener("change", function onDatasetChange(event) {
    const chosenName = event.target.value;
    loadDataset(chosenName);
});
823
+
824
+ // PDF Sidebar Logic
825
/**
 * Slide the PDF sidebar open and show the paper for `algoName`.
 *
 * The PDF is requested from `/pdfs/<ALGONAME>.pdf`; the name is upper-cased to
 * match the file names on the server. Does nothing for an empty name.
 *
 * @param {string} algoName - Algorithm name as shown in the table.
 */
function openPdf(algoName) {
    if (!algoName) return;
    const sidebar = document.getElementById('pdf-sidebar');
    const frame = document.getElementById('pdf-frame');

    // Use upper case as observed in file system
    const filename = algoName.toUpperCase() + ".pdf";

    // Encode the file name so characters such as '/', '#', '?' or spaces in an
    // algorithm name cannot change which URL is requested.
    frame.src = `/pdfs/${encodeURIComponent(filename)}`;
    sidebar.classList.add('open');
}
836
+
837
/**
 * Slide the PDF sidebar closed and, once the 300 ms timeout has elapsed,
 * unload the PDF from the iframe.
 */
function closeSidebar() {
    const sidebar = document.getElementById('pdf-sidebar');
    sidebar.classList.remove('open');
    // Clear src after transition to avoid flicker or keep memory usage low
    setTimeout(() => {
        // If the sidebar was reopened during the delay, the iframe already
        // points at the new paper - leave it alone instead of blanking it.
        if (sidebar.classList.contains('open')) return;
        document.getElementById('pdf-frame').src = "";
    }, 300);
}
845
+
846
+ document.addEventListener("DOMContentLoaded", fetchDatasets);
847
+
848
  </script>
849
 
850
  </body>
851
+ </html>
check_datasets_api.py ADDED
@@ -0,0 +1,20 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import urllib.request
2
+ import json
3
+
4
API_URL = "http://127.0.0.1:7861/api/datasets"
# Fail fast instead of hanging forever when the server accepts the connection
# but never answers.
REQUEST_TIMEOUT_S = 10


def summarize_datasets_payload(data):
    """Return the report lines describing a decoded /api/datasets payload.

    The payload passes the check when it is a non-empty list whose first item
    is a dict containing a ``last_updated`` key.

    Args:
        data: The JSON-decoded response body.

    Returns:
        A list of strings, one per line to print.
    """
    lines = [f"Type: {type(data)}"]
    if isinstance(data, list) and data:
        first = data[0]
        lines.append(f"First item: {first}")
        # Require a dict: on a plain string ``in`` would be a substring test,
        # and on a number it would raise TypeError.
        if isinstance(first, dict) and "last_updated" in first:
            lines.append("SUCCESS: last_updated field found.")
        else:
            lines.append("FAILURE: last_updated field MISSING.")
    else:
        lines.append("Data is empty or not a list.")
        lines.append(str(data))
    return lines


def main():
    """Query the local datasets endpoint and print a short diagnostic report."""
    try:
        with urllib.request.urlopen(API_URL, timeout=REQUEST_TIMEOUT_S) as response:
            status = response.getcode()
            data = json.loads(response.read().decode())
    except (OSError, ValueError) as e:
        # OSError covers URLError/HTTPError and timeouts; ValueError covers
        # undecodable bytes and invalid JSON.
        print(f"Error: {e}")
        return

    print(f"Status Code: {status}")
    for line in summarize_datasets_payload(data):
        print(line)


if __name__ == "__main__":
    main()
config.py ADDED
@@ -0,0 +1,10 @@
 
 
 
 
 
 
 
 
 
 
 
1
# Project-wide settings, exposed as module-level constants.
# NOTE(review): the descriptions below are inferred from the constant names;
# confirm against the modules that import this file.

# Local filesystem path of the language model to load (machine-specific).
# MODEL_NAME = "/home/fangsensen/.cache/huggingface/hub/models--deepseek-ai--DeepSeek-R1-Distill-Qwen-1.5B/snapshots/ad9f0ae0864d7fbcd1cd905e3c6c5b069cc8b562"
MODEL_NAME = "/data1/fangsensen/deepseek-math-7b-rl"

# Paths are relative to the working directory the program is started from.
DB_PATH = "data/results.db"  # results database file
DATASET_PATH = "data/Authorship.mat"  # dataset (.mat) to run on
EXPR_SEED_PATH = "data/expression_seed.json"  # seed expressions (JSON)
ITERATIONS = 10  # presumably the number of generate/evaluate rounds
TOP_K = 10  # presumably how many top-ranked items are kept
CV_FOLDS = 5  # presumably the number of cross-validation folds
GPU = True  # presumably toggles GPU use - TODO confirm where this is read
data/Authorship.mat ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:d735be2a10e0e6560fe0534f3915a1ca4adc6ec65848d795b53f80623c3355a1
3
+ size 3345720
data/Dermatology.mat ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:2b85673218727df5d9fd309b382cc8619d8efca653b9fa4b001f5614dea53eeb
3
+ size 700024
data/Factors.mat ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:f3e8714849b4647d74752aa75a8813c48c1c621de2cef4fa7da54e57a919cfbb
3
+ size 26030704
data/Movement_libras.mat ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:843da25828f4d6195c95f745dfeba9c197ca7dba2d3f527f655d68c37147d104
3
+ size 262320
data/Musk1.mat ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:c0242a3fdaf222db642b60fa9fd8224be3d94958f17a4bdce3c8062c48a11e6f
3
+ size 636176
data/Synthetic_control.mat ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:50157294b6ccc640fe3b3791ea3c784ab414b1a05864de2f931931983c5b1f56
3
+ size 2270504
data/Waveform.mat ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:dd453b528573aadab4f3385f5d47a2b66d2ac68563757170b3cc66a5290abb19
3
+ size 12820752
data/Wdbc.mat ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:f6ccf0ff7747481ebe864f935c6cc3d4c342377b2419ab607fec97a91b2351be
3
+ size 139080
data/analyzor.txt ADDED
@@ -0,0 +1,57 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ [
2
+ {
3
+ "expression": "I(X;Y;Z)",
4
+ "operators": [
5
+ "Interaction Information"
6
+ ],
7
+ "dependency": "Conditional Independence",
8
+ "theoretical_advantage": "Interaction Information is a measure of the unique information shared between three random variables X, Y, and Z. It can be used to detect conditional independence relationships, which can be useful for feature selection and model simplification.",
9
+ "complexity": "medium"
10
+ }
11
+ ],
12
+ [
13
+ {
14
+ "expression": "I(X;Y)",
15
+ "operators": [
16
+ "Mutual Information"
17
+ ],
18
+ "dependency": "measures the mutual dependence between two random variables X and Y",
19
+ "theoretical_advantage": "Mutual Information can be used for feature selection in machine learning, as it measures the dependence between features and the target variable.",
20
+ "complexity": "low"
21
+ }
22
+ ],
23
+ [
24
+ {
25
+ "expression": "I(X;Y|Z)",
26
+ "operators": [
27
+ "MI"
28
+ ],
29
+ "dependency": "Conditional Independence",
30
+ "theoretical_advantage": "It can help identify relevant features for classification or regression tasks.",
31
+ "complexity": "low"
32
+ }
33
+ ],
34
+ [
35
+ {
36
+ "expression": "I(X;Y) - I(X;Z)",
37
+ "operators": [
38
+ "MI",
39
+ "MI"
40
+ ],
41
+ "dependency": "X and Y are independent of Z",
42
+ "theoretical_advantage": "It can be used to identify features that are relevant to the target variable but not related to each other.",
43
+ "complexity": "low"
44
+ }
45
+ ],
46
+ [
47
+ {
48
+ "expression": "I(X;Y|Z) - I(X;Y)",
49
+ "operators": [
50
+ "Mutual Information",
51
+ "Conditional Mutual Information"
52
+ ],
53
+ "dependency": "Conditional Independence",
54
+ "theoretical_advantage": "It can help identify features that are relevant to the target variable given a set of other features.",
55
+ "complexity": "medium"
56
+ }
57
+ ],
data/dna.mat ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:b0d9a32fd59c16b059be5236efb2f398229f5a41e0034afb27bedc8c126c6f1d
3
+ size 4613568
data/expressions.py ADDED
@@ -0,0 +1,9 @@
 
 
 
 
 
 
 
 
 
 
1
# Candidate scoring expressions, written as strings.
# NOTE(review): the notation looks information-theoretic (I = mutual
# information, H = entropy, X = feature, Y = target, Z = conditioning set);
# confirm against whatever parses these strings.
EXPRESSIONS = [
    "I(X;Y)",
    "I(X;Y) / (H(X) + 1e-6)",
    "I(X;Y) - 0.1 * I(X;X_other)",
    "I(X;Y|Z)",
    "I(X;Y) / (H(X) + H(Y))"
]
8
+
9
+
data/madelon.mat ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:9c9a047aaa7511f8ca965e05ca275155e0fc7d6a3fab682ecd30665c2c5a9915
3
+ size 8016240
data/results.db ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:531c443d140ed2b80d26ec6ec7131a74567641c6213d32e977c1b885115da4d4
3
+ size 331776
data/spambase.mat ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:d49f4a633f845513ff04e29ed6d9aa8fc0cc01eabddaaa682e10bdd33d4d3361
3
+ size 2135104
data/splice.mat ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:0c3caf785bab5285ebeca87ddff74807c18a57eef9331caf60eb4780466e3a31
3
+ size 778600
download_model.py ADDED
@@ -0,0 +1,11 @@
 
 
 
 
 
 
 
 
 
 
 
 
1
+ from huggingface_hub import snapshot_download
2
+
3
# Hub repository to download. Replace with the model you actually use.
# NOTE(review): this looks like a placeholder id - confirm that the repository
# exists on the Hub before running.
MODEL_ID = "deepseek/DeepSeek-R1-Distill"
# Destination directory for the downloaded snapshot (machine-specific path).
OUT_DIR = "/home/fangsensen/AutoFS/models/DeepSeek-R1-R1-1.5B"


def main():
    """Download the MODEL_ID snapshot into OUT_DIR as regular files."""
    snapshot_download(
        repo_id=MODEL_ID,
        local_dir=OUT_DIR,
        local_dir_use_symlinks=False,
    )
    print("Done! Model saved at", OUT_DIR)


# Run the download only when executed as a script, so importing this module
# does not trigger a multi-gigabyte network transfer.
if __name__ == "__main__":
    main()
extract_metadata.py ADDED
@@ -0,0 +1,77 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import os
2
+ import json
3
+ import scipy.io
4
+ import numpy as np
5
+
6
+ DATA_DIR = "data"
7
+ OUTPUT_FILE = "Webapp/dataset_metadata.json"
8
+
9
def _summarize_mat(mat):
    """Return ``(n_samples, n_features, n_classes)`` guessed from a loaded .mat dict.

    Heuristic: the largest ndarray is taken as the data matrix; the first other
    ndarray with a dimension equal to the sample count is taken as the labels.
    Values that cannot be determined are reported as 0.
    """
    arrays = {
        key: value
        for key, value in mat.items()
        if not key.startswith("__") and isinstance(value, np.ndarray)
    }

    # Assume the largest array is the data. Strict ">" keeps the first key on
    # ties and skips empty arrays.
    data_key = None
    max_size = 0
    for key, value in arrays.items():
        if value.size > max_size:
            max_size = value.size
            data_key = key

    n_samples = n_features = n_classes = 0
    if data_key is not None and arrays[data_key].ndim == 2:
        n_samples, n_features = arrays[data_key].shape

    # Without a sample count there is nothing to match labels against.
    if n_samples == 0:
        return n_samples, n_features, n_classes

    for key, value in arrays.items():
        if key == data_key or value.ndim == 0:
            continue
        # Labels usually have the same length as the samples (column or row).
        if value.shape[0] == n_samples or (value.ndim > 1 and value.shape[1] == n_samples):
            n_classes = len(np.unique(value))
            break

    return n_samples, n_features, n_classes


def get_metadata(data_dir=None):
    """Collect basic shape metadata for every ``.mat`` file in a directory.

    Args:
        data_dir: Directory to scan. Defaults to the module-level ``DATA_DIR``.

    Returns:
        A dict mapping each dataset name (file name without ``.mat``) to
        ``{"n_samples": int, "n_features": int, "n_classes": int}``. Files that
        fail to load or parse are reported on stdout and left out. An empty
        dict is returned when the directory does not exist.
    """
    data_dir = DATA_DIR if data_dir is None else data_dir

    metadata = {}
    # isdir (not exists): a regular file at this path cannot be listed either.
    if not os.path.isdir(data_dir):
        print(f"Data directory {data_dir} not found.")
        return metadata

    # Sorted so the output (and the JSON written from it) is reproducible.
    for filename in sorted(os.listdir(data_dir)):
        if not filename.endswith(".mat"):
            continue

        name = filename[:-4]  # Remove .mat
        path = os.path.join(data_dir, filename)
        try:
            mat = scipy.io.loadmat(path)
            n_samples, n_features, n_classes = _summarize_mat(mat)
        except Exception as e:
            # Deliberate best-effort: one unreadable file must not abort the
            # scan of the remaining datasets.
            print(f"Error processing {filename}: {e}")
            continue

        metadata[name] = {
            "n_samples": int(n_samples),
            "n_features": int(n_features),
            "n_classes": int(n_classes),
        }
        print(f"Processed {name}: {n_samples}x{n_features}, {n_classes} classes")

    return metadata
72
+
73
if __name__ == "__main__":
    meta = get_metadata()

    # Make sure the destination directory exists; open() does not create it.
    output_dir = os.path.dirname(OUTPUT_FILE)
    if output_dir:
        os.makedirs(output_dir, exist_ok=True)

    with open(OUTPUT_FILE, "w", encoding="utf-8") as f:
        json.dump(meta, f, indent=2)
    print(f"Metadata saved to {OUTPUT_FILE}")
main.py ADDED
@@ -0,0 +1,244 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # import json, datetime
2
+ # from config import *
3
+ # import pandas as pd
4
+ # import scipy.io as scio
5
+ # from modules.expression_pool import init_db, add_expr, top_exprs
6
+ # from agents.generator_agent import GeneratorAgent
7
+ # from agents.evaluator_agent import evaluate_expression
8
+ # from modules.utils import load_mat_as_numeric
9
+ # # prepare
10
+ # conn = init_db(DB_PATH)
11
+ # # df = pd.read_csv(DATASET_PATH)
12
+ # # df = scio.loadmat(DATASET_PATH) # 读取数据文件
13
+ # X, y = load_mat_as_numeric(DATASET_PATH)
14
+ # X_df = pd.DataFrame(X)
15
+ # # print(df)
16
+ # # X_df = pd.DataFrame(df['X']) # 读取训练数据
17
+ # # print(df['Y'])
18
+ # # y0 = pd.DataFrame(df['Y']) # 读取标签
19
+ # # X_df = df.drop(columns=['label'])
20
+ # # y = y0.values
21
+ # # print("y type:", type(y), "dtype:", getattr(y, "dtype", None))
22
+ # # print("y example:", y[:10])
23
+
24
+
25
+ # # load seed
26
+ # with open(EXPR_SEED_PATH) as f:
27
+ # seeds = json.load(f)
28
+ # # evaluate seeds first
29
+ # for s in seeds:
30
+ # score, fvals, top_idx = evaluate_expression(s['expression'], X_df, y, TOP_K, CV_FOLDS)
31
+ # add_expr(conn, s['expression'], score, s.get('explanation',''), str(s.get('complexity','')))
32
+
33
+ # # init generator
34
+ # gen = GeneratorAgent(MODEL_NAME)
35
+
36
+ # # iterative loop
37
+ # for it in range(ITERATIONS):
38
+ # print("Iteration", it+1)
39
+ # refs = top_exprs(conn, k=TOP_K)
40
+ # # build prompt_text with refs + feature stats
41
+ # # prompt = "Given top expressions: " + str(refs) + "\nGenerate expressions in format: Expression: ... Rationale: ..."
42
+ # top_expressions = [] # List[(expr, score)]
43
+ # top_expressions.append((refs, score))
44
+ # top_expressions = sorted(
45
+ # top_expressions,
46
+ # key=lambda x: -x[1]
47
+ # )[:5]
48
+ # new_text = gen.generate_candidates(top_expressions)
49
+ # for out in new_text:
50
+ # # extract Expression line
51
+ # expr_line = None
52
+ # for line in out.splitlines():
53
+ # if line.strip().lower().startswith("expression"):
54
+ # expr_line = line.split(":",1)[1].strip()
55
+ # break
56
+ # if not expr_line: expr_line = out.strip()
57
+ # score, fvals, top_idx = evaluate_expression(expr_line, X_df, y, TOP_K, CV_FOLDS)
58
+ # add_expr(conn, expr_line, score, out, "")
59
+ # print(f"Candidate {expr_line} -> score {score:.4f}")
60
+
61
+
62
+
63
+ # results = []
64
+
65
+ # for expr in EXPRESSIONS:
66
+ # exec_out = executor.run(expr, X, y)
67
+ # analysis = analyzer.analyze(expr, exec_out["cv_score"])
68
+
69
+ # results.append({
70
+ # "expression": expr,
71
+ # "score": exec_out["cv_score"],
72
+ # "analysis": analysis
73
+ # })
74
+
75
+ # ranking = judge.rank(results)
76
+ #-----------------------------------------------------------------------2.0---------------
77
+
78
+ # from agents.analyzer_agent import AnalyzerAgent
79
+
80
+ # MODEL_PATH = "/data1/fangsensen/deepseek-math-7b-rl"
81
+
82
+ # agent = AnalyzerAgent(
83
+ # name="AnalyzerAgent",
84
+ # model_path=MODEL_PATH
85
+ # )
86
+
87
+ # expressions = [
88
+ # "I(X;Y)",
89
+ # "I(X;Y|Z)",
90
+ # "I(X;Y) - I(X;Z)",
91
+ # "I(X;Y|Z) - I(X;Y)",
92
+ # "I(X;Y;Z)"
93
+ # ]
94
+ # # expressions = [
95
+ # # "I(X;Y|Z) - I(X;Y)",
96
+ # # ]
97
+ # for expr in expressions:
98
+ # print("=" * 80)
99
+ # result = agent.analyze_expression(expr)
100
+ # print(result)
101
+ #-----------------------------------------------------------------------Router---------------
102
+ import numpy as np
103
+ from agents.router_agent import FSRouterAgent
104
+
105
+ import scipy.io as scio
106
+ import pandas as pd
107
+ from sklearn.preprocessing import LabelEncoder
108
+
109
def load_mat_dataset(
    file_path,
    feature_keys=("X", "data", "fea"),
    label_keys=("Y", "y", "label"),
):
    """
    Generic .mat dataset loader (FSExecutor / Agent compatible).

    Parameters
    ----------
    file_path : str
        Path of the .mat file.
    feature_keys : tuple
        Candidate keys of the feature matrix; the first one present is used.
    label_keys : tuple
        Candidate keys of the labels; the first one present is used.

    Returns
    -------
    X : np.ndarray, shape (n_samples, n_features)
        Feature matrix converted to float.
    y : np.ndarray, shape (n_samples,)
        Labels encoded by ``LabelEncoder``.
    meta : dict
        ``n_samples``, ``n_features``, ``n_classes``, ``classes`` (the
        distinct encoded labels) and the fitted ``label_encoder``.

    Raises
    ------
    KeyError
        If none of ``feature_keys`` / ``label_keys`` exists in the file.
    """
    # Local import: loadmat returns scipy sparse matrices for sparse MATLAB
    # arrays, and np.asarray() would wrap those in a useless 0-d object array.
    from scipy.sparse import issparse

    data = scio.loadmat(file_path)

    # ---------- 1. Read X ----------
    X = next((data[key] for key in feature_keys if key in data), None)
    if X is None:
        raise KeyError(f"Cannot find feature matrix in {file_path}")

    if issparse(X):
        X = X.toarray()
    X = np.asarray(X)

    if X.dtype == object:
        # Cell arrays: every entry is either a scalar or a small array/list
        # whose first item holds the value.
        X = np.array(
            [[float(v[0]) if isinstance(v, (list, np.ndarray)) else float(v)
              for v in row]
             for row in X]
        )
    else:
        X = X.astype(float)

    # ---------- 2. Read y ----------
    y = next((data[key] for key in label_keys if key in data), None)
    if y is None:
        raise KeyError(f"Cannot find label vector in {file_path}")

    if issparse(y):
        y = y.toarray()
    # y is commonly stored as (n, 1); flatten it.
    y = np.asarray(y).reshape(-1)

    # ---------- 3. Label cleaning & encoding ----------
    # Unwrap object / string / mixed entries before encoding.
    if y.dtype == object:
        y = pd.Series(y).apply(lambda x: x[0] if isinstance(x, (list, np.ndarray)) else x)

    label_encoder = LabelEncoder()
    y = label_encoder.fit_transform(y)

    # ---------- 4. Meta information ----------
    classes = np.unique(y)
    meta = {
        "n_samples": X.shape[0],
        "n_features": X.shape[1],
        "n_classes": len(classes),
        "classes": classes,
        "label_encoder": label_encoder,
    }

    return X, y, meta
187
+
188
# Directory holding the benchmark .mat files, and the datasets to run.
base_url = "/home/fangsensen/AutoFS/data/"
datanames = [
    'dna',
    'Factors',
    'madelon',
    'Movement_libras',
    'Musk1',
    'spambase',
    'splice',
    'Synthetic_control',
    'Waveform',
    'Wdbc',
]
# dataname = 'Authorship'


def main(dataname):
    """Run the feature-selection router on one dataset.

    Loads ``<base_url><dataname>.mat``, builds the task description, passes
    it to ``FSRouterAgent.run`` and prints the returned entries in order.

    Returns whatever ``FSRouterAgent.run`` returned.
    """
    mat_path = base_url + dataname + ".mat"
    X, y, meta = load_mat_dataset(mat_path)

    # NOTE(review): "n_selected_features" is 5 at the top level but 15 inside
    # "params" -- which one the router honours is not visible here; confirm.
    task = dict(
        X=X,
        y=y,
        algorithms=[
            "JMIM", "CFR", "DCSF", "IWFS",
            "MRI", "MRMD", "UCRFS", "CSMDCCMR",
        ],
        n_selected_features=5,
        class_specific=False,
        classifiers=["nb", "svm", "rf"],
        cv=10,
        random_state=19,
        params={"n_selected_features": 15},
        dataname=dataname,
    )

    leaderboard = FSRouterAgent().run(task)

    rank = 1
    for res in leaderboard:
        print(f"Rank {rank}: {res}")
        rank += 1
    return leaderboard


if __name__ == "__main__":
    for dataname in datanames:
        main(dataname)
223
+
224
+
225
+
226
+
227
+ # {'selected_features': [59, 50, 56, 4, 38, 9, 29, 23, 0, 20, 34, 36, 24, 26, 28],
228
+ # 'num_features': 15,
229
+ # 'metrics': {'nb': {'f1': 0.9181133571145461, 'auc': 0.9807805770573524},
230
+ # 'svm': {'f1': 0.9282600079270711, 'auc': 0.980695564275392},
231
+ # 'rf': {'f1': 0.9219976218787156, 'auc': 0.9768411621948705}},
232
+ # 'time': 7.378173112869263,
233
+ # 'algorithm': 'JMIM'},
234
+
235
+
236
+ # {'selected_features': [59, 50, 56, 4, 38, 0, 9, 29, 23, 20, 36, 34, 24, 28, 26],
237
+ # 'num_features': 15,
238
+ # 'metrics': {'nb': {'f1': 0.9163694015061433, 'auc': 0.9805189493459717},
239
+ # 'svm': {'f1': 0.9265953230281413, 'auc': 0.98064247666047},
240
+ # 'rf': {'f1': 0.9189853349187476, 'auc': 0.9769441217042379}},
241
+ # 'time': 2.0774385929107666,
242
+ # 'algorithm': 'CFR'}
243
+
244
+
modules/expr_to_code.py ADDED
@@ -0,0 +1,163 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import sympy as sp
2
+ import numpy as np
3
+ from sklearn.metrics import mutual_info_score
4
+ # 符号
5
+ import sympy as sp
6
+ import pandas as pd
7
+
8
+ # symbols
9
# Placeholder symbols that may appear in an expression string.
X, Y, Z = sp.symbols("X Y Z")


class MI(sp.Function):
    """Symbolic two-argument function, exposed to the parser as ``I``."""
    nargs = (2,)


class CMI(sp.Function):
    """Symbolic three-argument function, exposed to the parser as ``CI``."""
    nargs = (3,)


class II(sp.Function):
    """Symbolic three-argument function (interaction information)."""
    nargs = (3,)


# Names the expression parser is allowed to resolve:
#   I  -> mutual information              I(X,Y)
#   CI -> conditional mutual information  I(X,Y|Z)
#   II -> interaction information         I(X;Y;Z)
ALLOWED_LOCALS = dict(X=X, Y=Y, Z=Z, I=MI, CI=CMI, II=II)
30
+
31
def parse_expression(expr_str: str) -> sp.Expr:
    """Turn an expression string into a SymPy expression.

    Names are resolved through ``ALLOWED_LOCALS``.

    NOTE(review): ``sympify`` evaluates the string; only pass trusted text.
    """
    return sp.sympify(expr_str, locals=ALLOWED_LOCALS)
37
+
38
+
39
def entropy(x):
    """Return the empirical Shannon entropy of ``x`` in nats.

    Parameters
    ----------
    x : array-like
        1-D input: each element is one observation.
        2-D input: each *row* is one joint observation, so the result is the
        joint entropy of the columns.

    Returns
    -------
    numpy.float64
        ``-sum(p * log(p))`` over the distinct observations.
    """
    values = np.asarray(x)
    if values.ndim > 1:
        # Without axis=0 np.unique flattens the input and would count pooled
        # cell values instead of distinct rows.
        _, cnt = np.unique(values, axis=0, return_counts=True)
    else:
        _, cnt = np.unique(values, return_counts=True)
    p = cnt / cnt.sum()
    # Every count is >= 1, so p > 0 and no epsilon is needed inside the log.
    return -np.sum(p * np.log(p))
43
+
44
def mi(x, y):
    """Mutual information between two label sequences.

    Thin wrapper around ``sklearn.metrics.mutual_info_score``.
    """
    score = mutual_info_score(x, y)
    return score
46
+
47
def cmi(x, y, z):
    """Conditional mutual information assembled from entropy terms.

    Uses I(X;Y|Z) = H(X,Z) + H(Y,Z) - H(Z) - H(X,Y,Z).  The joint variables
    are built by stacking the inputs column-wise and are handed to
    ``entropy`` as nested lists.
    """
    xz_rows = np.c_[x, z].tolist()
    yz_rows = np.c_[y, z].tolist()
    xyz_rows = np.c_[x, y, z].tolist()

    h_xz = entropy(xz_rows)
    h_yz = entropy(yz_rows)
    h_z = entropy(z)
    h_xyz = entropy(xyz_rows)
    return h_xz + h_yz - h_z - h_xyz
55
+
56
def interaction_info(x, y, z):
    """Interaction information I(X;Y;Z) = I(X;Y) - I(X;Y|Z)."""
    unconditional = mi(x, y)
    conditional = cmi(x, y, z)
    return unconditional - conditional
59
+
60
+
61
def expr_to_callable(expr: sp.Expr):
    """Compile a parsed expression into ``f(X_arr, Y_arr, Z_arr=None)``.

    The returned function walks the expression tree: ``MI`` / ``CMI`` / ``II``
    nodes call ``mi`` / ``cmi`` / ``interaction_info``, the symbols X, Y, Z
    are replaced by the supplied arrays, and numbers, sums, products and
    powers are evaluated arithmetically.  Any other node raises
    ``ValueError``; a reference to Z when ``Z_arr`` was not supplied raises
    ``KeyError``.
    """

    def _evaluate(node, ctx):
        # Information-theoretic function nodes first.
        if isinstance(node, MI):
            left, right = node.args[0], node.args[1]
            return mi(_evaluate(left, ctx), _evaluate(right, ctx))

        if isinstance(node, CMI):
            first, second, given = node.args[0], node.args[1], node.args[2]
            return cmi(_evaluate(first, ctx),
                       _evaluate(second, ctx),
                       _evaluate(given, ctx))

        if isinstance(node, II):
            first, second, third = node.args[0], node.args[1], node.args[2]
            return interaction_info(_evaluate(first, ctx),
                                    _evaluate(second, ctx),
                                    _evaluate(third, ctx))

        # Data placeholders.
        for symbol, key in ((X, "X"), (Y, "Y"), (Z, "Z")):
            if node == symbol:
                return ctx[key]

        # Plain arithmetic.
        if node.is_Number:
            return float(node)

        if node.is_Add:
            total = 0
            for term in node.args:
                total = total + _evaluate(term, ctx)
            return total

        if node.is_Mul:
            product = 1.0
            for factor in node.args:
                product *= _evaluate(factor, ctx)
            return product

        if node.is_Pow:
            base, exp = node.args
            return _evaluate(base, ctx) ** _evaluate(exp, ctx)

        raise ValueError(f"Unsupported node: {node}")

    def f(X_arr, Y_arr, Z_arr=None):
        bindings = {"X": X_arr, "Y": Y_arr}
        if Z_arr is not None:
            bindings["Z"] = Z_arr
        return _evaluate(expr, bindings)

    return f
112
+
113
+
114
+
115
+ from sklearn.preprocessing import LabelEncoder
116
def changetosinge(x):
    """Convert a single cell value to a Python float."""
    as_float = float(x)
    return as_float
118
+ # scores = f(X, y, X_other_list)
119
def prepare_data(dataname, base_url):
    """Load ``<base_url>/<dataname>.mat`` and return a discretised frame.

    The file must contain ``'X'`` (features) and ``'Y'`` (labels).  Every
    feature is converted to float and cut into 5 equal-width bins; labels are
    encoded with ``LabelEncoder``.

    Parameters
    ----------
    dataname : str
        Dataset name without the ``.mat`` suffix.
    base_url : str
        Directory containing the dataset.

    Returns
    -------
    data_processed : pandas.DataFrame
        Binned features in columns ``'0' .. str(n_features - 1)`` followed by
        the encoded label in column ``str(n_features)``.
    classes : list
        The distinct encoded labels in ascending order.
    """
    url = os.path.join(base_url, dataname + '.mat')
    data = scio.loadmat(url)
    X0 = pd.DataFrame(data['X'])
    y0 = pd.DataFrame(data['Y'])

    if dataname == 'Dermatology':
        # The last column of this dataset holds wrapped non-numeric values:
        # unwrap, label-encode, and put the result back as column 33.
        special = X0.iloc[:, -1]
        a = np.array([item[0] for item in special])
        a33 = LabelEncoder().fit_transform(a)
        # .copy() so the assignment below writes to an independent frame
        # rather than to a slice of the original.
        X0 = X0.iloc[:, :-1].copy()
        X0[33] = a33

    # DataFrame.applymap is deprecated since pandas 2.1 in favour of
    # DataFrame.map; use whichever the installed pandas provides.
    elementwise = getattr(pd.DataFrame, "map", None) or pd.DataFrame.applymap
    X0 = elementwise(X0, changetosinge)
    y0 = elementwise(y0, changetosinge)

    # LabelEncoder expects a 1-D target; flatten the (n, 1) label column.
    y_encoded = LabelEncoder().fit_transform(y0.to_numpy().ravel())
    y = pd.DataFrame(y_encoded)

    # Build all binned columns at once instead of growing the frame one
    # column at a time.
    X = pd.DataFrame(
        {col: pd.cut(X0[col], bins=5, labels=False) for col in X0.columns}
    )

    new_columns = [str(i) for i in range(X.shape[1] + 1)]
    X = X.rename(columns=dict(zip(X.columns, new_columns[:-1])))
    y = y.rename(columns=dict(zip(y.columns, [new_columns[-1]])))
    data_processed = pd.concat([X, y], axis=1)

    return data_processed, sorted(set(y_encoded))
150
+
151
# These imports stay at module level: prepare_data() above relies on them.
import os
import scipy.io as scio

if __name__ == "__main__":
    # Smoke test: score the first feature against the label with I(X,Y).
    # Guarded so that importing this module no longer loads a dataset.
    dataname = 'Authorship'
    base_url = '/home/fangsensen/AutoFS/data/'
    data_processed, class_set = prepare_data(dataname, base_url)

    # prepare_data puts the label in the last column; these two names were
    # previously used without being defined.
    X_arr = data_processed['0']
    y_arr = data_processed[data_processed.columns[-1]]

    expr = parse_expression("I(X,Y)")
    f = expr_to_callable(expr)
    score = f(X_arr, y_arr)
    print(score)
modules/expression_pool.py ADDED
@@ -0,0 +1,24 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import sqlite3, os
2
def init_db(db_path="data/results.db"):
    """Open (creating if needed) the expression database.

    Creates the parent directory of ``db_path`` when it has one, connects,
    and makes sure the ``exprs`` table exists.

    Parameters
    ----------
    db_path : str
        SQLite database location.  Paths without a directory component,
        including ``":memory:"``, are accepted.

    Returns
    -------
    sqlite3.Connection
        Open connection; the caller is responsible for closing it.
    """
    parent = os.path.dirname(db_path)
    if parent:
        # os.makedirs("") raises FileNotFoundError, so only create a
        # directory when the path actually names one.
        os.makedirs(parent, exist_ok=True)

    conn = sqlite3.connect(db_path)
    conn.execute("""
        CREATE TABLE IF NOT EXISTS exprs(
            expr TEXT PRIMARY KEY,
            score REAL,
            rationale TEXT,
            complexity TEXT,
            created_at TEXT
        )""")
    conn.commit()
    return conn
15
+
16
def add_expr(conn, expr, score, rationale="", complexity=""):
    """Insert an expression row, replacing any existing row with that ``expr``.

    ``created_at`` is filled in by SQLite (``datetime('now')``); the change
    is committed before returning.
    """
    statement = "INSERT OR REPLACE INTO exprs(expr, score, rationale, complexity, created_at) VALUES(?,?,?,?,datetime('now'))"
    values = (expr, score, rationale, complexity)
    conn.execute(statement, values)
    conn.commit()
20
+
21
def top_exprs(conn, k=5, min_score=0.0):
    """Return up to ``k`` rows ``(expr, score, rationale)`` ordered by score.

    Only rows with ``score >= min_score`` are considered; the highest score
    comes first.
    """
    query = "SELECT expr,score,rationale FROM exprs WHERE score>=? ORDER BY score DESC LIMIT ?"
    return conn.execute(query, (min_score, k)).fetchall()
modules/modules/expr_to_code.py ADDED
@@ -0,0 +1,163 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import sympy as sp
2
+ import numpy as np
3
+ from sklearn.metrics import mutual_info_score
4
+ # 符号
5
+ import sympy as sp
6
+ import pandas as pd
7
+
8
+ # symbols
9
# Placeholder symbols that may appear in an expression string.
X, Y, Z = sp.symbols("X Y Z")


class MI(sp.Function):
    """Symbolic two-argument function, exposed to the parser as ``I``."""
    nargs = (2,)


class CMI(sp.Function):
    """Symbolic three-argument function, exposed to the parser as ``CI``."""
    nargs = (3,)


class II(sp.Function):
    """Symbolic three-argument function (interaction information)."""
    nargs = (3,)


# Names the expression parser is allowed to resolve:
#   I  -> mutual information              I(X,Y)
#   CI -> conditional mutual information  I(X,Y|Z)
#   II -> interaction information         I(X;Y;Z)
ALLOWED_LOCALS = dict(X=X, Y=Y, Z=Z, I=MI, CI=CMI, II=II)
30
+
31
def parse_expression(expr_str: str) -> sp.Expr:
    """Turn an expression string into a SymPy expression.

    Names are resolved through ``ALLOWED_LOCALS``.

    NOTE(review): ``sympify`` evaluates the string; only pass trusted text.
    """
    return sp.sympify(expr_str, locals=ALLOWED_LOCALS)
37
+
38
+
39
def entropy(x):
    """Return the empirical Shannon entropy of ``x`` in nats.

    Parameters
    ----------
    x : array-like
        1-D input: each element is one observation.
        2-D input: each *row* is one joint observation, so the result is the
        joint entropy of the columns.

    Returns
    -------
    numpy.float64
        ``-sum(p * log(p))`` over the distinct observations.
    """
    values = np.asarray(x)
    if values.ndim > 1:
        # Without axis=0 np.unique flattens the input and would count pooled
        # cell values instead of distinct rows.
        _, cnt = np.unique(values, axis=0, return_counts=True)
    else:
        _, cnt = np.unique(values, return_counts=True)
    p = cnt / cnt.sum()
    # Every count is >= 1, so p > 0 and no epsilon is needed inside the log.
    return -np.sum(p * np.log(p))
43
+
44
def mi(x, y):
    """Mutual information between two label sequences.

    Thin wrapper around ``sklearn.metrics.mutual_info_score``.
    """
    score = mutual_info_score(x, y)
    return score
46
+
47
def cmi(x, y, z):
    """Conditional mutual information assembled from entropy terms.

    Uses I(X;Y|Z) = H(X,Z) + H(Y,Z) - H(Z) - H(X,Y,Z).  The joint variables
    are built by stacking the inputs column-wise and are handed to
    ``entropy`` as nested lists.
    """
    xz_rows = np.c_[x, z].tolist()
    yz_rows = np.c_[y, z].tolist()
    xyz_rows = np.c_[x, y, z].tolist()

    h_xz = entropy(xz_rows)
    h_yz = entropy(yz_rows)
    h_z = entropy(z)
    h_xyz = entropy(xyz_rows)
    return h_xz + h_yz - h_z - h_xyz
55
+
56
def interaction_info(x, y, z):
    """Interaction information I(X;Y;Z) = I(X;Y) - I(X;Y|Z)."""
    unconditional = mi(x, y)
    conditional = cmi(x, y, z)
    return unconditional - conditional
59
+
60
+
61
def expr_to_callable(expr: sp.Expr):
    """Compile a parsed expression into ``f(X_arr, Y_arr, Z_arr=None)``.

    The returned function walks the expression tree: ``MI`` / ``CMI`` / ``II``
    nodes call ``mi`` / ``cmi`` / ``interaction_info``, the symbols X, Y, Z
    are replaced by the supplied arrays, and numbers, sums, products and
    powers are evaluated arithmetically.  Any other node raises
    ``ValueError``; a reference to Z when ``Z_arr`` was not supplied raises
    ``KeyError``.
    """

    def _evaluate(node, ctx):
        # Information-theoretic function nodes first.
        if isinstance(node, MI):
            left, right = node.args[0], node.args[1]
            return mi(_evaluate(left, ctx), _evaluate(right, ctx))

        if isinstance(node, CMI):
            first, second, given = node.args[0], node.args[1], node.args[2]
            return cmi(_evaluate(first, ctx),
                       _evaluate(second, ctx),
                       _evaluate(given, ctx))

        if isinstance(node, II):
            first, second, third = node.args[0], node.args[1], node.args[2]
            return interaction_info(_evaluate(first, ctx),
                                    _evaluate(second, ctx),
                                    _evaluate(third, ctx))

        # Data placeholders.
        for symbol, key in ((X, "X"), (Y, "Y"), (Z, "Z")):
            if node == symbol:
                return ctx[key]

        # Plain arithmetic.
        if node.is_Number:
            return float(node)

        if node.is_Add:
            total = 0
            for term in node.args:
                total = total + _evaluate(term, ctx)
            return total

        if node.is_Mul:
            product = 1.0
            for factor in node.args:
                product *= _evaluate(factor, ctx)
            return product

        if node.is_Pow:
            base, exp = node.args
            return _evaluate(base, ctx) ** _evaluate(exp, ctx)

        raise ValueError(f"Unsupported node: {node}")

    def f(X_arr, Y_arr, Z_arr=None):
        bindings = {"X": X_arr, "Y": Y_arr}
        if Z_arr is not None:
            bindings["Z"] = Z_arr
        return _evaluate(expr, bindings)

    return f
112
+
113
+
114
+
115
+ from sklearn.preprocessing import LabelEncoder
116
def changetosinge(x):
    """Convert a single cell value to a Python float."""
    as_float = float(x)
    return as_float
118
+ # scores = f(X, y, X_other_list)
119
def prepare_data(dataname, base_url):
    """Load ``<base_url>/<dataname>.mat`` and return a discretised frame.

    The file must contain ``'X'`` (features) and ``'Y'`` (labels).  Every
    feature is converted to float and cut into 5 equal-width bins; labels are
    encoded with ``LabelEncoder``.

    Parameters
    ----------
    dataname : str
        Dataset name without the ``.mat`` suffix.
    base_url : str
        Directory containing the dataset.

    Returns
    -------
    data_processed : pandas.DataFrame
        Binned features in columns ``'0' .. str(n_features - 1)`` followed by
        the encoded label in column ``str(n_features)``.
    classes : list
        The distinct encoded labels in ascending order.
    """
    url = os.path.join(base_url, dataname + '.mat')
    data = scio.loadmat(url)
    X0 = pd.DataFrame(data['X'])
    y0 = pd.DataFrame(data['Y'])

    if dataname == 'Dermatology':
        # The last column of this dataset holds wrapped non-numeric values:
        # unwrap, label-encode, and put the result back as column 33.
        special = X0.iloc[:, -1]
        a = np.array([item[0] for item in special])
        a33 = LabelEncoder().fit_transform(a)
        # .copy() so the assignment below writes to an independent frame
        # rather than to a slice of the original.
        X0 = X0.iloc[:, :-1].copy()
        X0[33] = a33

    # DataFrame.applymap is deprecated since pandas 2.1 in favour of
    # DataFrame.map; use whichever the installed pandas provides.
    elementwise = getattr(pd.DataFrame, "map", None) or pd.DataFrame.applymap
    X0 = elementwise(X0, changetosinge)
    y0 = elementwise(y0, changetosinge)

    # LabelEncoder expects a 1-D target; flatten the (n, 1) label column.
    y_encoded = LabelEncoder().fit_transform(y0.to_numpy().ravel())
    y = pd.DataFrame(y_encoded)

    # Build all binned columns at once instead of growing the frame one
    # column at a time.
    X = pd.DataFrame(
        {col: pd.cut(X0[col], bins=5, labels=False) for col in X0.columns}
    )

    new_columns = [str(i) for i in range(X.shape[1] + 1)]
    X = X.rename(columns=dict(zip(X.columns, new_columns[:-1])))
    y = y.rename(columns=dict(zip(y.columns, [new_columns[-1]])))
    data_processed = pd.concat([X, y], axis=1)

    return data_processed, sorted(set(y_encoded))
150
+
151
# These imports stay at module level: prepare_data() above relies on them.
import os
import scipy.io as scio

if __name__ == "__main__":
    # Smoke test: score the first feature against the label with I(X,Y).
    # Guarded so that importing this module no longer loads a dataset.
    dataname = 'Authorship'
    base_url = '/home/fangsensen/AutoFS/data/'
    data_processed, class_set = prepare_data(dataname, base_url)

    # prepare_data puts the label in the last column; these two names were
    # previously used without being defined.
    X_arr = data_processed['0']
    y_arr = data_processed[data_processed.columns[-1]]

    expr = parse_expression("I(X,Y)")
    f = expr_to_callable(expr)
    score = f(X_arr, y_arr)
    print(score)
modules/modules/expression_pool.py ADDED
@@ -0,0 +1,24 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import sqlite3, os
2
def init_db(db_path="data/results.db"):
    """Open (creating if needed) the expression database.

    Creates the parent directory of ``db_path`` when it has one, connects,
    and makes sure the ``exprs`` table exists.

    Parameters
    ----------
    db_path : str
        SQLite database location.  Paths without a directory component,
        including ``":memory:"``, are accepted.

    Returns
    -------
    sqlite3.Connection
        Open connection; the caller is responsible for closing it.
    """
    parent = os.path.dirname(db_path)
    if parent:
        # os.makedirs("") raises FileNotFoundError, so only create a
        # directory when the path actually names one.
        os.makedirs(parent, exist_ok=True)

    conn = sqlite3.connect(db_path)
    conn.execute("""
        CREATE TABLE IF NOT EXISTS exprs(
            expr TEXT PRIMARY KEY,
            score REAL,
            rationale TEXT,
            complexity TEXT,
            created_at TEXT
        )""")
    conn.commit()
    return conn
15
+
16
def add_expr(conn, expr, score, rationale="", complexity=""):
    """Insert an expression row, replacing any existing row with that ``expr``.

    ``created_at`` is filled in by SQLite (``datetime('now')``); the change
    is committed before returning.
    """
    statement = "INSERT OR REPLACE INTO exprs(expr, score, rationale, complexity, created_at) VALUES(?,?,?,?,datetime('now'))"
    values = (expr, score, rationale, complexity)
    conn.execute(statement, values)
    conn.commit()
20
+
21
def top_exprs(conn, k=5, min_score=0.0):
    """Return up to ``k`` rows ``(expr, score, rationale)`` ordered by score.

    Only rows with ``score >= min_score`` are considered; the highest score
    comes first.
    """
    query = "SELECT expr,score,rationale FROM exprs WHERE score>=? ORDER BY score DESC LIMIT ?"
    return conn.execute(query, (min_score, k)).fetchall()
modules/modules/utils.py ADDED
@@ -0,0 +1,35 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import numpy as np
2
+ import scipy.io as sio
3
+
4
def load_mat_as_numeric(path, x_key="X", y_key="Y"):
    """Load features and labels from a .mat file as plain NumPy arrays.

    Handles both ordinary numeric arrays and MATLAB cell arrays whose cells
    wrap a single value (e.g. ``array(['46.0'])``).

    Parameters
    ----------
    path : str
        Location of the .mat file.
    x_key, y_key : str
        Variable names of the feature matrix and the labels.

    Returns
    -------
    X : np.ndarray
        Feature matrix as float.
    y : np.ndarray
        Flattened labels, float when every label is numeric, otherwise str.

    Raises
    ------
    KeyError
        If ``x_key`` or ``y_key`` is missing from the file.
    ValueError
        If a feature value cannot be converted to float.
    """
    data = sio.loadmat(path)
    X_raw = data[x_key]
    y_raw = data[y_key]

    # Step 1: flatten MATLAB cell array elements
    def clean_cell_array(arr):
        arr = np.asarray(arr)
        # Plain numeric arrays have nothing to unwrap or strip.
        if arr.dtype.kind in "biuf":
            return arr

        cleaned = []
        for row in arr:
            new_row = []
            for elem in row:
                # elem is usually array(['46.0']); unwrap down to the scalar
                while isinstance(elem, np.ndarray):
                    elem = elem.ravel()[0]  # '46.0'
                # Only text has surrounding whitespace to remove; numeric
                # scalars have no .strip().
                if isinstance(elem, str):
                    elem = elem.strip()
                new_row.append(elem)
            cleaned.append(new_row)
        return np.array(cleaned)

    X_str = clean_cell_array(X_raw)
    y_str = clean_cell_array(y_raw).reshape(-1)

    # Step 2: convert X to float
    X = X_str.astype(float)

    # Step 3: convert y to numeric or keep string
    try:
        y = y_str.astype(float)
    except (ValueError, TypeError):
        y = y_str.astype(str)

    return X, y
modules/utils.py ADDED
@@ -0,0 +1,35 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import numpy as np
2
+ import scipy.io as sio
3
+
4
def load_mat_as_numeric(path, x_key="X", y_key="Y"):
    """Load features and labels from a .mat file as plain NumPy arrays.

    Handles both ordinary numeric arrays and MATLAB cell arrays whose cells
    wrap a single value (e.g. ``array(['46.0'])``).

    Parameters
    ----------
    path : str
        Location of the .mat file.
    x_key, y_key : str
        Variable names of the feature matrix and the labels.

    Returns
    -------
    X : np.ndarray
        Feature matrix as float.
    y : np.ndarray
        Flattened labels, float when every label is numeric, otherwise str.

    Raises
    ------
    KeyError
        If ``x_key`` or ``y_key`` is missing from the file.
    ValueError
        If a feature value cannot be converted to float.
    """
    data = sio.loadmat(path)
    X_raw = data[x_key]
    y_raw = data[y_key]

    # Step 1: flatten MATLAB cell array elements
    def clean_cell_array(arr):
        arr = np.asarray(arr)
        # Plain numeric arrays have nothing to unwrap or strip.
        if arr.dtype.kind in "biuf":
            return arr

        cleaned = []
        for row in arr:
            new_row = []
            for elem in row:
                # elem is usually array(['46.0']); unwrap down to the scalar
                while isinstance(elem, np.ndarray):
                    elem = elem.ravel()[0]  # '46.0'
                # Only text has surrounding whitespace to remove; numeric
                # scalars have no .strip().
                if isinstance(elem, str):
                    elem = elem.strip()
                new_row.append(elem)
            cleaned.append(new_row)
        return np.array(cleaned)

    X_str = clean_cell_array(X_raw)
    y_str = clean_cell_array(y_raw).reshape(-1)

    # Step 2: convert X to float
    X = X_str.astype(float)

    # Step 3: convert y to numeric or keep string
    try:
        y = y_str.astype(float)
    except (ValueError, TypeError):
        y = y_str.astype(str)

    return X, y
pdf/CFR.pdf ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:43040c5cd02372547ccfd0ff233c1f8db8492bb79cd6648471a6b261197a291b
3
+ size 1011137
pdf/CIFE.pdf ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:40f99e19316a356c47d3c228bb17ac7d4200a55e6d36da9adbc01d13ae978aab
3
+ size 900327
pdf/CMIFS.pdf ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:9afa507819700f14a66009e7ea7061b178427de7a0c4455d1a666c16b7d261e5
3
+ size 782737
pdf/CMIM.pdf ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:e59696e31620f0328f7a5aaf9c667180537c03892735ff68cbbbfa721bd72506
3
+ size 192556
pdf/CSMDCCMR.pdf ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:de200fbb840c74fcd142883c81ff10824a872d06292559abb178bb937ee0e541
3
+ size 1210174
pdf/CSMI.pdf ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:fc3c25a21685f327822a6794f3986a8d89bee172f6fb413f063529464c608dba
3
+ size 5412954
pdf/DCSF.pdf ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:31b0c67c1eadb708396803b6991298c4b965b0ed827b455a77e02281c9505475
3
+ size 1533397
pdf/DISR.pdf ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:12163bcb61689372a0fa6c057b55614e70fd263c75cd0869e58fb1a8bc9ef85b
3
+ size 215860
pdf/DWFS.pdf ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:f7d9f02c654e665591d365940f0b7239eb5d64f669210683eef7282aede6c378
3
+ size 817732
pdf/IWFS.pdf ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:9f8387f8508dbe3f74abfb41acf1c68f3da99e93dc465b10e680051fadd7e091
3
+ size 582942
pdf/JMI.pdf ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:e8491d05d11decc319e5d83f3867bc06bf9ccc984f6bd1854060684067bbd14c
3
+ size 1442313
pdf/JMIM.pdf ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:880adb6df5fec2761b2aafbbf555dc9aebfd10fe9016dd92eb5b4ff481494dd9
3
+ size 1064488
pdf/MIM.pdf ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:232e906e125fa5173af1ed66b446740d11e5c43b2dd911f3af729672141b4fbb
3
+ size 506331
pdf/MRI.pdf ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:4ca2c265bb597e2886c33448b028845f0755ca0cd952eb319dc74f31929ad300
3
+ size 1189309
pdf/MRMD.pdf ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:d067d694f1c70da19da5455237a5f33601c213f11f452ab73ee0d7ce9ccca8a9
3
+ size 2395695
pdf/MRMR.pdf ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:bb30d4b51eb630aff13a33cdd539d756b99021fa3446ad61cf82322cb5b97dee
3
+ size 1295526
pdf/UCRFS.pdf ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:1fb819c4f606121529e3f7c4de93e652fb1d7d88173600adb87ea0188e8bd528
3
+ size 1823221
requirements.txt CHANGED
@@ -1,3 +1,11 @@
1
- Flask
2
- pandas
 
3
  numpy
 
 
 
 
 
 
 
 
1
+ bitsandbytes
2
+ accelerate
3
+ sympy
4
  numpy
5
+ pandas
6
+ scikit-learn
7
+ sqlalchemy
8
+ lark-parser
9
+ autogen
10
+ tqdm
11
+ python-dotenv
test.py ADDED
@@ -0,0 +1,7 @@
 
 
 
 
 
 
 
 
1
import pickle

# Quick inspection script: dump a stored result file to stdout.
path = "results/dna.pkl"

# NOTE(review): pickle.load executes code embedded in the file; only point
# this at result files produced by this project.
with open(path, "rb") as handle:
    results = pickle.load(handle)

print(results)
+ print(results)