Upload 78 files
Browse files- .gitattributes +30 -0
- Webapp/app.py +28 -2
- Webapp/templates/global.html +122 -38
- Webapp/templates/index.html +484 -258
- check_datasets_api.py +20 -0
- config.py +10 -0
- data/Authorship.mat +3 -0
- data/Dermatology.mat +3 -0
- data/Factors.mat +3 -0
- data/Movement_libras.mat +3 -0
- data/Musk1.mat +3 -0
- data/Synthetic_control.mat +3 -0
- data/Waveform.mat +3 -0
- data/Wdbc.mat +3 -0
- data/analyzor.txt +57 -0
- data/dna.mat +3 -0
- data/expressions.py +9 -0
- data/madelon.mat +3 -0
- data/results.db +3 -0
- data/spambase.mat +3 -0
- data/splice.mat +3 -0
- download_model.py +11 -0
- extract_metadata.py +77 -0
- main.py +244 -0
- modules/expr_to_code.py +163 -0
- modules/expression_pool.py +24 -0
- modules/modules/expr_to_code.py +163 -0
- modules/modules/expression_pool.py +24 -0
- modules/modules/utils.py +35 -0
- modules/utils.py +35 -0
- pdf/CFR.pdf +3 -0
- pdf/CIFE.pdf +3 -0
- pdf/CMIFS.pdf +3 -0
- pdf/CMIM.pdf +3 -0
- pdf/CSMDCCMR.pdf +3 -0
- pdf/CSMI.pdf +3 -0
- pdf/DCSF.pdf +3 -0
- pdf/DISR.pdf +3 -0
- pdf/DWFS.pdf +3 -0
- pdf/IWFS.pdf +3 -0
- pdf/JMI.pdf +3 -0
- pdf/JMIM.pdf +3 -0
- pdf/MIM.pdf +3 -0
- pdf/MRI.pdf +3 -0
- pdf/MRMD.pdf +3 -0
- pdf/MRMR.pdf +3 -0
- pdf/UCRFS.pdf +3 -0
- requirements.txt +10 -2
- test.py +7 -0
.gitattributes
CHANGED
|
@@ -33,3 +33,33 @@ saved_model/**/* filter=lfs diff=lfs merge=lfs -text
|
|
| 33 |
*.zip filter=lfs diff=lfs merge=lfs -text
|
| 34 |
*.zst filter=lfs diff=lfs merge=lfs -text
|
| 35 |
*tfevents* filter=lfs diff=lfs merge=lfs -text
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 33 |
*.zip filter=lfs diff=lfs merge=lfs -text
|
| 34 |
*.zst filter=lfs diff=lfs merge=lfs -text
|
| 35 |
*tfevents* filter=lfs diff=lfs merge=lfs -text
|
| 36 |
+
data/Authorship.mat filter=lfs diff=lfs merge=lfs -text
|
| 37 |
+
data/Dermatology.mat filter=lfs diff=lfs merge=lfs -text
|
| 38 |
+
data/dna.mat filter=lfs diff=lfs merge=lfs -text
|
| 39 |
+
data/Factors.mat filter=lfs diff=lfs merge=lfs -text
|
| 40 |
+
data/madelon.mat filter=lfs diff=lfs merge=lfs -text
|
| 41 |
+
data/Movement_libras.mat filter=lfs diff=lfs merge=lfs -text
|
| 42 |
+
data/Musk1.mat filter=lfs diff=lfs merge=lfs -text
|
| 43 |
+
data/results.db filter=lfs diff=lfs merge=lfs -text
|
| 44 |
+
data/spambase.mat filter=lfs diff=lfs merge=lfs -text
|
| 45 |
+
data/splice.mat filter=lfs diff=lfs merge=lfs -text
|
| 46 |
+
data/Synthetic_control.mat filter=lfs diff=lfs merge=lfs -text
|
| 47 |
+
data/Waveform.mat filter=lfs diff=lfs merge=lfs -text
|
| 48 |
+
data/Wdbc.mat filter=lfs diff=lfs merge=lfs -text
|
| 49 |
+
pdf/CFR.pdf filter=lfs diff=lfs merge=lfs -text
|
| 50 |
+
pdf/CIFE.pdf filter=lfs diff=lfs merge=lfs -text
|
| 51 |
+
pdf/CMIFS.pdf filter=lfs diff=lfs merge=lfs -text
|
| 52 |
+
pdf/CMIM.pdf filter=lfs diff=lfs merge=lfs -text
|
| 53 |
+
pdf/CSMDCCMR.pdf filter=lfs diff=lfs merge=lfs -text
|
| 54 |
+
pdf/CSMI.pdf filter=lfs diff=lfs merge=lfs -text
|
| 55 |
+
pdf/DCSF.pdf filter=lfs diff=lfs merge=lfs -text
|
| 56 |
+
pdf/DISR.pdf filter=lfs diff=lfs merge=lfs -text
|
| 57 |
+
pdf/DWFS.pdf filter=lfs diff=lfs merge=lfs -text
|
| 58 |
+
pdf/IWFS.pdf filter=lfs diff=lfs merge=lfs -text
|
| 59 |
+
pdf/JMI.pdf filter=lfs diff=lfs merge=lfs -text
|
| 60 |
+
pdf/JMIM.pdf filter=lfs diff=lfs merge=lfs -text
|
| 61 |
+
pdf/MIM.pdf filter=lfs diff=lfs merge=lfs -text
|
| 62 |
+
pdf/MRI.pdf filter=lfs diff=lfs merge=lfs -text
|
| 63 |
+
pdf/MRMD.pdf filter=lfs diff=lfs merge=lfs -text
|
| 64 |
+
pdf/MRMR.pdf filter=lfs diff=lfs merge=lfs -text
|
| 65 |
+
pdf/UCRFS.pdf filter=lfs diff=lfs merge=lfs -text
|
Webapp/app.py
CHANGED
|
@@ -2,7 +2,8 @@ import os
|
|
| 2 |
import sys
|
| 3 |
import pickle
|
| 4 |
import json
|
| 5 |
-
|
|
|
|
| 6 |
|
| 7 |
# Add project root to sys.path to import leaderboard
|
| 8 |
sys.path.append(os.path.abspath(os.path.join(os.path.dirname(__file__), "..")))
|
|
@@ -13,6 +14,7 @@ from leaderboard import rank_results
|
|
| 13 |
PROJECT_ROOT = os.path.abspath(os.path.join(os.path.dirname(__file__), ".."))
|
| 14 |
RESULT_DIR = os.path.join(PROJECT_ROOT, "results")
|
| 15 |
DATASET_DIR = os.path.join(PROJECT_ROOT, "datasets")
|
|
|
|
| 16 |
|
| 17 |
os.makedirs(RESULT_DIR, exist_ok=True)
|
| 18 |
os.makedirs(DATASET_DIR, exist_ok=True)
|
|
@@ -179,7 +181,26 @@ def get_results():
|
|
| 179 |
# ===============================
|
| 180 |
@app.route("/api/datasets")
|
| 181 |
def api_datasets():
|
| 182 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 183 |
|
| 184 |
|
| 185 |
@app.route("/api/global_stats")
|
|
@@ -256,6 +277,11 @@ def get_global_stats():
|
|
| 256 |
return jsonify(final_list)
|
| 257 |
|
| 258 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 259 |
if __name__ == "__main__":
|
| 260 |
port = int(os.environ.get("PORT", 7860))
|
| 261 |
app.run(host="0.0.0.0", port=port, debug=False)
|
|
|
|
| 2 |
import sys
|
| 3 |
import pickle
|
| 4 |
import json
|
| 5 |
+
import datetime
|
| 6 |
+
from flask import Flask, jsonify, request, render_template, send_from_directory
|
| 7 |
|
| 8 |
# Add project root to sys.path to import leaderboard
|
| 9 |
sys.path.append(os.path.abspath(os.path.join(os.path.dirname(__file__), "..")))
|
|
|
|
| 14 |
PROJECT_ROOT = os.path.abspath(os.path.join(os.path.dirname(__file__), ".."))
|
| 15 |
RESULT_DIR = os.path.join(PROJECT_ROOT, "results")
|
| 16 |
DATASET_DIR = os.path.join(PROJECT_ROOT, "datasets")
|
| 17 |
+
PDF_DIR = os.path.join(PROJECT_ROOT, "pdf")
|
| 18 |
|
| 19 |
os.makedirs(RESULT_DIR, exist_ok=True)
|
| 20 |
os.makedirs(DATASET_DIR, exist_ok=True)
|
|
|
|
| 181 |
# ===============================
|
| 182 |
@app.route("/api/datasets")
|
| 183 |
def api_datasets():
|
| 184 |
+
try:
|
| 185 |
+
datasets = []
|
| 186 |
+
ds_names = list_available_datasets()
|
| 187 |
+
for name in ds_names:
|
| 188 |
+
# Get modification time of the result file
|
| 189 |
+
result_path = os.path.join(RESULT_DIR, f"{name}.json")
|
| 190 |
+
last_updated = "Unknown"
|
| 191 |
+
if os.path.exists(result_path):
|
| 192 |
+
mtime = os.path.getmtime(result_path)
|
| 193 |
+
last_updated = datetime.datetime.fromtimestamp(mtime).strftime('%Y-%m-%d')
|
| 194 |
+
|
| 195 |
+
datasets.append({
|
| 196 |
+
"name": name,
|
| 197 |
+
"last_updated": last_updated
|
| 198 |
+
})
|
| 199 |
+
return jsonify(datasets)
|
| 200 |
+
except Exception as e:
|
| 201 |
+
import traceback
|
| 202 |
+
traceback.print_exc()
|
| 203 |
+
return jsonify({"error": str(e)}), 500
|
| 204 |
|
| 205 |
|
| 206 |
@app.route("/api/global_stats")
|
|
|
|
| 277 |
return jsonify(final_list)
|
| 278 |
|
| 279 |
|
| 280 |
+
@app.route("/pdfs/<path:filename>")
|
| 281 |
+
def serve_pdf(filename):
|
| 282 |
+
return send_from_directory(PDF_DIR, filename)
|
| 283 |
+
|
| 284 |
+
|
| 285 |
if __name__ == "__main__":
|
| 286 |
port = int(os.environ.get("PORT", 7860))
|
| 287 |
app.run(host="0.0.0.0", port=port, debug=False)
|
Webapp/templates/global.html
CHANGED
|
@@ -60,6 +60,24 @@
|
|
| 60 |
background-color: #34495e;
|
| 61 |
}
|
| 62 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 63 |
.weights-control {
|
| 64 |
background-color: #f1f1f1;
|
| 65 |
padding: 15px;
|
|
@@ -89,16 +107,21 @@
|
|
| 89 |
|
| 90 |
.slider-group label {
|
| 91 |
font-weight: bold;
|
| 92 |
-
min-width:
|
| 93 |
}
|
| 94 |
|
| 95 |
input[type="number"] {
|
| 96 |
-
width:
|
| 97 |
padding: 5px;
|
| 98 |
border: 1px solid #ccc;
|
| 99 |
border-radius: 4px;
|
| 100 |
}
|
| 101 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 102 |
button.recalc-btn {
|
| 103 |
background-color: var(--primary-color);
|
| 104 |
color: white;
|
|
@@ -162,11 +185,16 @@
|
|
| 162 |
background-color: var(--primary-color);
|
| 163 |
}
|
| 164 |
|
| 165 |
-
.
|
| 166 |
-
|
| 167 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 168 |
margin-top: 5px;
|
| 169 |
-
display: none;
|
| 170 |
}
|
| 171 |
</style>
|
| 172 |
</head>
|
|
@@ -174,36 +202,46 @@
|
|
| 174 |
|
| 175 |
<div class="container">
|
| 176 |
<header>
|
| 177 |
-
<
|
|
|
|
|
|
|
|
|
|
| 178 |
<a href="/" class="nav-link">← Back to Dataset View</a>
|
| 179 |
</header>
|
| 180 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 181 |
<div class="weights-control">
|
| 182 |
-
<h3>🏆 Scoring Formula: S =
|
| 183 |
<p style="font-size: 0.9em; color: #666; margin-bottom: 10px;">
|
| 184 |
-
|
| 185 |
-
<br>Constraint: a + b + c = 1.
|
| 186 |
</p>
|
| 187 |
|
| 188 |
<div class="sliders-container">
|
| 189 |
<div class="slider-group">
|
| 190 |
-
<label for="weight-a">
|
| 191 |
-
<input type="number" id="weight-a" value="0.4" step="0.
|
| 192 |
</div>
|
| 193 |
|
| 194 |
<div class="slider-group">
|
| 195 |
-
<label for="weight-b">
|
| 196 |
-
<input type="number" id="weight-b" value="0.4" step="0.
|
| 197 |
</div>
|
| 198 |
|
| 199 |
<div class="slider-group">
|
| 200 |
-
<label for="weight-c">
|
| 201 |
-
<input type="number" id="weight-c" value="0.2"
|
| 202 |
</div>
|
| 203 |
|
| 204 |
<button class="recalc-btn" onclick="calculateAndRender()">Recalculate Rankings</button>
|
| 205 |
</div>
|
| 206 |
-
<div id="weight-warning" class="warning-text">⚠️ Weights must sum to 1.0</div>
|
| 207 |
</div>
|
| 208 |
|
| 209 |
<div id="loading-indicator" style="text-align: center; color: #666;">Loading global stats...</div>
|
|
@@ -215,7 +253,7 @@
|
|
| 215 |
<th data-key="algorithm">Algorithm <span class="arrow">↕</span></th>
|
| 216 |
<th data-key="mean_f1_global">Global F1 <span class="arrow">↕</span></th>
|
| 217 |
<th data-key="mean_auc_global">Global AUC <span class="arrow">↕</span></th>
|
| 218 |
-
<th data-key="mean_time_global">
|
| 219 |
<th data-key="final_score">Final Score <span class="arrow">↕</span></th>
|
| 220 |
</tr>
|
| 221 |
</thead>
|
|
@@ -235,7 +273,27 @@
|
|
| 235 |
const weightA = document.getElementById("weight-a");
|
| 236 |
const weightB = document.getElementById("weight-b");
|
| 237 |
const weightC = document.getElementById("weight-c");
|
| 238 |
-
const
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 239 |
|
| 240 |
function fetchGlobalStats() {
|
| 241 |
fetch("/api/global_stats")
|
|
@@ -251,20 +309,50 @@
|
|
| 251 |
});
|
| 252 |
}
|
| 253 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 254 |
function calculateAndRender() {
|
| 255 |
const a = parseFloat(weightA.value) || 0;
|
| 256 |
const b = parseFloat(weightB.value) || 0;
|
| 257 |
const c = parseFloat(weightC.value) || 0;
|
| 258 |
|
| 259 |
-
// Validation
|
| 260 |
-
const sum = a + b + c;
|
| 261 |
-
if (Math.abs(sum - 1.0) > 0.01) {
|
| 262 |
-
warningText.style.display = 'block';
|
| 263 |
-
warningText.textContent = `⚠️ Weights sum to ${sum.toFixed(2)}. They should sum to 1.0.`;
|
| 264 |
-
} else {
|
| 265 |
-
warningText.style.display = 'none';
|
| 266 |
-
}
|
| 267 |
-
|
| 268 |
// Find min/max time for normalization
|
| 269 |
let minTime = Infinity;
|
| 270 |
let maxTime = -Infinity;
|
|
@@ -273,14 +361,11 @@
|
|
| 273 |
if (d.mean_time_global > maxTime) maxTime = d.mean_time_global;
|
| 274 |
});
|
| 275 |
|
| 276 |
-
// Prevent division by zero if all times are same
|
| 277 |
const timeRange = maxTime - minTime;
|
| 278 |
|
| 279 |
// Process data
|
| 280 |
processedData = rawData.map(d => {
|
| 281 |
// Time Score: 1 if fast, 0 if slow
|
| 282 |
-
// Formula: 1 - (time - min) / (max - min)
|
| 283 |
-
// If range is 0, score is 1 (all same speed)
|
| 284 |
let timeScore = 1.0;
|
| 285 |
if (timeRange > 0.0001) {
|
| 286 |
timeScore = 1.0 - ((d.mean_time_global - minTime) / timeRange);
|
|
@@ -338,8 +423,8 @@
|
|
| 338 |
<td>${safeFixed(row.mean_f1_global)}</td>
|
| 339 |
<td>${safeFixed(row.mean_auc_global)}</td>
|
| 340 |
<td>
|
| 341 |
-
${safeFixed(row.
|
| 342 |
-
<div
|
| 343 |
</td>
|
| 344 |
<td>
|
| 345 |
<strong>${safeFixed(row.final_score)}</strong>
|
|
@@ -358,7 +443,6 @@
|
|
| 358 |
if (activeHeader) activeHeader.textContent = sortDirection === 1 ? '↑' : '↓';
|
| 359 |
}
|
| 360 |
|
| 361 |
-
// Sort handlers
|
| 362 |
document.querySelectorAll('th[data-key]').forEach(th => {
|
| 363 |
th.addEventListener('click', () => {
|
| 364 |
const key = th.dataset.key;
|
|
@@ -368,19 +452,19 @@
|
|
| 368 |
sortKey = key;
|
| 369 |
sortDirection = (key === 'rank' || key === 'mean_time_global') ? 1 : -1;
|
| 370 |
}
|
| 371 |
-
// For rank, it's just index, but let's assume we sort by score desc if rank is clicked
|
| 372 |
if (key === 'rank') {
|
| 373 |
sortKey = 'final_score';
|
| 374 |
sortDirection = -1;
|
| 375 |
}
|
| 376 |
-
|
| 377 |
sortData();
|
| 378 |
renderTable();
|
| 379 |
});
|
| 380 |
});
|
| 381 |
|
| 382 |
-
|
| 383 |
-
|
|
|
|
|
|
|
| 384 |
|
| 385 |
</script>
|
| 386 |
|
|
|
|
| 60 |
background-color: #34495e;
|
| 61 |
}
|
| 62 |
|
| 63 |
+
.description-box {
|
| 64 |
+
background-color: #e8f4fd;
|
| 65 |
+
border-left: 4px solid #3498db;
|
| 66 |
+
padding: 15px;
|
| 67 |
+
margin-bottom: 20px;
|
| 68 |
+
border-radius: 4px;
|
| 69 |
+
}
|
| 70 |
+
|
| 71 |
+
.description-box h3 {
|
| 72 |
+
margin-top: 0;
|
| 73 |
+
color: #2980b9;
|
| 74 |
+
}
|
| 75 |
+
|
| 76 |
+
.description-box p {
|
| 77 |
+
margin: 5px 0;
|
| 78 |
+
line-height: 1.5;
|
| 79 |
+
}
|
| 80 |
+
|
| 81 |
.weights-control {
|
| 82 |
background-color: #f1f1f1;
|
| 83 |
padding: 15px;
|
|
|
|
| 107 |
|
| 108 |
.slider-group label {
|
| 109 |
font-weight: bold;
|
| 110 |
+
min-width: 60px;
|
| 111 |
}
|
| 112 |
|
| 113 |
input[type="number"] {
|
| 114 |
+
width: 80px;
|
| 115 |
padding: 5px;
|
| 116 |
border: 1px solid #ccc;
|
| 117 |
border-radius: 4px;
|
| 118 |
}
|
| 119 |
|
| 120 |
+
input[readonly] {
|
| 121 |
+
background-color: #e9ecef;
|
| 122 |
+
color: #666;
|
| 123 |
+
}
|
| 124 |
+
|
| 125 |
button.recalc-btn {
|
| 126 |
background-color: var(--primary-color);
|
| 127 |
color: white;
|
|
|
|
| 185 |
background-color: var(--primary-color);
|
| 186 |
}
|
| 187 |
|
| 188 |
+
.time-detail {
|
| 189 |
+
font-size: 0.8em;
|
| 190 |
+
color: #666;
|
| 191 |
+
margin-top: 2px;
|
| 192 |
+
}
|
| 193 |
+
|
| 194 |
+
.version-tag {
|
| 195 |
+
font-size: 0.8em;
|
| 196 |
+
color: #7f8c8d;
|
| 197 |
margin-top: 5px;
|
|
|
|
| 198 |
}
|
| 199 |
</style>
|
| 200 |
</head>
|
|
|
|
| 202 |
|
| 203 |
<div class="container">
|
| 204 |
<header>
|
| 205 |
+
<div>
|
| 206 |
+
<h1>🌍 Global Algorithm Rankings</h1>
|
| 207 |
+
<div id="last-updated" class="version-tag">Data Last Updated: Loading...</div>
|
| 208 |
+
</div>
|
| 209 |
<a href="/" class="nav-link">← Back to Dataset View</a>
|
| 210 |
</header>
|
| 211 |
|
| 212 |
+
<div class="description-box">
|
| 213 |
+
<h3>About Global Rankings</h3>
|
| 214 |
+
<p>
|
| 215 |
+
This page provides a comprehensive evaluation of feature selection algorithms across all available datasets.
|
| 216 |
+
Algorithms are ranked based on a weighted score combining <strong>Accuracy (F1)</strong>, <strong>Robustness (AUC)</strong>, and <strong>Efficiency (Time)</strong>.
|
| 217 |
+
You can adjust the importance of each factor below to customize the ranking criteria.
|
| 218 |
+
</p>
|
| 219 |
+
</div>
|
| 220 |
+
|
| 221 |
<div class="weights-control">
|
| 222 |
+
<h3>🏆 Scoring Formula: S = α·F1 + β·AUC + γ·TimeScore</h3>
|
| 223 |
<p style="font-size: 0.9em; color: #666; margin-bottom: 10px;">
|
| 224 |
+
Constraint: α + β + γ = 1. TimeScore is normalized (1 = fastest).
|
|
|
|
| 225 |
</p>
|
| 226 |
|
| 227 |
<div class="sliders-container">
|
| 228 |
<div class="slider-group">
|
| 229 |
+
<label for="weight-a">F1 (α):</label>
|
| 230 |
+
<input type="number" id="weight-a" value="0.4" step="0.05" min="0" max="1">
|
| 231 |
</div>
|
| 232 |
|
| 233 |
<div class="slider-group">
|
| 234 |
+
<label for="weight-b">AUC (β):</label>
|
| 235 |
+
<input type="number" id="weight-b" value="0.4" step="0.05" min="0" max="1">
|
| 236 |
</div>
|
| 237 |
|
| 238 |
<div class="slider-group">
|
| 239 |
+
<label for="weight-c">Time (γ):</label>
|
| 240 |
+
<input type="number" id="weight-c" value="0.2" readonly title="Auto-calculated: 1 - α - β">
|
| 241 |
</div>
|
| 242 |
|
| 243 |
<button class="recalc-btn" onclick="calculateAndRender()">Recalculate Rankings</button>
|
| 244 |
</div>
|
|
|
|
| 245 |
</div>
|
| 246 |
|
| 247 |
<div id="loading-indicator" style="text-align: center; color: #666;">Loading global stats...</div>
|
|
|
|
| 253 |
<th data-key="algorithm">Algorithm <span class="arrow">↕</span></th>
|
| 254 |
<th data-key="mean_f1_global">Global F1 <span class="arrow">↕</span></th>
|
| 255 |
<th data-key="mean_auc_global">Global AUC <span class="arrow">↕</span></th>
|
| 256 |
+
<th data-key="mean_time_global">Efficiency (Time) <span class="arrow">↕</span></th>
|
| 257 |
<th data-key="final_score">Final Score <span class="arrow">↕</span></th>
|
| 258 |
</tr>
|
| 259 |
</thead>
|
|
|
|
| 273 |
const weightA = document.getElementById("weight-a");
|
| 274 |
const weightB = document.getElementById("weight-b");
|
| 275 |
const weightC = document.getElementById("weight-c");
|
| 276 |
+
const lastUpdatedDiv = document.getElementById("last-updated");
|
| 277 |
+
|
| 278 |
+
// Fetch datasets info to get latest date
|
| 279 |
+
function fetchLastUpdated() {
|
| 280 |
+
fetch("/api/datasets")
|
| 281 |
+
.then(res => res.json())
|
| 282 |
+
.then(data => {
|
| 283 |
+
if (data.length > 0) {
|
| 284 |
+
// Sort by date to find latest? Or just take one?
|
| 285 |
+
// Usually we want the overall latest date.
|
| 286 |
+
// Let's just pick the first one's date or find the max date if needed.
|
| 287 |
+
// For simplicity, just showing "Latest"
|
| 288 |
+
const dates = data.map(d => d.last_updated).filter(d => d !== 'Unknown').sort().reverse();
|
| 289 |
+
if (dates.length > 0) {
|
| 290 |
+
lastUpdatedDiv.textContent = `Data Last Updated: ${dates[0]}`;
|
| 291 |
+
} else {
|
| 292 |
+
lastUpdatedDiv.textContent = `Data Last Updated: Unknown`;
|
| 293 |
+
}
|
| 294 |
+
}
|
| 295 |
+
});
|
| 296 |
+
}
|
| 297 |
|
| 298 |
function fetchGlobalStats() {
|
| 299 |
fetch("/api/global_stats")
|
|
|
|
| 309 |
});
|
| 310 |
}
|
| 311 |
|
| 312 |
+
// Weight auto-adjustment logic
|
| 313 |
+
function updateWeights(changedInput) {
|
| 314 |
+
let a = parseFloat(weightA.value) || 0;
|
| 315 |
+
let b = parseFloat(weightB.value) || 0;
|
| 316 |
+
|
| 317 |
+
// Clamp inputs to 0-1
|
| 318 |
+
if (a < 0) a = 0; if (a > 1) a = 1;
|
| 319 |
+
if (b < 0) b = 0; if (b > 1) b = 1;
|
| 320 |
+
|
| 321 |
+
if (changedInput === 'a') {
|
| 322 |
+
// If a changes, we try to adjust c first (c = 1 - a - b)
|
| 323 |
+
// If 1 - a - b < 0, it means a + b > 1, so we must reduce b
|
| 324 |
+
let c = 1 - a - b;
|
| 325 |
+
if (c < 0) {
|
| 326 |
+
b = Math.max(0, 1 - a); // Reduce b
|
| 327 |
+
c = 0; // c becomes 0
|
| 328 |
+
}
|
| 329 |
+
// Update UI
|
| 330 |
+
weightA.value = parseFloat(a.toFixed(2));
|
| 331 |
+
weightB.value = parseFloat(b.toFixed(2));
|
| 332 |
+
weightC.value = parseFloat(c.toFixed(2));
|
| 333 |
+
} else if (changedInput === 'b') {
|
| 334 |
+
// If b changes, we try to adjust c first
|
| 335 |
+
// If 1 - a - b < 0, we must reduce a
|
| 336 |
+
let c = 1 - a - b;
|
| 337 |
+
if (c < 0) {
|
| 338 |
+
a = Math.max(0, 1 - b); // Reduce a
|
| 339 |
+
c = 0;
|
| 340 |
+
}
|
| 341 |
+
// Update UI
|
| 342 |
+
weightA.value = parseFloat(a.toFixed(2));
|
| 343 |
+
weightB.value = parseFloat(b.toFixed(2));
|
| 344 |
+
weightC.value = parseFloat(c.toFixed(2));
|
| 345 |
+
}
|
| 346 |
+
}
|
| 347 |
+
|
| 348 |
+
weightA.addEventListener('input', () => updateWeights('a'));
|
| 349 |
+
weightB.addEventListener('input', () => updateWeights('b'));
|
| 350 |
+
|
| 351 |
function calculateAndRender() {
|
| 352 |
const a = parseFloat(weightA.value) || 0;
|
| 353 |
const b = parseFloat(weightB.value) || 0;
|
| 354 |
const c = parseFloat(weightC.value) || 0;
|
| 355 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 356 |
// Find min/max time for normalization
|
| 357 |
let minTime = Infinity;
|
| 358 |
let maxTime = -Infinity;
|
|
|
|
| 361 |
if (d.mean_time_global > maxTime) maxTime = d.mean_time_global;
|
| 362 |
});
|
| 363 |
|
|
|
|
| 364 |
const timeRange = maxTime - minTime;
|
| 365 |
|
| 366 |
// Process data
|
| 367 |
processedData = rawData.map(d => {
|
| 368 |
// Time Score: 1 if fast, 0 if slow
|
|
|
|
|
|
|
| 369 |
let timeScore = 1.0;
|
| 370 |
if (timeRange > 0.0001) {
|
| 371 |
timeScore = 1.0 - ((d.mean_time_global - minTime) / timeRange);
|
|
|
|
| 423 |
<td>${safeFixed(row.mean_f1_global)}</td>
|
| 424 |
<td>${safeFixed(row.mean_auc_global)}</td>
|
| 425 |
<td>
|
| 426 |
+
<strong>${safeFixed(row.time_score_norm)}</strong>
|
| 427 |
+
<div class="time-detail">${safeFixed(row.mean_time_global, 2)}s</div>
|
| 428 |
</td>
|
| 429 |
<td>
|
| 430 |
<strong>${safeFixed(row.final_score)}</strong>
|
|
|
|
| 443 |
if (activeHeader) activeHeader.textContent = sortDirection === 1 ? '↑' : '↓';
|
| 444 |
}
|
| 445 |
|
|
|
|
| 446 |
document.querySelectorAll('th[data-key]').forEach(th => {
|
| 447 |
th.addEventListener('click', () => {
|
| 448 |
const key = th.dataset.key;
|
|
|
|
| 452 |
sortKey = key;
|
| 453 |
sortDirection = (key === 'rank' || key === 'mean_time_global') ? 1 : -1;
|
| 454 |
}
|
|
|
|
| 455 |
if (key === 'rank') {
|
| 456 |
sortKey = 'final_score';
|
| 457 |
sortDirection = -1;
|
| 458 |
}
|
|
|
|
| 459 |
sortData();
|
| 460 |
renderTable();
|
| 461 |
});
|
| 462 |
});
|
| 463 |
|
| 464 |
+
document.addEventListener("DOMContentLoaded", () => {
|
| 465 |
+
fetchLastUpdated();
|
| 466 |
+
fetchGlobalStats();
|
| 467 |
+
});
|
| 468 |
|
| 469 |
</script>
|
| 470 |
|
Webapp/templates/index.html
CHANGED
|
@@ -13,6 +13,7 @@
|
|
| 13 |
--text-color: #333;
|
| 14 |
--border-color: #dee2e6;
|
| 15 |
--hover-color: #f1f1f1;
|
|
|
|
| 16 |
}
|
| 17 |
|
| 18 |
body {
|
|
@@ -59,6 +60,76 @@
|
|
| 59 |
font-size: 14px;
|
| 60 |
}
|
| 61 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 62 |
table {
|
| 63 |
width: 100%;
|
| 64 |
border-collapse: collapse;
|
|
@@ -122,6 +193,25 @@
|
|
| 122 |
color: var(--primary-color);
|
| 123 |
}
|
| 124 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 125 |
/* Modal styles */
|
| 126 |
.modal {
|
| 127 |
display: none;
|
|
@@ -171,13 +261,64 @@
|
|
| 171 |
padding: 20px;
|
| 172 |
color: #666;
|
| 173 |
}
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 174 |
</style>
|
| 175 |
</head>
|
| 176 |
<body>
|
| 177 |
|
| 178 |
<div class="container">
|
| 179 |
<header>
|
| 180 |
-
<
|
|
|
|
|
|
|
|
|
|
| 181 |
<div style="display:flex; gap:15px; align-items:center;">
|
| 182 |
<a href="/global" style="text-decoration:none; color:white; background-color:#8e44ad; padding:8px 15px; border-radius:4px; font-size:0.9em;">🌍 Global Rankings</a>
|
| 183 |
<div class="controls">
|
|
@@ -189,29 +330,51 @@
|
|
| 189 |
</div>
|
| 190 |
</header>
|
| 191 |
|
| 192 |
-
<div
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 193 |
|
| 194 |
-
<div class="
|
| 195 |
-
<
|
| 196 |
-
<input type="radio" id="view-overall" name="chart-view" value="overall" checked onchange="updateView()">
|
| 197 |
-
<label for="view-overall" style="margin-right:10px;">Overall (Mean)</label>
|
| 198 |
-
|
| 199 |
-
<input type="radio" id="view-classifiers-f1" name="chart-view" value="classifiers-f1" onchange="updateView()">
|
| 200 |
-
<label for="view-classifiers-f1" style="margin-right:10px;">F1 by Classifier</label>
|
| 201 |
|
| 202 |
-
<
|
| 203 |
-
|
| 204 |
-
|
|
|
|
|
|
|
| 205 |
|
| 206 |
-
|
| 207 |
-
|
| 208 |
-
<
|
|
|
|
| 209 |
</div>
|
| 210 |
-
|
| 211 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 212 |
</div>
|
| 213 |
</div>
|
| 214 |
|
|
|
|
|
|
|
| 215 |
<table id="result-table">
|
| 216 |
<thead>
|
| 217 |
<!-- Headers generated dynamically -->
|
|
@@ -220,6 +383,19 @@
|
|
| 220 |
<!-- Data rows will be populated here -->
|
| 221 |
</tbody>
|
| 222 |
</table>
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 223 |
</div>
|
| 224 |
|
| 225 |
<!-- Modal for details -->
|
|
@@ -231,11 +407,30 @@
|
|
| 231 |
</div>
|
| 232 |
</div>
|
| 233 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 234 |
<script>
|
| 235 |
let currentResults = [];
|
|
|
|
|
|
|
| 236 |
let sortDirection = 1; // 1 for asc, -1 for desc
|
| 237 |
let lastSortKey = '';
|
| 238 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 239 |
const VIEW_CONFIG = {
|
| 240 |
'overall': [
|
| 241 |
{ key: 'mean_f1', label: 'Mean F1' },
|
|
@@ -259,6 +454,12 @@
|
|
| 259 |
const loadingIndicator = document.getElementById("loading-indicator");
|
| 260 |
const modal = document.getElementById("details-modal");
|
| 261 |
const closeModal = document.querySelector(".close");
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 262 |
|
| 263 |
// Close modal
|
| 264 |
closeModal.onclick = () => modal.style.display = "none";
|
|
@@ -266,217 +467,185 @@
|
|
| 266 |
if (event.target == modal) modal.style.display = "none";
|
| 267 |
}
|
| 268 |
|
| 269 |
-
//
|
| 270 |
let scoreChartInstance = null;
|
| 271 |
-
let
|
| 272 |
|
| 273 |
-
|
| 274 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 275 |
|
| 276 |
-
|
| 277 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 278 |
const labels = topResults.map(r => r.algorithm || 'Unknown');
|
| 279 |
-
const times = topResults.map(r => r.time || 0);
|
| 280 |
|
| 281 |
-
const viewMode = document.
|
| 282 |
let datasets = [];
|
| 283 |
|
| 284 |
if (viewMode === 'overall') {
|
| 285 |
-
const f1Scores = topResults.map(r => r.mean_f1 || 0);
|
| 286 |
-
const aucScores = topResults.map(r => r.mean_auc || 0);
|
| 287 |
datasets = [
|
| 288 |
{
|
| 289 |
label: 'Mean F1',
|
| 290 |
-
data:
|
| 291 |
backgroundColor: 'rgba(52, 152, 219, 0.7)',
|
| 292 |
borderColor: 'rgba(52, 152, 219, 1)',
|
| 293 |
borderWidth: 1
|
| 294 |
},
|
| 295 |
{
|
| 296 |
label: 'Mean AUC',
|
| 297 |
-
data:
|
| 298 |
backgroundColor: 'rgba(46, 204, 113, 0.7)',
|
| 299 |
borderColor: 'rgba(46, 204, 113, 1)',
|
| 300 |
borderWidth: 1
|
| 301 |
}
|
| 302 |
];
|
| 303 |
} else if (viewMode === 'classifiers-f1') {
|
| 304 |
-
|
| 305 |
-
|
| 306 |
-
|
| 307 |
-
|
| 308 |
-
|
| 309 |
-
label: cls.toUpperCase() + ' F1',
|
| 310 |
-
data: topResults.map(r => (r.metrics && r.metrics[cls]) ? r.metrics[cls].f1 : 0),
|
| 311 |
-
backgroundColor: colors[idx],
|
| 312 |
-
borderColor: borderColors[idx],
|
| 313 |
borderWidth: 1
|
| 314 |
}));
|
| 315 |
-
} else
|
| 316 |
-
|
| 317 |
-
|
| 318 |
-
|
| 319 |
-
|
| 320 |
-
|
| 321 |
-
label: cls.toUpperCase() + ' AUC',
|
| 322 |
-
data: topResults.map(r => (r.metrics && r.metrics[cls]) ? r.metrics[cls].auc : 0),
|
| 323 |
-
backgroundColor: colors[idx],
|
| 324 |
-
borderColor: borderColors[idx],
|
| 325 |
borderWidth: 1
|
| 326 |
}));
|
| 327 |
}
|
| 328 |
|
| 329 |
-
//
|
| 330 |
-
const
|
| 331 |
if (scoreChartInstance) scoreChartInstance.destroy();
|
| 332 |
|
| 333 |
-
scoreChartInstance = new Chart(
|
| 334 |
type: 'bar',
|
| 335 |
-
data: {
|
| 336 |
-
labels: labels,
|
| 337 |
-
datasets: datasets
|
| 338 |
-
},
|
| 339 |
options: {
|
|
|
|
| 340 |
responsive: true,
|
| 341 |
maintainAspectRatio: false,
|
| 342 |
-
plugins: {
|
| 343 |
-
title: {
|
| 344 |
-
display: true,
|
| 345 |
-
text: viewMode === 'overall' ? 'Top Algorithms Performance (Mean)' :
|
| 346 |
-
(viewMode === 'classifiers-f1' ? 'F1-Score by Classifier' : 'AUC by Classifier')
|
| 347 |
-
}
|
| 348 |
-
},
|
| 349 |
scales: {
|
| 350 |
-
|
| 351 |
-
|
| 352 |
-
// min: 0.8
|
| 353 |
-
}
|
| 354 |
}
|
| 355 |
}
|
| 356 |
});
|
| 357 |
|
| 358 |
-
//
|
| 359 |
-
|
| 360 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
| 361 |
|
| 362 |
-
|
| 363 |
-
|
|
|
|
|
|
|
|
|
|
| 364 |
data: {
|
| 365 |
-
labels: labels,
|
| 366 |
datasets: [{
|
| 367 |
-
label: '
|
| 368 |
-
data:
|
| 369 |
-
backgroundColor: 'rgba(
|
| 370 |
-
borderColor: 'rgba(
|
| 371 |
-
|
| 372 |
-
|
| 373 |
-
fill: true
|
| 374 |
}]
|
| 375 |
},
|
| 376 |
options: {
|
| 377 |
responsive: true,
|
| 378 |
maintainAspectRatio: false,
|
| 379 |
-
plugins: {
|
| 380 |
-
title: { display: true, text: 'Execution Time' }
|
| 381 |
-
},
|
| 382 |
scales: {
|
| 383 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 384 |
}
|
| 385 |
}
|
| 386 |
});
|
| 387 |
}
|
| 388 |
|
| 389 |
-
function
|
| 390 |
-
|
| 391 |
-
|
| 392 |
-
|
| 393 |
-
title.textContent = `${result.algorithm} Details`;
|
| 394 |
-
|
| 395 |
-
let featuresHtml = result.selected_features.map(f =>
|
| 396 |
-
`<span class="feature-tag">${f}</span>`
|
| 397 |
-
).join('');
|
| 398 |
-
|
| 399 |
-
let metricsHtml = '<div style="margin-top: 15px;"><h3>Metrics Breakdown</h3>';
|
| 400 |
-
for (const [clf, m] of Object.entries(result.metrics || {})) {
|
| 401 |
-
metricsHtml += `
|
| 402 |
-
<div style="margin-bottom: 10px;">
|
| 403 |
-
<strong>${clf.toUpperCase()}:</strong>
|
| 404 |
-
F1: ${m.f1.toFixed(4)}, AUC: ${m.auc.toFixed(4)}
|
| 405 |
-
</div>`;
|
| 406 |
-
}
|
| 407 |
-
metricsHtml += '</div>';
|
| 408 |
-
|
| 409 |
-
body.innerHTML = `
|
| 410 |
-
<p><strong>Time:</strong> ${result.time.toFixed(4)}s</p>
|
| 411 |
-
<p><strong>Num Features:</strong> ${result.num_features}</p>
|
| 412 |
-
<p><strong>Selected Features (${result.selected_features.length}):</strong></p>
|
| 413 |
-
<div>${featuresHtml}</div>
|
| 414 |
-
${metricsHtml}
|
| 415 |
-
`;
|
| 416 |
-
|
| 417 |
-
modal.style.display = "block";
|
| 418 |
-
}
|
| 419 |
-
|
| 420 |
-
function getValue(obj, path) {
|
| 421 |
-
if (!path) return undefined;
|
| 422 |
-
return path.split('.').reduce((acc, part) => (acc && acc[part] !== undefined) ? acc[part] : undefined, obj);
|
| 423 |
-
}
|
| 424 |
-
|
| 425 |
-
function safeFixed(value, digits=4) {
|
| 426 |
-
if (value === undefined || value === null) return 'N/A';
|
| 427 |
-
return Number(value).toFixed(digits);
|
| 428 |
}
|
| 429 |
|
| 430 |
function renderTableHeader() {
|
| 431 |
-
const viewMode = document.
|
| 432 |
-
const
|
| 433 |
-
|
| 434 |
-
let html = '<tr>';
|
| 435 |
-
html += '<th data-key="rank" style="width: 60px;">#</th>';
|
| 436 |
-
html += '<th data-key="algorithm">Algorithm <span class="arrow">↕</span></th>';
|
| 437 |
|
| 438 |
-
|
| 439 |
-
|
| 440 |
-
|
| 441 |
-
|
| 442 |
-
|
| 443 |
-
|
| 444 |
-
|
| 445 |
-
|
| 446 |
-
tableHead.innerHTML = html;
|
| 447 |
-
|
| 448 |
-
// Re-attach sort listeners
|
| 449 |
-
tableHead.querySelectorAll('th[data-key]').forEach(th => {
|
| 450 |
-
th.addEventListener('click', () => sortTable(th.dataset.key));
|
| 451 |
});
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 452 |
}
|
| 453 |
|
| 454 |
-
function
|
| 455 |
tableBody.innerHTML = "";
|
| 456 |
|
| 457 |
// Robust data handling
|
| 458 |
-
if (!results) {
|
| 459 |
-
|
| 460 |
-
|
| 461 |
-
|
| 462 |
-
if (results.
|
| 463 |
-
|
| 464 |
-
} else if (results.results && Array.isArray(results.results)) {
|
| 465 |
-
results = results.results;
|
| 466 |
-
} else if (typeof results === 'object') {
|
| 467 |
-
// Assume it's a single record? Or convert object values to array?
|
| 468 |
-
// For now, wrap in array if it looks like a record (has algorithm)
|
| 469 |
-
if (results.algorithm) {
|
| 470 |
-
results = [results];
|
| 471 |
-
} else {
|
| 472 |
-
console.error("Invalid data format:", results);
|
| 473 |
-
tableBody.innerHTML = '<tr><td colspan="10" style="text-align:center; color:red;">Error: Invalid data format. Check console for details.</td></tr>';
|
| 474 |
-
return;
|
| 475 |
-
}
|
| 476 |
-
} else {
|
| 477 |
-
tableBody.innerHTML = '<tr><td colspan="10" style="text-align:center; color:red;">Error: Invalid data format</td></tr>';
|
| 478 |
-
return;
|
| 479 |
-
}
|
| 480 |
}
|
| 481 |
|
| 482 |
if (results.length === 0) {
|
|
@@ -484,39 +653,47 @@
|
|
| 484 |
return;
|
| 485 |
}
|
| 486 |
|
| 487 |
-
const viewMode = document.
|
| 488 |
-
const
|
| 489 |
|
| 490 |
-
results.forEach((
|
| 491 |
-
const
|
| 492 |
-
|
| 493 |
-
// Format features for preview
|
| 494 |
-
const featurePreview = (r.selected_features && Array.isArray(r.selected_features))
|
| 495 |
-
? r.selected_features.slice(0, 5).join(', ') + (r.selected_features.length > 5 ? '...' : '')
|
| 496 |
-
: 'N/A';
|
| 497 |
|
| 498 |
-
|
| 499 |
-
|
| 500 |
-
|
| 501 |
-
|
| 502 |
-
|
| 503 |
-
|
| 504 |
-
|
| 505 |
-
|
| 506 |
-
|
| 507 |
-
|
| 508 |
-
</td>`;
|
| 509 |
});
|
| 510 |
-
|
| 511 |
-
const time = r.time || 0;
|
| 512 |
-
html += `<td>${safeFixed(time, 2)}</td>`;
|
| 513 |
-
html += `
|
| 514 |
-
<td class="features-cell" onclick="showDetails(currentResults[${idx}])" title="Click for details">
|
| 515 |
-
${featurePreview} <span style="font-size:0.8em; color:#999;">(Click for details)</span>
|
| 516 |
-
</td>`;
|
| 517 |
|
| 518 |
-
|
| 519 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 520 |
});
|
| 521 |
}
|
| 522 |
|
|
@@ -524,21 +701,22 @@
|
|
| 524 |
if (lastSortKey === key) {
|
| 525 |
sortDirection *= -1;
|
| 526 |
} else {
|
| 527 |
-
sortDirection =
|
| 528 |
lastSortKey = key;
|
| 529 |
}
|
| 530 |
|
| 531 |
-
//
|
| 532 |
-
|
| 533 |
-
document.querySelectorAll('th .arrow').forEach(span => span.textContent = '↕');
|
| 534 |
-
const activeHeader = document.querySelector(`th[data-key="${key}"] .arrow`);
|
| 535 |
-
if (activeHeader) activeHeader.textContent = sortDirection === 1 ? '↑' : '↓';
|
| 536 |
|
| 537 |
-
|
| 538 |
-
let valA =
|
| 539 |
-
let valB =
|
| 540 |
|
| 541 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
| 542 |
|
| 543 |
if (valA === undefined) valA = -Infinity;
|
| 544 |
if (valB === undefined) valB = -Infinity;
|
|
@@ -547,79 +725,127 @@
|
|
| 547 |
if (valA > valB) return 1 * sortDirection;
|
| 548 |
return 0;
|
| 549 |
});
|
| 550 |
-
|
| 551 |
-
// Don't update currentResults global if it breaks things, but here it's fine.
|
| 552 |
-
// Actually, let's keep currentResults as the master list?
|
| 553 |
-
// No, currentResults should be the sorted list for consistent subsequent sorts.
|
| 554 |
-
currentResults = sorted;
|
| 555 |
-
updateTable(sorted);
|
| 556 |
-
}
|
| 557 |
|
| 558 |
-
|
| 559 |
-
|
| 560 |
-
updateTable(currentResults);
|
| 561 |
-
updateCharts(currentResults);
|
| 562 |
}
|
| 563 |
-
|
| 564 |
-
function
|
| 565 |
-
|
| 566 |
-
|
| 567 |
-
|
| 568 |
-
|
| 569 |
-
|
| 570 |
-
|
| 571 |
-
|
| 572 |
-
|
| 573 |
-
})
|
| 574 |
-
.then(data => {
|
| 575 |
-
console.log("Data received:", data);
|
| 576 |
-
currentResults = data;
|
| 577 |
-
updateView();
|
| 578 |
-
loadingIndicator.style.display = 'none';
|
| 579 |
-
})
|
| 580 |
-
.catch(err => {
|
| 581 |
-
console.error("Error fetching results:", err);
|
| 582 |
-
loadingIndicator.textContent = "Error loading data. Make sure the server is running.";
|
| 583 |
-
});
|
| 584 |
}
|
| 585 |
|
| 586 |
-
|
| 587 |
-
|
| 588 |
-
|
| 589 |
-
|
| 590 |
-
|
| 591 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
| 592 |
|
| 593 |
-
|
| 594 |
fetch("/api/datasets")
|
| 595 |
.then(res => res.json())
|
| 596 |
-
.then(
|
|
|
|
| 597 |
datasetSelect.innerHTML = "";
|
| 598 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 599 |
const option = document.createElement("option");
|
| 600 |
-
option.value = ds;
|
| 601 |
-
option.textContent = ds;
|
| 602 |
datasetSelect.appendChild(option);
|
| 603 |
});
|
| 604 |
|
| 605 |
-
|
| 606 |
-
|
| 607 |
-
|
| 608 |
-
} else if (datasets.length > 0) {
|
| 609 |
-
datasetSelect.value = datasets[0];
|
| 610 |
-
fetchResults(datasets[0]);
|
| 611 |
}
|
| 612 |
})
|
| 613 |
.catch(err => {
|
| 614 |
-
console.error("Error
|
| 615 |
-
datasetSelect.innerHTML =
|
| 616 |
});
|
|
|
|
| 617 |
|
| 618 |
-
|
| 619 |
-
|
| 620 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 621 |
});
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 622 |
</script>
|
| 623 |
|
| 624 |
</body>
|
| 625 |
-
</html>
|
|
|
|
| 13 |
--text-color: #333;
|
| 14 |
--border-color: #dee2e6;
|
| 15 |
--hover-color: #f1f1f1;
|
| 16 |
+
--accent-color: #e67e22;
|
| 17 |
}
|
| 18 |
|
| 19 |
body {
|
|
|
|
| 60 |
font-size: 14px;
|
| 61 |
}
|
| 62 |
|
| 63 |
+
/* Info Boxes */
|
| 64 |
+
.info-section {
|
| 65 |
+
display: flex;
|
| 66 |
+
gap: 20px;
|
| 67 |
+
margin-bottom: 20px;
|
| 68 |
+
flex-wrap: wrap;
|
| 69 |
+
}
|
| 70 |
+
|
| 71 |
+
.description-box, .metadata-box {
|
| 72 |
+
flex: 1;
|
| 73 |
+
background-color: #e8f4fd;
|
| 74 |
+
border-left: 4px solid #3498db;
|
| 75 |
+
padding: 15px;
|
| 76 |
+
border-radius: 4px;
|
| 77 |
+
min-width: 300px;
|
| 78 |
+
}
|
| 79 |
+
|
| 80 |
+
.metadata-box {
|
| 81 |
+
background-color: #fef9e7;
|
| 82 |
+
border-left-color: #f1c40f;
|
| 83 |
+
}
|
| 84 |
+
|
| 85 |
+
h3 {
|
| 86 |
+
margin-top: 0;
|
| 87 |
+
margin-bottom: 10px;
|
| 88 |
+
font-size: 1.1em;
|
| 89 |
+
color: var(--secondary-color);
|
| 90 |
+
}
|
| 91 |
+
|
| 92 |
+
p {
|
| 93 |
+
margin: 5px 0;
|
| 94 |
+
line-height: 1.5;
|
| 95 |
+
font-size: 0.95em;
|
| 96 |
+
}
|
| 97 |
+
|
| 98 |
+
.version-tag {
|
| 99 |
+
font-size: 0.8em;
|
| 100 |
+
color: #7f8c8d;
|
| 101 |
+
margin-top: 5px;
|
| 102 |
+
}
|
| 103 |
+
|
| 104 |
+
/* Filters */
|
| 105 |
+
.filters-box {
|
| 106 |
+
background-color: #f1f1f1;
|
| 107 |
+
padding: 15px;
|
| 108 |
+
border-radius: 8px;
|
| 109 |
+
margin-bottom: 20px;
|
| 110 |
+
border: 1px solid #ddd;
|
| 111 |
+
display: flex;
|
| 112 |
+
gap: 20px;
|
| 113 |
+
align-items: center;
|
| 114 |
+
flex-wrap: wrap;
|
| 115 |
+
}
|
| 116 |
+
|
| 117 |
+
.filter-group {
|
| 118 |
+
display: flex;
|
| 119 |
+
align-items: center;
|
| 120 |
+
gap: 10px;
|
| 121 |
+
}
|
| 122 |
+
|
| 123 |
+
input[type="range"] {
|
| 124 |
+
width: 120px;
|
| 125 |
+
}
|
| 126 |
+
|
| 127 |
+
.filter-val {
|
| 128 |
+
font-weight: bold;
|
| 129 |
+
min-width: 40px;
|
| 130 |
+
}
|
| 131 |
+
|
| 132 |
+
/* Table */
|
| 133 |
table {
|
| 134 |
width: 100%;
|
| 135 |
border-collapse: collapse;
|
|
|
|
| 193 |
color: var(--primary-color);
|
| 194 |
}
|
| 195 |
|
| 196 |
+
/* Charts */
|
| 197 |
+
.charts-section {
|
| 198 |
+
margin-top: 30px;
|
| 199 |
+
display: flex;
|
| 200 |
+
flex-direction: column;
|
| 201 |
+
gap: 20px;
|
| 202 |
+
}
|
| 203 |
+
|
| 204 |
+
.chart-container {
|
| 205 |
+
background: white;
|
| 206 |
+
padding: 15px;
|
| 207 |
+
border-radius: 8px;
|
| 208 |
+
box-shadow: 0 2px 4px rgba(0,0,0,0.1);
|
| 209 |
+
border: 1px solid #eee;
|
| 210 |
+
position: relative;
|
| 211 |
+
height: 400px;
|
| 212 |
+
width: 100%;
|
| 213 |
+
}
|
| 214 |
+
|
| 215 |
/* Modal styles */
|
| 216 |
.modal {
|
| 217 |
display: none;
|
|
|
|
| 261 |
padding: 20px;
|
| 262 |
color: #666;
|
| 263 |
}
|
| 264 |
+
|
| 265 |
+
/* Sidebar for PDF */
|
| 266 |
+
.pdf-sidebar {
|
| 267 |
+
position: fixed;
|
| 268 |
+
top: 0;
|
| 269 |
+
right: -50%; /* Hidden by default */
|
| 270 |
+
width: 50%;
|
| 271 |
+
height: 100%;
|
| 272 |
+
background: white;
|
| 273 |
+
box-shadow: -2px 0 5px rgba(0,0,0,0.2);
|
| 274 |
+
z-index: 2000;
|
| 275 |
+
transition: right 0.3s ease-in-out;
|
| 276 |
+
display: flex;
|
| 277 |
+
flex-direction: column;
|
| 278 |
+
}
|
| 279 |
+
|
| 280 |
+
.pdf-sidebar.open {
|
| 281 |
+
right: 0;
|
| 282 |
+
}
|
| 283 |
+
|
| 284 |
+
.sidebar-header {
|
| 285 |
+
padding: 10px 20px;
|
| 286 |
+
background: var(--primary-color);
|
| 287 |
+
color: white;
|
| 288 |
+
display: flex;
|
| 289 |
+
justify-content: space-between;
|
| 290 |
+
align-items: center;
|
| 291 |
+
}
|
| 292 |
+
|
| 293 |
+
.sidebar-content {
|
| 294 |
+
flex: 1;
|
| 295 |
+
padding: 0;
|
| 296 |
+
}
|
| 297 |
+
|
| 298 |
+
.sidebar-content iframe {
|
| 299 |
+
width: 100%;
|
| 300 |
+
height: 100%;
|
| 301 |
+
border: none;
|
| 302 |
+
}
|
| 303 |
+
|
| 304 |
+
.algo-link {
|
| 305 |
+
color: var(--primary-color);
|
| 306 |
+
cursor: pointer;
|
| 307 |
+
font-weight: bold;
|
| 308 |
+
}
|
| 309 |
+
.algo-link:hover {
|
| 310 |
+
text-decoration: underline;
|
| 311 |
+
}
|
| 312 |
</style>
|
| 313 |
</head>
|
| 314 |
<body>
|
| 315 |
|
| 316 |
<div class="container">
|
| 317 |
<header>
|
| 318 |
+
<div>
|
| 319 |
+
<h1>🏆 AutoFS Leaderboard</h1>
|
| 320 |
+
<div id="last-updated" class="version-tag">Data Last Updated: Loading...</div>
|
| 321 |
+
</div>
|
| 322 |
<div style="display:flex; gap:15px; align-items:center;">
|
| 323 |
<a href="/global" style="text-decoration:none; color:white; background-color:#8e44ad; padding:8px 15px; border-radius:4px; font-size:0.9em;">🌍 Global Rankings</a>
|
| 324 |
<div class="controls">
|
|
|
|
| 330 |
</div>
|
| 331 |
</header>
|
| 332 |
|
| 333 |
+
<div class="info-section">
|
| 334 |
+
<div class="description-box">
|
| 335 |
+
<h3>About This Dataset</h3>
|
| 336 |
+
<p>
|
| 337 |
+
This dashboard displays the performance of various feature selection algorithms on the
|
| 338 |
+
<strong><span id="desc-dataset-name">Selected</span></strong> dataset.
|
| 339 |
+
Compare algorithms based on accuracy (F1), stability (AUC), and computational efficiency.
|
| 340 |
+
</p>
|
| 341 |
+
</div>
|
| 342 |
+
<div class="metadata-box">
|
| 343 |
+
<h3>Dataset Metadata</h3>
|
| 344 |
+
<p><strong>Name:</strong> <span id="meta-name">-</span></p>
|
| 345 |
+
<p><strong>Last Updated:</strong> <span id="meta-updated">-</span></p>
|
| 346 |
+
<!-- Placeholder for future metadata -->
|
| 347 |
+
<p style="color:#888; font-size:0.8em;">(Additional metadata like samples/features not available)</p>
|
| 348 |
+
</div>
|
| 349 |
+
</div>
|
| 350 |
|
| 351 |
+
<div class="filters-box">
|
| 352 |
+
<h3>🔍 Filters</h3>
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 353 |
|
| 354 |
+
<div class="filter-group">
|
| 355 |
+
<label>Min F1 Score:</label>
|
| 356 |
+
<input type="range" id="filter-f1" min="0" max="1" step="0.05" value="0">
|
| 357 |
+
<span id="val-f1" class="filter-val">0.00</span>
|
| 358 |
+
</div>
|
| 359 |
|
| 360 |
+
<div class="filter-group">
|
| 361 |
+
<label>Max Time (s):</label>
|
| 362 |
+
<input type="range" id="filter-time" min="1" max="500" step="10" value="500">
|
| 363 |
+
<span id="val-time" class="filter-val">500+</span>
|
| 364 |
</div>
|
| 365 |
+
|
| 366 |
+
<div style="margin-left: auto;">
|
| 367 |
+
<label style="margin-right:10px; font-weight:bold;">Chart View:</label>
|
| 368 |
+
<select id="chart-view-mode" onchange="updateView()">
|
| 369 |
+
<option value="overall">Overall (Mean)</option>
|
| 370 |
+
<option value="classifiers-f1">F1 by Classifier</option>
|
| 371 |
+
<option value="classifiers-auc">AUC by Classifier</option>
|
| 372 |
+
</select>
|
| 373 |
</div>
|
| 374 |
</div>
|
| 375 |
|
| 376 |
+
<div id="loading-indicator" class="loading" style="display: none;">Loading data...</div>
|
| 377 |
+
|
| 378 |
<table id="result-table">
|
| 379 |
<thead>
|
| 380 |
<!-- Headers generated dynamically -->
|
|
|
|
| 383 |
<!-- Data rows will be populated here -->
|
| 384 |
</tbody>
|
| 385 |
</table>
|
| 386 |
+
|
| 387 |
+
<div class="charts-section">
|
| 388 |
+
<div class="chart-container">
|
| 389 |
+
<h3>📊 Performance Comparison</h3>
|
| 390 |
+
<canvas id="scoreChart"></canvas>
|
| 391 |
+
</div>
|
| 392 |
+
|
| 393 |
+
<div class="chart-container">
|
| 394 |
+
<h3>📉 Pareto Frontier (Trade-off)</h3>
|
| 395 |
+
<p style="font-size:0.9em; color:#666; margin-top:-10px;">X: Number of Selected Features (Lower is better) vs Y: F1 Score (Higher is better). Optimal: Top-Left.</p>
|
| 396 |
+
<canvas id="paretoChart"></canvas>
|
| 397 |
+
</div>
|
| 398 |
+
</div>
|
| 399 |
</div>
|
| 400 |
|
| 401 |
<!-- Modal for details -->
|
|
|
|
| 407 |
</div>
|
| 408 |
</div>
|
| 409 |
|
| 410 |
+
<!-- PDF Sidebar -->
|
| 411 |
+
<div id="pdf-sidebar" class="pdf-sidebar">
|
| 412 |
+
<div class="sidebar-header">
|
| 413 |
+
<h3 id="sidebar-title" style="margin:0; color:white;">Paper Preview</h3>
|
| 414 |
+
<span class="close" onclick="closeSidebar()" style="color:white; opacity:0.8; font-size: 28px; cursor: pointer;">×</span>
|
| 415 |
+
</div>
|
| 416 |
+
<div class="sidebar-content">
|
| 417 |
+
<iframe id="pdf-frame" src=""></iframe>
|
| 418 |
+
</div>
|
| 419 |
+
</div>
|
| 420 |
+
|
| 421 |
<script>
|
| 422 |
let currentResults = [];
|
| 423 |
+
let filteredResults = [];
|
| 424 |
+
let allDatasets = [];
|
| 425 |
let sortDirection = 1; // 1 for asc, -1 for desc
|
| 426 |
let lastSortKey = '';
|
| 427 |
|
| 428 |
+
// Filter Elements
|
| 429 |
+
const filterF1 = document.getElementById('filter-f1');
|
| 430 |
+
const filterTime = document.getElementById('filter-time');
|
| 431 |
+
const valF1 = document.getElementById('val-f1');
|
| 432 |
+
const valTime = document.getElementById('val-time');
|
| 433 |
+
|
| 434 |
const VIEW_CONFIG = {
|
| 435 |
'overall': [
|
| 436 |
{ key: 'mean_f1', label: 'Mean F1' },
|
|
|
|
| 454 |
const loadingIndicator = document.getElementById("loading-indicator");
|
| 455 |
const modal = document.getElementById("details-modal");
|
| 456 |
const closeModal = document.querySelector(".close");
|
| 457 |
+
|
| 458 |
+
// Metadata elements
|
| 459 |
+
const metaName = document.getElementById('meta-name');
|
| 460 |
+
const metaUpdated = document.getElementById('meta-updated');
|
| 461 |
+
const descName = document.getElementById('desc-dataset-name');
|
| 462 |
+
const globalUpdated = document.getElementById('last-updated');
|
| 463 |
|
| 464 |
// Close modal
|
| 465 |
closeModal.onclick = () => modal.style.display = "none";
|
|
|
|
| 467 |
if (event.target == modal) modal.style.display = "none";
|
| 468 |
}
|
| 469 |
|
| 470 |
+
// Chart instances
|
| 471 |
let scoreChartInstance = null;
|
| 472 |
+
let paretoChartInstance = null;
|
| 473 |
|
| 474 |
+
// Filter Logic
|
| 475 |
+
function applyFilters() {
|
| 476 |
+
const minF1 = parseFloat(filterF1.value);
|
| 477 |
+
const maxTime = parseFloat(filterTime.value);
|
| 478 |
+
|
| 479 |
+
valF1.textContent = minF1.toFixed(2);
|
| 480 |
+
valTime.textContent = maxTime >= 500 ? "500+" : maxTime + "s";
|
| 481 |
+
|
| 482 |
+
filteredResults = currentResults.filter(r => {
|
| 483 |
+
const f1 = r.mean_f1 || 0;
|
| 484 |
+
const time = r.time || 0;
|
| 485 |
+
return f1 >= minF1 && (maxTime >= 500 || time <= maxTime);
|
| 486 |
+
});
|
| 487 |
|
| 488 |
+
renderTable(filteredResults);
|
| 489 |
+
updateCharts(filteredResults);
|
| 490 |
+
}
|
| 491 |
+
|
| 492 |
+
filterF1.addEventListener('input', applyFilters);
|
| 493 |
+
filterTime.addEventListener('input', applyFilters);
|
| 494 |
+
|
| 495 |
+
function updateCharts(results) {
|
| 496 |
+
if (!Array.isArray(results)) return;
|
| 497 |
+
|
| 498 |
+
// Use filtered results for charts too
|
| 499 |
+
// Limit to top 20 for bar chart readability
|
| 500 |
+
const topResults = results.slice(0, 20);
|
| 501 |
const labels = topResults.map(r => r.algorithm || 'Unknown');
|
|
|
|
| 502 |
|
| 503 |
+
const viewMode = document.getElementById('chart-view-mode').value;
|
| 504 |
let datasets = [];
|
| 505 |
|
| 506 |
if (viewMode === 'overall') {
|
|
|
|
|
|
|
| 507 |
datasets = [
|
| 508 |
{
|
| 509 |
label: 'Mean F1',
|
| 510 |
+
data: topResults.map(r => r.mean_f1 || 0),
|
| 511 |
backgroundColor: 'rgba(52, 152, 219, 0.7)',
|
| 512 |
borderColor: 'rgba(52, 152, 219, 1)',
|
| 513 |
borderWidth: 1
|
| 514 |
},
|
| 515 |
{
|
| 516 |
label: 'Mean AUC',
|
| 517 |
+
data: topResults.map(r => r.mean_auc || 0),
|
| 518 |
backgroundColor: 'rgba(46, 204, 113, 0.7)',
|
| 519 |
borderColor: 'rgba(46, 204, 113, 1)',
|
| 520 |
borderWidth: 1
|
| 521 |
}
|
| 522 |
];
|
| 523 |
} else if (viewMode === 'classifiers-f1') {
|
| 524 |
+
datasets = ['nb', 'svm', 'rf'].map((clf, i) => ({
|
| 525 |
+
label: clf.toUpperCase() + ' F1',
|
| 526 |
+
data: topResults.map(r => r.metrics?.[clf]?.f1 || 0),
|
| 527 |
+
backgroundColor: `hsla(${200 + i*40}, 70%, 60%, 0.7)`,
|
| 528 |
+
borderColor: `hsla(${200 + i*40}, 70%, 60%, 1)`,
|
|
|
|
|
|
|
|
|
|
|
|
|
| 529 |
borderWidth: 1
|
| 530 |
}));
|
| 531 |
+
} else {
|
| 532 |
+
datasets = ['nb', 'svm', 'rf'].map((clf, i) => ({
|
| 533 |
+
label: clf.toUpperCase() + ' AUC',
|
| 534 |
+
data: topResults.map(r => r.metrics?.[clf]?.auc || 0),
|
| 535 |
+
backgroundColor: `hsla(${30 + i*40}, 70%, 60%, 0.7)`,
|
| 536 |
+
borderColor: `hsla(${30 + i*40}, 70%, 60%, 1)`,
|
|
|
|
|
|
|
|
|
|
|
|
|
| 537 |
borderWidth: 1
|
| 538 |
}));
|
| 539 |
}
|
| 540 |
|
| 541 |
+
// 1. Performance Chart (Horizontal Bar)
|
| 542 |
+
const ctxScore = document.getElementById('scoreChart').getContext('2d');
|
| 543 |
if (scoreChartInstance) scoreChartInstance.destroy();
|
| 544 |
|
| 545 |
+
scoreChartInstance = new Chart(ctxScore, {
|
| 546 |
type: 'bar',
|
| 547 |
+
data: { labels: labels, datasets: datasets },
|
|
|
|
|
|
|
|
|
|
| 548 |
options: {
|
| 549 |
+
indexAxis: 'y', // Horizontal
|
| 550 |
responsive: true,
|
| 551 |
maintainAspectRatio: false,
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 552 |
scales: {
|
| 553 |
+
x: { beginAtZero: true, max: 1.0 },
|
| 554 |
+
y: { ticks: { autoSkip: false } }
|
|
|
|
|
|
|
| 555 |
}
|
| 556 |
}
|
| 557 |
});
|
| 558 |
|
| 559 |
+
// 2. Pareto Frontier Chart (Scatter)
|
| 560 |
+
// X: Num Selected Features, Y: Mean F1
|
| 561 |
+
const paretoData = results.map(r => ({
|
| 562 |
+
x: r.num_features || (r.selected_features ? r.selected_features.length : 0),
|
| 563 |
+
y: r.mean_f1 || 0,
|
| 564 |
+
algorithm: r.algorithm
|
| 565 |
+
}));
|
| 566 |
|
| 567 |
+
const ctxPareto = document.getElementById('paretoChart').getContext('2d');
|
| 568 |
+
if (paretoChartInstance) paretoChartInstance.destroy();
|
| 569 |
+
|
| 570 |
+
paretoChartInstance = new Chart(ctxPareto, {
|
| 571 |
+
type: 'scatter',
|
| 572 |
data: {
|
|
|
|
| 573 |
datasets: [{
|
| 574 |
+
label: 'Algorithm Performance',
|
| 575 |
+
data: paretoData,
|
| 576 |
+
backgroundColor: 'rgba(230, 126, 34, 0.7)', // Orange accent
|
| 577 |
+
borderColor: 'rgba(230, 126, 34, 1)',
|
| 578 |
+
pointRadius: 6,
|
| 579 |
+
pointHoverRadius: 8
|
|
|
|
| 580 |
}]
|
| 581 |
},
|
| 582 |
options: {
|
| 583 |
responsive: true,
|
| 584 |
maintainAspectRatio: false,
|
|
|
|
|
|
|
|
|
|
| 585 |
scales: {
|
| 586 |
+
x: {
|
| 587 |
+
type: 'linear',
|
| 588 |
+
position: 'bottom',
|
| 589 |
+
title: { display: true, text: 'Number of Selected Features' }
|
| 590 |
+
},
|
| 591 |
+
y: {
|
| 592 |
+
title: { display: true, text: 'Mean F1 Score' },
|
| 593 |
+
min: 0, max: 1
|
| 594 |
+
}
|
| 595 |
+
},
|
| 596 |
+
plugins: {
|
| 597 |
+
tooltip: {
|
| 598 |
+
callbacks: {
|
| 599 |
+
label: function(context) {
|
| 600 |
+
const pt = context.raw;
|
| 601 |
+
return `${pt.algorithm}: F1=${pt.y.toFixed(4)}, Feats=${pt.x}`;
|
| 602 |
+
}
|
| 603 |
+
}
|
| 604 |
+
}
|
| 605 |
}
|
| 606 |
}
|
| 607 |
});
|
| 608 |
}
|
| 609 |
|
| 610 |
+
function updateView() {
|
| 611 |
+
renderTableHeader();
|
| 612 |
+
renderTable(filteredResults);
|
| 613 |
+
updateCharts(filteredResults);
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 614 |
}
|
| 615 |
|
| 616 |
function renderTableHeader() {
|
| 617 |
+
const viewMode = document.getElementById('chart-view-mode').value;
|
| 618 |
+
const config = VIEW_CONFIG[viewMode];
|
|
|
|
|
|
|
|
|
|
|
|
|
| 619 |
|
| 620 |
+
let headerHTML = `
|
| 621 |
+
<tr>
|
| 622 |
+
<th>Rank</th>
|
| 623 |
+
<th onclick="sortTable('algorithm')">Algorithm <span class="arrow"></span></th>
|
| 624 |
+
`;
|
| 625 |
+
|
| 626 |
+
config.forEach(col => {
|
| 627 |
+
headerHTML += `<th onclick="sortTable('${col.key}')">${col.label} <span class="arrow"></span></th>`;
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 628 |
});
|
| 629 |
+
|
| 630 |
+
headerHTML += `
|
| 631 |
+
<th onclick="sortTable('time')">Time (s) <span class="arrow"></span></th>
|
| 632 |
+
<th onclick="sortTable('selected_features')">Selected Features <span class="arrow"></span></th>
|
| 633 |
+
</tr>
|
| 634 |
+
`;
|
| 635 |
+
|
| 636 |
+
tableHead.innerHTML = headerHTML;
|
| 637 |
}
|
| 638 |
|
| 639 |
+
function renderTable(results) {
|
| 640 |
tableBody.innerHTML = "";
|
| 641 |
|
| 642 |
// Robust data handling
|
| 643 |
+
if (!results) { results = []; }
|
| 644 |
+
else if (!Array.isArray(results)) {
|
| 645 |
+
if (results.data && Array.isArray(results.data)) results = results.data;
|
| 646 |
+
else if (results.results && Array.isArray(results.results)) results = results.results;
|
| 647 |
+
else if (results.algorithm) results = [results];
|
| 648 |
+
else results = [];
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 649 |
}
|
| 650 |
|
| 651 |
if (results.length === 0) {
|
|
|
|
| 653 |
return;
|
| 654 |
}
|
| 655 |
|
| 656 |
+
const viewMode = document.getElementById('chart-view-mode').value;
|
| 657 |
+
const config = VIEW_CONFIG[viewMode];
|
| 658 |
|
| 659 |
+
results.forEach((row, index) => {
|
| 660 |
+
const tr = document.createElement("tr");
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 661 |
|
| 662 |
+
// Helper to get nested property safely
|
| 663 |
+
const getVal = (obj, path) => {
|
| 664 |
+
return path.split('.').reduce((acc, part) => acc && acc[part], obj);
|
| 665 |
+
};
|
| 666 |
+
|
| 667 |
+
let metricsHTML = '';
|
| 668 |
+
config.forEach(col => {
|
| 669 |
+
const val = getVal(row, col.key);
|
| 670 |
+
const numVal = (val !== undefined && val !== null) ? Number(val).toFixed(4) : 'N/A';
|
| 671 |
+
metricsHTML += `<td>${numVal}</td>`;
|
|
|
|
| 672 |
});
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 673 |
|
| 674 |
+
// Features
|
| 675 |
+
let featCount = row.num_features;
|
| 676 |
+
if (featCount === undefined && row.selected_features) featCount = row.selected_features.length;
|
| 677 |
+
|
| 678 |
+
let featText = "";
|
| 679 |
+
if (Array.isArray(row.selected_features)) {
|
| 680 |
+
featText = row.selected_features.join(", ");
|
| 681 |
+
} else {
|
| 682 |
+
featText = "N/A";
|
| 683 |
+
}
|
| 684 |
+
|
| 685 |
+
const rank = index + 1;
|
| 686 |
+
|
| 687 |
+
tr.innerHTML = `
|
| 688 |
+
<td>${rank}</td>
|
| 689 |
+
<td class="algo-link" onclick="openPdf('${row.algorithm}')" title="Click to view paper">${row.algorithm || 'Unknown'}</td>
|
| 690 |
+
${metricsHTML}
|
| 691 |
+
<td>${row.time ? Number(row.time).toFixed(4) : 'N/A'}</td>
|
| 692 |
+
<td class="features-cell" onclick="showDetails('${row.algorithm}', '${featText}')" title="${featText}">
|
| 693 |
+
${featText}
|
| 694 |
+
</td>
|
| 695 |
+
`;
|
| 696 |
+
tableBody.appendChild(tr);
|
| 697 |
});
|
| 698 |
}
|
| 699 |
|
|
|
|
| 701 |
if (lastSortKey === key) {
|
| 702 |
sortDirection *= -1;
|
| 703 |
} else {
|
| 704 |
+
sortDirection = 1;
|
| 705 |
lastSortKey = key;
|
| 706 |
}
|
| 707 |
|
| 708 |
+
// Helper to get nested value
|
| 709 |
+
const getVal = (obj, path) => path.split('.').reduce((acc, part) => acc && acc[part], obj);
|
|
|
|
|
|
|
|
|
|
| 710 |
|
| 711 |
+
filteredResults.sort((a, b) => {
|
| 712 |
+
let valA = getVal(a, key);
|
| 713 |
+
let valB = getVal(b, key);
|
| 714 |
|
| 715 |
+
// Handle array length for selected_features sort
|
| 716 |
+
if (key === 'selected_features') {
|
| 717 |
+
valA = Array.isArray(valA) ? valA.length : 0;
|
| 718 |
+
valB = Array.isArray(valB) ? valB.length : 0;
|
| 719 |
+
}
|
| 720 |
|
| 721 |
if (valA === undefined) valA = -Infinity;
|
| 722 |
if (valB === undefined) valB = -Infinity;
|
|
|
|
| 725 |
if (valA > valB) return 1 * sortDirection;
|
| 726 |
return 0;
|
| 727 |
});
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 728 |
|
| 729 |
+
renderTable(filteredResults);
|
| 730 |
+
updateSortArrows(key);
|
|
|
|
|
|
|
| 731 |
}
|
| 732 |
+
|
| 733 |
+
function updateSortArrows(activeKey) {
|
| 734 |
+
document.querySelectorAll('th .arrow').forEach(span => span.textContent = '↕');
|
| 735 |
+
// Find the th with onclick containing this key
|
| 736 |
+
const ths = document.querySelectorAll('th');
|
| 737 |
+
ths.forEach(th => {
|
| 738 |
+
if (th.getAttribute('onclick').includes(`'${activeKey}'`)) {
|
| 739 |
+
th.querySelector('.arrow').textContent = sortDirection === 1 ? '↑' : '↓';
|
| 740 |
+
}
|
| 741 |
+
});
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 742 |
}
|
| 743 |
|
| 744 |
+
function showDetails(algo, features) {
|
| 745 |
+
document.getElementById("modal-title").innerText = `${algo} - Selected Features`;
|
| 746 |
+
const featArray = features.split(", ");
|
| 747 |
+
const html = featArray.map(f => `<span class="feature-tag">${f}</span>`).join(" ");
|
| 748 |
+
document.getElementById("modal-body").innerHTML = `
|
| 749 |
+
<p><strong>Total Selected:</strong> ${featArray.length}</p>
|
| 750 |
+
<div style="margin-top:10px; line-height:1.6;">${html}</div>
|
| 751 |
+
`;
|
| 752 |
+
modal.style.display = "block";
|
| 753 |
+
}
|
| 754 |
|
| 755 |
+
function fetchDatasets() {
|
| 756 |
fetch("/api/datasets")
|
| 757 |
.then(res => res.json())
|
| 758 |
+
.then(data => {
|
| 759 |
+
allDatasets = data;
|
| 760 |
datasetSelect.innerHTML = "";
|
| 761 |
+
|
| 762 |
+
// Sort dates for global updated
|
| 763 |
+
const dates = data.map(d => d.last_updated).filter(d => d !== 'Unknown').sort().reverse();
|
| 764 |
+
if (dates.length > 0) {
|
| 765 |
+
globalUpdated.textContent = `Data Last Updated: ${dates[0]}`;
|
| 766 |
+
} else {
|
| 767 |
+
globalUpdated.textContent = `Data Last Updated: Unknown`;
|
| 768 |
+
}
|
| 769 |
+
|
| 770 |
+
data.forEach(ds => {
|
| 771 |
const option = document.createElement("option");
|
| 772 |
+
option.value = ds.name;
|
| 773 |
+
option.textContent = ds.name;
|
| 774 |
datasetSelect.appendChild(option);
|
| 775 |
});
|
| 776 |
|
| 777 |
+
// Default selection
|
| 778 |
+
if (data.length > 0) {
|
| 779 |
+
loadDataset(data[0].name);
|
|
|
|
|
|
|
|
|
|
| 780 |
}
|
| 781 |
})
|
| 782 |
.catch(err => {
|
| 783 |
+
console.error("Error loading datasets:", err);
|
| 784 |
+
datasetSelect.innerHTML = '<option disabled>Error loading</option>';
|
| 785 |
});
|
| 786 |
+
}
|
| 787 |
|
| 788 |
+
function loadDataset(name) {
|
| 789 |
+
datasetSelect.value = name;
|
| 790 |
+
loadingIndicator.style.display = "block";
|
| 791 |
+
tableBody.innerHTML = "";
|
| 792 |
+
|
| 793 |
+
// Update metadata box
|
| 794 |
+
const dsInfo = allDatasets.find(d => d.name === name);
|
| 795 |
+
if (dsInfo) {
|
| 796 |
+
metaName.textContent = dsInfo.name;
|
| 797 |
+
metaUpdated.textContent = dsInfo.last_updated;
|
| 798 |
+
descName.textContent = dsInfo.name;
|
| 799 |
+
}
|
| 800 |
+
|
| 801 |
+
fetch(`/api/results?dataset=${name}`)
|
| 802 |
+
.then(res => res.json())
|
| 803 |
+
.then(data => {
|
| 804 |
+
loadingIndicator.style.display = "none";
|
| 805 |
+
currentResults = data;
|
| 806 |
+
|
| 807 |
+
// Reset filters on new dataset? Or keep them?
|
| 808 |
+
// Let's reset to show all data first, or apply current?
|
| 809 |
+
// Applying current is better UX
|
| 810 |
+
applyFilters();
|
| 811 |
+
renderTableHeader(); // Ensure headers match view mode
|
| 812 |
+
})
|
| 813 |
+
.catch(err => {
|
| 814 |
+
loadingIndicator.style.display = "none";
|
| 815 |
+
console.error("Error:", err);
|
| 816 |
+
tableBody.innerHTML = '<tr><td colspan="10" style="color:red; text-align:center;">Error loading results</td></tr>';
|
| 817 |
+
});
|
| 818 |
+
}
|
| 819 |
+
|
| 820 |
+
datasetSelect.addEventListener("change", (e) => {
|
| 821 |
+
loadDataset(e.target.value);
|
| 822 |
});
|
| 823 |
+
|
| 824 |
+
// PDF Sidebar Logic
|
| 825 |
+
function openPdf(algoName) {
|
| 826 |
+
if (!algoName) return;
|
| 827 |
+
const sidebar = document.getElementById('pdf-sidebar');
|
| 828 |
+
const frame = document.getElementById('pdf-frame');
|
| 829 |
+
|
| 830 |
+
// Use upper case as observed in file system
|
| 831 |
+
const filename = algoName.toUpperCase() + ".pdf";
|
| 832 |
+
|
| 833 |
+
frame.src = `/pdfs/${filename}`;
|
| 834 |
+
sidebar.classList.add('open');
|
| 835 |
+
}
|
| 836 |
+
|
| 837 |
+
function closeSidebar() {
|
| 838 |
+
const sidebar = document.getElementById('pdf-sidebar');
|
| 839 |
+
sidebar.classList.remove('open');
|
| 840 |
+
// Clear src after transition to avoid flicker or keep memory usage low
|
| 841 |
+
setTimeout(() => {
|
| 842 |
+
document.getElementById('pdf-frame').src = "";
|
| 843 |
+
}, 300);
|
| 844 |
+
}
|
| 845 |
+
|
| 846 |
+
document.addEventListener("DOMContentLoaded", fetchDatasets);
|
| 847 |
+
|
| 848 |
</script>
|
| 849 |
|
| 850 |
</body>
|
| 851 |
+
</html>
|
check_datasets_api.py
ADDED
|
@@ -0,0 +1,20 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
import urllib.request
|
| 2 |
+
import json
|
| 3 |
+
|
| 4 |
+
try:
|
| 5 |
+
url = "http://127.0.0.1:7861/api/datasets"
|
| 6 |
+
with urllib.request.urlopen(url) as response:
|
| 7 |
+
data = json.loads(response.read().decode())
|
| 8 |
+
print(f"Status Code: {response.getcode()}")
|
| 9 |
+
print(f"Type: {type(data)}")
|
| 10 |
+
if isinstance(data, list) and len(data) > 0:
|
| 11 |
+
print(f"First item: {data[0]}")
|
| 12 |
+
if "last_updated" in data[0]:
|
| 13 |
+
print("SUCCESS: last_updated field found.")
|
| 14 |
+
else:
|
| 15 |
+
print("FAILURE: last_updated field MISSING.")
|
| 16 |
+
else:
|
| 17 |
+
print("Data is empty or not a list.")
|
| 18 |
+
print(data)
|
| 19 |
+
except Exception as e:
|
| 20 |
+
print(f"Error: {e}")
|
config.py
ADDED
|
@@ -0,0 +1,10 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
# MODEL_NAME = "/home/fangsensen/.cache/huggingface/hub/models--deepseek-ai--DeepSeek-R1-Distill-Qwen-1.5B/snapshots/ad9f0ae0864d7fbcd1cd905e3c6c5b069cc8b562"
|
| 2 |
+
MODEL_NAME = "/data1/fangsensen/deepseek-math-7b-rl"
|
| 3 |
+
|
| 4 |
+
DB_PATH = "data/results.db"
|
| 5 |
+
DATASET_PATH = "data/Authorship.mat"
|
| 6 |
+
EXPR_SEED_PATH = "data/expression_seed.json"
|
| 7 |
+
ITERATIONS = 10
|
| 8 |
+
TOP_K = 10
|
| 9 |
+
CV_FOLDS = 5
|
| 10 |
+
GPU = True
|
data/Authorship.mat
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:d735be2a10e0e6560fe0534f3915a1ca4adc6ec65848d795b53f80623c3355a1
|
| 3 |
+
size 3345720
|
data/Dermatology.mat
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:2b85673218727df5d9fd309b382cc8619d8efca653b9fa4b001f5614dea53eeb
|
| 3 |
+
size 700024
|
data/Factors.mat
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:f3e8714849b4647d74752aa75a8813c48c1c621de2cef4fa7da54e57a919cfbb
|
| 3 |
+
size 26030704
|
data/Movement_libras.mat
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:843da25828f4d6195c95f745dfeba9c197ca7dba2d3f527f655d68c37147d104
|
| 3 |
+
size 262320
|
data/Musk1.mat
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:c0242a3fdaf222db642b60fa9fd8224be3d94958f17a4bdce3c8062c48a11e6f
|
| 3 |
+
size 636176
|
data/Synthetic_control.mat
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:50157294b6ccc640fe3b3791ea3c784ab414b1a05864de2f931931983c5b1f56
|
| 3 |
+
size 2270504
|
data/Waveform.mat
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:dd453b528573aadab4f3385f5d47a2b66d2ac68563757170b3cc66a5290abb19
|
| 3 |
+
size 12820752
|
data/Wdbc.mat
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:f6ccf0ff7747481ebe864f935c6cc3d4c342377b2419ab607fec97a91b2351be
|
| 3 |
+
size 139080
|
data/analyzor.txt
ADDED
|
@@ -0,0 +1,57 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
[
|
| 2 |
+
{
|
| 3 |
+
"expression": "I(X;Y;Z)",
|
| 4 |
+
"operators": [
|
| 5 |
+
"Interaction Information"
|
| 6 |
+
],
|
| 7 |
+
"dependency": "Conditional Independence",
|
| 8 |
+
"theoretical_advantage": "Interaction Information is a measure of the unique information shared between three random variables X, Y, and Z. It can be used to detect conditional independence relationships, which can be useful for feature selection and model simplification.",
|
| 9 |
+
"complexity": "medium"
|
| 10 |
+
}
|
| 11 |
+
],
|
| 12 |
+
[
|
| 13 |
+
{
|
| 14 |
+
"expression": "I(X;Y)",
|
| 15 |
+
"operators": [
|
| 16 |
+
"Mutual Information"
|
| 17 |
+
],
|
| 18 |
+
"dependency": "measures the mutual dependence between two random variables X and Y",
|
| 19 |
+
"theoretical_advantage": "Mutual Information can be used for feature selection in machine learning, as it measures the dependence between features and the target variable.",
|
| 20 |
+
"complexity": "low"
|
| 21 |
+
}
|
| 22 |
+
],
|
| 23 |
+
[
|
| 24 |
+
{
|
| 25 |
+
"expression": "I(X;Y|Z)",
|
| 26 |
+
"operators": [
|
| 27 |
+
"MI"
|
| 28 |
+
],
|
| 29 |
+
"dependency": "Conditional Independence",
|
| 30 |
+
"theoretical_advantage": "It can help identify relevant features for classification or regression tasks.",
|
| 31 |
+
"complexity": "low"
|
| 32 |
+
}
|
| 33 |
+
],
|
| 34 |
+
[
|
| 35 |
+
{
|
| 36 |
+
"expression": "I(X;Y) - I(X;Z)",
|
| 37 |
+
"operators": [
|
| 38 |
+
"MI",
|
| 39 |
+
"MI"
|
| 40 |
+
],
|
| 41 |
+
"dependency": "X and Y are independent of Z",
|
| 42 |
+
"theoretical_advantage": "It can be used to identify features that are relevant to the target variable but not related to each other.",
|
| 43 |
+
"complexity": "low"
|
| 44 |
+
}
|
| 45 |
+
],
|
| 46 |
+
[
|
| 47 |
+
{
|
| 48 |
+
"expression": "I(X;Y|Z) - I(X;Y)",
|
| 49 |
+
"operators": [
|
| 50 |
+
"Mutual Information",
|
| 51 |
+
"Conditional Mutual Information"
|
| 52 |
+
],
|
| 53 |
+
"dependency": "Conditional Independence",
|
| 54 |
+
"theoretical_advantage": "It can help identify features that are relevant to the target variable given a set of other features.",
|
| 55 |
+
"complexity": "medium"
|
| 56 |
+
}
|
| 57 |
+
],
|
data/dna.mat
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:b0d9a32fd59c16b059be5236efb2f398229f5a41e0034afb27bedc8c126c6f1d
|
| 3 |
+
size 4613568
|
data/expressions.py
ADDED
|
@@ -0,0 +1,9 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
# Candidate scoring expressions, kept as plain strings.
EXPRESSIONS = [
    # mutual information on its own
    "I(X;Y)",
    # mutual information divided by H(X); the 1e-6 keeps the denominator non-zero
    "I(X;Y) / (H(X) + 1e-6)",
    # mutual information minus a 0.1-weighted term against X_other
    "I(X;Y) - 0.1 * I(X;X_other)",
    # conditional mutual information
    "I(X;Y|Z)",
    # mutual information divided by the sum of both entropies
    "I(X;Y) / (H(X) + H(Y))",
]
|
| 8 |
+
|
| 9 |
+
|
data/madelon.mat
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:9c9a047aaa7511f8ca965e05ca275155e0fc7d6a3fab682ecd30665c2c5a9915
|
| 3 |
+
size 8016240
|
data/results.db
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:531c443d140ed2b80d26ec6ec7131a74567641c6213d32e977c1b885115da4d4
|
| 3 |
+
size 331776
|
data/spambase.mat
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:d49f4a633f845513ff04e29ed6d9aa8fc0cc01eabddaaa682e10bdd33d4d3361
|
| 3 |
+
size 2135104
|
data/splice.mat
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:0c3caf785bab5285ebeca87ddff74807c18a57eef9331caf60eb4780466e3a31
|
| 3 |
+
size 778600
|
download_model.py
ADDED
|
@@ -0,0 +1,11 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
from huggingface_hub import snapshot_download
|
| 2 |
+
|
| 3 |
+
# Hub repository to download -- replace with the model you actually use.
MODEL_ID = "deepseek/DeepSeek-R1-Distill"
# Local directory that receives the downloaded files.
OUT_DIR = "/home/fangsensen/AutoFS/models/DeepSeek-R1-R1-1.5B"

# Fetch the repository snapshot into OUT_DIR, then report where it went.
snapshot_download(repo_id=MODEL_ID, local_dir=OUT_DIR, local_dir_use_symlinks=False)
print("Done! Model saved at", OUT_DIR)
|
extract_metadata.py
ADDED
|
@@ -0,0 +1,77 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
import os
|
| 2 |
+
import json
|
| 3 |
+
import scipy.io
|
| 4 |
+
import numpy as np
|
| 5 |
+
|
| 6 |
+
DATA_DIR = "data"
|
| 7 |
+
OUTPUT_FILE = "Webapp/dataset_metadata.json"
|
| 8 |
+
|
| 9 |
+
def get_metadata(data_dir=None):
    """Summarise every ``.mat`` file found in a directory.

    The layout of each file is guessed: the largest array is treated as the
    data matrix, and the first other array that has the sample count as one
    of its first two dimensions is treated as the label vector.  A file that
    does not match that layout is reported with zeros instead of failing.

    Args:
        data_dir: Directory to scan.  ``None`` (the default) means the
            module-level ``DATA_DIR``.

    Returns:
        dict mapping the dataset name (file name without ``.mat``) to a dict
        with integer ``n_samples``, ``n_features`` and ``n_classes``.  The
        dict is empty when the directory does not exist.
    """
    if data_dir is None:
        data_dir = DATA_DIR

    metadata = {}
    if not os.path.isdir(data_dir):
        print(f"Data directory {data_dir} not found.")
        return metadata

    # Sorted so that the result (and the JSON written from it) is stable
    # across runs and file systems.
    for filename in sorted(os.listdir(data_dir)):
        if not filename.endswith(".mat"):
            continue

        name = filename[:-4]  # Remove .mat
        path = os.path.join(data_dir, filename)
        try:
            mat = scipy.io.loadmat(path)

            # Keep only real variables; loadmat also returns '__header__' etc.
            arrays = {
                key: value
                for key, value in mat.items()
                if not key.startswith("__") and isinstance(value, np.ndarray)
            }

            n_samples = n_features = n_classes = 0

            # Heuristic: the largest non-empty array is the data matrix.
            data_key = max(arrays, key=lambda key: arrays[key].size, default=None)
            if data_key is not None and arrays[data_key].size == 0:
                data_key = None

            if data_key is not None:
                data = arrays[data_key]
                if data.ndim == 2:
                    n_samples, n_features = data.shape

                # Heuristic: labels have one entry per sample.  Skipped when
                # the sample count is unknown, so nothing matches on 0.
                label_key = None
                if n_samples:
                    for key, value in arrays.items():
                        # shape[:2] is empty for 0-d arrays, so they never match.
                        if key != data_key and n_samples in value.shape[:2]:
                            label_key = key
                            break

                if label_key is not None:
                    n_classes = len(np.unique(arrays[label_key]))

            metadata[name] = {
                "n_samples": int(n_samples),
                "n_features": int(n_features),
                "n_classes": int(n_classes),
            }
            print(f"Processed {name}: {n_samples}x{n_features}, {n_classes} classes")

        except Exception as e:
            # Best effort: one unreadable file must not stop the scan.
            print(f"Error processing {filename}: {e}")

    return metadata
|
| 72 |
+
|
| 73 |
+
if __name__ == "__main__":
    # Script entry point: collect the per-dataset summary and store it as
    # indented JSON at OUTPUT_FILE.
    collected = get_metadata()
    with open(OUTPUT_FILE, "w") as out_fh:
        json.dump(collected, out_fh, indent=2)
    print(f"Metadata saved to {OUTPUT_FILE}")
|
main.py
ADDED
|
@@ -0,0 +1,244 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
# import json, datetime
|
| 2 |
+
# from config import *
|
| 3 |
+
# import pandas as pd
|
| 4 |
+
# import scipy.io as scio
|
| 5 |
+
# from modules.expression_pool import init_db, add_expr, top_exprs
|
| 6 |
+
# from agents.generator_agent import GeneratorAgent
|
| 7 |
+
# from agents.evaluator_agent import evaluate_expression
|
| 8 |
+
# from modules.utils import load_mat_as_numeric
|
| 9 |
+
# # prepare
|
| 10 |
+
# conn = init_db(DB_PATH)
|
| 11 |
+
# # df = pd.read_csv(DATASET_PATH)
|
| 12 |
+
# # df = scio.loadmat(DATASET_PATH) # 读取数据文件
|
| 13 |
+
# X, y = load_mat_as_numeric(DATASET_PATH)
|
| 14 |
+
# X_df = pd.DataFrame(X)
|
| 15 |
+
# # print(df)
|
| 16 |
+
# # X_df = pd.DataFrame(df['X']) # 读取训练数据
|
| 17 |
+
# # print(df['Y'])
|
| 18 |
+
# # y0 = pd.DataFrame(df['Y']) # 读取标签
|
| 19 |
+
# # X_df = df.drop(columns=['label'])
|
| 20 |
+
# # y = y0.values
|
| 21 |
+
# # print("y type:", type(y), "dtype:", getattr(y, "dtype", None))
|
| 22 |
+
# # print("y example:", y[:10])
|
| 23 |
+
|
| 24 |
+
|
| 25 |
+
# # load seed
|
| 26 |
+
# with open(EXPR_SEED_PATH) as f:
|
| 27 |
+
# seeds = json.load(f)
|
| 28 |
+
# # evaluate seeds first
|
| 29 |
+
# for s in seeds:
|
| 30 |
+
# score, fvals, top_idx = evaluate_expression(s['expression'], X_df, y, TOP_K, CV_FOLDS)
|
| 31 |
+
# add_expr(conn, s['expression'], score, s.get('explanation',''), str(s.get('complexity','')))
|
| 32 |
+
|
| 33 |
+
# # init generator
|
| 34 |
+
# gen = GeneratorAgent(MODEL_NAME)
|
| 35 |
+
|
| 36 |
+
# # iterative loop
|
| 37 |
+
# for it in range(ITERATIONS):
|
| 38 |
+
# print("Iteration", it+1)
|
| 39 |
+
# refs = top_exprs(conn, k=TOP_K)
|
| 40 |
+
# # build prompt_text with refs + feature stats
|
| 41 |
+
# # prompt = "Given top expressions: " + str(refs) + "\nGenerate expressions in format: Expression: ... Rationale: ..."
|
| 42 |
+
# top_expressions = [] # List[(expr, score)]
|
| 43 |
+
# top_expressions.append((refs, score))
|
| 44 |
+
# top_expressions = sorted(
|
| 45 |
+
# top_expressions,
|
| 46 |
+
# key=lambda x: -x[1]
|
| 47 |
+
# )[:5]
|
| 48 |
+
# new_text = gen.generate_candidates(top_expressions)
|
| 49 |
+
# for out in new_text:
|
| 50 |
+
# # extract Expression line
|
| 51 |
+
# expr_line = None
|
| 52 |
+
# for line in out.splitlines():
|
| 53 |
+
# if line.strip().lower().startswith("expression"):
|
| 54 |
+
# expr_line = line.split(":",1)[1].strip()
|
| 55 |
+
# break
|
| 56 |
+
# if not expr_line: expr_line = out.strip()
|
| 57 |
+
# score, fvals, top_idx = evaluate_expression(expr_line, X_df, y, TOP_K, CV_FOLDS)
|
| 58 |
+
# add_expr(conn, expr_line, score, out, "")
|
| 59 |
+
# print(f"Candidate {expr_line} -> score {score:.4f}")
|
| 60 |
+
|
| 61 |
+
|
| 62 |
+
|
| 63 |
+
# results = []
|
| 64 |
+
|
| 65 |
+
# for expr in EXPRESSIONS:
|
| 66 |
+
# exec_out = executor.run(expr, X, y)
|
| 67 |
+
# analysis = analyzer.analyze(expr, exec_out["cv_score"])
|
| 68 |
+
|
| 69 |
+
# results.append({
|
| 70 |
+
# "expression": expr,
|
| 71 |
+
# "score": exec_out["cv_score"],
|
| 72 |
+
# "analysis": analysis
|
| 73 |
+
# })
|
| 74 |
+
|
| 75 |
+
# ranking = judge.rank(results)
|
| 76 |
+
#-----------------------------------------------------------------------2.0---------------
|
| 77 |
+
|
| 78 |
+
# from agents.analyzer_agent import AnalyzerAgent
|
| 79 |
+
|
| 80 |
+
# MODEL_PATH = "/data1/fangsensen/deepseek-math-7b-rl"
|
| 81 |
+
|
| 82 |
+
# agent = AnalyzerAgent(
|
| 83 |
+
# name="AnalyzerAgent",
|
| 84 |
+
# model_path=MODEL_PATH
|
| 85 |
+
# )
|
| 86 |
+
|
| 87 |
+
# expressions = [
|
| 88 |
+
# "I(X;Y)",
|
| 89 |
+
# "I(X;Y|Z)",
|
| 90 |
+
# "I(X;Y) - I(X;Z)",
|
| 91 |
+
# "I(X;Y|Z) - I(X;Y)",
|
| 92 |
+
# "I(X;Y;Z)"
|
| 93 |
+
# ]
|
| 94 |
+
# # expressions = [
|
| 95 |
+
# # "I(X;Y|Z) - I(X;Y)",
|
| 96 |
+
# # ]
|
| 97 |
+
# for expr in expressions:
|
| 98 |
+
# print("=" * 80)
|
| 99 |
+
# result = agent.analyze_expression(expr)
|
| 100 |
+
# print(result)
|
| 101 |
+
#-----------------------------------------------------------------------Router---------------
|
| 102 |
+
import numpy as np
|
| 103 |
+
from agents.router_agent import FSRouterAgent
|
| 104 |
+
|
| 105 |
+
import scipy.io as scio
|
| 106 |
+
import pandas as pd
|
| 107 |
+
from sklearn.preprocessing import LabelEncoder
|
| 108 |
+
|
| 109 |
+
def load_mat_dataset(
    file_path,
    feature_keys=("X", "data", "fea"),
    label_keys=("Y", "y", "label"),
):
    """
    Generic ``.mat`` dataset loader (compatible with FSExecutor / Agent).

    Parameters
    ----------
    file_path : str
        Path of the ``.mat`` file.
    feature_keys : tuple
        Candidate variable names for the feature matrix, tried in order.
    label_keys : tuple
        Candidate variable names for the labels, tried in order.

    Returns
    -------
    X : np.ndarray, shape (n_samples, n_features)
        Feature matrix converted to float.
    y : np.ndarray, shape (n_samples,)
        Labels after ``LabelEncoder.fit_transform``.
    meta : dict
        ``n_samples``, ``n_features``, ``n_classes``, ``classes`` (unique
        encoded labels) and the fitted ``label_encoder``.

    Raises
    ------
    KeyError
        When none of the candidate feature keys, or none of the candidate
        label keys, is present in the file.
    """

    def _first_present(mat, candidates):
        # Value stored under the first candidate name that exists, else None.
        return next((mat[name] for name in candidates if name in mat), None)

    def _unwrap(cell):
        # Object-dtype cells may wrap the value in a list/array; take element 0.
        return cell[0] if isinstance(cell, (list, np.ndarray)) else cell

    contents = scio.loadmat(file_path)

    # ---------- 1. feature matrix ----------
    features = _first_present(contents, feature_keys)
    if features is None:
        raise KeyError(f"Cannot find feature matrix in {file_path}")

    features = np.asarray(features)
    if features.dtype == object:
        features = np.array(
            [[float(_unwrap(cell)) for cell in row] for row in features]
        )
    else:
        features = features.astype(float)

    # ---------- 2. labels ----------
    targets = _first_present(contents, label_keys)
    if targets is None:
        raise KeyError(f"Cannot find label vector in {file_path}")

    # Labels are commonly stored as (n, 1); flatten to one dimension.
    targets = np.asarray(targets).reshape(-1)

    # ---------- 3. label clean-up and encoding ----------
    # Handles object / string / mixed label types.
    if targets.dtype == object:
        targets = pd.Series(targets).apply(_unwrap)

    label_encoder = LabelEncoder()
    targets = label_encoder.fit_transform(targets)

    # ---------- 4. metadata ----------
    distinct = np.unique(targets)
    meta = {
        "n_samples": features.shape[0],
        "n_features": features.shape[1],
        "n_classes": len(distinct),
        "classes": distinct,
        "label_encoder": label_encoder,
    }

    return features, targets, meta
|
| 187 |
+
|
| 188 |
+
base_url = "/home/fangsensen/AutoFS/data/"
|
| 189 |
+
datanames = ['dna','Factors','madelon','Movement_libras','Musk1','spambase','splice','Synthetic_control', 'Waveform','Wdbc',]
|
| 190 |
+
# dataname = 'Authorship'
|
| 191 |
+
def main(dataname):
    """Run the feature-selection router on one dataset and print its ranking.

    Loads ``<base_url><dataname>.mat``, builds the task description, passes
    it to ``FSRouterAgent.run`` and prints every returned entry with its
    1-based rank.

    Args:
        dataname: Dataset file name without the ``.mat`` extension.

    Returns:
        Whatever ``FSRouterAgent.run`` returned (the leaderboard).
    """
    dataset_file = base_url + dataname + ".mat"
    X, y, _meta = load_mat_dataset(dataset_file)

    # NOTE(review): "n_selected_features" appears twice with different values
    # (5 at the top level, 15 inside "params"); which one the router uses is
    # not visible from here.
    task = {
        "X": X,
        "y": y,
        "algorithms": ["JMIM","CFR","DCSF","IWFS","MRI","MRMD","UCRFS","CSMDCCMR",],
        "n_selected_features": 5,
        "class_specific": False,
        "classifiers": ["nb", "svm", "rf"],
        "cv": 10,
        "random_state": 19,
        "params":{"n_selected_features":15,},
        "dataname":dataname,
    }

    leaderboard = FSRouterAgent().run(task)

    for position, entry in enumerate(leaderboard, start=1):
        print(f"Rank {position}: {entry}")
    return leaderboard


if __name__ == "__main__":
    # Process every configured dataset in turn.
    for dataname in datanames:
        main(dataname)
|
| 223 |
+
|
| 224 |
+
|
| 225 |
+
|
| 226 |
+
|
| 227 |
+
# {'selected_features': [59, 50, 56, 4, 38, 9, 29, 23, 0, 20, 34, 36, 24, 26, 28],
|
| 228 |
+
# 'num_features': 15,
|
| 229 |
+
# 'metrics': {'nb': {'f1': 0.9181133571145461, 'auc': 0.9807805770573524},
|
| 230 |
+
# 'svm': {'f1': 0.9282600079270711, 'auc': 0.980695564275392},
|
| 231 |
+
# 'rf': {'f1': 0.9219976218787156, 'auc': 0.9768411621948705}},
|
| 232 |
+
# 'time': 7.378173112869263,
|
| 233 |
+
# 'algorithm': 'JMIM'},
|
| 234 |
+
|
| 235 |
+
|
| 236 |
+
# {'selected_features': [59, 50, 56, 4, 38, 0, 9, 29, 23, 20, 36, 34, 24, 28, 26],
|
| 237 |
+
# 'num_features': 15,
|
| 238 |
+
# 'metrics': {'nb': {'f1': 0.9163694015061433, 'auc': 0.9805189493459717},
|
| 239 |
+
# 'svm': {'f1': 0.9265953230281413, 'auc': 0.98064247666047},
|
| 240 |
+
# 'rf': {'f1': 0.9189853349187476, 'auc': 0.9769441217042379}},
|
| 241 |
+
# 'time': 2.0774385929107666,
|
| 242 |
+
# 'algorithm': 'CFR'}
|
| 243 |
+
|
| 244 |
+
|
modules/expr_to_code.py
ADDED
|
@@ -0,0 +1,163 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
import sympy as sp
|
| 2 |
+
import numpy as np
|
| 3 |
+
from sklearn.metrics import mutual_info_score
|
| 4 |
+
# 符号
|
| 5 |
+
import sympy as sp
|
| 6 |
+
import pandas as pd
|
| 7 |
+
|
| 8 |
+
# symbols
|
| 9 |
+
# Symbols that may appear in an expression string.
X, Y, Z = sp.symbols("X Y Z")


class MI(sp.Function):
    """Symbolic two-argument node standing for mutual information."""

    nargs = (2,)


class CMI(sp.Function):
    """Symbolic three-argument node standing for conditional mutual information."""

    nargs = (3,)


class II(sp.Function):
    """Symbolic three-argument node standing for interaction information."""

    nargs = (3,)


# Names the parser resolves while reading an expression string.
ALLOWED_LOCALS = {
    "X": X,
    "Y": Y,
    "Z": Z,
    "I": MI,    # I(X,Y)
    "CI": CMI,  # I(X,Y|Z) conditional mutual information
    "II": II,   # I(X;Y;Z) interaction information
}
|
| 30 |
+
|
| 31 |
+
def parse_expression(expr_str: str) -> sp.Expr:
    """
    Turn an expression string into a SymPy expression.

    Names are resolved through ``ALLOWED_LOCALS``.

    NOTE(review): ``sympify`` evaluates the string it is given (see the SymPy
    documentation), so strings from an untrusted source should be validated
    before they reach this function.
    """
    return sp.sympify(expr_str, locals=ALLOWED_LOCALS)
|
| 37 |
+
|
| 38 |
+
|
| 39 |
+
def entropy(x):
    """Return the empirical Shannon entropy of ``x`` in nats.

    A one-dimensional input is a sample of a single variable.  An input with
    two or more dimensions is a joint sample: every row is one observation,
    and the entropy is taken over distinct rows.  (Without ``axis=0``
    ``np.unique`` flattens its input, which would pool all columns into one
    variable instead of giving the joint entropy.)

    Args:
        x: Array-like of discrete values, 1-D or row-per-observation 2-D.

    Returns:
        The entropy as a float.
    """
    values = np.asarray(x)
    if values.ndim > 1:
        _, cnt = np.unique(values, axis=0, return_counts=True)
    else:
        _, cnt = np.unique(values, return_counts=True)
    p = cnt / cnt.sum()
    # The 1e-12 offset keeps log() away from zero.
    return -np.sum(p * np.log(p + 1e-12))


def mi(x, y):
    """Return the mutual information of ``x`` and ``y`` via ``mutual_info_score``."""
    return mutual_info_score(x, y)


def cmi(x, y, z):
    """Return the conditional mutual information I(X;Y|Z).

    Computed from joint entropies:
    I(X;Y|Z) = H(X,Z) + H(Y,Z) - H(Z) - H(X,Y,Z)

    Args:
        x, y, z: Equal-length array-likes of discrete values.

    Returns:
        The conditional mutual information in nats.
    """
    return (
        entropy(np.c_[x, z])
        + entropy(np.c_[y, z])
        - entropy(z)
        - entropy(np.c_[x, y, z])
    )


def interaction_info(x, y, z):
    """Return the interaction information I(X;Y;Z) = I(X;Y) - I(X;Y|Z)."""
    return mi(x, y) - cmi(x, y, z)
|
| 59 |
+
|
| 60 |
+
|
| 61 |
+
def expr_to_callable(expr: sp.Expr):
    """Compile a parsed expression into a function of data arrays.

    Args:
        expr: Expression built from the symbols X/Y/Z, numbers, ``+``, ``*``,
            powers and the ``MI`` / ``CMI`` / ``II`` function nodes.

    Returns:
        A function ``f(X_arr, Y_arr, Z_arr=None)`` that evaluates ``expr``
        with the symbols bound to the given arrays.  Evaluation raises
        ``ValueError`` on a node type that is not supported and ``KeyError``
        when the expression uses ``Z`` but no ``Z_arr`` was supplied.
    """

    def _walk(node, bindings):
        # Information-theoretic function nodes: evaluate the arguments left
        # to right, then hand them to the matching numeric estimator.
        for node_cls, estimator in ((MI, mi), (CMI, cmi), (II, interaction_info)):
            if isinstance(node, node_cls):
                operands = [_walk(arg, bindings) for arg in node.args]
                return estimator(*operands)

        # Leaf symbols resolve to the bound arrays.
        for symbol, key in ((X, "X"), (Y, "Y"), (Z, "Z")):
            if node == symbol:
                return bindings[key]

        if node.is_Number:
            return float(node)

        if node.is_Add:
            return sum(_walk(term, bindings) for term in node.args)

        if node.is_Mul:
            product = 1.0
            for factor in node.args:
                product *= _walk(factor, bindings)
            return product

        if node.is_Pow:
            base, exponent = node.args
            return _walk(base, bindings) ** _walk(exponent, bindings)

        raise ValueError(f"Unsupported node: {node}")

    def f(X_arr, Y_arr, Z_arr=None):
        bindings = {"X": X_arr, "Y": Y_arr}
        if Z_arr is not None:
            bindings["Z"] = Z_arr
        return _walk(expr, bindings)

    return f
|
| 112 |
+
|
| 113 |
+
|
| 114 |
+
|
| 115 |
+
from sklearn.preprocessing import LabelEncoder
|
| 116 |
+
def changetosinge(x):
    """Convert a single cell to a Python float."""
    return float(x)


def prepare_data(dataname, base_url):
    """Load ``<base_url>/<dataname>.mat`` and discretise it.

    Features are read from the ``'X'`` variable and labels from ``'Y'``.
    Every feature column is cut into 5 equal-width bins, and the labels are
    integer-encoded with ``LabelEncoder``.

    Args:
        dataname: Dataset name, i.e. the file name without ``.mat``.
        base_url: Directory that contains the file.

    Returns:
        A ``(data_processed, classes)`` tuple.  ``data_processed`` is a
        DataFrame whose columns are named ``"0"`` .. ``"<n_features>"``; the
        last column holds the encoded labels.  ``classes`` is the list of
        distinct encoded labels.
    """
    mat = scio.loadmat(os.path.join(base_url, dataname + '.mat'))
    X0 = pd.DataFrame(mat['X'])
    y0 = pd.DataFrame(mat['Y'])

    if dataname == 'Dermatology':
        # The last Dermatology column stores wrapped values: take element 0
        # of each cell, integer-encode the result and store it as column 33.
        unwrapped = np.array([item[0] for item in X0.iloc[:, -1]])
        # .copy() so the assignment below writes to an independent frame
        # rather than to a slice of the original.
        X0 = X0.iloc[:, :-1].copy()
        X0[33] = LabelEncoder().fit_transform(unwrapped)

    # Element-wise float conversion.  Per-column Series.map is used because
    # DataFrame.applymap is deprecated since pandas 2.1.
    X0 = X0.apply(lambda col: col.map(changetosinge))
    # Flatten the labels so LabelEncoder receives a 1-D array.
    labels = y0.apply(lambda col: col.map(changetosinge)).to_numpy().ravel()
    y_encoded = LabelEncoder().fit_transform(labels)

    # Build all discretised columns in one go instead of inserting them one
    # at a time into an empty frame.
    X = pd.DataFrame(
        {col: pd.cut(X0[col], bins=5, labels=False) for col in X0.columns}
    )

    n_features = X.shape[1]
    X.columns = [str(i) for i in range(n_features)]
    y = pd.DataFrame({str(n_features): y_encoded})
    data_processed = pd.concat([X, y], axis=1)

    return data_processed, list(set(y_encoded))
|
| 150 |
+
|
| 151 |
+
import os
|
| 152 |
+
import scipy.io as scio
|
| 153 |
+
if __name__ == "__main__":
    # Manual smoke test.  It is guarded so that importing this module no
    # longer loads a dataset, and it defines X_arr / y_arr, which were
    # previously used without being assigned.
    dataname = 'Authorship'
    base_url = '/home/fangsensen/AutoFS/data/'
    data_processed, class_set = prepare_data(dataname, base_url)
    # print(data_processed)
    X_arr = data_processed['0']
    # prepare_data puts the labels in the last column ('69' for Authorship).
    y_arr = data_processed.iloc[:, -1]
    print(111111,X_arr,2222222,y_arr)
    expr = parse_expression("I(X,Y)")
    f = expr_to_callable(expr)
    score = f(X_arr, y_arr)
    print(score)
|
modules/expression_pool.py
ADDED
|
@@ -0,0 +1,24 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
import sqlite3, os
|
| 2 |
+
def init_db(db_path="data/results.db"):
    """Open the results database and make sure the ``exprs`` table exists.

    Args:
        db_path: SQLite database location.  The parent directory is created
            when the path has one; bare file names and ``":memory:"`` are
            accepted as well.

    Returns:
        An open ``sqlite3.Connection``.
    """
    parent = os.path.dirname(db_path)
    # dirname() is "" for a bare file name or ":memory:", and makedirs("")
    # raises, so only create a directory when there is one to create.
    if parent:
        os.makedirs(parent, exist_ok=True)
    conn = sqlite3.connect(db_path)
    conn.execute("""
    CREATE TABLE IF NOT EXISTS exprs(
        expr TEXT PRIMARY KEY,
        score REAL,
        rationale TEXT,
        complexity TEXT,
        created_at TEXT
    )""")
    conn.commit()
    return conn
|
| 15 |
+
|
| 16 |
+
def add_expr(conn, expr, score, rationale="", complexity=""):
    """Insert an expression row, replacing any row with the same ``expr``.

    ``created_at`` is filled in by SQLite with ``datetime('now')``, and the
    change is committed before returning.

    Args:
        conn: Open SQLite connection that has the ``exprs`` table.
        expr: Expression text (the table's primary key).
        score: Score stored with the expression.
        rationale: Optional free-text explanation.
        complexity: Optional complexity label.
    """
    statement = "INSERT OR REPLACE INTO exprs(expr, score, rationale, complexity, created_at) VALUES(?,?,?,?,datetime('now'))"
    row_values = (expr, score, rationale, complexity)
    conn.execute(statement, row_values)
    conn.commit()
|
| 20 |
+
|
| 21 |
+
def top_exprs(conn, k=5, min_score=0.0):
    """Return the best-scoring stored expressions.

    Args:
        conn: Open SQLite connection that has the ``exprs`` table.
        k: Maximum number of rows to return.
        min_score: Rows scoring below this value are left out.

    Returns:
        List of ``(expr, score, rationale)`` tuples, highest score first.
    """
    query = "SELECT expr,score,rationale FROM exprs WHERE score>=? ORDER BY score DESC LIMIT ?"
    return conn.execute(query, (min_score, k)).fetchall()
|
modules/modules/expr_to_code.py
ADDED
|
@@ -0,0 +1,163 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
import sympy as sp
|
| 2 |
+
import numpy as np
|
| 3 |
+
from sklearn.metrics import mutual_info_score
|
| 4 |
+
# 符号
|
| 5 |
+
import sympy as sp
|
| 6 |
+
import pandas as pd
|
| 7 |
+
|
| 8 |
+
# symbols
|
| 9 |
+
# Symbols that may appear in an expression string.
X, Y, Z = sp.symbols("X Y Z")


class MI(sp.Function):
    """Symbolic two-argument node standing for mutual information."""

    nargs = (2,)


class CMI(sp.Function):
    """Symbolic three-argument node standing for conditional mutual information."""

    nargs = (3,)


class II(sp.Function):
    """Symbolic three-argument node standing for interaction information."""

    nargs = (3,)


# Names the parser resolves while reading an expression string.
ALLOWED_LOCALS = {
    "X": X,
    "Y": Y,
    "Z": Z,
    "I": MI,    # I(X,Y)
    "CI": CMI,  # I(X,Y|Z) conditional mutual information
    "II": II,   # I(X;Y;Z) interaction information
}
|
| 30 |
+
|
| 31 |
+
def parse_expression(expr_str: str) -> sp.Expr:
    """
    Turn an expression string into a SymPy expression.

    Names are resolved through ``ALLOWED_LOCALS``.

    NOTE(review): ``sympify`` evaluates the string it is given (see the SymPy
    documentation), so strings from an untrusted source should be validated
    before they reach this function.
    """
    return sp.sympify(expr_str, locals=ALLOWED_LOCALS)
|
| 37 |
+
|
| 38 |
+
|
| 39 |
+
def entropy(x):
    """Return the empirical Shannon entropy of ``x`` in nats.

    A one-dimensional input is a sample of a single variable.  An input with
    two or more dimensions is a joint sample: every row is one observation,
    and the entropy is taken over distinct rows.  (Without ``axis=0``
    ``np.unique`` flattens its input, which would pool all columns into one
    variable instead of giving the joint entropy.)

    Args:
        x: Array-like of discrete values, 1-D or row-per-observation 2-D.

    Returns:
        The entropy as a float.
    """
    values = np.asarray(x)
    if values.ndim > 1:
        _, cnt = np.unique(values, axis=0, return_counts=True)
    else:
        _, cnt = np.unique(values, return_counts=True)
    p = cnt / cnt.sum()
    # The 1e-12 offset keeps log() away from zero.
    return -np.sum(p * np.log(p + 1e-12))


def mi(x, y):
    """Return the mutual information of ``x`` and ``y`` via ``mutual_info_score``."""
    return mutual_info_score(x, y)


def cmi(x, y, z):
    """Return the conditional mutual information I(X;Y|Z).

    Computed from joint entropies:
    I(X;Y|Z) = H(X,Z) + H(Y,Z) - H(Z) - H(X,Y,Z)

    Args:
        x, y, z: Equal-length array-likes of discrete values.

    Returns:
        The conditional mutual information in nats.
    """
    return (
        entropy(np.c_[x, z])
        + entropy(np.c_[y, z])
        - entropy(z)
        - entropy(np.c_[x, y, z])
    )


def interaction_info(x, y, z):
    """Return the interaction information I(X;Y;Z) = I(X;Y) - I(X;Y|Z)."""
    return mi(x, y) - cmi(x, y, z)
|
| 59 |
+
|
| 60 |
+
|
| 61 |
+
def expr_to_callable(expr: sp.Expr):
    """Compile a parsed expression into a function of data arrays.

    Args:
        expr: Expression built from the symbols X/Y/Z, numbers, ``+``, ``*``,
            powers and the ``MI`` / ``CMI`` / ``II`` function nodes.

    Returns:
        A function ``f(X_arr, Y_arr, Z_arr=None)`` that evaluates ``expr``
        with the symbols bound to the given arrays.  Evaluation raises
        ``ValueError`` on a node type that is not supported and ``KeyError``
        when the expression uses ``Z`` but no ``Z_arr`` was supplied.
    """

    def _walk(node, bindings):
        # Information-theoretic function nodes: evaluate the arguments left
        # to right, then hand them to the matching numeric estimator.
        for node_cls, estimator in ((MI, mi), (CMI, cmi), (II, interaction_info)):
            if isinstance(node, node_cls):
                operands = [_walk(arg, bindings) for arg in node.args]
                return estimator(*operands)

        # Leaf symbols resolve to the bound arrays.
        for symbol, key in ((X, "X"), (Y, "Y"), (Z, "Z")):
            if node == symbol:
                return bindings[key]

        if node.is_Number:
            return float(node)

        if node.is_Add:
            return sum(_walk(term, bindings) for term in node.args)

        if node.is_Mul:
            product = 1.0
            for factor in node.args:
                product *= _walk(factor, bindings)
            return product

        if node.is_Pow:
            base, exponent = node.args
            return _walk(base, bindings) ** _walk(exponent, bindings)

        raise ValueError(f"Unsupported node: {node}")

    def f(X_arr, Y_arr, Z_arr=None):
        bindings = {"X": X_arr, "Y": Y_arr}
        if Z_arr is not None:
            bindings["Z"] = Z_arr
        return _walk(expr, bindings)

    return f
|
| 112 |
+
|
| 113 |
+
|
| 114 |
+
|
| 115 |
+
from sklearn.preprocessing import LabelEncoder
|
| 116 |
+
def changetosinge(x):
    """Convert a single cell to a Python float."""
    return float(x)


def prepare_data(dataname, base_url):
    """Load ``<base_url>/<dataname>.mat`` and discretise it.

    Features are read from the ``'X'`` variable and labels from ``'Y'``.
    Every feature column is cut into 5 equal-width bins, and the labels are
    integer-encoded with ``LabelEncoder``.

    Args:
        dataname: Dataset name, i.e. the file name without ``.mat``.
        base_url: Directory that contains the file.

    Returns:
        A ``(data_processed, classes)`` tuple.  ``data_processed`` is a
        DataFrame whose columns are named ``"0"`` .. ``"<n_features>"``; the
        last column holds the encoded labels.  ``classes`` is the list of
        distinct encoded labels.
    """
    mat = scio.loadmat(os.path.join(base_url, dataname + '.mat'))
    X0 = pd.DataFrame(mat['X'])
    y0 = pd.DataFrame(mat['Y'])

    if dataname == 'Dermatology':
        # The last Dermatology column stores wrapped values: take element 0
        # of each cell, integer-encode the result and store it as column 33.
        unwrapped = np.array([item[0] for item in X0.iloc[:, -1]])
        # .copy() so the assignment below writes to an independent frame
        # rather than to a slice of the original.
        X0 = X0.iloc[:, :-1].copy()
        X0[33] = LabelEncoder().fit_transform(unwrapped)

    # Element-wise float conversion.  Per-column Series.map is used because
    # DataFrame.applymap is deprecated since pandas 2.1.
    X0 = X0.apply(lambda col: col.map(changetosinge))
    # Flatten the labels so LabelEncoder receives a 1-D array.
    labels = y0.apply(lambda col: col.map(changetosinge)).to_numpy().ravel()
    y_encoded = LabelEncoder().fit_transform(labels)

    # Build all discretised columns in one go instead of inserting them one
    # at a time into an empty frame.
    X = pd.DataFrame(
        {col: pd.cut(X0[col], bins=5, labels=False) for col in X0.columns}
    )

    n_features = X.shape[1]
    X.columns = [str(i) for i in range(n_features)]
    y = pd.DataFrame({str(n_features): y_encoded})
    data_processed = pd.concat([X, y], axis=1)

    return data_processed, list(set(y_encoded))
|
| 150 |
+
|
| 151 |
+
import os
|
| 152 |
+
import scipy.io as scio
|
| 153 |
+
# Ad-hoc smoke test: score the first feature of one dataset against its label
# with the expression "I(X,Y)".
dataname = 'Authorship'
base_url = '/home/fangsensen/AutoFS/data/'
data_processed, class_set = prepare_data(dataname, base_url)
# print(data_processed)

# X_arr / y_arr were used below while their assignments were commented out,
# which raises NameError. prepare_data names the features '0', '1', ... and
# stores the encoded label in the last column, so take the label by position.
# NOTE(review): the disabled code read column '69' for y_arr -- confirm that
# the label column (and not feature 69) is what was intended.
X_arr = data_processed['0']
y_arr = data_processed[data_processed.columns[-1]]
print(111111,X_arr,2222222,y_arr)
expr = parse_expression("I(X,Y)")
f = expr_to_callable(expr)
score = f(X_arr, y_arr)
print(score)
|
modules/modules/expression_pool.py
ADDED
|
@@ -0,0 +1,24 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
import sqlite3, os
|
| 2 |
+
def init_db(db_path="data/results.db"):
    """Open the SQLite expression store, creating it if necessary.

    Creates the parent directory of ``db_path`` when it has one, connects,
    and makes sure the ``exprs`` table exists.

    Args:
        db_path: Path of the database file. Bare file names and the special
            ``":memory:"`` database are accepted as well.

    Returns:
        An open ``sqlite3.Connection``; the caller is responsible for closing it.
    """
    parent_dir = os.path.dirname(db_path)
    # dirname is '' for a bare file name or ":memory:", and os.makedirs('')
    # raises FileNotFoundError, so only create a directory when one is named.
    if parent_dir:
        os.makedirs(parent_dir, exist_ok=True)
    conn = sqlite3.connect(db_path)
    conn.execute("""
        CREATE TABLE IF NOT EXISTS exprs(
            expr TEXT PRIMARY KEY,
            score REAL,
            rationale TEXT,
            complexity TEXT,
            created_at TEXT
        )""")
    conn.commit()
    return conn
|
| 15 |
+
|
| 16 |
+
def add_expr(conn, expr, score, rationale="", complexity=""):
    """Insert an expression row, or overwrite the row with the same ``expr``.

    ``created_at`` is filled by SQLite with ``datetime('now')`` and the change
    is committed before returning.
    """
    statement = "INSERT OR REPLACE INTO exprs(expr, score, rationale, complexity, created_at) VALUES(?,?,?,?,datetime('now'))"
    values = (expr, score, rationale, complexity)
    conn.execute(statement, values)
    conn.commit()
|
| 20 |
+
|
| 21 |
+
def top_exprs(conn, k=5, min_score=0.0):
    """Return at most ``k`` expressions with ``score >= min_score``, best first.

    Each result row is an ``(expr, score, rationale)`` tuple.
    """
    query = "SELECT expr,score,rationale FROM exprs WHERE score>=? ORDER BY score DESC LIMIT ?"
    bound_values = (min_score, k)
    return conn.execute(query, bound_values).fetchall()
|
modules/modules/utils.py
ADDED
|
@@ -0,0 +1,35 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
import numpy as np
|
| 2 |
+
import scipy.io as sio
|
| 3 |
+
|
| 4 |
+
def load_mat_as_numeric(path, x_key="X", y_key="Y"):
    """Load a feature matrix and a label vector from a MATLAB ``.mat`` file.

    Handles both MATLAB cell arrays of numeric strings (each cell arrives as
    a small array such as ``array(['46.0'])``) and plain numeric matrices.

    Args:
        path: Location of the ``.mat`` file, passed to ``scipy.io.loadmat``.
        x_key: Name of the feature variable inside the file.
        y_key: Name of the label variable inside the file.

    Returns:
        ``(X, y)``: ``X`` is a float array; ``y`` is flattened to 1-D and is
        float when every label parses as a number, otherwise ``str``.

    Raises:
        KeyError: if ``x_key`` or ``y_key`` is not present in the file.
        ValueError: if a feature value cannot be parsed as a float.
    """
    data = sio.loadmat(path)
    X_raw = data[x_key]
    y_raw = data[y_key]

    # Step 1: flatten MATLAB cell array elements into stripped strings
    def clean_cell_array(arr):
        cleaned = []
        for row in arr:
            new_row = []
            for elem in row:
                # elem is usually array(['46.0']); ravel also copes with
                # 0-d and (1, 1) wrappers.
                if isinstance(elem, np.ndarray):
                    elem = elem.ravel()[0]  # '46.0'
                # str() lets plain numeric cells through; a bare .strip()
                # would fail on them.
                new_row.append(str(elem).strip())
            cleaned.append(new_row)
        return np.array(cleaned)

    X_str = clean_cell_array(X_raw)
    y_str = clean_cell_array(y_raw).reshape(-1)

    # Step 2: convert X to float
    X = X_str.astype(float)

    # Step 3: convert y to numeric, or keep it as strings for named classes
    try:
        y = y_str.astype(float)
    except ValueError:
        # Only a failed parse should fall back; anything else is a real error.
        y = y_str.astype(str)

    return X, y
|
modules/utils.py
ADDED
|
@@ -0,0 +1,35 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
import numpy as np
|
| 2 |
+
import scipy.io as sio
|
| 3 |
+
|
| 4 |
+
def load_mat_as_numeric(path, x_key="X", y_key="Y"):
    """Load a feature matrix and a label vector from a MATLAB ``.mat`` file.

    Handles both MATLAB cell arrays of numeric strings (each cell arrives as
    a small array such as ``array(['46.0'])``) and plain numeric matrices.

    Args:
        path: Location of the ``.mat`` file, passed to ``scipy.io.loadmat``.
        x_key: Name of the feature variable inside the file.
        y_key: Name of the label variable inside the file.

    Returns:
        ``(X, y)``: ``X`` is a float array; ``y`` is flattened to 1-D and is
        float when every label parses as a number, otherwise ``str``.

    Raises:
        KeyError: if ``x_key`` or ``y_key`` is not present in the file.
        ValueError: if a feature value cannot be parsed as a float.
    """
    data = sio.loadmat(path)
    X_raw = data[x_key]
    y_raw = data[y_key]

    # Step 1: flatten MATLAB cell array elements into stripped strings
    def clean_cell_array(arr):
        cleaned = []
        for row in arr:
            new_row = []
            for elem in row:
                # elem is usually array(['46.0']); ravel also copes with
                # 0-d and (1, 1) wrappers.
                if isinstance(elem, np.ndarray):
                    elem = elem.ravel()[0]  # '46.0'
                # str() lets plain numeric cells through; a bare .strip()
                # would fail on them.
                new_row.append(str(elem).strip())
            cleaned.append(new_row)
        return np.array(cleaned)

    X_str = clean_cell_array(X_raw)
    y_str = clean_cell_array(y_raw).reshape(-1)

    # Step 2: convert X to float
    X = X_str.astype(float)

    # Step 3: convert y to numeric, or keep it as strings for named classes
    try:
        y = y_str.astype(float)
    except ValueError:
        # Only a failed parse should fall back; anything else is a real error.
        y = y_str.astype(str)

    return X, y
|
pdf/CFR.pdf
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:43040c5cd02372547ccfd0ff233c1f8db8492bb79cd6648471a6b261197a291b
|
| 3 |
+
size 1011137
|
pdf/CIFE.pdf
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:40f99e19316a356c47d3c228bb17ac7d4200a55e6d36da9adbc01d13ae978aab
|
| 3 |
+
size 900327
|
pdf/CMIFS.pdf
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:9afa507819700f14a66009e7ea7061b178427de7a0c4455d1a666c16b7d261e5
|
| 3 |
+
size 782737
|
pdf/CMIM.pdf
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:e59696e31620f0328f7a5aaf9c667180537c03892735ff68cbbbfa721bd72506
|
| 3 |
+
size 192556
|
pdf/CSMDCCMR.pdf
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:de200fbb840c74fcd142883c81ff10824a872d06292559abb178bb937ee0e541
|
| 3 |
+
size 1210174
|
pdf/CSMI.pdf
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:fc3c25a21685f327822a6794f3986a8d89bee172f6fb413f063529464c608dba
|
| 3 |
+
size 5412954
|
pdf/DCSF.pdf
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:31b0c67c1eadb708396803b6991298c4b965b0ed827b455a77e02281c9505475
|
| 3 |
+
size 1533397
|
pdf/DISR.pdf
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:12163bcb61689372a0fa6c057b55614e70fd263c75cd0869e58fb1a8bc9ef85b
|
| 3 |
+
size 215860
|
pdf/DWFS.pdf
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:f7d9f02c654e665591d365940f0b7239eb5d64f669210683eef7282aede6c378
|
| 3 |
+
size 817732
|
pdf/IWFS.pdf
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:9f8387f8508dbe3f74abfb41acf1c68f3da99e93dc465b10e680051fadd7e091
|
| 3 |
+
size 582942
|
pdf/JMI.pdf
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:e8491d05d11decc319e5d83f3867bc06bf9ccc984f6bd1854060684067bbd14c
|
| 3 |
+
size 1442313
|
pdf/JMIM.pdf
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:880adb6df5fec2761b2aafbbf555dc9aebfd10fe9016dd92eb5b4ff481494dd9
|
| 3 |
+
size 1064488
|
pdf/MIM.pdf
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:232e906e125fa5173af1ed66b446740d11e5c43b2dd911f3af729672141b4fbb
|
| 3 |
+
size 506331
|
pdf/MRI.pdf
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:4ca2c265bb597e2886c33448b028845f0755ca0cd952eb319dc74f31929ad300
|
| 3 |
+
size 1189309
|
pdf/MRMD.pdf
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:d067d694f1c70da19da5455237a5f33601c213f11f452ab73ee0d7ce9ccca8a9
|
| 3 |
+
size 2395695
|
pdf/MRMR.pdf
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:bb30d4b51eb630aff13a33cdd539d756b99021fa3446ad61cf82322cb5b97dee
|
| 3 |
+
size 1295526
|
pdf/UCRFS.pdf
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:1fb819c4f606121529e3f7c4de93e652fb1d7d88173600adb87ea0188e8bd528
|
| 3 |
+
size 1823221
|
requirements.txt
CHANGED
|
@@ -1,3 +1,11 @@
|
|
| 1 |
-
|
| 2 |
-
|
|
|
|
| 3 |
numpy
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
bitsandbytes
|
| 2 |
+
accelerate
|
| 3 |
+
sympy
|
| 4 |
numpy
|
| 5 |
+
pandas
|
| 6 |
+
scikit-learn
|
| 7 |
+
sqlalchemy
|
| 8 |
+
lark-parser
|
| 9 |
+
autogen
|
| 10 |
+
tqdm
|
| 11 |
+
python-dotenv
|
test.py
ADDED
|
@@ -0,0 +1,7 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
import pickle
|
| 2 |
+
|
| 3 |
+
|
| 4 |
+
# Load a previously pickled results object and dump it to stdout.
# NOTE: unpickling can execute arbitrary code; only open trusted files.
path = "results/dna.pkl"
with open(path, mode="rb") as f:
    results = pickle.load(f)

print(results)
|