Spaces:
Running
Running
Update server.js
Browse files
server.js
CHANGED
|
@@ -66,39 +66,43 @@ async function listHFFiles(folder) {
|
|
| 66 |
async function loadAndProcessData() {
|
| 67 |
console.log("--- Starting Data Sync ---");
|
| 68 |
|
| 69 |
-
// 1. Fetch Metadata
|
| 70 |
const metaList = await fetchHFFile("final_results/metadata.jsonl");
|
| 71 |
const langMap = {};
|
| 72 |
metaList.forEach(m => { langMap[m.lang_code_key] = m; });
|
| 73 |
|
| 74 |
-
// 2. Scan ALL files
|
| 75 |
const files = await listHFFiles("final_results");
|
| 76 |
|
| 77 |
let allRecords = [];
|
| 78 |
|
| 79 |
-
// Load every file found (except metadata which we already loaded)
|
| 80 |
for (const file of files) {
|
| 81 |
if (file.endsWith('metadata.jsonl')) continue;
|
| 82 |
const records = await fetchHFFile(file);
|
| 83 |
allRecords.push(...records);
|
| 84 |
}
|
| 85 |
|
| 86 |
-
console.log(`[Processing] Loaded ${allRecords.length} total records.
|
| 87 |
|
| 88 |
-
// 3. Bucket Records by Task
|
| 89 |
const ttsRecords = [];
|
| 90 |
const slidRecords = [];
|
| 91 |
const asrRecords = [];
|
| 92 |
|
| 93 |
allRecords.forEach(r => {
|
| 94 |
-
|
| 95 |
-
const
|
|
|
|
|
|
|
|
|
|
| 96 |
|
| 97 |
if (task === 'tts') ttsRecords.push(r);
|
| 98 |
else if (task === 'slid') slidRecords.push(r);
|
| 99 |
else if (task === 'asr') asrRecords.push(r);
|
| 100 |
});
|
| 101 |
|
|
|
|
|
|
|
| 102 |
// --- PROCESS SLID ---
|
| 103 |
const slidMap = {};
|
| 104 |
slidRecords.forEach(r => {
|
|
@@ -108,20 +112,29 @@ async function loadAndProcessData() {
|
|
| 108 |
if (!slidMap[code]) {
|
| 109 |
slidMap[code] = { Language: meta.name, ISO: code };
|
| 110 |
}
|
| 111 |
-
|
|
|
|
| 112 |
});
|
| 113 |
const slidFinal = Object.values(slidMap);
|
| 114 |
|
| 115 |
// --- PROCESS TTS ---
|
| 116 |
const ttsGrouped = {};
|
| 117 |
const ttsModels = new Set();
|
|
|
|
| 118 |
ttsRecords.forEach(r => {
|
| 119 |
-
|
| 120 |
-
|
| 121 |
-
|
| 122 |
-
|
| 123 |
-
|
| 124 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 125 |
});
|
| 126 |
|
| 127 |
// --- PROCESS ASR ---
|
|
@@ -132,7 +145,7 @@ async function loadAndProcessData() {
|
|
| 132 |
asrRecords.forEach(r => {
|
| 133 |
const meta = langMap[r.lang_code] || { name: r.lang_code, family: 'Unknown', lang_code_key: r.lang_code };
|
| 134 |
const fam = meta.family;
|
| 135 |
-
const mod = r.model_name;
|
| 136 |
|
| 137 |
families.add(fam);
|
| 138 |
models.add(mod);
|
|
@@ -187,7 +200,7 @@ async function loadAndProcessData() {
|
|
| 187 |
Array.from(models).forEach(mod => {
|
| 188 |
const rows = [];
|
| 189 |
asrRecords.forEach(r => {
|
| 190 |
-
if(r.model_name === mod) {
|
| 191 |
const meta = langMap[r.lang_code] || { name: r.lang_code, family: 'Unknown', lang_code_key: r.lang_code };
|
| 192 |
rows.push({
|
| 193 |
Language: meta.name,
|
|
|
|
| 66 |
async function loadAndProcessData() {
|
| 67 |
console.log("--- Starting Data Sync ---");
|
| 68 |
|
| 69 |
+
// 1. Fetch Metadata
|
| 70 |
const metaList = await fetchHFFile("final_results/metadata.jsonl");
|
| 71 |
const langMap = {};
|
| 72 |
metaList.forEach(m => { langMap[m.lang_code_key] = m; });
|
| 73 |
|
| 74 |
+
// 2. Scan ALL files
|
| 75 |
const files = await listHFFiles("final_results");
|
| 76 |
|
| 77 |
let allRecords = [];
|
| 78 |
|
|
|
|
| 79 |
for (const file of files) {
|
| 80 |
if (file.endsWith('metadata.jsonl')) continue;
|
| 81 |
const records = await fetchHFFile(file);
|
| 82 |
allRecords.push(...records);
|
| 83 |
}
|
| 84 |
|
| 85 |
+
console.log(`[Processing] Loaded ${allRecords.length} total records. Bucketing by task...`);
|
| 86 |
|
| 87 |
+
// 3. Bucket Records by Task (Checking both 'task' and 'data_type')
|
| 88 |
const ttsRecords = [];
|
| 89 |
const slidRecords = [];
|
| 90 |
const asrRecords = [];
|
| 91 |
|
| 92 |
allRecords.forEach(r => {
|
| 93 |
+
// Robust check: look for 'task' OR 'data_type'
|
| 94 |
+
const identifier = r.task || r.data_type;
|
| 95 |
+
if (!identifier) return;
|
| 96 |
+
|
| 97 |
+
const task = identifier.toLowerCase().trim();
|
| 98 |
|
| 99 |
if (task === 'tts') ttsRecords.push(r);
|
| 100 |
else if (task === 'slid') slidRecords.push(r);
|
| 101 |
else if (task === 'asr') asrRecords.push(r);
|
| 102 |
});
|
| 103 |
|
| 104 |
+
console.log(`[Counts] ASR: ${asrRecords.length}, TTS: ${ttsRecords.length}, SLID: ${slidRecords.length}`);
|
| 105 |
+
|
| 106 |
// --- PROCESS SLID ---
|
| 107 |
const slidMap = {};
|
| 108 |
slidRecords.forEach(r => {
|
|
|
|
| 112 |
if (!slidMap[code]) {
|
| 113 |
slidMap[code] = { Language: meta.name, ISO: code };
|
| 114 |
}
|
| 115 |
+
const modelName = r.model_name || r.model || "Unknown Model";
|
| 116 |
+
slidMap[code][modelName] = r.f1_score;
|
| 117 |
});
|
| 118 |
const slidFinal = Object.values(slidMap);
|
| 119 |
|
| 120 |
// --- PROCESS TTS ---
|
| 121 |
const ttsGrouped = {};
|
| 122 |
const ttsModels = new Set();
|
| 123 |
+
|
| 124 |
ttsRecords.forEach(r => {
|
| 125 |
+
const modelName = r.model || r.model_name || "Unknown Model";
|
| 126 |
+
|
| 127 |
+
ttsModels.add(modelName);
|
| 128 |
+
if(!ttsGrouped[modelName]) ttsGrouped[modelName] = [];
|
| 129 |
+
|
| 130 |
+
const meta = langMap[r.lang_code] || { name: r.language || r.lang_code, lang_code_key: r.lang_code };
|
| 131 |
+
|
| 132 |
+
// Standardize structure for frontend
|
| 133 |
+
r.iso = r.lang_code;
|
| 134 |
+
r.Language = meta.name; // Ensure capital 'Language' key exists for dropdown
|
| 135 |
+
r.language = meta.name; // Ensure lowercase 'language' key exists for table lookups
|
| 136 |
+
|
| 137 |
+
ttsGrouped[modelName].push(r);
|
| 138 |
});
|
| 139 |
|
| 140 |
// --- PROCESS ASR ---
|
|
|
|
| 145 |
asrRecords.forEach(r => {
|
| 146 |
const meta = langMap[r.lang_code] || { name: r.lang_code, family: 'Unknown', lang_code_key: r.lang_code };
|
| 147 |
const fam = meta.family;
|
| 148 |
+
const mod = r.model_name || r.model;
|
| 149 |
|
| 150 |
families.add(fam);
|
| 151 |
models.add(mod);
|
|
|
|
| 200 |
Array.from(models).forEach(mod => {
|
| 201 |
const rows = [];
|
| 202 |
asrRecords.forEach(r => {
|
| 203 |
+
if((r.model_name || r.model) === mod) {
|
| 204 |
const meta = langMap[r.lang_code] || { name: r.lang_code, family: 'Unknown', lang_code_key: r.lang_code };
|
| 205 |
rows.push({
|
| 206 |
Language: meta.name,
|