elmadany committed on
Commit
07445a4
·
verified ·
1 Parent(s): 1194f61

Update server.js

Browse files
Files changed (1) hide show
  1. server.js +29 -16
server.js CHANGED
@@ -66,39 +66,43 @@ async function listHFFiles(folder) {
66
  async function loadAndProcessData() {
67
  console.log("--- Starting Data Sync ---");
68
 
69
- // 1. Fetch Metadata (Crucial for language names)
70
  const metaList = await fetchHFFile("final_results/metadata.jsonl");
71
  const langMap = {};
72
  metaList.forEach(m => { langMap[m.lang_code_key] = m; });
73
 
74
- // 2. Scan ALL files in final_results
75
  const files = await listHFFiles("final_results");
76
 
77
  let allRecords = [];
78
 
79
- // Load every file found (except metadata which we already loaded)
80
  for (const file of files) {
81
  if (file.endsWith('metadata.jsonl')) continue;
82
  const records = await fetchHFFile(file);
83
  allRecords.push(...records);
84
  }
85
 
86
- console.log(`[Processing] Loaded ${allRecords.length} total records. Sorting by task...`);
87
 
88
- // 3. Bucket Records by Task
89
  const ttsRecords = [];
90
  const slidRecords = [];
91
  const asrRecords = [];
92
 
93
  allRecords.forEach(r => {
94
- if (!r.task) return;
95
- const task = r.task.toLowerCase().trim();
 
 
 
96
 
97
  if (task === 'tts') ttsRecords.push(r);
98
  else if (task === 'slid') slidRecords.push(r);
99
  else if (task === 'asr') asrRecords.push(r);
100
  });
101
 
 
 
102
  // --- PROCESS SLID ---
103
  const slidMap = {};
104
  slidRecords.forEach(r => {
@@ -108,20 +112,29 @@ async function loadAndProcessData() {
108
  if (!slidMap[code]) {
109
  slidMap[code] = { Language: meta.name, ISO: code };
110
  }
111
- slidMap[code][r.model_name] = r.f1_score;
 
112
  });
113
  const slidFinal = Object.values(slidMap);
114
 
115
  // --- PROCESS TTS ---
116
  const ttsGrouped = {};
117
  const ttsModels = new Set();
 
118
  ttsRecords.forEach(r => {
119
- ttsModels.add(r.model);
120
- if(!ttsGrouped[r.model]) ttsGrouped[r.model] = [];
121
- const meta = langMap[r.lang_code] || { lang_code_key: r.lang_code };
122
- r.iso = meta.lang_code_key;
123
- r.Language = meta.name;
124
- ttsGrouped[r.model].push(r);
 
 
 
 
 
 
 
125
  });
126
 
127
  // --- PROCESS ASR ---
@@ -132,7 +145,7 @@ async function loadAndProcessData() {
132
  asrRecords.forEach(r => {
133
  const meta = langMap[r.lang_code] || { name: r.lang_code, family: 'Unknown', lang_code_key: r.lang_code };
134
  const fam = meta.family;
135
- const mod = r.model_name;
136
 
137
  families.add(fam);
138
  models.add(mod);
@@ -187,7 +200,7 @@ async function loadAndProcessData() {
187
  Array.from(models).forEach(mod => {
188
  const rows = [];
189
  asrRecords.forEach(r => {
190
- if(r.model_name === mod) {
191
  const meta = langMap[r.lang_code] || { name: r.lang_code, family: 'Unknown', lang_code_key: r.lang_code };
192
  rows.push({
193
  Language: meta.name,
 
66
  async function loadAndProcessData() {
67
  console.log("--- Starting Data Sync ---");
68
 
69
+ // 1. Fetch Metadata
70
  const metaList = await fetchHFFile("final_results/metadata.jsonl");
71
  const langMap = {};
72
  metaList.forEach(m => { langMap[m.lang_code_key] = m; });
73
 
74
+ // 2. Scan ALL files
75
  const files = await listHFFiles("final_results");
76
 
77
  let allRecords = [];
78
 
 
79
  for (const file of files) {
80
  if (file.endsWith('metadata.jsonl')) continue;
81
  const records = await fetchHFFile(file);
82
  allRecords.push(...records);
83
  }
84
 
85
+ console.log(`[Processing] Loaded ${allRecords.length} total records. Bucketing by task...`);
86
 
87
+ // 3. Bucket Records by Task (Checking both 'task' and 'data_type')
88
  const ttsRecords = [];
89
  const slidRecords = [];
90
  const asrRecords = [];
91
 
92
  allRecords.forEach(r => {
93
+ // Robust check: look for 'task' OR 'data_type'
94
+ const identifier = r.task || r.data_type;
95
+ if (!identifier) return;
96
+
97
+ const task = identifier.toLowerCase().trim();
98
 
99
  if (task === 'tts') ttsRecords.push(r);
100
  else if (task === 'slid') slidRecords.push(r);
101
  else if (task === 'asr') asrRecords.push(r);
102
  });
103
 
104
+ console.log(`[Counts] ASR: ${asrRecords.length}, TTS: ${ttsRecords.length}, SLID: ${slidRecords.length}`);
105
+
106
  // --- PROCESS SLID ---
107
  const slidMap = {};
108
  slidRecords.forEach(r => {
 
112
  if (!slidMap[code]) {
113
  slidMap[code] = { Language: meta.name, ISO: code };
114
  }
115
+ const modelName = r.model_name || r.model || "Unknown Model";
116
+ slidMap[code][modelName] = r.f1_score;
117
  });
118
  const slidFinal = Object.values(slidMap);
119
 
120
  // --- PROCESS TTS ---
121
  const ttsGrouped = {};
122
  const ttsModels = new Set();
123
+
124
  ttsRecords.forEach(r => {
125
+ const modelName = r.model || r.model_name || "Unknown Model";
126
+
127
+ ttsModels.add(modelName);
128
+ if(!ttsGrouped[modelName]) ttsGrouped[modelName] = [];
129
+
130
+ const meta = langMap[r.lang_code] || { name: r.language || r.lang_code, lang_code_key: r.lang_code };
131
+
132
+ // Standardize structure for frontend
133
+ r.iso = r.lang_code;
134
+ r.Language = meta.name; // Ensure capital 'Language' key exists for dropdown
135
+ r.language = meta.name; // Ensure lowercase 'language' key exists for table lookups
136
+
137
+ ttsGrouped[modelName].push(r);
138
  });
139
 
140
  // --- PROCESS ASR ---
 
145
  asrRecords.forEach(r => {
146
  const meta = langMap[r.lang_code] || { name: r.lang_code, family: 'Unknown', lang_code_key: r.lang_code };
147
  const fam = meta.family;
148
+ const mod = r.model_name || r.model;
149
 
150
  families.add(fam);
151
  models.add(mod);
 
200
  Array.from(models).forEach(mod => {
201
  const rows = [];
202
  asrRecords.forEach(r => {
203
+ if((r.model_name || r.model) === mod) {
204
  const meta = langMap[r.lang_code] || { name: r.lang_code, family: 'Unknown', lang_code_key: r.lang_code };
205
  rows.push({
206
  Language: meta.name,