elmadany committed on
Commit
4d3a843
·
verified ·
1 Parent(s): 6e5043a

Update server.js

Browse files
Files changed (1) hide show
  1. server.js +45 -41
server.js CHANGED
@@ -2,33 +2,26 @@ const express = require('express');
2
  const app = express();
3
  const PORT = 7860;
4
 
5
- // Environment Variables from HF Space Settings
6
  const HF_TOKEN = process.env.HF_TOKEN;
7
  const REPO_ID = process.env.SIMBA_DATA;
8
 
9
- // In-memory cache
10
  let CACHED_DATA = null;
11
 
12
- // Serve static files from 'public' folder
13
  app.use(express.static('public'));
14
 
15
- // --- Helper: Download File from HF ---
16
  async function fetchHFFile(filename) {
17
  const url = `https://huggingface.co/datasets/${REPO_ID}/resolve/main/${filename}`;
18
  console.log(`Downloading: ${filename}...`);
19
  const res = await fetch(url, { headers: { "Authorization": `Bearer ${HF_TOKEN}` } });
20
- if (!res.ok) {
21
- console.warn(`Failed to fetch ${filename}: ${res.statusText}`);
22
- return [];
23
- }
24
  const text = await res.text();
25
- // Parse JSONL
26
  return text.split('\n').filter(line => line.trim()).map(line => {
27
  try { return JSON.parse(line); } catch(e) { return null; }
28
  }).filter(x => x);
29
  }
30
 
31
- // --- Helper: List Files in Folder ---
32
  async function listHFFiles(folder) {
33
  const url = `https://huggingface.co/api/datasets/${REPO_ID}/tree/main/${folder}`;
34
  const res = await fetch(url, { headers: { "Authorization": `Bearer ${HF_TOKEN}` } });
@@ -37,52 +30,53 @@ async function listHFFiles(folder) {
37
  return files.filter(f => f.path.endsWith('.jsonl')).map(f => f.path);
38
  }
39
 
40
- // --- Main Processor ---
41
  async function loadAndProcessData() {
42
  console.log("Starting Data Sync...");
43
 
44
- // 1. Load Metadata
45
  const metaList = await fetchHFFile("final_results/metadata.jsonl");
46
  const langMap = {};
 
47
  metaList.forEach(m => { langMap[m.lang_code_key] = m; });
48
 
49
- // 2. Load TTS & SLID
50
  const ttsData = await fetchHFFile("final_results/tts_results.jsonl");
51
  const slidData = await fetchHFFile("final_results/slid_results.jsonl");
52
 
53
- // 3. Load ASR Files
54
  const files = await listHFFiles("final_results");
55
  let asrRecords = [];
56
  for (const file of files) {
57
- // Skip metadata/tts/slid if they appear in list
58
  if (file.includes('metadata') || file.includes('tts_') || file.includes('slid_')) continue;
59
  const records = await fetchHFFile(file);
60
  asrRecords.push(...records);
61
  }
62
 
63
- // 4. Process ASR Data
64
  const families = new Set();
65
  const models = new Set();
66
- const familyData = {}; // { Family: { Model: { Lang: {wer, cer} } } }
67
 
68
  asrRecords.forEach(r => {
69
  if (r.task !== 'asr') return;
70
 
71
- // Enrich
72
- const meta = langMap[r.lang_code] || { name: r.lang_code, family: 'Unknown' };
73
  const fam = meta.family;
74
  const mod = r.model_name;
75
 
76
  families.add(fam);
77
  models.add(mod);
78
 
79
- // Init structure
80
  if (!familyData[fam]) familyData[fam] = {};
81
  if (!familyData[fam][mod]) familyData[fam][mod] = {};
82
 
 
83
  familyData[fam][mod][meta.name] = {
84
  wer: (r.datasets_avg_wer || 0) * 100,
85
- cer: (r.datasets_avg_cer || 0) * 100
 
86
  };
87
  });
88
 
@@ -91,18 +85,28 @@ async function loadAndProcessData() {
91
  Array.from(families).sort().forEach(fam => {
92
  const modData = familyData[fam];
93
 
94
- // Find all languages for this family
95
- const langsSet = new Set();
96
- Object.values(modData).forEach(m => Object.keys(m).forEach(l => langsSet.add(l)));
97
- const langs = Array.from(langsSet).sort();
 
 
 
 
 
 
 
 
 
 
 
98
 
99
- // Build Rows
100
  const rows = Object.keys(modData).map(modName => {
101
  const scores = modData[modName];
102
  let sW=0, sC=0, n=0;
103
 
104
- langs.forEach(l => {
105
- if(scores[l]) { sW += scores[l].wer; sC += scores[l].cer; n++; }
106
  });
107
 
108
  const row = {
@@ -111,19 +115,17 @@ async function loadAndProcessData() {
111
  Avg_CER: n ? sC/n : 0
112
  };
113
 
114
- // Add individual lang scores
115
- langs.forEach(l => {
116
- if(scores[l]) {
117
- row[`WER_${l}`] = scores[l].wer;
118
- row[`CER_${l}`] = scores[l].cer;
119
  }
120
  });
121
  return row;
122
  });
123
 
124
- // Sort by WER
125
  rows.sort((a,b) => a.Avg_WER - b.Avg_WER);
126
- asrByFamily[fam] = { data: rows, languages: langs };
127
  });
128
 
129
  // 6. Format ASR By Model
@@ -132,9 +134,10 @@ async function loadAndProcessData() {
132
  const rows = [];
133
  asrRecords.forEach(r => {
134
  if(r.model_name === mod && r.task === 'asr') {
135
- const meta = langMap[r.lang_code] || { name: r.lang_code, family: 'Unknown' };
136
  rows.push({
137
  Language: meta.name,
 
138
  Family: meta.family,
139
  WER: (r.datasets_avg_wer || 0) * 100,
140
  CER: (r.datasets_avg_cer || 0) * 100
@@ -151,6 +154,9 @@ async function loadAndProcessData() {
151
  ttsData.forEach(r => {
152
  ttsModels.add(r.model);
153
  if(!ttsGrouped[r.model]) ttsGrouped[r.model] = [];
 
 
 
154
  ttsGrouped[r.model].push(r);
155
  });
156
 
@@ -164,10 +170,9 @@ async function loadAndProcessData() {
164
  tts: ttsGrouped,
165
  slid: slidData
166
  };
167
- console.log("Data Processing Complete.");
168
  }
169
 
170
- // API Route
171
  app.get('/api/data', async (req, res) => {
172
  if (!CACHED_DATA) {
173
  try { await loadAndProcessData(); }
@@ -176,8 +181,7 @@ app.get('/api/data', async (req, res) => {
176
  res.json(CACHED_DATA);
177
  });
178
 
179
- // Start
180
  app.listen(PORT, '0.0.0.0', () => {
181
- console.log(`Server listening on port ${PORT}`);
182
- loadAndProcessData(); // Initial load
183
  });
 
2
  const app = express();
3
  const PORT = 7860;
4
 
 
5
  const HF_TOKEN = process.env.HF_TOKEN;
6
  const REPO_ID = process.env.SIMBA_DATA;
7
 
 
8
  let CACHED_DATA = null;
9
 
 
10
  app.use(express.static('public'));
11
 
12
+ // --- Fetch Helper ---
13
  async function fetchHFFile(filename) {
14
  const url = `https://huggingface.co/datasets/${REPO_ID}/resolve/main/${filename}`;
15
  console.log(`Downloading: ${filename}...`);
16
  const res = await fetch(url, { headers: { "Authorization": `Bearer ${HF_TOKEN}` } });
17
+ if (!res.ok) return [];
 
 
 
18
  const text = await res.text();
 
19
  return text.split('\n').filter(line => line.trim()).map(line => {
20
  try { return JSON.parse(line); } catch(e) { return null; }
21
  }).filter(x => x);
22
  }
23
 
24
+ // --- List Files Helper ---
25
  async function listHFFiles(folder) {
26
  const url = `https://huggingface.co/api/datasets/${REPO_ID}/tree/main/${folder}`;
27
  const res = await fetch(url, { headers: { "Authorization": `Bearer ${HF_TOKEN}` } });
 
30
  return files.filter(f => f.path.endsWith('.jsonl')).map(f => f.path);
31
  }
32
 
33
+ // --- Processor ---
34
  async function loadAndProcessData() {
35
  console.log("Starting Data Sync...");
36
 
37
+ // 1. Metadata
38
  const metaList = await fetchHFFile("final_results/metadata.jsonl");
39
  const langMap = {};
40
+ // Map code key to full metadata object
41
  metaList.forEach(m => { langMap[m.lang_code_key] = m; });
42
 
43
+ // 2. Load Others
44
  const ttsData = await fetchHFFile("final_results/tts_results.jsonl");
45
  const slidData = await fetchHFFile("final_results/slid_results.jsonl");
46
 
47
+ // 3. Load ASR
48
  const files = await listHFFiles("final_results");
49
  let asrRecords = [];
50
  for (const file of files) {
 
51
  if (file.includes('metadata') || file.includes('tts_') || file.includes('slid_')) continue;
52
  const records = await fetchHFFile(file);
53
  asrRecords.push(...records);
54
  }
55
 
56
+ // 4. Process ASR
57
  const families = new Set();
58
  const models = new Set();
59
+ const familyData = {};
60
 
61
  asrRecords.forEach(r => {
62
  if (r.task !== 'asr') return;
63
 
64
+ // Use metadata to get Family and ISO
65
+ const meta = langMap[r.lang_code] || { name: r.lang_code, family: 'Unknown', lang_code_key: r.lang_code };
66
  const fam = meta.family;
67
  const mod = r.model_name;
68
 
69
  families.add(fam);
70
  models.add(mod);
71
 
 
72
  if (!familyData[fam]) familyData[fam] = {};
73
  if (!familyData[fam][mod]) familyData[fam][mod] = {};
74
 
75
+ // Store by Language Name
76
  familyData[fam][mod][meta.name] = {
77
  wer: (r.datasets_avg_wer || 0) * 100,
78
+ cer: (r.datasets_avg_cer || 0) * 100,
79
+ iso: meta.lang_code_key // Store ISO here
80
  };
81
  });
82
 
 
85
  Array.from(families).sort().forEach(fam => {
86
  const modData = familyData[fam];
87
 
88
+ // Collect Languages with their ISO codes
89
+ const langMapForFamily = new Map();
90
+ Object.values(modData).forEach(m => {
91
+ Object.keys(m).forEach(langName => {
92
+ // Get ISO from the stored data object
93
+ const iso = m[langName].iso;
94
+ langMapForFamily.set(langName, iso);
95
+ });
96
+ });
97
+
98
+ // Create array of objects: { name: "Amharic", iso: "amh" }
99
+ const languages = Array.from(langMapForFamily.keys()).sort().map(name => ({
100
+ name: name,
101
+ iso: langMapForFamily.get(name)
102
+ }));
103
 
 
104
  const rows = Object.keys(modData).map(modName => {
105
  const scores = modData[modName];
106
  let sW=0, sC=0, n=0;
107
 
108
+ languages.forEach(l => {
109
+ if(scores[l.name]) { sW += scores[l.name].wer; sC += scores[l.name].cer; n++; }
110
  });
111
 
112
  const row = {
 
115
  Avg_CER: n ? sC/n : 0
116
  };
117
 
118
+ languages.forEach(l => {
119
+ if(scores[l.name]) {
120
+ row[`WER_${l.name}`] = scores[l.name].wer;
121
+ row[`CER_${l.name}`] = scores[l.name].cer;
 
122
  }
123
  });
124
  return row;
125
  });
126
 
 
127
  rows.sort((a,b) => a.Avg_WER - b.Avg_WER);
128
+ asrByFamily[fam] = { data: rows, languages: languages };
129
  });
130
 
131
  // 6. Format ASR By Model
 
134
  const rows = [];
135
  asrRecords.forEach(r => {
136
  if(r.model_name === mod && r.task === 'asr') {
137
+ const meta = langMap[r.lang_code] || { name: r.lang_code, family: 'Unknown', lang_code_key: r.lang_code };
138
  rows.push({
139
  Language: meta.name,
140
+ ISO: meta.lang_code_key, // Include ISO
141
  Family: meta.family,
142
  WER: (r.datasets_avg_wer || 0) * 100,
143
  CER: (r.datasets_avg_cer || 0) * 100
 
154
  ttsData.forEach(r => {
155
  ttsModels.add(r.model);
156
  if(!ttsGrouped[r.model]) ttsGrouped[r.model] = [];
157
+ // Add ISO if available in metadata, else guess or use code
158
+ const meta = langMap[r.lang_code] || { lang_code_key: r.lang_code };
159
+ r.iso = meta.lang_code_key;
160
  ttsGrouped[r.model].push(r);
161
  });
162
 
 
170
  tts: ttsGrouped,
171
  slid: slidData
172
  };
173
+ console.log("Data Ready.");
174
  }
175
 
 
176
  app.get('/api/data', async (req, res) => {
177
  if (!CACHED_DATA) {
178
  try { await loadAndProcessData(); }
 
181
  res.json(CACHED_DATA);
182
  });
183
 
 
184
  app.listen(PORT, '0.0.0.0', () => {
185
+ console.log(`Server running on ${PORT}`);
186
+ loadAndProcessData();
187
  });