elmadany commited on
Commit
6226dc5
·
verified ·
1 Parent(s): 2663d4c

Create server.js

Browse files
Files changed (1) hide show
  1. server.js +183 -0
server.js ADDED
@@ -0,0 +1,183 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ const express = require('express');
2
// Express application instance shared by the middleware and routes below.
const app = express();
// Port the HTTP server binds to.
const PORT = 7860;

// Environment Variables from HF Space Settings
// HF_TOKEN: access token sent as a Bearer credential on Hugging Face requests (optional).
const HF_TOKEN = process.env.HF_TOKEN;
// REPO_ID: dataset repository id interpolated into every Hugging Face URL.
const REPO_ID = process.env.SIMBA_DATA;
if (!REPO_ID) {
  // Without this, every request URL would contain the literal text "undefined"
  // and fail with an unhelpful HTTP error; surface the misconfiguration early.
  console.warn('SIMBA_DATA is not set; dataset requests will fail until it is configured.');
}

// In-memory cache: null until the first successful data load.
let CACHED_DATA = null;

// Serve static files from 'public' folder
app.use(express.static('public'));
14
+
15
+ // --- Helper: Download File from HF ---
16
/**
 * Download a JSONL file from the configured Hugging Face dataset repo and
 * parse it into an array of rows.
 *
 * Best-effort: a non-OK HTTP status is logged and yields an empty array.
 * Network-level failures raised by `fetch` itself still reject.
 *
 * @param {string} filename - Path of the file inside the repo (relative to `main`).
 * @returns {Promise<Array>} Parsed rows. Blank lines, unparseable lines and
 *   falsy JSON values (`null`, `0`, `false`, `""`) are skipped.
 */
async function fetchHFFile(filename) {
  const url = `https://huggingface.co/datasets/${REPO_ID}/resolve/main/${filename}`;
  console.log(`Downloading: ${filename}...`);

  // Only send credentials when a token is configured. Interpolating an unset
  // token would send the literal "Bearer undefined", i.e. an invalid credential.
  const headers = HF_TOKEN ? { "Authorization": `Bearer ${HF_TOKEN}` } : {};
  const res = await fetch(url, { headers });
  if (!res.ok) {
    console.warn(`Failed to fetch ${filename}: ${res.statusText}`);
    return [];
  }

  const text = await res.text();

  // Parse JSONL: one JSON document per non-blank line.
  const rows = [];
  let malformed = 0;
  for (const line of text.split('\n')) {
    if (!line.trim()) continue;
    try {
      const row = JSON.parse(line);
      if (row) rows.push(row);
    } catch (e) {
      // Keep going (one bad line must not discard the file) but count it so
      // data corruption is visible in the logs instead of silently ignored.
      malformed += 1;
    }
  }
  if (malformed > 0) {
    console.warn(`Skipped ${malformed} malformed line(s) in ${filename}`);
  }
  return rows;
}
30
+
31
+ // --- Helper: List Files in Folder ---
32
/**
 * List the `.jsonl` files the Hugging Face tree API reports for a folder of
 * the configured dataset repo.
 *
 * Best-effort: a non-OK HTTP status or an unexpected response body is logged
 * and yields an empty array. Network-level failures from `fetch` still reject.
 *
 * @param {string} folder - Folder path inside the repo (relative to `main`).
 * @returns {Promise<string[]>} The `path` of every listed entry ending in `.jsonl`.
 */
async function listHFFiles(folder) {
  const url = `https://huggingface.co/api/datasets/${REPO_ID}/tree/main/${folder}`;

  // Only send credentials when a token is configured, so an unset token is
  // not transmitted as the invalid credential "Bearer undefined".
  const headers = HF_TOKEN ? { "Authorization": `Bearer ${HF_TOKEN}` } : {};
  const res = await fetch(url, { headers });
  if (!res.ok) {
    console.warn(`Failed to list ${folder}: ${res.statusText}`);
    return [];
  }

  const entries = await res.json();
  if (!Array.isArray(entries)) {
    // A JSON body that is not a list (e.g. an error object) has no `.filter`.
    console.warn(`Unexpected listing response for ${folder}`);
    return [];
  }

  return entries
    .map((entry) => entry?.path)
    .filter((path) => typeof path === 'string' && path.endsWith('.jsonl'));
}
39
+
40
+ // --- Main Processor ---
41
/** Convert a 0–1 error rate into a percentage; missing, null or NaN rates count as 0. */
function toPercent(rate) {
  return (rate || 0) * 100;
}

/**
 * Keep only ASR records and attach the display language and family to each.
 * Labels are coerced to strings so they can safely be used as keys and sorted.
 *
 * @param {Array<object>} records - Raw rows from the ASR result files.
 * @param {Map<string, object>} langMap - Metadata rows keyed by language code.
 * @returns {Array<{model: string, language: string, family: string, wer: number, cer: number}>}
 */
function enrichAsrRecords(records, langMap) {
  const enriched = [];
  for (const record of records) {
    if (record?.task !== 'asr') continue;
    const meta = langMap.get(String(record.lang_code));
    enriched.push({
      model: String(record.model_name),
      // Fall back per field, so a metadata row that lacks `name` or `family`
      // still produces usable labels.
      language: String(meta?.name ?? record.lang_code),
      family: String(meta?.family ?? 'Unknown'),
      wer: toPercent(record.datasets_avg_wer),
      cer: toPercent(record.datasets_avg_cer),
    });
  }
  return enriched;
}

/**
 * Build the per-family leaderboard: for each family, one row per model with
 * its average WER/CER plus `WER_<lang>` / `CER_<lang>` columns, sorted by
 * ascending average WER.
 *
 * @param {Array<object>} entries - Output of `enrichAsrRecords`.
 * @returns {Object<string, {data: Array<object>, languages: string[]}>}
 */
function buildAsrByFamily(entries) {
  // family -> model -> language -> { wer, cer }
  // Maps (not plain objects) so labels such as "constructor" or "__proto__"
  // coming from the data cannot collide with Object.prototype members.
  const tree = new Map();
  for (const { family, model, language, wer, cer } of entries) {
    if (!tree.has(family)) tree.set(family, new Map());
    const perModel = tree.get(family);
    if (!perModel.has(model)) perModel.set(model, new Map());
    // A later record for the same (family, model, language) replaces an earlier one.
    perModel.get(model).set(language, { wer, cer });
  }

  const byFamily = [];
  for (const family of [...tree.keys()].sort()) {
    const perModel = tree.get(family);

    // Every language any model of this family was scored on.
    const languageSet = new Set();
    for (const scores of perModel.values()) {
      for (const language of scores.keys()) languageSet.add(language);
    }
    const languages = [...languageSet].sort();

    const rows = [...perModel].map(([model, scores]) => {
      let werSum = 0;
      let cerSum = 0;
      let count = 0;
      const perLanguage = {};
      for (const language of languages) {
        const score = scores.get(language);
        if (!score) continue; // this model has no result for that language
        werSum += score.wer;
        cerSum += score.cer;
        count += 1;
        perLanguage[`WER_${language}`] = score.wer;
        perLanguage[`CER_${language}`] = score.cer;
      }
      return {
        Model: model,
        Avg_WER: count ? werSum / count : 0,
        Avg_CER: count ? cerSum / count : 0,
        ...perLanguage,
      };
    });

    // Sort by WER (best model first).
    rows.sort((a, b) => a.Avg_WER - b.Avg_WER);
    byFamily.push([family, { data: rows, languages }]);
  }
  return Object.fromEntries(byFamily);
}

/**
 * Build the per-model view: for each model, one row per ASR record, sorted by
 * language name. Done in a single pass over the records.
 *
 * @param {Array<object>} entries - Output of `enrichAsrRecords`.
 * @returns {Object<string, Array<{Language: string, Family: string, WER: number, CER: number}>>}
 */
function buildAsrByModel(entries) {
  const grouped = new Map();
  for (const { model, language, family, wer, cer } of entries) {
    if (!grouped.has(model)) grouped.set(model, []);
    grouped.get(model).push({ Language: language, Family: family, WER: wer, CER: cer });
  }
  for (const rows of grouped.values()) {
    rows.sort((a, b) => a.Language.localeCompare(b.Language));
  }
  return Object.fromEntries(grouped);
}

/**
 * Group TTS rows by their `model` field, preserving row order within a group.
 *
 * @param {Array<object>} ttsRows - Raw rows from the TTS results file.
 * @returns {Map<string, Array<object>>}
 */
function groupTtsByModel(ttsRows) {
  const grouped = new Map();
  for (const row of ttsRows) {
    const model = String(row.model);
    if (!grouped.has(model)) grouped.set(model, []);
    grouped.get(model).push(row);
  }
  return grouped;
}

// --- Main Processor ---
/**
 * Download all result files, aggregate them, and store the outcome in the
 * module-level `CACHED_DATA`.
 *
 * `CACHED_DATA` is only assigned after every step succeeded, so a rejected
 * load leaves the previous cache value untouched.
 *
 * @returns {Promise<void>} Rejects if any download or listing request rejects.
 */
async function loadAndProcessData() {
  console.log("Starting Data Sync...");

  // 1-3a. Metadata, TTS, SLID and the file listing do not depend on each
  // other, so fetch them concurrently instead of one after another.
  const [metaList, ttsData, slidData, files] = await Promise.all([
    fetchHFFile("final_results/metadata.jsonl"),
    fetchHFFile("final_results/tts_results.jsonl"),
    fetchHFFile("final_results/slid_results.jsonl"),
    listHFFiles("final_results"),
  ]);

  // Later metadata rows win when a language code is repeated.
  const langMap = new Map(metaList.map((m) => [String(m.lang_code_key), m]));

  // 3b. Load ASR files. Kept sequential so the number of simultaneous
  // requests stays bounded regardless of how many result files exist.
  const asrRecords = [];
  for (const file of files) {
    // Skip metadata/tts/slid if they appear in list
    if (file.includes('metadata') || file.includes('tts_') || file.includes('slid_')) continue;
    const records = await fetchHFFile(file);
    // Append one by one: spreading a very large array into push() can exceed
    // the engine's argument limit and throw a RangeError.
    for (const record of records) asrRecords.push(record);
  }

  // 4-6. Process and format ASR data.
  const asrEntries = enrichAsrRecords(asrRecords, langMap);
  const families = [...new Set(asrEntries.map((e) => e.family))].sort();
  const models = [...new Set(asrEntries.map((e) => e.model))].sort();
  const asrByFamily = buildAsrByFamily(asrEntries);
  const asrByModel = buildAsrByModel(asrEntries);

  // 7. Format TTS
  const ttsGrouped = groupTtsByModel(ttsData);

  CACHED_DATA = {
    metadata: {
      families,
      models,
      tts_models: [...ttsGrouped.keys()].sort(),
    },
    asr: { by_family: asrByFamily, by_model: asrByModel },
    tts: Object.fromEntries(ttsGrouped),
    slid: slidData,
  };
  console.log("Data Processing Complete.");
}
169
+
170
+ // API Route
171
// GET /api/data — respond with the cached dataset, loading it on demand when
// the cache is still empty. A failed load is reported as HTTP 500 with the
// error message in the JSON body.
app.get('/api/data', async (request, response) => {
  const cacheIsEmpty = !CACHED_DATA;

  if (cacheIsEmpty) {
    try {
      await loadAndProcessData();
    } catch (loadError) {
      response.status(500).json({ error: loadError.message });
      return;
    }
  }

  response.json(CACHED_DATA);
});
178
+
179
+ // Start
180
// Bind on all interfaces and warm the cache once the server is up.
app.listen(PORT, '0.0.0.0', () => {
  console.log(`Server listening on port ${PORT}`);
  // Initial load. Nothing awaits this promise, so a rejection must be handled
  // here: otherwise a failed startup download becomes an unhandled rejection,
  // which terminates the process on current Node versions. The server stays up
  // and the /api/data route attempts the load again while the cache is empty.
  loadAndProcessData().catch((err) => {
    console.error(`Initial data load failed: ${err?.message ?? err}`);
  });
});