// HF Space server — simba-data leaderboard backend (extraction residue
// "Spaces: Running" from the Hugging Face UI removed).
const express = require('express');

const app = express();
const PORT = 7860;

// Configuration comes from the HF Space settings (environment variables).
// SIMBA_DATA holds the dataset repo id ("owner/name"); HF_TOKEN authorizes
// access to it.
const { HF_TOKEN, SIMBA_DATA: REPO_ID } = process.env;

// Process-lifetime cache of the fully processed dataset; populated once by
// loadAndProcessData() and served as-is afterwards.
let CACHED_DATA = null;

// Static frontend assets live in ./public.
app.use(express.static('public'));
// --- Helper: Download File from HF ---
/**
 * Downloads one JSONL file from the HF dataset repo and parses it.
 *
 * Fix: the three template literals below had been garbled to the literal
 * text `$(unknown)` (shell-style syntax, which a JS template literal does
 * not interpolate), so the request URL never contained the file path.
 * Restored the `${filename}` interpolation, matching the intact pattern
 * used in listHFFiles.
 *
 * @param {string} filename - repo-relative path, e.g. "final_results/metadata.jsonl"
 * @returns {Promise<object[]>} parsed records; [] when the fetch fails
 */
async function fetchHFFile(filename) {
  const url = `https://huggingface.co/datasets/${REPO_ID}/resolve/main/${filename}`;
  console.log(`Downloading: ${filename}...`);
  const res = await fetch(url, { headers: { "Authorization": `Bearer ${HF_TOKEN}` } });
  if (!res.ok) {
    // Best-effort: a missing/forbidden file degrades to an empty list so
    // the rest of the sync can proceed.
    console.warn(`Failed to fetch ${filename}: ${res.statusText}`);
    return [];
  }
  const text = await res.text();
  // Parse JSONL: one JSON object per non-empty line; malformed lines are
  // dropped rather than aborting the whole file.
  return text.split('\n').filter(line => line.trim()).map(line => {
    try { return JSON.parse(line); } catch (e) { return null; }
  }).filter(x => x);
}
// --- Helper: List Files in Folder ---
/**
 * Lists the .jsonl file paths under `folder` in the dataset repo, using the
 * HF tree API. Returns [] if the listing request fails.
 *
 * @param {string} folder - repo-relative folder, e.g. "final_results"
 * @returns {Promise<string[]>} repo-relative paths of .jsonl files
 */
async function listHFFiles(folder) {
  const url = `https://huggingface.co/api/datasets/${REPO_ID}/tree/main/${folder}`;
  const res = await fetch(url, { headers: { "Authorization": `Bearer ${HF_TOKEN}` } });
  if (!res.ok) return [];
  const entries = await res.json();
  const paths = [];
  for (const entry of entries) {
    if (entry.path.endsWith('.jsonl')) paths.push(entry.path);
  }
  return paths;
}
// --- Main Processor ---
/**
 * Syncs all result files from the HF dataset repo, aggregates ASR scores by
 * language family and by model, groups TTS results by model, and stores the
 * whole payload in the module-level CACHED_DATA.
 *
 * Improvement: the metadata/TTS/SLID fetches and the folder listing are
 * independent requests, as are the per-file ASR downloads — they were
 * awaited one at a time; they now run in parallel via Promise.all (record
 * order is preserved, since Promise.all keeps input order).
 *
 * @returns {Promise<void>} resolves once CACHED_DATA is populated
 */
async function loadAndProcessData() {
  console.log("Starting Data Sync...");
  // 1-3a. Metadata, TTS, SLID and the directory listing, fetched in parallel.
  const [metaList, ttsData, slidData, files] = await Promise.all([
    fetchHFFile("final_results/metadata.jsonl"),
    fetchHFFile("final_results/tts_results.jsonl"),
    fetchHFFile("final_results/slid_results.jsonl"),
    listHFFiles("final_results"),
  ]);
  // Index metadata by its language-code key for enrichment below.
  const langMap = {};
  metaList.forEach(m => { langMap[m.lang_code_key] = m; });
  // 3b. Download the ASR result files in parallel, skipping the
  // metadata/tts/slid files if they appear in the listing.
  const asrFiles = files.filter(file =>
    !file.includes('metadata') && !file.includes('tts_') && !file.includes('slid_'));
  const asrChunks = await Promise.all(asrFiles.map(file => fetchHFFile(file)));
  const asrRecords = asrChunks.flat();
  // 4. Process ASR Data
  const families = new Set();
  const models = new Set();
  const familyData = {}; // { Family: { Model: { Lang: {wer, cer} } } }
  asrRecords.forEach(r => {
    if (r.task !== 'asr') return;
    // Enrich with language metadata; unknown codes fall back to the raw code.
    const meta = langMap[r.lang_code] || { name: r.lang_code, family: 'Unknown' };
    const fam = meta.family;
    const mod = r.model_name;
    families.add(fam);
    models.add(mod);
    // Init structure
    if (!familyData[fam]) familyData[fam] = {};
    if (!familyData[fam][mod]) familyData[fam][mod] = {};
    // Scores are stored as fractions in the repo; expose them as percentages.
    familyData[fam][mod][meta.name] = {
      wer: (r.datasets_avg_wer || 0) * 100,
      cer: (r.datasets_avg_cer || 0) * 100
    };
  });
  // 5. Format ASR By Family: one table per family, rows = models,
  // columns = per-language WER/CER plus averages over covered languages.
  const asrByFamily = {};
  Array.from(families).sort().forEach(fam => {
    const modData = familyData[fam];
    // Union of all languages any model covered in this family.
    const langsSet = new Set();
    Object.values(modData).forEach(m => Object.keys(m).forEach(l => langsSet.add(l)));
    const langs = Array.from(langsSet).sort();
    // Build Rows
    const rows = Object.keys(modData).map(modName => {
      const scores = modData[modName];
      let sW = 0, sC = 0, n = 0;
      langs.forEach(l => {
        if (scores[l]) { sW += scores[l].wer; sC += scores[l].cer; n++; }
      });
      const row = {
        Model: modName,
        Avg_WER: n ? sW / n : 0,
        Avg_CER: n ? sC / n : 0
      };
      // Add individual lang scores (only for languages this model covers).
      langs.forEach(l => {
        if (scores[l]) {
          row[`WER_${l}`] = scores[l].wer;
          row[`CER_${l}`] = scores[l].cer;
        }
      });
      return row;
    });
    // Sort ascending by average WER (best model first).
    rows.sort((a, b) => a.Avg_WER - b.Avg_WER);
    asrByFamily[fam] = { data: rows, languages: langs };
  });
  // 6. Format ASR By Model: one table per model, rows = languages.
  const asrByModel = {};
  Array.from(models).forEach(mod => {
    const rows = [];
    asrRecords.forEach(r => {
      if (r.model_name === mod && r.task === 'asr') {
        const meta = langMap[r.lang_code] || { name: r.lang_code, family: 'Unknown' };
        rows.push({
          Language: meta.name,
          Family: meta.family,
          WER: (r.datasets_avg_wer || 0) * 100,
          CER: (r.datasets_avg_cer || 0) * 100
        });
      }
    });
    rows.sort((a, b) => a.Language.localeCompare(b.Language));
    asrByModel[mod] = rows;
  });
  // 7. Format TTS: group raw records by model.
  const ttsGrouped = {};
  const ttsModels = new Set();
  ttsData.forEach(r => {
    ttsModels.add(r.model);
    if (!ttsGrouped[r.model]) ttsGrouped[r.model] = [];
    ttsGrouped[r.model].push(r);
  });
  CACHED_DATA = {
    metadata: {
      families: Array.from(families).sort(),
      models: Array.from(models).sort(),
      tts_models: Array.from(ttsModels).sort()
    },
    asr: { by_family: asrByFamily, by_model: asrByModel },
    tts: ttsGrouped,
    slid: slidData
  };
  console.log("Data Processing Complete.");
}
// API Route: serve the processed dataset, loading it lazily on first request.
app.get('/api/data', async (req, res) => {
  if (CACHED_DATA === null) {
    try {
      await loadAndProcessData();
    } catch (err) {
      return res.status(500).json({ error: err.message });
    }
  }
  res.json(CACHED_DATA);
});
// Start the server, then warm the cache in the background.
app.listen(PORT, '0.0.0.0', () => {
  console.log(`Server listening on port ${PORT}`);
  // Fix: the initial load was a floating promise — a failed sync became an
  // unhandled rejection (fatal on modern Node). Log the failure instead;
  // the /api/data route will retry the load on demand.
  loadAndProcessData().catch(err => {
    console.error("Initial data load failed:", err);
  });
});