File size: 3,057 Bytes
da6bb80
d10c686
 
 
 
8147070
d10c686
 
da6bb80
d10c686
 
 
 
9e4d07b
d10c686
 
8147070
 
 
d10c686
8147070
d10c686
8147070
 
 
 
 
 
 
d10c686
 
 
 
 
 
 
da6bb80
d10c686
da6bb80
8147070
 
 
 
 
 
da6bb80
 
 
 
d10c686
da6bb80
8147070
d10c686
 
da6bb80
 
d10c686
 
da6bb80
d10c686
da6bb80
 
 
 
 
 
8147070
da6bb80
d10c686
 
 
 
 
 
 
da6bb80
8147070
d10c686
 
8147070
d10c686
da6bb80
d10c686
8147070
d10c686
 
 
da6bb80
 
d10c686
 
 
8147070
d10c686
 
 
 
 
 
 
 
 
 
 
 
 
 
8147070
da6bb80
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110

// @ts-nocheck
import express from 'express';
import cors from 'cors';
import multer from 'multer';
import { commit } from '@huggingface/hub'; 
import path from 'path';
import { fileURLToPath } from 'url';

// ESM has no __filename/__dirname; derive them from the module URL.
const __filename = fileURLToPath(import.meta.url);
const __dirname = path.dirname(__filename);

const app = express();
const port = process.env.PORT || 3001;

// Allow cross-origin requests and accept very large JSON / form bodies
// (batch uploads can run to hundreds of megabytes).
app.use(cors());
app.use(express.json({ limit: '500mb' }));
app.use(express.urlencoded({ limit: '500mb', extended: true }));

// Keep uploaded files in memory; cap each file and each form field at 500MB.
const MAX_UPLOAD_BYTES = 500 * 1024 * 1024;
const upload = multer({
  storage: multer.memoryStorage(),
  limits: {
    fileSize: MAX_UPLOAD_BYTES,
    fieldSize: MAX_UPLOAD_BYTES,
  },
});

// Hugging Face credentials and target dataset repo, injected via environment.
const SERVER_CONFIG = {
  TOKEN: process.env.HF_TOKEN || '',
  REPO: process.env.HF_REPO || 'TwanAPI/DataTwan',
  TYPE: 'dataset',
};

/**
 * POST /api/upload — batch upload endpoint.
 *
 * Expects multipart/form-data with a `files` array and a parallel `paths`
 * field (one repo-relative destination per file). All files are written to
 * the configured Hugging Face dataset repo in a single atomic commit.
 *
 * Responds with { success: true, count, urls } on success, or
 * { success: false, error } / { error } with status 400/500 on failure.
 */
app.post('/api/upload', upload.array('files'), async (req, res) => {
  try {
    const files = req.files;
    let paths = req.body.paths;

    // Form parsing yields a bare string when only one path is sent;
    // normalize to an array and drop empty values.
    if (!Array.isArray(paths)) {
        paths = [paths].filter(Boolean);
    }

    if (!files || files.length === 0) {
      return res.status(400).json({ error: 'No files provided' });
    }

    if (files.length !== paths.length) {
      return res.status(400).json({ error: `Mismatch: ${files.length} files vs ${paths.length} paths` });
    }

    // Paths are client-supplied: reject non-strings, absolute paths, and
    // '..' traversal segments so a caller cannot escape the intended
    // location inside the repo.
    const badPath = paths.find(
      (p) => typeof p !== 'string' || p.startsWith('/') || p.split('/').includes('..')
    );
    if (badPath !== undefined) {
      return res.status(400).json({ error: `Invalid destination path: ${badPath}` });
    }

    if (!SERVER_CONFIG.TOKEN) {
       return res.status(500).json({ error: 'Server misconfiguration: HF_TOKEN secret is missing.' });
    }

    console.log(`[SERVER] Processing batch of ${files.length} files...`);

    // One addOrUpdate operation per uploaded file, targeting its paired path.
    const operations = files.map((file, index) => ({
      operation: 'addOrUpdate',
      path: paths[index],
      content: new Blob([file.buffer])
    }));

    // Using commit is atomic and faster for datasets than individual LFS uploads for small-medium files
    const response = await commit({
      credentials: {
        accessToken: SERVER_CONFIG.TOKEN,
      },
      repo: {
        type: SERVER_CONFIG.TYPE,
        name: SERVER_CONFIG.REPO
      },
      operations: operations,
      title: `Batch upload of ${files.length} files`
    });

    // Build stable blob URLs pinned to the new commit hash.
    const commitHash = response.oid;
    const urlPrefix = "https://huggingface.co/datasets";
    const urls = paths.map(p => `${urlPrefix}/${SERVER_CONFIG.REPO}/blob/${commitHash}/${p}`);

    console.log(`[SERVER] Batch Committed: ${commitHash}`);

    res.json({ 
      success: true, 
      count: files.length,
      urls: urls 
    });

  } catch (error) {
    console.error('[SERVER] Error:', error);
    res.status(500).json({ 
      success: false, 
      error: error.message || 'Internal Server Error' 
    });
  }
});

// Serve the built frontend; unknown routes fall back to index.html so the
// SPA router can handle them client-side.
const distDir = path.join(__dirname, 'dist');
app.use(express.static(distDir));

app.get('*', (_req, res) => {
  res.sendFile(path.join(distDir, 'index.html'));
});

app.listen(port, () => {
  console.log(`✅ Server running on port ${port} | Repo: ${SERVER_CONFIG.REPO}`);
});