File size: 3,087 Bytes
c558ad2
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
/* eslint-disable no-console */
const sharp = require('sharp');
const tar = require('tar-stream');

// Deterministic RNG (like numpy default_rng with a seed).
function mulberry32(seed) {
  let t = seed >>> 0;
  return function rand() {
    t += 0x6d2b79f5;
    let r = Math.imul(t ^ (t >>> 15), 1 | t);
    r ^= r + Math.imul(r ^ (r >>> 7), 61 | r);
    return ((r ^ (r >>> 14)) >>> 0) / 4294967296;
  };
}

// Generate random RGB images as Uint8Array (Node equivalent of numpy integers).
// Returns `numRows` objects of shape { data, width, height }, where `data`
// holds imageSize * imageSize * 3 bytes drawn from the seeded RNG.
function randomImages(numRows, imageSize, seed) {
  const rng = mulberry32(seed || 0);
  const byteCount = imageSize * imageSize * 3;
  const images = [];

  for (let row = 0; row < numRows; row += 1) {
    // Uint8Array.from invokes the map function once per slot, in order,
    // so the RNG is consumed in exactly the same sequence as a plain loop.
    const data = Uint8Array.from({ length: byteCount }, () => Math.floor(rng() * 256));
    images.push({ data, width: imageSize, height: imageSize });
  }
  return images;
}

// Encode a single RGB image into compressed bytes (Node equivalent of _encode_image).
// `img` is { data, width, height } with raw 3-channel pixels; any format other
// than jpg/jpeg falls through to PNG encoding.
async function encodeImage(img, imageFormat, jpegQuality = 90) {
  const ext = (imageFormat || 'jpeg').toLowerCase().replace('.', '');
  const pipeline = sharp(Buffer.from(img.data), {
    raw: { width: img.width, height: img.height, channels: 3 }
  });

  const wantsJpeg = ext === 'jpg' || ext === 'jpeg';
  return wantsJpeg
    ? pipeline.jpeg({ quality: jpegQuality }).toBuffer()
    : pipeline.png().toBuffer();
}

// Drain a readable stream and resolve with its concatenated bytes.
function streamToBuffer(stream) {
  return new Promise((resolve, reject) => {
    const parts = [];
    stream
      .on('data', (part) => {
        parts.push(part);
      })
      .on('error', reject)
      .on('end', () => {
        resolve(Buffer.concat(parts));
      });
  });
}

// Build a WebDataset-compatible tar payload (application/x-tar).
// Each encoded image becomes one tar entry named NNNNNN.<ext>.
async function buildWebdatasetTar(encodedImages, imageFormat) {
  const extension = (imageFormat || 'jpeg').toLowerCase().replace('.', '');
  const archive = tar.pack();

  for (const [index, bytes] of encodedImages.entries()) {
    const name = `${String(index).padStart(6, '0')}.${extension}`;
    archive.entry({ name }, bytes);
  }
  archive.finalize();

  return streamToBuffer(archive);
}

// Node equivalent of your Python preprocessing wrapper.
// Generates `numRows` random images, encodes them, and returns either the
// first encoded image (dtype === 'bytes') or a WebDataset tar of all of them,
// along with the matching Content-Type header.
async function buildPayload({
  numRows,
  imageSize,
  imageFormat,
  dtype = 'bytes',
  seed = 0
}) {
  // Normalize the format once, with the same 'jpeg' fallback used by
  // encodeImage/buildWebdatasetTar. Previously a missing imageFormat threw
  // a TypeError on the Content-Type line in the 'bytes' branch even though
  // the encoders themselves handled it.
  const ext = (imageFormat || 'jpeg').toLowerCase().replace('.', '');

  const images = randomImages(numRows, imageSize, seed);
  const encoded = await Promise.all(images.map((img) => encodeImage(img, imageFormat)));

  if (dtype === 'bytes') {
    // 'bytes' mode ships a single image; only the first encoding is used.
    return {
      payload: encoded[0],
      headers: { 'Content-Type': `image/${ext}` }
    };
  }

  const payload = await buildWebdatasetTar(encoded, imageFormat);
  return { payload, headers: { 'Content-Type': 'application/x-tar' } };
}

// Public API: generation, encoding, and payload-assembly helpers.
module.exports = {
  randomImages,
  encodeImage,
  buildWebdatasetTar,
  buildPayload
};

// Smoke-test entry point: runs only when executed directly, not when required.
if (require.main === module) {
  (async () => {
    try {
      const result = await buildPayload({
        numRows: 2,
        imageSize: 64,
        imageFormat: 'jpeg',
        dtype: 'bytes',
        seed: 42
      });
      console.log('Payload bytes:', result.payload.length);
      console.log('Headers:', result.headers);
    } catch (err) {
      console.error(err);
    }
  })();
}