gablilli committed on
Commit
844453f
·
verified ·
1 Parent(s): 1b8ec0a

fix: __data.json response fixes

Browse files
Files changed (1) hide show
  1. providers/src/sanoma/auth.js +151 -128
providers/src/sanoma/auth.js CHANGED
@@ -219,149 +219,172 @@ export async function fetchBooks(client) {
219
 
220
  const lines = response.data.split('\n').filter(line => line.trim());
221
  const jsonObjects = lines.map(line => JSON.parse(line));
 
222
 
223
- let allData = [];
224
-
225
- jsonObjects.forEach(obj => {
226
- if (obj.data && Array.isArray(obj.data)) {
227
- for (let i = 0; i < obj.data.length; i++) {
228
- if (obj.data[i] !== undefined) allData[i] = obj.data[i];
229
- }
230
  }
231
- if (obj.nodes) {
232
- obj.nodes.forEach(node => {
233
- if (node && Array.isArray(node.data)) {
234
- for (let i = 0; i < node.data.length; i++) {
235
- if (node.data[i] !== undefined) allData[i] = node.data[i];
236
- }
237
- }
238
- });
239
  }
240
- });
241
 
242
- jsonObjects.filter(obj => obj.type === 'chunk' && obj.data).forEach(chunk => {
243
- let chunkData = chunk.data;
244
- if (Array.isArray(chunkData[0])) chunkData = chunkData[0];
245
- for (let i = 0; i < chunkData.length; i++) {
246
- if (chunkData[i] !== undefined) allData[i] = chunkData[i];
247
  }
248
- });
249
-
250
- const books = [];
251
- const seenOperas = new Set();
252
- const resolved = new Map();
253
 
254
- function decompressValue(val) {
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
255
  if (typeof val === 'number') {
256
- if (val < 0 || val >= allData.length || allData[val] === undefined) return val;
257
- if (resolved.has(val)) return resolved.get(val);
258
- const target = allData[val];
259
- if (Array.isArray(target)) {
260
- const newArr = [];
261
- resolved.set(val, newArr);
262
- for (let j = 0; j < target.length; j++) newArr.push(decompressValue(target[j]));
263
- return newArr;
264
- } else if (target && typeof target === 'object') {
265
- const newObj = {};
266
- resolved.set(val, newObj);
267
- for (const key in target) newObj[key] = decompressValue(target[key]);
268
- return newObj;
269
- } else {
270
- resolved.set(val, target);
271
- return target;
272
- }
273
  }
274
  return val;
275
- }
276
 
277
- for (let i = 0; i < allData.length; i++) {
278
- const item = allData[i];
279
-
280
- if (item && typeof item === 'object' && !Array.isArray(item) && 'opera_id' in item && 'display_name' in item) {
281
- const fullyResolved = decompressValue(i);
282
- if (!fullyResolved || !fullyResolved.opera_id || seenOperas.has(fullyResolved.opera_id)) continue;
283
- seenOperas.add(fullyResolved.opera_id);
284
-
285
- const productsMap = new Map();
286
- const crawlVisited = new Set();
287
-
288
- function extractProducts(node, namePath, inheritedIsbn) {
289
- if (!node || typeof node !== 'object') return;
290
- if (crawlVisited.has(node)) return;
291
- crawlVisited.add(node);
292
-
293
- let currentNames = [...namePath];
294
- const potentialNames = [node.display_name, node.title, node.name, node.category_label, node.category_name];
295
-
296
- for (const n of potentialNames) {
297
- const str = String(n || '').trim();
298
- if (str && str !== 'Prodotti' && str !== 'null' && str !== 'undefined' && str !== '[object Object]' && !/^\d+$/.test(str)) {
299
- let isRedundant = false;
300
- for (let j = 0; j < currentNames.length; j++) {
301
- const existing = currentNames[j];
302
- if (existing.toLowerCase() === str.toLowerCase()) { isRedundant = true; break; }
303
- if (str.toLowerCase().includes(existing.toLowerCase()) && str.length > existing.length) { currentNames[j] = str; isRedundant = true; break; }
304
- if (existing.toLowerCase().includes(str.toLowerCase())) { isRedundant = true; break; }
305
- }
306
- if (!isRedundant) currentNames.push(str);
307
- }
308
- }
309
-
310
- const currentIsbn = node.isbn || node.paper_isbn || inheritedIsbn;
311
- let gediCode = null;
312
- if (node.external_id && /^\d{5,10}$/.test(String(node.external_id))) gediCode = String(node.external_id);
313
- else if (node.id && /^\d{5,10}$/.test(String(node.id))) gediCode = String(node.id);
314
-
315
- if (gediCode) {
316
- let finalParts = [];
317
- for (let j = 0; j < currentNames.length; j++) {
318
- let isRedundant = false;
319
- let currFirstWord = currentNames[j].trim().split(/[\s\-_]+/)[0].toLowerCase();
320
- for (let k = j + 1; k < currentNames.length; k++) {
321
- let nextFirstWord = currentNames[k].trim().split(/[\s\-_]+/)[0].toLowerCase();
322
- if (currFirstWord && currFirstWord === nextFirstWord) { isRedundant = true; break; }
323
- }
324
- if (!isRedundant) finalParts.push(currentNames[j]);
325
- }
326
- let finalName = finalParts.join(' - ') || `Volume (${gediCode})`;
327
-
328
- if (!productsMap.has(gediCode)) {
329
- productsMap.set(gediCode, { isbn: currentIsbn || '', name: finalName, gedi: gediCode, resources: [] });
330
- } else if (finalName.length > productsMap.get(gediCode).name.length) {
331
- productsMap.get(gediCode).name = finalName;
332
- }
333
-
334
- productsMap.get(gediCode).resources.push({
335
- type: node.category_name || '',
336
- category_id: node.category_id || '',
337
- external_id: node.external_id || '',
338
- code: node.internal_code || '',
339
- url: node.url || ''
340
- });
341
- }
342
-
343
- if (Array.isArray(node)) {
344
- for (let k = 0; k < node.length; k++) {
345
- if (typeof node[k] === 'object') extractProducts(node[k], currentNames, currentIsbn);
346
- }
347
- } else {
348
- for (const key in node) {
349
- if (typeof node[key] === 'object') extractProducts(node[key], currentNames, currentIsbn);
350
- }
351
- }
352
  }
353
-
354
- let initialPath = [];
355
- if (fullyResolved.display_name) initialPath.push(fullyResolved.display_name);
356
- extractProducts(fullyResolved.included || fullyResolved, initialPath, '');
357
-
358
- for (const product of productsMap.values()) {
359
- books.push({ name: product.name, opera_id: fullyResolved.opera_id, products: [product] });
 
 
 
 
 
 
360
  }
 
 
 
 
 
361
  }
362
  }
363
 
364
- return books;
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
365
  }
366
 
367
  function normalizePlaceBookUrl(url) {
 
219
 
220
  const lines = response.data.split('\n').filter(line => line.trim());
221
  const jsonObjects = lines.map(line => JSON.parse(line));
222
+ const booksByGedi = new Map();
223
 
224
/**
 * Registers a product in `booksByGedi` (a Map declared in the enclosing
 * scope), or merges it into the entry already stored under the same GEDI code.
 *
 * Merge rules for an existing entry:
 *  - the longer of the two names wins (entry name and product name stay in sync);
 *  - a missing ISBN is filled in from the incoming product;
 *  - incoming resources are appended unless an identical one is already stored.
 *
 * @param {*} operaId - Opera identifier recorded on a newly created entry.
 * @param {{gedi: string, name?: string, isbn?: string, resources?: object[]}} product
 *   Product to add. On first insertion the object is stored by reference.
 * @returns {void}
 */
function mergeBookProduct(operaId, product) {
  const existing = booksByGedi.get(product.gedi);

  if (!existing) {
    booksByGedi.set(product.gedi, { name: product.name, opera_id: operaId, products: [product] });
    return;
  }

  const existingProduct = existing.products[0];
  if ((product.name || '').length > (existingProduct.name || '').length) {
    existing.name = product.name;
    existingProduct.name = product.name;
  }

  if (!existingProduct.isbn && product.isbn) {
    existingProduct.isbn = product.isbn;
  }

  // The first product stored for a GEDI code may have been registered without
  // a `resources` array; make sure there is one before appending to it.
  if (!Array.isArray(existingProduct.resources)) {
    existingProduct.resources = [];
  }

  // Two resources are considered identical when all five descriptive fields match.
  const resourceKey = (resource) => JSON.stringify([
    resource?.type || '',
    resource?.category_id || '',
    resource?.external_id || '',
    resource?.code || '',
    resource?.url || '',
  ]);

  const seenResources = new Set(existingProduct.resources.map(resourceKey));
  for (const resource of product.resources || []) {
    const key = resourceKey(resource);
    if (!seenResources.has(key)) {
      existingProduct.resources.push(resource);
      seenResources.add(key);
    }
  }
}
258
+
259
/**
 * Scans one flattened data table for opera records and registers every
 * "libromedia" resource found under them through `mergeBookProduct`.
 *
 * In the table, a number found inside an object or array is a reference to
 * another slot of the same table; records are rebuilt by following those
 * references.
 *
 * @param {Array} dataTable - Flattened table; anything that is not a
 *   non-empty array is ignored.
 * @returns {void} Results are reported via `mergeBookProduct`.
 */
function extractBooksFromDataTable(dataTable) {
  if (!Array.isArray(dataTable) || dataTable.length === 0) return;

  const gediPattern = /^\d{5,10}$/;
  const isRecord = (value) => Boolean(value) && typeof value === 'object' && !Array.isArray(value);

  // Cache of already rebuilt slots; also breaks reference cycles because the
  // container is registered before its children are resolved.
  const resolved = new Map();

  // Rebuilds the value a reference points to. Non-numeric values are literals.
  function decompressValue(val) {
    if (typeof val !== 'number') return val;

    if (val < 0) {
      // A negative reference cannot be a table index. The mapping below follows
      // the sentinel values of the devalue encoding used by SvelteKit
      // `__data.json` payloads.
      // NOTE(review): assumes this endpoint is devalue-encoded - confirm.
      switch (val) {
        case -1: // undefined
        case -2: // hole in a sparse array
          return undefined;
        case -3:
          return NaN;
        case -4:
          return Infinity;
        case -5:
          return -Infinity;
        case -6:
          return -0;
        default:
          return val;
      }
    }

    if (val >= dataTable.length || dataTable[val] === undefined) return val;
    if (resolved.has(val)) return resolved.get(val);

    const target = dataTable[val];
    if (Array.isArray(target)) {
      const rebuiltArray = [];
      resolved.set(val, rebuiltArray);
      for (const reference of target) rebuiltArray.push(decompressValue(reference));
      return rebuiltArray;
    }
    if (target && typeof target === 'object') {
      const rebuiltObject = {};
      resolved.set(val, rebuiltObject);
      for (const key of Object.keys(target)) {
        // The payload is remote data: never let it replace the prototype of
        // a rebuilt object.
        if (key === '__proto__') continue;
        rebuiltObject[key] = decompressValue(target[key]);
      }
      return rebuiltObject;
    }

    resolved.set(val, target);
    return target;
  }

  for (let i = 0; i < dataTable.length; i++) {
    const item = dataTable[i];
    if (!isRecord(item)) continue;
    if (!('opera_id' in item) || !('display_name' in item)) continue;

    const fullyResolved = decompressValue(i);
    if (!fullyResolved || !fullyResolved.opera_id || fullyResolved.is_opera !== true) continue;

    const operaName = String(fullyResolved.display_name || '').trim();
    const includedProducts = Array.isArray(fullyResolved.included) ? fullyResolved.included : [];

    for (const includedProduct of includedProducts) {
      if (!isRecord(includedProduct)) continue;

      const productName = String(
        includedProduct.display_name || includedProduct.title || includedProduct.name || ''
      ).trim();
      const productIsbn = includedProduct.isbn || includedProduct.paper_isbn || '';
      const resources = Array.isArray(includedProduct.resource) ? includedProduct.resource : [];

      for (const resource of resources) {
        if (!isRecord(resource)) continue;

        // Only resources with a 5-10 digit external id and a libromedia URL are kept.
        const gediCode = resource.external_id && gediPattern.test(String(resource.external_id))
          ? String(resource.external_id)
          : null;
        const resourceUrl = typeof resource.url === 'string' ? resource.url : '';
        const isLibromedia = resourceUrl.includes('/prodotti_digitali/libromedia/');

        if (!gediCode || !isLibromedia) continue;

        const resourceName = String(resource.display_name || '').trim();
        const finalName = buildSanomaBookName(operaName, productName, resourceName, gediCode);

        mergeBookProduct(fullyResolved.opera_id, {
          isbn: productIsbn || resource.paper_isbn || '',
          name: finalName,
          gedi: gediCode,
          resources: [{
            type: resource.category_name || '',
            category_id: resource.category_id || '',
            external_id: resource.external_id || '',
            code: resource.internal_code || '',
            url: resource.url || '',
          }],
        });
      }
    }
  }
}
334
+
335
+ for (const obj of jsonObjects) {
336
+ if (Array.isArray(obj.data)) {
337
+ extractBooksFromDataTable(obj.data);
338
+ }
339
+
340
+ if (Array.isArray(obj.nodes)) {
341
+ for (const node of obj.nodes) {
342
+ if (Array.isArray(node?.data)) {
343
+ extractBooksFromDataTable(node.data);
344
  }
345
+ }
346
+ }
347
+
348
+ if (obj.type === 'chunk' && Array.isArray(obj.data)) {
349
+ extractBooksFromDataTable(obj.data);
350
  }
351
  }
352
 
353
+ return Array.from(booksByGedi.values());
354
+ }
355
+
356
/**
 * Canonicalises a label so that two labels can be compared for equality or
 * containment regardless of case, spacing, or Unicode composition.
 *
 * @param {*} value - Label to normalise; falsy values become an empty string.
 * @returns {string} The label NFC-normalised, trimmed, with whitespace runs
 *   collapsed to one space, and lower-cased.
 */
function normalizeBookLabel(value) {
  return String(value || '')
    // Composed and decomposed accented characters must compare equal.
    .normalize('NFC')
    .trim()
    .replace(/\s+/g, ' ')
    .toLowerCase();
}
362
+
363
/**
 * Builds the display name of a book from the names of its opera, product and
 * resource, avoiding repeated information.
 *
 * Resolution order:
 *  1. no resource name  -> product name, else opera name, else `Volume (<gediCode>)`;
 *  2. no product name   -> resource name;
 *  3. resource and product names are the same label -> product name;
 *  4. resource name contains the opera or product name -> resource name;
 *  5. product name contains the resource name -> product name;
 *  6. otherwise `<product> - <resource>`.
 *
 * Comparisons in steps 3-5 are made on labels canonicalised by
 * `normalizeBookLabel`.
 *
 * @param {*} operaName - Name of the opera.
 * @param {*} productName - Name of the product inside the opera.
 * @param {*} resourceName - Name of the resource inside the product.
 * @param {*} gediCode - Code used in the fallback name.
 * @returns {string} The name to display.
 */
function buildSanomaBookName(operaName, productName, resourceName, gediCode) {
  const opera = String(operaName || '').trim();
  const product = String(productName || '').trim();
  const resource = String(resourceName || '').trim();

  if (!resource) {
    return product || opera || `Volume (${gediCode})`;
  }
  if (!product) {
    return resource;
  }

  // From here on both `product` and `resource` are non-empty.
  const normalizedOpera = normalizeBookLabel(opera);
  const normalizedProduct = normalizeBookLabel(product);
  const normalizedResource = normalizeBookLabel(resource);

  if (normalizedResource === normalizedProduct) {
    return product;
  }

  const resourceMentionsOpera = Boolean(normalizedOpera) && normalizedResource.includes(normalizedOpera);
  const resourceMentionsProduct = Boolean(normalizedProduct) && normalizedResource.includes(normalizedProduct);
  if (resourceMentionsOpera || resourceMentionsProduct) {
    return resource;
  }

  // The product name already says everything the resource name says, so
  // appending the resource would only repeat it ("Book Vol 1 - Vol 1").
  if (normalizedResource && normalizedProduct.includes(normalizedResource)) {
    return product;
  }

  return `${product} - ${resource}`;
}
389
 
390
  function normalizePlaceBookUrl(url) {