nomagick committed on
Commit
9242bb3
·
unverified ·
1 Parent(s): a879311

fix: detect poorly transformed contents

Browse files
backend/functions/src/services/snapshot-formatter.ts CHANGED
@@ -295,9 +295,10 @@ export class SnapshotFormatter extends AsyncService {
295
  }
296
 
297
  if (
298
- !contentText || (contentText.startsWith('<') && contentText.endsWith('>'))
299
  && toBeTurnedToMd !== jsDomElementOfHTML
300
  ) {
 
301
  try {
302
  contentText = this.jsdomControl.runTurndown(turnDownService, jsDomElementOfHTML);
303
  } catch (err) {
@@ -310,7 +311,7 @@ export class SnapshotFormatter extends AsyncService {
310
  }
311
  }
312
  }
313
- if (!contentText || (contentText.startsWith('<') || contentText.endsWith('>'))) {
314
  contentText = snapshot.text;
315
  }
316
  } while (false);
@@ -551,11 +552,58 @@ ${suffixMixins.length ? `\n${suffixMixins.join('\n\n')}\n` : ''}`;
551
  return delimiter + extraSpace + content + (delimiter === '```' && !content.endsWith(extraSpace) ? extraSpace : '') + delimiter;
552
  }
553
  });
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
554
 
555
  return turnDownService;
556
  }
557
 
558
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
559
  }
560
 
561
  const snapshotFormatter = container.resolve(SnapshotFormatter);
 
295
  }
296
 
297
  if (
298
+ this.isPoorlyTransformed(contentText, toBeTurnedToMd)
299
  && toBeTurnedToMd !== jsDomElementOfHTML
300
  ) {
301
+ toBeTurnedToMd = jsDomElementOfHTML;
302
  try {
303
  contentText = this.jsdomControl.runTurndown(turnDownService, jsDomElementOfHTML);
304
  } catch (err) {
 
311
  }
312
  }
313
  }
314
+ if (this.isPoorlyTransformed(contentText, toBeTurnedToMd)) {
315
  contentText = snapshot.text;
316
  }
317
  } while (false);
 
552
  return delimiter + extraSpace + content + (delimiter === '```' && !content.endsWith(extraSpace) ? extraSpace : '') + delimiter;
553
  }
554
  });
555
+ turnDownService.addRule('flattened-tables', {
556
+ filter: (node) => {
557
+ if (node.tagName !== 'TABLE') {
558
+ return false;
559
+ }
560
+ let parentHasTable = false;
561
+ let ptr = node.parentElement;
562
+ while (ptr) {
563
+ if (ptr.tagName === 'TABLE') {
564
+ parentHasTable = true;
565
+ break;
566
+ }
567
+ ptr = ptr.parentElement;
568
+ }
569
+
570
+ return parentHasTable;
571
+ },
572
+ replacement: (innerText) => {
573
+ return innerText.trim();
574
+ }
575
+ });
576
 
577
  return turnDownService;
578
  }
579
 
580
 
581
/**
 * Heuristically decides whether a converted text result should be discarded.
 *
 * A result is considered poorly transformed when any of the following holds:
 *  - it is empty or missing;
 *  - it both starts with `<` and ends with `>` (it still looks like raw markup);
 *  - it still contains a `<table ...>` ... `</table>` pair AND, in the source
 *    `node`, more than 60% of the `<table>` elements (or of the `<tbody>`
 *    elements) are nested inside another element of the same tag.
 *
 * @param content - The converted text to inspect.
 * @param node - The DOM element the text was produced from. When omitted, the
 *   nested-table heuristics cannot run and are treated as "not poor".
 * @returns `true` when the result looks unusable, `false` otherwise.
 */
isPoorlyTransformed(content?: string, node?: Element): boolean {
    if (!content) {
        return true;
    }

    if (content.startsWith('<') && content.endsWith('>')) {
        return true;
    }

    const hasLeftoverTable = content.includes('<table') && content.includes('</table>');
    if (!hasLeftoverTable || !node) {
        // Without leftover table markup, or without a source element to inspect,
        // there is nothing more to check.
        return false;
    }

    // Share of same-tag-nested elements above which the structure is treated as
    // layout tables that did not convert cleanly.
    const nestedRatioThreshold = 0.6;

    const isMostlyNested = (tag: string): boolean => {
        const total = node.querySelectorAll(tag).length;
        if (total === 0) {
            // Explicit guard: avoid relying on `0 / 0` evaluating to NaN.
            return false;
        }
        const nested = node.querySelectorAll(`${tag} ${tag}`).length;

        return nested / total > nestedRatioThreshold;
    };

    return isMostlyNested('table') || isMostlyNested('tbody');
}
607
  }
608
 
609
  const snapshotFormatter = container.resolve(SnapshotFormatter);
thinapps-shared CHANGED
@@ -1 +1 @@
1
- Subproject commit 2b78c2af94ead181a1f9d864531132e30405fa7e
 
1
+ Subproject commit 7bdc246a49a3a30f785a98fef46569131505b99a