Spaces:
Build error
Build error
fix: detect poorly transformed contents
Browse files
backend/functions/src/services/snapshot-formatter.ts
CHANGED
|
@@ -295,9 +295,10 @@ export class SnapshotFormatter extends AsyncService {
|
|
| 295 |
}
|
| 296 |
|
| 297 |
if (
|
| 298 |
-
|
| 299 |
&& toBeTurnedToMd !== jsDomElementOfHTML
|
| 300 |
) {
|
|
|
|
| 301 |
try {
|
| 302 |
contentText = this.jsdomControl.runTurndown(turnDownService, jsDomElementOfHTML);
|
| 303 |
} catch (err) {
|
|
@@ -310,7 +311,7 @@ export class SnapshotFormatter extends AsyncService {
|
|
| 310 |
}
|
| 311 |
}
|
| 312 |
}
|
| 313 |
-
if (
|
| 314 |
contentText = snapshot.text;
|
| 315 |
}
|
| 316 |
} while (false);
|
|
@@ -551,11 +552,58 @@ ${suffixMixins.length ? `\n${suffixMixins.join('\n\n')}\n` : ''}`;
|
|
| 551 |
return delimiter + extraSpace + content + (delimiter === '```' && !content.endsWith(extraSpace) ? extraSpace : '') + delimiter;
|
| 552 |
}
|
| 553 |
});
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 554 |
|
| 555 |
return turnDownService;
|
| 556 |
}
|
| 557 |
|
| 558 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 559 |
}
|
| 560 |
|
| 561 |
const snapshotFormatter = container.resolve(SnapshotFormatter);
|
|
|
|
| 295 |
}
|
| 296 |
|
| 297 |
if (
|
| 298 |
+
this.isPoorlyTransformed(contentText, toBeTurnedToMd)
|
| 299 |
&& toBeTurnedToMd !== jsDomElementOfHTML
|
| 300 |
) {
|
| 301 |
+
toBeTurnedToMd = jsDomElementOfHTML;
|
| 302 |
try {
|
| 303 |
contentText = this.jsdomControl.runTurndown(turnDownService, jsDomElementOfHTML);
|
| 304 |
} catch (err) {
|
|
|
|
| 311 |
}
|
| 312 |
}
|
| 313 |
}
|
| 314 |
+
if (this.isPoorlyTransformed(contentText, toBeTurnedToMd)) {
|
| 315 |
contentText = snapshot.text;
|
| 316 |
}
|
| 317 |
} while (false);
|
|
|
|
| 552 |
return delimiter + extraSpace + content + (delimiter === '```' && !content.endsWith(extraSpace) ? extraSpace : '') + delimiter;
|
| 553 |
}
|
| 554 |
});
|
| 555 |
+
// Rule for tables nested inside another table: instead of being converted as
// a table of their own, they are replaced by their already-converted inner
// content with surrounding whitespace removed.
turnDownService.addRule('flattened-tables', {
    // Matches only TABLE elements that have a TABLE somewhere among their ancestors.
    filter: (node) => {
        if (node.tagName !== 'TABLE') {
            return false;
        }
        // Walk up the ancestor chain; the first enclosing TABLE settles it.
        for (let ancestor = node.parentElement; ancestor; ancestor = ancestor.parentElement) {
            if (ancestor.tagName === 'TABLE') {
                return true;
            }
        }

        return false;
    },
    // Emit the converted inner content, trimmed.
    replacement: (innerText) => innerText.trim()
});
|
| 576 |
|
| 577 |
return turnDownService;
|
| 578 |
}
|
| 579 |
|
| 580 |
|
| 581 |
+
/**
 * Heuristic check for a conversion result that looks unusable.
 *
 * Returns `true` when:
 *  - `content` is missing or empty;
 *  - `content` starts with `<` and ends with `>`;
 *  - `content` contains both `<table` and `</table>` and, under `node`,
 *    more than 60% of the `table` elements (or, failing that, of the
 *    `tbody` elements) are nested inside another element of the same tag.
 *
 * When `node` is absent or has no matching elements the ratio is `0 / 0`
 * (`NaN`), which never exceeds the threshold, so the result is `false`.
 *
 * @param content - The converted text to inspect.
 * @param node - The element the text was produced from; used only for the nesting checks.
 * @returns `true` if the content is considered poorly transformed, otherwise `false`.
 */
isPoorlyTransformed(content?: string, node?: Element) {
    if (!content) {
        return true;
    }

    const looksLikeBareMarkup = content.startsWith('<') && content.endsWith('>');
    if (looksLikeBareMarkup) {
        return true;
    }

    const hasLeftoverTable = content.includes('<table') && content.includes('</table>');
    if (!hasLeftoverTable) {
        return false;
    }

    // Share of `tag` elements that sit inside another `tag` element.
    // Deliberately unguarded division: an empty match set gives NaN, which
    // compares false against the threshold below.
    const nestedShare = (tag: string) => {
        const total = (node?.querySelectorAll(tag) || []).length;
        const nested = node?.querySelectorAll(`${tag} ${tag}`)?.length || 0;

        return nested / total;
    };

    // `||` keeps the tbody queries from running when the table check already decided.
    return nestedShare('table') > 0.6 || nestedShare('tbody') > 0.6;
}
|
| 607 |
}
|
| 608 |
|
| 609 |
const snapshotFormatter = container.resolve(SnapshotFormatter);
|
thinapps-shared
CHANGED
|
@@ -1 +1 @@
|
|
| 1 |
-
Subproject commit
|
|
|
|
| 1 |
+
Subproject commit 7bdc246a49a3a30f785a98fef46569131505b99a
|