nomagick committed on
Commit
e417cd8
·
unverified ·
1 Parent(s): 36bf5d9

fix: tidyMarkdown feature in turndown rules

Browse files
backend/functions/src/cloud-functions/crawler.ts CHANGED
@@ -75,6 +75,38 @@ export class CrawlerHost extends RPCHost {
75
  replacement: (innerText) => `${innerText}\n===============\n`
76
  });
77
  }
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
78
 
79
  return turnDownService;
80
  }
 
75
  replacement: (innerText) => `${innerText}\n===============\n`
76
  });
77
  }
78
+ turnDownService.addRule('improved-paragraph', {
79
+ filter: 'p',
80
+ replacement: (innerText) => {
81
+ const trimmed = innerText.trim();
82
+ if (!trimmed) {
83
+ return '';
84
+ }
85
+
86
+ return `${trimmed.replace(/\n{3,}/g, '\n\n')}\n\n`;
87
+ }
88
+ });
89
+ turnDownService.addRule('improved-inline-link', {
90
+ filter: function (node, options) {
91
+ return (
92
+ options.linkStyle === 'inlined' &&
93
+ node.nodeName === 'A' &&
94
+ node.getAttribute('href')
95
+ );
96
+ },
97
+
98
+ replacement: function (content, node) {
99
+ let href = node.getAttribute('href');
100
+ if (href) href = href.replace(/([()])/g, '\\$1');
101
+ let title = cleanAttribute(node.getAttribute('title'));
102
+ if (title) title = ' "' + title.replace(/"/g, '\\"') + '"';
103
+
104
+ const fixedContent = content.replace(/\s+/g, ' ').trim();
105
+ const fixedHref = href.replace(/\s+/g, '').trim();
106
+
107
+ return `[${fixedContent}](${fixedHref}${title || ''})`;
108
+ }
109
+ });
110
 
111
  return turnDownService;
112
  }