hanxiao committed on
Commit
9b19012
·
1 Parent(s): 7fc30dd

fix: clean broken markdown

Browse files
backend/functions/src/cloud-functions/crawler.ts CHANGED
@@ -8,6 +8,7 @@ import { Request, Response } from 'express';
8
  import normalizeUrl from "@esm2cjs/normalize-url";
9
 
10
  function tidyMarkdown(markdown: string): string {
 
11
  // Step 1: Handle complex broken links with text and optional images spread across multiple lines
12
  let normalizedMarkdown = markdown.replace(/\[\s*([^]+?)\s*\]\s*\(\s*([^)]+)\s*\)/g, (match, text, url) => {
13
  // Remove internal new lines and excessive spaces within the text
@@ -39,7 +40,10 @@ function tidyMarkdown(markdown: string): string {
39
  // Step 3: Replace more than two consecutive empty lines with exactly two empty lines
40
  normalizedMarkdown = normalizedMarkdown.replace(/\n{3,}/g, '\n\n');
41
 
42
- return normalizedMarkdown;
 
 
 
43
  }
44
 
45
  @singleton()
 
8
  import normalizeUrl from "@esm2cjs/normalize-url";
9
 
10
  function tidyMarkdown(markdown: string): string {
11
+
12
  // Step 1: Handle complex broken links with text and optional images spread across multiple lines
13
  let normalizedMarkdown = markdown.replace(/\[\s*([^]+?)\s*\]\s*\(\s*([^)]+)\s*\)/g, (match, text, url) => {
14
  // Remove internal new lines and excessive spaces within the text
 
40
  // Step 3: Replace more than two consecutive empty lines with exactly two empty lines
41
  normalizedMarkdown = normalizedMarkdown.replace(/\n{3,}/g, '\n\n');
42
 
43
+ // Step 4: Remove leading spaces from each line
44
+ normalizedMarkdown = normalizedMarkdown.replace(/^[ \t]+/gm, '');
45
+
46
+ return normalizedMarkdown.trim();
47
  }
48
 
49
  @singleton()