Spaces:
Build error
Build error
chore: clean code
Browse files
backend/functions/package.json
CHANGED
|
@@ -26,6 +26,7 @@
|
|
| 26 |
},
|
| 27 |
"main": "build/index.js",
|
| 28 |
"dependencies": {
|
|
|
|
| 29 |
"@google-cloud/translate": "^8.2.0",
|
| 30 |
"@mozilla/readability": "^0.5.0",
|
| 31 |
"@napi-rs/canvas": "^0.1.44",
|
|
|
|
| 26 |
},
|
| 27 |
"main": "build/index.js",
|
| 28 |
"dependencies": {
|
| 29 |
+
"@esm2cjs/normalize-url": "^8.0.0",
|
| 30 |
"@google-cloud/translate": "^8.2.0",
|
| 31 |
"@mozilla/readability": "^0.5.0",
|
| 32 |
"@napi-rs/canvas": "^0.1.44",
|
backend/functions/src/cloud-functions/crawler.ts
CHANGED
|
@@ -5,6 +5,7 @@ import _ from 'lodash';
|
|
| 5 |
import { PageSnapshot, PuppeteerControl } from '../services/puppeteer';
|
| 6 |
import TurnDownService from 'turndown';
|
| 7 |
import { Request, Response } from 'express';
|
|
|
|
| 8 |
|
| 9 |
|
| 10 |
@singleton()
|
|
@@ -57,11 +58,8 @@ ${contentText.trim()}
|
|
| 57 |
res: Response,
|
| 58 |
},
|
| 59 |
) {
|
| 60 |
-
const [… rest of removed line missing from the captured diff]
|
| 61 |
-
const [… rest of removed line missing from the captured diff]
|
| 62 |
-
const host = rawPath.shift();
|
| 63 |
-
const urlToCrawl = new URL(`${ctx.req.protocol}://${host}/${rawPath.join('/')}`);
|
| 64 |
-
urlToCrawl.search = url.search;
|
| 65 |
|
| 66 |
if (!ctx.req.accepts('text/plain') && ctx.req.accepts('text/event-stream')) {
|
| 67 |
const sseStream = new OutputServerEventStream();
|
|
@@ -88,7 +86,7 @@ ${contentText.trim()}
|
|
| 88 |
});
|
| 89 |
}
|
| 90 |
} catch (err: any) {
|
| 91 |
-
this.logger.error(`Failed to crawl ${ [… rest of removed line missing from the captured diff]
|
| 92 |
sseStream.write({
|
| 93 |
event: 'error',
|
| 94 |
data: marshalErrorLike(err),
|
|
|
|
| 5 |
import { PageSnapshot, PuppeteerControl } from '../services/puppeteer';
|
| 6 |
import TurnDownService from 'turndown';
|
| 7 |
import { Request, Response } from 'express';
|
| 8 |
+
import normalizeUrl from "@esm2cjs/normalize-url";
|
| 9 |
|
| 10 |
|
| 11 |
@singleton()
|
|
|
|
| 58 |
res: Response,
|
| 59 |
},
|
| 60 |
) {
|
| 61 |
+
const noSlashURL = ctx.req.url.slice(1);
|
| 62 |
+
const urlToCrawl = new URL(normalizeUrl(noSlashURL));
|
|
|
|
|
|
|
|
|
|
| 63 |
|
| 64 |
if (!ctx.req.accepts('text/plain') && ctx.req.accepts('text/event-stream')) {
|
| 65 |
const sseStream = new OutputServerEventStream();
|
|
|
|
| 86 |
});
|
| 87 |
}
|
| 88 |
} catch (err: any) {
|
| 89 |
+
this.logger.error(`Failed to crawl ${urlToCrawl.toString()}`, { err: marshalErrorLike(err) });
|
| 90 |
sseStream.write({
|
| 91 |
event: 'error',
|
| 92 |
data: marshalErrorLike(err),
|