Spaces:
Build error
fix: image url
Browse files
backend/functions/src/cloud-functions/crawler.ts
CHANGED
|
@@ -190,7 +190,12 @@ export class CrawlerHost extends RPCHost {
|
|
| 190 |
}
|
| 191 |
}
|
| 192 |
|
| 193 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 194 |
const alt = cleanAttribute(node.getAttribute('alt'));
|
| 195 |
if (!src) {
|
| 196 |
return '';
|
|
|
|
| 190 |
}
|
| 191 |
}
|
| 192 |
|
| 193 |
+
let src;
|
| 194 |
+
try {
|
| 195 |
+
src = new URL(linkPreferredSrc, nominalUrl).toString();
|
| 196 |
+
} catch (_err) {
|
| 197 |
+
void 0;
|
| 198 |
+
}
|
| 199 |
const alt = cleanAttribute(node.getAttribute('alt'));
|
| 200 |
if (!src) {
|
| 201 |
return '';
|
backend/functions/src/services/puppeteer.ts
CHANGED
|
@@ -206,7 +206,7 @@ function briefImgs(elem) {
|
|
| 206 |
}
|
| 207 |
|
| 208 |
return {
|
| 209 |
-
src: linkPreferredSrc,
|
| 210 |
loaded: x.complete,
|
| 211 |
width: x.width,
|
| 212 |
height: x.height,
|
|
@@ -437,7 +437,17 @@ document.addEventListener('load', handlePageLoad);
|
|
| 437 |
const textContent = elem.textContent;
|
| 438 |
const cleanedText = textContent?.split('\n').map((x: any) => x.trimEnd()).join('\n').replace(/\n{3,}/g, '\n\n');
|
| 439 |
|
| 440 |
-
const imageTags = Array.from(elem.querySelectorAll('img[src],img[data-src]'))
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 441 |
|
| 442 |
const imageSet = new Set(imageTags);
|
| 443 |
|
|
|
|
| 206 |
}
|
| 207 |
|
| 208 |
return {
|
| 209 |
+
src: new URL(linkPreferredSrc, document.location.href).toString(),
|
| 210 |
loaded: x.complete,
|
| 211 |
width: x.width,
|
| 212 |
height: x.height,
|
|
|
|
| 437 |
const textContent = elem.textContent;
|
| 438 |
const cleanedText = textContent?.split('\n').map((x: any) => x.trimEnd()).join('\n').replace(/\n{3,}/g, '\n\n');
|
| 439 |
|
| 440 |
+
const imageTags = Array.from(elem.querySelectorAll('img[src],img[data-src]'))
|
| 441 |
+
.map((x: any) => [x.getAttribute('src'), x.getAttribute('data-src')])
|
| 442 |
+
.flat()
|
| 443 |
+
.map((x) => {
|
| 444 |
+
try {
|
| 445 |
+
return new URL(x, snapshot.href).toString();
|
| 446 |
+
} catch (err) {
|
| 447 |
+
return null;
|
| 448 |
+
}
|
| 449 |
+
})
|
| 450 |
+
.filter(Boolean);
|
| 451 |
|
| 452 |
const imageSet = new Set(imageTags);
|
| 453 |
|