Spaces:
Build error
Build error
fix: do img filtering in node instead of browser
Browse files
- package-lock.json +4 -4
- package.json +1 -1
- src/api/crawler.ts +0 -1
- src/services/alt-text.ts +31 -2
- src/services/puppeteer.ts +1 -12
- src/services/snapshot-formatter.ts +1 -3
package-lock.json
CHANGED
|
@@ -17,7 +17,7 @@
|
|
| 17 |
"axios": "^1.3.3",
|
| 18 |
"bcrypt": "^5.1.0",
|
| 19 |
"busboy": "^1.6.0",
|
| 20 |
-
"civkit": "^0.9.0-
|
| 21 |
"core-js": "^3.37.1",
|
| 22 |
"cors": "^2.8.5",
|
| 23 |
"dayjs": "^1.11.9",
|
|
@@ -4003,9 +4003,9 @@
|
|
| 4003 |
}
|
| 4004 |
},
|
| 4005 |
"node_modules/civkit": {
|
| 4006 |
-
"version": "0.9.0-
|
| 4007 |
-
"resolved": "https://registry.npmjs.org/civkit/-/civkit-0.9.0-
|
| 4008 |
-
"integrity": "sha512-
|
| 4009 |
"license": "AGPL",
|
| 4010 |
"dependencies": {
|
| 4011 |
"lodash": "^4.17.21",
|
|
|
|
| 17 |
"axios": "^1.3.3",
|
| 18 |
"bcrypt": "^5.1.0",
|
| 19 |
"busboy": "^1.6.0",
|
| 20 |
+
"civkit": "^0.9.0-2570394",
|
| 21 |
"core-js": "^3.37.1",
|
| 22 |
"cors": "^2.8.5",
|
| 23 |
"dayjs": "^1.11.9",
|
|
|
|
| 4003 |
}
|
| 4004 |
},
|
| 4005 |
"node_modules/civkit": {
|
| 4006 |
+
"version": "0.9.0-2570394",
|
| 4007 |
+
"resolved": "https://registry.npmjs.org/civkit/-/civkit-0.9.0-2570394.tgz",
|
| 4008 |
+
"integrity": "sha512-w77agnElTEP6g+l66KhX1Ib9z7JXbR3FaR5/2yTUPIPjm32qsWkmKRvv0mZ83IcMSSmTjF9LxboYAliyTx7cIA==",
|
| 4009 |
"license": "AGPL",
|
| 4010 |
"dependencies": {
|
| 4011 |
"lodash": "^4.17.21",
|
package.json
CHANGED
|
@@ -26,7 +26,7 @@
|
|
| 26 |
"axios": "^1.3.3",
|
| 27 |
"bcrypt": "^5.1.0",
|
| 28 |
"busboy": "^1.6.0",
|
| 29 |
-
"civkit": "^0.9.0-
|
| 30 |
"core-js": "^3.37.1",
|
| 31 |
"cors": "^2.8.5",
|
| 32 |
"dayjs": "^1.11.9",
|
|
|
|
| 26 |
"axios": "^1.3.3",
|
| 27 |
"bcrypt": "^5.1.0",
|
| 28 |
"busboy": "^1.6.0",
|
| 29 |
+
"civkit": "^0.9.0-2570394",
|
| 30 |
"core-js": "^3.37.1",
|
| 31 |
"cors": "^2.8.5",
|
| 32 |
"dayjs": "^1.11.9",
|
src/api/crawler.ts
CHANGED
|
@@ -1069,7 +1069,6 @@ export class CrawlerHost extends RPCHost {
|
|
| 1069 |
title: snapshot.title,
|
| 1070 |
content: snapshot.parsed?.textContent,
|
| 1071 |
url: presumedURL?.href || snapshot.href,
|
| 1072 |
-
[Symbol.dispose]: () => undefined,
|
| 1073 |
};
|
| 1074 |
|
| 1075 |
Object.defineProperty(output, 'textRepresentation', {
|
|
|
|
| 1069 |
title: snapshot.title,
|
| 1070 |
content: snapshot.parsed?.textContent,
|
| 1071 |
url: presumedURL?.href || snapshot.href,
|
|
|
|
| 1072 |
};
|
| 1073 |
|
| 1074 |
Object.defineProperty(output, 'textRepresentation', {
|
src/services/alt-text.ts
CHANGED
|
@@ -33,8 +33,11 @@ export class AltTextService extends AsyncService {
|
|
| 33 |
try {
|
| 34 |
const img = await this.canvasService.loadImage(url);
|
| 35 |
const contentTypeHint = Reflect.get(img, 'contentType');
|
|
|
|
|
|
|
|
|
|
| 36 |
if (Math.min(img.naturalHeight, img.naturalWidth) < 64) {
|
| 37 |
-
|
| 38 |
}
|
| 39 |
const resized = this.canvasService.fitImageToSquareBox(img, 1024);
|
| 40 |
const exported = await this.canvasService.canvasToBuffer(resized, 'image/png');
|
|
@@ -63,6 +66,32 @@ export class AltTextService extends AsyncService {
|
|
| 63 |
}
|
| 64 |
const digest = md5Hasher.hash(imgBrief.src);
|
| 65 |
const shortDigest = Buffer.from(digest, 'hex').toString('base64url');
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 66 |
|
| 67 |
const existing = await ImgAlt.fromFirestore(shortDigest);
|
| 68 |
|
|
@@ -102,4 +131,4 @@ export class AltTextService extends AsyncService {
|
|
| 102 |
|
| 103 |
return generatedCaption;
|
| 104 |
}
|
| 105 |
-
}
|
|
|
|
| 33 |
try {
|
| 34 |
const img = await this.canvasService.loadImage(url);
|
| 35 |
const contentTypeHint = Reflect.get(img, 'contentType');
|
| 36 |
+
if (Math.min(img.naturalHeight, img.naturalWidth) <= 1) {
|
| 37 |
+
return `A ${img.naturalWidth}x${img.naturalHeight} image, likely be a tacker probe`;
|
| 38 |
+
}
|
| 39 |
if (Math.min(img.naturalHeight, img.naturalWidth) < 64) {
|
| 40 |
+
return `A ${img.naturalWidth}x${img.naturalHeight} small image, likely a logo, icon or avatar`;
|
| 41 |
}
|
| 42 |
const resized = this.canvasService.fitImageToSquareBox(img, 1024);
|
| 43 |
const exported = await this.canvasService.canvasToBuffer(resized, 'image/png');
|
|
|
|
| 66 |
}
|
| 67 |
const digest = md5Hasher.hash(imgBrief.src);
|
| 68 |
const shortDigest = Buffer.from(digest, 'hex').toString('base64url');
|
| 69 |
+
let dims: number[] = [];
|
| 70 |
+
do {
|
| 71 |
+
if (imgBrief.loaded) {
|
| 72 |
+
if (imgBrief.naturalWidth && imgBrief.naturalHeight) {
|
| 73 |
+
if (Math.min(imgBrief.naturalWidth, imgBrief.naturalHeight) < 64) {
|
| 74 |
+
dims = [imgBrief.naturalWidth, imgBrief.naturalHeight];
|
| 75 |
+
break;
|
| 76 |
+
}
|
| 77 |
+
}
|
| 78 |
+
}
|
| 79 |
+
|
| 80 |
+
if (imgBrief.width && imgBrief.height) {
|
| 81 |
+
if (Math.min(imgBrief.width, imgBrief.height) < 64) {
|
| 82 |
+
dims = [imgBrief.width, imgBrief.height];
|
| 83 |
+
break;
|
| 84 |
+
}
|
| 85 |
+
}
|
| 86 |
+
|
| 87 |
+
} while (false);
|
| 88 |
+
|
| 89 |
+
if (Math.min(...dims) <= 1) {
|
| 90 |
+
return `A ${dims[0]}x${dims[1]} image, likely be a tacker probe`;
|
| 91 |
+
}
|
| 92 |
+
if (Math.min(...dims) < 64) {
|
| 93 |
+
return `A ${dims[0]}x${dims[1]} small image, likely a logo, icon or avatar`;
|
| 94 |
+
}
|
| 95 |
|
| 96 |
const existing = await ImgAlt.fromFirestore(shortDigest);
|
| 97 |
|
|
|
|
| 131 |
|
| 132 |
return generatedCaption;
|
| 133 |
}
|
| 134 |
+
};
|
src/services/puppeteer.ts
CHANGED
|
@@ -407,18 +407,7 @@ function giveSnapshot(stopActiveSnapshot, overrideDomAnalysis) {
|
|
| 407 |
if (document.baseURI !== r.href) {
|
| 408 |
r.rebase = document.baseURI;
|
| 409 |
}
|
| 410 |
-
r.imgs = briefImgs()
|
| 411 |
-
if (x.complete) {
|
| 412 |
-
if (Math.min(x.width, x.height, x.naturalWidth, x.naturalHeight) < 64) {
|
| 413 |
-
return false;
|
| 414 |
-
}
|
| 415 |
-
}
|
| 416 |
-
const m = Math.min(x.width, x.height);
|
| 417 |
-
if (m && m < 64) {
|
| 418 |
-
return false;
|
| 419 |
-
}
|
| 420 |
-
return true;
|
| 421 |
-
});
|
| 422 |
|
| 423 |
return r;
|
| 424 |
}
|
|
|
|
| 407 |
if (document.baseURI !== r.href) {
|
| 408 |
r.rebase = document.baseURI;
|
| 409 |
}
|
| 410 |
+
r.imgs = briefImgs();
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 411 |
|
| 412 |
return r;
|
| 413 |
}
|
src/services/snapshot-formatter.ts
CHANGED
|
@@ -43,7 +43,7 @@ export interface FormattedPage {
|
|
| 43 |
|
| 44 |
textRepresentation?: string;
|
| 45 |
|
| 46 |
-
[Symbol.dispose]: () => void;
|
| 47 |
}
|
| 48 |
|
| 49 |
export const md5Hasher = new HashManager('md5', 'hex');
|
|
@@ -199,7 +199,6 @@ export class SnapshotFormatter extends AsyncService {
|
|
| 199 |
description: (snapshot.description || '').trim(),
|
| 200 |
url: nominalUrl?.toString() || snapshot.href?.trim(),
|
| 201 |
publishedTime: snapshot.parsed?.publishedTime || undefined,
|
| 202 |
-
[Symbol.dispose]: () => { },
|
| 203 |
};
|
| 204 |
|
| 205 |
Object.assign(f, formatted);
|
|
@@ -395,7 +394,6 @@ export class SnapshotFormatter extends AsyncService {
|
|
| 395 |
url: nominalUrl?.toString() || snapshot.href?.trim(),
|
| 396 |
content: contentText,
|
| 397 |
publishedTime: snapshot.parsed?.publishedTime || undefined,
|
| 398 |
-
[Symbol.dispose]: () => { },
|
| 399 |
};
|
| 400 |
|
| 401 |
if (snapshot.status) {
|
|
|
|
| 43 |
|
| 44 |
textRepresentation?: string;
|
| 45 |
|
| 46 |
+
[Symbol.dispose]?: () => void;
|
| 47 |
}
|
| 48 |
|
| 49 |
export const md5Hasher = new HashManager('md5', 'hex');
|
|
|
|
| 199 |
description: (snapshot.description || '').trim(),
|
| 200 |
url: nominalUrl?.toString() || snapshot.href?.trim(),
|
| 201 |
publishedTime: snapshot.parsed?.publishedTime || undefined,
|
|
|
|
| 202 |
};
|
| 203 |
|
| 204 |
Object.assign(f, formatted);
|
|
|
|
| 394 |
url: nominalUrl?.toString() || snapshot.href?.trim(),
|
| 395 |
content: contentText,
|
| 396 |
publishedTime: snapshot.parsed?.publishedTime || undefined,
|
|
|
|
| 397 |
};
|
| 398 |
|
| 399 |
if (snapshot.status) {
|