File size: 5,499 Bytes
b3fb4c5
 
66db317
3bb7315
b3fb4c5
 
 
3b1978f
b3fb4c5
 
 
 
 
 
62dc75f
b3fb4c5
 
 
66db317
b3fb4c5
3b1978f
 
b3fb4c5
 
 
 
 
 
 
 
 
 
 
 
3bb7315
a471a61
 
 
3bb7315
a471a61
3bb7315
b3fb4c5
 
 
3bb7315
 
 
 
b3fb4c5
3bb7315
 
b3fb4c5
 
 
 
 
 
 
 
8a2b095
b3fb4c5
 
 
62dc75f
8a2b095
 
b3fb4c5
 
a471a61
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
b3fb4c5
 
 
8a2b095
 
b3fb4c5
 
8a2b095
b3fb4c5
8a2b095
 
 
 
b3fb4c5
 
3b1978f
 
3bb7315
3b1978f
 
8a2b095
 
 
b3fb4c5
 
 
 
 
 
 
 
 
8a2b095
 
b3fb4c5
 
 
8a2b095
b3fb4c5
a471a61
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
import { AssertionFailureError, AsyncService, HashManager } from 'civkit';
import { singleton } from 'tsyringe';
import { GlobalLogger } from './logger';
import { CanvasService } from './canvas';
import { ImageInterrogationManager } from '../shared/services/common-iminterrogate';
import { ImgBrief } from './puppeteer';
import { ImgAlt } from '../db/img-alt';
import { AsyncLocalContext } from './async-context';

// Module-level hasher configured for MD5 with hex output. AltTextService.getAltText()
// hashes an image `src` with it and re-encodes the hex digest as base64url to
// obtain the id of the cached alt-text document.
const md5Hasher = new HashManager('md5', 'hex');

/**
 * Resolves alt text for images: prefers the alt supplied by the page, falls
 * back to a canned description for very small images, then to a cached
 * caption, and finally to a caption generated by the image interrogator.
 */
@singleton()
export class AltTextService extends AsyncService {

    /** Alt values (compared trimmed and lower-cased) that are treated as if no alt was provided. */
    altsToIgnore = 'image,img,photo,picture,pic,alt,figure,fig'.split(',');
    logger = this.globalLogger.child({ service: this.constructor.name });

    constructor(
        protected globalLogger: GlobalLogger,
        protected imageInterrogator: ImageInterrogationManager,
        protected canvasService: CanvasService,
        protected asyncLocalContext: AsyncLocalContext
    ) {
        super(...arguments);
    }

    /** Waits for `dependencyReady()` and then emits `ready`. */
    override async init() {
        await this.dependencyReady();
        this.emit('ready');
    }

    /**
     * Canned description for images whose shorter side is below 64 pixels.
     *
     * Shared by `caption()` and `getAltText()` so both produce the exact same
     * wording.
     *
     * @param width Image width, printed first in the description.
     * @param height Image height, printed second in the description.
     * @returns The description, or `undefined` when the shorter side is 64 or
     *   more (or the dimensions are not comparable numbers).
     */
    protected describeSmallImage(width: number, height: number): string | undefined {
        const shortSide = Math.min(width, height);
        // NOTE(review): "likely be a tacker probe" looks like a typo for
        // "likely a tracker probe"; kept verbatim because it is emitted output.
        if (shortSide <= 1) {
            return `A ${width}x${height} image, likely be a tacker probe`;
        }
        if (shortSide < 64) {
            return `A ${width}x${height} small image, likely a logo, icon or avatar`;
        }

        return undefined;
    }

    /**
     * Generates a caption for the image at `url`.
     *
     * Images with a short side below 64 pixels get a canned description
     * without calling the model. Everything else is fitted into a 1024 square
     * box, exported as PNG and sent to the image interrogator.
     *
     * @param url Location passed to `canvasService.loadImage`.
     * @returns The caption with newlines, double quotes and a trailing period removed.
     * @throws AssertionFailureError wrapping any error raised while loading,
     *   rendering or interrogating the image.
     */
    async caption(url: string) {
        try {
            const img = await this.canvasService.loadImage(url);
            // `contentType` is read reflectively and may be absent; only a
            // string can be inspected, anything else means "not known to be SVG".
            const contentTypeHint: unknown = Reflect.get(img, 'contentType');
            const isSvg = typeof contentTypeHint === 'string' && contentTypeHint.toLowerCase().includes('svg');

            const smallImageDescription = this.describeSmallImage(img.naturalWidth, img.naturalHeight);
            if (smallImageDescription !== undefined) {
                return smallImageDescription;
            }

            const resized = this.canvasService.fitImageToSquareBox(img, 1024);
            const exported = await this.canvasService.canvasToBuffer(resized, 'image/png');

            const svgHint = isSvg ? `Beware this image is a SVG rendered on a gray background, the gray background is not part of the image.\n\n` : '';
            const svgSystemHint = isSvg ? ` Sometimes the system renders SVG on a gray background. When this happens, you must not include the gray background in the description.` : '';

            // NOTE(review): "start direly with" in the system prompt looks like a
            // typo for "directly"; kept verbatim because editing the prompt can
            // change model output.
            const r = await this.imageInterrogator.interrogate('vertex-gemini-2.0-flash', {
                image: exported,
                prompt: `${svgHint}Give a concise image caption descriptive sentence in third person. Start directly with the description.`,
                system: `You are BLIP2, an image caption model. You will generate Alt Text (in web pages) for any image for a11y purposes. You must not start with "This image is sth...", instead, start direly with "sth..."${svgSystemHint}`,
            });

            return r.replaceAll(/[\n\"]|(\.\s*$)/g, '').trim();
        } catch (err) {
            throw new AssertionFailureError({ message: `Could not generate alt text for url ${url}`, cause: err });
        }
    }

    /**
     * Resolves alt text for an image found on a page.
     *
     * Resolution order:
     * 1. No `src`: `undefined`.
     * 2. A page-provided `alt` that is not in `altsToIgnore`: returned as is.
     * 3. Short side below 64 pixels (natural size when loaded, otherwise the
     *    `width`/`height` of the brief): canned description.
     * 4. A cached record keyed by the digest of `src`: its generated alt, else
     *    its original alt, else an empty string.
     * 5. A freshly generated caption; generation failures are logged and yield
     *    an empty string.
     *
     * Unless `DNT` is set on the async context, the result of step 5 is written
     * to the cache. Failed (empty) captions are stored with an `expireAt` one
     * day ahead.
     *
     * @param imgBrief Image descriptor collected from the page.
     * @returns The alt text, possibly empty, or `undefined` when there is no `src`.
     */
    async getAltText(imgBrief: ImgBrief) {
        if (!imgBrief.src) {
            return undefined;
        }
        if (imgBrief.alt && !this.altsToIgnore.includes(imgBrief.alt.trim().toLowerCase())) {
            return imgBrief.alt;
        }

        // Natural dimensions win when the image is loaded and they are small;
        // otherwise fall back to the brief's width/height.
        let smallDims: [number, number] | undefined;
        if (
            imgBrief.loaded &&
            imgBrief.naturalWidth && imgBrief.naturalHeight &&
            Math.min(imgBrief.naturalWidth, imgBrief.naturalHeight) < 64
        ) {
            smallDims = [imgBrief.naturalWidth, imgBrief.naturalHeight];
        } else if (
            imgBrief.width && imgBrief.height &&
            Math.min(imgBrief.width, imgBrief.height) < 64
        ) {
            smallDims = [imgBrief.width, imgBrief.height];
        }

        if (smallDims) {
            const smallImageDescription = this.describeSmallImage(smallDims[0], smallDims[1]);
            if (smallImageDescription !== undefined) {
                return smallImageDescription;
            }
        }

        // The digest is only needed for the cache, so compute it after the
        // early returns above.
        const digest = md5Hasher.hash(imgBrief.src);
        const shortDigest = Buffer.from(digest, 'hex').toString('base64url');

        const existing = await ImgAlt.fromFirestore(shortDigest);

        if (existing) {
            return existing.generatedAlt || existing.originalAlt || '';
        }

        let generatedCaption = '';

        try {
            generatedCaption = await this.caption(imgBrief.src);
        } catch (err) {
            // Best effort: a failed caption is recorded as an empty string below.
            this.logger.warn(`Unable to generate alt text for ${imgBrief.src}`, { err });
        }

        if (this.asyncLocalContext.ctx.DNT) {
            // Don't cache alt text if DNT is set
            return generatedCaption;
        }

        // Don't try again until the next day
        const expireMixin = generatedCaption ? {} : { expireAt: new Date(Date.now() + 1000 * 3600 * 24) };

        await ImgAlt.COLLECTION.doc(shortDigest).set(
            {
                _id: shortDigest,
                src: imgBrief.src || '',
                width: imgBrief.naturalWidth || 0,
                height: imgBrief.naturalHeight || 0,
                urlDigest: digest,
                originalAlt: imgBrief.alt || '',
                generatedAlt: generatedCaption || '',
                createdAt: new Date(),
                ...expireMixin
            }, { merge: true }
        );

        return generatedCaption;
    }
}