import fs from 'fs/promises' import type { Response, NextFunction } from 'express' import sharp from 'sharp' import type { ExtendedRequest } from '@/types' import { assetCacheControl, defaultCacheControl } from '@/frame/middleware/cache-control' import { setFastlySurrogateKey, SURROGATE_ENUMS } from '@/frame/middleware/set-fastly-surrogate-key' /** * This is the indicator that is a virtual part of the URL. * Similar to `/cb-1234/` in asset URLs, it's just there to tell the * middleware that the image can be aggressively cached. It's not * part of the actual file-on-disk path. * Similarly, `/mw-1000/` is virtual and will be observed and removed from * the pathname before trying to look it up as disk-on-file. * The exact pattern needs to match how it's set in whatever Markdown * processing code that might make dynamic asset URLs. * So if you change this, make sure you change the code that expects * to be able to inject this into the URL. */ const maxWidthPathPartRegex = /\/mw-(\d+)\// /** * * Why not any free number? If we allowed it to be any integer number * someone would put our backend servers at risk by doing something like: * * const makeURL = () => `${BASE}/assets/mw-${Math.floor(Math.random()*1000)}/foo.png` * await Promise.all([...Array(10000).keys()].map(makeURL)) * * Which would be lots of distinctly different and valid URLs that the * CDN can never really "protect us" on because they're too often distinct. * * At the moment, the only business need is for 1,000 pixels, so the array * only has one. But can change in the future and make this sentence moot. */ const VALID_MAX_WIDTHS = [1440, 1000] export default async function dynamicAssets( req: ExtendedRequest, res: Response, next: NextFunction, ) { if (!req.url.startsWith('/assets/')) return next() if (!(req.method === 'GET' || req.method === 'HEAD')) { return res.status(405).type('text/plain').send('Method Not Allowed') } // To protect from possible denial of service, we never allow what // we're going to do (the image file operation), if the whole thing // won't be aggressively cached. // If we didn't do this, someone making 2 requests, ... // // > GET /assets/images/site/logo.web?random=10476583 // > GET /assets/images/site/logo.web?random=20196996 // // ...would be treated as 2 distinct backend requests. Sure, each one // would be cached in the CDN, but that's not helping if someone does... // // while (true) { // startFetchThread(`/assets/images/site/logo.web?whatever=${rand()}`) // } // // So we "force" any deviation of the URL to a redirect to the canonical // URL (which, again, is heavily cached). if (Object.keys(req.query).length > 0) { // Cache the 404 so it won't be re-attempted over and over defaultCacheControl(res) // This redirects to the same URL we're currently on, but with the // query string part omitted. // For example: // // > GET /assets/images/site/logo.web?foo=bar // < 302 // < location: /assets/images/site/logo.web // return res.redirect(302, req.path) } // From PNG to WEBP, if the PNG exists if (req.path.endsWith('.webp')) { const { url, maxWidth, error } = deconstructImageURL(req.path) if (error) { return res.status(400).type('text/plain').send(error.toString()) } try { const originalBuffer = await fs.readFile(url.slice(1).replace(/\.webp$/, '.png')) const image = sharp(originalBuffer) if (maxWidth) { const { width } = await image.metadata() if (width === undefined) throw new Error('image metadata does not have a width') if (width > maxWidth) { image.resize({ width: maxWidth }) } } // The default in sharp.webp() for effort is 4. It's a sensible // balance between time and compression. // If you make it low, it makes the webp conversion faster. // If you make it high, the webp conversion is slower but the // resulting WEBP file are smaller. // Given that our App Service containers aren't very strong in // terms of CPU, we avoid the highest effort. But given how // well our CDN protects repeated requests for the same image, // we can pay this cost once and reap it for a very long time. // Be mindful at the highest (6), it can be extremely slow so // let's avoid that for now. // // For more information about the effort option, see: // https://www.peterbe.com/plog/comparing-different-efforts-with-webp-in-sharp // let effort = 5 if (process.env.NODE_ENV === 'test') { // When running tests, we want to make the conversion as fast // as possible because the resulting WEBP buffer will most // likely never be enjoyed by network or human eyes. effort = 1 } else if (process.env.NODE_ENV === 'development') { // If you're doing local development (or review), the // network is not precious (localhost:4000) and you have no // CDN to cache it for you. Make it low but not too unrealistically // low. effort = 1 } // Note that by default, sharp will use a lossy compression. // (i.e. `{lossless: false}` in the options) // The difference is that a lossless image is slightly crisper // but becomes on average 1.8x larger. // Given how we serve images, no human would be able to tell the // difference simply by looking at the image as it appears as an // image tag in the web page. // Also given that rendering-for-viewing is the "end of the line" // for the image meaning it just ends up being viewed and not // resaved as a source file. If we had intention to overwrite all // original PNG source files to WEBP, we should consider lossless // to preserve as much quality as possible at the source level. // The default quality is 80% which, combined with `lossless:false` // makes our images 2.8x smaller than the average PNG. const buffer = await image.webp({ effort }).toBuffer() assetCacheControl(res) return res.type('image/webp').send(buffer) } catch (catchError) { if ( catchError instanceof Error && 'code' in catchError && (catchError as NodeJS.ErrnoException).code !== 'ENOENT' ) { throw catchError } } } // Cache the 404 so it won't be re-attempted over and over defaultCacheControl(res) // There's a preceeding middleware that sets the Surrogate-Key to // "manual-purge" based on the URL possibly having the `/cb-xxxxx/` // checksum in it. But, if it failed, we don't want that. So // undo that if it was set. // It's handy too to not overly cache 404s in the CDN because // it could be that the next prod deployment fixes the missing image. // For example, a PR landed that introduced the *reference* to the image // but forgot to check in the new image, then a follow-up PR adds the image. setFastlySurrogateKey(res, SURROGATE_ENUMS.DEFAULT) // Don't use something like `next(404)` because we don't want a fancy // HTML "Page not found" page response because a failed asset lookup // is impossibly a typo in the browser address bar or an accidentally // broken link, like it might be to a regular HTML page. res.status(404).type('text/plain').send('Asset not found') } function deconstructImageURL(url: string) { let error let maxWidth const match = url.match(maxWidthPathPartRegex) if (match) { const [whole, number] = match maxWidth = parseInt(number) if (isNaN(maxWidth) || maxWidth <= 0 || !VALID_MAX_WIDTHS.includes(maxWidth)) { error = new Error(`width number (${maxWidth}) is not a valid number`) } else { url = url.replace(whole, '/') } } return { url, maxWidth, error } }