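// verify.js: checks and repairs a cacache cache directory. The pass
// fixes permissions, garbage-collects content that no index entry points
// to, re-verifies checksums of live content, rebuilds the index, cleans
// the tmp dir, and records when verification last ran.
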
const {
  mkdir,
  readFile,
  rm,
  stat,
  truncate,
  writeFile,
} = require('fs/promises')
const pMap = require('p-map')
const contentPath = require('./content/path')
const fsm = require('fs-minipass')
const glob = require('./util/glob.js')
const index = require('./entry-index')
const path = require('path')
const ssri = require('ssri')

const hasOwnProperty = (obj, key) =>
  Object.prototype.hasOwnProperty.call(obj, key)

const verifyOpts = (opts) => ({
  concurrency: 20,
  log: { silly () {} },
  ...opts,
})

module.exports = verify

async function verify (cache, opts) {
  opts = verifyOpts(opts)
  opts.log.silly('verify', 'verifying cache at', cache)

  const steps = [
    markStartTime,
    fixPerms,
    garbageCollect,
    rebuildIndex,
    cleanTmp,
    writeVerifile,
    markEndTime,
  ]

  const stats = {}
  for (const step of steps) {
    const label = step.name
    const start = new Date()
    const s = await step(cache, opts)
    if (s) {
      Object.keys(s).forEach((k) => {
        stats[k] = s[k]
      })
    }
    const end = new Date()
    if (!stats.runTime) {
      stats.runTime = {}
    }
    stats.runTime[label] = end - start
  }

  stats.runTime.total = stats.endTime - stats.startTime
  opts.log.silly(
    'verify',
    'verification finished for',
    cache,
    'in',
    `${stats.runTime.total}ms`
  )

  return stats
}
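
// Example usage (illustrative only, not part of this module): a full
// verification pass over a hypothetical cache directory, keeping only
// entries whose keys match a prefix. `filter` and `concurrency` are
// merged over the defaults by verifyOpts above.
//
//   const verify = require('./verify')
//   const stats = await verify('/path/to/my-cache', {
//     filter: (entry) => entry.key.startsWith('my-prefix:'),
//     concurrency: 10,
//   })
//   console.log(stats.verifiedContent, 'entries verified,',
//     stats.reclaimedSize, 'bytes reclaimed')
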
async function markStartTime (cache, opts) {
  return { startTime: new Date() }
}

async function markEndTime (cache, opts) {
  return { endTime: new Date() }
}

async function fixPerms (cache, opts) {
  opts.log.silly('verify', 'fixing cache permissions')
  await mkdir(cache, { recursive: true })
  return null
}

// Implements a naive mark-and-sweep tracing garbage collector.
//
// The algorithm is basically as follows:
// 1. Read (and filter) all index entries ("pointers")
// 2. Mark each integrity value as "live"
// 3. Read entire filesystem tree in `content-vX/` dir
// 4. If content is live, verify its checksum and delete it if it fails
// 5. If content is not marked as live, rm it.
//
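// For illustration (digest shortened, not a real hash): an index entry
// whose integrity is "sha512-abc1..." marks that digest live. During the
// sweep, a content file whose reassembled digest is live gets its
// checksum verified and is counted as kept; any other file is removed
// and its size added to reclaimedSize.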
async function garbageCollect (cache, opts) {
  opts.log.silly('verify', 'garbage collecting content')
  const indexStream = index.lsStream(cache)
  const liveContent = new Set()
  indexStream.on('data', (entry) => {
    if (opts.filter && !opts.filter(entry)) {
      return
    }

    // integrity is stringified, re-parse it so we can get each hash
    const integrity = ssri.parse(entry.integrity)
    for (const algo in integrity) {
      liveContent.add(integrity[algo].toString())
    }
  })
  await new Promise((resolve, reject) => {
    indexStream.on('end', resolve).on('error', reject)
  })
  const contentDir = contentPath.contentDir(cache)
  const files = await glob(path.join(contentDir, '**'), {
    follow: false,
    nodir: true,
    nosort: true,
  })
  const stats = {
    verifiedContent: 0,
    reclaimedCount: 0,
    reclaimedSize: 0,
    badContentCount: 0,
    keptSize: 0,
  }

  await pMap(
    files,
    async (f) => {
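      // Content files live at content-vX/<algo>/<aa>/<bb>/<rest>, where
      // <aa> + <bb> + <rest> re-joined is the full hex digest: take the
      // digest from the last three path segments and the algorithm from
      // the fourth-from-last.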
      const split = f.split(/[/\\]/)
      const digest = split.slice(split.length - 3).join('')
      const algo = split[split.length - 4]
      const integrity = ssri.fromHex(digest, algo)
      if (liveContent.has(integrity.toString())) {
        const info = await verifyContent(f, integrity)
        if (!info.valid) {
          stats.reclaimedCount++
          stats.badContentCount++
          stats.reclaimedSize += info.size
        } else {
          stats.verifiedContent++
          stats.keptSize += info.size
        }
      } else {
        // No entries refer to this content. We can delete.
        stats.reclaimedCount++
        const s = await stat(f)
        await rm(f, { recursive: true, force: true })
        stats.reclaimedSize += s.size
      }

      return stats
    },
    { concurrency: opts.concurrency }
  )

  return stats
}

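// Stat the file, then stream it through ssri to check its digest. A
// missing file reports { size: 0, valid: false }; a digest mismatch
// (EINTEGRITY) removes the corrupt file and reports valid: false; any
// other error is rethrown.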
async function verifyContent (filepath, sri) {
  const contentInfo = {}
  try {
    const { size } = await stat(filepath)
    contentInfo.size = size
    contentInfo.valid = true
    await ssri.checkStream(new fsm.ReadStream(filepath), sri)
  } catch (err) {
    if (err.code === 'ENOENT') {
      return { size: 0, valid: false }
    }

    if (err.code !== 'EINTEGRITY') {
      throw err
    }

    await rm(filepath, { recursive: true, force: true })
    contentInfo.valid = false
  }

  return contentInfo
}

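// Group the surviving index entries by hashed bucket file, then rewrite
// each bucket from scratch. Entries excluded by opts.filter count as
// rejected; entries whose content file no longer exists are dropped as
// missing in rebuildBucket below.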
async function rebuildIndex (cache, opts) {
  opts.log.silly('verify', 'rebuilding index')
  const entries = await index.ls(cache)
  const stats = {
    missingContent: 0,
    rejectedEntries: 0,
    totalEntries: 0,
  }
  const buckets = {}
  for (const k in entries) {
    /* istanbul ignore else */
    if (hasOwnProperty(entries, k)) {
      const hashed = index.hashKey(k)
      const entry = entries[k]
      const excluded = opts.filter && !opts.filter(entry)
      excluded && stats.rejectedEntries++
      if (buckets[hashed] && !excluded) {
        buckets[hashed].push(entry)
      } else if (buckets[hashed] && excluded) {
        // skip
      } else if (excluded) {
        buckets[hashed] = []
        buckets[hashed]._path = index.bucketPath(cache, k)
      } else {
        buckets[hashed] = [entry]
        buckets[hashed]._path = index.bucketPath(cache, k)
      }
    }
  }

  await pMap(
    Object.keys(buckets),
    (key) => {
      return rebuildBucket(cache, buckets[key], stats, opts)
    },
    { concurrency: opts.concurrency }
  )

  return stats
}

async function rebuildBucket (cache, bucket, stats, opts) {
  await truncate(bucket._path)
  // This needs to be serialized because cacache explicitly
  // lets very racy bucket conflicts clobber each other.
  for (const entry of bucket) {
    const content = contentPath(cache, entry.integrity)
    try {
      await stat(content)
      await index.insert(cache, entry.key, entry.integrity, {
        metadata: entry.metadata,
        size: entry.size,
        time: entry.time,
      })
      stats.totalEntries++
    } catch (err) {
      if (err.code === 'ENOENT') {
        stats.rejectedEntries++
        stats.missingContent++
      } else {
        throw err
      }
    }
  }
}

function cleanTmp (cache, opts) {
  opts.log.silly('verify', 'cleaning tmp directory')
  return rm(path.join(cache, 'tmp'), { recursive: true, force: true })
}

async function writeVerifile (cache, opts) {
  const verifile = path.join(cache, '_lastverified')
  opts.log.silly('verify', 'writing verifile to ' + verifile)
  return writeFile(verifile, `${Date.now()}`)
}

module.exports.lastRun = lastRun

async function lastRun (cache) {
  const data = await readFile(path.join(cache, '_lastverified'), { encoding: 'utf8' })
  return new Date(+data)
}
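
// Example usage (illustrative only): reading back the timestamp written
// by writeVerifile.
//
//   const when = await verify.lastRun('/path/to/my-cache')
//   console.log('cache last verified at', when.toISOString())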