Spaces:
Paused
Paused
| import { execFile } from "node:child_process"; | |
| import { promises as fs } from "node:fs"; | |
| import path from "node:path"; | |
| import { promisify } from "node:util"; | |
| const execFileAsync = promisify(execFile); | |
| const DEFAULT_QUERY = "\"Co-Authored-By: Paperclip <noreply@paperclip.ing>\""; | |
| const DEFAULT_CACHE_FILE = path.resolve("data/paperclip-commit-metrics-cache.json"); | |
| const DEFAULT_SEARCH_START = "2008-01-01T00:00:00Z"; | |
| const SEARCH_WINDOW_LIMIT = 900; | |
| const MIN_WINDOW_MS = 60_000; | |
| const DEFAULT_STATS_FETCH_LIMIT = 250; | |
| const DEFAULT_STATS_CONCURRENCY = 4; | |
| const DEFAULT_SEARCH_FIELD = "committer-date"; | |
| const PAPERCLIP_EMAIL = "noreply@paperclip.ing"; | |
| const PAPERCLIP_NAME = "paperclip"; | |
| interface CliOptions { | |
| cacheFile: string; | |
| end: Date; | |
| excludeOwners: string[]; | |
| exportFormat: "csv" | "json"; | |
| includePrivate: boolean; | |
| json: boolean; | |
| output: string | null; | |
| query: string; | |
| refreshSearch: boolean; | |
| refreshStats: boolean; | |
| searchField: "author-date" | "committer-date"; | |
| start: Date; | |
| statsConcurrency: number; | |
| statsFetchLimit: number; | |
| skipStats: boolean; | |
| } | |
| interface SearchCommitItem { | |
| author: { | |
| login?: string; | |
| } | null; | |
| commit: { | |
| author: { | |
| date: string; | |
| email: string | null; | |
| name: string | null; | |
| } | null; | |
| message: string; | |
| }; | |
| html_url: string; | |
| repository: { | |
| full_name: string; | |
| html_url: string; | |
| }; | |
| sha: string; | |
| } | |
| interface CommitStats { | |
| additions: number; | |
| deletions: number; | |
| total: number; | |
| } | |
| interface CachedCommit { | |
| authorEmail: string | null; | |
| authorLogin: string | null; | |
| authorName: string | null; | |
| committedAt: string | null; | |
| contributors: ContributorRecord[]; | |
| htmlUrl: string; | |
| repositoryFullName: string; | |
| repositoryUrl: string; | |
| sha: string; | |
| } | |
| interface CachedCommitStats extends CommitStats { | |
| fetchedAt: string; | |
| } | |
| interface ContributorRecord { | |
| displayName: string; | |
| email: string | null; | |
| key: string; | |
| login: string | null; | |
| } | |
| interface WindowCacheEntry { | |
| completedAt: string; | |
| key: string; | |
| shas: string[]; | |
| totalCount: number; | |
| } | |
| interface CacheFile { | |
| commits: Record<string, CachedCommit>; | |
| queryKey: string; | |
| searchField: CliOptions["searchField"]; | |
| stats: Record<string, CachedCommitStats>; | |
| updatedAt: string | null; | |
| version: number; | |
| windows: Record<string, WindowCacheEntry>; | |
| } | |
| interface SearchResponse { | |
| incomplete_results: boolean; | |
| items: SearchCommitItem[]; | |
| total_count: number; | |
| } | |
| interface SearchWindowResult { | |
| shas: Set<string>; | |
| totalCount: number; | |
| } | |
| interface Summary { | |
| cacheFile: string; | |
| contributors: { | |
| count: number; | |
| sample: ContributorRecord[]; | |
| }; | |
| detectedQuery: string; | |
| lineStats: { | |
| additions: number; | |
| complete: boolean; | |
| coveredCommits: number; | |
| deletions: number; | |
| missingCommits: number; | |
| totalChanges: number; | |
| }; | |
| range: { | |
| end: string; | |
| searchField: CliOptions["searchField"]; | |
| start: string; | |
| }; | |
| filters: { | |
| excludedOwners: string[]; | |
| }; | |
| repos: { | |
| count: number; | |
| sample: string[]; | |
| }; | |
| statsFetch: { | |
| fetchedThisRun: number; | |
| skipped: boolean; | |
| }; | |
| totals: { | |
| commits: number; | |
| }; | |
| } | |
| async function main() { | |
| const options = parseArgs(process.argv.slice(2)); | |
| const cache = await loadCache(options.cacheFile, options); | |
| const client = new GitHubClient(await resolveGitHubToken()); | |
| const { shas } = await searchWindow(client, cache, options, options.start, options.end); | |
| const sortedShas = [...shas].sort(); | |
| let fetchedThisRun = 0; | |
| if (!options.skipStats) { | |
| fetchedThisRun = await enrichCommitStats(client, cache, options, sortedShas); | |
| } | |
| cache.updatedAt = new Date().toISOString(); | |
| await saveCache(options.cacheFile, cache); | |
| const filteredShas = sortFilteredShas(cache, filterShas(cache, sortedShas, options)); | |
| const summary = buildSummary(cache, options, filteredShas, fetchedThisRun); | |
| if (options.output) { | |
| await writeExport(options.output, options.exportFormat, cache, filteredShas, summary); | |
| } | |
| if (options.json) { | |
| console.log(JSON.stringify(summary, null, 2)); | |
| return; | |
| } | |
| printSummary(summary); | |
| } | |
| function parseArgs(argv: string[]): CliOptions { | |
| const options: CliOptions = { | |
| cacheFile: DEFAULT_CACHE_FILE, | |
| end: new Date(), | |
| excludeOwners: [], | |
| exportFormat: "csv", | |
| includePrivate: false, | |
| json: false, | |
| output: null, | |
| query: DEFAULT_QUERY, | |
| refreshSearch: false, | |
| refreshStats: false, | |
| searchField: DEFAULT_SEARCH_FIELD, | |
| start: new Date(DEFAULT_SEARCH_START), | |
| statsConcurrency: DEFAULT_STATS_CONCURRENCY, | |
| statsFetchLimit: DEFAULT_STATS_FETCH_LIMIT, | |
| skipStats: false, | |
| }; | |
| for (let index = 0; index < argv.length; index += 1) { | |
| const arg = argv[index]; | |
| switch (arg) { | |
| case "--cache-file": | |
| options.cacheFile = requireValue(argv, ++index, arg); | |
| break; | |
| case "--end": | |
| options.end = parseDateArg(requireValue(argv, ++index, arg), arg); | |
| break; | |
| case "--exclude-owner": | |
| options.excludeOwners.push(requireValue(argv, ++index, arg).toLowerCase()); | |
| break; | |
| case "--export-format": { | |
| const value = requireValue(argv, ++index, arg); | |
| if (value !== "csv" && value !== "json") { | |
| throw new Error(`Invalid --export-format value: ${value}`); | |
| } | |
| options.exportFormat = value; | |
| break; | |
| } | |
| case "--include-private": | |
| options.includePrivate = true; | |
| break; | |
| case "--json": | |
| options.json = true; | |
| break; | |
| case "--output": | |
| options.output = requireValue(argv, ++index, arg); | |
| break; | |
| case "--query": | |
| options.query = requireValue(argv, ++index, arg); | |
| break; | |
| case "--refresh-search": | |
| options.refreshSearch = true; | |
| break; | |
| case "--refresh-stats": | |
| options.refreshStats = true; | |
| break; | |
| case "--search-field": { | |
| const value = requireValue(argv, ++index, arg); | |
| if (value !== "author-date" && value !== "committer-date") { | |
| throw new Error(`Invalid --search-field value: ${value}`); | |
| } | |
| options.searchField = value; | |
| break; | |
| } | |
| case "--skip-stats": | |
| options.skipStats = true; | |
| break; | |
| case "--start": | |
| options.start = parseDateArg(requireValue(argv, ++index, arg), arg); | |
| break; | |
| case "--stats-concurrency": | |
| options.statsConcurrency = parsePositiveInt(requireValue(argv, ++index, arg), arg); | |
| break; | |
| case "--stats-fetch-limit": | |
| options.statsFetchLimit = parseNonNegativeInt(requireValue(argv, ++index, arg), arg); | |
| break; | |
| case "--help": | |
| printHelp(); | |
| process.exit(0); | |
| break; | |
| default: | |
| throw new Error(`Unknown argument: ${arg}`); | |
| } | |
| } | |
| if (Number.isNaN(options.start.getTime()) || Number.isNaN(options.end.getTime())) { | |
| throw new Error("Invalid start or end date"); | |
| } | |
| if (options.start >= options.end) { | |
| throw new Error("--start must be earlier than --end"); | |
| } | |
| return options; | |
| } | |
| function requireValue(argv: string[], index: number, flag: string): string { | |
| const value = argv[index]; | |
| if (!value) { | |
| throw new Error(`Missing value for ${flag}`); | |
| } | |
| return value; | |
| } | |
| function parseDateArg(value: string, flag: string): Date { | |
| const parsed = new Date(value); | |
| if (Number.isNaN(parsed.getTime())) { | |
| throw new Error(`Invalid date for ${flag}: ${value}`); | |
| } | |
| return parsed; | |
| } | |
| function parsePositiveInt(value: string, flag: string): number { | |
| const parsed = Number.parseInt(value, 10); | |
| if (!Number.isFinite(parsed) || parsed <= 0) { | |
| throw new Error(`Invalid positive integer for ${flag}: ${value}`); | |
| } | |
| return parsed; | |
| } | |
| function parseNonNegativeInt(value: string, flag: string): number { | |
| const parsed = Number.parseInt(value, 10); | |
| if (!Number.isFinite(parsed) || parsed < 0) { | |
| throw new Error(`Invalid non-negative integer for ${flag}: ${value}`); | |
| } | |
| return parsed; | |
| } | |
| function printHelp() { | |
| console.log(`Usage: tsx scripts/paperclip-commit-metrics.ts [options] | |
| Options: | |
| --start <date> ISO date/time lower bound (default: ${DEFAULT_SEARCH_START}) | |
| --end <date> ISO date/time upper bound (default: now) | |
| --query <search> Commit search string (default: ${DEFAULT_QUERY}) | |
| --search-field <field> author-date | committer-date (default: ${DEFAULT_SEARCH_FIELD}) | |
| --include-private Include repos visible to the current token | |
| --exclude-owner <owner> Exclude repositories owned by this GitHub owner/org (repeatable) | |
| --cache-file <path> Cache path (default: ${DEFAULT_CACHE_FILE}) | |
| --skip-stats Skip additions/deletions enrichment | |
| --stats-fetch-limit <n> Max uncached commit stats to fetch this run (default: ${DEFAULT_STATS_FETCH_LIMIT}) | |
| --stats-concurrency <n> Parallel commit stat requests (default: ${DEFAULT_STATS_CONCURRENCY}) | |
| --output <path> Write the full filtered result set to a file | |
| --export-format <format> csv | json for --output exports (default: csv) | |
| --refresh-search Ignore cached search windows | |
| --refresh-stats Re-fetch cached commit stats | |
| --json Print JSON summary | |
| --help Show this help | |
| `); | |
| } | |
| async function resolveGitHubToken(): Promise<string> { | |
| const envToken = process.env.GITHUB_TOKEN ?? process.env.GH_TOKEN; | |
| if (envToken) { | |
| return envToken; | |
| } | |
| const { stdout } = await execFileAsync("gh", ["auth", "token"]); | |
| const token = stdout.trim(); | |
| if (!token) { | |
| throw new Error("Unable to resolve a GitHub token. Set GITHUB_TOKEN/GH_TOKEN or run `gh auth login`."); | |
| } | |
| return token; | |
| } | |
| async function loadCache(cacheFile: string, options: CliOptions): Promise<CacheFile> { | |
| try { | |
| const raw = await fs.readFile(cacheFile, "utf8"); | |
| const parsed = JSON.parse(raw) as CacheFile; | |
| if (parsed.version !== 1 || parsed.queryKey !== buildQueryKey(options) || parsed.searchField !== options.searchField) { | |
| return createEmptyCache(options); | |
| } | |
| return parsed; | |
| } catch (error) { | |
| if ((error as NodeJS.ErrnoException).code === "ENOENT") { | |
| return createEmptyCache(options); | |
| } | |
| throw error; | |
| } | |
| } | |
| function createEmptyCache(options: CliOptions): CacheFile { | |
| return { | |
| commits: {}, | |
| queryKey: buildQueryKey(options), | |
| searchField: options.searchField, | |
| stats: {}, | |
| updatedAt: null, | |
| version: 1, | |
| windows: {}, | |
| }; | |
| } | |
| function buildQueryKey(options: CliOptions): string { | |
| const visibility = options.includePrivate ? "all" : "public"; | |
| return JSON.stringify({ | |
| query: options.query, | |
| searchField: options.searchField, | |
| visibility, | |
| }); | |
| } | |
| async function saveCache(cacheFile: string, cache: CacheFile): Promise<void> { | |
| await fs.mkdir(path.dirname(cacheFile), { recursive: true }); | |
| await fs.writeFile(cacheFile, JSON.stringify(cache, null, 2), "utf8"); | |
| } | |
| async function searchWindow( | |
| client: GitHubClient, | |
| cache: CacheFile, | |
| options: CliOptions, | |
| start: Date, | |
| end: Date, | |
| ): Promise<SearchWindowResult> { | |
| const windowKey = makeWindowKey(start, end); | |
| if (!options.refreshSearch) { | |
| const cached = cache.windows[windowKey]; | |
| if (cached) { | |
| return { shas: new Set(cached.shas), totalCount: cached.totalCount }; | |
| } | |
| } | |
| const firstPage = await searchPage(client, options, start, end, 1, 100); | |
| if (firstPage.incomplete_results) { | |
| throw new Error(`GitHub returned incomplete search results for window ${windowKey}`); | |
| } | |
| if (firstPage.total_count > SEARCH_WINDOW_LIMIT) { | |
| const durationMs = end.getTime() - start.getTime(); | |
| if (durationMs <= MIN_WINDOW_MS) { | |
| throw new Error( | |
| `Search window ${windowKey} still has ${firstPage.total_count} results after splitting to ${durationMs}ms.`, | |
| ); | |
| } | |
| const midpoint = new Date(start.getTime() + Math.floor(durationMs / 2)); | |
| const left = await searchWindow(client, cache, options, start, midpoint); | |
| const right = await searchWindow(client, cache, options, new Date(midpoint.getTime() + 1), end); | |
| const shas = new Set([...left.shas, ...right.shas]); | |
| cache.windows[windowKey] = { | |
| completedAt: new Date().toISOString(), | |
| key: windowKey, | |
| shas: [...shas], | |
| totalCount: shas.size, | |
| }; | |
| return { shas, totalCount: shas.size }; | |
| } | |
| const pageCount = Math.ceil(firstPage.total_count / 100); | |
| const shas = new Set<string>(); | |
| ingestSearchItems(cache, firstPage.items, shas); | |
| for (let page = 2; page <= pageCount; page += 1) { | |
| const response = await searchPage(client, options, start, end, page, 100); | |
| if (response.incomplete_results) { | |
| throw new Error(`GitHub returned incomplete search results for window ${windowKey} on page ${page}`); | |
| } | |
| ingestSearchItems(cache, response.items, shas); | |
| } | |
| cache.windows[windowKey] = { | |
| completedAt: new Date().toISOString(), | |
| key: windowKey, | |
| shas: [...shas], | |
| totalCount: firstPage.total_count, | |
| }; | |
| return { shas, totalCount: firstPage.total_count }; | |
| } | |
| async function searchPage( | |
| client: GitHubClient, | |
| options: CliOptions, | |
| start: Date, | |
| end: Date, | |
| page: number, | |
| perPage: number, | |
| ): Promise<SearchResponse> { | |
| const searchQuery = buildSearchQuery(options, start, end); | |
| const params = new URLSearchParams({ | |
| page: String(page), | |
| per_page: String(perPage), | |
| q: searchQuery, | |
| }); | |
| return client.getJson<SearchResponse>(`/search/commits?${params.toString()}`); | |
| } | |
| function buildSearchQuery(options: CliOptions, start: Date, end: Date): string { | |
| const qualifiers = [`${options.searchField}:${formatQueryDate(start)}..${formatQueryDate(end)}`]; | |
| if (!options.includePrivate) { | |
| qualifiers.push("is:public"); | |
| } | |
| return `${options.query} ${qualifiers.join(" ")}`.trim(); | |
| } | |
| function filterShas(cache: CacheFile, shas: string[], options: CliOptions): string[] { | |
| if (options.excludeOwners.length === 0) { | |
| return shas; | |
| } | |
| const excludedOwners = new Set(options.excludeOwners); | |
| return shas.filter((sha) => { | |
| const commit = cache.commits[sha]; | |
| if (!commit) { | |
| return false; | |
| } | |
| return !excludedOwners.has(getRepoOwner(commit.repositoryFullName)); | |
| }); | |
| } | |
| function sortFilteredShas(cache: CacheFile, shas: string[]): string[] { | |
| return [...shas].sort((leftSha, rightSha) => { | |
| const left = cache.commits[leftSha]; | |
| const right = cache.commits[rightSha]; | |
| const leftTime = left?.committedAt ? Date.parse(left.committedAt) : 0; | |
| const rightTime = right?.committedAt ? Date.parse(right.committedAt) : 0; | |
| if (rightTime !== leftTime) { | |
| return rightTime - leftTime; | |
| } | |
| const repoCompare = (left?.repositoryFullName ?? "").localeCompare(right?.repositoryFullName ?? ""); | |
| if (repoCompare !== 0) { | |
| return repoCompare; | |
| } | |
| return leftSha.localeCompare(rightSha); | |
| }); | |
| } | |
| function formatQueryDate(value: Date): string { | |
| return new Date(Math.floor(value.getTime() / 1000) * 1000).toISOString().replace(".000Z", "Z"); | |
| } | |
| function ingestSearchItems(cache: CacheFile, items: SearchCommitItem[], shas: Set<string>) { | |
| for (const item of items) { | |
| shas.add(item.sha); | |
| cache.commits[item.sha] = { | |
| authorEmail: item.commit.author?.email ?? null, | |
| authorLogin: item.author?.login ?? null, | |
| authorName: item.commit.author?.name ?? null, | |
| committedAt: item.commit.author?.date ?? null, | |
| contributors: extractContributors(item), | |
| htmlUrl: item.html_url, | |
| repositoryFullName: item.repository.full_name, | |
| repositoryUrl: item.repository.html_url, | |
| sha: item.sha, | |
| }; | |
| } | |
| } | |
| function extractContributors(item: SearchCommitItem): ContributorRecord[] { | |
| const contributors = new Map<string, ContributorRecord>(); | |
| const primaryAuthor = normalizeContributor({ | |
| email: item.commit.author?.email ?? null, | |
| login: item.author?.login ?? null, | |
| name: item.commit.author?.name ?? null, | |
| }); | |
| if (primaryAuthor) { | |
| contributors.set(primaryAuthor.key, primaryAuthor); | |
| } | |
| const coAuthorPattern = /^co-authored-by:\s*(.+?)\s*<([^>]+)>\s*$/gim; | |
| for (const match of item.commit.message.matchAll(coAuthorPattern)) { | |
| const contributor = normalizeContributor({ | |
| email: match[2] ?? null, | |
| login: null, | |
| name: match[1] ?? null, | |
| }); | |
| if (contributor) { | |
| contributors.set(contributor.key, contributor); | |
| } | |
| } | |
| return [...contributors.values()]; | |
| } | |
| function normalizeContributor(input: { | |
| email: string | null; | |
| login: string | null; | |
| name: string | null; | |
| }): ContributorRecord | null { | |
| const email = normalizeOptional(input.email); | |
| const login = normalizeOptional(input.login); | |
| const displayName = normalizeOptional(input.name) ?? login ?? email; | |
| if (!displayName && !email && !login) { | |
| return null; | |
| } | |
| if ((email && email === PAPERCLIP_EMAIL) || (displayName && displayName.toLowerCase() === PAPERCLIP_NAME)) { | |
| return null; | |
| } | |
| const key = login ? `login:${login}` : email ? `email:${email}` : `name:${displayName!.toLowerCase()}`; | |
| return { | |
| displayName: displayName ?? email ?? login ?? "unknown", | |
| email, | |
| key, | |
| login, | |
| }; | |
| } | |
| function normalizeOptional(value: string | null | undefined): string | null { | |
| const trimmed = value?.trim(); | |
| return trimmed ? trimmed : null; | |
| } | |
| function getRepoOwner(repositoryFullName: string): string { | |
| return repositoryFullName.split("/", 1)[0]?.toLowerCase() ?? ""; | |
| } | |
| async function enrichCommitStats( | |
| client: GitHubClient, | |
| cache: CacheFile, | |
| options: CliOptions, | |
| shas: string[], | |
| ): Promise<number> { | |
| const pending = shas.filter((sha) => options.refreshStats || !cache.stats[sha]).slice(0, options.statsFetchLimit); | |
| let nextIndex = 0; | |
| let fetched = 0; | |
| const workers = Array.from({ length: Math.min(options.statsConcurrency, pending.length) }, async () => { | |
| while (true) { | |
| const currentIndex = nextIndex; | |
| nextIndex += 1; | |
| const sha = pending[currentIndex]; | |
| if (!sha) { | |
| return; | |
| } | |
| const commit = cache.commits[sha]; | |
| if (!commit) { | |
| continue; | |
| } | |
| const stats = await fetchCommitStats(client, commit.repositoryFullName, sha); | |
| cache.stats[sha] = { | |
| ...stats, | |
| fetchedAt: new Date().toISOString(), | |
| }; | |
| fetched += 1; | |
| } | |
| }); | |
| await Promise.all(workers); | |
| return fetched; | |
| } | |
| async function fetchCommitStats(client: GitHubClient, repositoryFullName: string, sha: string): Promise<CommitStats> { | |
| const response = await client.getJson<{ stats?: CommitStats }>( | |
| `/repos/${repositoryFullName}/commits/${sha}`, | |
| ); | |
| return { | |
| additions: response.stats?.additions ?? 0, | |
| deletions: response.stats?.deletions ?? 0, | |
| total: response.stats?.total ?? 0, | |
| }; | |
| } | |
| function buildSummary(cache: CacheFile, options: CliOptions, shas: string[], fetchedThisRun: number): Summary { | |
| const repoNames = new Set<string>(); | |
| const contributors = new Map<string, ContributorRecord>(); | |
| let additions = 0; | |
| let deletions = 0; | |
| let coveredCommits = 0; | |
| for (const sha of shas) { | |
| const commit = cache.commits[sha]; | |
| if (!commit) { | |
| continue; | |
| } | |
| repoNames.add(commit.repositoryFullName); | |
| for (const contributor of commit.contributors) { | |
| contributors.set(contributor.key, contributor); | |
| } | |
| const stats = cache.stats[sha]; | |
| if (stats) { | |
| additions += stats.additions; | |
| deletions += stats.deletions; | |
| coveredCommits += 1; | |
| } | |
| } | |
| const contributorSample = [...contributors.values()] | |
| .sort((left, right) => left.displayName.localeCompare(right.displayName)) | |
| .slice(0, 10); | |
| const repoSample = [...repoNames].sort((left, right) => left.localeCompare(right)).slice(0, 10); | |
| return { | |
| cacheFile: options.cacheFile, | |
| contributors: { | |
| count: contributors.size, | |
| sample: contributorSample, | |
| }, | |
| detectedQuery: buildSearchQuery(options, options.start, options.end), | |
| lineStats: { | |
| additions, | |
| complete: coveredCommits === shas.length, | |
| coveredCommits, | |
| deletions, | |
| missingCommits: shas.length - coveredCommits, | |
| totalChanges: additions + deletions, | |
| }, | |
| range: { | |
| end: options.end.toISOString(), | |
| searchField: options.searchField, | |
| start: options.start.toISOString(), | |
| }, | |
| filters: { | |
| excludedOwners: [...options.excludeOwners].sort(), | |
| }, | |
| repos: { | |
| count: repoNames.size, | |
| sample: repoSample, | |
| }, | |
| statsFetch: { | |
| fetchedThisRun, | |
| skipped: options.skipStats, | |
| }, | |
| totals: { | |
| commits: shas.length, | |
| }, | |
| }; | |
| } | |
| function printSummary(summary: Summary) { | |
| console.log("Paperclip commit metrics"); | |
| console.log(`Query: ${summary.detectedQuery}`); | |
| console.log(`Range: ${summary.range.start} -> ${summary.range.end} (${summary.range.searchField})`); | |
| if (summary.filters.excludedOwners.length > 0) { | |
| console.log(`Excluded owners: ${summary.filters.excludedOwners.join(", ")}`); | |
| } | |
| console.log(`Commits: ${summary.totals.commits}`); | |
| console.log(`Distinct repos: ${summary.repos.count}`); | |
| console.log(`Distinct contributors: ${summary.contributors.count}`); | |
| console.log( | |
| `Line stats: +${summary.lineStats.additions} / -${summary.lineStats.deletions} / ${summary.lineStats.totalChanges} total`, | |
| ); | |
| console.log( | |
| `Line stat coverage: ${summary.lineStats.coveredCommits}/${summary.totals.commits}` + | |
| (summary.lineStats.complete ? " (complete)" : " (partial; rerun to hydrate more commits)"), | |
| ); | |
| console.log(`Stats fetched this run: ${summary.statsFetch.fetchedThisRun}${summary.statsFetch.skipped ? " (skipped)" : ""}`); | |
| console.log(`Cache: ${summary.cacheFile}`); | |
| if (summary.repos.sample.length > 0) { | |
| console.log(`Sample repos: ${summary.repos.sample.join(", ")}`); | |
| } | |
| if (summary.contributors.sample.length > 0) { | |
| console.log( | |
| `Sample contributors: ${summary.contributors.sample | |
| .map((contributor) => contributor.login ?? contributor.displayName) | |
| .join(", ")}`, | |
| ); | |
| } | |
| } | |
| async function writeExport( | |
| outputPath: string, | |
| format: CliOptions["exportFormat"], | |
| cache: CacheFile, | |
| shas: string[], | |
| summary: Summary, | |
| ): Promise<void> { | |
| await fs.mkdir(path.dirname(outputPath), { recursive: true }); | |
| if (format === "json") { | |
| const report = { | |
| summary, | |
| commits: shas.map((sha) => buildExportRow(cache, sha)), | |
| }; | |
| await fs.writeFile(outputPath, JSON.stringify(report, null, 2), "utf8"); | |
| return; | |
| } | |
| const header = [ | |
| "committedAt", | |
| "repository", | |
| "repositoryUrl", | |
| "sha", | |
| "commitUrl", | |
| "authorLogin", | |
| "authorName", | |
| "authorEmail", | |
| "contributors", | |
| "additions", | |
| "deletions", | |
| "totalChanges", | |
| ]; | |
| const rows = [header.join(",")]; | |
| for (const sha of shas) { | |
| const row = buildExportRow(cache, sha); | |
| rows.push( | |
| [ | |
| row.committedAt, | |
| row.repository, | |
| row.repositoryUrl, | |
| row.sha, | |
| row.commitUrl, | |
| row.authorLogin, | |
| row.authorName, | |
| row.authorEmail, | |
| row.contributors, | |
| String(row.additions), | |
| String(row.deletions), | |
| String(row.totalChanges), | |
| ] | |
| .map(escapeCsv) | |
| .join(","), | |
| ); | |
| } | |
| await fs.writeFile(outputPath, `${rows.join("\n")}\n`, "utf8"); | |
| } | |
| function buildExportRow(cache: CacheFile, sha: string) { | |
| const commit = cache.commits[sha]; | |
| if (!commit) { | |
| throw new Error(`Missing cached commit for sha ${sha}`); | |
| } | |
| const stats = cache.stats[sha]; | |
| return { | |
| additions: stats?.additions ?? 0, | |
| authorEmail: commit.authorEmail ?? "", | |
| authorLogin: commit.authorLogin ?? "", | |
| authorName: commit.authorName ?? "", | |
| commitUrl: commit.htmlUrl, | |
| committedAt: commit.committedAt ?? "", | |
| contributors: commit.contributors.map((contributor) => contributor.login ?? contributor.displayName).join(" | "), | |
| deletions: stats?.deletions ?? 0, | |
| repository: commit.repositoryFullName, | |
| repositoryUrl: commit.repositoryUrl, | |
| sha: commit.sha, | |
| totalChanges: stats?.total ?? 0, | |
| }; | |
| } | |
| function escapeCsv(value: string): string { | |
| if (value.includes(",") || value.includes("\"") || value.includes("\n")) { | |
| return `"${value.replaceAll("\"", "\"\"")}"`; | |
| } | |
| return value; | |
| } | |
| function makeWindowKey(start: Date, end: Date): string { | |
| return `${start.toISOString()}..${end.toISOString()}`; | |
| } | |
| class GitHubClient { | |
| private readonly apiBase = "https://api.github.com"; | |
| private readonly token: string; | |
| constructor(token: string) { | |
| this.token = token; | |
| } | |
| async getJson<T>(pathname: string): Promise<T> { | |
| while (true) { | |
| const response = await fetch(`${this.apiBase}${pathname}`, { | |
| headers: { | |
| Accept: "application/vnd.github+json", | |
| Authorization: `Bearer ${this.token}`, | |
| "User-Agent": "paperclip-commit-metrics", | |
| "X-GitHub-Api-Version": "2022-11-28", | |
| }, | |
| }); | |
| if (response.ok) { | |
| return (await response.json()) as T; | |
| } | |
| const retryAfter = response.headers.get("retry-after"); | |
| if ((response.status === 403 || response.status === 429) && retryAfter) { | |
| const waitMs = Math.max(Number.parseInt(retryAfter, 10) * 1000, 1_000); | |
| console.error(`GitHub secondary rate limit hit for ${pathname}; waiting ${Math.ceil(waitMs / 1000)}s...`); | |
| await sleep(waitMs); | |
| continue; | |
| } | |
| const remaining = response.headers.get("x-ratelimit-remaining"); | |
| const resetAt = response.headers.get("x-ratelimit-reset"); | |
| if ((response.status === 403 || response.status === 429) && remaining === "0" && resetAt) { | |
| const waitMs = Math.max(Number.parseInt(resetAt, 10) * 1000 - Date.now() + 1_000, 1_000); | |
| console.error(`GitHub rate limit hit for ${pathname}; waiting ${Math.ceil(waitMs / 1000)}s...`); | |
| await sleep(waitMs); | |
| continue; | |
| } | |
| const body = await response.text(); | |
| throw new Error(`GitHub API request failed (${response.status}) for ${pathname}: ${body}`); | |
| } | |
| } | |
| } | |
| function sleep(ms: number): Promise<void> { | |
| return new Promise((resolve) => setTimeout(resolve, ms)); | |
| } | |
| main().catch((error) => { | |
| console.error(error instanceof Error ? error.message : String(error)); | |
| process.exit(1); | |
| }); | |