codex-proxy / scripts /extract-fingerprint.ts
icebear0828
fix: model store dual index, desktop UI fallback, dynamic port support
b94940f
raw
history blame
19.4 kB
#!/usr/bin/env tsx
/**
* extract-fingerprint.ts — Extracts key fingerprint values from a Codex Desktop
* installation (macOS .app or Windows extracted ASAR).
*
* Usage:
* npx tsx scripts/extract-fingerprint.ts --path "C:/path/to/Codex" [--asar-out ./asar-out]
*
* The path can point to:
* - A macOS .app bundle (Codex.app)
* - A directory containing an already-extracted ASAR (with package.json and .vite/build/main.js)
* - A Windows install dir containing resources/app.asar
*/
import { readFileSync, writeFileSync, existsSync, mkdirSync, readdirSync } from "fs";
import { resolve, join } from "path";
import { createHash } from "crypto";
import { execSync } from "child_process";
import yaml from "js-yaml";
import type { ExtractedFingerprint } from "./types.js";
const ROOT = resolve(import.meta.dirname, "..");
const OUTPUT_PATH = resolve(ROOT, "data/extracted-fingerprint.json");
const PROMPTS_DIR = resolve(ROOT, "data/extracted-prompts");
const PATTERNS_PATH = resolve(ROOT, "config/extraction-patterns.yaml");
interface ExtractionPatterns {
package_json: { version_key: string; build_number_key: string; sparkle_feed_key: string };
main_js: Record<string, {
pattern?: string;
group?: number;
global?: boolean;
start_marker?: string;
end_marker?: string;
end_pattern?: string;
description: string;
}>;
}
function sha256(content: string): string {
return `sha256:${createHash("sha256").update(content, "utf-8").digest("hex").slice(0, 16)}`;
}
function loadPatterns(): ExtractionPatterns {
const raw = yaml.load(readFileSync(PATTERNS_PATH, "utf-8")) as ExtractionPatterns;
return raw;
}
/**
* Find the extracted ASAR root given an input path.
* Tries multiple layout conventions.
*/
function findAsarRoot(inputPath: string): string {
// Direct: path has package.json (already extracted)
if (existsSync(join(inputPath, "package.json"))) {
return inputPath;
}
// macOS .app bundle
const macResources = join(inputPath, "Contents/Resources");
if (existsSync(join(macResources, "app.asar"))) {
return extractAsar(join(macResources, "app.asar"));
}
// Windows: resources/app.asar
const winResources = join(inputPath, "resources");
if (existsSync(join(winResources, "app.asar"))) {
return extractAsar(join(winResources, "app.asar"));
}
// Already extracted: check for nested 'extracted' dir
const extractedDir = join(inputPath, "extracted");
if (existsSync(join(extractedDir, "package.json"))) {
return extractedDir;
}
// Check recovered/extracted pattern
const recoveredExtracted = join(inputPath, "recovered/extracted");
if (existsSync(join(recoveredExtracted, "package.json"))) {
return recoveredExtracted;
}
throw new Error(
`Cannot find Codex source at ${inputPath}. Expected package.json or app.asar.`
);
}
function extractAsar(asarPath: string): string {
const outDir = resolve(ROOT, ".asar-out");
console.log(`[extract] Extracting ASAR: ${asarPath}${outDir}`);
execSync(`npx @electron/asar extract "${asarPath}" "${outDir}"`, {
stdio: "inherit",
});
return outDir;
}
/**
* Step A: Extract from package.json
*/
function extractFromPackageJson(root: string): {
version: string;
buildNumber: string;
sparkleFeedUrl: string | null;
electronVersion: string | null;
} {
const pkgPath = join(root, "package.json");
const pkg = JSON.parse(readFileSync(pkgPath, "utf-8"));
return {
version: pkg.version ?? "unknown",
buildNumber: String(pkg.codexBuildNumber ?? "unknown"),
sparkleFeedUrl: pkg.codexSparkleFeedUrl ?? null,
electronVersion: pkg.devDependencies?.electron ?? null,
};
}
/**
* Step B: Extract values from main.js using patterns
*/
function extractFromMainJs(
content: string,
patterns: ExtractionPatterns["main_js"],
): {
apiBaseUrl: string | null;
originator: string | null;
whamEndpoints: string[];
userAgentContains: string;
} {
// API base URL
let apiBaseUrl: string | null = null;
const apiPattern = patterns.api_base_url;
if (apiPattern?.pattern) {
const m = content.match(new RegExp(apiPattern.pattern));
if (m) apiBaseUrl = m[0];
}
// Fail fast on critical fields
if (!apiBaseUrl) {
console.error("[extract] CRITICAL: Failed to extract API base URL from main.js");
console.error("[extract] The extraction pattern may need updating for this version.");
throw new Error("Failed to extract critical field: api_base_url");
}
// Originator
let originator: string | null = null;
const origPattern = patterns.originator;
if (origPattern?.pattern) {
const m = content.match(new RegExp(origPattern.pattern));
if (m) originator = m[origPattern.group ?? 0] ?? m[0];
}
// Fail fast on critical fields
if (!originator) {
console.error("[extract] CRITICAL: Failed to extract originator from main.js");
console.error("[extract] The extraction pattern may need updating for this version.");
throw new Error("Failed to extract critical field: originator");
}
// WHAM endpoints — deduplicate, use capture group if specified
const endpoints: Set<string> = new Set();
const epPattern = patterns.wham_endpoints;
if (epPattern?.pattern) {
const re = new RegExp(epPattern.pattern, "g");
const epGroupIdx = epPattern.group ?? 0;
for (const m of content.matchAll(re)) {
endpoints.add(m[epGroupIdx] ?? m[0]);
}
}
return {
apiBaseUrl,
originator,
whamEndpoints: [...endpoints].sort(),
userAgentContains: "Codex Desktop/",
};
}
/**
* Find the nearest `[` bracket within maxDistance chars before the given position.
* Prevents unbounded `lastIndexOf("[")` from matching a wrong bracket thousands of chars away.
*/
function findNearbyBracket(content: string, position: number, maxDistance = 50): number {
const searchStart = Math.max(0, position - maxDistance);
const slice = content.slice(searchStart, position);
const idx = slice.lastIndexOf("[");
return idx !== -1 ? searchStart + idx : -1;
}
/**
* Step B (continued): Extract system prompts from main.js
*/
function extractPrompts(content: string): {
desktopContext: string | null;
titleGeneration: string | null;
prGeneration: string | null;
automationResponse: string | null;
} {
// Desktop context: from "# Codex desktop context" to the end of the template literal.
// In minified code the closing backtick may be followed by `,` `;` or `)` — simple
// indexOf("`;") can match the wrong position. Instead, walk line-by-line and stop
// at the first line that looks like minified JS (identifier assignment, JS keyword).
let desktopContext: string | null = null;
const dcStart = content.indexOf("# Codex desktop context");
if (dcStart !== -1) {
const remaining = content.slice(dcStart);
const lines = remaining.split("\n");
const cleanLines: string[] = [];
for (const line of lines) {
// Detect minified JS: consecutive punctuation/whitespace followed by identifier assignment
if (/^[`,;)\]}\s]+[A-Za-z_$]/.test(line)) break;
if (/^[`'";}\])\s]*(?:async\s+)?(?:function|class|const|let|var|return|throw|if|for|while)\b/.test(line)) break;
cleanLines.push(line);
}
if (cleanLines.length > 0) {
cleanLines[cleanLines.length - 1] = cleanLines[cleanLines.length - 1].replace(/`\s*$/, "");
}
desktopContext = cleanLines.join("\n").trim() || null;
}
// Title generation: from the function that builds the array
let titleGeneration: string | null = null;
const titleMarker = "You are a helpful assistant. You will be presented with a user prompt";
const titleStart = content.indexOf(titleMarker);
if (titleStart !== -1) {
// Find the enclosing array end: ].join(
const joinIdx = content.indexOf("].join(", titleStart);
if (joinIdx !== -1) {
// Find the opening [ within 50 chars before the marker (not unbounded lastIndexOf)
const bracketStart = findNearbyBracket(content, titleStart);
if (bracketStart !== -1) {
const arrayContent = content.slice(bracketStart + 1, joinIdx);
// Parse string literals from the array
titleGeneration = parseStringArray(arrayContent);
}
}
}
// PR generation
let prGeneration: string | null = null;
const prMarker = "You are a helpful assistant. Generate a pull request title";
const prStart = content.indexOf(prMarker);
if (prStart !== -1) {
const joinIdx = content.indexOf("].join(", prStart);
if (joinIdx !== -1) {
const bracketStart = findNearbyBracket(content, prStart);
if (bracketStart !== -1) {
const arrayContent = content.slice(bracketStart + 1, joinIdx);
prGeneration = parseStringArray(arrayContent);
}
}
}
// Automation response: template literal starting with "Response MUST end with"
let automationResponse: string | null = null;
const autoMarker = "Response MUST end with a remark-directive block";
const autoStart = content.indexOf(autoMarker);
if (autoStart !== -1) {
const autoRemaining = content.slice(autoStart);
const autoLines = autoRemaining.split("\n");
const autoClean: string[] = [];
for (const line of autoLines) {
if (/^[`,;)\]}\s]+[A-Za-z_$]/.test(line)) break;
if (/^[`'";}\])\s]*(?:async\s+)?(?:function|class|const|let|var|return|throw|if|for|while)\b/.test(line)) break;
autoClean.push(line);
}
if (autoClean.length > 0) {
autoClean[autoClean.length - 1] = autoClean[autoClean.length - 1].replace(/`\s*$/, "");
}
automationResponse = autoClean.join("\n").trim() || null;
}
return { desktopContext, titleGeneration, prGeneration, automationResponse };
}
/**
* Parse a JavaScript string array content into a single joined string.
* Handles simple quoted strings separated by commas.
*/
function parseStringArray(arrayContent: string): string {
const lines: string[] = [];
// Match quoted strings (both single and double quotes) and template literals
const stringRe = /"((?:[^"\\]|\\.)*)"|'((?:[^'\\]|\\.)*)'/g;
for (const m of arrayContent.matchAll(stringRe)) {
const str = m[1] ?? m[2] ?? "";
// Unescape common sequences
lines.push(
str
.replace(/\\n/g, "\n")
.replace(/\\t/g, "\t")
.replace(/\\"/g, '"')
.replace(/\\'/g, "'")
.replace(/\\\\/g, "\\")
);
}
return lines.join("\n");
}
/** Safety net: strip any trailing minified JS that slipped through extraction. */
function sanitizePrompt(raw: string): string {
const lines = raw.split("\n");
const clean: string[] = [];
for (const line of lines) {
if (/^[`,;)\]}\s]*[A-Za-z_$][A-Za-z0-9_$]*\s*=/.test(line)) break;
if (/^[`'";}\])\s]*(?:async\s+)?(?:function|class|const|let|var|return|throw|if|for|while)\b/.test(line)) break;
clean.push(line);
}
if (clean.length > 0) {
clean[clean.length - 1] = clean[clean.length - 1].replace(/`\s*$/, "");
}
return clean.join("\n").trim();
}
function savePrompt(name: string, content: string | null): { hash: string | null; path: string | null } {
if (!content) return { hash: null, path: null };
const sanitized = sanitizePrompt(content);
if (!sanitized) return { hash: null, path: null };
// Validate: reject suspiciously short or garbled content
if (sanitized.length < 50) {
console.warn(`[extract] Prompt "${name}" too short (${sanitized.length} chars), skipping save`);
return { hash: null, path: null };
}
const garbageLines = sanitized.split("\n").filter((l) => /^[,`'"]\s*$/.test(l.trim()));
if (garbageLines.length > 3) {
console.warn(`[extract] Prompt "${name}" has ${garbageLines.length} garbled lines, skipping save`);
return { hash: null, path: null };
}
mkdirSync(PROMPTS_DIR, { recursive: true });
const filePath = join(PROMPTS_DIR, `${name}.md`);
writeFileSync(filePath, sanitized);
return {
hash: sha256(content),
path: filePath,
};
}
async function main() {
// Parse --path argument
const pathIdx = process.argv.indexOf("--path");
if (pathIdx === -1 || !process.argv[pathIdx + 1]) {
console.error("Usage: npx tsx scripts/extract-fingerprint.ts --path <codex-path>");
console.error("");
console.error(" <codex-path> can be:");
console.error(" - macOS: /path/to/Codex.app");
console.error(" - Windows: C:/path/to/Codex (containing resources/app.asar)");
console.error(" - Extracted: directory with package.json and .vite/build/main.js");
process.exit(1);
}
const inputPath = resolve(process.argv[pathIdx + 1]);
console.log(`[extract] Input: ${inputPath}`);
// Find ASAR root
const asarRoot = findAsarRoot(inputPath);
console.log(`[extract] ASAR root: ${asarRoot}`);
// Load extraction patterns
const patterns = loadPatterns();
// Step A: package.json
console.log("[extract] Reading package.json...");
const { version, buildNumber, sparkleFeedUrl, electronVersion } = extractFromPackageJson(asarRoot);
console.log(` version: ${version}`);
console.log(` build: ${buildNumber}`);
console.log(` electron: ${electronVersion ?? "not found"}`);
// Resolve Chromium version from Electron version
let chromiumVersion: string | null = null;
if (electronVersion) {
const electronMajor = parseInt(electronVersion.replace(/^[^0-9]*/, ""), 10);
if (!isNaN(electronMajor)) {
try {
const { versions } = await import("electron-to-chromium");
const versionMap = versions as Record<string, string>;
// versions keys use "major.minor" format (e.g. "40.0"), try both
const chromium = versionMap[`${electronMajor}.0`] ?? versionMap[electronMajor.toString()];
if (chromium) {
chromiumVersion = chromium;
console.log(` chromium: ${chromiumVersion} (from electron ${electronMajor})`);
} else {
console.warn(`[extract] No Chromium mapping for Electron ${electronMajor}`);
}
} catch {
console.warn("[extract] electron-to-chromium not available, skipping chromium resolution");
}
}
}
// Step B: main.js (or main-XXXXX.js chunk)
console.log("[extract] Loading main.js...");
const mainJs = await (async () => {
const buildDir = join(asarRoot, ".vite/build");
// Find the main JS: prefer main-*.js chunk (Vite code-split), fall back to main.js
let mainPath = join(buildDir, "main.js");
if (existsSync(buildDir)) {
const files = readdirSync(buildDir);
const chunk = files.find((f) => /^main-[A-Za-z0-9_-]+\.js$/.test(f));
if (chunk) {
mainPath = join(buildDir, chunk);
console.log(`[extract] Found chunk: ${chunk}`);
}
}
if (!existsSync(mainPath)) {
console.warn("[extract] main.js not found, skipping JS extraction");
return null;
}
const content = readFileSync(mainPath, "utf-8");
const lineCount = content.split("\n").length;
if (lineCount < 100 && content.length > 100000) {
console.log("[extract] main.js appears minified, attempting beautify...");
try {
const jsBeautify = await import("js-beautify");
return jsBeautify.default.js(content, { indent_size: 2 });
} catch {
console.warn("[extract] js-beautify not available, using raw content");
return content;
}
}
return content;
})();
let mainJsResults = {
apiBaseUrl: null as string | null,
originator: null as string | null,
whamEndpoints: [] as string[],
userAgentContains: "Codex Desktop/",
};
let promptResults = {
desktopContext: null as string | null,
titleGeneration: null as string | null,
prGeneration: null as string | null,
automationResponse: null as string | null,
};
if (mainJs) {
console.log(`[extract] main.js loaded (${mainJs.split("\n").length} lines)`);
try {
mainJsResults = extractFromMainJs(mainJs, patterns.main_js);
} catch (err) {
console.warn(`[extract] Primary extraction failed: ${(err as Error).message}`);
console.log("[extract] Scanning all .vite/build/*.js for fallback...");
const buildDir = join(asarRoot, ".vite/build");
if (existsSync(buildDir)) {
const jsFiles = readdirSync(buildDir).filter((f) => f.endsWith(".js"));
for (const file of jsFiles) {
const content = readFileSync(join(buildDir, file), "utf-8");
const origPattern = patterns.main_js.originator;
if (origPattern?.pattern) {
const m = content.match(new RegExp(origPattern.pattern));
if (m) {
mainJsResults.originator = m[origPattern.group ?? 0] ?? m[0];
console.log(`[extract] Originator found in fallback file: ${file}`);
break;
}
}
}
}
// Re-extract non-critical fields from mainJs
const apiPattern = patterns.main_js.api_base_url;
if (apiPattern?.pattern) {
const m = mainJs.match(new RegExp(apiPattern.pattern));
if (m) mainJsResults.apiBaseUrl = m[0];
}
}
console.log(` API base URL: ${mainJsResults.apiBaseUrl}`);
console.log(` originator: ${mainJsResults.originator}`);
console.log(` WHAM endpoints: ${mainJsResults.whamEndpoints.length} found`);
// Extract system prompts
console.log("[extract] Extracting system prompts...");
promptResults = extractPrompts(mainJs);
console.log(` desktop-context: ${promptResults.desktopContext ? "found" : "NOT FOUND"}`);
console.log(` title-generation: ${promptResults.titleGeneration ? "found" : "NOT FOUND"}`);
console.log(` pr-generation: ${promptResults.prGeneration ? "found" : "NOT FOUND"}`);
console.log(` automation-response: ${promptResults.automationResponse ? "found" : "NOT FOUND"}`);
}
// Save extracted prompts
const dc = savePrompt("desktop-context", promptResults.desktopContext);
const tg = savePrompt("title-generation", promptResults.titleGeneration);
const pr = savePrompt("pr-generation", promptResults.prGeneration);
const ar = savePrompt("automation-response", promptResults.automationResponse);
// Build output
const fingerprint: ExtractedFingerprint = {
app_version: version,
build_number: buildNumber,
electron_version: electronVersion,
chromium_version: chromiumVersion,
api_base_url: mainJsResults.apiBaseUrl,
originator: mainJsResults.originator,
wham_endpoints: mainJsResults.whamEndpoints,
user_agent_contains: mainJsResults.userAgentContains,
sparkle_feed_url: sparkleFeedUrl,
prompts: {
desktop_context_hash: dc.hash,
desktop_context_path: dc.path,
title_generation_hash: tg.hash,
title_generation_path: tg.path,
pr_generation_hash: pr.hash,
pr_generation_path: pr.path,
automation_response_hash: ar.hash,
automation_response_path: ar.path,
},
extracted_at: new Date().toISOString(),
source_path: inputPath,
};
// Write output
mkdirSync(resolve(ROOT, "data"), { recursive: true });
writeFileSync(OUTPUT_PATH, JSON.stringify(fingerprint, null, 2));
console.log(`\n[extract] Fingerprint written to ${OUTPUT_PATH}`);
console.log(`[extract] Prompts written to ${PROMPTS_DIR}/`);
console.log("[extract] Done.");
}
main().catch((err) => {
console.error("[extract] Fatal:", err);
process.exit(1);
});