/** * PDF/A Conversion using Ghostscript WASM * Converts PDFs to PDF/A-1b, PDF/A-2b, or PDF/A-3b format. * Requires user to configure Ghostscript URL in WASM Settings. */ import { getWasmBaseUrl, fetchWasmFile, isWasmAvailable, } from '../config/wasm-cdn-config.js'; import { PDFDocument, PDFDict, PDFName, PDFArray } from 'pdf-lib'; interface GhostscriptModule { FS: { writeFile(path: string, data: Uint8Array | string): void; readFile(path: string, opts?: { encoding?: string }): Uint8Array; unlink(path: string): void; stat(path: string): { size: number }; }; callMain(args: string[]): number; } export type PdfALevel = 'PDF/A-1b' | 'PDF/A-2b' | 'PDF/A-3b'; let cachedGsModule: GhostscriptModule | null = null; export function setCachedGsModule(module: GhostscriptModule): void { cachedGsModule = module; } export function getCachedGsModule(): GhostscriptModule | null { return cachedGsModule; } export async function loadGsModule(): Promise { const gsBaseUrl = getWasmBaseUrl('ghostscript')!; const normalizedUrl = gsBaseUrl.endsWith('/') ? gsBaseUrl : `${gsBaseUrl}/`; const gsJsUrl = `${normalizedUrl}gs.js`; const response = await fetch(gsJsUrl); if (!response.ok) { throw new Error(`Failed to fetch gs.js: HTTP ${response.status}`); } const jsText = await response.text(); const blob = new Blob([jsText], { type: 'application/javascript' }); const blobUrl = URL.createObjectURL(blob); try { const gsModule = await import(/* @vite-ignore */ blobUrl); const ModuleFactory = gsModule.default; return (await ModuleFactory({ locateFile: (path: string) => { if (path.endsWith('.wasm')) { return `${normalizedUrl}gs.wasm`; } return `${normalizedUrl}${path}`; }, print: (text: string) => console.log('[GS]', text), printErr: (text: string) => console.error('[GS Error]', text), })) as GhostscriptModule; } finally { URL.revokeObjectURL(blobUrl); } } export async function convertToPdfA( pdfData: Uint8Array, level: PdfALevel = 'PDF/A-2b', onProgress?: (msg: string) => void ): Promise { if (!isWasmAvailable('ghostscript')) { throw new Error( 'Ghostscript is not configured. Please configure it in WASM Settings.' ); } onProgress?.('Loading Ghostscript...'); let gs: GhostscriptModule; if (cachedGsModule) { gs = cachedGsModule; } else { gs = await loadGsModule(); cachedGsModule = gs; } const pdfaMap: Record = { 'PDF/A-1b': '1', 'PDF/A-2b': '2', 'PDF/A-3b': '3', }; const inputPath = '/tmp/input.pdf'; const outputPath = '/tmp/output.pdf'; const iccPath = '/tmp/pdfa.icc'; const pdfaDefPath = '/tmp/pdfa.ps'; gs.FS.writeFile(inputPath, pdfData); console.log('[Ghostscript] Input file size:', pdfData.length); onProgress?.(`Converting to ${level}...`); try { const iccFileName = 'sRGB_IEC61966-2-1_no_black_scaling.icc'; const iccUrl = `${import.meta.env.BASE_URL}${iccFileName}`; const response = await fetch(iccUrl); if (!response.ok) { throw new Error( `Failed to fetch ICC profile from ${iccUrl}: HTTP ${response.status}` ); } const iccData = new Uint8Array(await response.arrayBuffer()); console.log( '[Ghostscript] sRGB v2 ICC profile loaded:', iccData.length, 'bytes' ); gs.FS.writeFile(iccPath, iccData); console.log('[Ghostscript] sRGB ICC profile written to FS:', iccPath); const iccHex = Array.from(iccData) .map((b) => b.toString(16).padStart(2, '0')) .join(''); console.log('[Ghostscript] ICC profile hex length:', iccHex.length); const pdfaSubtype = level === 'PDF/A-1b' ? '/GTS_PDFA1' : '/GTS_PDFA'; const pdfaPS = `%! % PDF/A definition file for ${level} % Define the ICC profile stream object with embedded hex data [/_objdef {icc_PDFA} /type /stream /OBJ pdfmark [{icc_PDFA} << /N 3 >> /PUT pdfmark [{icc_PDFA} <${iccHex}> /PUT pdfmark % Define the OutputIntent dictionary [/_objdef {OutputIntent_PDFA} /type /dict /OBJ pdfmark [{OutputIntent_PDFA} << /Type /OutputIntent /S ${pdfaSubtype} /DestOutputProfile {icc_PDFA} /OutputConditionIdentifier (sRGB IEC61966-2.1) /Info (sRGB IEC61966-2.1) /RegistryName (http://www.color.org) >> /PUT pdfmark % Attach OutputIntent to the document Catalog [{Catalog} << /OutputIntents [ {OutputIntent_PDFA} ] >> /PUT pdfmark `; gs.FS.writeFile(pdfaDefPath, pdfaPS); console.log( '[Ghostscript] PDFA PostScript created with embedded ICC hex data' ); } catch (e) { console.error('[Ghostscript] Failed to setup PDF/A assets:', e); throw new Error('Conversion failed: could not create PDF/A definition'); } const args = [ '-dNOSAFER', '-dBATCH', '-dNOPAUSE', '-sDEVICE=pdfwrite', `-dPDFA=${pdfaMap[level]}`, '-dPDFACompatibilityPolicy=1', `-dCompatibilityLevel=${level === 'PDF/A-1b' ? '1.4' : '1.7'}`, '-sColorConversionStrategy=UseDeviceIndependentColor', '-sICCProfilesDir=/tmp/', `-sOutputICCProfile=${iccPath}`, `-sDefaultRGBProfile=${iccPath}`, `-sBlendColorProfile=${iccPath}`, '-dCompressPages=true', '-dWriteObjStms=false', '-dWriteXRefStm=false', '-dEmbedAllFonts=true', '-dSubsetFonts=true', '-dAutoRotatePages=/None', `-sOutputFile=${outputPath}`, pdfaDefPath, inputPath, ]; console.log('[Ghostscript] Running PDF/A conversion...'); try { console.log('[Ghostscript] Checking version:'); gs.callMain(['--version']); } catch (e) { console.warn('[Ghostscript] Could not check version:', e); } let exitCode: number; try { exitCode = gs.callMain(args); } catch (e) { console.error('[Ghostscript] Exception:', e); throw new Error(`Ghostscript threw an exception: ${e}`); } console.log('[Ghostscript] Exit code:', exitCode); if (exitCode !== 0) { try { gs.FS.unlink(inputPath); } catch { /* ignore */ } try { gs.FS.unlink(outputPath); } catch { /* ignore */ } try { gs.FS.unlink(iccPath); } catch { /* ignore */ } try { gs.FS.unlink(pdfaDefPath); } catch { /* ignore */ } throw new Error(`Ghostscript conversion failed with exit code ${exitCode}`); } // Read output let output: Uint8Array; try { const stat = gs.FS.stat(outputPath); console.log('[Ghostscript] Output file size:', stat.size); output = gs.FS.readFile(outputPath); } catch (e) { console.error('[Ghostscript] Failed to read output:', e); throw new Error('Ghostscript did not produce output file'); } // Cleanup try { gs.FS.unlink(inputPath); } catch { /* ignore */ } try { gs.FS.unlink(outputPath); } catch { /* ignore */ } try { gs.FS.unlink(iccPath); } catch { /* ignore */ } try { gs.FS.unlink(pdfaDefPath); } catch { /* ignore */ } if (level !== 'PDF/A-1b') { onProgress?.('Post-processing for transparency compliance...'); console.log( '[Ghostscript] Adding Group dictionaries to pages for transparency compliance...' ); try { output = await addPageGroupDictionaries(output); console.log('[Ghostscript] Page Group dictionaries added successfully'); } catch (e) { console.error('[Ghostscript] Failed to add Group dictionaries:', e); } } return output; } async function addPageGroupDictionaries( pdfData: Uint8Array ): Promise { const pdfDoc = await PDFDocument.load(pdfData, { ignoreEncryption: true, updateMetadata: false, }); const catalog = pdfDoc.catalog; const outputIntentsArray = catalog.lookup(PDFName.of('OutputIntents')); let iccProfileRef: ReturnType = undefined; if (outputIntentsArray instanceof PDFArray) { const firstIntent = outputIntentsArray.lookup(0); if (firstIntent instanceof PDFDict) { iccProfileRef = firstIntent.get(PDFName.of('DestOutputProfile')); } } const updateGroupCS = (groupDict: PDFDict) => { if (!iccProfileRef) return; const currentCS = groupDict.get(PDFName.of('CS')); if (currentCS instanceof PDFName) { const csName = currentCS.decodeText(); if ( csName === 'DeviceRGB' || csName === 'DeviceGray' || csName === 'DeviceCMYK' ) { const iccColorSpace = pdfDoc.context.obj([ PDFName.of('ICCBased'), iccProfileRef, ]); groupDict.set(PDFName.of('CS'), iccColorSpace); } } else if (!currentCS) { const iccColorSpace = pdfDoc.context.obj([ PDFName.of('ICCBased'), iccProfileRef, ]); groupDict.set(PDFName.of('CS'), iccColorSpace); } }; const pages = pdfDoc.getPages(); for (const page of pages) { const pageDict = page.node; const existingGroup = pageDict.lookup(PDFName.of('Group')); if (existingGroup) { if (existingGroup instanceof PDFDict) { updateGroupCS(existingGroup); } } else if (iccProfileRef) { const colorSpace = pdfDoc.context.obj([ PDFName.of('ICCBased'), iccProfileRef, ]); const groupDict = pdfDoc.context.obj({ Type: 'Group', S: 'Transparency', I: false, K: false, }); (groupDict as PDFDict).set(PDFName.of('CS'), colorSpace); pageDict.set(PDFName.of('Group'), groupDict); } } if (iccProfileRef) { pdfDoc.context.enumerateIndirectObjects().forEach(([ref, obj]) => { if ( obj instanceof PDFDict || (obj && typeof obj === 'object' && 'dict' in obj) ) { const dict = 'dict' in obj ? (obj as { dict: PDFDict }).dict : (obj as PDFDict); const subtype = dict.get(PDFName.of('Subtype')); if (subtype instanceof PDFName && subtype.decodeText() === 'Form') { const group = dict.lookup(PDFName.of('Group')); if (group instanceof PDFDict) { updateGroupCS(group); } } } }); } return await pdfDoc.save({ useObjectStreams: false, addDefaultPage: false, updateFieldAppearances: false, }); } export async function convertFileToPdfA( file: File, level: PdfALevel = 'PDF/A-2b', onProgress?: (msg: string) => void ): Promise { const arrayBuffer = await file.arrayBuffer(); const pdfData = new Uint8Array(arrayBuffer); const result = await convertToPdfA(pdfData, level, onProgress); const copy = new Uint8Array(result.length); copy.set(result); return new Blob([copy], { type: 'application/pdf' }); } export async function convertFontsToOutlines( pdfData: Uint8Array, onProgress?: (msg: string) => void ): Promise { if (!isWasmAvailable('ghostscript')) { throw new Error( 'Ghostscript is not configured. Please configure it in WASM Settings.' ); } onProgress?.('Loading Ghostscript...'); let gs: GhostscriptModule; if (cachedGsModule) { gs = cachedGsModule; } else { gs = await loadGsModule(); cachedGsModule = gs; } const inputPath = '/tmp/input.pdf'; const outputPath = '/tmp/output.pdf'; gs.FS.writeFile(inputPath, pdfData); onProgress?.('Converting fonts to outlines...'); const args = [ '-dNOSAFER', '-dBATCH', '-dNOPAUSE', '-sDEVICE=pdfwrite', '-dNoOutputFonts', '-dCompressPages=true', '-dAutoRotatePages=/None', `-sOutputFile=${outputPath}`, inputPath, ]; let exitCode: number; try { exitCode = gs.callMain(args); } catch (e) { try { gs.FS.unlink(inputPath); } catch {} throw new Error(`Ghostscript threw an exception: ${e}`); } if (exitCode !== 0) { try { gs.FS.unlink(inputPath); } catch {} try { gs.FS.unlink(outputPath); } catch {} throw new Error(`Ghostscript conversion failed with exit code ${exitCode}`); } let output: Uint8Array; try { output = gs.FS.readFile(outputPath); } catch (e) { throw new Error('Ghostscript did not produce output file'); } try { gs.FS.unlink(inputPath); } catch {} try { gs.FS.unlink(outputPath); } catch {} return output; } export async function convertFileToOutlines( file: File, onProgress?: (msg: string) => void ): Promise { const arrayBuffer = await file.arrayBuffer(); const pdfData = new Uint8Array(arrayBuffer); const result = await convertFontsToOutlines(pdfData, onProgress); const copy = new Uint8Array(result.length); copy.set(result); return new Blob([copy], { type: 'application/pdf' }); }