/**
 * PDF to SVG Conversion Server
 *
 * Runs alongside the Vite dev server on HuggingFace Spaces.
 * Handles PDF to SVG conversion using the pdf2svg binary.
 *
 * Endpoints:
 * - GET  /convert/health                - Liveness check
 * - POST /convert/pdf                   - Upload PDF and get SVG pages
 * - GET  /convert/status/:jobId         - Check job status
 * - GET  /convert/page/:jobId/:pageNum  - Download a converted SVG page
 */

import http from 'http';
import { spawn, execSync, execFileSync } from 'child_process';
import { randomUUID } from 'crypto';
import fs from 'fs';
import path from 'path';
import os from 'os';

const PORT = process.env.PDF_CONVERT_PORT || 7861;
const TEMP_DIR = path.join(os.tmpdir(), 'pdf_convert');

// Upper bound on a buffered upload body; larger requests are answered with 413.
const MAX_UPLOAD_BYTES =
  Number(process.env.PDF_CONVERT_MAX_UPLOAD_BYTES) || 100 * 1024 * 1024;

// Highest page number the pdf2svg probing fallback will try.
const MAX_PROBE_PAGES = 1000;

// Ensure temp directory exists (recursive mkdir is a no-op when it already does).
fs.mkdirSync(TEMP_DIR, { recursive: true });

// Job storage: jobId -> job record created by the upload handler.
const jobs = new Map();

/**
 * Write a JSON response with the given status code.
 *
 * @param {http.ServerResponse} res - Response to write to.
 * @param {number} statusCode - HTTP status code.
 * @param {object} payload - Value serialized with JSON.stringify as the body.
 */
function sendJson(res, statusCode, payload) {
  res.writeHead(statusCode, { 'Content-Type': 'application/json' });
  res.end(JSON.stringify(payload));
}

/**
 * Extract the multipart boundary from a Content-Type header value.
 * Accepts both quoted (boundary="abc") and bare (boundary=abc) forms and
 * ignores any parameters that follow it.
 *
 * @param {string} contentType - Raw Content-Type header value.
 * @returns {string|null} The boundary, or null when none is present.
 */
function extractBoundary(contentType) {
  const match = /boundary=(?:"([^"]+)"|([^;\s]+))/i.exec(contentType);
  if (!match) return null;
  return match[1] ?? match[2];
}

/**
 * Parse multipart form data (simple implementation for file upload).
 *
 * @param {Buffer} buffer - Complete request body.
 * @param {string} boundary - Multipart boundary without the leading "--".
 * @returns {Array<{name: string, filename: string|null, content: Buffer}>}
 *   One entry per part that carries a name="..." attribute; empty when the
 *   boundary is missing or never occurs in the body.
 */
function parseMultipart(buffer, boundary) {
  const parts = [];
  if (!boundary) return parts;

  const delimiter = Buffer.from(`--${boundary}`);
  let start = buffer.indexOf(delimiter);

  while (start !== -1) {
    const end = buffer.indexOf(delimiter, start + delimiter.length);
    if (end === -1) break; // reached the closing delimiter

    const part = buffer.subarray(start + delimiter.length, end);
    const headerEnd = part.indexOf('\r\n\r\n');

    if (headerEnd !== -1) {
      const headers = part.subarray(0, headerEnd).toString();
      // -2 drops the \r\n that precedes the next delimiter.
      const content = part.subarray(headerEnd + 4, part.length - 2);

      // \b keeps the field-name match from firing inside filename="...".
      const nameMatch = headers.match(/\bname="([^"]+)"/);
      const filenameMatch = headers.match(/\bfilename="([^"]+)"/);

      if (nameMatch) {
        parts.push({
          name: nameMatch[1],
          filename: filenameMatch ? filenameMatch[1] : null,
          content,
        });
      }
    }

    start = end;
  }

  return parts;
}

/**
 * Pick the PDF bytes out of an upload body based on its Content-Type.
 *
 * @param {Buffer} body - Complete request body.
 * @param {string} contentType - Raw Content-Type header value ('' if absent).
 * @returns {{pdfBuffer: Buffer}|{error: string}} The PDF bytes, or the
 *   client-facing error message to send with a 400.
 */
function extractPdfPayload(body, contentType) {
  // Compare only the media type so parameters (charset, boundary) don't matter.
  const mediaType = contentType.split(';')[0].trim().toLowerCase();

  if (mediaType === 'multipart/form-data') {
    const parts = parseMultipart(body, extractBoundary(contentType));
    const filePart = parts.find((p) => p.filename?.toLowerCase().endsWith('.pdf'));
    return filePart ? { pdfBuffer: filePart.content } : { error: 'No PDF file found' };
  }

  if (mediaType === 'application/pdf') {
    return { pdfBuffer: body };
  }

  return { error: 'Invalid content type' };
}

/**
 * Get PDF page count using pdfinfo or pdf2svg.
 *
 * Both tools are invoked with an argument array (no shell), so the path is
 * never interpreted as shell syntax.
 *
 * @param {string} pdfPath - Path to the PDF file.
 * @returns {number} Page count; 1 when it cannot be determined.
 */
function getPdfPageCount(pdfPath) {
  try {
    // Try pdfinfo first
    const output = execFileSync('pdfinfo', [pdfPath], {
      encoding: 'utf8',
      stdio: ['ignore', 'pipe', 'ignore'],
    });
    const match = /^Pages:\s+(\d+)/im.exec(output);
    if (match) return Number.parseInt(match[1], 10);
  } catch {
    // pdfinfo missing or failed; fall through to probing with pdf2svg.
  }

  try {
    // Fallback: convert page 1, 2, 3, ... until pdf2svg fails.
    let maxPage = 1;
    for (let testPage = 1; testPage <= MAX_PROBE_PAGES; testPage++) {
      // Unique name so concurrent jobs never share a probe file.
      const testOutput = path.join(TEMP_DIR, `test_${randomUUID()}.svg`);
      try {
        execFileSync('pdf2svg', [pdfPath, testOutput, String(testPage)], {
          stdio: 'ignore',
        });
        maxPage = testPage;
      } catch {
        break;
      } finally {
        fs.rmSync(testOutput, { force: true });
      }
    }
    return maxPage;
  } catch (e) {
    console.error('Failed to get page count:', e.message);
    return 1;
  }
}

/**
 * Convert a single PDF page to SVG.
 *
 * @param {string} pdfPath - Path to the source PDF.
 * @param {number} pageNum - Page number passed to pdf2svg.
 * @param {string} outputPath - Where pdf2svg should write the SVG.
 * @returns {Promise<string>} Resolves with outputPath when pdf2svg exits 0 and
 *   the file exists; rejects if the process cannot be spawned or fails.
 */
function convertPage(pdfPath, pageNum, outputPath) {
  return new Promise((resolve, reject) => {
    const proc = spawn('pdf2svg', [pdfPath, outputPath, String(pageNum)]);

    let stderr = '';
    proc.stderr.on('data', (data) => {
      stderr += data.toString();
    });

    proc.on('close', (code) => {
      if (code === 0 && fs.existsSync(outputPath)) {
        resolve(outputPath);
      } else {
        reject(new Error(`pdf2svg failed: ${stderr || 'Unknown error'}`));
      }
    });

    proc.on('error', reject);
  });
}

/**
 * Process a PDF conversion job: determine the page count, convert pages one
 * at a time, and record progress on the job record. Failures are captured on
 * the job (status 'failed', error message) rather than thrown.
 *
 * @param {string} jobId - Key of the job in the jobs map; unknown ids are ignored.
 * @returns {Promise<void>}
 */
async function processJob(jobId) {
  const job = jobs.get(jobId);
  if (!job) return;

  job.status = 'processing';
  job.updatedAt = Date.now();

  try {
    // Get page count
    const pageCount = getPdfPageCount(job.pdfPath);
    job.pageCount = pageCount;

    // Convert each page
    for (let i = 1; i <= pageCount; i++) {
      const outputPath = path.join(job.outputDir, `page_${i}.svg`);
      await convertPage(job.pdfPath, i, outputPath);
      // Advanced only after the page is fully written; downloads rely on this.
      job.processedPages = i;
      job.updatedAt = Date.now();
      console.log(`[PDF Convert] Job ${jobId}: Page ${i}/${pageCount} converted`);
    }

    job.status = 'completed';
    job.updatedAt = Date.now();
    console.log(`[PDF Convert] Job ${jobId}: Completed - ${pageCount} pages`);
  } catch (e) {
    job.status = 'failed';
    job.error = e.message;
    job.updatedAt = Date.now();
    console.error(`[PDF Convert] Job ${jobId}: Failed -`, e.message);
  }
}

/**
 * Handle POST /convert/pdf: buffer the body (bounded by MAX_UPLOAD_BYTES),
 * store the PDF in a fresh job directory, and start conversion in the background.
 *
 * @param {http.IncomingMessage} req
 * @param {http.ServerResponse} res
 */
function handleUpload(req, res) {
  const chunks = [];
  let received = 0;
  let rejected = false;

  req.on('data', (chunk) => {
    if (rejected) return;
    received += chunk.length;
    if (received > MAX_UPLOAD_BYTES) {
      rejected = true;
      chunks.length = 0;
      res.writeHead(413, { 'Content-Type': 'application/json', Connection: 'close' });
      // Drop the connection only after the response has been flushed.
      res.end(JSON.stringify({ error: 'PDF upload too large' }), () => req.destroy());
      return;
    }
    chunks.push(chunk);
  });

  req.on('error', (err) => {
    console.error('Upload error:', err);
    chunks.length = 0;
  });

  req.on('end', () => {
    if (rejected) return;
    try {
      const payload = extractPdfPayload(
        Buffer.concat(chunks),
        req.headers['content-type'] || '',
      );
      if (payload.error) {
        sendJson(res, 400, { error: payload.error });
        return;
      }

      // Create job. The id is the only key to the results, so make it unguessable.
      const jobId = `job_${Date.now()}_${randomUUID()}`;
      const jobDir = path.join(TEMP_DIR, jobId);
      fs.mkdirSync(jobDir, { recursive: true });

      const pdfPath = path.join(jobDir, 'source.pdf');
      fs.writeFileSync(pdfPath, payload.pdfBuffer);

      const now = Date.now();
      jobs.set(jobId, {
        id: jobId,
        status: 'pending',
        pageCount: 0,
        processedPages: 0,
        pdfPath,
        outputDir: jobDir,
        createdAt: now,
        updatedAt: now,
      });

      // Start processing async; processJob records its own failures on the job.
      void processJob(jobId);

      sendJson(res, 200, {
        jobId,
        status: 'pending',
        statusUrl: `/convert/status/${jobId}`,
      });
    } catch (e) {
      console.error('Upload error:', e);
      sendJson(res, 500, { error: e.message });
    }
  });
}

/**
 * Handle GET /convert/status/:jobId.
 *
 * @param {http.ServerResponse} res
 * @param {string} jobId
 */
function handleStatus(res, jobId) {
  const job = jobs.get(jobId);
  if (!job) {
    sendJson(res, 404, { error: 'Job not found' });
    return;
  }

  // One entry per page that has finished converting so far.
  const pages = Array.from({ length: job.processedPages }, (_, index) => ({
    page: index + 1,
    url: `/convert/page/${jobId}/${index + 1}`,
  }));

  sendJson(res, 200, {
    id: job.id,
    status: job.status,
    pageCount: job.pageCount,
    processedPages: job.processedPages,
    error: job.error,
    pages,
  });
}

/**
 * Handle GET /convert/page/:jobId/:pageNum.
 * Only pages counted in job.processedPages are served, so a file that
 * pdf2svg is still writing is never returned.
 *
 * @param {http.ServerResponse} res
 * @param {string} jobId
 * @param {number} pageNum - 1-based page number.
 * @returns {Promise<void>}
 */
async function handlePageDownload(res, jobId, pageNum) {
  const job = jobs.get(jobId);
  if (!job) {
    sendJson(res, 404, { error: 'Job not found' });
    return;
  }

  if (pageNum < 1 || pageNum > job.processedPages) {
    sendJson(res, 404, { error: 'Page not found' });
    return;
  }

  const svgPath = path.join(job.outputDir, `page_${pageNum}.svg`);
  let svgContent;
  try {
    svgContent = await fs.promises.readFile(svgPath, 'utf8');
  } catch (err) {
    if (err.code !== 'ENOENT') throw err;
    sendJson(res, 404, { error: 'Page not found' });
    return;
  }

  res.writeHead(200, { 'Content-Type': 'image/svg+xml' });
  res.end(svgContent);
}

/**
 * Handle HTTP requests: set CORS headers, then dispatch on method and path.
 *
 * @param {http.IncomingMessage} req
 * @param {http.ServerResponse} res
 * @returns {Promise<void>}
 */
async function handleRequest(req, res) {
  // CORS headers
  res.setHeader('Access-Control-Allow-Origin', '*');
  res.setHeader('Access-Control-Allow-Methods', 'GET, POST, OPTIONS');
  res.setHeader('Access-Control-Allow-Headers', 'Content-Type');

  if (req.method === 'OPTIONS') {
    res.writeHead(200);
    res.end();
    return;
  }

  // The URL constructor throws on malformed request targets (e.g. "//").
  let url;
  try {
    url = new URL(req.url, `http://localhost:${PORT}`);
  } catch {
    sendJson(res, 400, { error: 'Invalid request URL' });
    return;
  }

  // Health check. NOTE: the pdf2svg flag is static; the binary is not probed.
  if (url.pathname === '/convert/health') {
    sendJson(res, 200, { status: 'ok', pdf2svg: true });
    return;
  }

  // Upload PDF
  if (req.method === 'POST' && url.pathname === '/convert/pdf') {
    handleUpload(req, res);
    return;
  }

  if (req.method === 'GET') {
    // Check job status
    const statusMatch = url.pathname.match(/^\/convert\/status\/(.+)$/);
    if (statusMatch) {
      handleStatus(res, statusMatch[1]);
      return;
    }

    // Download page
    const pageMatch = url.pathname.match(/^\/convert\/page\/(.+)\/(\d+)$/);
    if (pageMatch) {
      await handlePageDownload(res, pageMatch[1], Number.parseInt(pageMatch[2], 10));
      return;
    }
  }

  // 404 for unknown routes
  sendJson(res, 404, { error: 'Not found' });
}

// Create server. The wrapper turns a rejected handler into a 500 response
// instead of an unhandled promise rejection.
const server = http.createServer((req, res) => {
  handleRequest(req, res).catch((err) => {
    console.error('[PDF Convert Server] Request failed:', err);
    if (res.headersSent) {
      res.end();
    } else {
      sendJson(res, 500, { error: 'Internal server error' });
    }
  });
});

server.on('error', (err) => {
  console.error('[PDF Convert Server] Server error:', err.message);
  process.exitCode = 1;
});

server.listen(PORT, () => {
  console.log(`[PDF Convert Server] Running on port ${PORT}`);
  console.log(`[PDF Convert Server] Endpoints:`);
  console.log(`  POST /convert/pdf - Upload PDF file`);
  console.log(`  GET  /convert/status/:jobId - Check job status`);
  console.log(`  GET  /convert/page/:jobId/:pageNum - Download SVG page`);
});

// Cleanup old jobs periodically (every 30 minutes)
const cleanupTimer = setInterval(() => {
  const now = Date.now();
  const maxAge = 2 * 60 * 60 * 1000; // 2 hours

  for (const [jobId, job] of jobs.entries()) {
    if (now - job.createdAt <= maxAge) continue;

    // Clean up files; force makes a missing directory a no-op.
    try {
      fs.rmSync(job.outputDir, { recursive: true, force: true });
    } catch (e) {
      console.error(`[PDF Convert] Failed to cleanup job ${jobId}:`, e.message);
    }
    jobs.delete(jobId);
    console.log(`[PDF Convert] Cleaned up old job: ${jobId}`);
  }
}, 30 * 60 * 1000);

// The timer alone should not keep the process alive once the server is closed.
cleanupTimer.unref();