Spaces:

Jaimodiji
/

my-multiplayer-app

Sleeping

App Files Files Community

my-multiplayer-app / cmd /pdf_convert_server.mjs

Jaimodiji's picture

Upload folder using huggingface_hub

bf92db0 verified about 2 months ago

history blame contribute delete

11.5 kB

	/**
	* PDF to SVG Conversion Server
	*
	* Runs alongside the Vite dev server on HuggingFace Spaces.
	* Handles PDF to SVG conversion using the pdf2svg binary.
	*
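	* Endpoints:
	* - POST /convert/pdf - Upload a PDF and start a conversion job (responds with the job id)
	* - GET /convert/status/:jobId - Check job status and list the pages converted so far
	* - GET /convert/page/:jobId/:pageNum - Download a converted SVG page
	* - GET /convert/health - Health check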
	*/

	import http from 'http';
	import { spawn, execSync, execFileSync } from 'child_process';
	import fs from 'fs';
	import path from 'path';
	import os from 'os';

	// Port the conversion server listens on. PDF_CONVERT_PORT is coerced to a
	// number so that an unset, empty, or non-numeric value falls back to 7861
	// instead of being handed to server.listen() as a string.
	const PORT = Number(process.env.PDF_CONVERT_PORT) || 7861;

	// Scratch directory that holds each job's uploaded PDF and converted SVG pages.
	const TEMP_DIR = path.join(os.tmpdir(), 'pdf_convert');

	// Ensure temp directory exists (a recursive mkdir is a no-op when it already does)
	fs.mkdirSync(TEMP_DIR, { recursive: true });

	// In-memory job storage, keyed by job id
	const jobs = new Map();

	/**
	 * Parse multipart form data (simple implementation for file upload).
	 *
	 * Splits the body on `--<boundary>` delimiters and, for every part that has
	 * a header/body separator and a `name="..."` attribute, records the field
	 * name, the optional filename, and the raw content bytes.
	 *
	 * @param {Buffer} buffer - Complete request body.
	 * @param {string} boundary - Boundary token from the Content-Type header
	 *   (without the leading `--`).
	 * @returns {Array<{name: string, filename: (string|null), content: Buffer}>}
	 *   Parsed parts in body order; empty when no complete part is found.
	 */
	function parseMultipart(buffer, boundary) {
	  const parts = [];
	  const delimiter = Buffer.from(`--${boundary}`);
	  const headerSeparator = '\r\n\r\n';
	  let start = buffer.indexOf(delimiter);

	  while (start !== -1) {
	    const end = buffer.indexOf(delimiter, start + delimiter.length);
	    if (end === -1) break;

	    const part = buffer.subarray(start + delimiter.length, end);
	    const headerEnd = part.indexOf(headerSeparator);
	    if (headerEnd !== -1) {
	      const headers = part.subarray(0, headerEnd).toString();
	      // -2 drops the trailing \r\n that precedes the next delimiter
	      const content = part.subarray(headerEnd + headerSeparator.length, part.length - 2);

	      // \b keeps `name=` from matching the tail of `filename=` when the
	      // filename attribute happens to come first in the header.
	      const nameMatch = headers.match(/\bname="([^"]+)"/);
	      const filenameMatch = headers.match(/\bfilename="([^"]+)"/);

	      if (nameMatch) {
	        parts.push({
	          name: nameMatch[1],
	          filename: filenameMatch ? filenameMatch[1] : null,
	          content,
	        });
	      }
	    }
	    start = end;
	  }
	  return parts;
	}

	/**
	 * Get PDF page count using pdfinfo, falling back to probing with pdf2svg.
	 *
	 * Both tools are invoked without a shell (argument arrays), so the path is
	 * never interpreted as shell syntax.
	 *
	 * NOTE: this function is synchronous; the pdf2svg fallback runs one process
	 * per page (up to 1000) and blocks the caller while it does so.
	 *
	 * @param {string} pdfPath - Path to the PDF file on disk.
	 * @returns {number} Page count reported by pdfinfo, or the highest page
	 *   pdf2svg could render. Returns 1 when neither approach succeeds.
	 */
	function getPdfPageCount(pdfPath) {
	  try {
	    // Try pdfinfo first; its stderr is discarded.
	    const info = execFileSync('pdfinfo', [pdfPath], {
	      encoding: 'utf8',
	      stdio: ['ignore', 'pipe', 'ignore'],
	    });
	    const pagesLine = info.match(/^Pages:\s*(\d+)/im);
	    if (pagesLine) {
	      return Number.parseInt(pagesLine[1], 10);
	    }
	  } catch (e) {
	    // pdfinfo not available or failed on this file, try alternative method
	  }

	  try {
	    // Fallback: render page 1, 2, 3, ... with pdf2svg until a page fails.
	    // This is a linear probe capped at 1000 pages.
	    let maxPage = 1;

	    for (let testPage = 1; testPage <= 1000; testPage++) {
	      const testOutput = path.join(TEMP_DIR, `test_${Date.now()}_${testPage}.svg`);
	      try {
	        execFileSync('pdf2svg', [pdfPath, testOutput, String(testPage)], { stdio: 'ignore' });
	        maxPage = testPage;
	      } catch (e) {
	        break;
	      } finally {
	        // Remove the probe output whether or not the render succeeded.
	        fs.rmSync(testOutput, { force: true });
	      }
	    }
	    return maxPage;
	  } catch (e) {
	    console.error('Failed to get page count:', e.message);
	    return 1;
	  }
	}

	/**
	 * Convert a single PDF page to SVG by running the pdf2svg binary.
	 *
	 * @param {string} pdfPath - Path to the source PDF.
	 * @param {number} pageNum - Page number passed to pdf2svg.
	 * @param {string} outputPath - Where pdf2svg should write the SVG.
	 * @returns {Promise<string>} Resolves with outputPath once pdf2svg exits with
	 *   code 0 and the output file exists. Rejects with an Error carrying the
	 *   captured stderr otherwise, or with the spawn error if the process could
	 *   not be started.
	 */
	async function convertPage(pdfPath, pageNum, outputPath) {
	  return new Promise((resolve, reject) => {
	    const proc = spawn('pdf2svg', [pdfPath, outputPath, String(pageNum)]);

	    // Collect stderr so a failure can report what pdf2svg printed.
	    let stderr = '';
	    proc.stderr.on('data', (data) => {
	      stderr += data.toString();
	    });

	    proc.on('close', (code) => {
	      if (code === 0 && fs.existsSync(outputPath)) {
	        resolve(outputPath);
	      } else {
	        reject(new Error(`pdf2svg failed: ${stderr || 'Unknown error'}`));
	      }
	    });

	    // Emitted when the process cannot be spawned (e.g. binary missing).
	    proc.on('error', (err) => {
	      reject(err);
	    });
	  });
	}

	/**
	 * Run a queued PDF conversion job to completion.
	 *
	 * Looks the job up in the `jobs` map, determines the page count, then
	 * converts the pages one after another, updating `processedPages` and
	 * `updatedAt` on the job record as it goes. Any error marks the job as
	 * 'failed' and stores the message on `job.error`; nothing is thrown for
	 * conversion failures.
	 *
	 * @param {string} jobId - Key of the job in the `jobs` map. Unknown ids are ignored.
	 * @returns {Promise<void>}
	 */
	async function processJob(jobId) {
	  const job = jobs.get(jobId);
	  if (!job) {
	    return;
	  }

	  // Stamp the job with the current time after every state change.
	  const touch = () => {
	    job.updatedAt = Date.now();
	  };

	  job.status = 'processing';
	  touch();

	  try {
	    const total = getPdfPageCount(job.pdfPath);
	    job.pageCount = total;

	    // Pages are converted sequentially so processedPages always reflects a
	    // contiguous prefix of finished pages.
	    let pageNo = 1;
	    while (pageNo <= total) {
	      const svgFile = path.join(job.outputDir, `page_${pageNo}.svg`);
	      await convertPage(job.pdfPath, pageNo, svgFile);
	      job.processedPages = pageNo;
	      touch();
	      console.log(`[PDF Convert] Job ${jobId}: Page ${pageNo}/${total} converted`);
	      pageNo += 1;
	    }

	    job.status = 'completed';
	    touch();
	    console.log(`[PDF Convert] Job ${jobId}: Completed - ${total} pages`);
	  } catch (err) {
	    job.status = 'failed';
	    job.error = err.message;
	    touch();
	    console.error(`[PDF Convert] Job ${jobId}: Failed -`, err.message);
	  }
	}

	/**
	 * Write a JSON response with the given HTTP status code.
	 */
	function sendJson(res, statusCode, payload) {
	  res.writeHead(statusCode, { 'Content-Type': 'application/json' });
	  res.end(JSON.stringify(payload));
	}

	/**
	 * Handle HTTP requests.
	 *
	 * Routes:
	 * - OPTIONS *                            -> 200 (CORS preflight)
	 * - /convert/health                      -> { status: 'ok', pdf2svg: true }
	 * - POST /convert/pdf                    -> stores the upload, registers a job,
	 *                                           starts processing, returns the job id
	 * - GET /convert/status/:jobId           -> job state plus URLs of finished pages
	 * - GET /convert/page/:jobId/:pageNum    -> the converted SVG
	 * - anything else                        -> 404 JSON
	 *
	 * The upload body may be `multipart/form-data` (first part whose filename
	 * ends in .pdf is used) or a raw `application/pdf` body.
	 *
	 * @param {import('http').IncomingMessage} req
	 * @param {import('http').ServerResponse} res
	 * @returns {Promise<void>}
	 */
	async function handleRequest(req, res) {
	  // CORS headers
	  res.setHeader('Access-Control-Allow-Origin', '*');
	  res.setHeader('Access-Control-Allow-Methods', 'GET, POST, OPTIONS');
	  res.setHeader('Access-Control-Allow-Headers', 'Content-Type');

	  // A malformed request target makes the URL constructor throw; answer 400
	  // rather than letting this async handler reject with nobody listening.
	  let url;
	  try {
	    url = new URL(req.url, `http://localhost:${PORT}`);
	  } catch (e) {
	    sendJson(res, 400, { error: 'Invalid request URL' });
	    return;
	  }

	  if (req.method === 'OPTIONS') {
	    res.writeHead(200);
	    res.end();
	    return;
	  }

	  // Health check
	  if (url.pathname === '/convert/health') {
	    sendJson(res, 200, { status: 'ok', pdf2svg: true });
	    return;
	  }

	  // Upload PDF
	  if (req.method === 'POST' && url.pathname === '/convert/pdf') {
	    const chunks = [];
	    req.on('data', (chunk) => chunks.push(chunk));
	    req.on('error', (err) => {
	      console.error('Upload error:', err);
	      if (!res.headersSent) {
	        sendJson(res, 400, { error: 'Upload failed' });
	      }
	    });
	    req.on('end', () => {
	      try {
	        const buffer = Buffer.concat(chunks);
	        const contentType = req.headers['content-type'] || '';
	        // Compare only the media type: parameters (boundary, charset) and
	        // letter case must not affect which branch is taken.
	        const mediaType = contentType.split(';')[0].trim().toLowerCase();

	        let pdfBuffer;

	        if (mediaType === 'multipart/form-data') {
	          // The boundary may be quoted and may be followed by other parameters.
	          const boundaryMatch = contentType.match(/boundary=(?:"([^"]+)"|([^;\s]+))/i);
	          const boundary = boundaryMatch && (boundaryMatch[1] || boundaryMatch[2]);
	          const parts = boundary ? parseMultipart(buffer, boundary) : [];
	          const filePart = parts.find(
	            (p) => p.filename && p.filename.toLowerCase().endsWith('.pdf')
	          );
	          if (!filePart) {
	            sendJson(res, 400, { error: 'No PDF file found' });
	            return;
	          }
	          pdfBuffer = filePart.content;
	        } else if (mediaType === 'application/pdf') {
	          pdfBuffer = buffer;
	        } else {
	          sendJson(res, 400, { error: 'Invalid content type' });
	          return;
	        }

	        // Create job
	        const jobId = `job_${Date.now()}_${Math.random().toString(36).slice(2, 11)}`;
	        const jobDir = path.join(TEMP_DIR, jobId);
	        fs.mkdirSync(jobDir, { recursive: true });

	        const pdfPath = path.join(jobDir, 'source.pdf');
	        fs.writeFileSync(pdfPath, pdfBuffer);

	        const job = {
	          id: jobId,
	          status: 'pending',
	          pageCount: 0,
	          processedPages: 0,
	          pdfPath,
	          outputDir: jobDir,
	          createdAt: Date.now(),
	          updatedAt: Date.now(),
	        };

	        jobs.set(jobId, job);

	        // Start processing in the background; the client polls statusUrl.
	        processJob(jobId);

	        sendJson(res, 200, {
	          jobId,
	          status: 'pending',
	          statusUrl: `/convert/status/${jobId}`,
	        });
	      } catch (e) {
	        console.error('Upload error:', e);
	        sendJson(res, 500, { error: e.message });
	      }
	    });
	    return;
	  }

	  // Check job status
	  const statusMatch = url.pathname.match(/^\/convert\/status\/(.+)$/);
	  if (req.method === 'GET' && statusMatch) {
	    const jobId = statusMatch[1];
	    const job = jobs.get(jobId);

	    if (!job) {
	      sendJson(res, 404, { error: 'Job not found' });
	      return;
	    }

	    // One entry per page converted so far (empty while nothing is done).
	    const pages = [];
	    for (let i = 1; i <= job.processedPages; i++) {
	      pages.push({
	        page: i,
	        url: `/convert/page/${jobId}/${i}`,
	      });
	    }

	    sendJson(res, 200, {
	      id: job.id,
	      status: job.status,
	      pageCount: job.pageCount,
	      processedPages: job.processedPages,
	      error: job.error,
	      pages,
	    });
	    return;
	  }

	  // Download page
	  const pageMatch = url.pathname.match(/^\/convert\/page\/(.+)\/(\d+)$/);
	  if (req.method === 'GET' && pageMatch) {
	    const jobId = pageMatch[1];
	    const pageNum = parseInt(pageMatch[2], 10);
	    const job = jobs.get(jobId);

	    if (!job) {
	      sendJson(res, 404, { error: 'Job not found' });
	      return;
	    }

	    const svgPath = path.join(job.outputDir, `page_${pageNum}.svg`);
	    let svgContent;
	    try {
	      // Read directly instead of exists-then-read so a file removed in
	      // between (e.g. by the periodic cleanup) yields a 404, not a throw.
	      svgContent = fs.readFileSync(svgPath, 'utf8');
	    } catch (e) {
	      if (e.code === 'ENOENT') {
	        sendJson(res, 404, { error: 'Page not found' });
	      } else {
	        console.error('Page read error:', e);
	        sendJson(res, 500, { error: e.message });
	      }
	      return;
	    }

	    res.writeHead(200, { 'Content-Type': 'image/svg+xml' });
	    res.end(svgContent);
	    return;
	  }

	  // 404 for unknown routes
	  sendJson(res, 404, { error: 'Not found' });
	}

	// Build the HTTP server around handleRequest and start listening on PORT.
	const server = http.createServer(handleRequest);

	server.listen(PORT, () => {
	  // Startup banner: the port followed by the documented endpoints.
	  const startupLines = [
	    `[PDF Convert Server] Running on port ${PORT}`,
	    `[PDF Convert Server] Endpoints:`,
	    ` POST /convert/pdf - Upload PDF file`,
	    ` GET /convert/status/:jobId - Check job status`,
	    ` GET /convert/page/:jobId/:pageNum - Download SVG page`,
	  ];
	  for (const line of startupLines) {
	    console.log(line);
	  }
	});

	// Every 30 minutes, drop jobs created more than 2 hours ago: delete the
	// job's directory on disk (best effort) and forget the job record.
	setInterval(() => {
	  const maxAge = 2 * 60 * 60 * 1000; // 2 hours
	  const now = Date.now();

	  jobs.forEach((job, jobId) => {
	    const age = now - job.createdAt;
	    if (!(age > maxAge)) {
	      return;
	    }

	    // Clean up files; a failure here is logged but does not keep the job around.
	    try {
	      if (fs.existsSync(job.outputDir)) {
	        fs.rmSync(job.outputDir, { recursive: true, force: true });
	      }
	    } catch (e) {
	      console.error(`[PDF Convert] Failed to cleanup job ${jobId}:`, e.message);
	    }

	    jobs.delete(jobId);
	    console.log(`[PDF Convert] Cleaned up old job: ${jobId}`);
	  });
	}, 30 * 60 * 1000);