import { promises as fs } from 'fs';
import path from 'path';

/**
 * Check whether the process is running on Hugging Face Spaces.
 *
 * The only signal used is the presence of a non-empty `SPACE_ID`
 * environment variable.
 */
function isHuggingFaceSpaces(): boolean {
  return !!process.env.SPACE_ID;
}

/**
 * Determine the base data directory.
 *
 * Resolution order:
 *  1. `DATA_DIR` environment variable, when set to a non-empty value.
 *  2. `/data` when running on Hugging Face Spaces (`SPACE_ID` is set).
 *  3. `<cwd>/dataset` otherwise.
 *
 * The environment is read on every call rather than cached, so the result
 * reflects the runtime environment and not the build-time one.
 *
 * @returns The base data directory path.
 */
export function getDataDir(): string {
  // An explicit override always wins.
  if (process.env.DATA_DIR) {
    return process.env.DATA_DIR;
  }

  if (isHuggingFaceSpaces()) {
    return '/data';
  }

  // Default to the local dataset directory.
  return path.join(process.cwd(), 'dataset');
}

/**
 * Build a path inside the data directory.
 *
 * @param subPaths Path segments appended to the data directory, in order.
 * @returns The joined path; the data directory itself when no segments are given.
 */
export function getDataPath(...subPaths: string[]): string {
  return path.join(getDataDir(), ...subPaths);
}

/**
 * Get the audio directory, optionally scoped to one speaker.
 *
 * NOTE(review): `speakerId` is joined as-is; callers passing untrusted input
 * should run it through `sanitizePath` first.
 *
 * @param speakerId Optional speaker sub-directory name.
 * @returns `<data>/audio/<speakerId>` when a speaker is given, else `<data>/audio`.
 */
export function getAudioPath(speakerId?: string): string {
  return speakerId ? getDataPath('audio', speakerId) : getDataPath('audio');
}

/**
 * Get the transcriptions directory, optionally scoped to one speaker.
 *
 * NOTE(review): `speakerId` is joined as-is; callers passing untrusted input
 * should run it through `sanitizePath` first.
 *
 * @param speakerId Optional speaker sub-directory name.
 * @returns `<data>/transcriptions/<speakerId>` when a speaker is given,
 *   else `<data>/transcriptions`.
 */
export function getTranscriptionsPath(speakerId?: string): string {
  return speakerId
    ? getDataPath('transcriptions', speakerId)
    : getDataPath('transcriptions');
}

/**
 * Get the metadata directory path (`<data>/metadata`).
 */
export function getMetadataPath(): string {
  return getDataPath('metadata');
}

/**
 * Get the fonts directory path (`<data>/fonts`).
 */
export function getFontsPath(): string {
  return getDataPath('fonts');
}

/**
 * Narrow an unknown thrown value to an `Error` that carries a `code` property.
 */
function isErrnoException(error: unknown): error is NodeJS.ErrnoException {
  return error instanceof Error && 'code' in error;
}

/**
 * Create a directory (and any missing parents) if it does not already exist.
 *
 * `EEXIST` is ignored. Every other failure is logged and rethrown, including
 * thrown values that are not `Error` instances or that carry no `code`.
 *
 * @param dirPath Directory to create.
 * @throws Whatever `fs.mkdir` rejected with, unless its code is `EEXIST`.
 */
export async function ensureDir(dirPath: string): Promise<void> {
  try {
    await fs.mkdir(dirPath, { recursive: true });
  } catch (error: unknown) {
    // Only "already exists" is treated as success.
    if (isErrnoException(error) && error.code === 'EEXIST') {
      return;
    }
    console.error(`Failed to create directory ${dirPath}:`, error);
    throw error;
  }
}

/**
 * Sanitize a string for use as a single path segment.
 *
 * Steps, in order:
 *  1. Remove every `..` sequence.
 *  2. Replace path separators and other reserved characters with `_`.
 *  3. Replace anything outside `[a-zA-Z0-9_-]` with `_`.
 *  4. Truncate to `maxLength` characters.
 *  5. Strip leading and trailing underscores.
 *
 * Truncation happens before trimming, so the result may be shorter than
 * `maxLength` even when the input was longer.
 *
 * @param input Raw, possibly untrusted, string.
 * @param maxLength Maximum length of the result before trimming (default 50).
 * @returns The sanitized segment, or `'unknown'` when the input is empty,
 *   not a string, or reduces to an empty string.
 */
export function sanitizePath(input: string, maxLength: number = 50): string {
  if (!input || typeof input !== 'string') {
    return 'unknown';
  }

  const withoutTraversal = input.replace(/\.\./g, '');
  const safeChars = withoutTraversal
    .replace(/[\/\\:*?"<>|]/g, '_')
    .replace(/[^a-zA-Z0-9_-]/g, '_');
  const trimmed = safeChars.substring(0, maxLength).replace(/^_+|_+$/g, '');

  // `||` is deliberate: an empty result must fall back to the placeholder.
  return trimmed || 'unknown';
}

/**
 * Initialize the data directory structure.
 *
 * Creates the data root and its `audio`, `transcriptions`, `metadata` and
 * `fonts` sub-directories one after another, then logs the root location.
 *
 * @throws Propagates the first error rethrown by `ensureDir`.
 */
export async function initializeDataDirs(): Promise<void> {
  const dirs = [
    getDataPath(),
    getAudioPath(),
    getTranscriptionsPath(),
    getMetadataPath(),
    getFontsPath(),
  ];

  for (const dir of dirs) {
    await ensureDir(dir);
  }

  console.log(`[DataPath] Initialized data directories at: ${getDataDir()}`);
}