Spaces:
Running
Running
File size: 6,369 Bytes
88b6846 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 |
import { promises as fs } from 'fs';
import path from 'path';
import { getDataDir, getAudioPath, getTranscriptionsPath, getMetadataPath, getFontsPath } from './dataPath';
// Set to true once startCleanupScheduler() has armed its timers, so that a
// second call does not schedule duplicate cleanup runs.
let cleanupSchedulerRunning = false;

// Files whose modification time lies further back than this are deleted
// (24 hours, expressed in milliseconds).
const MAX_FILE_AGE_MS = 24 * 3600 * 1000;

// Pause between two periodic cleanup passes (6 hours, expressed in milliseconds).
const CLEANUP_INTERVAL_MS = 6 * 3600 * 1000;
/**
 * Delete regular files under `dirPath` whose modification time is more than
 * MAX_FILE_AGE_MS in the past, descending into subdirectories first.
 *
 * Subdirectories that are empty after their contents have been processed are
 * removed; `dirPath` itself is never removed. Entries that are neither regular
 * files nor directories (e.g. symbolic links) are left untouched.
 *
 * Errors never propagate to the caller: a missing `dirPath` is silently treated
 * as "nothing to do", any other failure is logged and the scan continues.
 *
 * @param dirPath - Directory to scan.
 * @param dryRun - When true, nothing on disk is changed. Expired files are still
 *   counted and logged, using "[dry-run] Would ..." wording so the log cannot be
 *   mistaken for a real deletion.
 * @returns Number of files deleted (in a dry run: number that would be deleted).
 */
async function cleanupDirectory(dirPath: string, dryRun: boolean = false): Promise<number> {
  let deletedCount = 0;
  // One reference time for the whole pass so every file is judged consistently.
  const now = Date.now();

  try {
    const entries = await fs.readdir(dirPath, { withFileTypes: true });

    for (const entry of entries) {
      const fullPath = path.join(dirPath, entry.name);

      if (entry.isDirectory()) {
        // Depth-first: empty the subdirectory before deciding whether it can go.
        deletedCount += await cleanupDirectory(fullPath, dryRun);

        try {
          const contents = await fs.readdir(fullPath);
          if (contents.length === 0) {
            if (dryRun) {
              console.log(`[Cleanup] [dry-run] Would remove empty directory: ${fullPath}`);
            } else {
              await fs.rmdir(fullPath);
              console.log(`[Cleanup] Removed empty directory: ${fullPath}`);
            }
          }
        } catch {
          // Best effort only: the directory may have vanished or received a new
          // file in the meantime. Read failures on it were already reported by
          // the recursive call above, so nothing is logged here.
        }
      } else if (entry.isFile()) {
        try {
          const stats = await fs.stat(fullPath);
          const fileAge = now - stats.mtimeMs;

          if (fileAge > MAX_FILE_AGE_MS) {
            const ageHours = Math.round(fileAge / 3600000);
            if (dryRun) {
              console.log(`[Cleanup] [dry-run] Would delete old file: ${entry.name} (age: ${ageHours}h)`);
            } else {
              await fs.unlink(fullPath);
              console.log(`[Cleanup] Deleted old file: ${entry.name} (age: ${ageHours}h)`);
            }
            // Counted only after a successful unlink (or in dry-run mode).
            deletedCount++;
          }
        } catch (error) {
          console.error(`[Cleanup] Error processing file ${fullPath}:`, error);
        }
      }
    }
  } catch (error) {
    // A directory that does not exist yet simply has nothing to clean.
    if ((error as NodeJS.ErrnoException).code !== 'ENOENT') {
      console.error(`[Cleanup] Error reading directory ${dirPath}:`, error);
    }
  }

  return deletedCount;
}
/**
 * Run one cleanup pass over the directories returned by getAudioPath() and
 * getTranscriptionsPath(), then prune the dataset metadata file.
 *
 * Failures in any single step are logged and do not abort the remaining steps,
 * so this function does not reject because of a cleanup error.
 *
 * @param dryRun - When true, no state is modified: the directory scan is run in
 *   dry-run mode and the metadata file is not rewritten.
 * @returns `totalDeleted` — number of files deleted (in a dry run: number that
 *   would be deleted); `timestamp` — ISO-8601 time at which the pass finished.
 */
export async function runCleanup(dryRun: boolean = false): Promise<{ totalDeleted: number; timestamp: string }> {
  const startTime = Date.now();
  console.log(`[Cleanup] Starting cleanup at ${new Date().toISOString()}...`);

  let totalDeleted = 0;

  // Directories to clean
  const directoriesToClean = [
    getAudioPath(),
    getTranscriptionsPath(),
  ];

  for (const dir of directoriesToClean) {
    try {
      totalDeleted += await cleanupDirectory(dir, dryRun);
    } catch (error) {
      console.error(`[Cleanup] Error cleaning ${dir}:`, error);
    }
  }

  // Pruning the metadata rewrites the metadata file on disk, so it must be
  // skipped in a dry run, which promises not to change anything.
  if (!dryRun) {
    try {
      await cleanupMetadata();
    } catch (error) {
      console.error('[Cleanup] Error cleaning metadata:', error);
    }
  }

  const duration = Date.now() - startTime;
  if (dryRun) {
    console.log(`[Cleanup] Dry run completed in ${duration}ms. ${totalDeleted} files would be deleted.`);
  } else {
    console.log(`[Cleanup] Completed in ${duration}ms. Deleted ${totalDeleted} files.`);
  }

  return {
    totalDeleted,
    timestamp: new Date().toISOString()
  };
}
/**
 * Refresh `dataset_info.json` inside the directory returned by getMetadataPath().
 *
 * Sets `last_cleanup` to the current ISO-8601 time and, when `recent_recordings`
 * is an array, keeps only entries whose `timestamp` is less than MAX_FILE_AGE_MS
 * old. Entries that are not objects, lack a timestamp, or carry an unparseable
 * timestamp are dropped. The file is rewritten with 2-space indentation.
 *
 * A missing file is not an error (nothing is written).
 *
 * @throws TypeError if the file parses to something other than a JSON object.
 * @throws SyntaxError if the file is not valid JSON, and any I/O error other
 *   than ENOENT.
 */
async function cleanupMetadata(): Promise<void> {
  const metadataPath = path.join(getMetadataPath(), 'dataset_info.json');

  try {
    const content = await fs.readFile(metadataPath, 'utf-8');
    const parsed: unknown = JSON.parse(content);

    // JSON.parse can yield null, arrays or primitives; none of them can carry
    // the fields updated below, so fail with a message that names the file.
    if (typeof parsed !== 'object' || parsed === null || Array.isArray(parsed)) {
      throw new TypeError(`Metadata file ${metadataPath} does not contain a JSON object`);
    }
    const metadata = parsed as Record<string, unknown>;

    // Update last cleanup timestamp
    metadata.last_cleanup = new Date().toISOString();

    // Drop recent_recordings entries that are too old or malformed
    const recordings = metadata.recent_recordings;
    if (Array.isArray(recordings)) {
      const now = Date.now();
      metadata.recent_recordings = recordings.filter((rec: unknown) => {
        // A null or non-object entry must not abort the whole cleanup.
        if (typeof rec !== 'object' || rec === null) return false;
        const timestamp = (rec as { timestamp?: unknown }).timestamp;
        if (typeof timestamp !== 'string' && typeof timestamp !== 'number') return false;
        if (!timestamp) return false;
        // An unparseable timestamp gives NaN, which fails the comparison and
        // therefore drops the entry as well.
        const recTime = new Date(timestamp).getTime();
        return (now - recTime) < MAX_FILE_AGE_MS;
      });
    }

    await fs.writeFile(metadataPath, JSON.stringify(metadata, null, 2));
  } catch (error) {
    // Metadata file might not exist
    if ((error as NodeJS.ErrnoException).code !== 'ENOENT') {
      throw error;
    }
  }
}
/**
 * Arm the background cleanup timers.
 *
 * The scheduler is only started when the process is considered to be running
 * on HF Spaces, i.e. the SPACE_ID environment variable is set or getDataDir()
 * returns '/data'. Otherwise, and on any call after the first successful one,
 * a message is logged and nothing else happens.
 *
 * Once started, runCleanup() is invoked 5 seconds later and then again every
 * CLEANUP_INTERVAL_MS. Errors thrown by a run are logged, never propagated.
 */
export function startCleanupScheduler(): void {
  const onHFSpaces = Boolean(process.env.SPACE_ID) || getDataDir() === '/data';
  if (!onHFSpaces) {
    console.log('[Cleanup] Not running on HF Spaces, skipping cleanup scheduler');
    return;
  }

  if (cleanupSchedulerRunning) {
    console.log('[Cleanup] Scheduler already running');
    return;
  }

  cleanupSchedulerRunning = true;
  // The durations in this message are written out by hand; keep them in step
  // with MAX_FILE_AGE_MS and CLEANUP_INTERVAL_MS.
  console.log('[Cleanup] Starting cleanup scheduler (24h max age, 6h interval)');

  // Builds a timer callback that runs one cleanup pass and reports a failure
  // under the given message instead of letting the rejection escape.
  const guardedRun = (failureMessage: string) => async (): Promise<void> => {
    try {
      await runCleanup();
    } catch (error) {
      console.error(failureMessage, error);
    }
  };

  // First pass shortly after startup, leaving the app a moment to initialize.
  setTimeout(guardedRun('[Cleanup] Error during startup cleanup:'), 5000);

  // Subsequent passes at the fixed interval.
  setInterval(guardedRun('[Cleanup] Error during scheduled cleanup:'), CLEANUP_INTERVAL_MS);
}
/**
 * Report the cleanup settings currently in effect.
 *
 * @returns The maximum file age and the cleanup interval, each in milliseconds
 *   and in hours, whether the scheduler has been started, and the value of
 *   getDataDir().
 */
export function getCleanupConfig() {
  const toHours = (ms: number): number => ms / 3600000;
  const maxFileAgeMs = MAX_FILE_AGE_MS;
  const cleanupIntervalMs = CLEANUP_INTERVAL_MS;

  return {
    maxFileAgeMs,
    maxFileAgeHours: toHours(maxFileAgeMs),
    cleanupIntervalMs,
    cleanupIntervalHours: toHours(cleanupIntervalMs),
    isSchedulerRunning: cleanupSchedulerRunning,
    dataDir: getDataDir(),
  };
}
|