File size: 6,369 Bytes
88b6846
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
import { promises as fs } from 'fs';
import path from 'path';
import { getDataDir, getAudioPath, getTranscriptionsPath, getMetadataPath, getFontsPath } from './dataPath';

// Number of milliseconds in one hour; base unit for the durations below
const MS_PER_HOUR = 60 * 60 * 1000;

// Delay between two scheduled cleanup passes, in milliseconds (6 hours)
const CLEANUP_INTERVAL_MS = 6 * MS_PER_HOUR;

// Age threshold in milliseconds (24 hours); anything older is eligible for cleanup
const MAX_FILE_AGE_MS = 24 * MS_PER_HOUR;

// Set to true once startCleanupScheduler() has armed its timers, so a second call is a no-op
let cleanupSchedulerRunning = false;

/**
 * Delete files older than MAX_FILE_AGE_MS from a directory tree.
 *
 * Walks `dirPath` depth-first. A regular file is deleted when its modification
 * time lies more than MAX_FILE_AGE_MS in the past. After a subdirectory has
 * been processed it is removed if it is empty. `dirPath` itself is never
 * removed, and entries that are neither regular files nor directories are
 * skipped.
 *
 * Errors are logged rather than thrown, so one failing entry does not stop the
 * walk. A missing `dirPath` is treated as "nothing to do" without logging.
 *
 * @param dirPath - Directory to clean.
 * @param dryRun - When true, nothing is deleted or removed; log lines say
 *   "Would ..." and the return value counts files that would have been deleted.
 *   Note that a dry run only reports directories that are empty right now, not
 *   those that would become empty once their old files were deleted.
 * @returns Number of files deleted (or, in a dry run, eligible for deletion).
 */
async function cleanupDirectory(dirPath: string, dryRun: boolean = false): Promise<number> {
    let deletedCount = 0;
    const now = Date.now();

    try {
        const entries = await fs.readdir(dirPath, { withFileTypes: true });

        for (const entry of entries) {
            const fullPath = path.join(dirPath, entry.name);

            if (entry.isDirectory()) {
                // Clean the subtree first so the emptiness check below sees the result
                deletedCount += await cleanupDirectory(fullPath, dryRun);

                // Try to remove the directory if nothing is left in it
                try {
                    const contents = await fs.readdir(fullPath);
                    if (contents.length === 0) {
                        if (dryRun) {
                            console.log(`[Cleanup] Would remove empty directory: ${fullPath}`);
                        } else {
                            await fs.rmdir(fullPath);
                            console.log(`[Cleanup] Removed empty directory: ${fullPath}`);
                        }
                    }
                } catch (error) {
                    // ENOENT / ENOTEMPTY mean the directory vanished or was refilled
                    // between the check and the removal; anything else is worth reporting.
                    const code = (error as NodeJS.ErrnoException).code;
                    if (code !== 'ENOENT' && code !== 'ENOTEMPTY') {
                        console.error(`[Cleanup] Error removing directory ${fullPath}:`, error);
                    }
                }
            } else if (entry.isFile()) {
                try {
                    const stats = await fs.stat(fullPath);
                    const fileAge = now - stats.mtimeMs;

                    if (fileAge > MAX_FILE_AGE_MS) {
                        const ageHours = Math.round(fileAge / 3600000);
                        if (dryRun) {
                            console.log(`[Cleanup] Would delete old file: ${entry.name} (age: ${ageHours}h)`);
                        } else {
                            await fs.unlink(fullPath);
                            console.log(`[Cleanup] Deleted old file: ${entry.name} (age: ${ageHours}h)`);
                        }
                        deletedCount++;
                    }
                } catch (error) {
                    console.error(`[Cleanup] Error processing file ${fullPath}:`, error);
                }
            }
        }
    } catch (error) {
        // Directory might not exist yet
        if ((error as NodeJS.ErrnoException).code !== 'ENOENT') {
            console.error(`[Cleanup] Error reading directory ${dirPath}:`, error);
        }
    }

    return deletedCount;
}

/**
 * Run cleanup on all data directories.
 *
 * Cleans the audio and transcription directories via cleanupDirectory() and
 * then prunes the metadata file via cleanupMetadata(). Failures in either step
 * are logged and do not abort the run.
 *
 * @param dryRun - When true, no file is deleted and the metadata file is left
 *   untouched; `totalDeleted` then reports how many files would be deleted.
 * @returns The number of files deleted (or eligible, in a dry run) and the ISO
 *   timestamp at which the run finished.
 */
export async function runCleanup(dryRun: boolean = false): Promise<{ totalDeleted: number; timestamp: string }> {
    const startTime = Date.now();
    console.log(`[Cleanup] Starting cleanup at ${new Date().toISOString()}...`);

    let totalDeleted = 0;

    // Directories to clean
    const directoriesToClean = [
        getAudioPath(),
        getTranscriptionsPath(),
    ];

    for (const dir of directoriesToClean) {
        try {
            const deleted = await cleanupDirectory(dir, dryRun);
            totalDeleted += deleted;
        } catch (error) {
            console.error(`[Cleanup] Error cleaning ${dir}:`, error);
        }
    }

    // Clean up old metadata entries. cleanupMetadata() rewrites the metadata
    // file, so it must be skipped in a dry run to keep the run side-effect free.
    if (!dryRun) {
        try {
            await cleanupMetadata();
        } catch (error) {
            console.error('[Cleanup] Error cleaning metadata:', error);
        }
    }

    const duration = Date.now() - startTime;
    if (dryRun) {
        console.log(`[Cleanup] Dry run completed in ${duration}ms. Would delete ${totalDeleted} files.`);
    } else {
        console.log(`[Cleanup] Completed in ${duration}ms. Deleted ${totalDeleted} files.`);
    }

    return {
        totalDeleted,
        timestamp: new Date().toISOString()
    };
}

/**
 * Clean up old entries from the metadata file (`dataset_info.json`).
 *
 * Stamps `last_cleanup` with the current time and, when `recent_recordings`
 * is an array, keeps only the entries whose `timestamp` is less than
 * MAX_FILE_AGE_MS old. Entries that are not objects, have no timestamp, or
 * have an unparseable timestamp are dropped. The file is then rewritten.
 *
 * A missing metadata file is not an error; the function simply returns.
 *
 * @throws If the file cannot be read or written (other than ENOENT), is not
 *   valid JSON, or its root value is not a JSON object/array.
 */
async function cleanupMetadata(): Promise<void> {
    const metadataPath = path.join(getMetadataPath(), 'dataset_info.json');

    try {
        const content = await fs.readFile(metadataPath, 'utf-8');
        const parsed: unknown = JSON.parse(content);

        // Fail with a clear message instead of an opaque TypeError on property assignment
        if (typeof parsed !== 'object' || parsed === null) {
            throw new Error(`Metadata file ${metadataPath} does not contain a JSON object`);
        }
        const metadata = parsed as Record<string, unknown>;

        // Update last cleanup timestamp
        metadata.last_cleanup = new Date().toISOString();

        // Clear old recent_recordings if they exist
        const recordings = metadata.recent_recordings;
        if (Array.isArray(recordings)) {
            const now = Date.now();
            metadata.recent_recordings = recordings.filter((rec: unknown) => {
                // A null or primitive entry would otherwise throw and abort the whole update
                if (typeof rec !== 'object' || rec === null) return false;

                const { timestamp } = rec as { timestamp?: unknown };
                if (!timestamp) return false;

                // An unparseable timestamp yields NaN, which fails the comparison and is dropped
                const recTime = new Date(timestamp as string | number).getTime();
                return (now - recTime) < MAX_FILE_AGE_MS;
            });
        }

        await fs.writeFile(metadataPath, JSON.stringify(metadata, null, 2));
    } catch (error) {
        // Metadata file might not exist
        if ((error as NodeJS.ErrnoException).code !== 'ENOENT') {
            throw error;
        }
    }
}

/**
 * Arm the cleanup timers.
 *
 * Does nothing unless the process looks like an HF Spaces deployment, i.e. the
 * SPACE_ID environment variable is set or getDataDir() returns '/data'. Also
 * does nothing if the scheduler was already started. Otherwise one cleanup is
 * queued 5 seconds from now and further cleanups run every
 * CLEANUP_INTERVAL_MS.
 */
export function startCleanupScheduler(): void {
    const onHFSpaces = Boolean(process.env.SPACE_ID) || getDataDir() === '/data';
    if (!onHFSpaces) {
        console.log('[Cleanup] Not running on HF Spaces, skipping cleanup scheduler');
        return;
    }

    if (cleanupSchedulerRunning) {
        console.log('[Cleanup] Scheduler already running');
        return;
    }

    cleanupSchedulerRunning = true;
    console.log('[Cleanup] Starting cleanup scheduler (24h max age, 6h interval)');

    // Shared timer body: run one cleanup and report any failure with the given prefix
    const runAndReport = async (failureMessage: string): Promise<void> => {
        try {
            await runCleanup();
        } catch (error) {
            console.error(failureMessage, error);
        }
    };

    // First pass shortly after startup, giving the app a moment to initialize
    setTimeout(() => {
        void runAndReport('[Cleanup] Error during startup cleanup:');
    }, 5000);

    // Recurring passes
    setInterval(() => {
        void runAndReport('[Cleanup] Error during scheduled cleanup:');
    }, CLEANUP_INTERVAL_MS);
}

/**
 * Report the cleanup settings currently in effect.
 *
 * Returns the maximum file age and the cleanup interval (each in milliseconds
 * and in hours), whether the scheduler has been started, and the data
 * directory reported by getDataDir().
 */
export function getCleanupConfig() {
    const msPerHour = 3600000;
    const toHours = (ms: number) => ms / msPerHour;

    return {
        maxFileAgeMs: MAX_FILE_AGE_MS,
        maxFileAgeHours: toHours(MAX_FILE_AGE_MS),
        cleanupIntervalMs: CLEANUP_INTERVAL_MS,
        cleanupIntervalHours: toHours(CLEANUP_INTERVAL_MS),
        isSchedulerRunning: cleanupSchedulerRunning,
        dataDir: getDataDir(),
    };
}