| import crypto from 'node:crypto'; |
| import fs from 'node:fs'; |
| import path from 'node:path'; |
| import express from 'express'; |
| import mime from 'mime-types'; |
| import { getSettingsBackupFilePrefix } from './settings.js'; |
| import { CHAT_BACKUPS_PREFIX } from './chats.js'; |
| import { isPathUnderParent, tryParse } from '../util.js'; |
| import { SETTINGS_FILE } from '../constants.js'; |
|
|
/**
 * Computes the SHA-256 digest of the given input.
 * @param {string} str Value to hash
 * @returns {string} Digest encoded as a hexadecimal string
 */
const sha256 = (str) => {
    const hasher = crypto.createHash('sha256');
    hasher.update(str);
    return hasher.digest('hex');
};
|
|
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
|
|
| |
| |
| |
| |
| |
| |
| |
| |
|
|
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
|
|
| |
| |
| |
| |
| |
|
|
| |
| |
| |
| |
|
|
| |
| |
| |
| |
|
|
| |
| |
| |
| |
| |
|
|
| |
| |
| |
| |
| |
| |
| |
| |
| |
|
|
| |
| |
| |
| |
| |
|
|
| |
| |
| |
| |
/**
 * Finds files in a user's data directories that nothing appears to reference
 * any more, and issues tokens that authorise viewing or deleting exactly the
 * files listed in a generated report.
 */
export class DataMaidService {
    /**
     * Tokens issued by `generateToken`, keyed by the token string. Each entry
     * holds the owning user handle and the list of reported paths together
     * with the SHA-256 hash of each path.
     * @type {Map<string, {handle: string, paths: {path: string, hash: string}[]}>}
     */
    static TOKENS = new Map();

    /**
     * Report categories in output order, paired with a flag telling whether
     * the sanitized record exposes the name of the file's parent directory.
     */
    static #CATEGORIES = Object.freeze([
        ['images', true],
        ['files', false],
        ['chats', true],
        ['groupChats', false],
        ['avatarThumbnails', false],
        ['backgroundThumbnails', false],
        ['personaThumbnails', false],
        ['chatBackups', false],
        ['settingsBackups', false],
    ]);

    /**
     * @param {string} handle Handle of the user whose data is scanned
     * @param {object} directories Paths of the user's data directories. This
     * class reads `root`, `userImages`, `files`, `characters`, `chats`,
     * `groups`, `groupChats`, `backgrounds`, `avatars`, `backups`,
     * `thumbnailsAvatar`, `thumbnailsBg` and `thumbnailsPersona`.
     */
    constructor(handle, directories) {
        this.handle = handle;
        this.directories = directories;
    }

    /**
     * Scans every category and returns the full paths of the files found.
     * A category whose scan fails is logged and reported as an empty list.
     * @returns {Promise<Record<string, string[]>>} Full paths grouped by category
     */
    async generateReport() {
        const report = {
            images: await this.#collectImages(),
            files: await this.#collectFiles(),
            chats: await this.#collectChats(),
            groupChats: await this.#collectGroupChats(),
            avatarThumbnails: await this.#collectAvatarThumbnails(),
            backgroundThumbnails: await this.#collectBackgroundThumbnails(),
            personaThumbnails: await this.#collectPersonaThumbnails(),
            chatBackups: await this.#collectChatBackups(),
            settingsBackups: await this.#collectSettingsBackups(),
        };

        return report;
    }

    /**
     * Converts a full path into a record that does not expose the full path.
     * @param {string} name Full path of the file
     * @param {boolean} withParent Whether to include the parent directory name
     * @returns {Promise<{name: string, hash: string, parent: string|undefined, size: number|undefined, mtime: number|undefined}>}
     */
    async #sanitizeRecord(name, withParent) {
        // A single stat call avoids the exists-then-stat race: a file that
        // disappears mid-report yields a record without size/mtime instead of
        // rejecting the whole report.
        const stat = await fs.promises.stat(name).catch(() => null);
        return {
            name: path.basename(name),
            hash: sha256(name),
            parent: withParent ? path.basename(path.dirname(name)) : undefined,
            size: stat?.size,
            mtime: stat?.mtimeMs,
        };
    }

    /**
     * Produces the client-facing version of a raw report: every path becomes a
     * record with the file name, the hash of the full path, size and mtime.
     * Categories missing from the input are returned as empty lists.
     * @param {Record<string, string[]>} report Raw report from `generateReport`
     * @returns {Promise<Record<string, object[]>>} Sanitized report
     */
    async sanitizeReport(report) {
        const sanitizedReport = {};

        for (const [category, withParent] of DataMaidService.#CATEGORIES) {
            const paths = Array.isArray(report?.[category]) ? report[category] : [];
            sanitizedReport[category] = await Promise.all(paths.map(p => this.#sanitizeRecord(p, withParent)));
        }

        return sanitizedReport;
    }

    /**
     * Resolves references stored in chats/settings to normalized full paths
     * under the user's root directory. Non-string references are ignored
     * because they cannot name a file.
     * @param {Iterable<any>} references Stored references
     * @param {(reference: string) => boolean} [isLocal] Returns false for references to skip
     * @returns {Set<string>} Normalized full paths
     */
    #resolveReferences(references, isLocal = () => true) {
        const fullPaths = new Set();
        for (const reference of references) {
            if (typeof reference !== 'string' || !isLocal(reference)) {
                continue;
            }
            fullPaths.add(path.normalize(path.join(this.directories.root, reference)));
        }
        return fullPaths;
    }

    /**
     * Collects files in the user images directory (top level and one level of
     * subdirectories) that no chat message or chat background refers to.
     * @returns {Promise<string[]>} Full paths of unreferenced images
     */
    async #collectImages() {
        const result = [];

        try {
            const messages = await this.#parseAllChats(x => !!x?.extra?.image || !!x?.extra?.video || Array.isArray(x?.extra?.image_swipes) || Array.isArray(x?.extra?.media));
            const knownImages = new Set();
            for (const message of messages) {
                const extra = message?.extra;
                if (extra?.image) {
                    knownImages.add(extra.image);
                }
                if (extra?.video) {
                    knownImages.add(extra.video);
                }
                if (Array.isArray(extra?.image_swipes)) {
                    for (const swipe of extra.image_swipes) {
                        knownImages.add(swipe);
                    }
                }
                if (Array.isArray(extra?.media)) {
                    for (const media of extra.media) {
                        if (media?.url) {
                            knownImages.add(media.url);
                        }
                    }
                }
            }

            const metadata = await this.#parseAllMetadata(x => Array.isArray(x?.chat_backgrounds) && x.chat_backgrounds.length > 0);
            for (const meta of metadata) {
                for (const background of meta.chat_backgrounds) {
                    if (background) {
                        knownImages.add(background);
                    }
                }
            }

            // Remote URLs and data URIs never correspond to a file on disk.
            const knownImageFullPaths = this.#resolveReferences(
                knownImages,
                image => !image.startsWith('http') && !image.startsWith('data:'),
            );

            const imagesDirectory = this.directories.userImages;
            const entries = await fs.promises.readdir(imagesDirectory, { withFileTypes: true });
            for (const dirent of entries) {
                // Built from the scanned directory rather than `dirent.parentPath`,
                // which is missing on older Node.js releases.
                const direntPath = path.join(imagesDirectory, dirent.name);
                if (dirent.isFile() && !knownImageFullPaths.has(direntPath)) {
                    result.push(direntPath);
                }
                if (dirent.isDirectory()) {
                    const subdirFiles = await fs.promises.readdir(direntPath, { withFileTypes: true });
                    for (const file of subdirFiles) {
                        const subdirFilePath = path.join(direntPath, file.name);
                        if (file.isFile() && !knownImageFullPaths.has(subdirFilePath)) {
                            result.push(subdirFilePath);
                        }
                    }
                }
            }
        } catch (error) {
            console.error('[Data Maid] Error collecting user images:', error);
        }

        return result;
    }

    /**
     * Collects files in the user files directory that are not referenced by
     * any chat message, chat metadata attachment, or attachment listed in the
     * settings file.
     * @returns {Promise<string[]>} Full paths of unreferenced files
     */
    async #collectFiles() {
        const result = [];

        try {
            const messages = await this.#parseAllChats(x => !!x?.extra?.file?.url || (Array.isArray(x?.extra?.files) && x.extra.files.length > 0));
            const knownFiles = new Set();
            const addUrls = (items) => {
                for (const item of items) {
                    if (item?.url) {
                        knownFiles.add(item.url);
                    }
                }
            };

            for (const message of messages) {
                if (message?.extra?.file?.url) {
                    knownFiles.add(message.extra.file.url);
                }
                if (Array.isArray(message?.extra?.files)) {
                    addUrls(message.extra.files);
                }
            }

            const metadata = await this.#parseAllMetadata(x => Array.isArray(x?.attachments) && x.attachments.length > 0);
            for (const meta of metadata) {
                addUrls(meta.attachments);
            }

            const pathToSettings = path.join(this.directories.root, SETTINGS_FILE);
            if (fs.existsSync(pathToSettings)) {
                try {
                    const settingsContent = await fs.promises.readFile(pathToSettings, 'utf-8');
                    const extensionSettings = tryParse(settingsContent)?.extension_settings;
                    if (Array.isArray(extensionSettings?.attachments)) {
                        addUrls(extensionSettings.attachments);
                    }
                    const characterAttachments = extensionSettings?.character_attachments;
                    // `typeof null` is 'object', so null has to be excluded explicitly.
                    if (characterAttachments && typeof characterAttachments === 'object') {
                        for (const files of Object.values(characterAttachments)) {
                            if (Array.isArray(files)) {
                                addUrls(files);
                            }
                        }
                    }
                } catch (error) {
                    console.error('[Data Maid] Error reading settings file:', error);
                }
            }

            const knownFileFullPaths = this.#resolveReferences(knownFiles);
            const files = await fs.promises.readdir(this.directories.files, { withFileTypes: true });
            for (const file of files) {
                const filePath = path.join(this.directories.files, file.name);
                if (file.isFile() && !knownFileFullPaths.has(filePath)) {
                    result.push(filePath);
                }
            }
        } catch (error) {
            console.error('[Data Maid] Error collecting user files:', error);
        }

        return result;
    }

    /**
     * Collects `.jsonl` chat files that live in a chat folder with no matching
     * `.png` file in the characters directory.
     * @returns {Promise<string[]>} Full paths of such chat files
     */
    async #collectChats() {
        const result = [];

        try {
            const knownChatFolders = new Set();
            const characters = await fs.promises.readdir(this.directories.characters, { withFileTypes: true });
            for (const file of characters) {
                // Strip only the extension; String.replace would remove the
                // first '.png' anywhere in the name.
                const { name, ext } = path.parse(file.name);
                if (file.isFile() && ext === '.png') {
                    knownChatFolders.add(name);
                }
            }

            const chatFolders = await fs.promises.readdir(this.directories.chats, { withFileTypes: true });
            for (const folder of chatFolders) {
                if (!folder.isDirectory() || knownChatFolders.has(folder.name)) {
                    continue;
                }
                const folderPath = path.join(this.directories.chats, folder.name);
                const chatFiles = await fs.promises.readdir(folderPath, { withFileTypes: true });
                for (const file of chatFiles) {
                    if (file.isFile() && path.parse(file.name).ext === '.jsonl') {
                        result.push(path.join(folderPath, file.name));
                    }
                }
            }
        } catch (error) {
            console.error('[Data Maid] Error collecting character chats:', error);
        }

        return result;
    }

    /**
     * Reads and parses one group definition file from the groups directory.
     * @param {string} fileName File name inside the groups directory
     * @returns {Promise<any>} Whatever `tryParse` returns for the file content
     * @throws {Error} When the file cannot be read; the original error is attached as `cause`
     */
    async #readGroupDefinition(fileName) {
        const pathToFile = path.join(this.directories.groups, fileName);
        try {
            const fileContent = await fs.promises.readFile(pathToFile, 'utf-8');
            return tryParse(fileContent);
        } catch (error) {
            throw new Error(`Error parsing group chat file ${fileName}`, { cause: error });
        }
    }

    /**
     * Collects `.jsonl` group chat files whose name (without extension) is not
     * listed as `chat_id` or in `chats` of any group definition.
     * @returns {Promise<string[]>} Full paths of such group chat files
     */
    async #collectGroupChats() {
        const result = [];

        try {
            const groups = await fs.promises.readdir(this.directories.groups, { withFileTypes: true });
            const knownGroupChats = new Set();
            for (const file of groups) {
                if (!file.isFile() || path.parse(file.name).ext !== '.json') {
                    continue;
                }
                // A definition that cannot be read aborts the scan: skipping it
                // would make all of its chats look unreferenced.
                const groupData = await this.#readGroupDefinition(file.name);
                if (groupData?.chat_id) {
                    knownGroupChats.add(groupData.chat_id);
                }
                if (Array.isArray(groupData?.chats)) {
                    for (const chat of groupData.chats) {
                        knownGroupChats.add(chat);
                    }
                }
            }

            const groupChats = await fs.promises.readdir(this.directories.groupChats, { withFileTypes: true });
            for (const file of groupChats) {
                const { name, ext } = path.parse(file.name);
                if (file.isFile() && ext === '.jsonl' && !knownGroupChats.has(name)) {
                    result.push(path.join(this.directories.groupChats, file.name));
                }
            }
        } catch (error) {
            console.error('[Data Maid] Error collecting group chats:', error);
        }

        return result;
    }

    /**
     * Lists files in `derivedDirectory` that have no same-named file in
     * `sourceDirectory`.
     * @param {string} sourceDirectory Directory holding the original files
     * @param {string} derivedDirectory Directory holding the derived files
     * @returns {Promise<string[]>} Full paths of unmatched derived files
     * @throws When either directory cannot be read
     */
    async #findUnmatchedFiles(sourceDirectory, derivedDirectory) {
        const knownNames = new Set();
        const sources = await fs.promises.readdir(sourceDirectory, { withFileTypes: true });
        for (const file of sources) {
            if (file.isFile()) {
                knownNames.add(file.name);
            }
        }

        const unmatched = [];
        const derived = await fs.promises.readdir(derivedDirectory, { withFileTypes: true });
        for (const file of derived) {
            if (file.isFile() && !knownNames.has(file.name)) {
                unmatched.push(path.join(derivedDirectory, file.name));
            }
        }
        return unmatched;
    }

    /**
     * Collects avatar thumbnails with no same-named file in the characters directory.
     * @returns {Promise<string[]>} Full paths of such thumbnails
     */
    async #collectAvatarThumbnails() {
        try {
            return await this.#findUnmatchedFiles(this.directories.characters, this.directories.thumbnailsAvatar);
        } catch (error) {
            console.error('[Data Maid] Error collecting avatar thumbnails:', error);
            return [];
        }
    }

    /**
     * Collects background thumbnails with no same-named file in the backgrounds directory.
     * @returns {Promise<string[]>} Full paths of such thumbnails
     */
    async #collectBackgroundThumbnails() {
        try {
            return await this.#findUnmatchedFiles(this.directories.backgrounds, this.directories.thumbnailsBg);
        } catch (error) {
            console.error('[Data Maid] Error collecting background thumbnails:', error);
            return [];
        }
    }

    /**
     * Collects persona thumbnails with no same-named file in the avatars directory.
     * @returns {Promise<string[]>} Full paths of such thumbnails
     */
    async #collectPersonaThumbnails() {
        try {
            return await this.#findUnmatchedFiles(this.directories.avatars, this.directories.thumbnailsPersona);
        } catch (error) {
            console.error('[Data Maid] Error collecting persona thumbnails:', error);
            return [];
        }
    }

    /**
     * Lists files in the backups directory whose name starts with `prefix`.
     * @param {string} prefix File name prefix
     * @returns {Promise<string[]>} Full paths of matching files
     * @throws When the backups directory cannot be read
     */
    async #findBackups(prefix) {
        const matches = [];
        const backups = await fs.promises.readdir(this.directories.backups, { withFileTypes: true });
        for (const file of backups) {
            if (file.isFile() && file.name.startsWith(prefix)) {
                matches.push(path.join(this.directories.backups, file.name));
            }
        }
        return matches;
    }

    /**
     * Collects all files in the backups directory named with the chat backup prefix.
     * @returns {Promise<string[]>} Full paths of chat backups
     */
    async #collectChatBackups() {
        try {
            return await this.#findBackups(CHAT_BACKUPS_PREFIX);
        } catch (error) {
            console.error('[Data Maid] Error collecting chat backups:', error);
            return [];
        }
    }

    /**
     * Collects all files in the backups directory named with this user's
     * settings backup prefix.
     * @returns {Promise<string[]>} Full paths of settings backups
     */
    async #collectSettingsBackups() {
        try {
            return await this.#findBackups(getSettingsBackupFilePrefix(this.handle));
        } catch (error) {
            console.error('[Data Maid] Error collecting settings backups:', error);
            return [];
        }
    }

    /**
     * Lists every `.jsonl` chat file: group chats first, then the files inside
     * each folder of the chats directory.
     * @returns {Promise<string[]>} Full paths of chat files
     * @throws When a directory cannot be read
     */
    async #listChatFiles() {
        const isChatFile = entry => entry.isFile() && path.parse(entry.name).ext === '.jsonl';
        const chatFiles = [];

        const groupChats = await fs.promises.readdir(this.directories.groupChats, { withFileTypes: true });
        for (const file of groupChats) {
            if (isChatFile(file)) {
                chatFiles.push(path.join(this.directories.groupChats, file.name));
            }
        }

        const chatDirectories = await fs.promises.readdir(this.directories.chats, { withFileTypes: true });
        for (const directory of chatDirectories) {
            if (!directory.isDirectory()) {
                continue;
            }
            const directoryPath = path.join(this.directories.chats, directory.name);
            const files = await fs.promises.readdir(directoryPath, { withFileTypes: true });
            for (const file of files) {
                if (isChatFile(file)) {
                    chatFiles.push(path.join(directoryPath, file.name));
                }
            }
        }

        return chatFiles;
    }

    /**
     * Parses every chat file and returns the entries accepted by `filterFn`.
     *
     * Read errors are deliberately NOT swallowed here. The result is used to
     * decide which files are still referenced, so returning a partial list
     * would make referenced files look deletable. Callers catch the error and
     * report nothing for their category instead.
     * @param {(message: any) => boolean} filterFn Entry predicate
     * @returns {Promise<any[]>} Matching entries from all chats
     * @throws When a directory or chat file cannot be read
     */
    async #parseAllChats(filterFn) {
        const allChats = [];

        for (const filePath of await this.#listChatFiles()) {
            // Pushed one by one: spreading a very large array into push()
            // can exceed the engine's argument limit.
            for (const message of await this.#parseChatFile(filePath)) {
                if (filterFn(message)) {
                    allChats.push(message);
                }
            }
        }

        return allChats;
    }

    /**
     * Gathers chat metadata objects accepted by `filterFn`: `chat_metadata`
     * and `past_metadata` values stored in group definitions, followed by the
     * `chat_metadata` of the first parsed entry of every chat file.
     *
     * As with `#parseAllChats`, read errors propagate to the caller.
     * @param {(metadata: any) => boolean} filterFn Metadata predicate
     * @returns {Promise<any[]>} Matching metadata objects
     * @throws When a directory, group definition or chat file cannot be read
     */
    async #parseAllMetadata(filterFn) {
        const allMetadata = [];

        const groups = await fs.promises.readdir(this.directories.groups, { withFileTypes: true });
        for (const file of groups) {
            if (!file.isFile() || path.parse(file.name).ext !== '.json') {
                continue;
            }
            const groupData = await this.#readGroupDefinition(file.name);
            if (groupData?.chat_metadata && filterFn(groupData.chat_metadata)) {
                console.warn('Found group chat metadata in group definition - this is deprecated behavior.');
                allMetadata.push(groupData.chat_metadata);
            }
            if (groupData?.past_metadata) {
                console.warn('Found group past chat metadata in group definition - this is deprecated behavior.');
                for (const metadata of Object.values(groupData.past_metadata)) {
                    if (filterFn(metadata)) {
                        allMetadata.push(metadata);
                    }
                }
            }
        }

        for (const filePath of await this.#listChatFiles()) {
            const chatMessages = await this.#parseChatFile(filePath);
            const chatMetadata = chatMessages[0]?.chat_metadata;
            if (chatMetadata && filterFn(chatMetadata)) {
                allMetadata.push(chatMetadata);
            }
        }

        return allMetadata;
    }

    /**
     * Reads a chat file and parses it line by line with `tryParse`, dropping
     * lines that yield a falsy value.
     * @param {string} filePath Full path of the chat file
     * @returns {Promise<any[]>} Parsed entries in file order
     * @throws When the file cannot be read
     */
    async #parseChatFile(filePath) {
        const content = await fs.promises.readFile(filePath, 'utf-8');
        // Wrapped so that map()'s index/array arguments never reach tryParse.
        return content.split('\n').map(line => tryParse(line)).filter(Boolean);
    }

    /**
     * Issues a new random token for the report and stores it in `TOKENS`,
     * dropping any token previously issued for the same handle.
     * @param {string} handle Handle of the user the token belongs to
     * @param {Record<string, string[]>} report Raw report from `generateReport`
     * @returns {string} The new token (64 hexadecimal characters)
     */
    static generateToken(handle, report) {
        const tokens = DataMaidService.TOKENS;

        // Only one live token per user.
        for (const [token, entry] of tokens.entries()) {
            if (entry.handle === handle) {
                tokens.delete(token);
            }
        }

        const token = crypto.randomBytes(32).toString('hex');
        const tokenEntry = {
            handle,
            paths: Object.values(report).filter(v => Array.isArray(v)).flat().map(x => ({ path: x, hash: sha256(x) })),
        };
        tokens.set(token, tokenEntry);
        return token;
    }
}
|
|
export const router = express.Router();

/**
 * POST /report
 * Scans the requesting user's directories and responds with the sanitized
 * report together with a token bound to the raw report's paths.
 * Responds 403 without a user or user directories, 500 on failure.
 */
router.post('/report', async (req, res) => {
    try {
        if (!req.user?.directories) {
            return res.sendStatus(403);
        }

        const { profile: { handle }, directories } = req.user;
        const dataMaid = new DataMaidService(handle, directories);

        const rawReport = await dataMaid.generateReport();
        const report = await dataMaid.sanitizeReport(rawReport);

        // The token is derived from the raw report because it needs the full paths.
        const token = DataMaidService.generateToken(req.user.profile.handle, rawReport);

        return res.json({ report, token });
    } catch (error) {
        console.error('[Data Maid] Error generating data maid report:', error);
        return res.sendStatus(500);
    }
});
|
|
/**
 * POST /finalize
 * Invalidates the token given in `req.body.token`.
 * Responds 400 when no token is supplied, 403 when there is no user, the
 * token is unknown or it belongs to another handle, 204 on success.
 */
router.post('/finalize', async (req, res) => {
    try {
        if (!req.user?.directories) {
            return res.sendStatus(403);
        }

        const suppliedToken = req.body.token;
        if (!suppliedToken) {
            return res.sendStatus(400);
        }

        const token = suppliedToken.toString();
        const tokenEntry = DataMaidService.TOKENS.get(token);
        const ownsToken = Boolean(tokenEntry) && tokenEntry.handle === req.user.profile.handle;
        if (!ownsToken) {
            return res.sendStatus(403);
        }

        // Once finalized, the token can no longer be used to view or delete files.
        DataMaidService.TOKENS.delete(token);
        return res.sendStatus(204);
    } catch (error) {
        console.error('[Data Maid] Error finalizing the token:', error);
        return res.sendStatus(500);
    }
});
|
|
/**
 * GET /view?token=…&hash=…
 * Sends the content of one file listed in the report the token was issued for.
 * The file is identified by the hash of its path; the path itself never comes
 * from the client.
 *
 * Responds 400 when token or hash is missing, 403 when there is no user, the
 * token is unknown or foreign, or the path is outside the user's root, 404
 * when the hash is not part of the report or the file no longer exists.
 */
router.get('/view', async (req, res) => {
    try {
        if (!req.user || !req.user.directories) {
            return res.sendStatus(403);
        }

        if (!req.query.token || !req.query.hash) {
            return res.sendStatus(400);
        }

        const token = req.query.token.toString();
        const hash = req.query.hash.toString();

        const tokenEntry = DataMaidService.TOKENS.get(token);
        if (!tokenEntry || tokenEntry.handle !== req.user.profile.handle) {
            return res.sendStatus(403);
        }

        const fileEntry = tokenEntry.paths.find(entry => entry.hash === hash);
        if (!fileEntry) {
            return res.sendStatus(404);
        }

        if (!isPathUnderParent(req.user.directories.root, fileEntry.path)) {
            console.warn('[Data Maid] Attempted access to a file outside of the user directory:', fileEntry.path);
            return res.sendStatus(403);
        }

        const pathToFile = fileEntry.path;

        // Read directly instead of checking existence first: the file may be
        // removed between the check and the read, which should be a 404, not a 500.
        let fileBuffer;
        try {
            fileBuffer = await fs.promises.readFile(pathToFile);
        } catch (error) {
            if (error?.code === 'ENOENT') {
                return res.sendStatus(404);
            }
            throw error;
        }

        const mimeType = mime.lookup(pathToFile) || 'text/plain';
        res.setHeader('Content-Type', mimeType);
        // The content is user-supplied; stop browsers from guessing a more
        // dangerous type than the one declared above.
        res.setHeader('X-Content-Type-Options', 'nosniff');
        return res.send(fileBuffer);
    } catch (error) {
        console.error('[Data Maid] Error viewing file:', error);
        return res.sendStatus(500);
    }
});
|
|
/**
 * POST /delete
 * Deletes the files identified by `req.body.hashes` (hashes of paths listed
 * in the report the token in `req.body.token` was issued for).
 * Unknown hashes, paths outside the user's root and files that are already
 * gone are skipped.
 *
 * Responds 400 when the token or a non-empty `hashes` array is missing, 403
 * when there is no user or the token is unknown or foreign, 204 on success,
 * 500 when a deletion fails for another reason.
 */
router.post('/delete', async (req, res) => {
    try {
        if (!req.user || !req.user.directories) {
            return res.sendStatus(403);
        }

        const { token, hashes } = req.body;
        if (!token || !Array.isArray(hashes) || hashes.length === 0) {
            return res.sendStatus(400);
        }

        const tokenEntry = DataMaidService.TOKENS.get(token);
        if (!tokenEntry || tokenEntry.handle !== req.user.profile.handle) {
            return res.sendStatus(403);
        }

        // Index once so each requested hash is a constant-time lookup instead
        // of a scan over every reported path.
        const pathsByHash = new Map(tokenEntry.paths.map(entry => [entry.hash, entry.path]));

        for (const hash of new Set(hashes)) {
            const pathToFile = pathsByHash.get(hash);
            if (!pathToFile) {
                continue;
            }

            if (!isPathUnderParent(req.user.directories.root, pathToFile)) {
                console.warn('[Data Maid] Attempted deletion of a file outside of the user directory:', pathToFile);
                continue;
            }

            try {
                await fs.promises.unlink(pathToFile);
            } catch (error) {
                // A file that is already gone is the desired end state; any
                // other failure is reported to the client.
                if (error?.code !== 'ENOENT') {
                    throw error;
                }
            }
        }

        return res.sendStatus(204);
    } catch (error) {
        console.error('[Data Maid] Error deleting files:', error);
        return res.sendStatus(500);
    }
});
|
|