TransHub_backend / SECURITY_ENHANCEMENT_PLAN.md
linguabot's picture
Upload folder using huggingface_hub
da819ac verified

🔒 Security Enhancement Plan for Transcreation Sandbox

1. Enhanced Authentication & Authorization

Current State:

  • Simple token-based auth with user_ and visitor_ prefixes
  • Basic role-based access (admin/visitor)
  • No session management or token expiration

Recommended Improvements:

A. JWT Implementation

// Enhanced JWT with proper expiration and refresh tokens
const jwt = require('jsonwebtoken');
const refreshTokens = new Set();

/**
 * Issues a token pair for a user: an access token carrying the user id and
 * role (expires in 15 minutes) and a refresh token carrying only the user id
 * (expires in 7 days). The refresh token is also recorded in `refreshTokens`.
 *
 * @param {{_id: *, role: *}} user - User record; `_id` and `role` are read.
 * @returns {{accessToken: string, refreshToken: string}} The signed pair.
 */
const generateTokens = (user) => {
  const { _id: userId, role } = user;

  // Small local wrapper so both tokens are signed the same way.
  const signWith = (claims, secret, lifetime) =>
    jwt.sign(claims, secret, { expiresIn: lifetime });

  const accessToken = signWith({ userId, role }, process.env.JWT_SECRET, '15m');
  const refreshToken = signWith({ userId }, process.env.JWT_REFRESH_SECRET, '7d');

  // Remember the refresh token so it can be recognised later.
  refreshTokens.add(refreshToken);

  return { accessToken, refreshToken };
};

B. Role-Based Access Control (RBAC)

/**
 * Builds an Express-style middleware that lets a request continue only when
 * `req.user.role` is one of the allowed roles; otherwise it answers 403.
 *
 * @param {string[]|string} roles - Allowed role names. A single role may be
 *   given as a plain string.
 * @returns {(req: object, res: object, next: Function) => *} Middleware.
 * @throws {TypeError} If `roles` is neither an array nor a string.
 */
const requireRole = (roles) => {
  if (!Array.isArray(roles) && typeof roles !== 'string') {
    // Fail at configuration time instead of on the first request.
    throw new TypeError('requireRole expects an array of role names or a single role name');
  }

  // A bare string must be wrapped: String.prototype.includes does substring
  // matching, so requireRole('admin') would otherwise accept role '' or 'adm'.
  const allowedRoles = Array.isArray(roles) ? roles : [roles];

  return (req, res, next) => {
    if (!req.user || !allowedRoles.includes(req.user.role)) {
      return res.status(403).json({
        success: false,
        message: 'Insufficient permissions'
      });
    }
    next();
  };
};

// Usage: requireRole(['admin', 'moderator'])

C. API Rate Limiting Enhancement

// Per-user rate limiting.
// Picks the request ceiling for one 15-minute window from the caller's role:
// admins get 1000, moderators 500, everyone else (including anonymous) 100.
const resolveRequestCeiling = (req) => {
  switch (req.user?.role) {
    case 'admin':
      return 1000;
    case 'moderator':
      return 500;
    default:
      return 100; // visitors
  }
};

// Buckets requests by user id when present, falling back to the client IP.
// NOTE(review): the JWT payload in this plan carries `userId`, not `id` —
// confirm which field the auth middleware actually puts on `req.user`.
const resolveRateLimitKey = (req) => req.user?.id || req.ip;

const userRateLimit = rateLimit({
  windowMs: 15 * 60 * 1000,
  max: resolveRequestCeiling,
  keyGenerator: resolveRateLimitKey,
  message: 'Too many requests from this user'
});

2. Data Protection & Encryption

A. Database Encryption

// MongoDB Atlas already provides encryption at rest
// Additional field-level encryption for sensitive data
const crypto = require('crypto');

// Field-level encryption uses AES-256-GCM: every value gets a fresh random IV
// and an authentication tag, so equal plaintexts encrypt differently and any
// tampering is detected on decrypt. This replaces crypto.createCipher /
// createDecipher, which are deprecated (removed in current Node.js releases)
// and derive key and IV from the password without a salt.
// NOTE(review): values encrypted with the old createCipher form cannot be
// read by decryptField below. This plan lists field-level encryption as not
// yet implemented, so presumably no such data exists — confirm before rollout.
const FIELD_CIPHER = 'aes-256-gcm';
const FIELD_IV_BYTES = 12;
const FIELD_TAG_BYTES = 16;
const FIELD_KEY_BYTES = 32;
const FIELD_KEY_SALT = 'transcreation-sandbox:field-encryption';

// Caches the derived key so scrypt runs once per distinct ENCRYPTION_KEY.
let cachedFieldKey = null;

/**
 * Derives the 32-byte AES key from process.env.ENCRYPTION_KEY.
 *
 * @returns {Buffer} The derived key.
 * @throws {Error} If ENCRYPTION_KEY is not set.
 */
const getFieldKey = () => {
  const secret = process.env.ENCRYPTION_KEY;
  if (!secret) {
    throw new Error('ENCRYPTION_KEY is not set');
  }
  if (cachedFieldKey === null || cachedFieldKey.secret !== secret) {
    cachedFieldKey = {
      secret,
      key: crypto.scryptSync(secret, FIELD_KEY_SALT, FIELD_KEY_BYTES)
    };
  }
  return cachedFieldKey.key;
};

/**
 * Encrypts a string for storage.
 *
 * @param {string} text - Plaintext (UTF-8).
 * @returns {string} `iv:tag:ciphertext`, each part hex-encoded.
 * @throws {Error} If ENCRYPTION_KEY is not set.
 */
const encryptField = (text) => {
  const iv = crypto.randomBytes(FIELD_IV_BYTES);
  const cipher = crypto.createCipheriv(FIELD_CIPHER, getFieldKey(), iv);
  const ciphertext = Buffer.concat([cipher.update(text, 'utf8'), cipher.final()]);
  // The tag is only available after final().
  const tag = cipher.getAuthTag();
  return [iv, tag, ciphertext].map((part) => part.toString('hex')).join(':');
};

/**
 * Decrypts a value produced by encryptField.
 *
 * @param {string} encryptedText - `iv:tag:ciphertext` in hex.
 * @returns {string} The original plaintext.
 * @throws {Error} If the payload is malformed, was tampered with, was
 *   encrypted under a different key, or ENCRYPTION_KEY is not set.
 */
const decryptField = (encryptedText) => {
  const parts = String(encryptedText).split(':');
  if (parts.length !== 3) {
    throw new Error('Malformed encrypted field');
  }
  const [iv, tag, ciphertext] = parts.map((part) => Buffer.from(part, 'hex'));
  if (iv.length !== FIELD_IV_BYTES || tag.length !== FIELD_TAG_BYTES) {
    throw new Error('Malformed encrypted field');
  }

  const decipher = crypto.createDecipheriv(FIELD_CIPHER, getFieldKey(), iv);
  decipher.setAuthTag(tag);
  // final() throws when the authentication tag does not match.
  return Buffer.concat([decipher.update(ciphertext), decipher.final()]).toString('utf8');
};

B. Input Validation & Sanitization

// Enhanced input validation
const Joi = require('joi');

// Timestamps must look like NN:NN:NN,NNN (presumably HH:MM:SS,mmm).
const SUBTITLE_TIMESTAMP = /^\d{2}:\d{2}:\d{2},\d{3}$/;

// Both time fields share the same rule: a required string in that format.
const requiredTimestamp = () => Joi.string().pattern(SUBTITLE_TIMESTAMP).required();

// Shape of one subtitle segment: an id from 1 to 100, start/end timestamps,
// required English text and optional Chinese translation (500 chars max each).
const subtitleSchema = Joi.object({
  segmentId: Joi.number().integer().min(1).max(100).required(),
  startTime: requiredTimestamp(),
  endTime: requiredTimestamp(),
  englishText: Joi.string().max(500).required(),
  chineseTranslation: Joi.string().max(500).optional()
});

// Runs a subtitle payload through subtitleSchema and hands back the
// validation result unchanged.
const validateSubtitle = (data) => subtitleSchema.validate(data);

3. Content Protection System

A. Enhanced Protection with Checksums

// Add checksums to detect unauthorized changes
const crypto = require('crypto');

/**
 * Computes the SHA-256 digest of `content`.
 *
 * @param {string|Buffer} content - Data to hash.
 * @returns {string} The digest as lowercase hex.
 */
const generateChecksum = (content) => {
  const hasher = crypto.createHash('sha256');
  hasher.update(content);
  return hasher.digest('hex');
};

// One entry in a subtitle's verification history.
const verificationRecord = {
  timestamp: Date,
  checksum: String,
  verifiedBy: String,
  status: String
};

// Enhanced Subtitle Schema: stores a required content checksum, when the
// document was last verified (defaults to creation time), and the history
// of verification records.
const subtitleSchema = new mongoose.Schema({
  // ... existing fields ...
  contentChecksum: { type: String, required: true },
  lastVerified: { type: Date, default: Date.now },
  verificationHistory: [verificationRecord]
});

// Verification method: recomputes the checksum over englishText, startTime
// and endTime (concatenated in that order) and reports whether it still
// equals the stored contentChecksum.
subtitleSchema.methods.verifyIntegrity = function() {
  const protectedContent = this.englishText + this.startTime + this.endTime;
  const recomputed = generateChecksum(protectedContent);
  const isUnchanged = recomputed === this.contentChecksum;
  return isUnchanged;
};

B. Watermarking System

/**
 * Appends a user-derived watermark to `text` to help trace copying.
 *
 * The watermark is the first 8 base64 characters of the user id, i.e. its
 * first 6 bytes. Only the separator (a zero-width space) is invisible; the
 * watermark characters themselves are ordinary visible text.
 *
 * @param {string} text - Content to mark.
 * @param {string|number|object} userId - Id of the user receiving the
 *   content. Non-string ids (e.g. numbers) are converted with String().
 * @returns {string} `text` + zero-width space + watermark.
 * @throws {TypeError} If `userId` is null or undefined.
 */
const addWatermark = (text, userId) => {
  if (userId == null) {
    throw new TypeError('addWatermark requires a userId');
  }
  // Buffer.from() rejects numbers, so anything that is not already a string
  // or a byte view is stringified first.
  const idSource =
    typeof userId === 'string' || ArrayBuffer.isView(userId) ? userId : String(userId);
  const watermark = Buffer.from(idSource).toString('base64').slice(0, 8);
  return text + '\u200B' + watermark; // Zero-width space + watermark
};

/**
 * Reads back the watermark appended after a zero-width space.
 *
 * The watermark is taken from after the LAST zero-width space, so text that
 * already contains such characters still yields the appended watermark
 * rather than a fragment of the content.
 *
 * @param {string} text - Possibly watermarked content.
 * @returns {string|null} The watermark, or null when no zero-width space
 *   is present.
 */
const extractWatermark = (text) => {
  const separatorIndex = text.lastIndexOf('\u200B');
  return separatorIndex === -1 ? null : text.slice(separatorIndex + 1);
};

4. API Security

A. Request Validation

/**
 * Request validation middleware factory.
 *
 * Validates `req.body` with `schema.validate`. When the result carries an
 * `error`, responds 400 with the error details; otherwise calls `next()`.
 *
 * @param {{validate: Function}} schema - Object exposing `validate(body)`.
 * @returns {(req: object, res: object, next: Function) => *} Middleware.
 */
const validateRequest = (schema) => (req, res, next) => {
  const { error: validationError } = schema.validate(req.body);

  if (!validationError) {
    next();
    return;
  }

  return res.status(400).json({
    success: false,
    message: 'Invalid request data',
    details: validationError.details
  });
};

B. CORS Configuration

// Strict CORS configuration.
// Only these two origins are allowed to call the API.
const allowedOrigins = [
  'https://linguabot-transcreation-frontend.hf.space',
  'https://linguabot-transcreation-backend.hf.space'
];

// Preflight cache lifetime in seconds (24 hours).
const PREFLIGHT_MAX_AGE_SECONDS = 24 * 60 * 60;

const corsOptions = {
  origin: allowedOrigins,
  credentials: true,
  methods: ['GET', 'POST', 'PUT', 'DELETE'],
  allowedHeaders: ['Content-Type', 'Authorization', 'user-role'],
  maxAge: PREFLIGHT_MAX_AGE_SECONDS
};

// Install the CORS middleware built from corsOptions on `app`
// (presumably the Express application — defined outside this snippet).
app.use(cors(corsOptions));

2. Backup & Version Control Strategy

A. Database Backup System

Automated MongoDB Atlas Backups

// Enhanced backup system.
// Daily automated backups are handled by MongoDB Atlas; this object covers
// manually triggered backups and restores.
// NOTE(review): `saveBackup` and `loadBackup` are called on `this` but are
// not defined in this snippet — they must be supplied before use.
const backupSystem = {
  /**
   * Exports every backed-up collection and stores the result under a
   * timestamped name.
   *
   * @returns {Promise<string>} The name the backup was saved under.
   */
  async createManualBackup() {
    const timestamp = new Date().toISOString();
    const backupName = `manual-backup-${timestamp}`;

    // Export all collections. The reads are independent, so run them
    // in parallel rather than one after another.
    const collections = ['subtitles', 'sourcetexts', 'submissions', 'users'];
    const { db } = mongoose.connection;
    const dumps = await Promise.all(
      collections.map((collection) => db.collection(collection).find({}).toArray())
    );
    const backupData = Object.fromEntries(
      collections.map((collection, index) => [collection, dumps[index]])
    );

    // Save to backup storage
    await this.saveBackup(backupName, backupData);
    return backupName;
  },

  /**
   * Replaces the contents of each collection in the named backup with the
   * backed-up documents.
   *
   * The whole backup is validated before anything is deleted, so a missing
   * or malformed backup can no longer wipe a collection and then fail.
   * NOTE(review): each collection is still emptied and refilled in separate
   * steps, not atomically — a failure mid-restore leaves partial state.
   *
   * @param {string} backupName - Name returned by createManualBackup.
   * @returns {Promise<void>}
   * @throws {Error} If the backup is missing or not a map of
   *   collection name to array of documents.
   */
  async restoreFromBackup(backupName) {
    const backupData = await this.loadBackup(backupName);

    if (backupData === null || typeof backupData !== 'object' || Array.isArray(backupData)) {
      throw new Error(`Backup "${backupName}" is missing or malformed`);
    }
    const entries = Object.entries(backupData);
    for (const [collection, data] of entries) {
      if (!Array.isArray(data)) {
        throw new Error(
          `Backup "${backupName}" has no document array for collection "${collection}"`
        );
      }
    }

    const { db } = mongoose.connection;
    for (const [collection, data] of entries) {
      await db.collection(collection).deleteMany({});
      // insertMany rejects an empty array, so skip empty collections.
      if (data.length > 0) {
        await db.collection(collection).insertMany(data);
      }
    }
  }
};

B. Git-Based Version Control for Content

// Content version control system.
// NOTE(review): `createGitCommit` is called on `this` but is not defined in
// this snippet — it must be supplied before use.
const gitVersionControl = {
  /**
   * Records one content change: obtains a commit hash via createGitCommit,
   * then stores a history record in the `versionControl` collection.
   *
   * @param {string} collection - Collection the document lives in.
   * @param {*} documentId - Identifier of the changed document.
   * @param {*} changes - Description of the change, passed to createGitCommit.
   * @param {*} userId - Who made the change.
   * @returns {Promise<object>} The stored history record.
   */
  async commitContentChanges(collection, documentId, changes, userId) {
    const commitMessage = `Update ${collection} ${documentId} by ${userId}`;
    const timestamp = new Date().toISOString();

    // Create git commit for content changes
    const commitHash = await this.createGitCommit(commitMessage, changes);
    const historyRecord = { collection, documentId, changes, timestamp, userId, commitHash };

    // Store in version control collection
    const versionLog = mongoose.connection.db.collection('versionControl');
    await versionLog.insertOne(historyRecord);
    return historyRecord;
  },

  /**
   * Fetches every history record for a document, newest first.
   *
   * @param {*} documentId - Identifier of the document.
   * @returns {Promise<object[]>} Matching records sorted by timestamp descending.
   */
  async getContentHistory(documentId) {
    const versionLog = mongoose.connection.db.collection('versionControl');
    const newestFirst = versionLog.find({ documentId }).sort({ timestamp: -1 });
    const records = await newestFirst.toArray();
    return records;
  }
};

C. Frontend State Management

Redux/Zustand for State Persistence

// Enhanced state management with persistence
import { create } from 'zustand';
import { persist } from 'zustand/middleware';

// Selects what the persist middleware keeps: only the two auth fields.
// Content arrays and the protectedContent Set are left out.
const pickPersistedState = ({ user, isAuthenticated }) => ({ user, isAuthenticated });

// Builds the store's initial state together with its actions.
const buildAppState = (set) => ({
  // User state
  user: null,
  isAuthenticated: false,

  // Content state
  subtitles: [],
  sourceTexts: [],
  submissions: [],

  // Protection state
  protectedContent: new Set(),

  // Actions
  // Setting a user also flips isAuthenticated on; clearing it flips it off.
  setUser: (user) => set({ user, isAuthenticated: Boolean(user) }),
  updateSubtitles: (subtitles) => set({ subtitles }),
  // Copies the Set rather than mutating it, so the state value is replaced.
  markProtected: (contentId) =>
    set(({ protectedContent }) => {
      const nextProtected = new Set(protectedContent);
      nextProtected.add(contentId);
      return { protectedContent: nextProtected };
    })
});

const useAppStore = create(
  persist(buildAppState, {
    name: 'transcreation-sandbox-storage',
    partialize: pickPersistedState
  })
);

3. Monitoring & Alerting

A. Security Monitoring

// Security event logging
const securityLogger = {
  /**
   * Records a security event in the `securityLogs` collection and raises an
   * alert when the event name matches a suspicious pattern.
   *
   * The write is fire-and-forget: a failure is reported on the console and
   * never thrown to the caller.
   *
   * @param {string} event - Event name, e.g. 'multiple_failed_logins'.
   * @param {*} details - Context stored with the event.
   * @param {object} [req] - Request the event belongs to. When given, its
   *   `ip`, 'User-Agent' header and `user.id` are stored; when omitted those
   *   fields are undefined. (Previously `req` was read from an undefined
   *   outer variable, which threw a ReferenceError on every call.)
   * @returns {void}
   */
  logSecurityEvent(event, details, req) {
    const logEntry = {
      timestamp: new Date(),
      event,
      details,
      ip: req?.ip,
      userAgent: req?.get?.('User-Agent'),
      userId: req?.user?.id
    };

    const reportWriteFailure = (err) => {
      console.error('Failed to write security log entry:', err);
    };

    // Log to security collection. Both a synchronous throw (e.g. no database
    // connection yet) and an asynchronous rejection are handled, so logging
    // can never take the caller or the process down.
    try {
      Promise.resolve(
        mongoose.connection.db.collection('securityLogs').insertOne(logEntry)
      ).catch(reportWriteFailure);
    } catch (err) {
      reportWriteFailure(err);
    }

    // Alert on suspicious activities
    if (this.isSuspiciousActivity(event, details)) {
      if (typeof this.sendSecurityAlert === 'function') {
        this.sendSecurityAlert(logEntry);
      } else {
        // No alert handler is defined on this object in this snippet; fall
        // back to a console warning instead of throwing a TypeError.
        console.warn('Suspicious security event (no alert handler configured):', logEntry);
      }
    }
  },

  /**
   * Tells whether an event name contains one of the suspicious patterns.
   *
   * @param {string} event - Event name to inspect.
   * @param {*} details - Currently unused; kept for interface stability.
   * @returns {boolean} True when any pattern occurs inside `event`.
   */
  isSuspiciousActivity(event, details) {
    const suspiciousPatterns = [
      'multiple_failed_logins',
      'unauthorized_access_attempt',
      'data_export_attempt',
      'bulk_deletion_attempt'
    ];

    return suspiciousPatterns.some(pattern => event.includes(pattern));
  }
};

B. Performance Monitoring

// Writes one entry to the `performanceLogs` collection. Any failure —
// a missing connection or a rejected insert — is logged and swallowed, so
// monitoring can never crash the process from inside an event handler.
const storePerformanceLog = async (logEntry) => {
  try {
    await mongoose.connection.db.collection('performanceLogs').insertOne(logEntry);
  } catch (err) {
    console.error('Failed to store performance log entry:', err);
  }
};

/**
 * Performance monitoring middleware.
 *
 * Measures the time from middleware entry until the response emits
 * 'finish', warns on the console when that exceeds 1000 ms, and stores an
 * entry in the `performanceLogs` collection.
 *
 * @param {object} req - Incoming request; `method`, `path` and `user.id` are read.
 * @param {object} res - Response; `statusCode` is read once it has finished.
 * @param {Function} next - Called immediately to continue the chain.
 * @returns {void}
 */
const performanceMonitor = (req, res, next) => {
  const start = Date.now();

  res.on('finish', () => {
    const duration = Date.now() - start;
    const logEntry = {
      timestamp: new Date(),
      method: req.method,
      path: req.path,
      statusCode: res.statusCode,
      duration,
      userId: req.user?.id
    };

    // Log slow requests
    if (duration > 1000) {
      console.warn('Slow request detected:', logEntry);
    }

    // Store in performance logs. Deliberately not awaited: the response has
    // already been sent, and storePerformanceLog handles its own errors.
    void storePerformanceLog(logEntry);
  });

  next();
};

4. Implementation Priority

Phase 1 (Immediate - 1-2 weeks)

  1. ✅ Enhanced rate limiting (already implemented)
  2. ✅ Input validation and sanitization
  3. ✅ Content protection with checksums
  4. ✅ Automated backup verification

Phase 2 (Short-term - 2-4 weeks)

  1. JWT implementation with refresh tokens
  2. Enhanced RBAC system
  3. Security monitoring and alerting
  4. Git-based version control for content

Phase 3 (Medium-term - 1-2 months)

  1. Field-level encryption for sensitive data
  2. Advanced watermarking system
  3. Comprehensive audit logging
  4. Automated security testing

5. Recommended Tools & Services

Security Tools:

  • Helmet.js: Security headers
  • Joi: Input validation
  • Rate-limiter-flexible: Advanced rate limiting
  • Winston: Structured logging

Monitoring Tools:

  • MongoDB Atlas: Built-in monitoring
  • Sentry: Error tracking
  • LogRocket: User session replay
  • DataDog: Application performance monitoring

Backup Services:

  • MongoDB Atlas: Automated backups
  • AWS S3: Additional backup storage
  • GitHub: Code and content version control

6. Security Checklist

✅ Implemented:

  • Basic authentication
  • Content protection flags
  • Rate limiting
  • CORS configuration

🔄 In Progress:

  • Enhanced input validation
  • Security monitoring

📋 To Implement:

  • JWT with refresh tokens
  • Field-level encryption
  • Comprehensive audit logging
  • Automated security testing
  • Advanced watermarking

This comprehensive security plan will significantly enhance the protection of your transcreation sandbox while maintaining usability for legitimate users.