training-load-dataviz / src /utils /csvValidator.ts
glutamatt's picture
glutamatt HF Staff
nice
f63ce21 verified
/**
* CSV Validation utilities
* Checks for required fields and data quality
*/
/**
 * Outcome of running a validation pass over parsed CSV rows.
 */
export interface ValidationResult {
  /** Blocking problems; any entry here means the data should be rejected. */
  errors: Array<string>;
  /** Non-blocking notices about data quality (may also carry informational notes). */
  warnings: Array<string>;
  /** Overall verdict for the validated data. */
  isValid: boolean;
}
/**
 * Per-column summary: whether the column exists and how many rows carry data.
 */
export interface FieldValidation {
  /** True when a matching column was located. */
  found: boolean;
  /** Header name of the matched column; omitted when no column was found. */
  fieldName?: string;
  /** Number of rows inspected. */
  totalRows: number;
  /** Rows whose cell holds a usable value (not empty and not the "--" placeholder). */
  nonNullCount: number;
  /** Rows whose cell parses (via parseFloat) to a number other than zero. */
  nonZeroCount: number;
}
/**
 * Validate the structure of parsed CSV rows and the quality of their key columns.
 *
 * Required columns (a missing or entirely empty one produces an error):
 *  - date:          "Date", "Activity Date" or "date"
 *  - duration:      "Time", "Duration", "Moving Time" or "Elapsed Time"
 *  - activity type: "Activity Type", "Type" or "Sport"
 *
 * Optional columns (only ever produce warnings): "Distance"/"distance", and any
 * column whose header starts with "Normalized Power" or "Training Stress Score".
 *
 * When there are neither errors nor warnings, a single success note is pushed
 * into `warnings` so that callers always have something to display.
 *
 * @param data - Parsed CSV data array (one object per row, keyed by header name)
 * @returns Validation result with errors and warnings; `isValid` is true when
 *          no error was recorded
 */
export function validateCSV(data: any[]): ValidationResult {
  const errors: string[] = [];
  const warnings: string[] = [];

  // Anything that is not a non-empty array cannot be validated row by row.
  if (!Array.isArray(data) || data.length === 0) {
    errors.push('CSV file is empty or contains no valid data rows.');
    return { isValid: false, errors, warnings };
  }

  const totalRows = data.length;

  // Required fields validation
  const dateField = validateField(data, ['Date', 'Activity Date', 'date']);
  const timeField = validateField(data, ['Time', 'Duration', 'Moving Time', 'Elapsed Time']);
  const activityTypeField = validateField(data, ['Activity Type', 'Type', 'Sport']);

  // Date: missing column / fully empty column are errors, partial gaps a warning.
  if (!dateField.found) {
    errors.push('Missing required field: "Date". Expected column names: Date, Activity Date, or date.');
  } else if (dateField.nonNullCount === 0) {
    errors.push(`Field "${dateField.fieldName}" has no valid values (all rows are empty or "--").`);
  } else if (dateField.nonNullCount < totalRows) {
    warnings.push(`Field "${dateField.fieldName}": ${totalRows - dateField.nonNullCount} out of ${totalRows} rows have missing dates.`);
  }

  // Duration: same policy as the date column.
  if (!timeField.found) {
    errors.push('Missing required field: "Time" or "Duration". Expected column names: Time, Duration, Moving Time, or Elapsed Time.');
  } else if (timeField.nonNullCount === 0) {
    errors.push(`Field "${timeField.fieldName}" has no valid values (all rows are empty or "--").`);
  } else if (timeField.nonNullCount < totalRows) {
    warnings.push(`Field "${timeField.fieldName}": ${totalRows - timeField.nonNullCount} out of ${totalRows} rows have missing duration.`);
  }

  // Activity type: partial gaps are tolerated silently.
  if (!activityTypeField.found) {
    errors.push('Missing required field: "Activity Type". Expected column names: Activity Type, Type, or Sport.');
  } else if (activityTypeField.nonNullCount === 0) {
    errors.push(`Field "${activityTypeField.fieldName}" has no valid values (all rows are empty or "--").`);
  }

  // Optional but important fields validation
  const distanceField = validateField(data, ['Distance', 'distance']);
  const normalizedPowerField = validateFieldByPrefix(data, 'Normalized Power');
  const tssField = validateFieldByPrefix(data, 'Training Stress Score');

  // Distance warnings: absent, all zero, or present on fewer than half the rows.
  if (!distanceField.found) {
    warnings.push('Optional field "Distance" is missing. Distance-based ACWR chart will be empty.');
  } else if (distanceField.nonZeroCount === 0) {
    warnings.push('Field "Distance" exists but all values are 0 or empty. Distance-based ACWR chart will be empty.');
  } else if (distanceField.nonZeroCount < totalRows * 0.5) {
    warnings.push(`Field "Distance": Only ${distanceField.nonZeroCount} out of ${totalRows} activities have distance values.`);
  }

  // Power/TSS warnings: both columns absent, or power present on under 30% of rows.
  if (!normalizedPowerField.found && !tssField.found) {
    warnings.push('Neither "Normalized Power" nor "Training Stress Score" fields found. TSS will be calculated using default FTP (343W). Ensure activities have Normalized Power data for accurate TSS calculation.');
  } else if (normalizedPowerField.found && normalizedPowerField.nonZeroCount === 0) {
    warnings.push('Field "Normalized Power" exists but all values are 0 or empty. TSS calculation may be limited.');
  } else if (normalizedPowerField.found && normalizedPowerField.nonZeroCount < totalRows * 0.3) {
    warnings.push(`Field "Normalized Power": Only ${normalizedPowerField.nonZeroCount} out of ${totalRows} activities have power data. TSS-based ACWR will have limited data.`);
  }

  // Summary. The check mark is written as an escape (U+2713) so it survives
  // any re-encoding of this source file.
  if (errors.length === 0 && warnings.length === 0) {
    warnings.push(`\u2713 CSV validation passed. Successfully found ${totalRows} activities with all required fields.`);
  }

  return {
    isValid: errors.length === 0,
    errors,
    warnings,
  };
}
/**
 * Validate a field by checking multiple possible column names.
 *
 * Candidates are tried in order against the first row only; the first name
 * present on that row wins and every row is then inspected under that name.
 *
 * A cell counts as missing when it is null/undefined, NaN, or a string that is
 * empty or "--" once surrounding whitespace is trimmed. A numeric 0 is a real
 * value: it counts towards `nonNullCount` but not towards `nonZeroCount`.
 *
 * @param data - Parsed CSV rows (objects keyed by column header)
 * @param possibleNames - Accepted header names, in priority order
 * @returns Summary for the matched column, or `found: false` with zero counts
 */
function validateField(data: any[], possibleNames: string[]): FieldValidation {
  const totalRows = data.length;
  const firstRow = data[0];

  // `in` throws a TypeError on primitives, so only probe real objects.
  const fieldName =
    firstRow !== null && typeof firstRow === 'object'
      ? possibleNames.find(name => name in firstRow)
      : undefined;

  if (fieldName === undefined) {
    return { found: false, nonNullCount: 0, nonZeroCount: 0, totalRows };
  }

  let nonNullCount = 0;
  let nonZeroCount = 0;

  for (const row of data) {
    // Tolerate null/undefined rows instead of throwing on property access.
    const raw = row === null || row === undefined ? undefined : row[fieldName];
    const value = typeof raw === 'string' ? raw.trim() : raw;

    const isMissing =
      value === null ||
      value === undefined ||
      value === '' ||
      value === '--' ||
      (typeof value === 'number' && Number.isNaN(value));
    if (isMissing) continue;

    nonNullCount += 1;

    const parsed = parseFloat(String(value));
    if (!Number.isNaN(parsed) && parsed !== 0) {
      nonZeroCount += 1;
    }
  }

  return {
    found: true,
    fieldName,
    nonNullCount,
    nonZeroCount,
    totalRows,
  };
}
/**
 * Validate a field by checking if any column name starts with a prefix.
 *
 * Only the keys of the first row are searched; the first key beginning with
 * `prefix` is used for every row.
 *
 * A cell counts as missing when it is null/undefined, NaN, or a string that is
 * empty or "--" once surrounding whitespace is trimmed. A numeric 0 is a real
 * value: it counts towards `nonNullCount` but not towards `nonZeroCount`.
 *
 * @param data - Parsed CSV rows (objects keyed by column header)
 * @param prefix - Case-sensitive start of the header name to look for
 * @returns Summary for the matched column, or `found: false` with zero counts
 */
function validateFieldByPrefix(data: any[], prefix: string): FieldValidation {
  const totalRows = data.length;
  const firstRow = data[0];

  // Headers are read from the first row; a missing or non-object row has none.
  const fieldName =
    firstRow !== null && typeof firstRow === 'object'
      ? Object.keys(firstRow).find(key => key.startsWith(prefix))
      : undefined;

  if (fieldName === undefined) {
    return { found: false, nonNullCount: 0, nonZeroCount: 0, totalRows };
  }

  let nonNullCount = 0;
  let nonZeroCount = 0;

  for (const row of data) {
    // Tolerate null/undefined rows instead of throwing on property access.
    const raw = row === null || row === undefined ? undefined : row[fieldName];
    const value = typeof raw === 'string' ? raw.trim() : raw;

    const isMissing =
      value === null ||
      value === undefined ||
      value === '' ||
      value === '--' ||
      (typeof value === 'number' && Number.isNaN(value));
    if (isMissing) continue;

    nonNullCount += 1;

    const parsed = parseFloat(String(value));
    if (!Number.isNaN(parsed) && parsed !== 0) {
      nonZeroCount += 1;
    }
  }

  return {
    found: true,
    fieldName,
    nonNullCount,
    nonZeroCount,
    totalRows,
  };
}