|
|
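"""
Project Cleanup Script - NO BACKUP VERSION

Removes redundant files and directories to reduce project size.
Targets are listed by name and resolved relative to the current working
directory, so run this script from the project root.

WARNING: This script does NOT create backups. Use with caution.
"""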
|
|
|
| import os
|
| import shutil
|
|
|
def safe_remove(path):
    """Remove a file, symbolic link, or directory tree and report the outcome.

    Args:
        path: Filesystem path to delete.

    Returns:
        True if something was deleted; False if nothing exists at ``path``
        or the deletion failed with an ``OSError``. A one-line status
        message is printed in every case.
    """
    try:
        # Check for links first: shutil.rmtree() refuses to operate on a
        # symbolic link, and isfile()/isdir() are both False for a dangling
        # one. Removing the link itself never touches the link's target.
        if os.path.islink(path) or os.path.isfile(path):
            os.remove(path)
            removed_kind = "file"
        elif os.path.isdir(path):
            shutil.rmtree(path)
            removed_kind = "directory"
        else:
            removed_kind = None
    except OSError as e:
        print(f" ❌ Error removing {path}: {e}")
        return False

    # Report outside the try block so a failure while printing can never be
    # mistaken for a failed deletion.
    if removed_kind is None:
        print(f" ⚠️ Not found: {path}")
        return False
    print(f" ✅ Removed {removed_kind}: {path}")
    return True
|
|
|
def _collect_cleanup_targets():
    """Return the names of every file and directory the cleanup deletes."""
    old_models = [
        "bert_hallucination_model",
        "bias_fixed_mega_model",
        "competition_model",
        "enhanced_contradiction_model",
        "enhanced_hallucination_model",
        "enhanced_hallucination_model_v2",
        "enhanced_model",
        "enhanced_v2_model",
        "final_balanced_mega_model",
        "fine_tuned_hallucination_model",
        "fixed_enhanced_model",
        "mega_hallucination_model_20250814_151158",
        "progressive_model_1",
        "progressive_model_2",
        "progressive_model_3",
        "progressive_stage_1_20250814_155617",
        "progressive_stage_2_20250814_155850",
        "progressive_stage_3_20250814_160640",
        "stable_mega_enhanced_model",
        "stable_mega_model_20250814_155042",
        "cv_fold_0",
        "ensemble_models",
        "evaluation_results",
        "model_checkpoints",
        "training_checkpoints",
    ]

    old_training_scripts = [
        "train_bert_alternative.py",
        "train_bias_fix.py",
        "train_competition_model.py",
        "train_enhanced_contradiction.py",
        "train_enhanced_model.py",
        "train_enhanced_performance.py",
        "train_final_balance.py",
        "train_fixed_enhanced.py",
        "train_incremental_improvement.py",
        "train_mega_model.py",
        "train_progressive_complete.py",
        "train_simple_competition.py",
        "train_simple_model.py",
        "train_simple_production.py",
        "train_simple_working_model.py",
        "train_smart_incremental.py",
        "train_stable_mega.py",
        "train_working_enhanced.py",
    ]

    old_test_scripts = [
        "test_all_models.py",
        "test_bias_fixed.py",
        "test_complete_mega_model.py",
        "test_detector_direct.py",
        "test_device_patterns.py",
        "test_enhanced_api.py",
        "test_enhanced_model.py",
        "test_enhanced_v2_comprehensive.py",
        "test_final_balance.py",
        "test_fresh_predictions.py",
        "test_hybrid_accuracy.py",
        "test_mega_final.py",
        "test_mega_model.py",
        "test_model.py",
        "test_raw_model.py",
        "test_regex_fix.py",
        "test_reliability.py",
        "test_stable_mega.py",
    ]

    debug_files = [
        "debug_prediction.py",
        "debug_rules.py",
        "apply_quick_fix.py",
        "hybrid_detector.py",
        "standalone_test.py",
        "simple_test_server.py",
        "comprehensive_server_test.py",
        "comprehensive_test.py",
    ]

    old_data_files = [
        "advanced_training_data.csv",
        "bias_fix_training.csv",
        "combined_training_data.csv",
        "competition_training_data.csv",
        "edge_cases_training_data.csv",
        "halueval_combined_training.csv",
        "halueval_training_prepared.csv",
        "mega_training_data.csv",
        "sample_training_data.csv",
    ]

    cache_files = [
        "cache_persistent.pkl",
        "app.log",
        "__pycache__",
        ".pytest_cache",
        "logs",
        "tmp",
        "model_cache",
        "plots",
    ]

    analysis_scripts = [
        "analyze_halueval.py",
        "analyze_training_metrics.py",
        "complete_data_audit.py",
        "create_verified_dataset.py",
        "diagnose_bias.py",
        "fact_check_report.py",
        "monitor_training.py",
        "training_summary.py",
        "verify_fixes_and_retrain.py",
        "analyze_cleanup.py",
    ]

    config_files = [
        "competition_config.json",
        "monitoring_config.json",
        "setup_report.json",
        "requirements_competition.txt",
    ]

    batch_files = [
        "run_simple_training.bat",
        "run_training.bat",
    ]

    docs_files = [
        "COMPETITION_README.md",
        "RELIABILITY_ANALYSIS.md",
        "setup_instructions.md",
    ]

    return (
        old_models + old_training_scripts + old_test_scripts +
        debug_files + old_data_files + cache_files +
        analysis_scripts + config_files + batch_files + docs_files
    )


def cleanup_project(root=None):
    """Delete the known redundant files and directories and print a report.

    Args:
        root: Optional directory that the target names are resolved against.
            When None (the default) the bare names are used, so they resolve
            relative to the current working directory.

    Returns:
        The number of items that were actually removed.
    """
    targets = _collect_cleanup_targets()

    print("🧹 STARTING PROJECT CLEANUP (NO BACKUP)")
    print("=" * 60)
    print("⚠️ WARNING: No backup will be created!")
    # Derive the figure from the list so the banner cannot go stale.
    print(f"🎯 Target: Remove up to {len(targets)} redundant files/directories")
    print("✅ Keep: Essential production files only")
    print("=" * 60)

    removed_count = 0
    failed_count = 0

    for item in targets:
        path = item if root is None else os.path.join(root, item)
        # lexists() also sees dangling symlinks, which exists() reports as
        # missing; absent targets are skipped without any output.
        if not os.path.lexists(path):
            continue
        if safe_remove(path):
            removed_count += 1
        else:
            failed_count += 1

    print("=" * 60)
    print(f"🎉 Cleanup completed! Removed {removed_count} items.")
    if failed_count:
        print(f"⚠️ {failed_count} items could not be removed (see messages above).")

    print("\n📁 REMAINING ESSENTIAL FILES:")
    essential_files = (
        "app/ (core application)",
        "complete_halueval_model/ (working AI model)",
        "comprehensive_training_data.csv (main training data)",
        "training.csv (backup training data)",
        "test_comprehensive_hybrid.py (main test)",
        "test_api.py (API tests)",
        "test_mega_accuracy.py (accuracy test)",
        "requirements.txt (dependencies)",
        "config.yaml (configuration)",
        "README.md (documentation)",
        "Dockerfile & docker-compose.yml (deployment)",
        "prepare_training_data.py (training utility)",
        "combine_training_data.py (data utility)",
        "run_server.bat (server script)",
        "setup_competition.py (setup utility)",
    )
    # This list is informational only; the entries are not checked on disk.
    for description in essential_files:
        print(f" ✅ {description}")

    print("\n💾 Project size significantly reduced!")
    print(f"🗑️ Removed {removed_count} redundant files")

    return removed_count
|
|
|
def main():
    """Ask for explicit confirmation, run the cleanup, then delete this script.

    The cleanup only runs when the user types exactly ``YES``. Any other
    answer, or an input stream that is closed, cancels without deleting
    anything.
    """
    print("PROJECT CLEANUP TOOL - NO BACKUP VERSION")
    print("=" * 60)
    print("⚠️ WARNING: This will permanently delete files!")
    print("📦 This will remove redundant files to save space")
    print("✅ Essential production files will be preserved")
    print("❌ NO BACKUP will be created")
    print()

    try:
        response = input("Are you SURE you want to proceed? (type 'YES' to confirm): ")
    except EOFError:
        # No interactive stdin means nothing was confirmed: treat it as "no"
        # rather than crashing with a traceback.
        print()
        response = ""

    if response != 'YES':
        print("❌ Cleanup cancelled.")
        return

    removed_count = cleanup_project()

    print("\n🎉 Project cleanup completed successfully!")
    print(f"📊 Removed {removed_count} redundant items")
    print("🔧 All essential files preserved")
    print("💾 Project size significantly reduced")

    # Best-effort self-removal: the script is looked up by its expected name
    # in the current working directory.
    try:
        os.remove("cleanup_no_backup.py")
    except FileNotFoundError:
        # The script is not present under that name here; nothing to do.
        pass
    except OSError as e:
        print(f"⚠️ Could not remove cleanup script: {e}")
    else:
        print("🧹 Cleanup script removed")
|
|
|
# Run the interactive cleanup only when this file is executed as a script,
# never as a side effect of importing it.
if __name__ == "__main__":
    main()
|
|
|