hallucination-detector-project / cleanup_no_backup.py
KShoichi's picture
Upload cleanup_no_backup.py with huggingface_hub
aff56a0 verified
#!/usr/bin/env python3
"""
Project Cleanup Script - NO BACKUP VERSION
Removes redundant files and directories to reduce project size.
WARNING: This script does NOT create backups. Use with caution.
"""
import os
import shutil
def safe_remove(path):
    """Delete a file, symlink, or directory tree without ever raising.

    Args:
        path: Filesystem path to delete.

    Returns:
        True if the path was deleted; False if nothing exists at the path
        or the deletion failed (the error is printed instead of raised).
    """
    try:
        # Symlinks must be tested first: os.path.isdir() follows links, but
        # shutil.rmtree() refuses to run on a symlink, so a link pointing at
        # a directory could never be removed. Unlinking removes only the
        # link itself (never its target) and also covers dangling links.
        if os.path.islink(path) or os.path.isfile(path):
            os.remove(path)
            print(f" βœ… Removed file: {path}")
            return True
        if os.path.isdir(path):
            shutil.rmtree(path)
            print(f" βœ… Removed directory: {path}")
            return True
        print(f" ⚠️ Not found: {path}")
        return False
    except Exception as e:
        # Deliberately broad: this is a best-effort helper and callers rely
        # on a boolean result rather than on exceptions.
        print(f" ❌ Error removing {path}: {e}")
        return False
def cleanup_project():
    """Delete the hard-coded list of redundant project files and directories.

    Every name below is a relative path, so it is resolved against the
    current working directory. Each entry that exists is handed to
    ``safe_remove``; a banner, per-entry results (printed by
    ``safe_remove``) and a closing summary go to stdout.

    Returns:
        Number of entries for which ``safe_remove`` reported success.
    """
    # Removal targets grouped by category. The order of the groups, and of
    # the names inside each group, is the order in which they are processed.
    removal_groups = (
        # Old models
        [
            "bert_hallucination_model",
            "bias_fixed_mega_model",
            "competition_model",
            "enhanced_contradiction_model",
            "enhanced_hallucination_model",
            "enhanced_hallucination_model_v2",
            "enhanced_model",
            "enhanced_v2_model",
            "final_balanced_mega_model",
            "fine_tuned_hallucination_model",
            "fixed_enhanced_model",
            "mega_hallucination_model_20250814_151158",
            "progressive_model_1",
            "progressive_model_2",
            "progressive_model_3",
            "progressive_stage_1_20250814_155617",
            "progressive_stage_2_20250814_155850",
            "progressive_stage_3_20250814_160640",
            "stable_mega_enhanced_model",
            "stable_mega_model_20250814_155042",
            "cv_fold_0",
            "ensemble_models",
            "evaluation_results",
            "model_checkpoints",
            "training_checkpoints",
        ],
        # Old training scripts
        [
            "train_bert_alternative.py",
            "train_bias_fix.py",
            "train_competition_model.py",
            "train_enhanced_contradiction.py",
            "train_enhanced_model.py",
            "train_enhanced_performance.py",
            "train_final_balance.py",
            "train_fixed_enhanced.py",
            "train_incremental_improvement.py",
            "train_mega_model.py",
            "train_progressive_complete.py",
            "train_simple_competition.py",
            "train_simple_model.py",
            "train_simple_production.py",
            "train_simple_working_model.py",
            "train_smart_incremental.py",
            "train_stable_mega.py",
            "train_working_enhanced.py",
        ],
        # Old test scripts
        [
            "test_all_models.py",
            "test_bias_fixed.py",
            "test_complete_mega_model.py",
            "test_detector_direct.py",
            "test_device_patterns.py",
            "test_enhanced_api.py",
            "test_enhanced_model.py",
            "test_enhanced_v2_comprehensive.py",
            "test_final_balance.py",
            "test_fresh_predictions.py",
            "test_hybrid_accuracy.py",
            "test_mega_final.py",
            "test_mega_model.py",
            "test_model.py",
            "test_raw_model.py",
            "test_regex_fix.py",
            "test_reliability.py",
            "test_stable_mega.py",
        ],
        # Debug / development files
        [
            "debug_prediction.py",
            "debug_rules.py",
            "apply_quick_fix.py",
            "hybrid_detector.py",
            "standalone_test.py",
            "simple_test_server.py",
            "comprehensive_server_test.py",
            "comprehensive_test.py",
        ],
        # Old data files
        [
            "advanced_training_data.csv",
            "bias_fix_training.csv",
            "combined_training_data.csv",
            "competition_training_data.csv",
            "edge_cases_training_data.csv",
            "halueval_combined_training.csv",
            "halueval_training_prepared.csv",
            "mega_training_data.csv",
            "sample_training_data.csv",
        ],
        # Cache / temp files and directories
        [
            "cache_persistent.pkl",
            "app.log",
            "__pycache__",
            ".pytest_cache",
            "logs",
            "tmp",
            "model_cache",
            "plots",
        ],
        # Old analysis scripts
        [
            "analyze_halueval.py",
            "analyze_training_metrics.py",
            "complete_data_audit.py",
            "create_verified_dataset.py",
            "diagnose_bias.py",
            "fact_check_report.py",
            "monitor_training.py",
            "training_summary.py",
            "verify_fixes_and_retrain.py",
            "analyze_cleanup.py",
        ],
        # Config files
        [
            "competition_config.json",
            "monitoring_config.json",
            "setup_report.json",
            "requirements_competition.txt",
        ],
        # Batch files (run_server.bat is intentionally not listed)
        [
            "run_simple_training.bat",
            "run_training.bat",
        ],
        # Documentation (README.md is intentionally not listed)
        [
            "COMPETITION_README.md",
            "RELIABILITY_ANALYSIS.md",
            "setup_instructions.md",
        ],
    )
    targets = [name for group in removal_groups for name in group]

    rule = "=" * 60
    banner = (
        "🧹 STARTING PROJECT CLEANUP (NO BACKUP)",
        rule,
        "⚠️ WARNING: No backup will be created!",
        "🎯 Target: Remove ~90 redundant files/directories",
        "βœ… Keep: Essential production files only",
        rule,
    )
    for line in banner:
        print(line)

    # Entries that do not exist are skipped silently; the generator is
    # consumed in order, so removals happen one entry at a time.
    removed_count = sum(
        1 for entry in targets if os.path.exists(entry) and safe_remove(entry)
    )

    print(rule)
    print(f"πŸŽ‰ Cleanup completed! Removed {removed_count} items.")

    # Informational only: these entries are printed, not checked on disk.
    kept_summary = (
        "\nπŸ“ REMAINING ESSENTIAL FILES:",
        " βœ… app/ (core application)",
        " βœ… complete_halueval_model/ (working AI model)",
        " βœ… comprehensive_training_data.csv (main training data)",
        " βœ… training.csv (backup training data)",
        " βœ… test_comprehensive_hybrid.py (main test)",
        " βœ… test_api.py (API tests)",
        " βœ… test_mega_accuracy.py (accuracy test)",
        " βœ… requirements.txt (dependencies)",
        " βœ… config.yaml (configuration)",
        " βœ… README.md (documentation)",
        " βœ… Dockerfile & docker-compose.yml (deployment)",
        " βœ… prepare_training_data.py (training utility)",
        " βœ… combine_training_data.py (data utility)",
        " βœ… run_server.bat (server script)",
        " βœ… setup_competition.py (setup utility)",
        "\nπŸ’Ύ Project size significantly reduced!",
    )
    for line in kept_summary:
        print(line)
    print(f"πŸ—‘οΈ Removed {removed_count} redundant files")

    return removed_count
def main():
    """Interactive entry point: warn, confirm, clean up, then self-delete.

    Prints a warning banner and reads one line from stdin. The cleanup runs
    only if the reply is exactly ``YES``; any other reply, or a closed
    stdin, cancels without deleting anything. After the cleanup it tries to
    delete ``cleanup_no_backup.py`` from the current working directory.

    Returns:
        None.
    """
    print("PROJECT CLEANUP TOOL - NO BACKUP VERSION")
    print("=" * 60)
    print("⚠️ WARNING: This will permanently delete files!")
    print("πŸ“¦ This will remove ~90 redundant files to save space")
    print("βœ… Essential production files will be preserved")
    print("❌ NO BACKUP will be created")
    print()

    # Ask for confirmation. With no interactive stdin there is nobody to
    # confirm, so treat end-of-file as "not confirmed" instead of crashing.
    try:
        response = input("Are you SURE you want to proceed? (type 'YES' to confirm): ")
    except EOFError:
        print()  # finish the prompt line before the cancel message
        response = ""
    if response != 'YES':
        print("❌ Cleanup cancelled.")
        return

    # Perform cleanup
    removed_count = cleanup_project()
    print("\nπŸŽ‰ Project cleanup completed successfully!")
    print(f"πŸ“Š Removed {removed_count} redundant items")
    print("πŸ”§ All essential files preserved")
    print("πŸ’Ύ Project size significantly reduced")

    # Clean up this script too. This stays best-effort, but only filesystem
    # errors are caught, so Ctrl-C and programming errors are not swallowed,
    # and a failure is reported rather than silently ignored.
    try:
        os.remove("cleanup_no_backup.py")
    except OSError as exc:
        print(f"Could not remove cleanup script: {exc}")
    else:
        print("🧹 Cleanup script removed")
# Start the interactive cleanup only when this file is executed as a script;
# importing it as a module must never trigger any deletion.
if __name__ == "__main__":
    main()