anycoder-227d4fd7 / utils.py
tk-flea's picture
Upload utils.py with huggingface_hub
0e6243c verified
Raw
History Blame Contribute Delete
11.1 kB
import os
import configparser
import glob
import re
from datetime import datetime, timedelta
from pathlib import Path
from typing import Dict, List, Any, Optional
def parse_winapp2_ini(content: str) -> Dict[str, Dict[str, Any]]:
    """
    Parse winapp2.ini configuration file content.

    Lines without an ``=`` and comment lines (starting with ``#`` or ``;``)
    are ignored. The keys ``path``, ``file`` and ``regex`` are split on
    ``;`` into a list of non-empty, stripped values; every other key keeps
    its value as a single stripped string. Sections that end up with no
    key/value pairs are omitted from the result.

    Args:
        content: String content of the INI file

    Returns:
        Dictionary mapping section name to that section's key/value data
    """
    multi_value_keys = ('path', 'file', 'regex')
    comment_prefixes = ('#', ';')
    config: Dict[str, Dict[str, Any]] = {}

    # Splitting on a pattern with one capture group yields
    # [preamble, name1, body1, name2, body2, ...]; the preamble is dropped.
    parts = re.split(r'^\[([^]]+)\]', content, flags=re.MULTILINE)
    for raw_name, body in zip(parts[1::2], parts[2::2]):
        section_data: Dict[str, Any] = {}
        for raw_line in body.split('\n'):
            line = raw_line.strip()
            if '=' not in line or line.startswith(comment_prefixes):
                continue
            key, value = (part.strip() for part in line.split('=', 1))
            if key in multi_value_keys:
                section_data[key] = [
                    item.strip() for item in value.split(';') if item.strip()
                ]
            else:
                section_data[key] = value
        if section_data:
            config[raw_name.strip()] = section_data
    return config
def expand_path_pattern(pattern: str, base_dir: str) -> List[str]:
    """
    Expand path patterns with wildcards to actual paths.

    Environment variables are expanded first (via ``os.path.expandvars``
    and then a case-insensitive substitution of a few common Windows
    ``%VAR%`` names, with hard-coded fallbacks when the variable is unset).
    A pattern containing ``*`` or ``?`` is globbed both as-is and relative
    to ``base_dir``. A plain path is returned if it exists as-is, otherwise
    if it exists relative to ``base_dir``.

    Args:
        pattern: Path pattern with potential wildcards
        base_dir: Base directory to search from

    Returns:
        List of expanded paths, without duplicates, in discovery order
    """
    fallbacks = {
        'USERPROFILE': 'C:/Users',
        'APPDATA': 'C:/Users/Public/AppData/Roaming',
        'LOCALAPPDATA': 'C:/Users/Public/AppData/Local',
        'TEMP': 'C:/Windows/Temp',
        'SYSTEMROOT': 'C:/Windows',
    }

    pattern = os.path.expandvars(pattern)

    def _substitute(match):
        name = match.group(1).upper()
        return os.getenv(name, fallbacks[name])

    # A callable replacement keeps backslashes in Windows paths literal and
    # lets mixed-case spellings such as %AppData% be recognised.
    variable_re = re.compile('%(' + '|'.join(fallbacks) + ')%', re.IGNORECASE)
    pattern = variable_re.sub(_substitute, pattern)

    relative = pattern.lstrip('/\\')
    matches: List[str] = []

    if '*' in pattern or '?' in pattern:
        # Best effort: an unusable pattern simply contributes no matches.
        try:
            matches.extend(glob.glob(pattern, recursive=True))
        except (OSError, ValueError, re.error):
            pass
        try:
            anchored_pattern = os.path.join(base_dir, relative)
            matches.extend(glob.glob(anchored_pattern, recursive=True))
        except (OSError, ValueError, TypeError, re.error):
            pass
    elif os.path.exists(pattern):
        matches.append(pattern)
    else:
        anchored_path = os.path.join(base_dir, relative)
        if os.path.exists(anchored_path):
            matches.append(anchored_path)

    # Both glob attempts can resolve to the same files (e.g. an absolute
    # pattern); report each file only once.
    unique: List[str] = []
    seen = set()
    for path in matches:
        key = os.path.normcase(os.path.abspath(path))
        if key not in seen:
            seen.add(key)
            unique.append(path)
    return unique
def match_file_pattern(filename: str, patterns: List[str]) -> bool:
    """
    Check if filename matches any of the given patterns.

    A pattern containing ``*`` (any run of characters) or ``?`` (any single
    character) is treated as a wildcard that must match the whole file
    name; every other character in it is taken literally. A pattern without
    wildcards must equal the file name. Both comparisons ignore case.

    Args:
        filename: File name to check
        patterns: List of wildcard patterns or exact file names

    Returns:
        True if filename matches any pattern
    """
    def _wildcard_to_regex(wildcard: str) -> str:
        # Escape everything except the two wildcard characters so that
        # '.', '(', '[' etc. in file names are matched literally.
        pieces = []
        for char in wildcard:
            if char == '*':
                pieces.append('.*')
            elif char == '?':
                pieces.append('.')
            else:
                pieces.append(re.escape(char))
        return ''.join(pieces)

    lowered = filename.lower()
    for pattern in patterns:
        if '*' in pattern or '?' in pattern:
            # fullmatch anchors both ends: '*.tmp' must not match 'a.tmpx'.
            regex = _wildcard_to_regex(pattern)
            if re.fullmatch(regex, filename, re.IGNORECASE | re.DOTALL):
                return True
        elif lowered == pattern.lower():
            return True
    return False
def _compile_regexes(patterns: List[str]) -> List[Any]:
    """Compile each regex once, silently dropping patterns that are invalid."""
    compiled = []
    for raw in patterns:
        try:
            compiled.append(re.compile(raw))
        except (re.error, TypeError):
            continue
    return compiled


def _passes_filters(
    filename: str,
    file_path: str,
    file_patterns: List[str],
    regex_required: bool,
    compiled_regexes: List[Any]
) -> bool:
    """Return True if the file satisfies the section's name and regex filters."""
    if file_patterns and not match_file_pattern(filename, file_patterns):
        return False
    # When regexes were configured, at least one valid one must match the
    # full path; if none of them compiled, nothing can match.
    if regex_required and not any(rx.search(file_path) for rx in compiled_regexes):
        return False
    return True


def _iter_directory_files(path: str, include_hidden: bool, include_system: bool):
    """Yield file paths under path, pruning hidden/system directories as asked."""
    system_dirs = ('windows', 'system32', 'syswow64')
    for root, dirs, files in os.walk(path):
        # Assigning to dirs[:] prunes the walk in place.
        if not include_hidden:
            dirs[:] = [d for d in dirs if not d.startswith('.')]
        if not include_system:
            dirs[:] = [d for d in dirs if d.lower() not in system_dirs]
        for filename in files:
            if not include_hidden and filename.startswith('.'):
                continue
            yield os.path.join(root, filename)


def _collect_file_info(
    section_name: str,
    file_path: str,
    cutoff_date: datetime,
    now: datetime
) -> Optional[Dict[str, Any]]:
    """Stat a file and build its record; None if unreadable or newer than cutoff."""
    try:
        stat_info = os.stat(file_path)
        modified_at = datetime.fromtimestamp(stat_info.st_mtime)
    except (OSError, OverflowError, ValueError):
        return None
    if modified_at > cutoff_date:
        return None
    return {
        'section': section_name,
        'path': file_path,
        'size': stat_info.st_size,
        'modified': stat_info.st_mtime,
        'age_days': (now - modified_at).days
    }


def scan_files(
    config: Dict[str, Dict[str, Any]],
    base_dir: str,
    include_hidden: bool = False,
    include_system: bool = False,
    max_age_days: int = 365
) -> List[Dict[str, Any]]:
    """
    Scan for files matching the configuration patterns.

    For every section, each ``path`` entry is expanded with
    ``expand_path_pattern``. Directories are walked recursively; plain
    files are considered directly. A file is reported when it passes the
    section's ``file`` wildcards (if any) and at least one ``regex`` (if
    any, matched against the full path), and when its modification time is
    not newer than ``now - max_age_days``. Unreadable files and paths are
    skipped silently. Within a section each file is reported once.

    NOTE(review): the age check keeps files that are at least
    ``max_age_days`` old and skips more recent ones, which is the opposite
    of what the parameter name suggests. The existing behaviour is
    preserved here; confirm the intent before changing it.

    Args:
        config: Parsed INI configuration
        base_dir: Base directory to scan
        include_hidden: Include files and directories whose name starts
            with a dot when walking directories
        include_system: Descend into directories named windows, system32
            or syswow64
        max_age_days: Age threshold in days (see note above)

    Returns:
        List of file information dictionaries with the keys ``section``,
        ``path``, ``size``, ``modified`` and ``age_days``
    """
    now = datetime.now()
    cutoff_date = now - timedelta(days=max_age_days)
    scanned_files: List[Dict[str, Any]] = []

    for section_name, section_data in config.items():
        file_patterns = section_data.get('file', [])
        regex_patterns = section_data.get('regex', [])
        compiled_regexes = _compile_regexes(regex_patterns)
        regex_required = bool(regex_patterns)
        seen = set()

        expanded_paths: List[str] = []
        for path_pattern in section_data.get('path', []):
            expanded_paths.extend(expand_path_pattern(path_pattern, base_dir))

        for path in expanded_paths:
            try:
                if os.path.isdir(path):
                    candidates = _iter_directory_files(
                        path, include_hidden, include_system
                    )
                elif os.path.isfile(path):
                    candidates = [path]
                else:
                    continue

                for file_path in candidates:
                    filename = os.path.basename(file_path)
                    if not _passes_filters(
                        filename, file_path, file_patterns,
                        regex_required, compiled_regexes
                    ):
                        continue
                    # Overlapping path entries would otherwise report (and
                    # later try to delete) the same file more than once.
                    key = os.path.normcase(os.path.abspath(file_path))
                    if key in seen:
                        continue
                    file_info = _collect_file_info(
                        section_name, file_path, cutoff_date, now
                    )
                    if file_info is not None:
                        seen.add(key)
                        scanned_files.append(file_info)
            except OSError:
                # Best effort: an unreadable path must not abort the scan.
                continue
    return scanned_files
def delete_files(
    files: List[Dict[str, Any]],
    dry_run: bool = True
) -> Dict[str, Any]:
    """
    Delete or preview deletion of files.

    Each entry must provide ``path`` and ``size``. In a dry run nothing is
    touched and every entry is counted as deleted. Otherwise regular files
    are removed with ``os.remove`` and directories with ``shutil.rmtree``;
    entries whose path is neither are counted as processed only. OS errors
    are collected in the result instead of being raised.

    Args:
        files: List of file information dictionaries
        dry_run: If True, don't actually delete files

    Returns:
        Dictionary with the keys ``processed``, ``deleted``, ``size_freed``
        (sum of the ``size`` values of deleted entries) and ``errors``
        (list of message strings)
    """
    # Imported here because the module's import block does not provide it.
    import shutil

    results: Dict[str, Any] = {
        'processed': 0,
        'deleted': 0,
        'size_freed': 0,
        'errors': []
    }
    for file_info in files:
        path = file_info['path']
        size = file_info['size']
        results['processed'] += 1
        try:
            if not dry_run:
                if os.path.isfile(path):
                    os.remove(path)
                elif os.path.isdir(path):
                    shutil.rmtree(path)
                else:
                    # Nothing to delete (e.g. already gone): not counted.
                    continue
            results['deleted'] += 1
            results['size_freed'] += size
        except OSError as e:
            results['errors'].append(f"Error deleting {path}: {str(e)}")
    return results
def get_file_size(path: str) -> int:
    """
    Get file size in bytes.

    Args:
        path: Path of the file to inspect

    Returns:
        Size reported by ``os.stat``, or 0 if the path cannot be stat'ed
    """
    try:
        return os.stat(path).st_size
    except (OSError, ValueError, TypeError):
        # Missing/inaccessible file or an unusable path value; unlike a
        # bare except this lets KeyboardInterrupt/SystemExit propagate.
        return 0
def format_size(size_bytes: int) -> str:
    """
    Format size in human-readable format.

    The value is scaled by powers of 1024 and rendered with two decimals
    and the largest fitting unit from B, KB, MB, GB, TB. Values beyond the
    TB range stay expressed in TB.

    Args:
        size_bytes: Size in bytes

    Returns:
        Formatted string such as "1.50 KB"; "0 B" for negative input
    """
    if size_bytes < 0:
        return "0 B"
    units = ['B', 'KB', 'MB', 'GB', 'TB']
    size = float(size_bytes)
    # Only step through the units that have a larger successor, so the
    # value is never divided past the last unit it will be labelled with.
    for unit in units[:-1]:
        if size < 1024:
            return f"{size:.2f} {unit}"
        size /= 1024
    return f"{size:.2f} {units[-1]}"