File size: 7,520 Bytes
2df0cf9
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
"""
Dockerfile validation service
Tests generated Dockerfiles for syntax and build errors
"""

import subprocess
import tempfile
import os
from pathlib import Path
from typing import Dict, List, Optional
import shutil


class DockerfileValidator:
    """Heuristic validator for Dockerfiles and docker-compose files.

    All checks are text based; no image is ever built. When a ``hadolint``
    executable is found on ``PATH`` its findings are merged into the
    Dockerfile report.
    """

    def __init__(self, logger):
        """Store the logger used to report hadolint failures.

        Args:
            logger: Object exposing a ``warning(message)`` method.
        """
        self.logger = logger

    @staticmethod
    def _parse_instructions(dockerfile_content: str) -> List[tuple]:
        """Return ``(KEYWORD, args)`` for every instruction in the Dockerfile.

        Comment lines and blank lines are skipped, and physical lines that
        continue a previous instruction (previous line ended with a
        backslash) are not treated as instructions. Keywords are upper-cased
        so lowercase instructions are recognised. ``args`` is the remainder
        of the instruction's first physical line, stripped. Heredoc bodies
        are not recognised and are parsed like ordinary lines.
        """
        instructions = []
        in_continuation = False
        for raw_line in dockerfile_content.splitlines():
            stripped = raw_line.strip()
            # Blank and comment lines neither start an instruction nor end
            # a pending continuation.
            if not stripped or stripped.startswith('#'):
                continue
            if not in_continuation:
                parts = stripped.split(None, 1)
                args = parts[1].strip() if len(parts) > 1 else ''
                instructions.append((parts[0].upper(), args))
            in_continuation = stripped.endswith('\\')
        return instructions

    def validate_dockerfile(self, dockerfile_content: str, stack_type: str) -> Dict:
        """Validate a Dockerfile for syntax problems and best practices.

        Args:
            dockerfile_content: Full text of the Dockerfile. ``None`` is
                treated as an empty file.
            stack_type: Not read by this method; kept so existing callers
                continue to work.

        Returns:
            {
                "valid": bool,            # True when no errors were found
                "errors": List[str],
                "warnings": List[str],
                "suggestions": List[str],
                "score": int              # 0-100, see _calculate_score
            }
        """
        errors: List[str] = []
        warnings: List[str] = []
        suggestions: List[str] = []

        dockerfile_content = dockerfile_content or ""

        # Work on parsed instructions rather than raw substrings so that
        # comments, "HEALTHCHECK CMD" or "ENV DB_USER ..." cannot be mistaken
        # for FROM / CMD / USER directives.
        instructions = self._parse_instructions(dockerfile_content)
        keywords = {keyword for keyword, _ in instructions}

        # Required / recommended directives
        if 'FROM' not in keywords:
            errors.append("Missing FROM directive - every Dockerfile must start with a base image")

        if 'WORKDIR' not in keywords:
            warnings.append("No WORKDIR directive found - consider setting a working directory")

        if 'EXPOSE' not in keywords:
            warnings.append("No EXPOSE directive found - consider documenting the port your app uses")

        if 'CMD' not in keywords and 'ENTRYPOINT' not in keywords:
            warnings.append("No CMD or ENTRYPOINT directive found - how will your container start?")

        # CMD must use exec form. Only the arguments are inspected, so
        # indentation in front of the instruction is not reported as a
        # formatting error.
        for keyword, args in instructions:
            if keyword != 'CMD':
                continue
            if not args.startswith('['):
                errors.append('CMD should use exec form: CMD ["command", "arg"] not shell form')
            elif '" "' in args or '  ' in args:
                errors.append("CMD format error: arguments should be separate array elements")

        # Image-size hygiene
        if 'apt-get update' in dockerfile_content and 'rm -rf /var/lib/apt/lists/*' not in dockerfile_content:
            suggestions.append("Consider cleaning apt cache: '&& rm -rf /var/lib/apt/lists/*'")

        if 'pip install' in dockerfile_content and '--no-cache-dir' not in dockerfile_content:
            suggestions.append("Consider using pip with --no-cache-dir to reduce image size")

        if 'npm install' in dockerfile_content:
            # Look for an explicit production install; a bare "ci" substring
            # would match unrelated words.
            production_markers = ('--production', '--omit=dev', 'npm ci')
            if not any(marker in dockerfile_content for marker in production_markers):
                suggestions.append("Consider using 'npm ci' or 'npm install --production' for production builds")

        # Security: running as root
        user_found = 'USER' in keywords
        # Apache/Nginx style images are exempted when the file also mentions
        # the www-data / nginx account.
        lowered = dockerfile_content.lower()
        is_web_server = any(server in lowered for server in ('apache', 'nginx', 'httpd'))
        has_www_data = 'www-data' in dockerfile_content or 'nginx' in dockerfile_content

        if not user_found and not (is_web_server and has_www_data):
            warnings.append("No USER directive found - running as root is a security risk")

        # Merge hadolint findings when the tool is available
        hadolint_results = self._run_hadolint(dockerfile_content)
        if hadolint_results:
            errors.extend(hadolint_results.get('errors', []))
            warnings.extend(hadolint_results.get('warnings', []))

        return {
            "valid": not errors,
            "errors": errors,
            "warnings": warnings,
            "suggestions": suggestions,
            "score": self._calculate_score(errors, warnings, suggestions)
        }

    @staticmethod
    def _parse_hadolint_output(output: str) -> Dict:
        """Split hadolint's text output into error and warning messages.

        Only lines carrying a ``DL``/``SC`` rule code are considered. The
        severity is looked up in the part of the line before the first
        ``": "`` so that words such as "error" inside the message text do not
        change the classification. Lines of any other severity are dropped.
        """
        errors: List[str] = []
        warnings: List[str] = []

        for line in output.split('\n'):
            # Without a ": " separator the header is the whole line.
            header = line.partition(': ')[0]
            if 'DL' not in header and 'SC' not in header:
                continue
            message = line.split(':', 1)[-1].strip() if ':' in line else line
            severity = header.lower()
            if 'error' in severity:
                errors.append(message)
            elif 'warning' in severity:
                warnings.append(message)

        return {"errors": errors, "warnings": warnings}

    def _run_hadolint(self, dockerfile_content: str) -> Optional[Dict]:
        """Lint the Dockerfile with hadolint when it is installed.

        Returns:
            ``{"errors": [...], "warnings": [...]}`` on a completed run, or
            ``None`` when hadolint is not on ``PATH`` or the run failed (the
            failure is logged as a warning).
        """
        if not shutil.which('hadolint'):
            return None

        temp_path = None
        try:
            # delete=False so the file can be closed before hadolint opens it.
            with tempfile.NamedTemporaryFile(
                mode='w', suffix='.Dockerfile', delete=False, encoding='utf-8'
            ) as handle:
                temp_path = handle.name
                handle.write(dockerfile_content)

            result = subprocess.run(
                ['hadolint', temp_path],
                capture_output=True,
                text=True,
                timeout=5
            )
        except Exception as e:
            # Linting is best effort: report and fall back to built-in checks.
            self.logger.warning(f"Hadolint validation failed: {str(e)}")
            return None
        finally:
            # Remove the temp file on every path, including timeouts.
            if temp_path is not None:
                try:
                    os.unlink(temp_path)
                except OSError as cleanup_error:
                    self.logger.warning(
                        f"Could not remove temporary Dockerfile {temp_path}: {cleanup_error}"
                    )

        if result.returncode == 0:
            return {"errors": [], "warnings": []}

        return self._parse_hadolint_output(result.stdout)

    def _calculate_score(self, errors: List, warnings: List, suggestions: List) -> int:
        """Calculate quality score 0-100.

        Starts at 100 and subtracts 20 per error, 5 per warning and 2 per
        suggestion; the result is clamped to the 0-100 range.
        """
        score = 100
        score -= len(errors) * 20  # Critical issues
        score -= len(warnings) * 5  # Minor issues
        score -= len(suggestions) * 2  # Improvements
        return max(0, min(100, score))

    @staticmethod
    def _top_level_keys(lines: List[str]) -> set:
        """Return the mapping keys found at the outermost indentation level.

        Comment lines, blank lines and YAML document markers are ignored.
        The outermost level is the smallest indentation among the remaining
        lines. Surrounding quotes are removed from keys.
        """
        entries = []
        for raw_line in lines:
            stripped = raw_line.strip()
            if not stripped or stripped.startswith('#') or stripped in ('---', '...'):
                continue
            indent = len(raw_line) - len(raw_line.lstrip())
            entries.append((indent, stripped))

        if not entries:
            return set()

        base_indent = min(indent for indent, _ in entries)
        keys = set()
        for indent, text in entries:
            if indent == base_indent and ':' in text:
                keys.add(text.split(':', 1)[0].strip().strip('"\''))
        return keys

    @staticmethod
    def _has_long_form_build(lines: List[str]) -> bool:
        """Return True when a ``build:`` key opens a mapping instead of a path.

        ``build: ./dir`` (short form) already names its build context, so
        only ``build:`` with no inline value, or with an inline ``{...}``
        mapping, counts as long form.
        """
        for raw_line in lines:
            stripped = raw_line.strip()
            if not stripped.startswith('build:'):
                continue
            value = stripped[len('build:'):].strip()
            if not value or value.startswith(('#', '{')):
                return True
        return False

    def validate_docker_compose(self, compose_content: str) -> Dict:
        """Validate docker-compose.yml content.

        Args:
            compose_content: Full text of the compose file. ``None`` is
                treated as an empty file.

        Returns:
            Dict with the keys ``valid``, ``errors``, ``warnings``,
            ``suggestions`` and ``score``, shaped like the result of
            ``validate_dockerfile``.
        """
        errors: List[str] = []
        warnings: List[str] = []
        suggestions: List[str] = []

        compose_content = compose_content or ""
        lines = compose_content.split('\n')

        # Only top-level keys count: a nested "api_version:" or a commented
        # out "# services:" must not satisfy these checks.
        top_level_keys = self._top_level_keys(lines)

        # NOTE(review): recent Compose releases treat the top-level "version"
        # key as obsolete - confirm whether this warning is still wanted.
        if 'version' not in top_level_keys:
            warnings.append("No version specified - consider adding 'version: \"3.8\"'")

        if 'services' not in top_level_keys:
            errors.append("Missing 'services:' section - compose file must define services")

        # Check for common issues
        if self._has_long_form_build(lines) and 'context:' not in compose_content:
            warnings.append("Build directive found without context - specify build context")

        if 'depends_on:' in compose_content:
            suggestions.append("Using depends_on: Consider adding healthchecks for better reliability")

        # Check for volumes without persistence warning
        if 'volumes:' not in compose_content and 'database' in compose_content.lower():
            warnings.append("Database service found without volumes - data will be lost on restart")

        return {
            "valid": not errors,
            "errors": errors,
            "warnings": warnings,
            "suggestions": suggestions,
            "score": self._calculate_score(errors, warnings, suggestions)
        }