# Krishna1107's picture
# fixed inference
# 2794920
"""Docker build/run simulator — deterministic, rule-based."""
from fnmatch import fnmatchcase
from typing import Dict, List, Optional, Set

from server.models import FileContent
class DockerSimulator:
    """Rule-based stand-in for ``docker build`` followed by ``docker run``.

    Nothing is executed. :meth:`validate` inspects the Dockerfile text and the
    build-context files and applies a fixed, ordered list of rules; the first
    rule that fires decides the result, so the outcome is deterministic.
    """

    # Instruction keywords accepted at the start of a logical Dockerfile line.
    VALID_INSTRUCTIONS: Set[str] = {
        "FROM",
        "RUN",
        "CMD",
        "LABEL",
        "MAINTAINER",
        "EXPOSE",
        "ENV",
        "ADD",
        "COPY",
        "ENTRYPOINT",
        "VOLUME",
        "USER",
        "WORKDIR",
        "ARG",
        "ONBUILD",
        "STOPSIGNAL",
        "HEALTHCHECK",
        "SHELL",
    }

    @staticmethod
    def _build_failure(error: str, line: Optional[int] = None) -> Dict[str, object]:
        """Return the result dict for a failed build (``line`` only when given)."""
        result: Dict[str, object] = {
            "build_success": False,
            "run_success": False,
            "error": error,
        }
        if line is not None:
            result["line"] = line
        return result

    @staticmethod
    def _run_failure(run_error: str) -> Dict[str, object]:
        """Return the result dict for an image that builds but fails to run."""
        return {"build_success": True, "run_success": False, "run_error": run_error}

    def _split_lines(self, content: str) -> List[str]:
        """Split ``content`` on newlines and drop trailing whitespace per line."""
        return [line.rstrip() for line in content.split("\n")]

    def _non_empty_non_comment_lines(self, lines: List[str]) -> List[str]:
        """Return the stripped lines that are neither blank nor ``#`` comments."""
        return [line.strip() for line in lines if line.strip() and not line.strip().startswith("#")]

    def _source_exists(self, source: str, context_files: Dict[str, "FileContent"]) -> bool:
        """Tell whether a COPY source resolves to something in the build context.

        Args:
            source: Source operand exactly as written in the Dockerfile.
            context_files: Build-context files keyed by relative path; only the
                keys are consulted.

        Returns:
            ``True`` when ``source`` is the context root (``.`` or ``./``),
            names a file, names a directory that contains a file, or is a
            wildcard pattern matching a file or one of its parent directories.
        """
        if source in {".", "./"}:
            return True

        def normalize(path: str) -> str:
            # "./app.py" and "app.py" name the same context entry.
            return path[2:] if path.startswith("./") else path

        wanted = normalize(source).rstrip("/")
        known_paths = [normalize(path) for path in context_files]

        # Exact file match, or directory match ("dist/" matches "dist/index.html").
        if wanted in known_paths:
            return True
        if any(path.startswith(wanted + "/") for path in known_paths):
            return True

        if not any(char in wanted for char in "*?["):
            return False

        # Match segment by segment so a wildcard never crosses a "/". Comparing
        # only the leading segments of each path lets a pattern select a
        # directory as well as a file.
        pattern_parts = wanted.split("/")
        for path in known_paths:
            path_parts = path.split("/")
            if len(path_parts) < len(pattern_parts):
                continue
            if all(fnmatchcase(part, pat) for part, pat in zip(path_parts, pattern_parts)):
                return True
        return False

    def _join_continuation_lines(self, lines: List[str]) -> List[str]:
        """Join lines ending with backslash into single logical lines."""
        result: List[str] = []
        current = ""
        for line in lines:
            stripped = line.rstrip()
            if stripped.endswith("\\"):
                # Drop the backslash and keep accumulating.
                current += stripped[:-1] + " "
            else:
                current += stripped
                result.append(current)
                current = ""
        if current:
            # The last line ended with a backslash: keep what was collected.
            result.append(current)
        return result

    def validate(
        self,
        dockerfile: Optional["FileContent"],
        context_files: Dict[str, "FileContent"],
    ) -> Dict[str, object]:
        """Simulate building and running the image described by ``dockerfile``.

        Args:
            dockerfile: Object whose ``content`` attribute holds the Dockerfile
                text, or ``None`` when the Dockerfile is absent.
            context_files: Build-context files keyed by relative path; each
                value exposes its text as ``content``.

        Returns:
            A dict that always has ``build_success`` and ``run_success``. A
            failed build adds ``error`` and, for parse errors, ``line``: the
            1-based position of the offending instruction, not counting blank
            lines, comments or continuation lines. A failed run adds
            ``run_error``.
        """
        if dockerfile is None:
            return self._build_failure("Dockerfile missing")

        content = dockerfile.content
        physical_lines = self._non_empty_non_comment_lines(self._split_lines(content))
        # Comments are removed before joining so that a comment inside a
        # continued instruction does not break it apart. After joining, one
        # entry is one instruction; without this step every continuation line
        # of a multi-line RUN would be rejected as an unknown instruction.
        joined_lines = self._join_continuation_lines(physical_lines)
        active_lines = [line.strip() for line in joined_lines if line.strip()]
        if not active_lines:
            return self._build_failure("Dockerfile is empty")

        failure = self._find_build_error(content, active_lines, context_files)
        if failure is None:
            failure = self._find_run_error(content, active_lines, context_files)
        if failure is not None:
            return failure
        return {"build_success": True, "run_success": True}

    def _find_build_error(
        self,
        content: str,
        active_lines: List[str],
        context_files: Dict[str, "FileContent"],
    ) -> Optional[Dict[str, object]]:
        """Return the first build-time failure, or ``None`` if the build passes."""
        # ARG before FROM is fine, but the first real instruction must be FROM.
        keywords = (line.split()[0].upper() for line in active_lines)
        first_non_arg = next((keyword for keyword in keywords if keyword != "ARG"), None)
        if first_non_arg != "FROM":
            return self._build_failure("Dockerfile must start with FROM")

        # Every logical line must start with a known instruction keyword. A
        # line left orphaned by a missing backslash (for example one starting
        # with "&&") is reported here.
        for idx, raw in enumerate(active_lines, start=1):
            token = raw.split()[0].upper()
            # Lines that begin with a --flag are tolerated rather than
            # reported; this leniency is kept for backward compatibility.
            if token.startswith("--"):
                continue
            if token not in self.VALID_INSTRUCTIONS:
                return self._build_failure(
                    f"Dockerfile parse error: unknown instruction: {token}",
                    line=idx,
                )

        # Known-bad base image tag.
        if "FROM python:3.9-slimm" in content:
            return self._build_failure("pull access denied for python:3.9-slimm")

        # Typo in requirements filename.
        if "requirments.txt" in content:
            return self._build_failure(
                "COPY failed: file not found in build context: requirments.txt"
            )

        # COPY sources must exist in the build context.
        for raw in active_lines:
            parts = raw.split()
            if parts[0].upper() != "COPY":
                continue
            operands = parts[1:]
            flags: List[str] = []
            while operands and operands[0].startswith("--"):
                flags.append(operands.pop(0))
            if len(operands) < 2:
                return self._build_failure("COPY requires source and destination")
            # With --from the sources live in another stage or image, not in
            # the build context, so there is nothing to look up here.
            if any(flag.startswith("--from=") for flag in flags):
                continue
            # Every operand except the last one is a source.
            for src in operands[:-1]:
                if not self._source_exists(src, context_files):
                    return self._build_failure(
                        f"COPY failed: file not found in build context: {src}"
                    )

        # Platform ARGs need to be declared.
        if "--platform=$BUILDPLATFORM" in content and "ARG BUILDPLATFORM" not in content:
            return self._build_failure("failed to parse platform: BUILDPLATFORM not declared")
        if "--platform=$TARGETPLATFORM" in content and "ARG TARGETPLATFORM" not in content:
            return self._build_failure("failed to parse platform: TARGETPLATFORM not declared")

        # Multi-stage: output dir mismatch (dist vs build).
        if "COPY --from=builder /app/dist" in content:
            pkg = context_files.get("package.json")
            if pkg and "react-scripts build" in pkg.content:
                return self._build_failure("COPY failed: stat app/dist: file does not exist")

        # EXPOSE must have a numeric port, optionally followed by /protocol.
        for raw in active_lines:
            parts = raw.split()
            if parts[0].upper() != "EXPOSE":
                continue
            for part in parts[1:]:
                cleaned = part.strip('"').strip("'")
                if not cleaned.split("/")[0].isdigit():
                    return self._build_failure(
                        f"EXPOSE requires numeric port or port/protocol, got: {cleaned}"
                    )

        return None

    def _find_run_error(
        self,
        content: str,
        active_lines: List[str],
        context_files: Dict[str, "FileContent"],
    ) -> Optional[Dict[str, object]]:
        """Return the first run-time failure of a built image, or ``None``."""
        # No WORKDIR -> module resolution fails.
        has_workdir = "WORKDIR" in content
        if ("npm start" in content or 'CMD ["npm", "start"]' in content) and not has_workdir:
            return self._run_failure("Error: Cannot find module '/package.json'")

        # ENTRYPOINT + CMD both specify python -> conflict.
        if 'ENTRYPOINT ["python"' in content and 'CMD ["python"' in content:
            return self._run_failure(
                "container exits immediately; ENTRYPOINT and CMD both specify full command"
            )

        # Entrypoint script needs chmod +x.
        if 'ENTRYPOINT ["./start.sh"]' in content and "chmod +x" not in content:
            return self._run_failure("exec ./start.sh: permission denied")

        # DATABASE_URL env var missing: the Dockerfile mentions app.py, never
        # mentions DATABASE_URL, and some context file mentions gunicorn.
        needs_database_url = (
            "app.py" in content
            and "DATABASE_URL" not in content
            and any("gunicorn" in fc.content for fc in context_files.values() if fc.content)
        )
        if needs_database_url:
            return self._run_failure(
                "KeyError: 'DATABASE_URL' — Application requires DATABASE_URL environment variable"
            )

        # A non-root user can't bind privileged ports. The last USER
        # instruction decides who the container runs as, and every exposed
        # port is considered, not just the first one.
        runs_as_non_root = False
        exposed_ports: List[int] = []
        for raw in active_lines:
            parts = raw.split()
            keyword = parts[0].upper()
            if keyword == "USER" and len(parts) > 1:
                # "name:group" -> "name"; both "root" and UID 0 are root.
                user = parts[1].strip('"').strip("'").split(":")[0].lower()
                runs_as_non_root = user not in {"root", "0"}
            elif keyword == "EXPOSE":
                for part in parts[1:]:
                    port = part.strip('"').strip("'").split("/")[0]
                    if port.isdigit():
                        exposed_ports.append(int(port))

        if runs_as_non_root:
            for expose_port in exposed_ports:
                if expose_port < 1024:
                    return self._run_failure(
                        f"PermissionError: [Errno 13] Permission denied — non-root user cannot bind to port {expose_port}"
                    )

        return None