pranit / validate.py
RushiMane2003's picture
Upload 41 files
99f938a verified
#!/usr/bin/env python
"""
Pre-submission validation script for HF Spaces deployment.
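Checks:
✓ Environment variables are set
✓ Required files exist
✓ inference.py format compliance
✓ requirements.txt lists the required packages
✓ openenv.yaml contains the expected sections
✓ Docker build (optional, --docker)
✓ Inference script execution (skipped with --skip-inference)
✓ stdout format compliance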
Run:
python validate.py
python validate.py --skip-inference # Skip inference execution
python validate.py --docker # Also validate Docker build
"""
import os
import sys
import subprocess
import json
import re
from pathlib import Path
from typing import List, Tuple
class Colors:
    """ANSI escape sequences used to colorize the validator's terminal output."""

    # Foreground colors (bright variants).
    GREEN = "\033[92m"
    RED = "\033[91m"
    YELLOW = "\033[93m"
    BLUE = "\033[94m"

    # Restores the terminal's default attributes after a colored span.
    RESET = "\033[0m"
def print_check(name: str, passed: bool, msg: str = "") -> None:
    """Print the result line for one check, optionally followed by a detail line.

    Args:
        name: Label of the check being reported.
        passed: Outcome of the check; selects the marker and its color.
        msg: Optional detail text, printed on its own line when non-empty.
    """
    # The marker glyph carries the outcome; the color is only decoration.
    # Without a glyph the status is an empty colored span, so pass and fail
    # look identical wherever ANSI colors are not rendered (pipes, CI logs).
    if passed:
        status = f"{Colors.GREEN}✓{Colors.RESET}"
    else:
        status = f"{Colors.RED}✗{Colors.RESET}"
    print(f" {status} {name}")
    if msg:
        print(f" {msg}")
def check_environment_variables() -> bool:
    """Check that the mandatory environment variables are set and non-empty.

    Prints one result line per variable. For a variable that is set, the
    detail line shows its value (truncated to 50 characters), except for
    credentials, whose value is never echoed.

    Returns:
        True if every required variable has a non-empty value.
    """
    print(f"\n{Colors.BLUE}1. Environment Variables{Colors.RESET}")
    required_vars = {
        "API_BASE_URL": "LLM API endpoint",
        "MODEL_NAME": "Model identifier",
        "HF_TOKEN": "Hugging Face token",
    }
    # Credentials must not end up in terminal scrollback or CI logs, so only
    # their presence and length are reported.
    secret_vars = {"HF_TOKEN"}

    all_ok = True
    for var, desc in required_vars.items():
        value = os.getenv(var)
        passed = bool(value)  # an empty string counts as "not set"
        if not passed:
            detail = desc
        elif var in secret_vars:
            detail = f"Set: <hidden, {len(value)} chars>"
        elif len(value) > 50:
            detail = f"Set: {value[:50]}..."
        else:
            # No ellipsis when nothing was cut off.
            detail = f"Set: {value}"
        print_check(var, passed, detail)
        all_ok = all_ok and passed
    return all_ok
def check_required_files() -> bool:
    """Report, for each required project file, whether it exists.

    Paths are resolved relative to the current working directory.

    Returns:
        True if all listed files exist.
    """
    print(f"\n{Colors.BLUE}2. Required Files{Colors.RESET}")
    # Maps each expected path to the description shown next to its result.
    expected = {
        "inference.py": "Inference script",
        "requirements.txt": "Python dependencies",
        "openenv.yaml": "OpenEnv specification",
        "Dockerfile": "Container definition",
        ".env.example": "Environment template",
    }
    outcomes = []
    for path_name, description in expected.items():
        present = Path(path_name).exists()
        print_check(path_name, present, description)
        outcomes.append(present)
    return all(outcomes)
def check_inference_compliance() -> bool:
    """Check inference.py for the markers expected of a compliant script.

    These are plain substring tests on the file's text; the script is not
    imported or executed here.

    Returns:
        True if inference.py could be read and every marker was found.
        False if the file could not be read or any marker is missing.
    """
    print(f"\n{Colors.BLUE}3. Inference Script Compliance{Colors.RESET}")

    # Python source is UTF-8 by default, so decode it as such instead of
    # relying on the platform's locale encoding.
    try:
        with open("inference.py", encoding="utf-8") as f:
            content = f.read()
    except (OSError, UnicodeError) as e:
        print_check("Read inference.py", False, str(e))
        return False

    checks = [
        # Imports
        (
            "OpenAI imports",
            "from openai import OpenAI" in content or "import openai" in content,
        ),
        # Environment-driven configuration
        ("API_BASE_URL config", "API_BASE_URL" in content),
        ("MODEL_NAME config", "MODEL_NAME" in content),
        ("HF_TOKEN config", "HF_TOKEN" in content),
        # Logging format
        ("[START] logging", "[START]" in content and "task=" in content),
        ("[STEP] logging", "[STEP]" in content and "step=" in content),
        ("[END] logging", "[END]" in content and "success=" in content),
        # Entry point
        ("Main entry point", "if __name__" in content and "main()" in content),
        # Async support
        ("Async support", "asyncio" in content or "async def" in content),
    ]

    all_ok = True
    for check_name, passed in checks:
        print_check(check_name, passed)
        all_ok = all_ok and passed
    return all_ok
def check_requirements() -> bool:
    """Check that requirements.txt mentions every required package.

    Matching is a case-insensitive substring test against the file's
    non-comment text, so variants such as ``opencv-python-headless`` satisfy
    ``opencv-python``.

    Returns:
        True if requirements.txt could be read and all packages were found.
    """
    print(f"\n{Colors.BLUE}4. Python Dependencies{Colors.RESET}")
    required_packages = [
        "openai",
        "numpy",
        "opencv-python",
    ]
    try:
        with open("requirements.txt", encoding="utf-8") as f:
            raw_lines = f.read().lower().split("\n")
    except (OSError, UnicodeError) as e:
        print_check("Read requirements.txt", False, str(e))
        return False

    # Drop comments so a package that is only mentioned in one (for example
    # "# numpy removed") does not count as declared. As in pip, '#' starts a
    # comment only at the beginning of a line or after whitespace, which
    # keeps URL fragments like "#egg=name" intact.
    declared = "\n".join(re.sub(r"(^|\s)#.*$", "", line) for line in raw_lines)

    all_ok = True
    for pkg in required_packages:
        found = pkg.lower() in declared
        print_check(pkg, found)
        all_ok = all_ok and found
    return all_ok
def check_openenv_yaml() -> bool:
    """Check that openenv.yaml contains the expected section keys.

    The file is not parsed as YAML; each check is a substring test for the
    key followed by a colon, anywhere in the text.

    Returns:
        True if openenv.yaml could be read and every key was found.
    """
    print(f"\n{Colors.BLUE}5. OpenEnv YAML Specification{Colors.RESET}")

    # Read as UTF-8 explicitly rather than with the locale's default encoding.
    try:
        with open("openenv.yaml", encoding="utf-8") as f:
            content = f.read()
    except (OSError, UnicodeError) as e:
        print_check("Read openenv.yaml", False, str(e))
        return False

    # (label shown to the user, text that must appear in the file)
    expected_keys = (
        ("Name field", "name:"),
        ("Endpoints section", "endpoints:"),
        ("Reset endpoint", "reset:"),
        ("Step endpoint", "step:"),
        ("Tasks section", "tasks:"),
        ("Environment variables", "environment_variables:"),
        ("Validation rules", "validation:"),
    )

    all_ok = True
    for check_name, needle in expected_keys:
        passed = needle in content
        print_check(check_name, passed)
        all_ok = all_ok and passed
    return all_ok
# A non-negative decimal number: "1", "1.", "1.5" or ".5" (but not a bare ".").
_UNSIGNED_NUMBER = r"(?:\d+\.?\d*|\.\d+)"

# For each line tag: the message reported when no such line exists, and the
# (field name, pattern) pairs every line with that tag must satisfy.
_STDOUT_LINE_RULES = (
    (
        "[START]",
        "Missing [START] line",
        (
            ("task", re.compile(r"task=\w+")),
            ("env", re.compile(r"env=\w+")),
            ("model", re.compile(r"model=")),
        ),
    ),
    (
        "[STEP]",
        "Missing [STEP] lines",
        (
            ("step", re.compile(r"step=\d+")),
            # Rewards may be penalties, so an optional sign is accepted.
            ("reward", re.compile(r"reward=[-+]?" + _UNSIGNED_NUMBER)),
            ("done", re.compile(r"done=(true|false)")),
        ),
    ),
    (
        "[END]",
        "Missing [END] line",
        (
            ("success", re.compile(r"success=(true|false)")),
            ("steps", re.compile(r"steps=\d+")),
            ("score", re.compile(r"score=" + _UNSIGNED_NUMBER)),
        ),
    ),
)


def validate_stdout_format(output: str) -> Tuple[bool, List[str]]:
    """Validate that *output* follows the [START] / [STEP] / [END] log format.

    The output must contain at least one line starting with each tag, and
    every tagged line must carry its required fields. Lines that start with
    none of the tags are ignored.

    Args:
        output: Captured text to validate.

    Returns:
        A ``(ok, errors)`` pair: ``ok`` is True when no problem was found and
        ``errors`` lists one message per problem (missing-line messages
        first, then per-line field errors in line order).
    """
    errors: List[str] = []
    lines = output.strip().split("\n")

    # Presence of each line type.
    for tag, missing_message, _ in _STDOUT_LINE_RULES:
        if not any(line.startswith(tag) for line in lines):
            errors.append(missing_message)

    # Required fields on every tagged line.
    for line in lines:
        for tag, _, field_rules in _STDOUT_LINE_RULES:
            if line.startswith(tag):
                errors.extend(
                    f"{tag} missing {field} field: {line}"
                    for field, pattern in field_rules
                    if not pattern.search(line)
                )
                break

    return not errors, errors
def check_docker_build() -> bool:
    """Check that the project's Dockerfile builds.

    Runs ``docker build`` in the current directory with a 600-second limit.
    If the ``docker`` executable cannot be found, the check is skipped and
    counted as passed.

    Returns:
        True if the build succeeded or Docker is not installed; False if the
        Dockerfile is missing, the build failed, or it timed out.
    """
    print(f"\n{Colors.BLUE}6. Docker Build{Colors.RESET}")
    if not Path("Dockerfile").exists():
        print_check("Dockerfile exists", False)
        return False

    timeout_s = 600
    try:
        print(" Building Docker image (this may take a few minutes)...")
        result = subprocess.run(
            ["docker", "build", "-t", "autonomous-traffic-control:test", "."],
            capture_output=True,
            text=True,
            timeout=timeout_s,
        )
    except FileNotFoundError:
        # Docker is optional tooling: its absence is a skip, not a failure.
        print(f" {Colors.YELLOW}⚠ Docker not found - skipping{Colors.RESET}")
        return True
    except subprocess.TimeoutExpired:
        print_check("Docker build", False, f"Timeout (>{timeout_s}s)")
        return False
    except Exception as e:
        print_check("Docker build", False, str(e))
        return False

    passed = result.returncode == 0
    print_check("Docker build", passed)
    if not passed:
        # The failing step is reported at the end of the build log; the
        # beginning is only progress output, so show the tail.
        print(f" Error: {result.stderr.strip()[-200:]}")
    return passed
def check_inference_execution() -> bool:
    """Run inference.py and check its exit status and stdout log format.

    The script is run with the same interpreter that runs this validator,
    with a copy of the current environment and a 30-second limit. Only the
    script's stdout is checked against the log format; stderr is used for
    diagnostics when the script exits with a non-zero status.

    Returns:
        True if the script exited with status 0 and its stdout passed
        ``validate_stdout_format``; False otherwise (including timeout).
    """
    print(f"\n{Colors.BLUE}7. Inference Execution{Colors.RESET}")
    if not Path("inference.py").exists():
        print_check("inference.py", False)
        return False

    timeout_s = 30
    try:
        print(f" Running inference script (timeout: {timeout_s}s)...")
        env = os.environ.copy()
        if not env.get("API_BASE_URL"):
            print(f" {Colors.YELLOW}⚠ API_BASE_URL not set - using default{Colors.RESET}")
        if not env.get("HF_TOKEN"):
            print(f" {Colors.YELLOW}⚠ HF_TOKEN not set - validation will fail{Colors.RESET}")
        result = subprocess.run(
            # Use the current interpreter so the script sees the same
            # installed packages; a bare "python" may resolve to a different
            # installation or not exist at all.
            [sys.executable or "python", "inference.py"],
            capture_output=True,
            text=True,
            timeout=timeout_s,
            env=env,
        )
    except subprocess.TimeoutExpired:
        print_check("Script execution", False, f"Timeout (>{timeout_s}s)")
        return False
    except Exception as e:
        print_check("Script execution", False, str(e))
        return False

    # Check execution
    execution_ok = result.returncode == 0
    print_check("Script execution", execution_ok)
    if not execution_ok and result.stderr.strip():
        # The end of stderr is where a traceback's final error line is.
        print(f" {Colors.RED}Error: {result.stderr.strip()[-200:]}{Colors.RESET}")

    # Check stdout format. stderr is deliberately left out: the format is a
    # requirement on stdout, and mixing the two streams could both hide a
    # script that logs to the wrong stream and glue unrelated lines together.
    stdout = result.stdout
    format_ok, errors = validate_stdout_format(stdout)
    print_check("Stdout format", format_ok)
    for error in errors[:3]:  # keep the report short
        print(f" {Colors.RED}Error: {error}{Colors.RESET}")

    # Show sample output
    start_line = next(
        (line for line in stdout.split("\n") if "[START]" in line), None
    )
    if start_line is not None:
        print(f" Sample: {start_line[:80]}...")

    return execution_ok and format_ok
def main():
    """Parse command-line options, run the checks, and print a summary.

    The five static checks always run. The Docker build check runs only with
    ``--docker``; the inference execution check runs unless
    ``--skip-inference`` is given.

    Returns:
        0 if every executed check passed, 1 otherwise.
    """
    import argparse

    parser = argparse.ArgumentParser(
        description="Validate autonomous-traffic-control for HF Spaces deployment"
    )
    parser.add_argument(
        "--skip-inference",
        action="store_true",
        help="Skip inference execution check",
    )
    parser.add_argument(
        "--docker",
        action="store_true",
        help="Also validate Docker build",
    )
    options = parser.parse_args()

    rule = "=" * 60
    print(f"\n{Colors.BLUE}{rule}")
    print("Autonomous Traffic Control - Pre-submission Validation")
    print(f"{rule}{Colors.RESET}\n")

    # Checks in execution order; optional ones are appended based on flags.
    stages = [
        ("Environment Variables", check_environment_variables),
        ("Required Files", check_required_files),
        ("Inference Compliance", check_inference_compliance),
        ("Python Dependencies", check_requirements),
        ("OpenEnv YAML", check_openenv_yaml),
    ]
    if options.docker:
        stages.append(("Docker Build", check_docker_build))
    if not options.skip_inference:
        stages.append(("Inference Execution", check_inference_execution))
    results = {label: run_check() for label, run_check in stages}

    # Summary
    print(f"\n{Colors.BLUE}{rule}")
    print("Validation Summary")
    print(f"{rule}{Colors.RESET}\n")
    for label, ok in results.items():
        if ok:
            verdict = f"{Colors.GREEN}PASS{Colors.RESET}"
        else:
            verdict = f"{Colors.RED}FAIL{Colors.RESET}"
        print(f" {label:<30} {verdict}")
    print(f"\n{Colors.BLUE}{rule}{Colors.RESET}\n")

    if not all(results.values()):
        print(f"{Colors.RED}✗ Some checks failed.{Colors.RESET}")
        print(" Please fix the issues above before submitting.")
        return 1

    print(f"{Colors.GREEN}✓ All checks passed!{Colors.RESET}")
    print(" Your project is ready for HF Spaces submission.")
    return 0
if __name__ == "__main__":
    # Use main()'s return value (0 or 1) as the process exit status.
    raise SystemExit(main())