Spaces:

Rick-Xu315
/

Slurm_Gen

Sleeping

App Files Files Community

Slurm_Gen / app.py

Rick-Xu315

Create app.py

6e7c413 verified 7 months ago

raw

history blame contribute delete

12.9 kB

	import gradio as gr

	def generate_slurm_script(job_name, account, partition, nodes, ntasks_per_node,
	                          cpus_per_task, memory, walltime, program_file, program_args,
	                          gpu_count, output_file, error_file, combine_output,
	                          array_indices, dependency_type, dependency_job_ids,
	                          mail_type, mail_user, export_env, nodelist, signal_time):
	    """Validate the form values and render them as a Slurm batch script.

	    All text inputs are stripped before they are validated or written, so
	    the value that is checked is exactly the value that ends up in the
	    script. ``None`` is treated as an empty string for text inputs and as
	    ``0`` for numeric inputs.

	    Args:
	        job_name: Required job name (warned about when longer than 64 chars).
	        account: Optional value for ``--account``.
	        partition: Partition name; ``"Default"`` or empty omits the directive.
	        nodes: Number of nodes; must be at least 1.
	        ntasks_per_node: Tasks per node; 0 omits the directive.
	        cpus_per_task: CPUs per task; 0 omits the directive.
	        memory: Value for ``--mem``; ``"Default"`` or empty omits it.
	        walltime: Required time limit in ``HH:MM:SS`` form.
	        program_file: Required program or script to run.
	        program_args: Optional arguments appended to the command line.
	        gpu_count: Number of GPUs; 0 omits ``--gres``.
	        output_file: Optional stdout file name (defaults to ``<job>_%j.out``).
	        error_file: Optional stderr file name (defaults to ``<job>_%j.err``).
	        combine_output: When true, no ``--error`` directive is written.
	        array_indices: Optional value for ``--array``; must contain a digit.
	        dependency_type: Dependency kind; ``"None"`` or empty disables it.
	        dependency_job_ids: Job ids required when a dependency kind is set.
	        mail_type: Value for ``--mail-type``; ``"None"`` or empty disables it.
	        mail_user: Optional value for ``--mail-user``.
	        export_env: Value for ``--export``; ``"Default"`` or empty omits it.
	        nodelist: Optional value for ``--nodelist``.
	        signal_time: Seconds for ``--signal=B:USR1@<n>``; 0 omits it.

	    Returns:
	        The generated script as a string, or - when validation fails - a
	        report listing the errors (followed by any warnings).

	    Raises:
	        ValueError: If a numeric input cannot be converted with ``int``.
	    """

	    def _text(value):
	        """Return *value* as a stripped string; ``None`` becomes ``""``."""
	        return "" if value is None else str(value).strip()

	    def _count(value):
	        """Return *value* as an int; ``None`` and ``""`` become ``0``."""
	        return 0 if value is None or value == "" else int(value)

	    known_extensions = ('.py', '.sh', '.R', '.m', '.cpp', '.c', '.f90', '.f',
	                        '.pl', '.rb', '.go', '.rs')

	    # Normalise every input once so validation and rendering agree.
	    job_name = _text(job_name)
	    account = _text(account)
	    partition = _text(partition)
	    memory = _text(memory)
	    walltime = _text(walltime)
	    program_file = _text(program_file)
	    program_args = _text(program_args)
	    output_file = _text(output_file)
	    error_file = _text(error_file)
	    array_indices = _text(array_indices)
	    dependency_type = _text(dependency_type)
	    dependency_job_ids = _text(dependency_job_ids)
	    mail_type = _text(mail_type)
	    mail_user = _text(mail_user)
	    export_env = _text(export_env)
	    nodelist = _text(nodelist)

	    node_count = _count(nodes)
	    tasks_per_node = _count(ntasks_per_node)
	    cpus = _count(cpus_per_task)
	    gpus = _count(gpu_count)
	    signal_seconds = _count(signal_time)

	    has_partition = partition not in ("", "Default")
	    has_dependency = dependency_type not in ("", "None")
	    has_mail = mail_type not in ("", "None")

	    errors = []
	    warnings = []

	    # Required field validation
	    if not job_name:
	        errors.append("Job Name is required")
	    elif len(job_name) > 64:
	        warnings.append("Job Name is longer than 64 characters (may be truncated)")

	    if not program_file:
	        errors.append("Program/Script to Run is required")
	    elif not (program_file.endswith(known_extensions)
	              or program_file.startswith(('./', '/'))):
	        warnings.append("Program file doesn't have a common extension - ensure it's executable")

	    if not walltime:
	        errors.append("Wall Time is required")
	    else:
	        parts = walltime.split(':')
	        if len(parts) != 3:
	            errors.append("Wall Time must be in HH:MM:SS format")
	        else:
	            try:
	                hours, minutes, seconds = map(int, parts)
	            except ValueError:
	                errors.append("Wall Time must contain only numbers and colons (HH:MM:SS)")
	            else:
	                if hours < 0 or not 0 <= minutes < 60 or not 0 <= seconds < 60:
	                    errors.append("Invalid time values in Wall Time")

	    # Additional validations
	    if node_count < 1:
	        errors.append("Number of nodes must be at least 1")

	    # Parenthesised on purpose: `and` binds tighter than `or`, so without
	    # the parentheses the warning fired for every default-partition job.
	    if gpus > 0 and not has_partition:
	        warnings.append("GPU requested but no GPU partition selected")

	    if array_indices and not any(ch.isdigit() for ch in array_indices):
	        errors.append("Array indices must contain numbers")

	    if has_dependency and not dependency_job_ids:
	        errors.append("Dependency Job IDs required when dependency type is selected")

	    if has_mail and not mail_user:
	        warnings.append("Email address recommended when email notifications are enabled")

	    # Errors abort generation; warnings are reported alongside them.
	    if errors:
	        report = "❌ ERRORS FOUND:\n" + "\n".join(f"• {error}" for error in errors)
	        if warnings:
	            report += "\n\n⚠️ WARNINGS:\n" + "\n".join(f"• {warning}" for warning in warnings)
	        return report

	    # Start building the script, one line per list entry.
	    lines = ["#!/bin/bash", "", "# Slurm job script generated by GUI"]
	    if warnings:
	        # Warnings are embedded as shell comments so the script stays valid.
	        lines.append("# ⚠️ WARNINGS:")
	        lines.extend(f"# • {warning}" for warning in warnings)
	    lines.append("")

	    # Required directives
	    lines.append(f"#SBATCH --job-name={job_name}")
	    lines.append(f"#SBATCH --time={walltime}")

	    # Optional account and partition
	    if account:
	        lines.append(f"#SBATCH --account={account}")
	    if has_partition:
	        lines.append(f"#SBATCH --partition={partition}")

	    # Resource allocation
	    lines.append(f"#SBATCH --nodes={node_count}")
	    if tasks_per_node > 0:
	        lines.append(f"#SBATCH --ntasks-per-node={tasks_per_node}")
	    if cpus > 0:
	        lines.append(f"#SBATCH --cpus-per-task={cpus}")
	    if memory and memory != "Default":
	        lines.append(f"#SBATCH --mem={memory}")
	    if gpus > 0:
	        lines.append(f"#SBATCH --gres=gpu:{gpus}")

	    # Output/Error files: --error is only written when streams are separate.
	    lines.append(f"#SBATCH --output={output_file or f'{job_name}_%j.out'}")
	    if not combine_output:
	        lines.append(f"#SBATCH --error={error_file or f'{job_name}_%j.err'}")

	    # Job arrays and dependencies
	    if array_indices:
	        lines.append(f"#SBATCH --array={array_indices}")
	    if has_dependency and dependency_job_ids:
	        lines.append(f"#SBATCH --dependency={dependency_type}:{dependency_job_ids}")

	    # Email notifications
	    if has_mail:
	        lines.append(f"#SBATCH --mail-type={mail_type}")
	        if mail_user:
	            lines.append(f"#SBATCH --mail-user={mail_user}")

	    # Environment export, node list, and pre-termination signal
	    if export_env not in ("", "Default"):
	        lines.append(f"#SBATCH --export={export_env}")
	    if nodelist:
	        lines.append(f"#SBATCH --nodelist={nodelist}")
	    if signal_seconds > 0:
	        lines.append(f"#SBATCH --signal=B:USR1@{signal_seconds}")

	    # Fixed boilerplate: variable reference, cd, and module placeholders.
	    lines.extend([
	        "",
	        "# Available Slurm environment variables:",
	        "# $SLURM_JOB_NAME - Job name",
	        "# $SLURM_JOB_ID - Job ID",
	        "# $SLURM_SUBMIT_DIR - Submit directory",
	        "# $SLURM_SUBMIT_HOST - Submit host",
	        "# $SLURM_JOB_NODELIST - Node list",
	        "# $SLURM_JOB_PARTITION - Partition name",
	        "# $SLURM_JOB_NUM_NODES - Number of allocated nodes",
	        "# $SLURM_NTASKS - Number of processes",
	        "# $SLURM_TASKS_PER_NODE - Processes per node",
	        "# $SLURM_ARRAY_TASK_ID - Array task ID (if array job)",
	        "",
	        "# Change to the directory from which the job was submitted",
	        "cd $SLURM_SUBMIT_DIR",
	        "",
	        "# Load required modules here",
	        "# module load python/3.9",
	        "# module load gcc/9.3.0",
	        "",
	        "# Run the program",
	    ])

	    # Main program execution: bare names are run relative to the submit dir.
	    if program_file.startswith(('./', '/')):
	        command = program_file
	    else:
	        command = f"./{program_file}"
	    if program_args:
	        command += f" {program_args}"
	    lines.append(command)

	    return "\n".join(lines) + "\n"

	# Define all the input components
	def create_interface():
	    """Assemble the Gradio Blocks UI for the Slurm script generator.

	    The form is laid out as three rows of two columns each, followed by a
	    generate button and a text box that receives whatever
	    ``generate_slurm_script`` returns.

	    Returns:
	        The constructed ``gr.Blocks`` object (not yet launched).
	    """
	    # Static content is declared up front so the layout code below reads
	    # as structure only.
	    intro_markdown = """
	# 🚀 Comprehensive Slurm Script Generator

	Generate complete Slurm job scripts with all available options.

	Instructions:
	1. Fill in required fields (marked with *)
	2. Configure your job parameters
	3. Click Submit to generate the script
	4. Copy the generated script and save as .sh file
	5. Submit with: sbatch your_script.sh

	Tips:
	• Job Name and Program/Script are required
	• Use array jobs (e.g., 1-10) for parameter sweeps
	• Set dependencies to chain jobs together
	• Check available partitions with 'sinfo' command
	"""
	    partition_choices = ["Default", "general", "debug", "gpu", "gpuq", "highmem", "standard"]
	    memory_choices = ["Default", "1G", "2G", "4G", "8G", "16G", "32G", "64G", "128G", "256G"]
	    walltime_choices = ["", "00:15:00", "00:30:00", "01:00:00", "02:00:00",
	                        "04:00:00", "08:00:00", "12:00:00", "24:00:00"]
	    dependency_choices = ["None", "after", "afterok", "afternotok", "afterany"]
	    mail_choices = ["None", "BEGIN", "END", "FAIL", "ALL", "BEGIN,END", "END,FAIL", "BEGIN,END,FAIL"]
	    export_choices = ["Default", "ALL", "NONE"]

	    with gr.Blocks(title="🚀 Comprehensive Slurm Script Generator") as demo:
	        gr.Markdown(intro_markdown)

	        # Row 1: job identity on the left, resources on the right.
	        with gr.Row():
	            with gr.Column(scale=1):
	                gr.Markdown("### 📝 Basic Job Information")
	                job_name = gr.Textbox(
	                    label="Job Name *",
	                    placeholder="my_job_name",
	                    info="Required: Name for your job",
	                )
	                account = gr.Textbox(
	                    label="Account (optional)",
	                    placeholder="your_account_name",
	                )
	                partition = gr.Dropdown(
	                    choices=partition_choices,
	                    value="Default",
	                    label="Partition/Queue",
	                )
	                program_file = gr.Textbox(
	                    label="Program/Script to Run *",
	                    placeholder="my_program.py",
	                    info="Required: Script or program to execute",
	                )
	                program_args = gr.Textbox(
	                    label="Program Arguments (optional)",
	                    placeholder="input.txt --verbose",
	                )

	            with gr.Column(scale=1):
	                gr.Markdown("### ⚙️ Resource Allocation")
	                nodes = gr.Slider(
	                    minimum=1, maximum=20, value=1, step=1,
	                    label="Number of Nodes",
	                )
	                ntasks_per_node = gr.Slider(
	                    minimum=0, maximum=128, value=1, step=1,
	                    label="Tasks per Node (0=auto)",
	                )
	                cpus_per_task = gr.Slider(
	                    minimum=0, maximum=64, value=1, step=1,
	                    label="CPUs per Task (0=auto)",
	                )
	                memory = gr.Dropdown(
	                    choices=memory_choices,
	                    value="8G",
	                    label="Memory per Node",
	                )
	                walltime = gr.Dropdown(
	                    choices=walltime_choices,
	                    value="01:00:00",
	                    label="Wall Time (HH:MM:SS) *",
	                    info="Required: Maximum runtime for your job",
	                )
	                gpu_count = gr.Slider(
	                    minimum=0, maximum=8, value=0, step=1,
	                    label="Number of GPUs (0=no GPU)",
	                )

	        # Row 2: output files on the left, arrays/dependencies on the right.
	        with gr.Row():
	            with gr.Column(scale=1):
	                gr.Markdown("### 📁 Output Configuration")
	                output_file = gr.Textbox(
	                    label="Output File (optional)",
	                    placeholder="output_%j.out",
	                )
	                error_file = gr.Textbox(
	                    label="Error File (optional)",
	                    placeholder="error_%j.err",
	                )
	                combine_output = gr.Checkbox(
	                    label="Combine stdout and stderr",
	                    value=False,
	                )

	            with gr.Column(scale=1):
	                gr.Markdown("### 🔗 Job Dependencies & Arrays")
	                array_indices = gr.Textbox(
	                    label="Array Job Indices (optional)",
	                    placeholder="1-10 or 1,3,5-8",
	                )
	                dependency_type = gr.Dropdown(
	                    choices=dependency_choices,
	                    value="None",
	                    label="Job Dependency Type",
	                )
	                dependency_job_ids = gr.Textbox(
	                    label="Dependency Job IDs (optional)",
	                    placeholder="12345,12346",
	                )

	        # Row 3: e-mail settings on the left, advanced options on the right.
	        with gr.Row():
	            with gr.Column(scale=1):
	                gr.Markdown("### 📧 Email Notifications")
	                mail_type = gr.Dropdown(
	                    choices=mail_choices,
	                    value="FAIL",
	                    label="Email Notification Types",
	                )
	                mail_user = gr.Textbox(
	                    label="Email Address (optional)",
	                    placeholder="user@university.edu",
	                )

	            with gr.Column(scale=1):
	                gr.Markdown("### 🔧 Advanced Options")
	                export_env = gr.Dropdown(
	                    choices=export_choices,
	                    value="Default",
	                    label="Export Environment",
	                )
	                nodelist = gr.Textbox(
	                    label="Specific Nodes (optional)",
	                    placeholder="node001,node002",
	                )
	                signal_time = gr.Slider(
	                    minimum=0, maximum=300, value=0, step=10,
	                    label="Signal Before End (seconds, 0=disabled)",
	                )

	        generate_button = gr.Button("🚀 Generate Slurm Script", variant="primary", size="lg")
	        script_box = gr.Textbox(label="Generated Slurm Script", lines=20, max_lines=30)

	        # The list order must mirror generate_slurm_script's parameter order,
	        # because the component values are passed to it positionally.
	        script_inputs = [
	            job_name, account, partition, nodes, ntasks_per_node, cpus_per_task,
	            memory, walltime, program_file, program_args, gpu_count, output_file,
	            error_file, combine_output, array_indices, dependency_type,
	            dependency_job_ids, mail_type, mail_user, export_env, nodelist, signal_time,
	        ]
	        generate_button.click(
	            fn=generate_slurm_script,
	            inputs=script_inputs,
	            outputs=script_box,
	        )

	    return demo

	# Launch the interface
	if __name__ == "__main__":
	    # Build the UI only when executed as a script, never on import.
	    interface = create_interface()
	    # share=True asks Gradio to also create a public share link.
	    interface.launch(share=True)