Contents


Deprecated: please see new documentation site.



LoadLeveler allows one to specify multiple, independent job steps per LL queue script. They are run concurrently as long as the resources are available. Likewise, LoadLeveler provides for the specification of dependencies among job steps such that job chains may be set up depending on the return status of a previously run job step.

Independent Jobs

Independent job steps are specified using the step_name directive. In this example, the environment directive applies to all stanzas.

#!/bin/sh
#
# LoadLeveler queue script defining three independent job steps:
# adcirc_e1, adcirc_e2 and adcirc_e3. Each stanza is introduced by its
# step_name directive and closed by a queue directive. No stanza carries a
# dependency directive, so the steps do not wait on one another.
#
# The environment directive appears only in the first stanza; it applies
# to all stanzas in this script.
#
# Apart from step_name, the three stanzas are identical: the same
# executable (padcirc.sh), class, limits and resource requests. The
# $(step_name) and $(jobid) variables in the output/error paths keep each
# step's log files separate.
#
#@ environment = COPY_ALL
#@ step_name = adcirc_e1
#@ job_type = parallel
#@ output = /work/default/estrabd/adcirc-systest/$(step_name).$(jobid).out
#@ error = /work/default/estrabd/adcirc-systest/$(step_name).$(jobid).err
#@ notify_user = estrabd@cct.lsu.edu
#@ notification = error
#@ class = checkpt
#@ checkpoint = no
#@ restart = yes
#@ wall_clock_limit = 00:10:00
#@ node_usage = not_shared
#@ node = 2,2
#@ total_tasks = 16
#@ requirements = (Arch == "Power5")
#@ initialdir = /work/default/estrabd/adcirc-systest
#@ executable = /work/default/estrabd/adcirc-systest/padcirc.sh
#@ network.MPI =sn_single,not_shared,US,HIGH
#@ resources = ConsumableMemory(1 gb)
#@ queue
#
#
# independent job step (second of three; same settings as adcirc_e1)
#@ step_name = adcirc_e2
#@ job_type = parallel
#@ output = /work/default/estrabd/adcirc-systest/$(step_name).$(jobid).out
#@ error = /work/default/estrabd/adcirc-systest/$(step_name).$(jobid).err
#@ notify_user = estrabd@cct.lsu.edu
#@ notification = error
#@ class = checkpt
#@ checkpoint = no
#@ restart = yes
#@ wall_clock_limit = 00:10:00
#@ node_usage = not_shared
#@ node = 2,2
#@ total_tasks = 16
#@ requirements = (Arch == "Power5")
#@ initialdir = /work/default/estrabd/adcirc-systest
#@ executable = /work/default/estrabd/adcirc-systest/padcirc.sh
#@ network.MPI =sn_single,not_shared,US,HIGH
#@ resources = ConsumableMemory(1 gb)
#@ queue
#
#
# independent job step (third of three; same settings as adcirc_e1)
#@ step_name = adcirc_e3
#@ job_type = parallel
#@ output = /work/default/estrabd/adcirc-systest/$(step_name).$(jobid).out
#@ error = /work/default/estrabd/adcirc-systest/$(step_name).$(jobid).err
#@ notify_user = estrabd@cct.lsu.edu
#@ notification = error
#@ class = checkpt
#@ checkpoint = no
#@ restart = yes
#@ wall_clock_limit = 00:10:00
#@ node_usage = not_shared
#@ node = 2,2
#@ total_tasks = 16
#@ requirements = (Arch == "Power5")
#@ initialdir = /work/default/estrabd/adcirc-systest
#@ executable = /work/default/estrabd/adcirc-systest/padcirc.sh
#@ network.MPI =sn_single,not_shared,US,HIGH
#@ resources = ConsumableMemory(1 gb)
#@ queue

Dependent Jobs

The following is an example of multiple job steps in a single LoadLeveler queue script that depend on one another. Note the addition of the dependency directive.

#!/bin/sh
#
# LoadLeveler queue script defining a three-step job chain under the job
# name adcircSysTest:
#
#   prep_e1 (serial)  ->  adcirc_e1 (parallel)  ->  post_e1 (serial)
#
# Each later step names its predecessor in a dependency directive, which
# tests the predecessor's return status.
#
# NOTE(review): both dependency expressions use ">= 0". Presumably this
# lets the dependent step start for any non-negative return status of the
# predecessor, including non-zero (failure) codes. If a step should run
# only after its predecessor succeeds, "== 0" is likely what is wanted --
# confirm against the LoadLeveler documentation before relying on it.
#
#@ job_name =  adcircSysTest
#
# PREP (serial): first step of the chain, no dependency; runs prep.sh
#@ step_name = prep_e1
#@ environment = COPY_ALL
#@ job_type = serial
#@ output = /work/default/estrabd/adcirc-systest/$(step_name).$(jobid).out
#@ error = /work/default/estrabd/adcirc-systest/$(step_name).$(jobid).err
#@ wall_clock_limit = 00:10:00
#@ class = checkpt
#@ resources = ConsumableMemory(1 gb)
#@ initialdir = /work/default/estrabd/adcirc-systest
#@ executable = /work/default/estrabd/adcirc-systest/prep.sh
#@ queue
#
#
# RUN (parallel): depends on the return status of prep_e1; runs padcirc.sh
#@ dependency = (prep_e1 >= 0)
#@ step_name = adcirc_e1
#@ job_type = parallel
#@ output = /work/default/estrabd/adcirc-systest/$(step_name).$(jobid).out
#@ error = /work/default/estrabd/adcirc-systest/$(step_name).$(jobid).err
#@ notify_user = estrabd@cct.lsu.edu
#@ notification = error
#@ class = checkpt
#@ checkpoint = no
#@ restart = yes
#@ wall_clock_limit = 00:10:00
#@ node_usage = not_shared
#@ node = 2,2
#@ total_tasks = 16
#@ requirements = (Arch == "Power5")
#@ initialdir = /work/default/estrabd/adcirc-systest
#@ executable = /work/default/estrabd/adcirc-systest/padcirc.sh
#@ network.MPI =sn_single,not_shared,US,HIGH
#@ resources = ConsumableMemory(1 gb)
#@ queue
#
#
# POST (serial): depends on the return status of adcirc_e1; runs post.sh
# with a shorter wall clock limit (5 minutes) than the other two steps.
#@ dependency = (adcirc_e1 >= 0)
#@ step_name = post_e1
#@ environment = COPY_ALL
#@ job_type = serial
#@ output = /work/default/estrabd/adcirc-systest/$(step_name).$(jobid).out
#@ error = /work/default/estrabd/adcirc-systest/$(step_name).$(jobid).err
#@ wall_clock_limit = 00:05:00
#@ class = checkpt
#@ resources = ConsumableMemory(1 gb)
#@ initialdir = /work/default/estrabd/adcirc-systest
#@ executable = /work/default/estrabd/adcirc-systest/post.sh
#@ queue

Users may direct questions to sys-help@loni.org.

Powered by MediaWiki