diff --git a/1_single_core_job/README.md b/1_single_core_job/README.md
deleted file mode 100644
index acda6518f9b946b3e8590f4880c3bd48e91af5d2..0000000000000000000000000000000000000000
--- a/1_single_core_job/README.md
+++ /dev/null
@@ -1,34 +0,0 @@
-# Single core jobs
-A single core job is a job with only a single thread. This type of job is used when it is hard or impossible to make use of multiple cores/threads. A simple example could be a data parser that reads a file and transforms it into a more suitable format.
-
-## How to run
-To run the example do the following steps:
-1. Log in to Lundgren
-2. Change directory to the example code
-3. Run `sbatch single_core_job.sh`
-4. Check queue status by running `squeue`
-5. When the job is completed check the file _single_core_job.log_
-
-## Detailed description of the example
-The batch script is the main file for the job allocation and preparation. Inside the python script a few environmental variables are fetched and printed out.
-
-### The batch script
-The batch script, _single_core_job.sh_, contains three sections. The first section contains input arguments to the Slurm scheduler. The second section loads Python into environment so it is accessible and lastly the a job step is performed.
-
-The input arguments are defined with a comment beginning with SBATCH followed by the argument key and value. For easier readablility the -- method is used.
-
-- __job-name:__ The name of the job is set to _demo_single_core_
-- __time:__ The requeted time is set to 1 minute, _00:01:00_
-- __ntasks:__ The number of tasks to be performed in this job is set to _1_.
-- __cpus-per-task:__ The requested number of cores per task is set to _1_
-- __mem:__ The requested memory is set to _50 MB_
-- __output:__ The standard output should be sent to the file _single_core_job.log_
-
-Python needs to be loaded into the environment in order to be accessible this is done in the next step with the __module__ command.
-
-The job step with the single task is allocated and performed with the __srun__ command.
-
-#### The python script
-The python script represents the taskt to be done. In this case the task is to print out some environment variables that are set by Slurm.
-
-The environment variable __JOB_ID__ can be used to create temporary files and folders. In this example it creates a file named _<JOB_ID>_.txt and writes the job name into it.
\ No newline at end of file
diff --git a/1_single_core_job/single_core_job.sh b/1_single_core_job/single_core_job.sh
deleted file mode 100644
index e0bd2984d4ab484180eef6b7040eff35da5571c1..0000000000000000000000000000000000000000
--- a/1_single_core_job/single_core_job.sh
+++ /dev/null
@@ -1,12 +0,0 @@
-#! /bin/bash
-#SBATCH --job-name=demo_single_core
-#SBATCH --time=00:01:00
-#SBATCH --ntasks=1
-#SBATCH --cpus-per-task=1
-#SBATCH --mem=50MB
-#SBATCH --output=single_core_job.log
-
-# Loading Python into the environment
-module load python/anaconda3-2024.02-3.11.7
-
-srun python single_core_task.py
\ No newline at end of file
diff --git a/1_single_core_job/single_core_task.py b/1_single_core_job/single_core_task.py
deleted file mode 100644
index 9e8e26d1f9b1a1ce66ac181168e6b900bd7d2f75..0000000000000000000000000000000000000000
--- a/1_single_core_job/single_core_task.py
+++ /dev/null
@@ -1,37 +0,0 @@
-from datetime import datetime
-
-import logging
-import os
-import time
-
-logger = logging.getLogger(__name__)
-
-def main():
-    # Read environment variables.
-    JOB_NAME = os.environ.get('SLURM_JOB_NAME','Unknown')
-    JOB_ID = os.environ.get('SLURM_JOB_ID','Unknown')
-    NUMBER_OF_CORES = os.environ.get('SLURM_CPUS_PER_TASK','Unknown')
-    MAXIMUM_MEMORY = os.environ.get('SLURM_MEM_PER_NODE','Unknown')
-
-    # Sleeping until next minute.
-    # This represents the calculations
-    current_time = datetime.now()
-    sleep_time = 60 - current_time.second
-    logger.info('%s - Sleeping for %d seconds.',current_time.strftime('%Y-%m-%d %H:%M:%S'), sleep_time)
-    time.sleep(sleep_time)
-
-    # Printing some things to standard output.
-    logger.info('\nJob ID:\t\t\t%s\nJob name:\t\t%s\nAllocated cores:\t%s\nAllocated memory:\t%s',
-        JOB_ID, JOB_NAME, NUMBER_OF_CORES,MAXIMUM_MEMORY)
-
-    # Writing some output to a file based on the Slurm job id.
-    output_file = '{}.txt'.format(JOB_ID)
-    with open(output_file,'w') as file:
-        file.write('This file was created by the job {} with id {}\n'.format
-            (JOB_NAME, JOB_ID))
-
-    logger.info('Job completed.')
-
-if __name__ == '__main__':
-    logging.basicConfig(level=logging.INFO)
-    main()
diff --git a/1_single_job_step/README.md b/1_single_job_step/README.md
new file mode 100644
index 0000000000000000000000000000000000000000..8261b611f7b8198eff6b0e01c28589914633506d
--- /dev/null
+++ b/1_single_job_step/README.md
@@ -0,0 +1,67 @@
+# Single job step
+In Slurm, job steps are a way to launch distinct parallel (most commonly) and/or sequential tasks from within a single job script. Job steps are executed with the Slurm command `srun`.
+
+This is an example where Slurm is instructed to run a single job step. A single job step is fast to write and simple to use.
+
+This folder contains one batch script that runs a sequential program and one batch script that runs a parallel program. The batch scripts themselves, which define the job steps, are similar with only slight differences in the settings.
+
+## How to run
+To run the example, do the following steps:
+1. Log in to Lundgren
+2. Change directory to the example code
+3. Run `sbatch single_job_step_sequential.sh` or `sbatch single_job_step_parallel.sh`
+4. Check queue status by running `squeue`
+5. When the job is completed, check the file _sequential_single_job_step.log_ or _parallel_single_job_step.log_ for the program log.
+
+_If you run the parallel example, try changing the batch file to run on 1 or 4 cores._
+
+## Detailed description of the example
+The batch script is the main file for the job allocation and preparation. Inside the Python script, a few Slurm environment variables are fetched and used by the task.
+
+### The batch script
+The batch scripts, _single_job_step_sequential.sh_ and _single_job_step_parallel.sh_, contain three sections. The first section contains the input arguments to the Slurm scheduler. The second section loads Python into the environment so it is accessible, and lastly a job step is performed.
+
+The input arguments are defined with a comment beginning with SBATCH followed by the argument key and value. For easier readability the long option format (--) is used.
+
+- __job-name:__ The name of the job
+- __time:__ The requested time
+- __ntasks:__ The number of tasks to be performed in this job
+- __cpus-per-task:__ The requested number of CPUs per task
+- __mem-per-cpu:__ The requested memory per allocated CPU
+- __output:__ The file name for standard output
+
+Python needs to be loaded into the environment in order to be accessible; this is done in the next step with the __module__ command.
+
+The single job step is allocated and performed with the __srun__ command.
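+
+For reference, the parallel batch script in this folder, _single_job_step_parallel.sh_, boils down to the following skeleton; the sequential script differs only in the job name, the number of CPUs per task and the file names:
+
+```bash
+#! /bin/bash
+#SBATCH --job-name=parallel_single_job_step
+#SBATCH --time=00:02:00
+#SBATCH --ntasks=1
+#SBATCH --cpus-per-task=2
+#SBATCH --mem-per-cpu=50MB
+#SBATCH --output=parallel_single_job_step.log
+
+# Loading Python into the environment
+module load python/anaconda3-2024.02-3.11.7
+
+# Start job step
+srun python parallel_task.py ../data/data_1.txt output_parallel.csv
+```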
+
+#### The python script
+The Python script represents the task to be done. In this case the task is to read an input file, wait to simulate a calculation and afterwards print the result to an output file.
+
+- The environment variable __SLURM_JOB_ID__ can be used to create temporary files and folders.
+- The environment variable __SLURM_CPUS_PER_TASK__ is used to restrict the worker pool to the allocated number of CPUs when running in parallel.
+
+### How to set the number of cores in different programming languages and software
+Most programming languages and software packages try to make use of all cores that are available. This can lead to oversubscription of the resources. On a shared resource, the maximum resources actually used must match the allocated ones. This section gives a reference for how to do this in commonly used software.
+
+- __CPLEX:__ Use the parameter _global thread count_. Read more in the [documentation](https://www.ibm.com/docs/en/icos/22.1.2?topic=parameters-global-thread-count)
+- __Gurobi:__ Use the configuration parameter _ThreadLimit_. Read more in the [documentation](https://docs.gurobi.com/projects/optimizer/en/current/reference/parameters.html#threadlimit)
+- __MATLAB:__ Create an instance of the parallel pool with the _poolsize_ argument set to the number of cores and use the pool when running in parallel. Read more in the [documentation](https://se.mathworks.com/help/parallel-computing/parpool.html)
+- __Python:__ If the multiprocessing package is used, create an instance of the Pool class with the _processes_ argument set to the number of cores and use the pool when running in parallel, as done in _parallel_task.py_ in this example. Read more in the [documentation](https://docs.python.org/3/library/multiprocessing.html#multiprocessing.pool.Pool)
\ No newline at end of file
diff --git a/1_single_job_step/parallel_task.py b/1_single_job_step/parallel_task.py
new file mode 100644
index 0000000000000000000000000000000000000000..e579a6fe4f724c14b8ac6aef0464b1b9a47f1875
--- /dev/null
+++ b/1_single_job_step/parallel_task.py
@@ -0,0 +1,71 @@
+from datetime import datetime
+from multiprocessing import Pool
+
+import json
+import logging
+import os
+import sys
+import time
+
+logger = logging.getLogger(__name__)
+
+def sleep(task) -> int:
+    time.sleep(task[1])
+    logger.info('Task %d done.', task[0])
+
+    return task[1]
+
+def main(input_file: str, output_file: str):
+    # Read environment variables.
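+    # Slurm sets these variables for every job: SLURM_JOB_NAME and SLURM_JOB_ID identify the job,
+    # while SLURM_CPUS_PER_TASK mirrors the --cpus-per-task value requested in the batch script.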
+    JOB_NAME = os.environ.get('SLURM_JOB_NAME','Unknown')
+    JOB_ID = os.environ.get('SLURM_JOB_ID','Unknown')
+    NUMBER_OF_CPUS = os.environ.get('SLURM_CPUS_PER_TASK','Unknown')
+    if NUMBER_OF_CPUS == 'Unknown':
+        logger.error('Unknown number of CPUs, exiting.')
+        return
+
+    NUMBER_OF_CPUS = int(NUMBER_OF_CPUS)
+    logger.info('**** Output for job %s (%s) ****', JOB_NAME, JOB_ID)
+    logger.info('Running program with %d CPUs.', NUMBER_OF_CPUS)
+
+    # Read the configuration file and create a list of tasks.
+    # This represents reading the parameters for the calculations.
+    logger.info('Reading configuration from %s.', input_file)
+    with open(input_file, 'r') as file:
+        data = json.load(file)
+
+    tasks = []
+    total_time = 0
+    for i in range(len(data['sleep'])):
+        sleep_time = data['sleep'][i]
+        tasks.append((i, sleep_time))
+        total_time = total_time + sleep_time
+
+    # Create a multiprocessing pool to perform the tasks.
+    pool = Pool(processes=NUMBER_OF_CPUS)
+
+    # Submit the tasks to the worker pool.
+    tic = datetime.now()
+    logger.info('Submitting tasks to pool.')
+    results = pool.map(sleep, tasks)
+    toc = datetime.now()
+
+    logger.info('All tasks are done, took %d seconds, compared to %d seconds with a single thread.',
+        (toc-tic).seconds, total_time)
+
+    logger.info('Writing result to %s', output_file)
+    with open(output_file, 'w') as file:
+        file.write('time\n')
+        for result in results:
+            file.write('{}\n'.format(result))
+
+    logger.info('Done.')
+
+if __name__ == '__main__':
+    logging.basicConfig(level=logging.INFO)
+    input_file = sys.argv[1]
+    output_file = sys.argv[2]
+    main(input_file, output_file)
+    sys.exit(0)
diff --git a/1_single_job_step/sequential_task.py b/1_single_job_step/sequential_task.py
new file mode 100644
index 0000000000000000000000000000000000000000..0862c077978d01286f96b097bd965fc6bd3b9dcb
--- /dev/null
+++ b/1_single_job_step/sequential_task.py
@@ -0,0 +1,63 @@
+from datetime import datetime
+
+import json
+import logging
+import os
+import sys
+import time
+
+logger = logging.getLogger(__name__)
+
+def sleep(task) -> int:
+    time.sleep(task[1])
+    logger.info('Task %d done.', task[0])
+
+    return task[1]
+
+def main(input_file: str, output_file: str):
+    # Read environment variables.
+    JOB_NAME = os.environ.get('SLURM_JOB_NAME','Unknown')
+    JOB_ID = os.environ.get('SLURM_JOB_ID','Unknown')
+
+    logger.info('**** Output for job %s (%s) ****', JOB_NAME, JOB_ID)
+    logger.info('Running program sequentially.')
+
+    # Read the configuration file and create a list of tasks.
+    # This represents reading the parameters for the calculations.
+    logger.info('Reading configuration from %s.', input_file)
+    with open(input_file, 'r') as file:
+        data = json.load(file)
+
+    tasks = []
+    results = []
+    total_time = 0
+    for i in range(len(data['sleep'])):
+        sleep_time = data['sleep'][i]
+        tasks.append((i, sleep_time))
+        total_time = total_time + sleep_time
+
+    # Run the tasks one after another.
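+    # total_time is the sum of all sleep durations, i.e. the expected runtime on a single core.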
+    tic = datetime.now()
+    logger.info('Running tasks sequentially.')
+    for task in tasks:
+        results.append(sleep(task))
+    toc = datetime.now()
+
+    logger.info('All tasks are done, took %d seconds; the sum of all sleep times is %d seconds.',
+        (toc-tic).seconds, total_time)
+
+    logger.info('Writing result to %s', output_file)
+    with open(output_file, 'w') as file:
+        file.write('time\n')
+        for result in results:
+            file.write('{}\n'.format(result))
+
+    logger.info('Done.')
+
+if __name__ == '__main__':
+    logging.basicConfig(level=logging.INFO)
+    input_file = sys.argv[1]
+    output_file = sys.argv[2]
+    main(input_file, output_file)
+    sys.exit(0)
diff --git a/1_single_job_step/single_job_step_parallel.sh b/1_single_job_step/single_job_step_parallel.sh
new file mode 100644
index 0000000000000000000000000000000000000000..70be4b091ac463c4f7c600504b9551f7669ad16a
--- /dev/null
+++ b/1_single_job_step/single_job_step_parallel.sh
@@ -0,0 +1,13 @@
+#! /bin/bash
+#SBATCH --job-name=parallel_single_job_step
+#SBATCH --time=00:02:00
+#SBATCH --ntasks=1
+#SBATCH --cpus-per-task=2
+#SBATCH --mem-per-cpu=50MB
+#SBATCH --output=parallel_single_job_step.log
+
+# Loading Python into the environment
+module load python/anaconda3-2024.02-3.11.7
+
+# Start job step
+srun python parallel_task.py ../data/data_1.txt output_parallel.csv
\ No newline at end of file
diff --git a/1_single_job_step/single_job_step_sequential.sh b/1_single_job_step/single_job_step_sequential.sh
new file mode 100644
index 0000000000000000000000000000000000000000..133033dc9c2d86201ef074f766e4212d81fbe53e
--- /dev/null
+++ b/1_single_job_step/single_job_step_sequential.sh
@@ -0,0 +1,13 @@
+#! /bin/bash
+#SBATCH --job-name=sequential_single_job_step
+#SBATCH --time=00:02:00
+#SBATCH --ntasks=1
+#SBATCH --cpus-per-task=1
+#SBATCH --mem-per-cpu=50MB
+#SBATCH --output=sequential_single_job_step.log
+
+# Loading Python into the environment
+module load python/anaconda3-2024.02-3.11.7
+
+# Start job step
+srun python sequential_task.py ../data/data_1.txt output_sequential.csv
\ No newline at end of file