Commit 16bfc81e authored by PARWORK Lambert

add hybrid openmp + mpi example

parent 616599f5
1)
This version of the code works only if 24 (the number of indexes in the array) is evenly divisible by the total number of cores.
If you modify the batch_mpi.sh file with ppn=5, the program will give wrong results.
Try to modify the mpi_array.c program to allow any total number of cores (<= 24 here, or change the NMAX value); see the decomposition sketch after this list.
2)
Change your code into a hybrid OpenMP/MPI code.
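For exercise 1), one possible decomposition is sketched below (a minimal sketch, not the reference solution; it reuses the variable names of the program further down). It spreads the NMAX % size leftover elements over the first ranks, so any size <= NMAX works:

int chunk = NMAX / size;   // base chunk size per rank
int rest  = NMAX % size;   // leftover elements, spread over the first ranks
int istart, istop;
if (my_rank < rest) {      // the first 'rest' ranks take one extra element
    istart = my_rank * (chunk + 1);
    istop  = istart + chunk + 1;
} else {
    istart = rest * (chunk + 1) + (my_rank - rest) * chunk;
    istop  = istart + chunk;
}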
#-*-makefile-*-
CC     = mpicc
CFLAGS = -O2 -fopenmp
##
##
bin : mpi_openmp_array

mpi_openmp_array : mpi_openmp_array.c
	${CC} ${CFLAGS} $@.c -o $@

clean : clean_txt
	@rm -f mpi_openmp_array

clean_txt:
	@rm -f run-err.txt run-log.txt nodes_list.txt nodes.list
The purpose of the following exercise is to show you how to
compile and how to submit a program using a hybrid OpenMP+MPI API
*) compilation
make
*) execution (you must use the "mpi" queue)
qsub -q mpi batch_mpi.sh
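Once the job is submitted, you can follow it with the standard PBS commands; when it completes, the output lands in the files named in batch_mpi.sh:

qstat -u $USER     # check the job status
cat run-log.txt    # program output, once the job has finished
cat run-err.txt    # error output (the array is printed on stderr here)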
#!/bin/bash
### run it with "qsub -q mpi batch_mpi.sh" #########
# job's name
#PBS -N test_Hybrid_OpenMP_MPI
# we request 3 nodes and 5 cores per node, for a total of 3 x 5 = 15 cores:
# there will be 3 MPI processes, each running 5 OpenMP threads
#PBS -l nodes=3:ppn=5
# maximum wall-clock time during which this job can run (seconds)
#PBS -l walltime=60
# output log file name
#PBS -o "run-log.txt"
# output error file name
#PBS -e "run-err.txt"
# change to submission jobs directory
cd $PBS_O_WORKDIR
# save list of nodes used during the run
cat ${PBS_NODEFILE} > nodes_list.txt
# --> create the list of unique nodes, one entry per node (MANDATORY)
uniq ${PBS_NODEFILE} > nodes.list
# MANDATORY: each MPI process runs as many OpenMP threads as cores requested per node
export OMP_NUM_THREADS=${PBS_NUM_PPN}
export IPATH_NO_CPUAFFINITY=1
# command to launch the run: one MPI process per node, ${PBS_NUM_NODES} in total
mpiexec -f nodes.list -np ${PBS_NUM_NODES} ./mpi_openmp_array
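As an optional sanity check (using the same Torque variables PBS_NUM_NODES and PBS_NUM_PPN as above), you could echo the process/thread layout just before the mpiexec line:

# optional: show the MPI x OpenMP layout (3 ranks x 5 threads = 15 cores here)
echo "ranks=${PBS_NUM_NODES} threads/rank=${OMP_NUM_THREADS} total=$((PBS_NUM_NODES * PBS_NUM_PPN))"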
#include <mpi.h>   // PROVIDES THE BASIC MPI DEFINITIONS AND TYPES
#include <omp.h>   // PROVIDES THE OPENMP RUNTIME ROUTINES (omp_get_thread_num, ...)
#include <stdio.h>
#include <string.h>

#define NMAX 24
#define MIN(a,b) ((a) < (b) ? (a) : (b))

int main(int argc, char **argv) {
  int i, my_rank, partner, size, a[NMAX], chunk, istart, istop;
  int this_thread,  // my thread index
      num_threads;  // #threads used during the run
  MPI_Status stat;

  MPI_Init(&argc, &argv);                   // START MPI
  MPI_Comm_rank(MPI_COMM_WORLD, &my_rank);  // DETERMINE RANK OF THIS PROCESS
  MPI_Comm_size(MPI_COMM_WORLD, &size);     // DETERMINE TOTAL NUMBER OF PROCESSES
#pragma omp parallel private(this_thread, num_threads)
  {
    this_thread = omp_get_thread_num();
    num_threads = omp_get_num_threads();
    if (this_thread == 0) {  // only thread #0 prints this
      printf("Proc [%d] => #Threads used = %d\n", my_rank, num_threads);
    }
  }
  chunk = NMAX / size;       // chunk size per process
  istart = chunk * my_rank;  // first index of my rank
  if (my_rank < size - 1) {
    istop = MIN(istart + chunk, NMAX);  // last index (exclusive) of my rank
  } else {
    istop = NMAX;            // last rank takes any remainder
  }

#pragma omp parallel for
  for (i = istart; i < istop; i++)  // EACH PROCESS COMPUTES ITS CHUNK OF THE ARRAY
    a[i] = 2 * i;
  if (my_rank == 0) {  // master GATHERS ALL RESULTS
    printf("Total number of processes : [%d]\n", size);
    for (partner = 1; partner < size; partner++) {
      istart = chunk * partner;
      if (partner < size - 1) {
        istop = MIN(istart + chunk, NMAX);
      } else {
        istop = NMAX;
      }
      MPI_Recv(a + istart, istop - istart, MPI_INT, partner, 1, MPI_COMM_WORLD, &stat);
    }
    for (i = 0; i < NMAX; i++)
      fprintf(stderr, "a[%5d] = %8d\n", i, a[i]);
  }
  else {  // ALL processes except the master
    MPI_Send(a + istart, istop - istart, MPI_INT, 0, 1, MPI_COMM_WORLD);
  }
  MPI_Finalize();  // EXIT MPI
  return 0;
}
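As a side note, the explicit MPI_Recv/MPI_Send loop above can be replaced by a single collective. A minimal sketch using MPI_Gatherv (shown for illustration, not part of the exercise code), with the counts and displacements filled from the same chunk logic:

int recvcounts[NMAX], displs[NMAX];  // size <= NMAX here, so fixed arrays suffice
for (partner = 0; partner < size; partner++) {
    displs[partner]     = chunk * partner;
    recvcounts[partner] = (partner < size - 1) ? chunk : NMAX - chunk * partner;
}
if (my_rank == 0)  // the root gathers in place into a[]
    MPI_Gatherv(MPI_IN_PLACE, 0, MPI_INT,
                a, recvcounts, displs, MPI_INT, 0, MPI_COMM_WORLD);
else               // the other ranks only send their chunk
    MPI_Gatherv(a + istart, istop - istart, MPI_INT,
                NULL, NULL, NULL, MPI_INT, 0, MPI_COMM_WORLD);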