MPI_Bcast(void* buffer, int count, MPI_Datatype datatype, int rootID, MPI_Comm comm)
Pseudo code that describes what happens in an MPI_Bcast():

    if ( myID == rootID )
    {
        for ( every ID i != rootID in the communication set "comm" )
        {
            MPI_Send( buffer, count, datatype, i, TAG, comm );
        }
    }
    else
    {
        MPI_Recv( buffer, count, datatype, rootID, TAG, comm, &status );
    }
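In other words, every process (the root and the non-roots alike) makes the same MPI_Bcast() call, and when the call returns each process's buffer holds the root's data. A minimal sketch (the array name and its contents are just for illustration):

    #include <mpi.h>
    #include <iostream>
    using namespace std;

    int main(int argc, char **argv)
    {
        double table[4];
        int i, myid;

        MPI_Init(&argc, &argv);
        MPI_Comm_rank(MPI_COMM_WORLD, &myid);

        if ( myid == 0 )                      // Only the root fills the buffer...
            for ( i = 0; i < 4; i++ )
                table[i] = 1.0 / (i + 1);

        // ...but EVERY process makes the identical MPI_Bcast() call:
        MPI_Bcast(table, 4, MPI_DOUBLE, 0, MPI_COMM_WORLD);

        cout << "Process " << myid << ": table[3] = " << table[3] << endl;

        MPI_Finalize();
    }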
Example 1: source is node 0

Example 2: source is node 1
#include <mpi.h>
#include <iostream>
#include <cstdio>
#include <cstdlib>
using namespace std;

int main(int argc, char **argv)
{
    char buff[128];
    int secret_num;
    int numprocs;
    int myid;
    int i;
    MPI_Status stat;

    MPI_Init(&argc, &argv);
    MPI_Comm_size(MPI_COMM_WORLD, &numprocs);
    MPI_Comm_rank(MPI_COMM_WORLD, &myid);

    // ------------------------------------------
    // Node 0 obtains the secret number
    // ------------------------------------------
    if ( myid == 0 )
    {
        secret_num = atoi(argv[1]);
    }

    // ------------------------------------------
    // Node 0 shares the secret with everybody
    // ------------------------------------------
    MPI_Bcast(&secret_num, 1, MPI_INT, 0, MPI_COMM_WORLD);

    if ( myid == 0 )
    {
        for ( i = 1; i < numprocs; i++ )
        {
            MPI_Recv(buff, 128, MPI_CHAR, i, 0, MPI_COMM_WORLD, &stat);
            cout << buff << endl;
        }
    }
    else
    {
        sprintf(buff, "Processor %d knows the secret code: %d",
                myid, secret_num);
        MPI_Send(buff, 128, MPI_CHAR, 0, 0, MPI_COMM_WORLD);
    }

    MPI_Finalize();
}
MPI_Scatter(void* sendbuf,         // Distribute sendbuf evenly to recvbuf
            int sendcount,         // # items sent to EACH processor
            MPI_Datatype sendtype,
            void* recvbuf,
            int recvcount,
            MPI_Datatype recvtype,
            int rootID,            // Sending processor !
            MPI_Comm comm)
Normally, the send count/type given at the root should match the receive count/type used by every process.
However: these rules are not strictly enforced.
(Don't blame MPI for causing "funny errors" if you decide to violate these rules :-))
#include <mpi.h>
#include <iostream>
using namespace std;

int main(int argc, char **argv)
{
    int buff[100];
    int recvbuff[2];
    int numprocs;
    int myid;
    int i, k;
    int mysum;
    MPI_Status stat;

    MPI_Init(&argc, &argv);
    MPI_Comm_size(MPI_COMM_WORLD, &numprocs);
    MPI_Comm_rank(MPI_COMM_WORLD, &myid);

    if ( myid == 0 )
    {
        cout << "We have " << numprocs << " processors" << endl;

        // -----------------------------------------------
        // Node 0 prepares 2 numbers for each processor:
        // [1][2] [3][4] [5][6] .... etc
        // -----------------------------------------------
        k = 1;
        for ( i = 0; i < 2*numprocs; i += 2 )
        {
            buff[i]   = k++;
            buff[i+1] = k++;
        }
    }

    // ------------------------------------------
    // Node 0 scatters the array to the processors:
    // ------------------------------------------
    // Note: the root sends 2 ints to each process, and each process
    //       (including the root) receives 2 ints into recvbuff.
    MPI_Scatter(buff, 2, MPI_INT, recvbuff, 2, MPI_INT, 0, MPI_COMM_WORLD);

    if ( myid == 0 )
    {
        // Processor 0
        mysum = recvbuff[0] + recvbuff[1];
        cout << "Processor " << myid << ": sum = " << mysum << endl;

        for ( i = 1; i < numprocs; i++ )
        {
            MPI_Recv(&mysum, 1, MPI_INT, i, 0, MPI_COMM_WORLD, &stat);
            cout << "Processor " << i << ": sum = " << mysum << endl;
        }
    }
    else
    {
        // Other processors
        mysum = recvbuff[0] + recvbuff[1];
        MPI_Send(&mysum, 1, MPI_INT, 0, 0, MPI_COMM_WORLD);
    }

    MPI_Finalize();
}
MPI_Gather() does the reverse of MPI_Scatter(): every process sends the contents of its sendbuf to the rootID process, which collects them (in rank order) into recvbuf.
MPI_Gather(void* sendbuf,          // Data each process sends to the root
           int sendcount,          // # items sent by EACH processor
           MPI_Datatype sendtype,
           void* recvbuf,          // Receive buffer (significant at the rootID process only)
           int recvcount,          // # items received FROM each processor
           MPI_Datatype recvtype,
           int rootID,             // Receiving (gathering) processor
           MPI_Comm comm)
NOTE: normally sendcount = recvcount and sendtype = recvtype, and the number of items received in recvbuf of the rootID process will be equal to:

    numprocs * recvcount

(i.e., recvcount items from each of the numprocs processes in comm; e.g., with 4 processes and recvcount = 2, recvbuf must have room for 8 items).
Again: these rules are not strictly enforced.
(And again, don't blame MPI for causing "funny errors" if you decide to violate these rules....)
Example 1: the "gatherer" is node 0

Example 2: the "gatherer" is node 1
#include <mpi.h>
#include <iostream>
using namespace std;

int main(int argc, char **argv)
{
    int buff[100];
    int recvbuff[2];
    int numprocs;
    int myid;
    int i, k;
    int mysum;
    MPI_Status stat;

    MPI_Init(&argc, &argv);
    MPI_Comm_size(MPI_COMM_WORLD, &numprocs);
    MPI_Comm_rank(MPI_COMM_WORLD, &myid);

    if ( myid == 0 )
    {
        cout << "We have " << numprocs << " processors" << endl;

        // -----------------------------------------------
        // Node 0 prepares 2 numbers for each processor:
        // [1][2] [3][4] [5][6] .... etc
        // -----------------------------------------------
        k = 1;
        for ( i = 0; i < 2*numprocs; i += 2 )
        {
            buff[i]   = k++;
            buff[i+1] = k++;
        }
    }

    // ------------------------------------------
    // Node 0 scatters the array to the processors:
    // ------------------------------------------
    MPI_Scatter(buff, 2, MPI_INT, recvbuff, 2, MPI_INT, 0, MPI_COMM_WORLD);

    mysum = recvbuff[0] + recvbuff[1];      // Everyone calculates a sum

    // ------------------------------------------
    // Node 0 collects the results in "buff":
    // ------------------------------------------
    MPI_Gather(&mysum, 1, MPI_INT, buff, 1, MPI_INT, 0, MPI_COMM_WORLD);

    // ------------------------------------------
    // Node 0 prints the result
    // ------------------------------------------
    if ( myid == 0 )
    {
        for ( i = 0; i < numprocs; i++ )
        {
            cout << "Processor " << i << ": sum = " << buff[i] << endl;
        }
    }

    MPI_Finalize();
}
MPI_Reduce(void* sendbuf,          // Value(s) contributed by this process
           void* recvbuf,          // Combined result (significant at the rootID process only)
           int count,              // # items contributed by EACH processor
           MPI_Datatype datatype,
           MPI_Op op,              // The reduction operation (see the table below)
           int rootID,             // Processor that receives the combined result
           MPI_Comm comm)

All processes must call MPI_Reduce() with the same count, datatype, op, rootID and comm.
NOTE: each item received will be immediately incorporated (combined using op) into the variable recvbuf !!!
Again: these rules are not strictly enforced.
(And again, don't blame MPI for causing "funny errors" if you decide to violate these rules....)
The effect is the same as computing, at the rootID process:

    recvbuf = sendbuf(0) op sendbuf(1) op ... op sendbuf(numprocs-1)
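For a concrete picture, here is a rough hand-written equivalent of a one-int MPI_SUM reduction to rootID 0 (a sketch only: the contributed values are made up, and a real MPI_Reduce() is normally implemented far more efficiently, e.g. with a reduction tree):

    #include <mpi.h>
    #include <iostream>
    using namespace std;

    int main(int argc, char **argv)
    {
        int numprocs, myid, i;
        int myvalue, result, tmp;
        MPI_Status stat;

        MPI_Init(&argc, &argv);
        MPI_Comm_size(MPI_COMM_WORLD, &numprocs);
        MPI_Comm_rank(MPI_COMM_WORLD, &myid);

        myvalue = myid + 1;                   // Each process contributes one value

        // Hand-written equivalent of:
        //   MPI_Reduce(&myvalue, &result, 1, MPI_INT, MPI_SUM, 0, MPI_COMM_WORLD);
        if ( myid == 0 )
        {
            result = myvalue;                 // The root starts with its own value
            for ( i = 1; i < numprocs; i++ )
            {
                MPI_Recv(&tmp, 1, MPI_INT, i, 0, MPI_COMM_WORLD, &stat);
                result = result + tmp;        // "op" (here: +) is applied as each item arrives
            }
            cout << "Sum = " << result << endl;
        }
        else
        {
            MPI_Send(&myvalue, 1, MPI_INT, 0, 0, MPI_COMM_WORLD);
        }

        MPI_Finalize();
    }

The predefined reduction operations that can be passed as op are: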
| MPI Reduction Operation | Effect of the Reduction Operation |
|---|---|
| MPI_MAX | Finds the maximum value |
| MPI_MIN | Finds the minimum value |
| MPI_SUM | Computes the sum of all values |
| MPI_PROD | Computes the product of all values |
| MPI_LAND | Computes the "logical AND" of all values (0 = false, non-zero = true) |
| MPI_BAND | Computes the "bitwise AND" of all values |
| MPI_LOR | Computes the "logical OR" of all values (0 = false, non-zero = true) |
| MPI_BOR | Computes the "bitwise OR" of all values |
| MPI_LXOR | Computes the "logical XOR" of all values (0 = false, non-zero = true) |
| MPI_BXOR | Computes the "bitwise XOR" of all values |
| MPI_MAXLOC | Finds the maximum value and the processor ID (rank) that has it (you need to pass a structure with these 2 elements: (double value, int rank); see the sketch after this table) |
| MPI_MINLOC | Finds the minimum value and the processor ID (rank) that has it (you need to pass a structure with these 2 elements: (double value, int rank)) |
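A minimal sketch of how MPI_MAXLOC can be used (the per-process value below is made up just for illustration; the pair datatype MPI_DOUBLE_INT matches a structure holding a double followed by an int):

    #include <mpi.h>
    #include <iostream>
    using namespace std;

    int main(int argc, char **argv)
    {
        int myid, numprocs;

        // Pair layout expected by MPI_DOUBLE_INT: the value first, then an int (the rank)
        struct { double value; int rank; } mine, result;

        MPI_Init(&argc, &argv);
        MPI_Comm_size(MPI_COMM_WORLD, &numprocs);
        MPI_Comm_rank(MPI_COMM_WORLD, &myid);

        mine.value = (double)((7 * myid + 3) % numprocs);   // Some per-process value (made up)
        mine.rank  = myid;                                  // Tag it with my own rank

        // MPI_MAXLOC finds the largest "value" and carries along the rank that owns it
        MPI_Reduce(&mine, &result, 1, MPI_DOUBLE_INT, MPI_MAXLOC, 0, MPI_COMM_WORLD);

        if ( myid == 0 )
            cout << "Max value " << result.value
                 << " found on processor " << result.rank << endl;

        MPI_Finalize();
    }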
// Example: estimating Pi; each process computes a partial sum and
// MPI_Reduce() with MPI_SUM combines the partial sums at node 0.
#include <mpi.h>
#include <iostream>
#include <cstdlib>
#include <cmath>
using namespace std;

int num_procs;                                     // Number of processes

// f(x) = 2/sqrt(1 - x^2); the integral of f over [0,1) equals Pi
double f(double a)
{
    return( 2.0 / sqrt(1 - a*a) );
}

/* =======================
   MAIN
   ======================= */
int main(int argc, char *argv[])
{
    int N;
    double w, x;
    int i, myid;
    double mypi, final_pi;

    MPI_Init(&argc, &argv);                        // Initialize
    MPI_Comm_size(MPI_COMM_WORLD, &num_procs);     // Get # processors
    MPI_Comm_rank(MPI_COMM_WORLD, &myid);          // Get my rank (id)

    if ( myid == 0 )
        N = atoi(argv[1]);                         // Number of intervals

    MPI_Bcast(&N, 1, MPI_INT, 0, MPI_COMM_WORLD);

    w = 1.0/(double) N;

    /* ******************************************************************* */
    mypi = 0.0;
    for (i = myid; i < N; i = i + num_procs)       // Cyclic distribution of the intervals
    {
        x = w*(i + 0.5);
        mypi = mypi + w*f(x);
    }
    /* ******************************************************************* */

    MPI_Reduce(&mypi, &final_pi, 1, MPI_DOUBLE, MPI_SUM, 0, MPI_COMM_WORLD);

    if ( myid == 0 )
    {
        cout << "Pi = " << final_pi << endl << endl;
    }

    MPI_Finalize();
}
Advanced material below: skipped !!! (Read on by yourself if you're interested)
MPI_Op_create(MPI_User_function *function,   // The user-defined combining function
              int commute,                   // 1 (true) if the operation is commutative
              MPI_Op *op)                    // Returns a handle for the new operation
The user-defined function must have the following prototype:
void function_name( void *in,                // Input vector
                    void *inout,             // Second input vector; also receives the combined result
                    int *len,                // Number of elements in the vectors
                    MPI_Datatype *datatype); // Datatype of the elements
// Example: the same Pi computation, but the partial sums are combined
// with a user-defined reduction operation created by MPI_Op_create().
#include <mpi.h>
#include <iostream>
#include <cstdlib>
#include <cmath>
using namespace std;

int num_procs;                                     // Number of processes

// User-defined reduction function: element-wise addition.
// MPI calls it as myAdd(in, inout, len, datatype) and expects
// the combined result to be left in "inout" (here: b).
void myAdd( void *a, void *b, int *len, MPI_Datatype *datatype)
{
    int i;

    if ( *datatype == MPI_INT )
    {
        int *x = (int *)a;                         // Turn the (void *) into an (int *)
        int *y = (int *)b;                         // Turn the (void *) into an (int *)
        for (i = 0; i < *len; i++)
        {
            *y = *x + *y;
            x++;
            y++;
        }
    }
    else if ( *datatype == MPI_DOUBLE )
    {
        double *x = (double *)a;                   // Turn the (void *) into a (double *)
        double *y = (double *)b;                   // Turn the (void *) into a (double *)
        for (i = 0; i < *len; i++)
        {
            *y = *x + *y;
            x++;
            y++;
        }
    }
}

// f(x) = 2/sqrt(1 - x^2); the integral of f over [0,1) equals Pi
double f(double a)
{
    return( 2.0 / sqrt(1 - a*a) );
}

/* =======================
   MAIN
   ======================= */
int main(int argc, char *argv[])
{
    int N;
    double w, x;
    int i, myid;
    double mypi, final_pi;
    MPI_Op myOp;

    MPI_Init(&argc, &argv);                        // Initialize
    MPI_Comm_size(MPI_COMM_WORLD, &num_procs);     // Get # processors
    MPI_Comm_rank(MPI_COMM_WORLD, &myid);          // Get my rank (id)

    if ( myid == 0 )
        N = atoi(argv[1]);                         // Number of intervals

    MPI_Bcast(&N, 1, MPI_INT, 0, MPI_COMM_WORLD);

    w = 1.0/(double) N;

    /* ******************************************************************* */
    mypi = 0.0;
    for (i = myid; i < N; i = i + num_procs)
    {
        x = w*(i + 0.5);
        mypi = mypi + w*f(x);
    }
    /* ******************************************************************* */

    MPI_Op_create( myAdd, 1, &myOp );              // "1" = the operation is commutative
    MPI_Reduce(&mypi, &final_pi, 1, MPI_DOUBLE, myOp, 0, MPI_COMM_WORLD);

    if ( myid == 0 )
    {
        cout << "Pi = " << final_pi << endl << endl;
    }

    MPI_Finalize();
}
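One detail not shown in the example above: a user-defined operation can be released with MPI_Op_free() once it is no longer needed (myOp, myAdd, mypi and final_pi are the variables from the listing above):

    MPI_Op_create( myAdd, 1, &myOp );                                        // Create the operation
    MPI_Reduce(&mypi, &final_pi, 1, MPI_DOUBLE, myOp, 0, MPI_COMM_WORLD);    // Use it
    MPI_Op_free( &myOp );                                                    // Release it (myOp becomes MPI_OP_NULL)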
MPI_Barrier( MPI_Comm comm )
Effect: MPI_Barrier() blocks the calling process until every process in comm has entered the call; only then do all of them continue. It is used to synchronize the processes at a common point in the program.
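MPI_Barrier() has no data arguments; it is purely a synchronization point. A minimal sketch (the two "phases" are just for illustration):

    #include <mpi.h>
    #include <iostream>
    using namespace std;

    int main(int argc, char **argv)
    {
        int myid;

        MPI_Init(&argc, &argv);
        MPI_Comm_rank(MPI_COMM_WORLD, &myid);

        cout << "Process " << myid << ": phase 1 done" << endl;

        // Nobody continues past this line until ALL processes have reached it.
        // (Note: the barrier synchronizes the processes, not the order in which
        //  their output happens to appear on the screen.)
        MPI_Barrier(MPI_COMM_WORLD);

        cout << "Process " << myid << ": starting phase 2" << endl;

        MPI_Finalize();
    }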