
/*
 * Megabit-to-byte conversion factor: 131072 = 2^20 / 8, i.e. the number of
 * bytes in one (binary) megabit.  It scales the BANDWIDTH table entries in
 * _getExpectedTime() -- presumably those are expressed in Mbit/s; confirm
 * against whatever fills in the fine-grid data.
 */
#define MBITS2BYTES 131072.0
double
Grads_Contract_Monitor::_getExpectedTime( int recordNumber )
{
int M, N, NB, J, NPCOL, ilst, imyc, icur;
float * GAMMA, * LATENCY;
float * BANDWIDTH;
int extra, icurcol, jb, mess_size, mp, mycol, n, nblocks, nq, nsteps;
double tbcast = 0.0, tfact = 0.0, tu, tupdate = 0.0;
Grads_Lib_Problem_Item_T * pblm; Grads_Lib_Fine_Grid_Item_T * grid;
pblm = Grads_Lib_Problem_Find( _problem );
M = N = ((Grads_Lib_Problem_Data_T *)(pblm->data))->n; NB = ((Grads_Lib_Problem_Data_T *)(pblm->data))->nb; J = recordNumber * NB; /* Antoine has since adjusted this to +1 to J as */ /* fortran arrays start with index 1, not 0 */
grid = Grads_Lib_Fine_Grid_Find( _fineGrid );
NPCOL = ((Grads_Lib_Fine_Grid_Data_T *)(grid->data))->num_hosts; GAMMA = ((Grads_Lib_Fine_Grid_Data_T *)(grid->data))->speed; LATENCY = ((Grads_Lib_Fine_Grid_Data_T *)(grid->data))->lat; BANDWIDTH = ((Grads_Lib_Fine_Grid_Data_T *)(grid->data))->band;
jb = N - J + 1; jb = Mmin( jb, NB ); icurcol = ( ( J - 1 ) / NB ) % NPCOL;
/* * time to factor - This is Level 2 BLAS, so GAMMA is under-estimating * the time to factor */ mp = M - J + 1; tfact = ( (double)(mp) - ( (double)(jb) / 3.0 ) ) ; tfact *= (double)(jb) * (double)(jb) / ( GAMMA[icurcol] * 1.0e+6 ); /* * time to bcast, split-ring time for process 0 to get the message. */
if( NPCOL > 1 )
{
mess_size = mp * jb * sizeof( double );
nsteps = ( NPCOL + 1 ) / 2;
if( icurcol < NPCOL - icurcol )
{
icur = icurcol*NPCOL + icurcol + 1;
tbcast += ( LATENCY[icur] * 1.0e-3 ) +
(double)(mess_size) / ( BANDWIDTH[icur] * MBITS2BYTES );
nsteps -= 1;
for( mycol = 0; mycol < icurcol; mycol++ )
{
imyc = mycol * NPCOL + mycol + 1;
tbcast += ( LATENCY[imyc] * 1.0e-3 ) +
(double)(mess_size) / ( BANDWIDTH[imyc] * MBITS2BYTES );
}
if( nsteps > icurcol )
{
ilst = ( NPCOL - 1 ) * NPCOL + 0;
tbcast += ( LATENCY[ilst] * 1.0e-3 ) +
(double)(mess_size) / ( BANDWIDTH[ilst] * MBITS2BYTES );
}
}
else
{
for( mycol = icurcol; mycol < NPCOL; mycol++ )
{
if( mycol + 1 < NPCOL ) imyc = mycol * NPCOL + mycol + 1;
else imyc = (NPCOL-1) * NPCOL + 0;
tbcast += ( LATENCY[imyc] * 1.0e-3 ) +
(double)(mess_size) / ( BANDWIDTH[imyc] * MBITS2BYTES );
}
if( nsteps > NPCOL - icurcol )
{
ilst = 0 * NPCOL + 1;
tbcast += ( LATENCY[ilst] * 1.0e-3 ) +
(double)(mess_size) / ( BANDWIDTH[ilst] * MBITS2BYTES );
}
}
}
/*
* Update - For every processor, find out how many columns they have
* to update, compute the max time to do that.
*/
n = N - J + 1 - jb; nblocks = n / NB;
nq = ( nblocks / NPCOL ) * NB; extra = nblocks % NPCOL;
for( mycol = 0; mycol < NPCOL; mycol++ )
{
if( mycol < extra ) { nq += NB; }
else if( mycol == extra ) { nq += ( n % NB ); }
tu = 2.0 * (double)(mp - jb) + (double)(jb);
tu *= (double)(jb) * (double)(nq) / ( GAMMA[mycol] * 1.0e+6 );
tupdate = Mmax( tu, tupdate ); }
cerr << " ExpectedTime: tfact " << tfact
<< ", tbcast " << tbcast
<< ", tupdate " << tupdate
<< " \n";
return( tfact + tbcast + tupdate ); }
/*
 * Department of Computer Science
 * University of Illinois at Urbana-Champaign
 * Last modified: Tuesday, February 06, 2001 12:52 PM
 */