wiki:Jade

On Jade (CINES) for release 1476, 32 CPUs for a 256^3 grid

****************************************************
max number of allocated scalars: 39
 ---------------------------------------------
   32 Processor run, global<256,256,256>
   for 100 time steps
main timer : (real:0:3:4,user:0:3:0,sys:0:0:2)[clk:18473]
	loop : (real:0:2:7,user:0:2:3,sys:0:0:1)[clk:12734]	(100 calls X 1.273400e+02)
	FFT timer : (real:0:2:10,user:0:2:6,sys:0:0:1)[clk:13048]
		FFT and transposition time : (real:0:2:10,user:0:2:6,sys:0:0:1)[clk:13048]	(512 calls X 2.548438e+01)
			FFT only : (real:0:0:26,user:0:0:26,sys:0:0:0)[clk:2611]	(2560 calls X 1.019922e+00)
		planification time : (real:0:0:53,user:0:0:53,sys:0:0:0)[clk:5388]
	azur::array timer root : (real:0:0:39,user:0:0:39,sys:0:0:0)[clk:3938]
		view = expr : (real:0:0:39,user:0:0:39,sys:0:0:0)[clk:3937]	(2380 calls X 1.654202e+00)
			basic3<flt> id= (basic3<int> + flt) : (real:0:0:0,user:0:0:0,sys:0:0:0)[clk:1]	(2 calls X 5.000000e-01)
			fftw4<cdbl> = cdbl : (real:0:0:1,user:0:0:1,sys:0:0:0)[clk:185]	(106 calls X 1.745283e+00)
			fftw4<dbl> = dbl : (real:0:0:0,user:0:0:0,sys:0:0:0)[clk:5]	(4 calls X 1.250000e+00)
			fftw3<dbl> = fftw3<dbl> : (real:0:0:0,user:0:0:0,sys:0:0:0)[clk:7]	(9 calls X 7.777778e-01)
			fftw3<dbl> *= dbl : (real:0:0:0,user:0:0:0,sys:0:0:0)[clk:4]	(6 calls X 6.666667e-01)
			fftw3<dbl> = dbl : (real:0:0:2,user:0:0:2,sys:0:0:0)[clk:245]	(516 calls X 4.748062e-01)
			fftw3<dbl> /= dbl : (real:0:0:2,user:0:0:3,sys:0:0:0)[clk:297]	(618 calls X 4.805825e-01)
			fftw4<cdbl> = fftw4<cdbl> : (real:0:0:10,user:0:0:10,sys:0:0:0)[clk:1079]	(406 calls X 2.657635e+00)
			fftw4<dbl> = fftw4<dbl> : (real:0:0:5,user:0:0:5,sys:0:0:0)[clk:548]	(205 calls X 2.673171e+00)
			fftw4<cdbl> id= (s2v<basic3<dbl>> * ((fftw4<cdbl> * dbl) + fftw4<cdbl>)) : (real:0:0:0,user:0:0:0,sys:0:0:0)[clk:6]	(2 calls X 3.000000e+00)
			fftw4<cdbl> id= (s2v<basic3<dbl>> * (fftw4<cdbl> * dbl)) : (real:0:0:0,user:0:0:0,sys:0:0:0)[clk:8]	(2 calls X 4.000000e+00)
			fftw4<cdbl> *= s2v<basic3<dbl>> : (real:0:0:0,user:0:0:0,sys:0:0:0)[clk:4]	(2 calls X 2.000000e+00)
			fftw4<cdbl> += fftw4<cdbl> : (real:0:0:0,user:0:0:0,sys:0:0:0)[clk:4]	(2 calls X 2.000000e+00)
			fftw4<cdbl> id= ((fftw4<cdbl> * dbl) * s2v<basic3<dbl>>) : (real:0:0:3,user:0:0:3,sys:0:0:0)[clk:316]	(100 calls X 3.160000e+00)
			fftw4<cdbl> -= fftw4<cdbl> : (real:0:0:2,user:0:0:2,sys:0:0:0)[clk:272]	(100 calls X 2.720000e+00)
			fftw4<cdbl> -= ((fftw4<cdbl> * dbl) * s2v<basic3<dbl>>) : (real:0:0:3,user:0:0:3,sys:0:0:0)[clk:322]	(100 calls X 3.220000e+00)
			fftw4<cdbl> id= (s2v<basic3<dbl>> swp(*) (fftw4<cdbl> + (fftw4<cdbl> * dbl))) : (real:0:0:6,user:0:0:6,sys:0:0:0)[clk:634]	(200 calls X 3.170000e+00)
	cubby::field timer root : (real:0:0:13,user:0:0:13,sys:0:0:0)[clk:1353]
		vector::in_place_curl : (real:0:0:3,user:0:0:3,sys:0:0:0)[clk:327]	(204 calls X 1.602941e+00)
		vector::vec_prod : (real:0:0:5,user:0:0:5,sys:0:0:0)[clk:524]	(204 calls X 2.568627e+00)
		vector::project : (real:0:0:1,user:0:0:1,sys:0:0:0)[clk:169]	(102 calls X 1.656863e+00)
		scalar::dealias : (real:0:0:2,user:0:0:2,sys:0:0:0)[clk:298]	(612 calls X 4.869281e-01)
		scalar::local_energy : (real:0:0:0,user:0:0:0,sys:0:0:0)[clk:35]	(66 calls X 5.303030e-01)

On Jade (CINES) for release 1481, 32 CPUs for a 256^3 grid

max number of allocated scalars: 39
 ---------------------------------------------
   32 Processor run, global<256,256,256>
   for 100 time steps
main timer : (real:0:3:18,user:0:3:13,sys:0:0:1)[clk:19808]
	loop : (real:0:2:20,user:0:2:16,sys:0:0:1)[clk:14030]	(100 calls X 1.403000e+02)
	FFT timer : (real:0:2:29,user:0:2:24,sys:0:0:1)[clk:14924]
		FFT and transposition time : (real:0:2:29,user:0:2:24,sys:0:0:1)[clk:14924]	(512 calls X 2.914844e+01)
			FFT only : (real:0:0:28,user:0:0:28,sys:0:0:0)[clk:2870]	(4302 calls X 6.671315e-01)
		planification time : (real:0:0:47,user:0:0:47,sys:0:0:0)[clk:4728]
	azur::array timer root : (real:0:0:35,user:0:0:35,sys:0:0:0)[clk:3548]
		view = expr : (real:0:0:35,user:0:0:35,sys:0:0:0)[clk:3547]	(2380 calls X 1.490336e+00)
			basic3<flt> id= (basic3<int> + flt) : (real:0:0:0,user:0:0:0,sys:0:0:0)[clk:0]	(2 calls X 0.000000e+00)
			fftw4<cdbl> id= cdbl : (real:0:0:1,user:0:0:1,sys:0:0:0)[clk:165]	(106 calls X 1.556604e+00)
			fftw4<dbl> id= dbl : (real:0:0:0,user:0:0:0,sys:0:0:0)[clk:5]	(4 calls X 1.250000e+00)
			fftw3<dbl> id= fftw3<dbl> : (real:0:0:0,user:0:0:0,sys:0:0:0)[clk:6]	(9 calls X 6.666667e-01)
			fftw3<dbl> *= dbl : (real:0:0:0,user:0:0:0,sys:0:0:0)[clk:3]	(6 calls X 5.000000e-01)
			fftw3<dbl> id= dbl : (real:0:0:2,user:0:0:2,sys:0:0:0)[clk:219]	(516 calls X 4.244186e-01)
			fftw3<dbl> /= dbl : (real:0:0:2,user:0:0:2,sys:0:0:0)[clk:283]	(618 calls X 4.579288e-01)
			fftw4<cdbl> id= fftw4<cdbl> : (real:0:0:9,user:0:0:9,sys:0:0:0)[clk:977]	(406 calls X 2.406404e+00)
			fftw4<dbl> id= fftw4<dbl> : (real:0:0:4,user:0:0:5,sys:0:0:0)[clk:494]	(205 calls X 2.409756e+00)
			fftw4<cdbl> id= (s2v<basic3<dbl>> * ((fftw4<cdbl> * dbl) + fftw4<cdbl>)) : (real:0:0:0,user:0:0:0,sys:0:0:0)[clk:6]	(2 calls X 3.000000e+00)
			fftw4<cdbl> id= (s2v<basic3<dbl>> * (fftw4<cdbl> * dbl)) : (real:0:0:0,user:0:0:0,sys:0:0:0)[clk:6]	(2 calls X 3.000000e+00)
			fftw4<cdbl> *= s2v<basic3<dbl>> : (real:0:0:0,user:0:0:0,sys:0:0:0)[clk:4]	(2 calls X 2.000000e+00)
			fftw4<cdbl> += fftw4<cdbl> : (real:0:0:0,user:0:0:0,sys:0:0:0)[clk:6]	(2 calls X 3.000000e+00)
			fftw4<cdbl> id= ((fftw4<cdbl> * dbl) * s2v<basic3<dbl>>) : (real:0:0:2,user:0:0:2,sys:0:0:0)[clk:285]	(100 calls X 2.850000e+00)
			fftw4<cdbl> -= fftw4<cdbl> : (real:0:0:2,user:0:0:2,sys:0:0:0)[clk:236]	(100 calls X 2.360000e+00)
			fftw4<cdbl> -= ((fftw4<cdbl> * dbl) * s2v<basic3<dbl>>) : (real:0:0:2,user:0:0:2,sys:0:0:0)[clk:287]	(100 calls X 2.870000e+00)
			fftw4<cdbl> id= (s2v<basic3<dbl>> swp(*) (fftw4<cdbl> + (fftw4<cdbl> * dbl))) : (real:0:0:5,user:0:0:5,sys:0:0:0)[clk:565]	(200 calls X 2.825000e+00)
	cubby::field timer root : (real:0:1:20,user:0:1:16,sys:0:0:1)[clk:8074]
		scalar::transpose_blocks_when_received : (real:0:1:8,user:0:1:4,sys:0:0:1)[clk:6851]	(3072 calls X 2.230143e+00)
			scalar::copy_transposed : (real:0:0:9,user:0:0:8,sys:0:0:0)[clk:955]	(49152 calls X 1.942953e-02)
		vector::in_place_curl : (real:0:0:2,user:0:0:2,sys:0:0:0)[clk:266]	(204 calls X 1.303922e+00)
		vector::vec_prod : (real:0:0:4,user:0:0:4,sys:0:0:0)[clk:484]	(204 calls X 2.372549e+00)
		vector::project : (real:0:0:1,user:0:0:1,sys:0:0:0)[clk:159]	(102 calls X 1.558824e+00)
		scalar::dealias : (real:0:0:2,user:0:0:2,sys:0:0:0)[clk:280]	(612 calls X 4.575163e-01)
		scalar::local_energy : (real:0:0:0,user:0:0:0,sys:0:0:0)[clk:34]	(66 calls X 5.151515e-01)

On Jade (CINES) for release 1476, 32 CPUs for a 512^3 grid

****************************************************
max number of allocated scalars: 39
 ---------------------------------------------
   32 Processor run, global<512,512,512>
   for 100 time steps
main timer : (real:0:30:34,user:0:30:23,sys:0:0:7)[clk:183467]
	loop : (real:0:18:37,user:0:18:30,sys:0:0:4)[clk:111746]	(100 calls X 1.117460e+03)
	FFT timer : (real:0:23:27,user:0:23:18,sys:0:0:4)[clk:140711]
		FFT and transposition time : (real:0:23:27,user:0:23:18,sys:0:0:4)[clk:140711]	(512 calls X 2.748262e+02)
			FFT only : (real:0:3:57,user:0:3:57,sys:0:0:0)[clk:23730]	(2560 calls X 9.269531e+00)
		planification time : (real:0:11:24,user:0:11:22,sys:0:0:0)[clk:68439]
	azur::array timer root : (real:0:5:15,user:0:5:14,sys:0:0:0)[clk:31559]
		view = expr : (real:0:5:15,user:0:5:14,sys:0:0:0)[clk:31559]	(2380 calls X 1.326008e+01)
			basic3<flt> id= (basic3<int> + flt) : (real:0:0:0,user:0:0:0,sys:0:0:0)[clk:4]	(2 calls X 2.000000e+00)
			fftw4<cdbl> = cdbl : (real:0:0:14,user:0:0:14,sys:0:0:0)[clk:1480]	(106 calls X 1.396226e+01)
			fftw4<dbl> = dbl : (real:0:0:0,user:0:0:0,sys:0:0:0)[clk:51]	(4 calls X 1.275000e+01)
			fftw3<dbl> = fftw3<dbl> : (real:0:0:0,user:0:0:0,sys:0:0:0)[clk:63]	(9 calls X 7.000000e+00)
			fftw3<dbl> *= dbl : (real:0:0:0,user:0:0:0,sys:0:0:0)[clk:26]	(6 calls X 4.333333e+00)
			fftw3<dbl> = dbl : (real:0:0:20,user:0:0:20,sys:0:0:0)[clk:2026]	(516 calls X 3.926357e+00)
			fftw3<dbl> /= dbl : (real:0:0:25,user:0:0:25,sys:0:0:0)[clk:2531]	(618 calls X 4.095469e+00)
			fftw4<cdbl> = fftw4<cdbl> : (real:0:1:25,user:0:1:25,sys:0:0:0)[clk:8538]	(406 calls X 2.102956e+01)
			fftw4<dbl> = fftw4<dbl> : (real:0:0:44,user:0:0:44,sys:0:0:0)[clk:4409]	(205 calls X 2.150732e+01)
			fftw4<cdbl> id= (s2v<basic3<dbl>> * ((fftw4<cdbl> * dbl) + fftw4<cdbl>)) : (real:0:0:0,user:0:0:0,sys:0:0:0)[clk:53]	(2 calls X 2.650000e+01)
			fftw4<cdbl> id= (s2v<basic3<dbl>> * (fftw4<cdbl> * dbl)) : (real:0:0:0,user:0:0:0,sys:0:0:0)[clk:53]	(2 calls X 2.650000e+01)
			fftw4<cdbl> *= s2v<basic3<dbl>> : (real:0:0:0,user:0:0:0,sys:0:0:0)[clk:33]	(2 calls X 1.650000e+01)
			fftw4<cdbl> += fftw4<cdbl> : (real:0:0:0,user:0:0:0,sys:0:0:0)[clk:42]	(2 calls X 2.100000e+01)
			fftw4<cdbl> id= ((fftw4<cdbl> * dbl) * s2v<basic3<dbl>>) : (real:0:0:24,user:0:0:24,sys:0:0:0)[clk:2496]	(100 calls X 2.496000e+01)
			fftw4<cdbl> -= fftw4<cdbl> : (real:0:0:21,user:0:0:21,sys:0:0:0)[clk:2108]	(100 calls X 2.108000e+01)
			fftw4<cdbl> -= ((fftw4<cdbl> * dbl) * s2v<basic3<dbl>>) : (real:0:0:25,user:0:0:25,sys:0:0:0)[clk:2554]	(100 calls X 2.554000e+01)
			fftw4<cdbl> id= (s2v<basic3<dbl>> swp(*) (fftw4<cdbl> + (fftw4<cdbl> * dbl))) : (real:0:0:50,user:0:0:50,sys:0:0:0)[clk:5092]	(200 calls X 2.546000e+01)
	cubby::field timer root : (real:0:1:45,user:0:1:45,sys:0:0:0)[clk:10569]
		vector::in_place_curl : (real:0:0:26,user:0:0:26,sys:0:0:0)[clk:2610]	(204 calls X 1.279412e+01)
		vector::vec_prod : (real:0:0:43,user:0:0:43,sys:0:0:0)[clk:4335]	(204 calls X 2.125000e+01)
		vector::project : (real:0:0:13,user:0:0:13,sys:0:0:0)[clk:1390]	(102 calls X 1.362745e+01)
		scalar::dealias : (real:0:0:19,user:0:0:19,sys:0:0:0)[clk:1972]	(612 calls X 3.222222e+00)
		scalar::local_energy : (real:0:0:2,user:0:0:2,sys:0:0:0)[clk:262]	(66 calls X 3.969697e+00)

On Jade (CINES) for release 1481, 32 CPUs for a 512^3 grid

max number of allocated scalars: 39
 ---------------------------------------------
   32 Processor run, global<512,512,512>
   for 100 time steps
main timer : (real:0:30:41,user:0:30:30,sys:0:0:7)[clk:184105]
	loop : (real:0:18:45,user:0:18:38,sys:0:0:5)[clk:112555]	(100 calls X 1.125550e+03)
	FFT timer : (real:0:23:48,user:0:23:40,sys:0:0:5)[clk:142862]
		FFT and transposition time : (real:0:23:48,user:0:23:40,sys:0:0:5)[clk:142862]	(512 calls X 2.790273e+02)
			FFT only : (real:0:3:39,user:0:3:39,sys:0:0:0)[clk:21948]	(4302 calls X 5.101813e+00)
		planification time : (real:0:11:13,user:0:11:12,sys:0:0:0)[clk:67306]
	azur::array timer root : (real:0:5:3,user:0:5:2,sys:0:0:0)[clk:30378]
		view = expr : (real:0:5:3,user:0:5:2,sys:0:0:0)[clk:30378]	(2380 calls X 1.276387e+01)
			basic3<flt> id= (basic3<int> + flt) : (real:0:0:0,user:0:0:0,sys:0:0:0)[clk:4]	(2 calls X 2.000000e+00)
			fftw4<cdbl> id= cdbl : (real:0:0:14,user:0:0:14,sys:0:0:0)[clk:1404]	(106 calls X 1.324528e+01)
			fftw4<dbl> id= dbl : (real:0:0:0,user:0:0:0,sys:0:0:0)[clk:46]	(4 calls X 1.150000e+01)
			fftw3<dbl> id= fftw3<dbl> : (real:0:0:0,user:0:0:0,sys:0:0:0)[clk:63]	(9 calls X 7.000000e+00)
			fftw3<dbl> *= dbl : (real:0:0:0,user:0:0:0,sys:0:0:0)[clk:24]	(6 calls X 4.000000e+00)
			fftw3<dbl> id= dbl : (real:0:0:19,user:0:0:19,sys:0:0:0)[clk:1991]	(516 calls X 3.858527e+00)
			fftw3<dbl> /= dbl : (real:0:0:24,user:0:0:24,sys:0:0:0)[clk:2440]	(618 calls X 3.948220e+00)
			fftw4<cdbl> id= fftw4<cdbl> : (real:0:1:22,user:0:1:21,sys:0:0:0)[clk:8221]	(406 calls X 2.024877e+01)
			fftw4<dbl> id= fftw4<dbl> : (real:0:0:41,user:0:0:41,sys:0:0:0)[clk:4194]	(205 calls X 2.045854e+01)
			fftw4<cdbl> id= (s2v<basic3<dbl>> * ((fftw4<cdbl> * dbl) + fftw4<cdbl>)) : (real:0:0:0,user:0:0:0,sys:0:0:0)[clk:52]	(2 calls X 2.600000e+01)
			fftw4<cdbl> id= (s2v<basic3<dbl>> * (fftw4<cdbl> * dbl)) : (real:0:0:0,user:0:0:0,sys:0:0:0)[clk:50]	(2 calls X 2.500000e+01)
			fftw4<cdbl> *= s2v<basic3<dbl>> : (real:0:0:0,user:0:0:0,sys:0:0:0)[clk:36]	(2 calls X 1.800000e+01)
			fftw4<cdbl> += fftw4<cdbl> : (real:0:0:0,user:0:0:0,sys:0:0:0)[clk:42]	(2 calls X 2.100000e+01)
			fftw4<cdbl> id= ((fftw4<cdbl> * dbl) * s2v<basic3<dbl>>) : (real:0:0:24,user:0:0:24,sys:0:0:0)[clk:2413]	(100 calls X 2.413000e+01)
			fftw4<cdbl> -= fftw4<cdbl> : (real:0:0:20,user:0:0:20,sys:0:0:0)[clk:2042]	(100 calls X 2.042000e+01)
			fftw4<cdbl> -= ((fftw4<cdbl> * dbl) * s2v<basic3<dbl>>) : (real:0:0:24,user:0:0:24,sys:0:0:0)[clk:2477]	(100 calls X 2.477000e+01)
			fftw4<cdbl> id= (s2v<basic3<dbl>> swp(*) (fftw4<cdbl> + (fftw4<cdbl> * dbl))) : (real:0:0:48,user:0:0:48,sys:0:0:0)[clk:4879]	(200 calls X 2.439500e+01)
	cubby::field timer root : (real:0:9:57,user:0:9:49,sys:0:0:5)[clk:59742]
		scalar::transpose_blocks_when_received : (real:0:8:16,user:0:8:8,sys:0:0:5)[clk:49627]	(3072 calls X 1.615462e+01)
			scalar::copy_transposed : (real:0:1:27,user:0:1:26,sys:0:0:0)[clk:8724]	(49152 calls X 1.774902e-01)
		vector::in_place_curl : (real:0:0:24,user:0:0:24,sys:0:0:0)[clk:2478]	(204 calls X 1.214706e+01)
		vector::vec_prod : (real:0:0:41,user:0:0:41,sys:0:0:0)[clk:4125]	(204 calls X 2.022059e+01)
		vector::project : (real:0:0:13,user:0:0:13,sys:0:0:0)[clk:1325]	(102 calls X 1.299020e+01)
		scalar::dealias : (real:0:0:19,user:0:0:19,sys:0:0:0)[clk:1936]	(612 calls X 3.163399e+00)
		scalar::local_energy : (real:0:0:2,user:0:0:2,sys:0:0:0)[clk:251]	(66 calls X 3.803030e+00)
Last modified 10 years ago (on Aug 17, 2010 5:43:56 PM)