wiki:256-8procs-per-node-licallo

8 procs -> 227 s -> 1816 s-mono

16 procs -> 123 s -> 1968 s-mono

32 procs -> 81 s -> 2592 s-mono

64 procs -> 59 s -> 3776 s-mono

128 procs -> 21 s -> 2688 s-mono

256 procs -> 31 s -> 7936 s-mono

 8 Processor run, global<256,256,256>
   for 100 time steps
main timer : (real:0:8:52,user:0:8:33,sys:0:0:18)[clk:53229]
	loop : (real:0:3:47,user:0:3:31,sys:0:0:16)[clk:22786]	(100 calls X 2.278600e+02)
	FFT timer : (real:0:7:36,user:0:7:22,sys:0:0:13)[clk:45614]
		planification time : (real:0:4:57,user:0:4:57,sys:0:0:0)[clk:29796]
		FFT (out of place) and transposition time : (real:0:2:38,user:0:2:25,sys:0:0:12)[clk:15818]	(509 calls X 3.107662e+01)
			FFT (out of place) only : (real:0:1:41,user:0:1:40,sys:0:0:0)[clk:10101]	(2545 calls X 3.968959e+00)
			FFTW (out of place) Transposition only : (real:0:0:53,user:0:0:40,sys:0:0:12)[clk:5359]	(2036 calls X 2.632122e+00)
	azur::array timer root : (real:0:0:38,user:0:0:36,sys:0:0:2)[clk:3893]
		view = expr : (real:0:0:38,user:0:0:36,sys:0:0:2)[clk:3893]	(1620 calls X 2.403086e+00)
			fftw3<dbl> id= dbl : (real:0:0:0,user:0:0:0,sys:0:0:0)[clk:2]	(2 calls X 1.000000e+00)
			fftw4<dbl> /= int : (real:0:0:3,user:0:0:3,sys:0:0:0)[clk:356]	(204 calls X 1.745098e+00)
			fftw4<dbl> id= dbl : (real:0:0:0,user:0:0:0,sys:0:0:0)[clk:2]	(1 calls X 2.000000e+00)
			fftw3<dbl> id= fftw3<dbl> : (real:0:0:6,user:0:0:5,sys:0:0:0)[clk:602]	(606 calls X 9.933993e-01)
			fftw4<dbl> *= dbl : (real:0:0:0,user:0:0:0,sys:0:0:0)[clk:2]	(1 calls X 2.000000e+00)
			fftw4<cdbl> id= fftw4<cdbl> : (real:0:0:5,user:0:0:5,sys:0:0:0)[clk:580]	(202 calls X 2.871287e+00)
			fftw4<dbl> id= fftw4<dbl> : (real:0:0:3,user:0:0:1,sys:0:0:2)[clk:339]	(103 calls X 3.291262e+00)
			fftw4<cdbl> id= (s2v<basic3<dbl>> * ((fftw4<cdbl> * dbl) + fftw4<cdbl>)) : (real:0:0:0,user:0:0:0,sys:0:0:0)[clk:3]	(1 calls X 3.000000e+00)
			fftw4<cdbl> id= (s2v<basic3<dbl>> * ((fftw4<cdbl> * dbl) + fftw4<cdbl>)) : (real:0:0:0,user:0:0:0,sys:0:0:0)[clk:4]	(1 calls X 4.000000e+00)
			fftw4<cdbl> id= (s2v<basic3<dbl>> * (fftw4<cdbl> * dbl)) : (real:0:0:0,user:0:0:0,sys:0:0:0)[clk:4]	(1 calls X 4.000000e+00)
			fftw4<cdbl> *= s2v<basic3<dbl>> : (real:0:0:0,user:0:0:0,sys:0:0:0)[clk:2]	(1 calls X 2.000000e+00)
			fftw4<cdbl> += fftw4<cdbl> : (real:0:0:0,user:0:0:0,sys:0:0:0)[clk:3]	(1 calls X 3.000000e+00)
			fftw4<cdbl> id= ((fftw4<cdbl> * s2v<basic3<dbl>>) + (s2v<basic3<dbl>> * (fftw4<cdbl> * dbl))) : (real:0:0:0,user:0:0:0,sys:0:0:0)[clk:5]	(1 calls X 5.000000e+00)
			fftw4<cdbl> id= ((fftw4<cdbl> * dbl) * s2v<basic3<dbl>>) : (real:0:0:3,user:0:0:3,sys:0:0:0)[clk:356]	(99 calls X 3.595960e+00)
			fftw4<cdbl> -= fftw4<cdbl> : (real:0:0:3,user:0:0:3,sys:0:0:0)[clk:323]	(99 calls X 3.262626e+00)
			fftw4<cdbl> -= ((fftw4<cdbl> * dbl) * s2v<basic3<dbl>>) : (real:0:0:3,user:0:0:3,sys:0:0:0)[clk:374]	(99 calls X 3.777778e+00)
			fftw4<cdbl> id= (s2v<basic3<dbl>> swp(*) (fftw4<cdbl> + (fftw4<cdbl> * dbl))) : (real:0:0:9,user:0:0:9,sys:0:0:0)[clk:934]	(198 calls X 4.717172e+00)
	cubby::field timer root : (real:0:1:18,user:0:1:5,sys:0:0:12)[clk:7843]
		scalar::transpose_blocks_when_received : (real:0:0:53,user:0:0:40,sys:0:0:12)[clk:5357]	(3054 calls X 1.754093e+00)
			scalar::copy_transposed : (real:0:0:13,user:0:0:12,sys:0:0:0)[clk:1311]	(12216 calls X 1.073183e-01)
		vector::in_place_curl : (real:0:0:5,user:0:0:5,sys:0:0:0)[clk:538]	(202 calls X 2.663366e+00)
		scalar::local_energy : (real:0:0:0,user:0:0:0,sys:0:0:0)[clk:37]	(66 calls X 5.606061e-01)
		vector::vec_prod : (real:0:0:6,user:0:0:6,sys:0:0:0)[clk:654]	(202 calls X 3.237624e+00)
		vector::project : (real:0:0:5,user:0:0:5,sys:0:0:0)[clk:513]	(101 calls X 5.079208e+00)
		scalar::dealias : (real:0:0:7,user:0:0:7,sys:0:0:0)[clk:744]	(606 calls X 1.227723e+00)
   16 Processor run, global<256,256,256>
   for 100 time steps
main timer : (real:0:2:6,user:0:1:52,sys:0:0:13)[clk:12686]
        loop : (real:0:2:3,user:0:1:50,sys:0:0:13)[clk:12393]   (100 calls X 1.239300e+02)
        FFT timer : (real:0:1:25,user:0:1:14,sys:0:0:11)[clk:8586]
                planification time : (real:0:0:0,user:0:0:0,sys:0:0:0)[clk:0]
                FFT (out of place) and transposition time : (real:0:1:25,user:0:1:14,sys:0:0:11)[clk:8586]      (509 calls X 1.686837e+01)
                        FFT (out of place) only : (real:0:0:51,user:0:0:50,sys:0:0:0)[clk:5109] (2545 calls X 2.007466e+00)
                        FFTW (out of place) Transposition only : (real:0:0:32,user:0:0:21,sys:0:0:10)[clk:3273] (2036 calls X 1.607564e+00)
        azur::array timer root : (real:0:0:21,user:0:0:20,sys:0:0:1)[clk:2168]
                view = expr : (real:0:0:21,user:0:0:20,sys:0:0:1)[clk:2167]     (1620 calls X 1.337654e+00)
                        fftw3<dbl> id= dbl : (real:0:0:0,user:0:0:0,sys:0:0:0)[clk:1]   (2 calls X 5.000000e-01)
                        fftw4<dbl> /= int : (real:0:0:2,user:0:0:1,sys:0:0:0)[clk:203]  (204 calls X 9.950981e-01)
                        fftw4<dbl> id= dbl : (real:0:0:0,user:0:0:0,sys:0:0:0)[clk:1]   (1 calls X 1.000000e+00)
                        fftw3<dbl> id= fftw3<dbl> : (real:0:0:3,user:0:0:3,sys:0:0:0)[clk:364]  (606 calls X 6.006601e-01)
                        fftw4<dbl> *= dbl : (real:0:0:0,user:0:0:0,sys:0:0:0)[clk:1]    (1 calls X 1.000000e+00)
                        fftw4<cdbl> id= fftw4<cdbl> : (real:0:0:3,user:0:0:3,sys:0:0:0)[clk:301]        (202 calls X 1.490099e+00)
                        fftw4<dbl> id= fftw4<dbl> : (real:0:0:1,user:0:0:0,sys:0:0:1)[clk:194]  (103 calls X 1.883495e+00)
                        fftw4<cdbl> id= (s2v<basic3<dbl>> * ((fftw4<cdbl> * dbl) + fftw4<cdbl>)) : (real:0:0:0,user:0:0:0,sys:0:0:0)[clk:2]     (1 calls X 2.000000e+00)
                        fftw4<cdbl> id= (s2v<basic3<dbl>> * ((fftw4<cdbl> * dbl) + fftw4<cdbl>)) : (real:0:0:0,user:0:0:0,sys:0:0:0)[clk:2]     (1 calls X 2.000000e+00)
                        fftw4<cdbl> id= (s2v<basic3<dbl>> * (fftw4<cdbl> * dbl)) : (real:0:0:0,user:0:0:0,sys:0:0:0)[clk:2]     (1 calls X 2.000000e+00)
                        fftw4<cdbl> *= s2v<basic3<dbl>> : (real:0:0:0,user:0:0:0,sys:0:0:0)[clk:2]      (1 calls X 2.000000e+00)
                        fftw4<cdbl> += fftw4<cdbl> : (real:0:0:0,user:0:0:0,sys:0:0:0)[clk:2]   (1 calls X 2.000000e+00)
                        fftw4<cdbl> id= ((fftw4<cdbl> * s2v<basic3<dbl>>) + (s2v<basic3<dbl>> * (fftw4<cdbl> * dbl))) : (real:0:0:0,user:0:0:0,sys:0:0:0)[clk:2]        (1 calls X 2.000000e+00)
                        fftw4<cdbl> id= ((fftw4<cdbl> * dbl) * s2v<basic3<dbl>>) : (real:0:0:1,user:0:0:1,sys:0:0:0)[clk:193]   (99 calls X 1.949495e+00)
                        fftw4<cdbl> -= fftw4<cdbl> : (real:0:0:1,user:0:0:1,sys:0:0:0)[clk:171] (99 calls X 1.727273e+00)
                        fftw4<cdbl> -= ((fftw4<cdbl> * dbl) * s2v<basic3<dbl>>) : (real:0:0:2,user:0:0:2,sys:0:0:0)[clk:222]    (99 calls X 2.242424e+00)
                        fftw4<cdbl> id= (s2v<basic3<dbl>> swp(*) (fftw4<cdbl> + (fftw4<cdbl> * dbl))) : (real:0:0:5,user:0:0:5,sys:0:0:0)[clk:503]      (198 calls X 2.540404e+00)
        cubby::field timer root : (real:0:0:45,user:0:0:34,sys:0:0:10)[clk:4549]
                scalar::transpose_blocks_when_received : (real:0:0:32,user:0:0:21,sys:0:0:10)[clk:3272] (3054 calls X 1.071382e+00)
                        scalar::copy_transposed : (real:0:0:7,user:0:0:7,sys:0:0:0)[clk:776]    (24432 calls X 3.176162e-02)
                vector::in_place_curl : (real:0:0:2,user:0:0:2,sys:0:0:0)[clk:284]      (202 calls X 1.405941e+00)
                scalar::local_energy : (real:0:0:0,user:0:0:0,sys:0:0:0)[clk:16]        (66 calls X 2.424242e-01)
                vector::vec_prod : (real:0:0:3,user:0:0:3,sys:0:0:0)[clk:345]   (202 calls X 1.707921e+00)
                vector::project : (real:0:0:2,user:0:0:2,sys:0:0:0)[clk:259]    (101 calls X 2.564356e+00)
                scalar::dealias : (real:0:0:3,user:0:0:3,sys:0:0:0)[clk:373]    (606 calls X 6.155115e-01)

  32 Processor run, global<256,256,256>
   for 100 time steps
main timer : (real:0:2:33,user:0:2:16,sys:0:0:15)[clk:15374]
	loop : (real:0:1:21,user:0:1:5,sys:0:0:15)[clk:8196]	(100 calls X 8.196000e+01)
	FFT timer : (real:0:2:12,user:0:1:56,sys:0:0:14)[clk:13248]
		planification time : (real:0:1:9,user:0:1:9,sys:0:0:0)[clk:6952]
		FFT (out of place) and transposition time : (real:0:1:2,user:0:0:47,sys:0:0:14)[clk:6296]	(509 calls X 1.236935e+01)
			FFT (out of place) only : (real:0:0:29,user:0:0:29,sys:0:0:0)[clk:2915]	(2545 calls X 1.145383e+00)
			FFTW (out of place) Transposition only : (real:0:0:32,user:0:0:17,sys:0:0:14)[clk:3299]	(2036 calls X 1.620334e+00)
	azur::array timer root : (real:0:0:10,user:0:0:10,sys:0:0:0)[clk:1095]
		view = expr : (real:0:0:10,user:0:0:10,sys:0:0:0)[clk:1095]	(1620 calls X 6.759259e-01)
			fftw3<dbl> id= dbl : (real:0:0:0,user:0:0:0,sys:0:0:0)[clk:0]	(2 calls X 0.000000e+00)
			fftw4<dbl> /= int : (real:0:0:0,user:0:0:0,sys:0:0:0)[clk:80]	(204 calls X 3.921569e-01)
			fftw4<dbl> id= dbl : (real:0:0:0,user:0:0:0,sys:0:0:0)[clk:1]	(1 calls X 1.000000e+00)
			fftw3<dbl> id= fftw3<dbl> : (real:0:0:1,user:0:0:1,sys:0:0:0)[clk:172]	(606 calls X 2.838284e-01)
			fftw4<dbl> *= dbl : (real:0:0:0,user:0:0:0,sys:0:0:0)[clk:0]	(1 calls X 0.000000e+00)
			fftw4<cdbl> id= fftw4<cdbl> : (real:0:0:1,user:0:0:1,sys:0:0:0)[clk:157]	(202 calls X 7.772277e-01)
			fftw4<dbl> id= fftw4<dbl> : (real:0:0:0,user:0:0:0,sys:0:0:0)[clk:96]	(103 calls X 9.320388e-01)
			fftw4<cdbl> id= (s2v<basic3<dbl>> * ((fftw4<cdbl> * dbl) + fftw4<cdbl>)) : (real:0:0:0,user:0:0:0,sys:0:0:0)[clk:1]	(1 calls X 1.000000e+00)
			fftw4<cdbl> id= (s2v<basic3<dbl>> * ((fftw4<cdbl> * dbl) + fftw4<cdbl>)) : (real:0:0:0,user:0:0:0,sys:0:0:0)[clk:1]	(1 calls X 1.000000e+00)
			fftw4<cdbl> id= (s2v<basic3<dbl>> * (fftw4<cdbl> * dbl)) : (real:0:0:0,user:0:0:0,sys:0:0:0)[clk:1]	(1 calls X 1.000000e+00)
			fftw4<cdbl> *= s2v<basic3<dbl>> : (real:0:0:0,user:0:0:0,sys:0:0:0)[clk:1]	(1 calls X 1.000000e+00)
			fftw4<cdbl> += fftw4<cdbl> : (real:0:0:0,user:0:0:0,sys:0:0:0)[clk:0]	(1 calls X 0.000000e+00)
			fftw4<cdbl> id= ((fftw4<cdbl> * s2v<basic3<dbl>>) + (s2v<basic3<dbl>> * (fftw4<cdbl> * dbl))) : (real:0:0:0,user:0:0:0,sys:0:0:0)[clk:2]	(1 calls X 2.000000e+00)
			fftw4<cdbl> id= ((fftw4<cdbl> * dbl) * s2v<basic3<dbl>>) : (real:0:0:0,user:0:0:0,sys:0:0:0)[clk:98]	(99 calls X 9.898990e-01)
			fftw4<cdbl> -= fftw4<cdbl> : (real:0:0:0,user:0:0:0,sys:0:0:0)[clk:75]	(99 calls X 7.575758e-01)
			fftw4<cdbl> -= ((fftw4<cdbl> * dbl) * s2v<basic3<dbl>>) : (real:0:0:1,user:0:0:1,sys:0:0:0)[clk:120]	(99 calls X 1.212121e+00)
			fftw4<cdbl> id= (s2v<basic3<dbl>> swp(*) (fftw4<cdbl> + (fftw4<cdbl> * dbl))) : (real:0:0:2,user:0:0:2,sys:0:0:0)[clk:288]	(198 calls X 1.454545e+00)
	cubby::field timer root : (real:0:0:39,user:0:0:23,sys:0:0:14)[clk:3967]
		scalar::transpose_blocks_when_received : (real:0:0:32,user:0:0:17,sys:0:0:14)[clk:3297]	(3054 calls X 1.079568e+00)
			scalar::copy_transposed : (real:0:0:4,user:0:0:3,sys:0:0:0)[clk:404]	(48864 calls X 8.267845e-03)
		vector::in_place_curl : (real:0:0:1,user:0:0:1,sys:0:0:0)[clk:141]	(202 calls X 6.980198e-01)
		scalar::local_energy : (real:0:0:0,user:0:0:0,sys:0:0:0)[clk:9]	(66 calls X 1.363636e-01)
		vector::vec_prod : (real:0:0:2,user:0:0:1,sys:0:0:0)[clk:201]	(202 calls X 9.950495e-01)
		vector::project : (real:0:0:1,user:0:0:1,sys:0:0:0)[clk:124]	(101 calls X 1.227723e+00)
		scalar::dealias : (real:0:0:1,user:0:0:1,sys:0:0:0)[clk:195]	(606 calls X 3.217822e-01)

   64 Processor run, global<256,256,256>
   for 100 time steps
main timer : (real:0:1:1,user:0:0:40,sys:0:0:17)[clk:6180]
	loop : (real:0:0:59,user:0:0:39,sys:0:0:16)[clk:5959]	(100 calls X 5.959000e+01)
	FFT timer : (real:0:0:51,user:0:0:30,sys:0:0:16)[clk:5135]
		planification time : (real:0:0:0,user:0:0:0,sys:0:0:0)[clk:1]
		FFT (out of place) and transposition time : (real:0:0:51,user:0:0:30,sys:0:0:16)[clk:5134]	(509 calls X 1.008644e+01)
			FFT (out of place) only : (real:0:0:13,user:0:0:12,sys:0:0:0)[clk:1355]	(2545 calls X 5.324165e-01)
			FFTW (out of place) Transposition only : (real:0:0:37,user:0:0:17,sys:0:0:16)[clk:3728]	(2036 calls X 1.831041e+00)
	azur::array timer root : (real:0:0:5,user:0:0:5,sys:0:0:0)[clk:557]
		view = expr : (real:0:0:5,user:0:0:5,sys:0:0:0)[clk:557]	(1620 calls X 3.438272e-01)
			fftw3<dbl> id= dbl : (real:0:0:0,user:0:0:0,sys:0:0:0)[clk:0]	(2 calls X 0.000000e+00)
			fftw4<dbl> /= int : (real:0:0:0,user:0:0:0,sys:0:0:0)[clk:48]	(204 calls X 2.352941e-01)
			fftw4<dbl> id= dbl : (real:0:0:0,user:0:0:0,sys:0:0:0)[clk:1]	(1 calls X 1.000000e+00)
			fftw3<dbl> id= fftw3<dbl> : (real:0:0:0,user:0:0:0,sys:0:0:0)[clk:85]	(606 calls X 1.402640e-01)
			fftw4<dbl> *= dbl : (real:0:0:0,user:0:0:0,sys:0:0:0)[clk:0]	(1 calls X 0.000000e+00)
			fftw4<cdbl> id= fftw4<cdbl> : (real:0:0:0,user:0:0:0,sys:0:0:0)[clk:85]	(202 calls X 4.207921e-01)
			fftw4<dbl> id= fftw4<dbl> : (real:0:0:0,user:0:0:0,sys:0:0:0)[clk:54]	(103 calls X 5.242718e-01)
			fftw4<cdbl> id= (s2v<basic3<dbl>> * ((fftw4<cdbl> * dbl) + fftw4<cdbl>)) : (real:0:0:0,user:0:0:0,sys:0:0:0)[clk:1]	(1 calls X 1.000000e+00)
			fftw4<cdbl> id= (s2v<basic3<dbl>> * ((fftw4<cdbl> * dbl) + fftw4<cdbl>)) : (real:0:0:0,user:0:0:0,sys:0:0:0)[clk:0]	(1 calls X 0.000000e+00)
			fftw4<cdbl> id= (s2v<basic3<dbl>> * (fftw4<cdbl> * dbl)) : (real:0:0:0,user:0:0:0,sys:0:0:0)[clk:0]	(1 calls X 0.000000e+00)
			fftw4<cdbl> *= s2v<basic3<dbl>> : (real:0:0:0,user:0:0:0,sys:0:0:0)[clk:0]	(1 calls X 0.000000e+00)
			fftw4<cdbl> += fftw4<cdbl> : (real:0:0:0,user:0:0:0,sys:0:0:0)[clk:1]	(1 calls X 1.000000e+00)
			fftw4<cdbl> id= ((fftw4<cdbl> * s2v<basic3<dbl>>) + (s2v<basic3<dbl>> * (fftw4<cdbl> * dbl))) : (real:0:0:0,user:0:0:0,sys:0:0:0)[clk:0]	(1 calls X 0.000000e+00)
			fftw4<cdbl> id= ((fftw4<cdbl> * dbl) * s2v<basic3<dbl>>) : (real:0:0:0,user:0:0:0,sys:0:0:0)[clk:46]	(99 calls X 4.646465e-01)
			fftw4<cdbl> -= fftw4<cdbl> : (real:0:0:0,user:0:0:0,sys:0:0:0)[clk:42]	(99 calls X 4.242424e-01)
			fftw4<cdbl> -= ((fftw4<cdbl> * dbl) * s2v<basic3<dbl>>) : (real:0:0:0,user:0:0:0,sys:0:0:0)[clk:60]	(99 calls X 6.060606e-01)
			fftw4<cdbl> id= (s2v<basic3<dbl>> swp(*) (fftw4<cdbl> + (fftw4<cdbl> * dbl))) : (real:0:0:1,user:0:0:1,sys:0:0:0)[clk:133]	(198 calls X 6.717172e-01)
	cubby::field timer root : (real:0:0:40,user:0:0:20,sys:0:0:16)[clk:4052]
		scalar::transpose_blocks_when_received : (real:0:0:37,user:0:0:17,sys:0:0:16)[clk:3726]	(3054 calls X 1.220039e+00)
			scalar::copy_transposed : (real:0:0:1,user:0:0:2,sys:0:0:0)[clk:193]	(97728 calls X 1.974869e-03)
		vector::in_place_curl : (real:0:0:0,user:0:0:0,sys:0:0:0)[clk:63]	(202 calls X 3.118812e-01)
		scalar::local_energy : (real:0:0:0,user:0:0:0,sys:0:0:0)[clk:5]	(66 calls X 7.575758e-02)
		vector::vec_prod : (real:0:0:0,user:0:0:0,sys:0:0:0)[clk:98]	(202 calls X 4.851485e-01)
		vector::project : (real:0:0:0,user:0:0:0,sys:0:0:0)[clk:61]	(101 calls X 6.039604e-01)
		scalar::dealias : (real:0:0:0,user:0:0:0,sys:0:0:0)[clk:98]	(606 calls X 1.617162e-01)

   128 Processor run, global<256,256,256>
   for 100 time steps
main timer : (real:0:0:23,user:0:0:16,sys:0:0:5)[clk:2331]
	loop : (real:0:0:21,user:0:0:16,sys:0:0:4)[clk:2102]	(100 calls X 2.102000e+01)
	FFT timer : (real:0:0:18,user:0:0:12,sys:0:0:4)[clk:1840]
		planification time : (real:0:0:0,user:0:0:0,sys:0:0:0)[clk:0]
		FFT (out of place) and transposition time : (real:0:0:18,user:0:0:12,sys:0:0:4)[clk:1840]	(509 calls X 3.614931e+00)
			FFT (out of place) only : (real:0:0:5,user:0:0:5,sys:0:0:0)[clk:589]	(2545 calls X 2.314342e-01)
			FFTW (out of place) Transposition only : (real:0:0:12,user:0:0:6,sys:0:0:4)[clk:1230]	(2036 calls X 6.041257e-01)
	azur::array timer root : (real:0:0:2,user:0:0:2,sys:0:0:0)[clk:250]
		view = expr : (real:0:0:2,user:0:0:2,sys:0:0:0)[clk:249]	(1620 calls X 1.537037e-01)
			fftw3<dbl> id= dbl : (real:0:0:0,user:0:0:0,sys:0:0:0)[clk:1]	(2 calls X 5.000000e-01)
			fftw4<dbl> /= int : (real:0:0:0,user:0:0:0,sys:0:0:0)[clk:20]	(204 calls X 9.803922e-02)
			fftw4<dbl> id= dbl : (real:0:0:0,user:0:0:0,sys:0:0:0)[clk:0]	(1 calls X 0.000000e+00)
			fftw3<dbl> id= fftw3<dbl> : (real:0:0:0,user:0:0:0,sys:0:0:0)[clk:40]	(606 calls X 6.600660e-02)
			fftw4<dbl> *= dbl : (real:0:0:0,user:0:0:0,sys:0:0:0)[clk:1]	(1 calls X 1.000000e+00)
			fftw4<cdbl> id= fftw4<cdbl> : (real:0:0:0,user:0:0:0,sys:0:0:0)[clk:30]	(202 calls X 1.485149e-01)
			fftw4<dbl> id= fftw4<dbl> : (real:0:0:0,user:0:0:0,sys:0:0:0)[clk:26]	(103 calls X 2.524272e-01)
			fftw4<cdbl> id= (s2v<basic3<dbl>> * ((fftw4<cdbl> * dbl) + fftw4<cdbl>)) : (real:0:0:0,user:0:0:0,sys:0:0:0)[clk:0]	(1 calls X 0.000000e+00)
			fftw4<cdbl> id= (s2v<basic3<dbl>> * ((fftw4<cdbl> * dbl) + fftw4<cdbl>)) : (real:0:0:0,user:0:0:0,sys:0:0:0)[clk:0]	(1 calls X 0.000000e+00)
			fftw4<cdbl> id= (s2v<basic3<dbl>> * (fftw4<cdbl> * dbl)) : (real:0:0:0,user:0:0:0,sys:0:0:0)[clk:0]	(1 calls X 0.000000e+00)
			fftw4<cdbl> *= s2v<basic3<dbl>> : (real:0:0:0,user:0:0:0,sys:0:0:0)[clk:0]	(1 calls X 0.000000e+00)
			fftw4<cdbl> += fftw4<cdbl> : (real:0:0:0,user:0:0:0,sys:0:0:0)[clk:0]	(1 calls X 0.000000e+00)
			fftw4<cdbl> id= ((fftw4<cdbl> * s2v<basic3<dbl>>) + (s2v<basic3<dbl>> * (fftw4<cdbl> * dbl))) : (real:0:0:0,user:0:0:0,sys:0:0:0)[clk:1]	(1 calls X 1.000000e+00)
			fftw4<cdbl> id= ((fftw4<cdbl> * dbl) * s2v<basic3<dbl>>) : (real:0:0:0,user:0:0:0,sys:0:0:0)[clk:25]	(99 calls X 2.525252e-01)
			fftw4<cdbl> -= fftw4<cdbl> : (real:0:0:0,user:0:0:0,sys:0:0:0)[clk:19]	(99 calls X 1.919192e-01)
			fftw4<cdbl> -= ((fftw4<cdbl> * dbl) * s2v<basic3<dbl>>) : (real:0:0:0,user:0:0:0,sys:0:0:0)[clk:26]	(99 calls X 2.626263e-01)
			fftw4<cdbl> id= (s2v<basic3<dbl>> swp(*) (fftw4<cdbl> + (fftw4<cdbl> * dbl))) : (real:0:0:0,user:0:0:0,sys:0:0:0)[clk:59]	(198 calls X 2.979798e-01)
	cubby::field timer root : (real:0:0:13,user:0:0:7,sys:0:0:4)[clk:1377]
		scalar::transpose_blocks_when_received : (real:0:0:12,user:0:0:6,sys:0:0:4)[clk:1228]	(3054 calls X 4.020956e-01)
			scalar::copy_transposed : (real:0:0:0,user:0:0:0,sys:0:0:0)[clk:51]	(195456 calls X 2.609283e-04)
		vector::in_place_curl : (real:0:0:0,user:0:0:0,sys:0:0:0)[clk:30]	(202 calls X 1.485149e-01)
		scalar::local_energy : (real:0:0:0,user:0:0:0,sys:0:0:0)[clk:2]	(66 calls X 3.030303e-02)
		vector::vec_prod : (real:0:0:0,user:0:0:0,sys:0:0:0)[clk:42]	(202 calls X 2.079208e-01)
		vector::project : (real:0:0:0,user:0:0:0,sys:0:0:0)[clk:26]	(101 calls X 2.574258e-01)
		scalar::dealias : (real:0:0:0,user:0:0:0,sys:0:0:0)[clk:47]	(606 calls X 7.755776e-02)

   256 Processor run, global<256,256,256>
   for 100 time steps
main timer : (real:0:0:35,user:0:0:18,sys:0:0:13)[clk:3529]
	loop : (real:0:0:31,user:0:0:18,sys:0:0:13)[clk:3110]	(100 calls X 3.110000e+01)
	FFT timer : (real:0:0:33,user:0:0:16,sys:0:0:13)[clk:3301]
		planification time : (real:0:0:0,user:0:0:0,sys:0:0:0)[clk:0]
		FFT (out of place) and transposition time : (real:0:0:33,user:0:0:16,sys:0:0:13)[clk:3301]	(509 calls X 6.485265e+00)
			FFT (out of place) only : (real:0:0:3,user:0:0:2,sys:0:0:0)[clk:301]	(2545 calls X 1.182711e-01)
			FFTW (out of place) Transposition only : (real:0:0:29,user:0:0:13,sys:0:0:13)[clk:2987]	(2036 calls X 1.467092e+00)
	azur::array timer root : (real:0:0:1,user:0:0:1,sys:0:0:0)[clk:125]
		view = expr : (real:0:0:1,user:0:0:1,sys:0:0:0)[clk:124]	(1620 calls X 7.654321e-02)
			fftw3<dbl> id= dbl : (real:0:0:0,user:0:0:0,sys:0:0:0)[clk:0]	(2 calls X 0.000000e+00)
			fftw4<dbl> /= int : (real:0:0:0,user:0:0:0,sys:0:0:0)[clk:11]	(204 calls X 5.392157e-02)
			fftw4<dbl> id= dbl : (real:0:0:0,user:0:0:0,sys:0:0:0)[clk:0]	(1 calls X 0.000000e+00)
			fftw3<dbl> id= fftw3<dbl> : (real:0:0:0,user:0:0:0,sys:0:0:0)[clk:20]	(606 calls X 3.300330e-02)
			fftw4<dbl> *= dbl : (real:0:0:0,user:0:0:0,sys:0:0:0)[clk:0]	(1 calls X 0.000000e+00)
			fftw4<cdbl> id= fftw4<cdbl> : (real:0:0:0,user:0:0:0,sys:0:0:0)[clk:26]	(202 calls X 1.287129e-01)
			fftw4<dbl> id= fftw4<dbl> : (real:0:0:0,user:0:0:0,sys:0:0:0)[clk:10]	(103 calls X 9.708738e-02)
			fftw4<cdbl> id= (s2v<basic3<dbl>> * ((fftw4<cdbl> * dbl) + fftw4<cdbl>)) : (real:0:0:0,user:0:0:0,sys:0:0:0)[clk:0]	(1 calls X 0.000000e+00)
			fftw4<cdbl> id= (s2v<basic3<dbl>> * ((fftw4<cdbl> * dbl) + fftw4<cdbl>)) : (real:0:0:0,user:0:0:0,sys:0:0:0)[clk:0]	(1 calls X 0.000000e+00)
			fftw4<cdbl> id= (s2v<basic3<dbl>> * (fftw4<cdbl> * dbl)) : (real:0:0:0,user:0:0:0,sys:0:0:0)[clk:0]	(1 calls X 0.000000e+00)
			fftw4<cdbl> *= s2v<basic3<dbl>> : (real:0:0:0,user:0:0:0,sys:0:0:0)[clk:0]	(1 calls X 0.000000e+00)
			fftw4<cdbl> += fftw4<cdbl> : (real:0:0:0,user:0:0:0,sys:0:0:0)[clk:0]	(1 calls X 0.000000e+00)
			fftw4<cdbl> id= ((fftw4<cdbl> * s2v<basic3<dbl>>) + (s2v<basic3<dbl>> * (fftw4<cdbl> * dbl))) : (real:0:0:0,user:0:0:0,sys:0:0:0)[clk:0]	(1 calls X 0.000000e+00)
			fftw4<cdbl> id= ((fftw4<cdbl> * dbl) * s2v<basic3<dbl>>) : (real:0:0:0,user:0:0:0,sys:0:0:0)[clk:6]	(99 calls X 6.060606e-02)
			fftw4<cdbl> -= fftw4<cdbl> : (real:0:0:0,user:0:0:0,sys:0:0:0)[clk:9]	(99 calls X 9.090909e-02)
			fftw4<cdbl> -= ((fftw4<cdbl> * dbl) * s2v<basic3<dbl>>) : (real:0:0:0,user:0:0:0,sys:0:0:0)[clk:10]	(99 calls X 1.010101e-01)
			fftw4<cdbl> id= (s2v<basic3<dbl>> swp(*) (fftw4<cdbl> + (fftw4<cdbl> * dbl))) : (real:0:0:0,user:0:0:0,sys:0:0:0)[clk:32]	(198 calls X 1.616162e-01)
	cubby::field timer root : (real:0:0:30,user:0:0:14,sys:0:0:13)[clk:3048]
		scalar::transpose_blocks_when_received : (real:0:0:29,user:0:0:13,sys:0:0:13)[clk:2985]	(3054 calls X 9.774067e-01)
			scalar::copy_transposed : (real:0:0:0,user:0:0:0,sys:0:0:0)[clk:27]	(390912 calls X 6.906925e-05)
		vector::in_place_curl : (real:0:0:0,user:0:0:0,sys:0:0:0)[clk:10]	(202 calls X 4.950495e-02)
		scalar::local_energy : (real:0:0:0,user:0:0:0,sys:0:0:0)[clk:1]	(66 calls X 1.515152e-02)
		vector::vec_prod : (real:0:0:0,user:0:0:0,sys:0:0:0)[clk:17]	(202 calls X 8.415841e-02)
		vector::project : (real:0:0:0,user:0:0:0,sys:0:0:0)[clk:11]	(101 calls X 1.089109e-01)
		scalar::dealias : (real:0:0:0,user:0:0:0,sys:0:0:0)[clk:23]	(606 calls X 3.795379e-02)
Last modified on Sep 29, 2011 2:51:29 PM