wiki:FrippOcto8G

Queue : Fripp Octo 8G

option -Ofast

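8 procs -> 517 s (loop wall-clock time, 100 time steps)
16 procs -> 465 s (loop wall-clock time, 100 time steps)
32 procs -> 765 s (loop wall-clock time, 100 time steps)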

  8 Processor run, global<256,256,256>
   for 100 time steps
main timer : (real:0:14:22,user:0:13:46,sys:0:0:28)[clk:86277]
	loop : (real:0:8:37,user:0:8:6,sys:0:0:26)[clk:51716]	(100 calls X 5.171600e+02)
	FFT timer : (real:0:10:22,user:0:10:8,sys:0:0:8)[clk:62231]
		planification time : (real:0:5:32,user:0:5:28,sys:0:0:1)[clk:33230]
		FFT and transposition time : (real:0:4:50,user:0:4:40,sys:0:0:7)[clk:29001]	(509 calls X 5.697643e+01)
			FFT only : (real:0:2:31,user:0:2:29,sys:0:0:0)[clk:15175]	(2545 calls X 5.962672e+00)
			Transposition only : (real:0:1:49,user:0:1:42,sys:0:0:6)[clk:10938]	(1832 calls X 5.970524e+00)
	azur::array timer root : (real:0:2:26,user:0:2:17,sys:0:0:7)[clk:14629]
		view = expr : (real:0:2:26,user:0:2:17,sys:0:0:7)[clk:14629]	(2538 calls X 5.763988e+00)
			fftw3<dbl> id= dbl : (real:0:0:10,user:0:0:2,sys:0:0:7)[clk:1000]	(510 calls X 1.960784e+00)
			fftw3<dbl> /= dbl : (real:0:0:13,user:0:0:13,sys:0:0:0)[clk:1334]	(612 calls X 2.179739e+00)
			fftw4<dbl> id= dbl : (real:0:0:0,user:0:0:0,sys:0:0:0)[clk:5]	(1 calls X 5.000000e+00)
			fftw3<dbl> id= fftw3<dbl> : (real:0:0:17,user:0:0:17,sys:0:0:0)[clk:1728]	(606 calls X 2.851485e+00)
			fftw3<dbl> *= dbl : (real:0:0:0,user:0:0:0,sys:0:0:0)[clk:6]	(3 calls X 2.000000e+00)
			fftw4<cdbl> id= fftw4<cdbl> : (real:0:0:19,user:0:0:19,sys:0:0:0)[clk:1962]	(202 calls X 9.712872e+00)
			fftw4<dbl> id= fftw4<dbl> : (real:0:0:8,user:0:0:8,sys:0:0:0)[clk:869]	(103 calls X 8.436893e+00)
			fftw4<cdbl> id= (s2v<basic3<dbl>> * ((fftw4<cdbl> * dbl) + fftw4<cdbl>)) : (real:0:0:0,user:0:0:0,sys:0:0:0)[clk:17]	(1 calls X 1.700000e+01)
			fftw4<cdbl> id= (s2v<basic3<dbl>> * ((fftw4<cdbl> * dbl) + fftw4<cdbl>)) : (real:0:0:0,user:0:0:0,sys:0:0:0)[clk:20]	(1 calls X 2.000000e+01)
			fftw4<cdbl> id= (s2v<basic3<dbl>> * (fftw4<cdbl> * dbl)) : (real:0:0:0,user:0:0:0,sys:0:0:0)[clk:14]	(1 calls X 1.400000e+01)
			fftw4<cdbl> *= s2v<basic3<dbl>> : (real:0:0:0,user:0:0:0,sys:0:0:0)[clk:11]	(1 calls X 1.100000e+01)
			fftw4<cdbl> += fftw4<cdbl> : (real:0:0:0,user:0:0:0,sys:0:0:0)[clk:12]	(1 calls X 1.200000e+01)
			fftw4<cdbl> id= ((fftw4<cdbl> * s2v<basic3<dbl>>) + (s2v<basic3<dbl>> * (fftw4<cdbl> * dbl))) : (real:0:0:0,user:0:0:0,sys:0:0:0)[clk:20]	(1 calls X 2.000000e+01)
			fftw4<cdbl> id= ((fftw4<cdbl> * dbl) * s2v<basic3<dbl>>) : (real:0:0:14,user:0:0:14,sys:0:0:0)[clk:1440]	(99 calls X 1.454545e+01)
			fftw4<cdbl> -= fftw4<cdbl> : (real:0:0:12,user:0:0:11,sys:0:0:0)[clk:1200]	(99 calls X 1.212121e+01)
			fftw4<cdbl> -= ((fftw4<cdbl> * dbl) * s2v<basic3<dbl>>) : (real:0:0:16,user:0:0:15,sys:0:0:0)[clk:1614]	(99 calls X 1.630303e+01)
			fftw4<cdbl> id= (s2v<basic3<dbl>> swp(*) (fftw4<cdbl> + (fftw4<cdbl> * dbl))) : (real:0:0:33,user:0:0:33,sys:0:0:0)[clk:3376]	(198 calls X 1.705050e+01)
	cubby::field timer root : (real:0:3:29,user:0:3:21,sys:0:0:6)[clk:20990]
		scalar::transpose_blocks_when_received : (real:0:2:4,user:0:1:57,sys:0:0:6)[clk:12486]	(3054 calls X 4.088408e+00)
			scalar::copy_transposed : (real:0:0:41,user:0:0:42,sys:0:0:0)[clk:4158]	(12216 calls X 3.403733e-01)
		vector::in_place_curl : (real:0:0:25,user:0:0:24,sys:0:0:0)[clk:2501]	(202 calls X 1.238119e+01)
		scalar::local_energy : (real:0:0:2,user:0:0:2,sys:0:0:0)[clk:238]	(66 calls X 3.606061e+00)
		vector::vec_prod : (real:0:0:29,user:0:0:29,sys:0:0:0)[clk:2997]	(202 calls X 1.483663e+01)
		vector::project : (real:0:0:14,user:0:0:14,sys:0:0:0)[clk:1465]	(101 calls X 1.450495e+01)
		scalar::dealias : (real:0:0:13,user:0:0:12,sys:0:0:0)[clk:1301]	(606 calls X 2.146865e+00)

 ---------------------------------------------
   16 Processor run, global<256,256,256>
   for 100 time steps
main timer : (real:0:10:45,user:0:6:48,sys:0:3:57)[clk:64579]
	loop : (real:0:7:45,user:0:3:56,sys:0:3:48)[clk:46504]	(100 calls X 4.650400e+02)
	FFT timer : (real:0:8:47,user:0:4:59,sys:0:3:47)[clk:52733]
		planification time : (real:0:2:46,user:0:2:46,sys:0:0:0)[clk:16677]
		FFT and transposition time : (real:0:6:0,user:0:2:13,sys:0:3:46)[clk:36056]	(509 calls X 7.083694e+01)
			FFT only : (real:0:1:5,user:0:1:4,sys:0:0:0)[clk:6517]	(2545 calls X 2.560707e+00)
			Transposition only : (real:0:4:12,user:0:0:54,sys:0:3:17)[clk:25225]	(1832 calls X 1.376910e+01)
	azur::array timer root : (real:0:1:11,user:0:1:8,sys:0:0:3)[clk:7158]
		view = expr : (real:0:1:11,user:0:1:8,sys:0:0:3)[clk:7157]	(2538 calls X 2.819937e+00)
			fftw3<dbl> id= dbl : (real:0:0:4,user:0:0:1,sys:0:0:3)[clk:469]	(510 calls X 9.196078e-01)
			fftw3<dbl> /= dbl : (real:0:0:6,user:0:0:6,sys:0:0:0)[clk:616]	(612 calls X 1.006536e+00)
			fftw4<dbl> id= dbl : (real:0:0:0,user:0:0:0,sys:0:0:0)[clk:3]	(1 calls X 3.000000e+00)
			fftw3<dbl> id= fftw3<dbl> : (real:0:0:8,user:0:0:8,sys:0:0:0)[clk:804]	(606 calls X 1.326733e+00)
			fftw3<dbl> *= dbl : (real:0:0:0,user:0:0:0,sys:0:0:0)[clk:3]	(3 calls X 1.000000e+00)
			fftw4<cdbl> id= fftw4<cdbl> : (real:0:0:9,user:0:0:9,sys:0:0:0)[clk:924]	(202 calls X 4.574257e+00)
			fftw4<dbl> id= fftw4<dbl> : (real:0:0:4,user:0:0:4,sys:0:0:0)[clk:408]	(103 calls X 3.961165e+00)
			fftw4<cdbl> id= (s2v<basic3<dbl>> * ((fftw4<cdbl> * dbl) + fftw4<cdbl>)) : (real:0:0:0,user:0:0:0,sys:0:0:0)[clk:9]	(1 calls X 9.000000e+00)
			fftw4<cdbl> id= (s2v<basic3<dbl>> * ((fftw4<cdbl> * dbl) + fftw4<cdbl>)) : (real:0:0:0,user:0:0:0,sys:0:0:0)[clk:9]	(1 calls X 9.000000e+00)
			fftw4<cdbl> id= (s2v<basic3<dbl>> * (fftw4<cdbl> * dbl)) : (real:0:0:0,user:0:0:0,sys:0:0:0)[clk:8]	(1 calls X 8.000000e+00)
			fftw4<cdbl> *= s2v<basic3<dbl>> : (real:0:0:0,user:0:0:0,sys:0:0:0)[clk:5]	(1 calls X 5.000000e+00)
			fftw4<cdbl> += fftw4<cdbl> : (real:0:0:0,user:0:0:0,sys:0:0:0)[clk:10]	(1 calls X 1.000000e+01)
			fftw4<cdbl> id= ((fftw4<cdbl> * s2v<basic3<dbl>>) + (s2v<basic3<dbl>> * (fftw4<cdbl> * dbl))) : (real:0:0:0,user:0:0:0,sys:0:0:0)[clk:9]	(1 calls X 9.000000e+00)
			fftw4<cdbl> id= ((fftw4<cdbl> * dbl) * s2v<basic3<dbl>>) : (real:0:0:7,user:0:0:7,sys:0:0:0)[clk:737]	(99 calls X 7.444445e+00)
			fftw4<cdbl> -= fftw4<cdbl> : (real:0:0:6,user:0:0:6,sys:0:0:0)[clk:614]	(99 calls X 6.202020e+00)
			fftw4<cdbl> -= ((fftw4<cdbl> * dbl) * s2v<basic3<dbl>>) : (real:0:0:8,user:0:0:8,sys:0:0:0)[clk:807]	(99 calls X 8.151515e+00)
			fftw4<cdbl> id= (s2v<basic3<dbl>> swp(*) (fftw4<cdbl> + (fftw4<cdbl> * dbl))) : (real:0:0:17,user:0:0:17,sys:0:0:0)[clk:1722]	(198 calls X 8.696970e+00)
	cubby::field timer root : (real:0:5:31,user:0:1:44,sys:0:3:46)[clk:33120]
		scalar::transpose_blocks_when_received : (real:0:4:49,user:0:1:2,sys:0:3:46)[clk:28921]	(3054 calls X 9.469875e+00)
			scalar::copy_transposed : (real:0:0:16,user:0:0:17,sys:0:0:0)[clk:1680]	(24432 calls X 6.876228e-02)
		vector::in_place_curl : (real:0:0:12,user:0:0:12,sys:0:0:0)[clk:1238]	(202 calls X 6.128713e+00)
		scalar::local_energy : (real:0:0:1,user:0:0:1,sys:0:0:0)[clk:113]	(66 calls X 1.712121e+00)
		vector::vec_prod : (real:0:0:14,user:0:0:14,sys:0:0:0)[clk:1489]	(202 calls X 7.371287e+00)
		vector::project : (real:0:0:7,user:0:0:7,sys:0:0:0)[clk:726]	(101 calls X 7.188119e+00)
		scalar::dealias : (real:0:0:6,user:0:0:6,sys:0:0:0)[clk:633]	(606 calls X 1.044554e+00)

 ---------------------------------------------
   32 Processor run, global<256,256,256>
   for 100 time steps
main timer : (real:0:14:34,user:0:3:51,sys:0:10:41)[clk:87474]
	loop : (real:0:12:45,user:0:2:17,sys:0:10:26)[clk:76507]	(100 calls X 7.650700e+02)
	FFT timer : (real:0:13:33,user:0:2:58,sys:0:10:34)[clk:81342]
		planification time : (real:0:1:31,user:0:1:30,sys:0:0:0)[clk:9125]
		FFT and transposition time : (real:0:12:2,user:0:1:27,sys:0:10:33)[clk:72217]	(509 calls X 1.418802e+02)
			FFT only : (real:0:0:30,user:0:0:31,sys:0:0:0)[clk:3062]	(2545 calls X 1.203143e+00)
			Transposition only : (real:0:9:53,user:0:0:45,sys:0:9:6)[clk:59332]	(1832 calls X 3.238646e+01)
	azur::array timer root : (real:0:0:35,user:0:0:33,sys:0:0:1)[clk:3515]
		view = expr : (real:0:0:35,user:0:0:33,sys:0:0:1)[clk:3513]	(2538 calls X 1.384161e+00)
			fftw3<dbl> id= dbl : (real:0:0:2,user:0:0:0,sys:0:0:1)[clk:223]	(510 calls X 4.372549e-01)
			fftw3<dbl> /= dbl : (real:0:0:2,user:0:0:2,sys:0:0:0)[clk:297]	(612 calls X 4.852941e-01)
			fftw4<dbl> id= dbl : (real:0:0:0,user:0:0:0,sys:0:0:0)[clk:0]	(1 calls X 0.000000e+00)
			fftw3<dbl> id= fftw3<dbl> : (real:0:0:3,user:0:0:3,sys:0:0:0)[clk:390]	(606 calls X 6.435643e-01)
			fftw3<dbl> *= dbl : (real:0:0:0,user:0:0:0,sys:0:0:0)[clk:1]	(3 calls X 3.333333e-01)
			fftw4<cdbl> id= fftw4<cdbl> : (real:0:0:4,user:0:0:4,sys:0:0:0)[clk:445]	(202 calls X 2.202970e+00)
			fftw4<dbl> id= fftw4<dbl> : (real:0:0:2,user:0:0:1,sys:0:0:0)[clk:202]	(103 calls X 1.961165e+00)
			fftw4<cdbl> id= (s2v<basic3<dbl>> * ((fftw4<cdbl> * dbl) + fftw4<cdbl>)) : (real:0:0:0,user:0:0:0,sys:0:0:0)[clk:4]	(1 calls X 4.000000e+00)
			fftw4<cdbl> id= (s2v<basic3<dbl>> * ((fftw4<cdbl> * dbl) + fftw4<cdbl>)) : (real:0:0:0,user:0:0:0,sys:0:0:0)[clk:5]	(1 calls X 5.000000e+00)
			fftw4<cdbl> id= (s2v<basic3<dbl>> * (fftw4<cdbl> * dbl)) : (real:0:0:0,user:0:0:0,sys:0:0:0)[clk:4]	(1 calls X 4.000000e+00)
			fftw4<cdbl> *= s2v<basic3<dbl>> : (real:0:0:0,user:0:0:0,sys:0:0:0)[clk:2]	(1 calls X 2.000000e+00)
			fftw4<cdbl> += fftw4<cdbl> : (real:0:0:0,user:0:0:0,sys:0:0:0)[clk:3]	(1 calls X 3.000000e+00)
			fftw4<cdbl> id= ((fftw4<cdbl> * s2v<basic3<dbl>>) + (s2v<basic3<dbl>> * (fftw4<cdbl> * dbl))) : (real:0:0:0,user:0:0:0,sys:0:0:0)[clk:5]	(1 calls X 5.000000e+00)
			fftw4<cdbl> id= ((fftw4<cdbl> * dbl) * s2v<basic3<dbl>>) : (real:0:0:3,user:0:0:3,sys:0:0:0)[clk:391]	(99 calls X 3.949495e+00)
			fftw4<cdbl> -= fftw4<cdbl> : (real:0:0:3,user:0:0:3,sys:0:0:0)[clk:301]	(99 calls X 3.040404e+00)
			fftw4<cdbl> -= ((fftw4<cdbl> * dbl) * s2v<basic3<dbl>>) : (real:0:0:3,user:0:0:3,sys:0:0:0)[clk:397]	(99 calls X 4.010101e+00)
			fftw4<cdbl> id= (s2v<basic3<dbl>> swp(*) (fftw4<cdbl> + (fftw4<cdbl> * dbl))) : (real:0:0:8,user:0:0:8,sys:0:0:0)[clk:843]	(198 calls X 4.257576e+00)
	cubby::field timer root : (real:0:11:49,user:0:1:13,sys:0:10:33)[clk:70943]
		scalar::transpose_blocks_when_received : (real:0:11:28,user:0:0:53,sys:0:10:33)[clk:68855]	(3054 calls X 2.254584e+01)
			scalar::copy_transposed : (real:0:0:7,user:0:0:7,sys:0:0:0)[clk:773]	(48864 calls X 1.581942e-02)
		vector::in_place_curl : (real:0:0:6,user:0:0:6,sys:0:0:0)[clk:623]	(202 calls X 3.084158e+00)
		scalar::local_energy : (real:0:0:0,user:0:0:0,sys:0:0:0)[clk:59]	(66 calls X 8.939394e-01)
		vector::vec_prod : (real:0:0:7,user:0:0:7,sys:0:0:0)[clk:721]	(202 calls X 3.569307e+00)
		vector::project : (real:0:0:3,user:0:0:3,sys:0:0:0)[clk:369]	(101 calls X 3.653465e+00)
		scalar::dealias : (real:0:0:3,user:0:0:3,sys:0:0:0)[clk:316]	(606 calls X 5.214521e-01)
Last modified on Mar 23, 2011 6:33:18 PM