|
|
(6 intermediate revisions by the same user not shown) |
Line 1: |
Line 1: |
− | * [[CudaVsNested]]
| + | == troubleshooting == |
− | * [[CudaLog]]
| |
| | | |
− | * http://www.litec-computer.de/PC-Komponenten/Grafikkarten/PCI-express/nVidia/Gigabyte-GV-NX85T256H-8500GT-512MB-Dual-DVI-TV-out-passiv::13515.html | + | * compiling cuda samples : error message "... cannot be declared weak" : gcc-4.4 is not supported, use gcc-4.3 instead : |
− | * http://www.litec-computer.de/PC-Komponenten/Grafikkarten/PCI-express/nVidia/ASUS-EN8500GT-SILENT-MAGIC-HTD-512MB-DVI-TV-out-passiv::13117.html | + | ** http://carlosscheidegger.wordpress.com/2009/11/16/cuda-2-3-on-ubuntu-9-10/ |
− | * beachten : keine 88 (kann kein atomic), nur 85 oder 86, möglichst viel ram (512mb) , keine karten mit "Nur 128-bit Speicherinterface"
| + | ** add "--compiler-bindir=/usr/bin/gcc-4.3" to NVCCFLAGS in your makefiles |
| + | ** cuda/shared/common.mk |
| + | ** cuda//C/common/common.mk |
| | | |
− | * svn+ssh://ghoulsblade@zwischenwelt.org/var/svn/robertprojarbeit
| |
− | * http://zwischenwelt.org/svn/robertprojarbeit
| |
− | * http://zwischenwelt.org/svn/robertdiplarbeit
| |
| | | |
− | * nvidia-cuda-forum http://forums.nvidia.com/index.php?showforum=62 (search for 8600) | + | * running example : segfault |
− | * nvidia-cuda-hp http://developer.nvidia.com/object/cuda.html | + | ** cuda driver not installed ? |
− | * FAQ : http://forums.nvidia.com/index.php?showtopic=36286&hl=8600 (many interesting programming tips) | + | ** try : make emu=1 |
− | * SIMD : http://en.wikipedia.org/wiki/Vector_processor | + | ** try : make dbg=1 emu=1 |
− | * samples http://developer.download.nvidia.com/compute/cuda/sdk/website/samples.html | + | ** gdb ... backtrace: 0xb7fa7ccd in cudaGetDeviceProperties () from /usr/local/cuda/lib/libcudart.so.2 |
− | * cuda 1.0 announcement 26.june : http://forums.nvidia.com/index.php?showtopic=39030&hl=8600
| |
| | | |
| | | |
| + | * error : /usr/bin/ld: cannot find -lglut |
| + | ** solution : apt-get install libglut3-dev |
| | | |
| | | |
| + | * error (during launch) : error while loading shared libraries: libcudart.so.2: cannot open shared object file: No such file or directory |
| + | ** solution : edit /etc/ld.so.conf |
| + | ** add line : /usr/local/cuda/lib |
| + | ** then run /sbin/ldconfig to refresh the linker cache |
| | | |
| + | == old == |
| | | |
− | * $(CUDA_BIN_PATH)\nvcc.exe -arch sm_11 -ccbin "$(VCInstallDir)bin" -c -DWIN32 -D_CONSOLE -D_MBCS -Xcompiler /EHsc,/W3,/nologo,/Wp64,/O2,/Zi,/MT -I"$(CUDA_INC_PATH)" -I./ -I../../common/inc -o $(ConfigurationName)\myproj.obj myproj.cu
| + | [[cuda_old_notes]] |
− | | |
− | == success 11.10.2007 ==
| |
− | | |
− | gpu/cpu= 0.5 ok err<=0.5% N=263250 size=9MB IO=15 i3:0,0 tgpu=43.3s tcpu=93.0
| |
− | gpu/cpu= 0.6 ok err<=0.5% N=2621440 size=90MB IO=32 i3:1,1 tgpu=595.5s tcpu=1074.1
| |
− | | |
− | == new card laptop ==
| |
− | | |
− | <pre>
| |
− | name : GeForce 9500M GS
| |
− | 523584k totalGlobalMem
| |
− | 16k sharedMemPerBlock
| |
− | 8k regsPerBlock
| |
− | 32 warpSize
| |
− | 256k memPitch
| |
− | 512 maxThreadsPerBlock
| |
− | 512 maxThreadsDim[0]
| |
− | 512 maxThreadsDim[1]
| |
− | 64 maxThreadsDim[2]
| |
− | 63k maxGridSize[0]
| |
− | 63k maxGridSize[1]
| |
− | 1 maxGridSize[2]
| |
− | 64k totalConstMem
| |
− | 1 major
| |
− | 1 minor
| |
− | 927k clockRate
| |
− | 256 textureAlignment
| |
− | | |
− | </pre>
| |
− | | |
− | | |
− | == old cards, 256 mb ==
| |
− | <pre>
| |
− | ##### ##### ##### ##### #####device 0
| |
− | name : GeForce 8500 GT
| |
− | 261888k totalGlobalMem
| |
− | 16k sharedMemPerBlock
| |
− | 8k regsPerBlock
| |
− | 32 warpSize
| |
− | 256k memPitch
| |
− | 512 maxThreadsPerBlock
| |
− | 512 maxThreadsDim[0]
| |
− | 512 maxThreadsDim[1]
| |
− | 64 maxThreadsDim[2]
| |
− | 63k maxGridSize[0]
| |
− | 63k maxGridSize[1]
| |
− | 1 maxGridSize[2]
| |
− | 64k totalConstMem
| |
− | 1 major
| |
− | 1 minor
| |
− | 1371k clockRate
| |
− | 256 textureAlignment
| |
− | ##### ##### ##### ##### #####device 1
| |
− | name : GeForce 8500 GT
| |
− | 261824k totalGlobalMem
| |
− | 16k sharedMemPerBlock
| |
− | 8k regsPerBlock
| |
− | 32 warpSize
| |
− | 256k memPitch
| |
− | 512 maxThreadsPerBlock
| |
− | 512 maxThreadsDim[0]
| |
− | 512 maxThreadsDim[1]
| |
− | 64 maxThreadsDim[2]
| |
− | 63k maxGridSize[0]
| |
− | 63k maxGridSize[1]
| |
− | 1 maxGridSize[2]
| |
− | 64k totalConstMem
| |
− | 1 major
| |
− | 1 minor
| |
− | 1371k clockRate
| |
− | 256 textureAlignment
| |
− | </pre>
| |
− | | |
− | == new cards, 512 mb ==
| |
− | | |
− | <pre>
| |
− | ##### ##### ##### ##### #####device 0
| |
− | name : GeForce 8500 GT
| |
− | 524032k totalGlobalMem
| |
− | 16k sharedMemPerBlock
| |
− | 8k regsPerBlock
| |
− | 32 warpSize
| |
− | 256k memPitch
| |
− | 512 maxThreadsPerBlock
| |
− | 512 maxThreadsDim[0]
| |
− | 512 maxThreadsDim[1]
| |
− | 64 maxThreadsDim[2]
| |
− | 63k maxGridSize[0]
| |
− | 63k maxGridSize[1]
| |
− | 1 maxGridSize[2]
| |
− | 64k totalConstMem
| |
− | 1 major
| |
− | 1 minor
| |
− | 896k clockRate
| |
− | 256 textureAlignment
| |
− | ##### ##### ##### ##### #####device 1
| |
− | name : GeForce 8500 GT
| |
− | 523968k totalGlobalMem
| |
− | 16k sharedMemPerBlock
| |
− | 8k regsPerBlock
| |
− | 32 warpSize
| |
− | 256k memPitch
| |
− | 512 maxThreadsPerBlock
| |
− | 512 maxThreadsDim[0]
| |
− | 512 maxThreadsDim[1]
| |
− | 64 maxThreadsDim[2]
| |
− | 63k maxGridSize[0]
| |
− | 63k maxGridSize[1]
| |
− | 1 maxGridSize[2]
| |
− | 64k totalConstMem
| |
− | 1 major
| |
− | 1 minor
| |
− | 896k clockRate
| |
− | 256 textureAlignment
| |
− | </pre>
| |
− | | |
− | == sample output ==
| |
− | | |
− | <pre>
| |
− | N=65536
| |
− | SX=2048
| |
− | SY=64
| |
− | SZ=2
| |
− | I0=32
| |
− | DATASIZE_IN_RAW=2304kb
| |
− | DATASIZE_IN_STATE=256kb
| |
− | DATASIZE_IN_INDEX=136kb
| |
− | DATASIZE_IN_TOTAL=2696kb
| |
− | DATASIZE_OUT_TOTAL=16384kb
| |
− | 0.74 sec : reading data from file
| |
− | assert passed : (INDEXPOS_0(I0) == INDEXSTART_1-1)
| |
− | assert passed : (INDEXPOS_1(I0-1,I0) == INDEXSTART_2-1)
| |
− | assert passed : (INDEXPOS_2(I0-1,I0-1,I0) == INDEX_END-1)
| |
− | assert passed : (sz < 255)
| |
− | -2.478893,-2.459100,-2.459100,...,4.097229
| |
− | 0.09 sec : generating index data
| |
− | 0.02 sec : allocate and init device mem
| |
− | 9.55 sec : exec kernel on device
| |
− | 0.00 sec : receive results from device
| |
− | atom[0]=57100 atom[1]=0 iNumResults=57100 kMaxResults=2097152
| |
− | check : with index on cpu...
| |
− | check : with index on cpu: iNumResults=57100
| |
− | 17.29 sec : check : with index on cpu
| |
− | | |
− | Press ENTER to exit...
| |
− | </pre>
| |
− | coarse-strong-ultra-100-96_10000.txt
| |
− | | |
− | == big file : 750 mb ==
| |
− | | |
− | <pre>
| |
− | N=65536
| |
− | SX=2048
| |
− | SY=64
| |
− | SZ=2
| |
− | I0=32
| |
− | DATASIZE_IN_RAW=2304kb
| |
− | DATASIZE_IN_STATE=256kb
| |
− | DATASIZE_IN_INDEX=136kb
| |
− | DATASIZE_IN_TOTAL=2696kb
| |
− | DATASIZE_OUT_TOTAL=16384kb
| |
− | line 1:coarse-strong-ultra-000000 000 0.04820 0.05043 0.05505 0.04887 0.05384 0.
| |
− | 05995 0.06215 0.06603 0.06615 0.06679 0.06202 0.06686 0.06654 0.06499 0.06705 0.
| |
− | 05835 0.05513 0.05442 0.05398 0.04658 0.05066 0.05162 0.04627 0.04324 0.04433 0.
| |
− | 04061 0.03673 0.04281 0.04078 0.03822 0.03821 0.03952 0.03648 0.03312 0.03402 0.
| |
− | 03277 0.03029 0.04310 0.04202 0.04168 0.03964 0.04356 0.04193 0.03870 0.04995 0.
| |
− | 04837 0.04460 0.04857 0.04831 0.04714 0.04734 0.05127 0.05319 0.05641 0.05886 0.
| |
− | 05365 0.05752 0.05877 0.05435 0.05526
| |
− | line 2:0.05103 0.05033 0.04347 0.04750 0.04574 0.03781 0.03926 0.03612 0.03374 0
| |
− | .03207 0.03060 0.02828 0.02507 0.02721 0.02558 0.02493 0.03374 0.02920 0.03031 0
| |
− | .02906 0.03283 0.03207 0.03207 0.04170 0.04342 0.04020 0.04298 0.04131 0.04298 0
| |
− | .04404 0.04810 0.04727 0.04491 0.04764 0.04301 0.04383 0.04317 0.04511 0.04545 0
| |
− | .04818 0.04908 0.04463 0.04620 0.04699 0.04360 0.04529 0.04346 0.04122 0.04011 0
| |
− | .03737 0.03293 0.02865 0.02832 0.02520 0.02255 0.02125 0.02035 0.01192 0.01107 0
| |
− | .00685 0.01059 0.01442 0.01595 0.02073
| |
− | line 3: 0.01839 0.01905 0.02360 0.02169 0.02749 0.02954 0.03428 0.03788 0.03793
| |
− | 0.04286 0.03990 0.04313 0.04426 0.04382 0.04529 0.04475 0.04587 0.04372 0.04377
| |
− | 0.04492 0.04141 0.04456 0.04552 0.04159 0.04223 0.03591 0.03698 0.03614 0.03736
| |
− | 0.03934 0.03516 0.03583 0.02827 0.02601 0.02256 0.02111 0.02014 0.02016 0.02150
| |
− | 0.01603 0.01632 0.01486 0.01818 0.01843 0.02040 0.02465 0.02337 0.02405 0.02452
| |
− | 0.02435 0.02745 0.02855 0.03582 0.03742 0.04081 0.04198 0.03672 0.03736 0.03749
| |
− | 0.03917 0.04121 0.04257 0.04485 0.0417
| |
− | line 4:5 0.04121 0.04167 0.03555 0.03806 0.03863 0.03748 0.03451 0.03039 0.02957
| |
− | 0.02493 0.02768 0.03054 0.02767 0.02438 0.02099 0.01713 0.01462 0.01508 0.01577
| |
− | 0.01737 0.01791 0.02091 0.02171 0.02182 0.02522 0.02480 0.02493 0.02913 0.02996
| |
− | 0.03269 0.03296 0.03631 0.03866 0.03451 0.04187 0.03918 0.04450 0.04598 0.04060
| |
− | 0.04482 0.03857 0.04485 0.04499 0.04898 0.05087 0.04374 0.04700 0.04478 0.04084
| |
− | 0.04081 0.04103 0.04135 0.03853 0.03664 0.03382 0.02755 0.02830 0.02619 0.02489
| |
− | 0.02267 0.01911 0.01841 0.01437 0.015
| |
− | line 5:69 0.01486 0.01429 0.01689 0.02116 0.02161 0.02497 0.02357 0.02480 0.0248
| |
− | 6 0.02717 0.03292 0.03393 0.03558 0.03961 0.04140 0.03921 0.03968 0.03907 0.0418
| |
− | 6 0.04094 0.04488 0.04213 0.04043 0.04132 0.04242 0.04413 0.04329 0.04607 0.0437
| |
− | 9 0.04371 0.04105 0.03754 0.03824 0.03826 0.03761 0.03649 0.03190 0.03074 0.0221
| |
− | 8 0.02409 0.02173 0.01948 0.01822 0.01906 0.02054 0.01712 0.01795 0.01876 0.0171
| |
− | 6 0.02161 0.02460 0.02727 0.02947 0.02950 0.03441 0.03185 0.03891 0.03999 0.0409
| |
− | 8 0.04559 0.04661 0.04359 0.04212 0.04
| |
− | total lines : 1507200
| |
− | Drücken Sie eine beliebige Taste . . .
| |
− | </pre>
| |
− | | |
− | | |
− | | |
− | == big test 1 (without last index on device) ==
| |
− | | |
− | <pre>
| |
− | N=1048576
| |
− | SX=32768
| |
− | SY=1024
| |
− | SZ=32
| |
− | I0=32
| |
− | DATASIZE_IN_RAW=36864kb
| |
− | DATASIZE_IN_STATE=4096kb
| |
− | DATASIZE_IN_INDEX=136kb
| |
− | DATASIZE_IN_TOTAL=41096kb
| |
− | DATASIZE_OUT_TOTAL=16384kb
| |
− | ReadTextData data/Corel_ColorMoments_9d.ascii : 68040 lines of real data, added
| |
− | 980536 lines of random data
| |
− | WARNING ! iRealNumLines=68040 does not match the hardcoded N=1048576
| |
− | 1.08 sec : reading data from file
| |
− | assert passed : (INDEXPOS_0(I0) == INDEXSTART_1-1)
| |
− | assert passed : (INDEXPOS_1(I0-1,I0) == INDEXSTART_2-1)
| |
− | assert passed : (INDEXPOS_2(I0-1,I0-1,I0) == INDEX_END-1)
| |
− | assert passed : (sz < 255)
| |
− | -2.478893,-2.478893,-2.478893,...,4.097229
| |
− | 1.86 sec : generating index data
| |
− | 0.06 sec : allocate and init device mem
| |
− | 764.99 sec : exec kernel on device
| |
− | 0.00 sec : receive results from device
| |
− | atom[0]=60051 atom[1]=0 iNumResults=60051 kMaxResults=2097152
| |
− | check : with index on cpu...
| |
− | check : with index on cpu: iNumResults=60051
| |
− | 357.94 sec : check : with index on cpu
| |
− | | |
− | Press ENTER to exit...
| |
− | </pre>
| |
− | | |
− | == big data with all index levels used in device ==
| |
− | <pre>
| |
− | N=1048576
| |
− | SX=32768
| |
− | SY=1024
| |
− | SZ=32
| |
− | I0=32
| |
− | DATASIZE_IN_RAW=36864kb
| |
− | DATASIZE_IN_STATE=4096kb
| |
− | DATASIZE_IN_INDEX=136kb
| |
− | DATASIZE_IN_TOTAL=41096kb
| |
− | DATASIZE_OUT_TOTAL=16384kb
| |
− | ReadTextData data/Corel_ColorMoments_9d.ascii : 68040 lines of real data, added
| |
− | 980536 lines of random data
| |
− | WARNING ! iRealNumLines=68040 does not match the hardcoded N=1048576
| |
− | 0.96 sec : reading data from file
| |
− | assert passed : (INDEXPOS_0(I0) == INDEXSTART_1-1)
| |
− | assert passed : (INDEXPOS_1(I0-1,I0) == INDEXSTART_2-1)
| |
− | assert passed : (INDEXPOS_2(I0-1,I0-1,I0) == INDEX_END-1)
| |
− | assert passed : (sz < 255)
| |
− | -2.478893,-2.478893,-2.478893,...,4.097229
| |
− | 1.86 sec : generating index data
| |
− | 0.06 sec : allocate and init device mem
| |
− | 354.73 sec : exec kernel on device
| |
− | 0.00 sec : receive results from device
| |
− | atom[0]=60051 atom[1]=0 iNumResults=60051 kMaxResults=2097152
| |
− | check : with index on cpu...
| |
− | check : with index on cpu: iNumResults=60051
| |
− | 357.97 sec : check : with index on cpu
| |
− | | |
− | Press ENTER to exit...
| |
− | </pre>
| |
− | | |
− | == big data with I0:16 : device is faster than cpu ==
| |
− | | |
− | <pre>
| |
− | N=1048576
| |
− | SX=65536
| |
− | SY=4096
| |
− | SZ=256
| |
− | I0=16
| |
− | DATASIZE_IN_RAW=36864kb
| |
− | DATASIZE_IN_STATE=4096kb
| |
− | DATASIZE_IN_INDEX=18kb
| |
− | DATASIZE_IN_TOTAL=40978kb
| |
− | DATASIZE_OUT_TOTAL=16384kb
| |
− | ReadTextData data/Corel_ColorMoments_9d.ascii : 68040 lines of real data, added
| |
− | 980536 lines of random data
| |
− | WARNING ! iRealNumLines=68040 does not match the hardcoded N=1048576
| |
− | 0.95 sec : reading data from file
| |
− | assert passed : (INDEXPOS_0(I0) == INDEXSTART_1-1)
| |
− | assert passed : (INDEXPOS_1(I0-1,I0) == INDEXSTART_2-1)
| |
− | assert passed : (INDEXPOS_2(I0-1,I0-1,I0) == INDEX_END-1)
| |
− | assert FAILED : (sz < 255)
| |
− | -2.478893,-2.478893,-2.478893,...,4.097229
| |
− | 1.91 sec : generating index data
| |
− | 0.06 sec : allocate and init device mem
| |
− | 393.60 sec : exec kernel on device
| |
− | 0.00 sec : receive results from device
| |
− | atom[0]=60051 atom[1]=0 iNumResults=60051 kMaxResults=2097152
| |
− | check : with index on cpu...
| |
− | check : with index on cpu: iNumResults=60051
| |
− | 520.65 sec : check : with index on cpu
| |
− | | |
− | Press ENTER to exit...
| |
− | | |
− | </pre>
| |
− | | |
− | <pre>
| |
− | gpu/cpu= 0.4 N=65536 size=2MB IO=16 gpu_idx3=0 cpu_idx3=0 time_gpu=10.6s time_cpu=24.8
| |
− | gpu/cpu= 0.4 N=65536 size=2MB IO=16 gpu_idx3=0 cpu_idx3=0 time_gpu=10.6s time_cpu=25.3
| |
− | gpu/cpu= 0.9 N=32768 size=1MB IO=32 gpu_idx3=0 cpu_idx3=0 time_gpu=3.1s time_cpu=3.4
| |
− | gpu/cpu= 0.7 N=32768 size=1MB IO=32 gpu_idx3=0 cpu_idx3=0 time_gpu=2.9s time_cpu=4.3
| |
− | </pre>
| |