WARNING: THIS SITE IS A MIRROR OF GITHUB.COM / YOU CANNOT LOG IN OR REGISTER ACCOUNTS HERE / THE CONTENTS ARE PROVIDED AS-IS / THIS SITE ASSUMES NO RESPONSIBILITY FOR ANY DISPLAYED CONTENT OR LINKS / IF YOU FIND SOMETHING THAT MAY NOT BE SUITABLE FOR EVERYONE, CONTACT THE ADMIN AT ilovescratch@foxmail.com
Skip to content

Commit 8a684b0

Browse files
committed
Merge branch 'apk'
2 parents 0fc7ec7 + 6dc250e commit 8a684b0

Some content is hidden

Large commits have some content hidden by default. Use the search box below to find content that may be hidden.

56 files changed

+2946
-517
lines changed

.gitignore

Lines changed: 42 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,3 +1,7 @@
1+
# Windows binaries
2+
*.exe
3+
*.exp
4+
15
# Compiled Object files
26
*.slo
37
*.lo
@@ -16,11 +20,13 @@
1620
*.lib
1721

1822
# visual studio files
23+
*.deps
1924
*.suo
2025
*.pdb
2126
*.opensdf
2227
*.sdf
2328
*.vcxproj.user
29+
*.vcxproj.filters
2430
Debug/
2531
ipch/
2632
Release/
@@ -31,3 +37,39 @@ Release/
3137
# sublime
3238
*.sublime-project
3339
*.sublime-workspace
40+
41+
# tags
42+
.tags
43+
.tags_sorted_by_file
44+
45+
# executables
46+
examples/blackscholes/bs_device
47+
examples/blackscholes/bs_host
48+
examples/blackscholes/bs_host_nvcc
49+
examples/blackscholes_hemiarray/bs_device
50+
examples/blackscholes_hemiarray/bs_host
51+
examples/blackscholes_hemiarray/bs_host_nvcc
52+
examples/blackscholes_hostdevice/blackscholes
53+
examples/blackscholes_nohemi/bs_device
54+
examples/blackscholes_nohemi/bs_host
55+
examples/blackscholes_nohemi/bs_host_nvcc
56+
examples/nbody_vec4/nbody_vec4
57+
examples/parallel_for/parallel_for_device
58+
examples/parallel_for/parallel_for_host
59+
examples/parallel_for/parallel_for_host_nvcc
60+
examples/simple/hello_device
61+
examples/simple/hello_global
62+
examples/simple/hello_host
63+
examples/simple/hello_host_nvcc
64+
examples/simple/hello_lambda_device
65+
examples/simple/hello_lambda_host
66+
examples/simple/hello_lambda_host_nvcc
67+
examples/simple/saxpy_host
68+
examples/simple/saxpy_device
69+
70+
test/test_hemi_device
71+
test/test_hemi_host
72+
73+
*.plist
74+
*.dSYM
75+

.gitmodules

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,6 @@
1+
[submodule "hemi/range"]
2+
path = hemi/range
3+
url = https://github.com/harrism/cpp11-range.git
4+
[submodule "test/googletest"]
5+
path = test/googletest
6+
url = https://github.com/google/googletest.git

LICENSE

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,4 @@
1-
# Copyright (c) 2014, NVIDIA CORPORATION. All rights reserved.
1+
# Copyright (c) 2015, NVIDIA CORPORATION. All rights reserved.
22
#
33
# Redistribution and use in source and binary forms, with or without
44
# modification, are permitted provided that the following conditions

README.md

Lines changed: 157 additions & 50 deletions
Large diffs are not rendered by default.

TODO.md

Lines changed: 12 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,12 @@
1+
- [ ] Consider abstraction for __shared__ memory
2+
- [ ] Make nbody example work without CUDA
3+
- [ ] Consider __launch_bounds__ support...
4+
- [ ] Multi-dimensional thread/block accessors
5+
- [x] Add version of parallel_for with an ExecutionPolicy
6+
- [x] Combine tests into small number of binaries
7+
- [x] Add streams to ExecutionPolicy
8+
- [x] Tests for cudaLaunch with and without nvcc
9+
- [x] Tests for other APIs
10+
- [x] Provide portable utility functions for cudaDeviceReset, etc.
11+
- [x] Fix/rename index accessors
12+
- [x] Move accessors to device_api.h

examples/blackscholes/Makefile

Lines changed: 27 additions & 20 deletions
Original file line numberDiff line numberDiff line change
@@ -1,36 +1,43 @@
1-
CUDA_PATH := /usr/local/cuda
1+
# operating system
2+
HOST_OS := $(shell uname -s 2>/dev/null | tr "[:upper:]" "[:lower:]")
23

3-
CC_FLAGS := -I../ -I../../ -I$(CUDA_PATH)/include
4+
# architecture
5+
ARCH := $(shell getconf LONG_BIT)
46

5-
# uncomment for debug
6-
#DEBUG_FLAGS := -g -DDEBUG
7-
#DEBUG_FLAGS_NVCC := -G
7+
NVCC := nvcc
88

9-
# comment for debug
10-
CC_FLAGS += -O3
9+
ifeq ($(HOST_OS),darwin)
10+
CXX := clang++
11+
else
12+
CXX := g++
13+
endif
1114

12-
CC_FLAGS += $(DEBUG_FLAGS)
13-
NVCC_FLAGS := $(CC_FLAGS) $(DEBUG_FLAGS_NVCC)
15+
STD := -std=c++11
1416

15-
NVCC_FLAGS := $(CC_FLAGS) $(DEBUG_FLAGS_NVCC)
17+
CXX_FLAGS := $(STD) -I../ -I../../
1618

17-
ARCH := $(shell getconf LONG_BIT)
19+
HOST_ONLY_FLAGS := -DHEMI_CUDA_DISABLE
1820

19-
LIB_FLAGS_32 := -L$(CUDA_PATH)/lib
20-
LIB_FLAGS_64 := -L$(CUDA_PATH)/lib64
21+
# uncomment for debug
22+
#DEBUG_FLAGS := -g -DDEBUG
23+
#DEBUG_FLAGS_NVCC := -G
24+
25+
# comment for debug
26+
CXX_FLAGS += -O3
2127

22-
LIB_FLAGS := $(LIB_FLAGS_$(ARCH)) -lcudart
28+
CXX_FLAGS += $(DEBUG_FLAGS)
29+
NVCC_FLAGS := $(CXX_FLAGS) $(DEBUG_FLAGS_NVCC) --expt-extended-lambda
2330

24-
all: bs_device bs_host_nvcc bs_host_g++
31+
all: bs_device bs_host_nvcc bs_host
2532

2633
bs_device: blackscholes.cpp
27-
nvcc blackscholes.cpp $(NVCC_FLAGS) $(LIB_FLAGS) -x cu -o bs_device
34+
$(NVCC) blackscholes.cpp $(NVCC_FLAGS) -x cu -o bs_device
2835

2936
bs_host_nvcc: blackscholes.cpp
30-
nvcc blackscholes.cpp $(CC_FLAGS) $(LIB_FLAGS) -x c++ -o bs_host_nvcc
37+
$(NVCC) blackscholes.cpp $(CXX_FLAGS) $(HOST_ONLY_FLAGS) -x c++ -o bs_host_nvcc
3138

32-
bs_host_g++: blackscholes.cpp
33-
g++ blackscholes.cpp $(CC_FLAGS) $(LIB_FLAGS) -o bs_host_g++
39+
bs_host: blackscholes.cpp
40+
$(CXX) blackscholes.cpp $(CXX_FLAGS) $(HOST_ONLY_FLAGS) -o bs_host
3441

3542
clean:
36-
rm -rf bs_device bs_host_nvcc bs_host_g++
43+
rm -rf bs_device bs_host_nvcc bs_host

examples/blackscholes/blackscholes.cpp

Lines changed: 11 additions & 16 deletions
Original file line numberDiff line numberDiff line change
@@ -13,6 +13,8 @@
1313

1414
#include "timer.h"
1515
#include "hemi/hemi.h"
16+
#include "hemi/parallel_for.h"
17+
#include "hemi/device_api.h"
1618

1719
const float RISKFREE = 0.02f;
1820
const float VOLATILITY = 0.30f;
@@ -42,15 +44,11 @@ float CND(float d)
4244
}
4345

4446
// Black-Scholes formula for both call and put
45-
HEMI_KERNEL(BlackScholes)
46-
(float *callResult, float *putResult, const float *stockPrice,
47-
const float *optionStrike, const float *optionYears, float Riskfree,
48-
float Volatility, int optN)
47+
void BlackScholes(float *callResult, float *putResult, const float *stockPrice,
48+
const float *optionStrike, const float *optionYears, float Riskfree,
49+
float Volatility, int optN)
4950
{
50-
int offset = hemiGetElementOffset();
51-
int stride = hemiGetElementStride();
52-
53-
for(int opt = offset; opt < optN; opt += stride)
51+
hemi::parallel_for(0, optN, [=] HEMI_LAMBDA (int opt)
5452
{
5553
float S = stockPrice[opt];
5654
float X = optionStrike[opt];
@@ -68,7 +66,7 @@ HEMI_KERNEL(BlackScholes)
6866
float expRT = expf(- R * T);
6967
callResult[opt] = S * CNDD1 - X * expRT * CNDD2;
7068
putResult[opt] = X * expRT * (1.0f - CNDD2) - S * (1.0f - CNDD1);
71-
}
69+
});
7270
}
7371

7472
float RandFloat(float low, float high)
@@ -120,9 +118,6 @@ int main(int argc, char **argv)
120118

121119
initOptions(OPT_N, stockPrice, optionStrike, optionYears);
122120

123-
int blockDim = 128; // blockDim, gridDim ignored by host code
124-
int gridDim = std::min<int>(1024, (OPT_N + blockDim - 1) / blockDim);
125-
126121
printf("Running %s Version...\n", HEMI_LOC_STRING);
127122

128123
StartTimer();
@@ -137,11 +132,11 @@ int main(int argc, char **argv)
137132
d_stockPrice = stockPrice;
138133
d_optionStrike = optionStrike;
139134
d_optionYears = optionYears;
140-
#endif
135+
#endif
136+
141137

142-
HEMI_KERNEL_LAUNCH(BlackScholes, gridDim, blockDim, 0, 0,
143-
d_callResult, d_putResult, d_stockPrice, d_optionStrike,
144-
d_optionYears, RISKFREE, VOLATILITY, OPT_N);
138+
BlackScholes(d_callResult, d_putResult, (const float*)d_stockPrice, (const float*)d_optionStrike,
139+
(const float*)d_optionYears, RISKFREE, VOLATILITY, OPT_N);
145140

146141
#ifdef HEMI_CUDA_COMPILER
147142
checkCuda( cudaMemcpy(callResult, d_callResult, OPT_SZ, cudaMemcpyDeviceToHost) );

examples/blackscholes/blackscholes.sln

Lines changed: 0 additions & 36 deletions
This file was deleted.
Lines changed: 27 additions & 20 deletions
Original file line numberDiff line numberDiff line change
@@ -1,36 +1,43 @@
1-
CUDA_PATH := /usr/local/cuda
1+
# operating system
2+
HOST_OS := $(shell uname -s 2>/dev/null | tr "[:upper:]" "[:lower:]")
23

3-
CC_FLAGS := -I../ -I../../ -I$(CUDA_PATH)/include
4+
# architecture
5+
ARCH := $(shell getconf LONG_BIT)
46

5-
# uncomment for debug
6-
#DEBUG_FLAGS := -g -DDEBUG
7-
#DEBUG_FLAGS_NVCC := -G
7+
NVCC := nvcc
88

9-
# comment for debug
10-
CC_FLAGS += -O3
9+
ifeq ($(HOST_OS),darwin)
10+
CXX := clang++
11+
else
12+
CXX := g++
13+
endif
1114

12-
CC_FLAGS += $(DEBUG_FLAGS)
13-
NVCC_FLAGS := $(CC_FLAGS) $(DEBUG_FLAGS_NVCC)
15+
STD := -std=c++11
1416

15-
NVCC_FLAGS := $(CC_FLAGS) $(DEBUG_FLAGS_NVCC)
17+
CXX_FLAGS := $(STD) -I../ -I../../
1618

17-
ARCH := $(shell getconf LONG_BIT)
19+
HOST_ONLY_FLAGS := -DHEMI_CUDA_DISABLE
1820

19-
LIB_FLAGS_32 := -L$(CUDA_PATH)/lib
20-
LIB_FLAGS_64 := -L$(CUDA_PATH)/lib64
21+
# uncomment for debug
22+
#DEBUG_FLAGS := -g -DDEBUG
23+
#DEBUG_FLAGS_NVCC := -G
24+
25+
# comment for debug
26+
CXX_FLAGS += -O3
2127

22-
LIB_FLAGS := $(LIB_FLAGS_$(ARCH)) -lcudart
28+
CXX_FLAGS += $(DEBUG_FLAGS)
29+
NVCC_FLAGS := $(CXX_FLAGS) $(DEBUG_FLAGS_NVCC) --expt-extended-lambda
2330

24-
all: bs_device bs_host_nvcc bs_host_g++
31+
all: bs_device bs_host_nvcc bs_host
2532

2633
bs_device: blackscholes.cpp
27-
nvcc blackscholes.cpp $(NVCC_FLAGS) $(LIB_FLAGS) -x cu -o bs_device
34+
$(NVCC) blackscholes.cpp $(NVCC_FLAGS) -x cu -o bs_device
2835

2936
bs_host_nvcc: blackscholes.cpp
30-
nvcc blackscholes.cpp $(CC_FLAGS) $(LIB_FLAGS) -x c++ -o bs_host_nvcc
37+
$(NVCC) blackscholes.cpp $(CXX_FLAGS) $(HOST_ONLY_FLAGS) -x c++ -o bs_host_nvcc
3138

32-
bs_host_g++: blackscholes.cpp
33-
g++ blackscholes.cpp $(CC_FLAGS) $(LIB_FLAGS) -o bs_host_g++
39+
bs_host: blackscholes.cpp
40+
$(CXX) blackscholes.cpp $(CXX_FLAGS) $(HOST_ONLY_FLAGS) -o bs_host
3441

3542
clean:
36-
rm -rf bs_device bs_host_nvcc bs_host_g++
43+
rm -rf bs_device bs_host_nvcc bs_host

examples/blackscholes_hemiarray/blackscholes.cpp

Lines changed: 11 additions & 17 deletions
Original file line numberDiff line numberDiff line change
@@ -13,6 +13,8 @@
1313

1414
#include "timer.h"
1515
#include "hemi/hemi.h"
16+
#include "hemi/device_api.h"
17+
#include "hemi/parallel_for.h"
1618
#include "hemi/array.h"
1719

1820
const float RISKFREE = 0.02f;
@@ -43,15 +45,11 @@ float CND(float d)
4345
}
4446

4547
// Black-Scholes formula for both call and put
46-
HEMI_KERNEL(BlackScholes)
47-
(float *callResult, float *putResult, const float *stockPrice,
48+
void BlackScholes(float *callResult, float *putResult, const float *stockPrice,
4849
const float *optionStrike, const float *optionYears, float Riskfree,
4950
float Volatility, int optN)
5051
{
51-
int offset = hemiGetElementOffset();
52-
int stride = hemiGetElementStride();
53-
54-
for(int opt = offset; opt < optN; opt += stride)
52+
hemi::parallel_for(0, optN, [=] HEMI_LAMBDA (int opt)
5553
{
5654
float S = stockPrice[opt];
5755
float X = optionStrike[opt];
@@ -69,7 +67,7 @@ HEMI_KERNEL(BlackScholes)
6967
float expRT = expf(- R * T);
7068
callResult[opt] = S * CNDD1 - X * expRT * CNDD2;
7169
putResult[opt] = X * expRT * (1.0f - CNDD2) - S * (1.0f - CNDD1);
72-
}
70+
});
7371
}
7472

7573
float RandFloat(float low, float high)
@@ -105,20 +103,16 @@ int main(int argc, char **argv)
105103
optionStrike.writeOnlyHostPtr(),
106104
optionYears.writeOnlyHostPtr());
107105

108-
int blockDim = 128; // blockDim, gridDim ignored by host code
109-
int gridDim = std::min<int>(1024, (OPT_N + blockDim - 1) / blockDim);
110-
111106
printf("Running %s Version...\n", HEMI_LOC_STRING);
112107

113108
StartTimer();
114109

115-
HEMI_KERNEL_LAUNCH(BlackScholes, gridDim, blockDim, 0, 0,
116-
callResult.writeOnlyPtr(),
117-
putResult.writeOnlyPtr(),
118-
stockPrice.readOnlyPtr(),
119-
optionStrike.readOnlyPtr(),
120-
optionYears.readOnlyPtr(),
121-
RISKFREE, VOLATILITY, OPT_N);
110+
BlackScholes(callResult.writeOnlyPtr(),
111+
putResult.writeOnlyPtr(),
112+
stockPrice.readOnlyPtr(),
113+
optionStrike.readOnlyPtr(),
114+
optionYears.readOnlyPtr(),
115+
RISKFREE, VOLATILITY, OPT_N);
122116

123117
// force copy back to host if needed and print a sanity check
124118
printf("Option 0 call: %f\n", callResult.readOnlyPtr(hemi::host)[0]);

0 commit comments

Comments
 (0)