-
Notifications
You must be signed in to change notification settings - Fork 17
Image upload benchmark #238
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
base: master
Are you sure you want to change the base?
Changes from all commits
6635ba9
951e2fd
141295b
874814a
ddb7bfc
f1fc8d5
7abe408
717f6ae
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -0,0 +1,64 @@ | ||
| include(common RESULT_VARIABLE RES) | ||
| if(NOT RES) | ||
| message(FATAL_ERROR "common.cmake not found. Should be in {repo_root}/cmake directory") | ||
| endif() | ||
|
|
||
| nbl_create_executable_project("" "" "" "" "${NBL_EXECUTABLE_PROJECT_CREATION_PCH_TARGET}") | ||
|
|
||
| if(NBL_EMBED_BUILTIN_RESOURCES) | ||
| set(_BR_TARGET_ ${EXECUTABLE_NAME}_builtinResourceData) | ||
| set(RESOURCE_DIR "app_resources") | ||
|
|
||
| get_filename_component(_SEARCH_DIRECTORIES_ "${CMAKE_CURRENT_SOURCE_DIR}" ABSOLUTE) | ||
| get_filename_component(_OUTPUT_DIRECTORY_SOURCE_ "${CMAKE_CURRENT_BINARY_DIR}/src" ABSOLUTE) | ||
| get_filename_component(_OUTPUT_DIRECTORY_HEADER_ "${CMAKE_CURRENT_BINARY_DIR}/include" ABSOLUTE) | ||
|
|
||
| file(GLOB_RECURSE BUILTIN_RESOURCE_FILES RELATIVE "${CMAKE_CURRENT_SOURCE_DIR}/${RESOURCE_DIR}" CONFIGURE_DEPENDS "${CMAKE_CURRENT_SOURCE_DIR}/${RESOURCE_DIR}/*") | ||
| foreach(RES_FILE ${BUILTIN_RESOURCE_FILES}) | ||
| LIST_BUILTIN_RESOURCE(RESOURCES_TO_EMBED "${RES_FILE}") | ||
| endforeach() | ||
|
|
||
| ADD_CUSTOM_BUILTIN_RESOURCES(${_BR_TARGET_} RESOURCES_TO_EMBED "${_SEARCH_DIRECTORIES_}" "${RESOURCE_DIR}" "nbl::this_example::builtin" "${_OUTPUT_DIRECTORY_HEADER_}" "${_OUTPUT_DIRECTORY_SOURCE_}") | ||
|
|
||
| LINK_BUILTIN_RESOURCES_TO_TARGET(${EXECUTABLE_NAME} ${_BR_TARGET_}) | ||
| endif() | ||
|
|
||
| set(OUTPUT_DIRECTORY "${CMAKE_CURRENT_BINARY_DIR}/auto-gen") | ||
| set(DEPENDS | ||
| app_resources/common.hlsl | ||
| app_resources/tile_upload.comp.hlsl | ||
| ) | ||
| target_sources(${EXECUTABLE_NAME} PRIVATE ${DEPENDS}) | ||
| set_source_files_properties(${DEPENDS} PROPERTIES HEADER_FILE_ONLY ON) | ||
|
|
||
| set(SM 6_8) | ||
| set(JSON [=[ | ||
| [ | ||
| { | ||
| "INPUT": "app_resources/tile_upload.comp.hlsl", | ||
| "KEY": "snakeStore" | ||
| } | ||
| ] | ||
| ]=]) | ||
| string(CONFIGURE "${JSON}" JSON) | ||
|
|
||
| NBL_CREATE_NSC_COMPILE_RULES( | ||
| TARGET ${EXECUTABLE_NAME}SPIRV | ||
| LINK_TO ${EXECUTABLE_NAME} | ||
| DEPENDS ${DEPENDS} | ||
| BINARY_DIR ${OUTPUT_DIRECTORY} | ||
| MOUNT_POINT_DEFINE NBL_THIS_EXAMPLE_BUILD_MOUNT_POINT | ||
| COMMON_OPTIONS -I "${CMAKE_CURRENT_SOURCE_DIR}" -T lib_${SM} | ||
| OUTPUT_VAR KEYS | ||
| INCLUDE nbl/this_example/builtin/build/spirv/keys.hpp | ||
| NAMESPACE nbl::this_example::builtin::build | ||
| INPUTS ${JSON} | ||
| ) | ||
|
|
||
| NBL_CREATE_RESOURCE_ARCHIVE( | ||
| NAMESPACE nbl::this_example::builtin::build | ||
| TARGET ${EXECUTABLE_NAME}_builtinsBuild | ||
| LINK_TO ${EXECUTABLE_NAME} | ||
| BIND ${OUTPUT_DIRECTORY} | ||
| BUILTINS ${KEYS} | ||
| ) |
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -0,0 +1,10 @@ | ||
| #include <nbl/builtin/hlsl/morton.hlsl> | ||
|
|
||
| struct PushConstantData | ||
| { | ||
| uint64_t deviceBufferAddress; | ||
| uint32_t2 dstOffset; | ||
| uint32_t srcWidth; | ||
| uint32_t srcHeight; | ||
| uint32_t tilesPerRow; | ||
| }; |
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -0,0 +1,72 @@ | ||
| #include "common.hlsl" | ||
|
|
||
| [[vk::binding(0,0)]] RWTexture2D<float32_t4> dstImage; | ||
| [[vk::push_constant]] PushConstantData pc; | ||
|
|
||
| using namespace nbl::hlsl; | ||
|
|
||
| static const uint32_t TILE_SIZE = 128u; | ||
|
|
||
| [numthreads(128, 1, 1)] | ||
| [shader("compute")] | ||
| void SnakeStore(uint32_t3 ID : SV_DispatchThreadID) | ||
| { | ||
| const uint32_t gIdx = ID.x; | ||
| const uint32_t tileIdx = gIdx / (TILE_SIZE * TILE_SIZE); | ||
| const uint32_t localIdx = gIdx % (TILE_SIZE * TILE_SIZE); | ||
| const uint32_t2 tileBase = uint32_t2(tileIdx % pc.tilesPerRow, tileIdx / pc.tilesPerRow) * TILE_SIZE; | ||
| const uint32_t2 localPos = uint32_t2(localIdx % TILE_SIZE, localIdx / TILE_SIZE); | ||
| const uint32_t2 pixelPos = tileBase + localPos; | ||
|
|
||
| const uint32_t packed = vk::RawBufferLoad<uint32_t>(pc.deviceBufferAddress + gIdx * 4u); | ||
| dstImage[pixelPos] = unpackUnorm4x8(int32_t(packed)); | ||
| } | ||
|
|
||
| [numthreads(128, 1, 1)] | ||
| [shader("compute")] | ||
| void SnakeLoad(uint32_t3 ID : SV_DispatchThreadID) | ||
| { | ||
| const uint32_t gIdx = ID.x; | ||
| const uint32_t tileIdx = gIdx / (TILE_SIZE * TILE_SIZE); | ||
| const uint32_t localIdx = gIdx % (TILE_SIZE * TILE_SIZE); | ||
| const uint32_t2 tileBase = uint32_t2(tileIdx % pc.tilesPerRow, tileIdx / pc.tilesPerRow) * TILE_SIZE; | ||
| const uint32_t2 localPos = uint32_t2(localIdx % TILE_SIZE, localIdx / TILE_SIZE); | ||
| const uint32_t2 pixelPos = tileBase + localPos; | ||
|
|
||
| vk::RawBufferStore<uint32_t>(pc.deviceBufferAddress + gIdx * 4u, uint32_t(packUnorm4x8(dstImage[pixelPos]))); | ||
| } | ||
|
|
||
| [numthreads(128, 1, 1)] | ||
| [shader("compute")] | ||
| void MortonStore(uint32_t3 ID : SV_DispatchThreadID) | ||
|
Contributor
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more.
Morton example for a 16x16 group:
Contributor
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. By the way, we're not going to pursue Morton benchmarking any further, but let's fix it up and make it work as we first intended. |
||
| { | ||
| const uint32_t gIdx = ID.x; | ||
| const uint32_t tileIdx = gIdx / (TILE_SIZE * TILE_SIZE); | ||
| const uint32_t localIdx = gIdx % (TILE_SIZE * TILE_SIZE); | ||
| const uint32_t2 tileBase = uint32_t2(tileIdx % pc.tilesPerRow, tileIdx / pc.tilesPerRow) * TILE_SIZE; | ||
|
|
||
| morton::code<false, 7, 2> mc; | ||
| mc.value = uint16_t(localIdx); | ||
| const uint32_t2 localPos = _static_cast<uint32_t2>(mc); | ||
| const uint32_t2 pixelPos = tileBase + localPos; | ||
|
|
||
| const uint32_t packed = vk::RawBufferLoad<uint32_t>(pc.deviceBufferAddress + gIdx * 4u); | ||
| dstImage[pixelPos] = unpackUnorm4x8(int32_t(packed)); | ||
| } | ||
|
|
||
| [numthreads(128, 1, 1)] | ||
| [shader("compute")] | ||
| void MortonLoad(uint32_t3 ID : SV_DispatchThreadID) | ||
| { | ||
| const uint32_t gIdx = ID.x; | ||
| const uint32_t tileIdx = gIdx / (TILE_SIZE * TILE_SIZE); | ||
| const uint32_t localIdx = gIdx % (TILE_SIZE * TILE_SIZE); | ||
| const uint32_t2 tileBase = uint32_t2(tileIdx % pc.tilesPerRow, tileIdx / pc.tilesPerRow) * TILE_SIZE; | ||
|
|
||
| morton::code<false, 7, 2> mc; | ||
| mc.value = uint16_t(localIdx); | ||
| const uint32_t2 localPos = _static_cast<uint32_t2>(mc); | ||
| const uint32_t2 pixelPos = tileBase + localPos; | ||
|
|
||
| vk::RawBufferStore<uint32_t>(pc.deviceBufferAddress + gIdx * 4u, uint32_t(packUnorm4x8(dstImage[pixelPos]))); | ||
| } | ||
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -0,0 +1,28 @@ | ||
| { | ||
| "enableParallelBuild": true, | ||
| "threadsPerBuildProcess" : 2, | ||
| "isExecuted": false, | ||
| "scriptPath": "", | ||
| "cmake": { | ||
| "configurations": [ "Release", "Debug", "RelWithDebInfo" ], | ||
| "buildModes": [], | ||
| "requiredOptions": [] | ||
| }, | ||
| "profiles": [ | ||
| { | ||
| "backend": "vulkan", // should be none | ||
| "platform": "windows", | ||
| "buildModes": [], | ||
| "runConfiguration": "Release", // we also need to run in Debug and RelWithDebInfo because this is a foundational example | ||
| "gpuArchitectures": [] | ||
| } | ||
| ], | ||
| "dependencies": [], | ||
| "data": [ | ||
| { | ||
| "dependencies": [], | ||
| "command": [""], | ||
| "outputs": [] | ||
| } | ||
| ] | ||
| } |

Uh oh!
There was an error while loading. Please reload this page.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
`pixelPos` will be just the global thread index; we need to have individual offsets for each tile request. Use `<< TILE_SIZE_LOG2`, e.g. `globalPos.xy << 7` (define `TILE_SIZE_LOG2`). For the modulo, use `& 127u` or `& (TILE_SIZE - 1)`.