diff --git a/cpp/include/rmm/mr/device_memory_resource.hpp b/cpp/include/rmm/mr/device_memory_resource.hpp index 99a9293b0..3b698aa0c 100644 --- a/cpp/include/rmm/mr/device_memory_resource.hpp +++ b/cpp/include/rmm/mr/device_memory_resource.hpp @@ -98,6 +98,8 @@ class device_memory_resource { * The returned pointer will have 256 byte alignment regardless of the value * of alignment. Higher alignments must use the aligned_resource_adaptor. * + * The returned pointer is immediately valid on all streams. + * * @throws rmm::bad_alloc When the requested `bytes` cannot be allocated. * * @param bytes The size of the allocation @@ -110,16 +112,22 @@ class device_memory_resource { alignment <= rmm::CUDA_ALLOCATION_ALIGNMENT && rmm::is_supported_alignment(alignment), "Alignment must be less than or equal to 256 and a power of two", rmm::bad_alloc); - return do_allocate(bytes, cuda_stream_view{}); + auto const stream = cuda_stream_view{}; + void* ptr = do_allocate(bytes, stream); + stream.synchronize(); + return ptr; } /** - * @brief Deallocate memory pointed to by \p p. + * @brief Deallocate memory pointed to by \p ptr. + * + * @note All stream-ordered work on `ptr` must be complete before calling this function otherwise + * behavior is undefined. * * @param ptr Pointer to be deallocated * @param bytes The size in bytes of the allocation. This must be equal to the - * value of `bytes` that was passed to the `allocate` call that returned `p`. - * @param alignment The alignment that was passed to the `allocate` call that returned `p` + * value of `bytes` that was passed to the `allocate` call that returned `ptr`. + * @param alignment The alignment that was passed to the `allocate` call that returned `ptr` */ void deallocate_sync( void* ptr, @@ -159,8 +167,8 @@ class device_memory_resource { * @param stream The stream on which to perform the deallocation * @param ptr Pointer to be deallocated * @param bytes The size in bytes of the allocation. This must be equal to the - * value of `bytes` that was passed to the `allocate` call that returned `p`. - * @param alignment The alignment that was passed to the `allocate` call that returned `p` + * value of `bytes` that was passed to the `allocate` call that returned `ptr`. + * @param alignment The alignment that was passed to the `allocate` call that returned `ptr` */ void deallocate(cuda_stream_view stream, void* ptr, @@ -235,14 +243,14 @@ class device_memory_resource { virtual void* do_allocate(std::size_t bytes, cuda_stream_view stream) = 0; /** - * @brief Deallocate memory pointed to by \p p. + * @brief Deallocate memory pointed to by \p ptr. * * If supported, this operation may optionally be executed on a stream. * Otherwise, the stream is ignored and the null stream is used. * * @param ptr Pointer to be deallocated * @param bytes The size in bytes of the allocation. This must be equal to the - * value of `bytes` that was passed to the `allocate` call that returned `p`. + * value of `bytes` that was passed to the `allocate` call that returned `ptr`. * @param stream Stream on which to perform deallocation */ virtual void do_deallocate(void* ptr, std::size_t bytes, cuda_stream_view stream) noexcept = 0;