//===----------------------------------------------------------------------===//
//
// Part of libcu++, the C++ Standard Library for your entire system,
// under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
// SPDX-FileCopyrightText: Copyright (c) 2023 NVIDIA CORPORATION & AFFILIATES.
//
//===----------------------------------------------------------------------===//

// Modifications Copyright (c) 2025 Advanced Micro Devices, Inc.
// Permission is hereby granted, free of charge, to any person obtaining a copy
// of this software and associated documentation files (the "Software"), to deal
// in the Software without restriction, including without limitation the rights
// to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
// copies of the Software, and to permit persons to whom the Software is
// furnished to do so, subject to the following conditions:
// The above copyright notice and this permission notice shall be included in
// all copies or substantial portions of the Software.
// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
// THE SOFTWARE.

#ifndef _CUDA_DISCARD_MEMORY
#define _CUDA_DISCARD_MEMORY

#include <cuda/std/detail/__config>

#if defined(_CCCL_IMPLICIT_SYSTEM_HEADER_GCC)
#  pragma GCC system_header
#elif defined(_CCCL_IMPLICIT_SYSTEM_HEADER_CLANG)
#  pragma clang system_header
#elif defined(_CCCL_IMPLICIT_SYSTEM_HEADER_MSVC)
#  pragma system_header
#endif // no system header

#include <cuda/std/cstdint>

_LIBCUDACXX_BEGIN_NAMESPACE_CUDA

//! Issues `discard.global.L2` for every 128-byte line that lies entirely inside
//! the byte range `[__ptr, __ptr + __nbytes)`.
//!
//! The function does nothing when:
//!   - the PTX ISA version reported by `__cccl_ptx_isa` is older than 7.4,
//!   - the `NV_PROVIDES_SM_80` branch is not selected by the target dispatch macro
//!     (NOTE(review): presumably host code and pre-SM80 devices - confirm against
//!     the macro definition),
//!   - `__isGlobal` reports that `__ptr` is not a global-memory address, or
//!   - the range does not contain a single complete, 128-byte-aligned line.
//!
//! A partially covered line at the beginning or at the end of the range is never
//! touched, so bytes outside the range are not affected.
//!
//! @param __ptr    Start of the range whose lines may be discarded.
//! @param __nbytes Length of the range in bytes.
inline _CCCL_HOST_DEVICE void discard_memory(volatile void* __ptr, size_t __nbytes) noexcept
{
  // The discard PTX instruction is only available with PTX ISA 7.4 and later
#if __cccl_ptx_isa < 740ULL
  (void) (__ptr);
  (void) (__nbytes);
#else
  NV_IF_TARGET_LIBHIPCXX(
    NV_PROVIDES_SM_80,
    (if (!__isGlobal((void*) __ptr)) return;

     static constexpr size_t _LINE_SIZE = 128;

     // Do the rounding on integer addresses rather than on char*: for a short or
     // misaligned range the rounded-up start can lie past the end of the range and
     // the rounded-down end can lie before its start, and forming such pointers
     // would be out-of-bounds pointer arithmetic.
     const uintptr_t __begin = reinterpret_cast<uintptr_t>(const_cast<void*>(__ptr));
     const uintptr_t __end   = __begin + __nbytes;

     // Trim the first block and last block if they are not 128 bytes aligned
     const uintptr_t __misalignment = __begin % _LINE_SIZE;
     uintptr_t __line               = __misalignment == 0 ? __begin : __begin + (_LINE_SIZE - __misalignment);
     const uintptr_t __end_aligned  = __end - (__end % _LINE_SIZE);

     // Every address visited here starts a full line inside [__begin, __end), so
     // converting it back to a pointer for the instruction operand is safe.
     while (__line < __end_aligned) {
       asm volatile("discard.global.L2 [%0], 128;" ::"l"(reinterpret_cast<char*>(__line)) :);
       __line += _LINE_SIZE;
     }),
    ((void) (__ptr); (void) (__nbytes);))
#endif
}

_LIBCUDACXX_END_NAMESPACE_CUDA

#endif
