From 194ed2f271ca01732a63ad4b92b65ad41668f36c Mon Sep 17 00:00:00 2001 From: Tex Riddell Date: Fri, 10 Apr 2026 19:09:07 -0700 Subject: [PATCH] [SM6.10] LinAlg: Fix thread-scope InterlockedAccumulate params The thread-scope overload wasn't supposed to have Stride and Layout, since the Layout must be OuterProductOptimal for thread scope. Additionally, Align is device-dependent for OuterProductOptimal, so there's no point supplying it from HLSL or DXIL. This change removes these three parameters from this function in the HLSL Header, supplying 0 for Stride, MatrixLayout::OuterProductOptimal for Layout, and 0 for Align to the builtin operation which will pass these values along to the DXIL operation. Fixes #8360 --- tools/clang/lib/Headers/hlsl/dx/linalg.h | 8 +++----- .../test/CodeGenDXIL/hlsl/linalg/api/matrix-class.hlsl | 4 ++-- 2 files changed, 5 insertions(+), 7 deletions(-) diff --git a/tools/clang/lib/Headers/hlsl/dx/linalg.h b/tools/clang/lib/Headers/hlsl/dx/linalg.h index 3c2b60a023..aee43524ab 100644 --- a/tools/clang/lib/Headers/hlsl/dx/linalg.h +++ b/tools/clang/lib/Headers/hlsl/dx/linalg.h @@ -406,11 +406,9 @@ class Matrix { template typename hlsl::enable_if::type - InterlockedAccumulate(RWByteAddressBuffer Res, uint StartOffset, uint Stride, - MatrixLayoutEnum Layout, - uint Align = sizeof(ElementType)) { - __builtin_LinAlg_MatrixAccumulateToDescriptor(__handle, Res, StartOffset, - Stride, Layout, Align); + InterlockedAccumulate(RWByteAddressBuffer Res, uint StartOffset) { + __builtin_LinAlg_MatrixAccumulateToDescriptor( + __handle, Res, StartOffset, 0, MatrixLayout::OuterProductOptimal, 0); } }; diff --git a/tools/clang/test/CodeGenDXIL/hlsl/linalg/api/matrix-class.hlsl b/tools/clang/test/CodeGenDXIL/hlsl/linalg/api/matrix-class.hlsl index 8d1f43f4b2..e8c516c23d 100644 --- a/tools/clang/test/CodeGenDXIL/hlsl/linalg/api/matrix-class.hlsl +++ b/tools/clang/test/CodeGenDXIL/hlsl/linalg/api/matrix-class.hlsl @@ -175,12 +175,12 @@ void main(uint ID : SV_GroupID) // // CHECK: %[[TSACCUM:.*]] = call %dx.types.LinAlgMatrixC9M4N4U2S0 @dx.op.linAlgMatrixOuterProduct.mC9M4N4U2S0.v4f32.v4f32 // CHECK: call void @dx.op.linAlgMatrixAccumulateToDescriptor.mC9M4N4U2S0(i32 -2147483621, -// CHECK-SAME: %dx.types.LinAlgMatrixC9M4N4U2S0 %[[TSACCUM]], %dx.types.Handle %{{[0-9]+}}, i32 0, i32 16, i32 1, i32 4) +// CHECK-SAME: %dx.types.LinAlgMatrixC9M4N4U2S0 %[[TSACCUM]], %dx.types.Handle %{{[0-9]+}}, i32 0, i32 0, i32 4, i32 0) // CHECK-SAME: ; LinAlgMatrixAccumulateToDescriptor(matrix,handle,offset,stride,layout,align) vector vec1 = 1.0f; vector vec2 = 2.0f; TSMatrixAccumTy TSMatAccum = OuterProduct(vec1, vec2); - TSMatAccum.InterlockedAccumulate(RWBAB, 0, 16, MatrixLayoutEnum::ColMajor); + TSMatAccum.InterlockedAccumulate(RWBAB, 0); // CHECK: call i32 @dx.op.linAlgMatrixQueryAccumulatorLayout(i32 -2147483626) ; LinAlgMatrixQueryAccumulatorLayout() MatrixUseEnum layout = AccumulatorLayout();