From c94033fc5ceb2dcd66d8e6b87bef4fc2c9b414b9 Mon Sep 17 00:00:00 2001 From: Pablo Marquez Tello Date: Mon, 27 Apr 2026 16:06:52 +0100 Subject: [PATCH] fix: Enable asm requantized max pooling with differing qinfo Restrict differing src/dst quantization-info support in CpuPool2dAssemblyWrapperKernel to MAX pooling, while keeping AVG pooling on the generic fallback path. For QASYMM8_SIGNED, differing src/dst quantization info is only accepted by the asm wrapper when both offsets are zero. Fix the quantized multiplier validation check and wire the requantization shifts correctly for the asm pooling path so requantized MAX pooling validates and executes correctly. Reject padded QASYMM8_SIGNED AVG pooling in the asm wrapper for same-qinfo configurations, matching the existing QASYMM8 policy. Add coverage for QASYMM8_SIGNED padded MAX pooling on the asm path for same qinfo and zero-offset differing qinfo. Add validate coverage for padded NHWC QASYMM8 pooling with differing quantization info: MAX is expected to validate, AVG is expected to be rejected. Resolves MLCE-1821 Signed-off-by: Pablo Marquez Tello Change-Id: I33c99d0d4ea1bf57ed28d0750422403e60e2a276 --- .../CpuPool2dAssemblyWrapperKernel.cpp | 28 ++++-- tests/validation/NEON/PoolingLayer.cpp | 92 ++++++++++++++++++- 2 files changed, 109 insertions(+), 11 deletions(-) diff --git a/src/cpu/kernels/internal/CpuPool2dAssemblyWrapperKernel.cpp b/src/cpu/kernels/internal/CpuPool2dAssemblyWrapperKernel.cpp index cb92a904cc9..fa1f5935bc0 100644 --- a/src/cpu/kernels/internal/CpuPool2dAssemblyWrapperKernel.cpp +++ b/src/cpu/kernels/internal/CpuPool2dAssemblyWrapperKernel.cpp @@ -134,21 +134,28 @@ CpuPool2dAssemblyWrapperKernel::validate(const ITensorInfo *src, const ITensorIn if (src_qinfo != dst_qinfo) { + ARM_COMPUTE_RETURN_ERROR_ON_MSG(info.pool_type != PoolingType::MAX, + "Assembly kernels only support differing src/dst quantization info for " + "MAX pooling"); + ARM_COMPUTE_RETURN_ERROR_ON_MSG(src->data_type() == DataType::QASYMM8_SIGNED && + (src_qinfo.offset != 0 || dst_qinfo.offset != 0), + "Assembly kernels only support differing src/dst quantization info for " + "QASYMM8_SIGNED when both 
offsets are zero"); const float multiplier = src_qinfo.scale / dst_qinfo.scale; int32_t dst_multiplier{}; int32_t dst_shift{}; ARM_COMPUTE_RETURN_ERROR_ON( - quantization::calculate_quantized_multiplier(multiplier, &dst_multiplier, &dst_shift)); + !quantization::calculate_quantized_multiplier(multiplier, &dst_multiplier, &dst_shift)); } else { - if (src->data_type() == DataType::QASYMM8) + if (src->data_type() == DataType::QASYMM8 || src->data_type() == DataType::QASYMM8_SIGNED) { const bool has_padding = info.pad_stride_info.has_padding(); ARM_COMPUTE_RETURN_ERROR_ON_MSG(info.pool_type != PoolingType::MAX && !info.exclude_padding && has_padding, - "Assembly kernels only support padded MAX pooling for QASYMM8 with " - "same src/dst quantization info"); + "Assembly kernels only support padded MAX pooling for QASYMM8 and " + "QASYMM8_SIGNED with same src/dst quantization info"); } } } @@ -156,13 +163,13 @@ CpuPool2dAssemblyWrapperKernel::validate(const ITensorInfo *src, const ITensorIn { const TensorInfo out_info(dst_shape, 1, src->data_type()); // use src dtype as they're same. 
ARM_COMPUTE_RETURN_ERROR_ON_SIZE_UNSUPPORTED(&out_info); - if (src->data_type() == DataType::QASYMM8) + if (src->data_type() == DataType::QASYMM8 || src->data_type() == DataType::QASYMM8_SIGNED) { // If dst is not configured, the quantization info are the same const bool has_padding = info.pad_stride_info.has_padding(); ARM_COMPUTE_RETURN_ERROR_ON_MSG(info.pool_type != PoolingType::MAX && !info.exclude_padding && has_padding, - "Assembly kernels only support padded MAX pooling for QASYMM8 with " - "same src/dst quantization info"); + "Assembly kernels only support padded MAX pooling for QASYMM8 and " + "QASYMM8_SIGNED with same src/dst quantization info"); } } return Status{}; @@ -303,9 +310,10 @@ void CpuPool2dAssemblyWrapperKernel::create_arm_pooling_requant(const ITensorInf int32_t dst_shift{}; quantization::calculate_quantized_multiplier(multiplier, &dst_multiplier, &dst_shift); - const arm_conv::pooling::Requantize32 requant_args(src_qinfo.offset, dst_qinfo.offset, - dst_shift, // left shift - 0, // right shift + const int32_t left_shift = std::max(-dst_shift, static_cast<int32_t>(0)); + const int32_t right_shift = std::min(-dst_shift, static_cast<int32_t>(0)); + + const arm_conv::pooling::Requantize32 requant_args(src_qinfo.offset, dst_qinfo.offset, left_shift, right_shift, dst_multiplier); // Configure assembly pooling kernel with requantization diff --git a/tests/validation/NEON/PoolingLayer.cpp b/tests/validation/NEON/PoolingLayer.cpp index 2ac1c58bada..820a7e62646 100644 --- a/tests/validation/NEON/PoolingLayer.cpp +++ b/tests/validation/NEON/PoolingLayer.cpp @@ -115,6 +115,38 @@ const auto PoolingDatasetQASYMM8PaddedMax = combine(make("Shape", {TensorShape(7 make("InputQuantInfo", {QuantizationInfo(0.25f, 11)}), make("OutputQuantInfo", {QuantizationInfo(0.25f, 11)})); +const auto PoolingDatasetQASYMM8PaddedMaxDifferentQInfo = combine(make("Shape", {TensorShape(7U, 5U, 3U)}), + make("PoolingType", {PoolingType::MAX}), + make("PoolingSize", {Size2D(3, 3)}), + 
make("PadStride", {PadStrideInfo(2, 2, 1, 1)}), + make("ExcludePadding", {false}), + make("DataType", DataType::QASYMM8), + make("DataLayout", {DataLayout::NHWC}), + make("InputQuantInfo", {QuantizationInfo(0.25f, 11)}), + make("OutputQuantInfo", {QuantizationInfo(0.5f, 7)})); + +const auto PoolingDatasetQASYMM8SignedPaddedMaxDifferentQInfoZeroOffset = + combine(make("Shape", {TensorShape(7U, 5U, 3U), TensorShape(8U, 7U, 5U)}), + make("PoolingType", {PoolingType::MAX}), + make("PoolingSize", {Size2D(3, 3)}), + make("PadStride", {PadStrideInfo(2, 2, 1, 1)}), + make("ExcludePadding", {false}), + make("DataType", DataType::QASYMM8_SIGNED), + make("DataLayout", {DataLayout::NHWC}), + make("InputQuantInfo", {QuantizationInfo(0.25f, 0)}), + make("OutputQuantInfo", {QuantizationInfo(0.5f, 0)})); + +const auto PoolingDatasetQASYMM8SignedPaddedMaxSameQInfo = + combine(make("Shape", {TensorShape(7U, 5U, 3U), TensorShape(8U, 7U, 5U)}), + make("PoolingType", {PoolingType::MAX}), + make("PoolingSize", {Size2D(3, 3)}), + make("PadStride", {PadStrideInfo(2, 2, 1, 1)}), + make("ExcludePadding", {false}), + make("DataType", DataType::QASYMM8_SIGNED), + make("DataLayout", {DataLayout::NHWC}), + make("InputQuantInfo", {QuantizationInfo(0.25f, -11)}), + make("OutputQuantInfo", {QuantizationInfo(0.25f, -11)})); + TEST_SUITE(NEON) TEST_SUITE(PoolingLayer) @@ -163,12 +195,46 @@ DATA_TEST_CASE(Validate, framework::DatasetMode::ALL, zip( ), input_info, output_info, pool_info, expected) { - bool is_valid = bool(NEPoolingLayer::validate(&input_info.clone()->set_is_resizable(false), &output_info.clone()->set_is_resizable(false), pool_info)); + bool is_valid = bool(NEPoolingLayer::validate(&input_info.clone()->set_is_resizable(false), + &output_info.clone()->set_is_resizable(false), + pool_info)); ARM_COMPUTE_EXPECT(is_valid == expected, framework::LogLevel::ERRORS); } // clang-format on // *INDENT-ON* +TEST_CASE(ValidatePaddedMaxDifferentQuantizationInfo, framework::DatasetMode::ALL) +{ + 
TensorInfo input_info(TensorShape(3U, 15U, 11U, 1U), 1, DataType::QASYMM8, DataLayout::NHWC); + TensorInfo output_info(TensorShape(3U, 8U, 6U, 1U), 1, DataType::QASYMM8, DataLayout::NHWC); + const auto pool_info = PoolingLayerInfo(PoolingType::MAX, 3, DataLayout::NHWC, PadStrideInfo(2, 2, 1, 1), false); + + input_info.set_quantization_info(QuantizationInfo(0.25f, 11)); + output_info.set_quantization_info(QuantizationInfo(0.5f, 7)); + input_info.set_is_resizable(false); + output_info.set_is_resizable(false); + + const bool is_valid = bool(NEPoolingLayer::validate(&input_info, &output_info, pool_info)); + + ARM_COMPUTE_EXPECT(is_valid, framework::LogLevel::ERRORS); +} + +TEST_CASE(ValidatePaddedAvgDifferentQuantizationInfo, framework::DatasetMode::ALL) +{ + TensorInfo input_info(TensorShape(3U, 15U, 11U, 1U), 1, DataType::QASYMM8, DataLayout::NHWC); + TensorInfo output_info(TensorShape(3U, 8U, 6U, 1U), 1, DataType::QASYMM8, DataLayout::NHWC); + const auto pool_info = PoolingLayerInfo(PoolingType::AVG, 3, DataLayout::NHWC, PadStrideInfo(2, 2, 1, 1), false); + + input_info.set_quantization_info(QuantizationInfo(0.25f, 11)); + output_info.set_quantization_info(QuantizationInfo(0.5f, 7)); + input_info.set_is_resizable(false); + output_info.set_is_resizable(false); + + const bool is_valid = bool(NEPoolingLayer::validate(&input_info, &output_info, pool_info)); + + ARM_COMPUTE_EXPECT(!is_valid, framework::LogLevel::ERRORS); +} + template <typename T> using NEPoolingLayerIndicesFixture = PoolingLayerIndicesValidationFixture<Tensor, Accessor, NEPoolingLayer, T>; @@ -404,6 +470,14 @@ FIXTURE_DATA_TEST_CASE(QASYMM8PaddedMax, validate(Accessor(_target), _reference, tolerance_qasymm8); } +FIXTURE_DATA_TEST_CASE(PaddedMaxDifferentQInfo, + NEPoolingLayerQuantizedFixture<uint8_t>, + framework::DatasetMode::PRECOMMIT, + PoolingDatasetQASYMM8PaddedMaxDifferentQInfo) +{ + validate(Accessor(_target), _reference, tolerance_qasymm8); +} + FIXTURE_DATA_TEST_CASE(RunSmallNCHW, NEPoolingLayerQuantizedFixture<uint8_t>, framework::DatasetMode::PRECOMMIT, @@ 
-448,6 +522,22 @@ FIXTURE_DATA_TEST_CASE(RunMixedDataLayout, } TEST_SUITE_END() // QASYMM8 TEST_SUITE(QASYMM8_SIGNED) +FIXTURE_DATA_TEST_CASE(PaddedMaxSameQInfo, + NEPoolingLayerQuantizedFixture<int8_t>, + framework::DatasetMode::PRECOMMIT, + PoolingDatasetQASYMM8SignedPaddedMaxSameQInfo) +{ + // Validate output + validate(Accessor(_target), _reference, tolerance_qasymm8_s); +} +FIXTURE_DATA_TEST_CASE(PaddedMaxDifferentQInfoZeroOffset, + NEPoolingLayerQuantizedFixture<int8_t>, + framework::DatasetMode::PRECOMMIT, + PoolingDatasetQASYMM8SignedPaddedMaxDifferentQInfoZeroOffset) +{ + // Validate output + validate(Accessor(_target), _reference, tolerance_qasymm8_s); +} FIXTURE_DATA_TEST_CASE(RunSmall, NEPoolingLayerQuantizedFixture<int8_t>, framework::DatasetMode::PRECOMMIT,