Skip to content
29 changes: 29 additions & 0 deletions cpp/src/arrow/util/bitmap_ops.cc
Original file line number Diff line number Diff line change
Expand Up @@ -329,6 +329,35 @@ bool OptionalBitmapEquals(const std::shared_ptr<Buffer>& left, int64_t left_offs
right ? right->data() : nullptr, right_offset, length);
}

Result<std::shared_ptr<Buffer>> OptionalBitmapAnd(MemoryPool* pool,
const std::shared_ptr<Buffer>& left,
int64_t left_offset,
const std::shared_ptr<Buffer>& right,
int64_t right_offset, int64_t length,
int64_t out_offset) {
if (left == nullptr && right == nullptr) {
return nullptr;
}
if (left == nullptr) {
if (right_offset >= out_offset && (right_offset - out_offset) % 8 == 0) {
int64_t byte_shift = (right_offset - out_offset) / 8;
int64_t byte_length = bit_util::BytesForBits(out_offset + length);
return SliceBuffer(right, byte_shift, byte_length);
}
return CopyBitmap(pool, right->data(), right_offset, length, out_offset);
}
Comment thread
Shockp marked this conversation as resolved.
if (right == nullptr) {
if (left_offset >= out_offset && (left_offset - out_offset) % 8 == 0) {
int64_t byte_shift = (left_offset - out_offset) / 8;
int64_t byte_length = bit_util::BytesForBits(out_offset + length);
return SliceBuffer(left, byte_shift, byte_length);
}
return CopyBitmap(pool, left->data(), left_offset, length, out_offset);
}
return BitmapAnd(pool, left->data(), left_offset, right->data(), right_offset, length,
out_offset);
}

namespace {

template <template <typename> class BitOp>
Expand Down
23 changes: 23 additions & 0 deletions cpp/src/arrow/util/bitmap_ops.h
Original file line number Diff line number Diff line change
Expand Up @@ -166,6 +166,29 @@ ARROW_EXPORT
void BitmapAnd(const uint8_t* left, int64_t left_offset, const uint8_t* right,
int64_t right_offset, int64_t length, int64_t out_offset, uint8_t* out);

/// \brief Do a "bitmap and" on two buffers, handling null buffers safely.
///
/// If both buffers are null, returns nullptr.
/// If one buffer is null, allocates a new buffer and copies the valid buffer into it.
/// If both buffers are valid, allocates a new buffer and performs a bitwise AND.
///
/// \param[in] pool memory pool to allocate memory from
/// \param[in] left left source buffer (may be null)
/// \param[in] left_offset bit offset into the left buffer
/// \param[in] right right source buffer (may be null)
/// \param[in] right_offset bit offset into the right buffer
/// \param[in] length number of bits to compute
/// \param[in] out_offset bit offset into the output buffer
///
/// \return Resulting buffer (may be null)
ARROW_EXPORT
Result<std::shared_ptr<Buffer>> OptionalBitmapAnd(MemoryPool* pool,
const std::shared_ptr<Buffer>& left,
int64_t left_offset,
const std::shared_ptr<Buffer>& right,
int64_t right_offset, int64_t length,
int64_t out_offset = 0);

/// \brief Do a "bitmap or" for the given bit length on right and left buffers
/// starting at their respective bit-offsets and put the results in out_buffer
/// starting at the given bit-offset.
Expand Down
40 changes: 40 additions & 0 deletions cpp/src/arrow/util/bitmap_test.cc
Original file line number Diff line number Diff line change
Expand Up @@ -17,6 +17,7 @@

#include <gmock/gmock.h>
#include <gtest/gtest.h>
#include <cstring>

#include "arrow/array/array_base.h"
#include "arrow/array/data.h"
Expand Down Expand Up @@ -1236,6 +1237,45 @@ TEST(BitmapOpTest, PartialLeadingByteSafety) {
}
}

TEST(BitmapOpTest, OptionalBitmapAnd) {
MemoryPool* pool = default_memory_pool();
int64_t length = 16;
int64_t phys_bits = length;

// Create two distinct test buffers
ASSERT_OK_AND_ASSIGN(auto buf1, AllocateEmptyBitmap(phys_bits, pool));
ASSERT_OK_AND_ASSIGN(auto buf2, AllocateEmptyBitmap(phys_bits, pool));

// Fill buf1 with 1s (true)
std::memset(buf1->mutable_data(), 0xFF, buf1->size());
// Fill buf2 with alternating bits 01010101 (0xAA)
std::memset(buf2->mutable_data(), 0xAA, buf2->size());

// State 1: Both nullptr
ASSERT_OK_AND_ASSIGN(auto result1,
OptionalBitmapAnd(pool, nullptr, 0, nullptr, 0, length, 0));
ASSERT_EQ(result1, nullptr);

// State 2: Left nullptr, Right valid (Should copy Right)
ASSERT_OK_AND_ASSIGN(auto result2,
OptionalBitmapAnd(pool, nullptr, 0, buf2, 0, length, 0));
ASSERT_NE(result2, nullptr);
EXPECT_TRUE(BitmapEquals(buf2->data(), 0, result2->data(), 0, length));

// State 3: Left valid, Right nullptr (Should copy Left)
ASSERT_OK_AND_ASSIGN(auto result3,
OptionalBitmapAnd(pool, buf1, 0, nullptr, 0, length, 0));
ASSERT_NE(result3, nullptr);
EXPECT_TRUE(BitmapEquals(buf1->data(), 0, result3->data(), 0, length));

// State 4: Both valid (Should perform Bitwise AND)
ASSERT_OK_AND_ASSIGN(auto result4,
OptionalBitmapAnd(pool, buf1, 0, buf2, 0, length, 0));
ASSERT_NE(result4, nullptr);
// Since buf1 is all 1s, (buf1 AND buf2) should perfectly equal buf2
Copy link
Copy Markdown
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This is an easy case to test, but can we test with inputs where the output is not trivially equal to one of the inputs?

EXPECT_TRUE(BitmapEquals(buf2->data(), 0, result4->data(), 0, length));
}

// Tests for Bitmap visiting.

// test the basic assumption of word level Bitmap::Visit
Expand Down
Loading