Skip to content

Commit d2ad1c7

Browse files
authored
librt: Support specifying vec capacity during construction (#21304)
Use e.g. `vec[i64](capacity=n)` to reserve capacity in the buffer during construction. This can be used to reduce the number of buffer reallocations when appending items. Capacity is the length of the buffer, while length is the number of items currently stored in the buffer. I used a coding agent for assistance, but did this in multiple smaller increments.
1 parent 6710142 commit d2ad1c7

20 files changed

Lines changed: 679 additions & 43 deletions

mypy/typeshed/stubs/librt/librt/vecs.pyi

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -5,9 +5,9 @@ T = TypeVar("T")
55

66
class vec(Generic[T]):
77
@overload
8-
def __init__(self) -> None: ...
8+
def __init__(self, *, capacity: i64 = ...) -> None: ...
99
@overload
10-
def __init__(self, items: Iterable[T], /) -> None: ...
10+
def __init__(self, items: Iterable[T], /, *, capacity: i64 = ...) -> None: ...
1111
def __len__(self) -> i64: ...
1212
@overload
1313
def __getitem__(self, i: i64, /) -> T: ...

mypyc/irbuild/expression.py

Lines changed: 33 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -364,10 +364,17 @@ def transform_call_expr(builder: IRBuilder, expr: CallExpr) -> Value:
364364
):
365365
item_type = builder.type_to_rtype(analyzed.types[0])
366366
vec_type = RVec(item_type)
367-
if len(expr.args) == 0:
368-
return vec_create(builder.builder, vec_type, 0, expr.line)
369-
elif len(expr.args) == 1 and expr.arg_kinds == [ARG_POS]:
370-
return translate_vec_create_from_iterable(builder, vec_type, expr.args[0])
367+
capacity = _get_vec_capacity(builder, expr)
368+
if len(expr.args) == 0 or (len(expr.args) == 1 and expr.arg_kinds == [ARG_NAMED]):
369+
# vec[T]() or vec[T](capacity=N)
370+
return vec_create(builder.builder, vec_type, 0, expr.line, capacity=capacity)
371+
elif (len(expr.args) == 1 and expr.arg_kinds == [ARG_POS]) or (
372+
len(expr.args) == 2 and expr.arg_kinds == [ARG_POS, ARG_NAMED]
373+
):
374+
# vec[T](items) or vec[T](items, capacity=N)
375+
return translate_vec_create_from_iterable(
376+
builder, vec_type, expr.args[0], capacity=capacity
377+
)
371378
callee = analyzed.expr # Unwrap type application
372379

373380
if isinstance(callee, MemberExpr):
@@ -561,8 +568,16 @@ def translate_super_method_call(builder: IRBuilder, expr: CallExpr, callee: Supe
561568
return builder.builder.call(decl, arg_values, arg_kinds, arg_names, expr.line)
562569

563570

571+
def _get_vec_capacity(builder: IRBuilder, expr: CallExpr) -> Value | None:
    """Return the value of the 'capacity' keyword argument of a vec() call.

    The call's arguments are scanned in order. The first argument that is
    passed by name as 'capacity' is handed to builder.accept() and the
    result is returned. If the call has no such argument, return None.
    """
    for position, (kind, name) in enumerate(zip(expr.arg_kinds, expr.arg_names)):
        # Skip anything that is not the named 'capacity' argument.
        if kind != ARG_NAMED or name != "capacity":
            continue
        return builder.accept(expr.args[position])
    return None
577+
578+
564579
def translate_vec_create_from_iterable(
565-
builder: IRBuilder, vec_type: RVec, arg: Expression
580+
builder: IRBuilder, vec_type: RVec, arg: Expression, *, capacity: Value | None = None
566581
) -> Value:
567582
line = arg.line
568583
item_type = vec_type.item_type
@@ -581,28 +596,35 @@ def translate_vec_create_from_iterable(
581596
if is_int64_rprimitive(other_type) or is_int_rprimitive(other_type):
582597
length = builder.accept(other)
583598
init = builder.accept(lst.items[0])
584-
return vec_create_initialized(builder.builder, vec_type, length, init, line)
599+
return vec_create_initialized(
600+
builder.builder, vec_type, length, init, line, capacity=capacity
601+
)
585602
assert False, other_type
586603
if isinstance(arg, ListExpr):
587604
items = []
588605
for item in arg.items:
589606
value = builder.accept(item)
590607
items.append(builder.coerce(value, item_type, line))
591-
return vec_create_from_values(builder.builder, vec_type, items, line)
608+
return vec_create_from_values(builder.builder, vec_type, items, line, capacity=capacity)
592609
if isinstance(arg, ListComprehension):
593-
return translate_vec_comprehension(builder, vec_type, arg.generator)
594-
return vec_from_iterable(builder, vec_type, arg, line)
610+
return translate_vec_comprehension(builder, vec_type, arg.generator, capacity=capacity)
611+
return vec_from_iterable(builder, vec_type, arg, line, capacity=capacity)
595612

596613

597614
def vec_from_iterable(
598-
builder: IRBuilder, vec_type: RVec, iterable: Expression, line: int
615+
builder: IRBuilder,
616+
vec_type: RVec,
617+
iterable: Expression,
618+
line: int,
619+
*,
620+
capacity: Value | None = None,
599621
) -> Value:
600622
"""Construct a vec from an arbitrary iterable."""
601623
# Translate it as a vec comprehension vec[t]([<name> for <name> in
602624
# iterable]). This way we can use various special casing supported
603625
# by for loops and comprehensions.
604626
vec = Register(vec_type)
605-
builder.assign(vec, vec_create(builder.builder, vec_type, 0, line), line)
627+
builder.assign(vec, vec_create(builder.builder, vec_type, 0, line, capacity=capacity), line)
606628
name = f"___tmp_{line}"
607629
var = Var(name)
608630
reg = builder.add_local(var, vec_type.item_type)

mypyc/irbuild/for_helpers.py

Lines changed: 7 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -363,7 +363,9 @@ def gen_inner_stmts() -> None:
363363
return builder.read(set_ops, gen.line)
364364

365365

366-
def translate_vec_comprehension(builder: IRBuilder, vec_type: RVec, gen: GeneratorExpr) -> Value:
366+
def translate_vec_comprehension(
367+
builder: IRBuilder, vec_type: RVec, gen: GeneratorExpr, *, capacity: Value | None = None
368+
) -> Value:
367369
def set_item(x: Value, y: Value, z: Value, line: int) -> None:
368370
vec_init_item_unsafe(builder.builder, x, y, z, line)
369371

@@ -372,15 +374,17 @@ def set_item(x: Value, y: Value, z: Value, line: int) -> None:
372374
builder,
373375
gen,
374376
empty_op_llbuilder=lambda length, line: vec_create(
375-
builder.builder, vec_type, length, line
377+
builder.builder, vec_type, length, line, capacity=capacity
376378
),
377379
set_item_op=set_item,
378380
)
379381
if val is not None:
380382
return val
381383

382384
vec = Register(vec_type)
383-
builder.assign(vec, vec_create(builder.builder, vec_type, 0, gen.line), gen.line)
385+
builder.assign(
386+
vec, vec_create(builder.builder, vec_type, 0, gen.line, capacity=capacity), gen.line
387+
)
384388
loop_params = list(zip(gen.indices, gen.sequences, gen.condlists, gen.is_async))
385389

386390
def gen_inner_stmts() -> None:

mypyc/irbuild/vec.py

Lines changed: 30 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -74,17 +74,28 @@ def as_platform_int(builder: LowLevelIRBuilder, v: Value, line: int) -> Value:
7474
return builder.coerce(v, c_pyssize_t_rprimitive, line)
7575

7676

77-
def vec_create(builder: LowLevelIRBuilder, vtype: RVec, length: int | Value, line: int) -> Value:
77+
def vec_create(
78+
builder: LowLevelIRBuilder,
79+
vtype: RVec,
80+
length: int | Value,
81+
line: int,
82+
*,
83+
capacity: Value | None = None,
84+
) -> Value:
7885
if isinstance(length, int):
7986
length = Integer(length, c_pyssize_t_rprimitive)
8087
length = as_platform_int(builder, length, line)
88+
if capacity is not None:
89+
capacity = as_platform_int(builder, capacity, line)
90+
else:
91+
capacity = length
8192

8293
item_type = vtype.item_type
8394
api_name = vec_api_by_item_type.get(item_type)
8495
if api_name is not None:
8596
call = CallC(
8697
f"{api_name}.alloc",
87-
[length, length],
98+
[length, capacity],
8899
vtype,
89100
False,
90101
False,
@@ -110,7 +121,7 @@ def vec_create(builder: LowLevelIRBuilder, vtype: RVec, length: int | Value, lin
110121
if depth == 0:
111122
call = CallC(
112123
"VecTApi.alloc",
113-
[length, length, typeval],
124+
[length, capacity, typeval],
114125
vtype,
115126
False,
116127
False,
@@ -121,7 +132,7 @@ def vec_create(builder: LowLevelIRBuilder, vtype: RVec, length: int | Value, lin
121132
else:
122133
call = CallC(
123134
"VecNestedApi.alloc",
124-
[length, length, typeval, Integer(depth, int32_rprimitive)],
135+
[length, capacity, typeval, Integer(depth, int32_rprimitive)],
125136
vtype,
126137
False,
127138
False,
@@ -134,7 +145,13 @@ def vec_create(builder: LowLevelIRBuilder, vtype: RVec, length: int | Value, lin
134145

135146

136147
def vec_create_initialized(
137-
builder: LowLevelIRBuilder, vtype: RVec, length: int | Value, init: Value, line: int
148+
builder: LowLevelIRBuilder,
149+
vtype: RVec,
150+
length: int | Value,
151+
init: Value,
152+
line: int,
153+
*,
154+
capacity: Value | None = None,
138155
) -> Value:
139156
"""Create vec with items initialized to the given value."""
140157
if isinstance(length, int):
@@ -143,7 +160,7 @@ def vec_create_initialized(
143160

144161
item_type = vtype.item_type
145162
init = builder.coerce(init, item_type, line)
146-
vec = vec_create(builder, vtype, length, line)
163+
vec = vec_create(builder, vtype, length, line, capacity=capacity)
147164

148165
items_start = vec_items(builder, vec)
149166
step = step_size(item_type)
@@ -160,9 +177,14 @@ def vec_create_initialized(
160177

161178

162179
def vec_create_from_values(
163-
builder: LowLevelIRBuilder, vtype: RVec, values: list[Value], line: int
180+
builder: LowLevelIRBuilder,
181+
vtype: RVec,
182+
values: list[Value],
183+
line: int,
184+
*,
185+
capacity: Value | None = None,
164186
) -> Value:
165-
vec = vec_create(builder, vtype, len(values), line)
187+
vec = vec_create(builder, vtype, len(values), line, capacity=capacity)
166188
ptr = vec_items(builder, vec)
167189
item_type = vtype.item_type
168190
step = step_size(item_type)

mypyc/lib-rt/vecs/librt_vecs.c

Lines changed: 11 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -96,29 +96,34 @@ typedef struct {
9696

9797
static PyObject *vec_generic_alias_call(PyObject *self, PyObject *args, PyObject *kw)
9898
{
99-
static char *kwlist[] = {"", NULL};
99+
static char *kwlist[] = {"", "capacity", NULL};
100100
PyObject *init = NULL;
101-
if (!PyArg_ParseTupleAndKeywords(args, kw, "|O:vec", kwlist, &init)) {
101+
int64_t cap = 0;
102+
if (!PyArg_ParseTupleAndKeywords(args, kw, "|OL:vec", kwlist, &init, &cap)) {
103+
return NULL;
104+
}
105+
if (cap < 0) {
106+
PyErr_SetString(PyExc_ValueError, "capacity must not be negative");
102107
return NULL;
103108
}
104109
VecGenericAlias *p = (VecGenericAlias *)self;
105110
if (p->depth == 0) {
106111
if (init == NULL) {
107-
VecT vec = VecT_New(0, 0, p->item_type);
112+
VecT vec = VecT_New(0, cap, p->item_type);
108113
if (VEC_IS_ERROR(vec))
109114
return NULL;
110115
return VecT_Box(vec, p->item_type);
111116
} else {
112-
return VecT_FromIterable(p->item_type, init);
117+
return VecT_FromIterable(p->item_type, init, cap);
113118
}
114119
} else {
115120
if (init == NULL) {
116-
VecNested vec = VecNested_New(0, 0, p->item_type, p->depth);
121+
VecNested vec = VecNested_New(0, cap, p->item_type, p->depth);
117122
if (VEC_IS_ERROR(vec))
118123
return NULL;
119124
return VecNested_Box(vec);
120125
} else {
121-
return VecNested_FromIterable(p->item_type, p->depth, init);
126+
return VecNested_FromIterable(p->item_type, p->depth, init, cap);
122127
}
123128
}
124129
}

mypyc/lib-rt/vecs/librt_vecs.h

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -713,7 +713,7 @@ static inline int VecT_ItemCheck(VecT v, PyObject *item, size_t item_type) {
713713
}
714714

715715
VecT VecT_New(Py_ssize_t size, Py_ssize_t cap, size_t item_type);
716-
PyObject *VecT_FromIterable(size_t item_type, PyObject *iterable);
716+
PyObject *VecT_FromIterable(size_t item_type, PyObject *iterable, int64_t cap);
717717
PyObject *VecT_Box(VecT vec, size_t item_type);
718718
VecT VecT_Append(VecT vec, PyObject *x, size_t item_type);
719719
VecT VecT_Remove(VecT vec, PyObject *x);
@@ -726,7 +726,7 @@ static inline int VecNested_Check(PyObject *o) {
726726
}
727727

728728
VecNested VecNested_New(Py_ssize_t size, Py_ssize_t cap, size_t item_type, size_t depth);
729-
PyObject *VecNested_FromIterable(size_t item_type, size_t depth, PyObject *iterable);
729+
PyObject *VecNested_FromIterable(size_t item_type, size_t depth, PyObject *iterable, int64_t cap);
730730
PyObject *VecNested_Box(VecNested);
731731
VecNested VecNested_Append(VecNested vec, VecNestedBufItem x);
732732
VecNested VecNested_Remove(VecNested vec, VecNestedBufItem x);

mypyc/lib-rt/vecs/vec_nested.c

Lines changed: 12 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -65,6 +65,10 @@ VecNested VecNested_ConvertFromNested(VecNestedBufItem item) {
6565
}
6666

6767
VecNested VecNested_New(Py_ssize_t size, Py_ssize_t cap, size_t item_type, size_t depth) {
68+
if (cap < 0) {
69+
PyErr_SetString(PyExc_ValueError, "capacity must not be negative");
70+
return vec_error();
71+
}
6872
if (cap < size)
6973
cap = size;
7074
VecNested vec = vec_alloc(cap, item_type, depth);
@@ -564,10 +568,16 @@ PyTypeObject VecNestedType = {
564568
// TODO: free
565569
};
566570

567-
PyObject *VecNested_FromIterable(size_t item_type, size_t depth, PyObject *iterable) {
568-
VecNested v = vec_alloc(0, item_type, depth);
571+
PyObject *VecNested_FromIterable(size_t item_type, size_t depth, PyObject *iterable, int64_t cap) {
572+
VecNested v = vec_alloc(cap, item_type, depth);
569573
if (VEC_IS_ERROR(v))
570574
return NULL;
575+
if (cap > 0) {
576+
for (int64_t i = 0; i < cap; i++) {
577+
v.buf->items[i].len = -1;
578+
v.buf->items[i].buf = NULL;
579+
}
580+
}
571581
v.len = 0;
572582

573583
PyObject *iter = PyObject_GetIter(iterable);

mypyc/lib-rt/vecs/vec_t.c

Lines changed: 10 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -81,6 +81,10 @@ VecT VecT_ConvertFromNested(VecNestedBufItem item) {
8181
}
8282

8383
VecT VecT_New(Py_ssize_t size, Py_ssize_t cap, size_t item_type) {
84+
if (cap < 0) {
85+
PyErr_SetString(PyExc_ValueError, "capacity must not be negative");
86+
return vec_error();
87+
}
8488
if (cap < size)
8589
cap = size;
8690
VecT vec = vec_alloc(cap, item_type);
@@ -557,10 +561,14 @@ PyTypeObject VecTType = {
557561
// TODO: free
558562
};
559563

560-
PyObject *VecT_FromIterable(size_t item_type, PyObject *iterable) {
561-
VecT v = vec_alloc(0, item_type);
564+
PyObject *VecT_FromIterable(size_t item_type, PyObject *iterable, int64_t cap) {
565+
VecT v = vec_alloc(cap, item_type);
562566
if (VEC_IS_ERROR(v))
563567
return NULL;
568+
if (cap > 0) {
569+
for (int64_t i = 0; i < cap; i++)
570+
v.buf->items[i] = NULL;
571+
}
564572
v.len = 0;
565573

566574
PyObject *iter = PyObject_GetIter(iterable);

mypyc/lib-rt/vecs/vec_template.c

Lines changed: 19 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -84,8 +84,12 @@ VEC FUNC(ConvertFromNested)(VecNestedBufItem item) {
8484
}
8585

8686
VEC FUNC(New)(Py_ssize_t size, Py_ssize_t cap) {
87+
if (cap < 0) {
88+
PyErr_SetString(PyExc_ValueError, "capacity must not be negative");
89+
return vec_error();
90+
}
8791
if (cap < size)
88-
size = cap;
92+
cap = size;
8993
VEC vec = vec_alloc(cap);
9094
if (VEC_IS_ERROR(vec))
9195
return vec;
@@ -96,10 +100,13 @@ VEC FUNC(New)(Py_ssize_t size, Py_ssize_t cap) {
96100
return vec;
97101
}
98102

99-
PyObject *FUNC(FromIterable)(PyObject *iterable) {
100-
VEC v = vec_alloc(0);
103+
PyObject *FUNC(FromIterable)(PyObject *iterable, int64_t cap) {
104+
VEC v = vec_alloc(cap);
101105
if (VEC_IS_ERROR(v))
102106
return NULL;
107+
if (cap > 0) {
108+
memset(v.buf->items, 0, sizeof(ITEM_C_TYPE) * cap);
109+
}
103110
v.len = 0;
104111

105112
PyObject *iter = PyObject_GetIter(iterable);
@@ -132,15 +139,20 @@ PyObject *FUNC(FromIterable)(PyObject *iterable) {
132139
}
133140

134141
static PyObject *vec_new(PyTypeObject *self, PyObject *args, PyObject *kw) {
135-
static char *kwlist[] = {"", NULL};
142+
static char *kwlist[] = {"", "capacity", NULL};
136143
PyObject *init = NULL;
137-
if (!PyArg_ParseTupleAndKeywords(args, kw, "|O:vec", kwlist, &init)) {
144+
int64_t cap = 0;
145+
if (!PyArg_ParseTupleAndKeywords(args, kw, "|OL:vec", kwlist, &init, &cap)) {
146+
return NULL;
147+
}
148+
if (cap < 0) {
149+
PyErr_SetString(PyExc_ValueError, "capacity must not be negative");
138150
return NULL;
139151
}
140152
if (init == NULL) {
141-
return FUNC(Box)(FUNC(New)(0, 0));
153+
return FUNC(Box)(FUNC(New)(0, cap));
142154
} else {
143-
return (PyObject *)FUNC(FromIterable)(init);
155+
return (PyObject *)FUNC(FromIterable)(init, cap);
144156
}
145157
}
146158

0 commit comments

Comments
 (0)