From 7f9739c266a3c8467c154787aa1e6089440bd1be Mon Sep 17 00:00:00 2001 From: "copilot-swe-agent[bot]" <198982749+Copilot@users.noreply.github.com> Date: Tue, 10 Mar 2026 15:00:32 +0000 Subject: [PATCH 1/4] Initial plan From 185fe1c9260f7481cc9c42e74480c89c20a37358 Mon Sep 17 00:00:00 2001 From: "copilot-swe-agent[bot]" <198982749+Copilot@users.noreply.github.com> Date: Tue, 10 Mar 2026 15:14:27 +0000 Subject: [PATCH 2/4] Move known_callables from ASTBuilderBase to TargetBase Co-authored-by: inducer <352067+inducer@users.noreply.github.com> --- loopy/library/reduction.py | 4 ++-- loopy/target/__init__.py | 12 ++++++++++-- loopy/target/c/__init__.py | 21 ++++++++++++--------- loopy/target/cuda.py | 16 ++++++---------- loopy/target/opencl.py | 13 +++++++------ loopy/target/pyopencl.py | 23 ++++++++++++----------- loopy/target/python.py | 7 ------- loopy/tools.py | 2 +- loopy/translation_unit.py | 2 +- 9 files changed, 51 insertions(+), 49 deletions(-) diff --git a/loopy/library/reduction.py b/loopy/library/reduction.py index 38ebc1c70..adb5a0bfa 100644 --- a/loopy/library/reduction.py +++ b/loopy/library/reduction.py @@ -368,7 +368,7 @@ def __call__(self, from loopy.translation_unit import add_callable_to_table # getting the callable 'max' from target - max_scalar_callable = target.get_device_ast_builder().known_callables["max"] + max_scalar_callable = target.known_callables["max"] # type specialize the callable max_scalar_callable, callables_table = max_scalar_callable.with_types( @@ -404,7 +404,7 @@ def __call__(self, from loopy.translation_unit import add_callable_to_table # getting the callable 'min' from target - min_scalar_callable = target.get_device_ast_builder().known_callables["min"] + min_scalar_callable = target.known_callables["min"] # type specialize the callable min_scalar_callable, callables_table = min_scalar_callable.with_types( diff --git a/loopy/target/__init__.py b/loopy/target/__init__.py index 9476c0e2a..3785067ce 100644 --- a/loopy/target/__init__.py +++ b/loopy/target/__init__.py @@ -191,6 +191,15 @@ def get_kernel_executor( """ raise NotImplementedError() + @property + def known_callables(self): + """ + Returns a mapping from function ids to corresponding + :class:`loopy.kernel.function_interface.InKernelCallable` for the + function ids known to *self*. + """ + return {} + @dataclass(frozen=True) class ASTBuilderBase(ABC, Generic[ASTType]): @@ -208,8 +217,7 @@ def known_callables(self): :class:`loopy.kernel.function_interface.InKernelCallable` for the function ids known to *self.target*. """ - # FIXME: @inducer: Do we need to move this to TargetBase? - return {} + return dict(self.target.known_callables) def symbol_manglers(self): return [] diff --git a/loopy/target/c/__init__.py b/loopy/target/c/__init__.py index a2885b7bf..41b64d710 100644 --- a/loopy/target/c/__init__.py +++ b/loopy/target/c/__init__.py @@ -509,6 +509,13 @@ def get_host_ast_builder(self): def get_device_ast_builder(self): return CFamilyASTBuilder(self) + @property + @override + def known_callables(self): + callables = super().known_callables + callables.update(get_c_callables()) + return callables + # {{{ types @memoize_method @@ -890,13 +897,6 @@ def preamble_generators(self): lambda preamble_info: _preamble_generator( preamble_info, self.preamble_function_qualifier)]) - @property - @override - def known_callables(self): - callables = super().known_callables - callables.update(get_c_callables()) - return callables - # }}} # {{{ code generation @@ -1606,15 +1606,18 @@ class CWithGNULibcTarget(CTarget): def get_device_ast_builder(self): return CWithGNULibcASTBuilder(self) - -class CWithGNULibcASTBuilder(CASTBuilder): @property + @override def known_callables(self): callables = super().known_callables callables.update(get_gnu_libc_callables()) return callables +class CWithGNULibcASTBuilder(CASTBuilder): + pass + + class ExecutableCWithGNULibcTarget(ExecutableCTarget): def get_device_ast_builder(self): return CWithGNULibcASTBuilder(self) diff --git a/loopy/target/cuda.py b/loopy/target/cuda.py index 23cd2dab0..614786a0f 100644 --- a/loopy/target/cuda.py +++ b/loopy/target/cuda.py @@ -249,6 +249,12 @@ def split_kernel_at_global_barriers(self): def get_device_ast_builder(self): return CUDACASTBuilder(self) + @property + def known_callables(self): + callables = super().known_callables + callables.update(get_cuda_callables()) + return callables + # {{{ types @memoize_method @@ -330,16 +336,6 @@ class CUDACASTBuilder(CFamilyASTBuilder): preamble_function_qualifier = "inline __device__" - # {{{ library - - @property - def known_callables(self): - callables = super().known_callables - callables.update(get_cuda_callables()) - return callables - - # }}} - # {{{ top-level codegen def get_function_declaration( diff --git a/loopy/target/opencl.py b/loopy/target/opencl.py index ed05c7628..0d17a2ca1 100644 --- a/loopy/target/opencl.py +++ b/loopy/target/opencl.py @@ -632,6 +632,13 @@ def split_kernel_at_global_barriers(self): def get_device_ast_builder(self): return OpenCLCASTBuilder(self) + @property + @override + def known_callables(self): + callables = super().known_callables + callables.update(get_opencl_callables()) + return callables + @memoize_method def get_dtype_registry(self) -> DTypeRegistry: from loopy.target.c.compyte.dtypes import ( @@ -673,12 +680,6 @@ def vector_dtype(self, base, count): class OpenCLCASTBuilder(CFamilyASTBuilder): # {{{ library - @property - def known_callables(self): - callables = super().known_callables - callables.update(get_opencl_callables()) - return callables - def symbol_manglers(self): return ( [*super().symbol_manglers(), opencl_symbol_mangler]) diff --git a/loopy/target/pyopencl.py b/loopy/target/pyopencl.py index 9d88394c9..55f046ed4 100644 --- a/loopy/target/pyopencl.py +++ b/loopy/target/pyopencl.py @@ -584,6 +584,18 @@ def get_host_ast_builder(self): def get_device_ast_builder(self): return PyOpenCLCASTBuilder(self) + @property + @override + def known_callables(self): + from loopy.library.random123 import get_random123_callables + + # order matters: e.g. prefer our abs() over that of the + # superclass + callables = super().known_callables + callables.update(get_pyopencl_callables()) + callables.update(get_random123_callables(self)) + return callables + # {{{ types @override @@ -1224,17 +1236,6 @@ def get_function_declaration( # {{{ library - @property - def known_callables(self): - from loopy.library.random123 import get_random123_callables - - # order matters: e.g. prefer our abs() over that of the - # superclass - callables = super().known_callables - callables.update(get_pyopencl_callables()) - callables.update(get_random123_callables(self.target)) - return callables - def preamble_generators(self): return ([pyopencl_preamble_generator, *super().preamble_generators()]) diff --git a/loopy/target/python.py b/loopy/target/python.py index 3b4b9795f..1fd336da0 100644 --- a/loopy/target/python.py +++ b/loopy/target/python.py @@ -190,13 +190,6 @@ class PythonASTBuilderBase(ASTBuilderBase[Generable]): """A Python host AST builder for integration with PyOpenCL. """ - @property - def known_callables(self): - from loopy.target.c import get_c_callables - callables = super().known_callables - callables.update(get_c_callables()) - return callables - def preamble_generators(self): return ( [*super().preamble_generators(), _base_python_preamble_generator]) diff --git a/loopy/tools.py b/loopy/tools.py index 8ab419585..777cc38a2 100644 --- a/loopy/tools.py +++ b/loopy/tools.py @@ -674,7 +674,7 @@ def __init__(self, rule_mapping_context, callables_table, target): @cached_property def known_callables(self): from loopy.kernel.function_interface import CallableKernel - return (frozenset(self.target.get_device_ast_builder().known_callables) + return (frozenset(self.target.known_callables) | {name for name, clbl in self.callables_table.items() if isinstance(clbl, CallableKernel)}) diff --git a/loopy/translation_unit.py b/loopy/translation_unit.py index 9d27e24a4..fbb20b91b 100644 --- a/loopy/translation_unit.py +++ b/loopy/translation_unit.py @@ -873,7 +873,7 @@ def resolve_callables(t_unit: TranslationUnit) -> TranslationUnit: # get registered callables known_callables = dict(t_unit.callables_table) # get target specific callables - known_callables.update(t_unit.target.get_device_ast_builder().known_callables) + known_callables.update(t_unit.target.known_callables) # get loopy specific callables known_callables.update(get_loopy_callables()) From 91bbd26372f9a30c738502b76ed48f19ce380a69 Mon Sep 17 00:00:00 2001 From: "copilot-swe-agent[bot]" <198982749+Copilot@users.noreply.github.com> Date: Tue, 10 Mar 2026 15:28:24 +0000 Subject: [PATCH 3/4] Split known_callables into known_host_callables and known_device_callables on TargetBase Co-authored-by: inducer <352067+inducer@users.noreply.github.com> --- loopy/library/reduction.py | 4 ++-- loopy/target/__init__.py | 21 +++++++++++++++++---- loopy/target/c/__init__.py | 8 ++++---- loopy/target/cuda.py | 4 ++-- loopy/target/opencl.py | 4 ++-- loopy/target/pyopencl.py | 12 ++++++++++-- loopy/target/python.py | 4 ++++ loopy/tools.py | 2 +- loopy/translation_unit.py | 2 +- 9 files changed, 43 insertions(+), 18 deletions(-) diff --git a/loopy/library/reduction.py b/loopy/library/reduction.py index adb5a0bfa..660cbf482 100644 --- a/loopy/library/reduction.py +++ b/loopy/library/reduction.py @@ -368,7 +368,7 @@ def __call__(self, from loopy.translation_unit import add_callable_to_table # getting the callable 'max' from target - max_scalar_callable = target.known_callables["max"] + max_scalar_callable = target.known_device_callables["max"] # type specialize the callable max_scalar_callable, callables_table = max_scalar_callable.with_types( @@ -404,7 +404,7 @@ def __call__(self, from loopy.translation_unit import add_callable_to_table # getting the callable 'min' from target - min_scalar_callable = target.known_callables["min"] + min_scalar_callable = target.known_device_callables["min"] # type specialize the callable min_scalar_callable, callables_table = min_scalar_callable.with_types( diff --git a/loopy/target/__init__.py b/loopy/target/__init__.py index 3785067ce..ec6eadca0 100644 --- a/loopy/target/__init__.py +++ b/loopy/target/__init__.py @@ -192,11 +192,20 @@ def get_kernel_executor( raise NotImplementedError() @property - def known_callables(self): + def known_host_callables(self): """ Returns a mapping from function ids to corresponding :class:`loopy.kernel.function_interface.InKernelCallable` for the - function ids known to *self*. + function ids known to *self* for host code generation. + """ + return {} + + @property + def known_device_callables(self): + """ + Returns a mapping from function ids to corresponding + :class:`loopy.kernel.function_interface.InKernelCallable` for the + function ids known to *self* for device code generation. """ return {} @@ -215,9 +224,9 @@ def known_callables(self): """ Returns a mapping from function ids to corresponding :class:`loopy.kernel.function_interface.InKernelCallable` for the - function ids known to *self.target*. + function ids known to *self.target* for device code generation. """ - return dict(self.target.known_callables) + return dict(self.target.known_device_callables) def symbol_manglers(self): return [] @@ -359,6 +368,10 @@ def __str__(self): class DummyHostASTBuilder(ASTBuilderBase[None]): + @property + def known_callables(self): + return dict(self.target.known_host_callables) + def get_function_definition(self, codegen_state, codegen_result, schedule_index, function_decl, function_body): return function_body diff --git a/loopy/target/c/__init__.py b/loopy/target/c/__init__.py index 41b64d710..c6803f3fd 100644 --- a/loopy/target/c/__init__.py +++ b/loopy/target/c/__init__.py @@ -511,8 +511,8 @@ def get_device_ast_builder(self): @property @override - def known_callables(self): - callables = super().known_callables + def known_device_callables(self): + callables = super().known_device_callables callables.update(get_c_callables()) return callables @@ -1608,8 +1608,8 @@ def get_device_ast_builder(self): @property @override - def known_callables(self): - callables = super().known_callables + def known_device_callables(self): + callables = super().known_device_callables callables.update(get_gnu_libc_callables()) return callables diff --git a/loopy/target/cuda.py b/loopy/target/cuda.py index 614786a0f..5e07da951 100644 --- a/loopy/target/cuda.py +++ b/loopy/target/cuda.py @@ -250,8 +250,8 @@ def get_device_ast_builder(self): return CUDACASTBuilder(self) @property - def known_callables(self): - callables = super().known_callables + def known_device_callables(self): + callables = super().known_device_callables callables.update(get_cuda_callables()) return callables diff --git a/loopy/target/opencl.py b/loopy/target/opencl.py index 0d17a2ca1..e434d608a 100644 --- a/loopy/target/opencl.py +++ b/loopy/target/opencl.py @@ -634,8 +634,8 @@ def get_device_ast_builder(self): @property @override - def known_callables(self): - callables = super().known_callables + def known_device_callables(self): + callables = super().known_device_callables callables.update(get_opencl_callables()) return callables diff --git a/loopy/target/pyopencl.py b/loopy/target/pyopencl.py index 55f046ed4..dbea17765 100644 --- a/loopy/target/pyopencl.py +++ b/loopy/target/pyopencl.py @@ -586,16 +586,24 @@ def get_device_ast_builder(self): @property @override - def known_callables(self): + def known_device_callables(self): from loopy.library.random123 import get_random123_callables # order matters: e.g. prefer our abs() over that of the # superclass - callables = super().known_callables + callables = super().known_device_callables callables.update(get_pyopencl_callables()) callables.update(get_random123_callables(self)) return callables + @property + @override + def known_host_callables(self): + from loopy.target.c import get_c_callables + callables = super().known_host_callables + callables.update(get_c_callables()) + return callables + # {{{ types @override diff --git a/loopy/target/python.py b/loopy/target/python.py index 1fd336da0..5de59b38f 100644 --- a/loopy/target/python.py +++ b/loopy/target/python.py @@ -190,6 +190,10 @@ class PythonASTBuilderBase(ASTBuilderBase[Generable]): """A Python host AST builder for integration with PyOpenCL. """ + @property + def known_callables(self): + return dict(self.target.known_host_callables) + def preamble_generators(self): return ( [*super().preamble_generators(), _base_python_preamble_generator]) diff --git a/loopy/tools.py b/loopy/tools.py index 777cc38a2..685293558 100644 --- a/loopy/tools.py +++ b/loopy/tools.py @@ -674,7 +674,7 @@ def __init__(self, rule_mapping_context, callables_table, target): @cached_property def known_callables(self): from loopy.kernel.function_interface import CallableKernel - return (frozenset(self.target.known_callables) + return (frozenset(self.target.known_device_callables) | {name for name, clbl in self.callables_table.items() if isinstance(clbl, CallableKernel)}) diff --git a/loopy/translation_unit.py b/loopy/translation_unit.py index fbb20b91b..cfb2e9b12 100644 --- a/loopy/translation_unit.py +++ b/loopy/translation_unit.py @@ -873,7 +873,7 @@ def resolve_callables(t_unit: TranslationUnit) -> TranslationUnit: # get registered callables known_callables = dict(t_unit.callables_table) # get target specific callables - known_callables.update(t_unit.target.known_callables) + known_callables.update(t_unit.target.known_device_callables) # get loopy specific callables known_callables.update(get_loopy_callables()) From 844de53791e7e82ce885a91eb8cbcfb910d38785 Mon Sep 17 00:00:00 2001 From: "copilot-swe-agent[bot]" <198982749+Copilot@users.noreply.github.com> Date: Tue, 10 Mar 2026 16:30:05 +0000 Subject: [PATCH 4/4] Fix ExecutableCWithGNULibcTarget missing known_device_callables override Co-authored-by: inducer <352067+inducer@users.noreply.github.com> --- loopy/target/c/__init__.py | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/loopy/target/c/__init__.py b/loopy/target/c/__init__.py index c6803f3fd..6c5520bfc 100644 --- a/loopy/target/c/__init__.py +++ b/loopy/target/c/__init__.py @@ -1622,6 +1622,13 @@ class ExecutableCWithGNULibcTarget(ExecutableCTarget): def get_device_ast_builder(self): return CWithGNULibcASTBuilder(self) + @property + @override + def known_device_callables(self): + callables = super().known_device_callables + callables.update(get_gnu_libc_callables()) + return callables + # }}} # vim: foldmethod=marker