From d109f76df23aa8970018cea612a407babd3ded65 Mon Sep 17 00:00:00 2001 From: bdchatham Date: Sat, 16 May 2026 10:56:21 -0700 Subject: [PATCH 1/6] feat(seinode): sidecar TLS via externally-provisioned Secret MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Implements #255 per the merged LLD at docs/design-seinode-sidecar-tls-toggle-lld.md. Cert ownership moves out of the controller. Operator/platform tooling provisions a kubernetes.io/tls Secret; the controller references it by name, validates its presence + cert SANs in a steady-state preflight branch, and gates init-plan creation on ConditionSidecarTLSSecretReady. Mirrors signingKey / nodeKey / operatorKeyring / imported-PVC patterns already in the controller. spec.sidecar.tls is immutable post-creation (CRD CEL); toggle = delete + recreate. API: - SidecarTLSSpec rewritten: drop IssuerName/IssuerKind, add SecretName - SidecarTLSStatus added: {SecretName, RequiredDNSNames} — machine- readable contract for platform tooling, replacing naming-convention docs - SeiNodeStatus.SidecarTLS *SidecarTLSStatus - ConditionSidecarTLSSecretReady with reasons Ready / NotFound / Malformed / SANsMismatch - CEL immutability rule on spec.sidecar.tls Controller: - reconcileSidecarTLSReady method in internal/controller/node/preflight_sidecar_tls.go - Reads Secret via APIReader (bypass cache), validates type + non-empty tls.crt/tls.key + PEM-decode + x509.ParseCertificate + SAN superset - Publishes status.sidecarTLS.requiredDNSNames whenever TLS is enabled - APIReader plumbed through SeiNodeReconciler Planner: - ResolvePlan gates init-plan creation when TLS enabled and condition not Ready; steady-state (Running) plans bypass the gate so image-drift rollouts proceed independently of TLS posture - buildBasePlan / buildBootstrapPlan / buildGenesisPlan emit only ApplyRBACProxyConfig under TLS (ApplySidecarCert removed) Code deletion: - internal/task/apply_sidecar_cert.go (task + deserializer registry entry) - GenerateSidecarCertificate from internal/noderesource/noderesource.go Tests: - preflight_sidecar_tls_test.go: validateTLSSecret matrix, requiredDNSNames, reconcileSidecarTLSReady lifecycle (enable/disable transitions, missing Secret, SANs mismatch). Uses in-memory ECDSA P-256 self-signed certs. - Existing sidecar_tls / plan_execution / reconciler / noderesource tests updated for the new spec shape. Co-Authored-By: Claude Opus 4.7 (1M context) --- api/v1alpha1/common_types.go | 29 +- api/v1alpha1/seinode_types.go | 33 ++ api/v1alpha1/zz_generated.deepcopy.go | 25 ++ cmd/main.go | 11 +- config/crd/sei.io_seinodedeployments.yaml | 34 +- config/crd/sei.io_seinodes.yaml | 54 ++- internal/controller/node/controller.go | 6 + .../controller/node/plan_execution_test.go | 20 +- .../controller/node/preflight_sidecar_tls.go | 150 ++++++++ .../node/preflight_sidecar_tls_test.go | 337 ++++++++++++++++++ internal/controller/node/reconciler_test.go | 11 +- internal/noderesource/noderesource.go | 45 +-- internal/noderesource/sidecar_tls_test.go | 46 +-- internal/planner/bootstrap.go | 14 +- internal/planner/planner.go | 29 +- internal/planner/sidecar_tls_test.go | 47 ++- internal/task/apply_sidecar_cert.go | 70 ---- internal/task/task.go | 1 - manifests/sei.io_seinodedeployments.yaml | 34 +- manifests/sei.io_seinodes.yaml | 54 ++- 20 files changed, 796 insertions(+), 254 deletions(-) create mode 100644 internal/controller/node/preflight_sidecar_tls.go create mode 100644 internal/controller/node/preflight_sidecar_tls_test.go delete mode 100644 internal/task/apply_sidecar_cert.go diff --git a/api/v1alpha1/common_types.go b/api/v1alpha1/common_types.go index cc4c9dd..63ab6b3 100644 --- a/api/v1alpha1/common_types.go +++ b/api/v1alpha1/common_types.go @@ -193,23 +193,26 @@ type SidecarConfig struct { Resources *corev1.ResourceRequirements `json:"resources,omitempty"` // TLS, if set, fronts the sidecar API with kube-rbac-proxy on - // :8443 backed by a cert-manager-issued cert. Init-only: - // toggling on a Running SeiNode requires recreation. See seictl#165. + // :8443 using TLS material from a Secret in the SeiNode's + // namespace. The Secret is operator-provisioned (e.g., via a + // cert-manager Certificate in the platform GitOps repo); this + // controller does not create it. + // + // Immutable post-creation. Toggling TLS on an existing SeiNode is a + // delete + recreate operation; data persists via the SeiNode's PVC + // retention mechanism. + // // +optional TLS *SidecarTLSSpec `json:"tls,omitempty"` } -// SidecarTLSSpec configures the cert-manager-issued serving cert for -// the kube-rbac-proxy fronting. +// SidecarTLSSpec references an externally-provisioned TLS Secret. type SidecarTLSSpec struct { - // IssuerName references a cert-manager Issuer or ClusterIssuer - // that signs the proxy's serving certificate. + // SecretName is a kubernetes.io/tls Secret in the SeiNode's + // namespace. The cert SANs must include the DNS names published + // in status.sidecarTLS.requiredDNSNames; the controller validates + // this before allowing the pod to schedule. // +kubebuilder:validation:Required - IssuerName string `json:"issuerName"` - - // IssuerKind is "Issuer" (namespaced) or "ClusterIssuer". - // +kubebuilder:default=ClusterIssuer - // +kubebuilder:validation:Enum=Issuer;ClusterIssuer - // +optional - IssuerKind string `json:"issuerKind,omitempty"` + // +kubebuilder:validation:MinLength=1 + SecretName string `json:"secretName"` } diff --git a/api/v1alpha1/seinode_types.go b/api/v1alpha1/seinode_types.go index e8ad503..41f4c75 100644 --- a/api/v1alpha1/seinode_types.go +++ b/api/v1alpha1/seinode_types.go @@ -10,6 +10,7 @@ import ( // the populated field determines the node's operating mode. // +kubebuilder:validation:XValidation:rule="(has(self.fullNode) ? 1 : 0) + (has(self.archive) ? 1 : 0) + (has(self.replayer) ? 1 : 0) + (has(self.validator) ? 1 : 0) == 1",message="exactly one of fullNode, archive, replayer, or validator must be set" // +kubebuilder:validation:XValidation:rule="!has(self.replayer) || (has(self.peers) && size(self.peers) > 0)",message="peers is required when replayer mode is set" +// +kubebuilder:validation:XValidation:rule="(!has(oldSelf.sidecar) || !has(oldSelf.sidecar.tls)) ? true : (has(self.sidecar) && has(self.sidecar.tls) && self.sidecar.tls == oldSelf.sidecar.tls)",message="spec.sidecar.tls is immutable; delete + recreate the SeiNode to change TLS configuration" type SeiNodeSpec struct { // ChainID of the chain this node belongs to. // +kubebuilder:validation:MinLength=1 @@ -273,6 +274,13 @@ const ( // pre-flight validation. Only set on SeiNodes with // spec.validator.operatorKeyring. ConditionOperatorKeyringReady = "OperatorKeyringReady" + + // ConditionSidecarTLSSecretReady indicates whether the externally- + // provisioned TLS Secret referenced by spec.sidecar.tls.secretName + // is present, well-formed, and has SANs matching the required DNS + // names. Mirrors SigningKeyReady / NodeKeyReady / OperatorKeyringReady. + // Only set on SeiNodes with spec.sidecar.tls. + ConditionSidecarTLSSecretReady = "SidecarTLSSecretReady" ) // Reasons for the ImportPVCReady condition. @@ -303,6 +311,14 @@ const ( ReasonOperatorKeyringInvalid = "OperatorKeyringInvalid" // terminal: fail the plan ) +// Reasons for the SidecarTLSSecretReady condition. +const ( + ReasonTLSSecretReady = "Ready" // Secret present, well-formed, SANs match required DNS names + ReasonTLSSecretNotFound = "NotFound" // Secret not found in the SeiNode's namespace + ReasonTLSSecretMalformed = "Malformed" // Wrong type, empty tls.crt/tls.key, or unparseable cert + ReasonTLSSecretSANsMismatch = "SANsMismatch" // Cert parses but cert.DNSNames does not include required SANs +) + // SeiNodeStatus defines the observed state of a SeiNode. type SeiNodeStatus struct { // Phase is the high-level lifecycle state. @@ -343,6 +359,23 @@ type SeiNodeStatus struct { // config so the node advertises a reachable address for gossip discovery. // +optional ExternalAddress string `json:"externalAddress,omitempty"` + + // SidecarTLS is set whenever spec.sidecar.tls is non-nil. Publishes + // the contract platform tooling must satisfy when provisioning the + // TLS Secret. Machine-readable replacement for naming-convention docs. + // +optional + SidecarTLS *SidecarTLSStatus `json:"sidecarTLS,omitempty"` +} + +// SidecarTLSStatus declares the controller's expectations of the +// referenced TLS Secret. +type SidecarTLSStatus struct { + // SecretName mirrors spec.sidecar.tls.secretName for visibility. + SecretName string `json:"secretName"` + + // RequiredDNSNames is the SAN list the cert in SecretName must + // include. Derived from the SeiNode's headless service DNS. + RequiredDNSNames []string `json:"requiredDNSNames"` } // +kubebuilder:object:root=true diff --git a/api/v1alpha1/zz_generated.deepcopy.go b/api/v1alpha1/zz_generated.deepcopy.go index efa7970..c044e9f 100644 --- a/api/v1alpha1/zz_generated.deepcopy.go +++ b/api/v1alpha1/zz_generated.deepcopy.go @@ -978,6 +978,11 @@ func (in *SeiNodeStatus) DeepCopyInto(out *SeiNodeStatus) { *out = make([]string, len(*in)) copy(*out, *in) } + if in.SidecarTLS != nil { + in, out := &in.SidecarTLS, &out.SidecarTLS + *out = new(SidecarTLSStatus) + (*in).DeepCopyInto(*out) + } } // DeepCopy is an autogenerated deepcopy function, copying the receiver, creating a new SeiNodeStatus. @@ -1117,6 +1122,26 @@ func (in *SidecarTLSSpec) DeepCopy() *SidecarTLSSpec { return out } +// DeepCopyInto is an autogenerated deepcopy function, copying the receiver, writing into out. in must be non-nil. +func (in *SidecarTLSStatus) DeepCopyInto(out *SidecarTLSStatus) { + *out = *in + if in.RequiredDNSNames != nil { + in, out := &in.RequiredDNSNames, &out.RequiredDNSNames + *out = make([]string, len(*in)) + copy(*out, *in) + } +} + +// DeepCopy is an autogenerated deepcopy function, copying the receiver, creating a new SidecarTLSStatus. +func (in *SidecarTLSStatus) DeepCopy() *SidecarTLSStatus { + if in == nil { + return nil + } + out := new(SidecarTLSStatus) + in.DeepCopyInto(out) + return out +} + // DeepCopyInto is an autogenerated deepcopy function, copying the receiver, writing into out. in must be non-nil. func (in *SigningKeySource) DeepCopyInto(out *SigningKeySource) { *out = *in diff --git a/cmd/main.go b/cmd/main.go index ed67e56..077b9c5 100644 --- a/cmd/main.go +++ b/cmd/main.go @@ -188,11 +188,12 @@ func main() { //nolint:staticcheck // TODO: migrate to GetEventRecorder (new events API) nodeRecorder := mgr.GetEventRecorderFor("seinode-controller") if err := (&nodecontroller.SeiNodeReconciler{ - Client: kc, - Scheme: mgr.GetScheme(), - Recorder: nodeRecorder, - Platform: platformCfg, - Planner: &planner.NodeResolver{BuildSidecarClient: buildSidecarClient}, + Client: kc, + APIReader: mgr.GetAPIReader(), + Scheme: mgr.GetScheme(), + Recorder: nodeRecorder, + Platform: platformCfg, + Planner: &planner.NodeResolver{BuildSidecarClient: buildSidecarClient}, PlanExecutor: &planner.Executor[*seiv1alpha1.SeiNode]{ ConfigFor: func(_ context.Context, node *seiv1alpha1.SeiNode) task.ExecutionConfig { return task.ExecutionConfig{ diff --git a/config/crd/sei.io_seinodedeployments.yaml b/config/crd/sei.io_seinodedeployments.yaml index 329ad8d..fcfaaf5 100644 --- a/config/crd/sei.io_seinodedeployments.yaml +++ b/config/crd/sei.io_seinodedeployments.yaml @@ -612,24 +612,25 @@ spec: tls: description: |- TLS, if set, fronts the sidecar API with kube-rbac-proxy on - :8443 backed by a cert-manager-issued cert. Init-only: - toggling on a Running SeiNode requires recreation. See seictl#165. + :8443 using TLS material from a Secret in the SeiNode's + namespace. The Secret is operator-provisioned (e.g., via a + cert-manager Certificate in the platform GitOps repo); this + controller does not create it. + + Immutable post-creation. Toggling TLS on an existing SeiNode is a + delete + recreate operation; data persists via the SeiNode's PVC + retention mechanism. properties: - issuerKind: - default: ClusterIssuer - description: IssuerKind is "Issuer" (namespaced) or - "ClusterIssuer". - enum: - - Issuer - - ClusterIssuer - type: string - issuerName: + secretName: description: |- - IssuerName references a cert-manager Issuer or ClusterIssuer - that signs the proxy's serving certificate. + SecretName is a kubernetes.io/tls Secret in the SeiNode's + namespace. The cert SANs must include the DNS names published + in status.sidecarTLS.requiredDNSNames; the controller validates + this before allowing the pod to schedule. + minLength: 1 type: string required: - - issuerName + - secretName type: object type: object validator: @@ -921,6 +922,11 @@ spec: - message: peers is required when replayer mode is set rule: '!has(self.replayer) || (has(self.peers) && size(self.peers) > 0)' + - message: spec.sidecar.tls is immutable; delete + recreate the + SeiNode to change TLS configuration + rule: '(!has(oldSelf.sidecar) || !has(oldSelf.sidecar.tls)) + ? true : (has(self.sidecar) && has(self.sidecar.tls) && self.sidecar.tls + == oldSelf.sidecar.tls)' required: - spec type: object diff --git a/config/crd/sei.io_seinodes.yaml b/config/crd/sei.io_seinodes.yaml index dad2a1c..626abc4 100644 --- a/config/crd/sei.io_seinodes.yaml +++ b/config/crd/sei.io_seinodes.yaml @@ -467,23 +467,25 @@ spec: tls: description: |- TLS, if set, fronts the sidecar API with kube-rbac-proxy on - :8443 backed by a cert-manager-issued cert. Init-only: - toggling on a Running SeiNode requires recreation. See seictl#165. + :8443 using TLS material from a Secret in the SeiNode's + namespace. The Secret is operator-provisioned (e.g., via a + cert-manager Certificate in the platform GitOps repo); this + controller does not create it. + + Immutable post-creation. Toggling TLS on an existing SeiNode is a + delete + recreate operation; data persists via the SeiNode's PVC + retention mechanism. properties: - issuerKind: - default: ClusterIssuer - description: IssuerKind is "Issuer" (namespaced) or "ClusterIssuer". - enum: - - Issuer - - ClusterIssuer - type: string - issuerName: + secretName: description: |- - IssuerName references a cert-manager Issuer or ClusterIssuer - that signs the proxy's serving certificate. + SecretName is a kubernetes.io/tls Secret in the SeiNode's + namespace. The cert SANs must include the DNS names published + in status.sidecarTLS.requiredDNSNames; the controller validates + this before allowing the pod to schedule. + minLength: 1 type: string required: - - issuerName + - secretName type: object type: object validator: @@ -769,6 +771,11 @@ spec: - message: peers is required when replayer mode is set rule: '!has(self.replayer) || (has(self.peers) && size(self.peers) > 0)' + - message: spec.sidecar.tls is immutable; delete + recreate the SeiNode + to change TLS configuration + rule: '(!has(oldSelf.sidecar) || !has(oldSelf.sidecar.tls)) ? true : + (has(self.sidecar) && has(self.sidecar.tls) && self.sidecar.tls == + oldSelf.sidecar.tls)' status: description: SeiNodeStatus defines the observed state of a SeiNode. properties: @@ -996,6 +1003,27 @@ spec: items: type: string type: array + sidecarTLS: + description: |- + SidecarTLS is set whenever spec.sidecar.tls is non-nil. Publishes + the contract platform tooling must satisfy when provisioning the + TLS Secret. Machine-readable replacement for naming-convention docs. + properties: + requiredDNSNames: + description: |- + RequiredDNSNames is the SAN list the cert in SecretName must + include. Derived from the SeiNode's headless service DNS. + items: + type: string + type: array + secretName: + description: SecretName mirrors spec.sidecar.tls.secretName for + visibility. + type: string + required: + - requiredDNSNames + - secretName + type: object type: object type: object served: true diff --git a/internal/controller/node/controller.go b/internal/controller/node/controller.go index 2f704ff..2557099 100644 --- a/internal/controller/node/controller.go +++ b/internal/controller/node/controller.go @@ -43,6 +43,10 @@ type PlatformConfig = platform.Config // SeiNodeReconciler reconciles a SeiNode object. type SeiNodeReconciler struct { client.Client + // APIReader bypasses the controller-runtime cache. Used by preflight + // validation that must observe newly-provisioned Secrets without + // waiting for cache propagation. + APIReader client.Reader Scheme *runtime.Scheme Recorder record.EventRecorder Platform PlatformConfig @@ -105,6 +109,8 @@ func (r *SeiNodeReconciler) Reconcile(ctx context.Context, req ctrl.Request) (ct return ctrl.Result{}, fmt.Errorf("reconciling peers: %w", err) } + r.reconcileSidecarTLSReady(ctx, node) + planAlreadyActive := node.Status.Plan != nil && node.Status.Plan.Phase == seiv1alpha1.TaskPlanActive if err := r.Planner.ResolvePlan(ctx, node); err != nil { return ctrl.Result{}, fmt.Errorf("resolving plan: %w", err) diff --git a/internal/controller/node/plan_execution_test.go b/internal/controller/node/plan_execution_test.go index e541541..c6d657e 100644 --- a/internal/controller/node/plan_execution_test.go +++ b/internal/controller/node/plan_execution_test.go @@ -119,10 +119,11 @@ func newProgressionReconciler(t *testing.T, mock *mockSidecarClient, objs ...cli WithStatusSubresource(&seiv1alpha1.SeiNode{}). Build() r := &SeiNodeReconciler{ - Client: c, - Scheme: s, - Recorder: record.NewFakeRecorder(100), - Platform: platformtest.Config(), + Client: c, + APIReader: c, + Scheme: s, + Recorder: record.NewFakeRecorder(100), + Platform: platformtest.Config(), Planner: &planner.NodeResolver{ BuildSidecarClient: func(_ *seiv1alpha1.SeiNode) (task.SidecarClient, error) { return mock, nil }, }, @@ -785,11 +786,12 @@ func TestReconcileInitializing_SidecarClientError_Requeues(t *testing.T) { Build() r := &SeiNodeReconciler{ - Client: c, - Scheme: s, - Recorder: record.NewFakeRecorder(100), - Platform: platformtest.Config(), - Planner: &planner.NodeResolver{}, + Client: c, + APIReader: c, + Scheme: s, + Recorder: record.NewFakeRecorder(100), + Platform: platformtest.Config(), + Planner: &planner.NodeResolver{}, PlanExecutor: &planner.Executor[*seiv1alpha1.SeiNode]{ ConfigFor: func(_ context.Context, n *seiv1alpha1.SeiNode) task.ExecutionConfig { return task.ExecutionConfig{ diff --git a/internal/controller/node/preflight_sidecar_tls.go b/internal/controller/node/preflight_sidecar_tls.go new file mode 100644 index 0000000..256ca48 --- /dev/null +++ b/internal/controller/node/preflight_sidecar_tls.go @@ -0,0 +1,150 @@ +package node + +import ( + "context" + "crypto/x509" + "encoding/pem" + "fmt" + "slices" + + corev1 "k8s.io/api/core/v1" + apierrors "k8s.io/apimachinery/pkg/api/errors" + apimeta "k8s.io/apimachinery/pkg/api/meta" + metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" + "k8s.io/apimachinery/pkg/types" + "sigs.k8s.io/controller-runtime/pkg/client" + + seiv1alpha1 "github.com/sei-protocol/sei-k8s-controller/api/v1alpha1" + "github.com/sei-protocol/sei-k8s-controller/internal/noderesource" +) + +// reconcileSidecarTLSReady is the steady-state preflight branch for the +// externally-provisioned sidecar TLS Secret. Mirrors the validate-signing-key +// / validate-node-key / validate-operator-keyring pre-flight checks, but +// runs as a controller method (not a plan task) so it gates plan creation +// rather than plan execution. +// +// When TLS is disabled the condition and status struct are cleared. +// When TLS is enabled the controller publishes status.sidecarTLS with the +// required DNS names and sets SidecarTLSSecretReady according to the +// Secret's presence + cert validity. +// +// All mutations are in-memory; the caller's Status().Patch flushes them. +// No error return: every check resolves to a condition reason rather than a +// reconcile failure, so the caller always proceeds to the rest of reconcile. +func (r *SeiNodeReconciler) reconcileSidecarTLSReady(ctx context.Context, node *seiv1alpha1.SeiNode) { + if !noderesource.SidecarTLSEnabled(node) { + apimeta.RemoveStatusCondition(&node.Status.Conditions, seiv1alpha1.ConditionSidecarTLSSecretReady) + node.Status.SidecarTLS = nil + return + } + + required := requiredDNSNames(node) + node.Status.SidecarTLS = &seiv1alpha1.SidecarTLSStatus{ + SecretName: node.Spec.Sidecar.TLS.SecretName, + RequiredDNSNames: required, + } + + reason, msg := validateTLSSecret(ctx, r.APIReader, node, required) + status := metav1.ConditionFalse + if reason == seiv1alpha1.ReasonTLSSecretReady { + status = metav1.ConditionTrue + } + apimeta.SetStatusCondition(&node.Status.Conditions, metav1.Condition{ + Type: seiv1alpha1.ConditionSidecarTLSSecretReady, + Status: status, + Reason: reason, + Message: msg, + ObservedGeneration: node.Generation, + }) +} + +// requiredDNSNames returns the SAN list the operator-provisioned cert must +// include. Derived from the SeiNode's headless service DNS — the names +// kube-rbac-proxy will be reached on from inside the cluster. +func requiredDNSNames(node *seiv1alpha1.SeiNode) []string { + return []string{ + fmt.Sprintf("%s.%s.svc.cluster.local", node.Name, node.Namespace), + fmt.Sprintf("%s-0.%s.%s.svc.cluster.local", node.Name, node.Name, node.Namespace), + } +} + +// validateTLSSecret reads the referenced Secret via the supplied reader +// (typically the controller's APIReader to bypass the cache) and returns +// the appropriate SidecarTLSSecretReady reason + message. +// +// Reasons: +// - Ready: Secret type kubernetes.io/tls, tls.crt/tls.key non-empty, +// cert parses, cert.DNSNames is a superset of required. +// - NotFound: Secret absent from the SeiNode's namespace. +// - Malformed: wrong Secret type, empty tls.crt/tls.key, or unparseable cert. +// - SANsMismatch: cert parses but does not cover the required DNS names. +func validateTLSSecret( + ctx context.Context, + reader client.Reader, + node *seiv1alpha1.SeiNode, + required []string, +) (reason, msg string) { + name := node.Spec.Sidecar.TLS.SecretName + + secret := &corev1.Secret{} + key := types.NamespacedName{Name: name, Namespace: node.Namespace} + if err := reader.Get(ctx, key, secret); err != nil { + if apierrors.IsNotFound(err) { + return seiv1alpha1.ReasonTLSSecretNotFound, + fmt.Sprintf("secret %q not found in namespace %q", name, node.Namespace) + } + // Transient errors (network, RBAC) surface as NotFound — the next + // reconcile retries. Distinguishing them in the condition would + // add a reason without operator-actionable signal. + return seiv1alpha1.ReasonTLSSecretNotFound, + fmt.Sprintf("getting Secret %q: %v", name, err) + } + + if secret.Type != corev1.SecretTypeTLS { + return seiv1alpha1.ReasonTLSSecretMalformed, + fmt.Sprintf("secret %q has type %q; want %q", name, secret.Type, corev1.SecretTypeTLS) + } + + crtPEM := secret.Data[corev1.TLSCertKey] + keyPEM := secret.Data[corev1.TLSPrivateKeyKey] + if len(crtPEM) == 0 { + return seiv1alpha1.ReasonTLSSecretMalformed, + fmt.Sprintf("secret %q has empty %s", name, corev1.TLSCertKey) + } + if len(keyPEM) == 0 { + return seiv1alpha1.ReasonTLSSecretMalformed, + fmt.Sprintf("secret %q has empty %s", name, corev1.TLSPrivateKeyKey) + } + + block, _ := pem.Decode(crtPEM) + if block == nil { + return seiv1alpha1.ReasonTLSSecretMalformed, + fmt.Sprintf("secret %q %s is not valid PEM", name, corev1.TLSCertKey) + } + cert, err := x509.ParseCertificate(block.Bytes) + if err != nil { + return seiv1alpha1.ReasonTLSSecretMalformed, + fmt.Sprintf("secret %q %s does not parse: %v", name, corev1.TLSCertKey, err) + } + + if missing := missingSANs(cert.DNSNames, required); len(missing) > 0 { + return seiv1alpha1.ReasonTLSSecretSANsMismatch, + fmt.Sprintf("secret %q cert SANs missing required DNS names: %v (cert has: %v)", + name, missing, cert.DNSNames) + } + + return seiv1alpha1.ReasonTLSSecretReady, + fmt.Sprintf("secret %q is well-formed and SANs cover required DNS names", name) +} + +// missingSANs returns the elements of required that are absent from have. +func missingSANs(have, required []string) []string { + var missing []string + for _, r := range required { + if !slices.Contains(have, r) { + missing = append(missing, r) + } + } + return missing +} diff --git a/internal/controller/node/preflight_sidecar_tls_test.go b/internal/controller/node/preflight_sidecar_tls_test.go new file mode 100644 index 0000000..0d9b9d6 --- /dev/null +++ b/internal/controller/node/preflight_sidecar_tls_test.go @@ -0,0 +1,337 @@ +package node + +import ( + "context" + "crypto/ecdsa" + "crypto/elliptic" + "crypto/rand" + "crypto/x509" + "crypto/x509/pkix" + "encoding/pem" + "math/big" + "testing" + "time" + + . "github.com/onsi/gomega" + corev1 "k8s.io/api/core/v1" + apimeta "k8s.io/apimachinery/pkg/api/meta" + metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" + "k8s.io/apimachinery/pkg/runtime" + clientgoscheme "k8s.io/client-go/kubernetes/scheme" + "sigs.k8s.io/controller-runtime/pkg/client" + "sigs.k8s.io/controller-runtime/pkg/client/fake" + + seiv1alpha1 "github.com/sei-protocol/sei-k8s-controller/api/v1alpha1" +) + +const ( + tlsTestChainID = "atlantic-2" + tlsTestImage = "ghcr.io/sei-protocol/seid:latest" +) + +// makeSelfSignedCertPEM produces a PEM-encoded ECDSA P-256 self-signed +// certificate with the given DNS SANs. Cryptographic verification is not +// the controller's concern — the preflight check only parses the cert and +// inspects DNSNames — so a self-signed leaf is sufficient test material. +func makeSelfSignedCertPEM(t *testing.T, dnsNames []string) []byte { + t.Helper() + key, err := ecdsa.GenerateKey(elliptic.P256(), rand.Reader) + if err != nil { + t.Fatalf("generating ECDSA key: %v", err) + } + tmpl := &x509.Certificate{ + SerialNumber: big.NewInt(1), + Subject: pkix.Name{CommonName: "test-leaf"}, + NotBefore: time.Now().Add(-1 * time.Hour), + NotAfter: time.Now().Add(24 * time.Hour), + DNSNames: dnsNames, + KeyUsage: x509.KeyUsageDigitalSignature, + ExtKeyUsage: []x509.ExtKeyUsage{x509.ExtKeyUsageServerAuth}, + } + der, err := x509.CreateCertificate(rand.Reader, tmpl, tmpl, &key.PublicKey, key) + if err != nil { + t.Fatalf("creating cert: %v", err) + } + return pem.EncodeToMemory(&pem.Block{Type: "CERTIFICATE", Bytes: der}) +} + +func tlsTestNode() *seiv1alpha1.SeiNode { + return &seiv1alpha1.SeiNode{ + ObjectMeta: metav1.ObjectMeta{Name: "tls-node", Namespace: "ns-1"}, + Spec: seiv1alpha1.SeiNodeSpec{ + ChainID: tlsTestChainID, + Image: tlsTestImage, + FullNode: &seiv1alpha1.FullNodeSpec{}, + Sidecar: &seiv1alpha1.SidecarConfig{ + TLS: &seiv1alpha1.SidecarTLSSpec{SecretName: "tls-node-cert"}, + }, + }, + } +} + +func tlsTestNodeNoTLS() *seiv1alpha1.SeiNode { + n := tlsTestNode() + n.Spec.Sidecar = nil + return n +} + +func tlsSecret(name, namespace string, crt, key []byte, secretType corev1.SecretType) *corev1.Secret { + return &corev1.Secret{ + ObjectMeta: metav1.ObjectMeta{Name: name, Namespace: namespace}, + Type: secretType, + Data: map[string][]byte{ + corev1.TLSCertKey: crt, + corev1.TLSPrivateKeyKey: key, + }, + } +} + +func tlsReader(t *testing.T, objs ...client.Object) client.Reader { + t.Helper() + s := runtime.NewScheme() + if err := clientgoscheme.AddToScheme(s); err != nil { + t.Fatal(err) + } + if err := seiv1alpha1.AddToScheme(s); err != nil { + t.Fatal(err) + } + return fake.NewClientBuilder().WithScheme(s).WithObjects(objs...).Build() +} + +// --- requiredDNSNames --- + +func TestRequiredDNSNames_ServiceAndPod0(t *testing.T) { + g := NewWithT(t) + got := requiredDNSNames(tlsTestNode()) + g.Expect(got).To(Equal([]string{ + "tls-node.ns-1.svc.cluster.local", + "tls-node-0.tls-node.ns-1.svc.cluster.local", + })) +} + +func TestRequiredDNSNames_DifferentNamespace(t *testing.T) { + g := NewWithT(t) + n := tlsTestNode() + n.Namespace = "eng-brandon" + n.Name = "validator-0" + got := requiredDNSNames(n) + g.Expect(got).To(Equal([]string{ + "validator-0.eng-brandon.svc.cluster.local", + "validator-0-0.validator-0.eng-brandon.svc.cluster.local", + })) +} + +// --- validateTLSSecret matrix --- + +func TestValidateTLSSecret_Ready(t *testing.T) { + g := NewWithT(t) + node := tlsTestNode() + required := requiredDNSNames(node) + crt := makeSelfSignedCertPEM(t, required) + sec := tlsSecret(node.Spec.Sidecar.TLS.SecretName, node.Namespace, crt, []byte("k"), corev1.SecretTypeTLS) + + reason, msg := validateTLSSecret(context.Background(), tlsReader(t, sec), node, required) + g.Expect(reason).To(Equal(seiv1alpha1.ReasonTLSSecretReady)) + g.Expect(msg).To(ContainSubstring("well-formed")) +} + +func TestValidateTLSSecret_NotFound(t *testing.T) { + g := NewWithT(t) + node := tlsTestNode() + reason, msg := validateTLSSecret(context.Background(), tlsReader(t), node, requiredDNSNames(node)) + g.Expect(reason).To(Equal(seiv1alpha1.ReasonTLSSecretNotFound)) + g.Expect(msg).To(ContainSubstring("not found")) +} + +func TestValidateTLSSecret_WrongType(t *testing.T) { + g := NewWithT(t) + node := tlsTestNode() + sec := tlsSecret(node.Spec.Sidecar.TLS.SecretName, node.Namespace, + []byte("c"), []byte("k"), corev1.SecretTypeOpaque) + + reason, msg := validateTLSSecret(context.Background(), tlsReader(t, sec), node, requiredDNSNames(node)) + g.Expect(reason).To(Equal(seiv1alpha1.ReasonTLSSecretMalformed)) + g.Expect(msg).To(ContainSubstring("type")) +} + +func TestValidateTLSSecret_EmptyCert(t *testing.T) { + g := NewWithT(t) + node := tlsTestNode() + sec := tlsSecret(node.Spec.Sidecar.TLS.SecretName, node.Namespace, + []byte(""), []byte("k"), corev1.SecretTypeTLS) + + reason, msg := validateTLSSecret(context.Background(), tlsReader(t, sec), node, requiredDNSNames(node)) + g.Expect(reason).To(Equal(seiv1alpha1.ReasonTLSSecretMalformed)) + g.Expect(msg).To(ContainSubstring(corev1.TLSCertKey)) +} + +func TestValidateTLSSecret_EmptyKey(t *testing.T) { + g := NewWithT(t) + node := tlsTestNode() + sec := tlsSecret(node.Spec.Sidecar.TLS.SecretName, node.Namespace, + makeSelfSignedCertPEM(t, requiredDNSNames(node)), []byte(""), corev1.SecretTypeTLS) + + reason, msg := validateTLSSecret(context.Background(), tlsReader(t, sec), node, requiredDNSNames(node)) + g.Expect(reason).To(Equal(seiv1alpha1.ReasonTLSSecretMalformed)) + g.Expect(msg).To(ContainSubstring(corev1.TLSPrivateKeyKey)) +} + +func TestValidateTLSSecret_NotPEM(t *testing.T) { + g := NewWithT(t) + node := tlsTestNode() + sec := tlsSecret(node.Spec.Sidecar.TLS.SecretName, node.Namespace, + []byte("not-a-pem-block"), []byte("k"), corev1.SecretTypeTLS) + + reason, msg := validateTLSSecret(context.Background(), tlsReader(t, sec), node, requiredDNSNames(node)) + g.Expect(reason).To(Equal(seiv1alpha1.ReasonTLSSecretMalformed)) + g.Expect(msg).To(ContainSubstring("PEM")) +} + +func TestValidateTLSSecret_PEMButUnparseable(t *testing.T) { + g := NewWithT(t) + node := tlsTestNode() + garbage := pem.EncodeToMemory(&pem.Block{Type: "CERTIFICATE", Bytes: []byte("\x00\x01\x02\x03")}) + sec := tlsSecret(node.Spec.Sidecar.TLS.SecretName, node.Namespace, + garbage, []byte("k"), corev1.SecretTypeTLS) + + reason, msg := validateTLSSecret(context.Background(), tlsReader(t, sec), node, requiredDNSNames(node)) + g.Expect(reason).To(Equal(seiv1alpha1.ReasonTLSSecretMalformed)) + g.Expect(msg).To(ContainSubstring("does not parse")) +} + +func TestValidateTLSSecret_SANsMismatch(t *testing.T) { + g := NewWithT(t) + node := tlsTestNode() + // Cert has the wrong DNS names — covers neither required SAN. + crt := makeSelfSignedCertPEM(t, []string{"unrelated.example.com"}) + sec := tlsSecret(node.Spec.Sidecar.TLS.SecretName, node.Namespace, crt, []byte("k"), corev1.SecretTypeTLS) + + reason, msg := validateTLSSecret(context.Background(), tlsReader(t, sec), node, requiredDNSNames(node)) + g.Expect(reason).To(Equal(seiv1alpha1.ReasonTLSSecretSANsMismatch)) + g.Expect(msg).To(ContainSubstring("missing")) +} + +func TestValidateTLSSecret_PartialSANsMismatch(t *testing.T) { + // Cert covers the service SAN but not the pod-0 SAN — still a mismatch. + g := NewWithT(t) + node := tlsTestNode() + required := requiredDNSNames(node) + crt := makeSelfSignedCertPEM(t, []string{required[0]}) // only the service entry + sec := tlsSecret(node.Spec.Sidecar.TLS.SecretName, node.Namespace, crt, []byte("k"), corev1.SecretTypeTLS) + + reason, _ := validateTLSSecret(context.Background(), tlsReader(t, sec), node, required) + g.Expect(reason).To(Equal(seiv1alpha1.ReasonTLSSecretSANsMismatch)) +} + +func TestValidateTLSSecret_SupersetSANsOK(t *testing.T) { + // Cert covers required SANs plus extras — still Ready. + g := NewWithT(t) + node := tlsTestNode() + required := requiredDNSNames(node) + crt := makeSelfSignedCertPEM(t, append([]string{"extra.example.com"}, required...)) + sec := tlsSecret(node.Spec.Sidecar.TLS.SecretName, node.Namespace, crt, []byte("k"), corev1.SecretTypeTLS) + + reason, _ := validateTLSSecret(context.Background(), tlsReader(t, sec), node, required) + g.Expect(reason).To(Equal(seiv1alpha1.ReasonTLSSecretReady)) +} + +// --- reconcileSidecarTLSReady lifecycle --- + +func tlsReconciler(t *testing.T, objs ...client.Object) *SeiNodeReconciler { + t.Helper() + s := runtime.NewScheme() + if err := clientgoscheme.AddToScheme(s); err != nil { + t.Fatal(err) + } + if err := seiv1alpha1.AddToScheme(s); err != nil { + t.Fatal(err) + } + c := fake.NewClientBuilder().WithScheme(s).WithObjects(objs...).Build() + return &SeiNodeReconciler{Client: c, APIReader: c, Scheme: s} +} + +func TestReconcileSidecarTLSReady_TLSDisabled_ClearsConditionAndStatus(t *testing.T) { + g := NewWithT(t) + node := tlsTestNodeNoTLS() + // Seed stale state to verify it's cleared. + node.Status.SidecarTLS = &seiv1alpha1.SidecarTLSStatus{SecretName: "stale", RequiredDNSNames: []string{"stale.example"}} + apimeta.SetStatusCondition(&node.Status.Conditions, metav1.Condition{ + Type: seiv1alpha1.ConditionSidecarTLSSecretReady, Status: metav1.ConditionTrue, Reason: "Ready", + }) + + r := tlsReconciler(t) + r.reconcileSidecarTLSReady(context.Background(), node) + + g.Expect(node.Status.SidecarTLS).To(BeNil()) + g.Expect(apimeta.FindStatusCondition(node.Status.Conditions, seiv1alpha1.ConditionSidecarTLSSecretReady)).To(BeNil()) +} + +func TestReconcileSidecarTLSReady_TLSEnabledSecretReady_SetsConditionTrue(t *testing.T) { + g := NewWithT(t) + node := tlsTestNode() + required := requiredDNSNames(node) + crt := makeSelfSignedCertPEM(t, required) + sec := tlsSecret(node.Spec.Sidecar.TLS.SecretName, node.Namespace, crt, []byte("k"), corev1.SecretTypeTLS) + + r := tlsReconciler(t, sec) + r.reconcileSidecarTLSReady(context.Background(), node) + + g.Expect(node.Status.SidecarTLS).NotTo(BeNil()) + g.Expect(node.Status.SidecarTLS.SecretName).To(Equal(node.Spec.Sidecar.TLS.SecretName)) + g.Expect(node.Status.SidecarTLS.RequiredDNSNames).To(Equal(required)) + + cond := apimeta.FindStatusCondition(node.Status.Conditions, seiv1alpha1.ConditionSidecarTLSSecretReady) + g.Expect(cond).NotTo(BeNil()) + g.Expect(cond.Status).To(Equal(metav1.ConditionTrue)) + g.Expect(cond.Reason).To(Equal(seiv1alpha1.ReasonTLSSecretReady)) +} + +func TestReconcileSidecarTLSReady_TLSEnabledSecretMissing_SetsConditionFalse(t *testing.T) { + g := NewWithT(t) + node := tlsTestNode() + + r := tlsReconciler(t) // no Secret seeded + r.reconcileSidecarTLSReady(context.Background(), node) + + g.Expect(node.Status.SidecarTLS).NotTo(BeNil(), "status must still publish the required-SANs contract") + cond := apimeta.FindStatusCondition(node.Status.Conditions, seiv1alpha1.ConditionSidecarTLSSecretReady) + g.Expect(cond).NotTo(BeNil()) + g.Expect(cond.Status).To(Equal(metav1.ConditionFalse)) + g.Expect(cond.Reason).To(Equal(seiv1alpha1.ReasonTLSSecretNotFound)) +} + +func TestReconcileSidecarTLSReady_SANsMismatch_SetsConditionFalse(t *testing.T) { + g := NewWithT(t) + node := tlsTestNode() + crt := makeSelfSignedCertPEM(t, []string{"wrong.example.com"}) + sec := tlsSecret(node.Spec.Sidecar.TLS.SecretName, node.Namespace, crt, []byte("k"), corev1.SecretTypeTLS) + + r := tlsReconciler(t, sec) + r.reconcileSidecarTLSReady(context.Background(), node) + + cond := apimeta.FindStatusCondition(node.Status.Conditions, seiv1alpha1.ConditionSidecarTLSSecretReady) + g.Expect(cond).NotTo(BeNil()) + g.Expect(cond.Status).To(Equal(metav1.ConditionFalse)) + g.Expect(cond.Reason).To(Equal(seiv1alpha1.ReasonTLSSecretSANsMismatch)) +} + +func TestReconcileSidecarTLSReady_TransitionEnabledToDisabled_Cleans(t *testing.T) { + g := NewWithT(t) + node := tlsTestNode() + required := requiredDNSNames(node) + crt := makeSelfSignedCertPEM(t, required) + sec := tlsSecret(node.Spec.Sidecar.TLS.SecretName, node.Namespace, crt, []byte("k"), corev1.SecretTypeTLS) + + r := tlsReconciler(t, sec) + r.reconcileSidecarTLSReady(context.Background(), node) + g.Expect(node.Status.SidecarTLS).NotTo(BeNil()) + + // Operator drops spec.sidecar.tls (would be CEL-blocked in prod; this + // exercises the reconcile branch's defensive cleanup for the same-name + // transition that envtests reach via separate-object lifecycle). + node.Spec.Sidecar = nil + r.reconcileSidecarTLSReady(context.Background(), node) + + g.Expect(node.Status.SidecarTLS).To(BeNil()) + g.Expect(apimeta.FindStatusCondition(node.Status.Conditions, seiv1alpha1.ConditionSidecarTLSSecretReady)).To(BeNil()) +} diff --git a/internal/controller/node/reconciler_test.go b/internal/controller/node/reconciler_test.go index 65c7c0f..b0a65e6 100644 --- a/internal/controller/node/reconciler_test.go +++ b/internal/controller/node/reconciler_test.go @@ -45,11 +45,12 @@ func newNodeReconciler(t *testing.T, objs ...client.Object) (*SeiNodeReconciler, Build() mock := &mockSidecarClient{} r := &SeiNodeReconciler{ - Client: c, - Scheme: s, - Recorder: record.NewFakeRecorder(100), - Platform: platformtest.Config(), - Planner: &planner.NodeResolver{}, + Client: c, + APIReader: c, + Scheme: s, + Recorder: record.NewFakeRecorder(100), + Platform: platformtest.Config(), + Planner: &planner.NodeResolver{}, PlanExecutor: &planner.Executor[*seiv1alpha1.SeiNode]{ ConfigFor: func(_ context.Context, node *seiv1alpha1.SeiNode) task.ExecutionConfig { return task.ExecutionConfig{ diff --git a/internal/noderesource/noderesource.go b/internal/noderesource/noderesource.go index 0dd68e1..2c0010c 100644 --- a/internal/noderesource/noderesource.go +++ b/internal/noderesource/noderesource.go @@ -14,7 +14,6 @@ import ( corev1 "k8s.io/api/core/v1" "k8s.io/apimachinery/pkg/api/resource" metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" - "k8s.io/apimachinery/pkg/apis/meta/v1/unstructured" "k8s.io/apimachinery/pkg/util/intstr" "k8s.io/utils/ptr" @@ -951,11 +950,15 @@ func SidecarTLSEnabled(node *seiv1alpha1.SeiNode) bool { return node.Spec.Sidecar != nil && node.Spec.Sidecar.TLS != nil } -// SidecarTLSSecretName is the cert-manager-managed Secret carrying the -// proxy's TLS material. Exported because plan tasks reference it when -// emitting the Certificate. +// SidecarTLSSecretName returns the operator-provisioned kubernetes.io/tls +// Secret name from spec.sidecar.tls.secretName. The controller does not +// create this Secret; platform tooling provisions it ahead of SeiNode +// creation. Returns empty when TLS is disabled. func SidecarTLSSecretName(node *seiv1alpha1.SeiNode) string { - return node.Name + "-sidecar-tls" + if !SidecarTLSEnabled(node) { + return "" + } + return node.Spec.Sidecar.TLS.SecretName } // RBACProxyConfigMapName is the ConfigMap carrying the proxy's @@ -1119,38 +1122,6 @@ func proxyReadinessProbe() *corev1.Probe { } } -// GenerateSidecarCertificate returns an unstructured Certificate so -// the controller avoids depending on cert-manager Go types — the -// three fields we need (apiVersion, kind, spec) are stable. -func GenerateSidecarCertificate(node *seiv1alpha1.SeiNode) *unstructured.Unstructured { - if !SidecarTLSEnabled(node) { - return nil - } - tls := node.Spec.Sidecar.TLS - svcDNS := fmt.Sprintf("%s.%s.svc.cluster.local", node.Name, node.Namespace) - pod0DNS := fmt.Sprintf("%s-0.%s.%s.svc.cluster.local", node.Name, node.Name, node.Namespace) - - obj := &unstructured.Unstructured{} - obj.SetAPIVersion("cert-manager.io/v1") - obj.SetKind("Certificate") - obj.SetName(SidecarTLSSecretName(node)) - obj.SetNamespace(node.Namespace) - obj.SetLabels(ResourceLabels(node)) - _ = unstructured.SetNestedMap(obj.Object, map[string]any{ - "secretName": SidecarTLSSecretName(node), - "duration": "2160h", // 90 days - "renewBefore": "360h", // 15 days - "commonName": svcDNS, - "dnsNames": []any{svcDNS, pod0DNS}, - "issuerRef": map[string]any{ - "name": tls.IssuerName, - "kind": tls.IssuerKind, - "group": "cert-manager.io", - }, - }, "spec") - return obj -} - // GenerateRBACProxyConfigMap produces the ConfigMap carrying the // kube-rbac-proxy authorization config — a single coarse SAR scoped // to (group=sei.io, resource=seinodetasks, namespace=, name=). diff --git a/internal/noderesource/sidecar_tls_test.go b/internal/noderesource/sidecar_tls_test.go index 95953e2..02d97e6 100644 --- a/internal/noderesource/sidecar_tls_test.go +++ b/internal/noderesource/sidecar_tls_test.go @@ -13,8 +13,7 @@ import ( func withSidecarTLS(node *seiv1alpha1.SeiNode) *seiv1alpha1.SeiNode { node.Spec.Sidecar.TLS = &seiv1alpha1.SidecarTLSSpec{ - IssuerName: "validator-ca", - IssuerKind: "ClusterIssuer", + SecretName: node.Name + "-tls", } return node } @@ -159,27 +158,32 @@ func TestServicePorts_NoAPIPortWithoutTLS(t *testing.T) { } } -func TestGenerateSidecarCertificate_NilWithoutTLS(t *testing.T) { +func TestSidecarTLSSecretName_ReadsFromSpec(t *testing.T) { g := NewWithT(t) - g.Expect(GenerateSidecarCertificate(newGenesisNode("a", "default"))).To(BeNil()) + g.Expect(SidecarTLSSecretName(newGenesisNode("a", "default"))).To(Equal(""), + "empty when TLS disabled") + + node := withSidecarTLS(newGenesisNode("a", "default")) + g.Expect(SidecarTLSSecretName(node)).To(Equal("a-tls"), + "returns spec.sidecar.tls.secretName when TLS enabled") } -func TestGenerateSidecarCertificate_HappyPath(t *testing.T) { +func TestPodSpec_TLSVolumeUsesSecretNameFromSpec(t *testing.T) { g := NewWithT(t) - cert := GenerateSidecarCertificate(withSidecarTLS(newGenesisNode("a", "default"))) - g.Expect(cert).NotTo(BeNil()) - g.Expect(cert.GetAPIVersion()).To(Equal("cert-manager.io/v1")) - g.Expect(cert.GetKind()).To(Equal("Certificate")) - g.Expect(cert.GetName()).To(Equal("a-sidecar-tls")) - - spec, ok := unstructuredMap(cert, "spec") - g.Expect(ok).To(BeTrue()) - g.Expect(spec["secretName"]).To(Equal("a-sidecar-tls")) - g.Expect(spec["commonName"]).To(Equal("a.default.svc.cluster.local")) - - issuerRef, _ := spec["issuerRef"].(map[string]any) - g.Expect(issuerRef["name"]).To(Equal("validator-ca")) - g.Expect(issuerRef["kind"]).To(Equal("ClusterIssuer")) + node := withSidecarTLS(newGenesisNode("a", "default")) + node.Spec.Sidecar.TLS.SecretName = "custom-cert-secret" + sts := mustGenerateStatefulSet(t, node, platformtest.Config()) + + var tlsVol *corev1.Volume + for i := range sts.Spec.Template.Spec.Volumes { + if sts.Spec.Template.Spec.Volumes[i].Name == sidecarTLSVolumeName { + tlsVol = &sts.Spec.Template.Spec.Volumes[i] + break + } + } + g.Expect(tlsVol).NotTo(BeNil()) + g.Expect(tlsVol.Secret).NotTo(BeNil()) + g.Expect(tlsVol.Secret.SecretName).To(Equal("custom-cert-secret")) } func TestGenerateRBACProxyConfigMap_NilWithoutTLS(t *testing.T) { @@ -260,7 +264,3 @@ func TestRollback_RemovesProxyResources(t *testing.T) { } } -func unstructuredMap(u interface{ UnstructuredContent() map[string]any }, key string) (map[string]any, bool) { - v, ok := u.UnstructuredContent()[key].(map[string]any) - return v, ok -} diff --git a/internal/planner/bootstrap.go b/internal/planner/bootstrap.go index 0902a76..9482079 100644 --- a/internal/planner/bootstrap.go +++ b/internal/planner/bootstrap.go @@ -99,10 +99,10 @@ func buildBootstrapPlan( // Phase 4: Create production StatefulSet and Service (after bootstrap teardown frees the PVC) if noderesource.SidecarTLSEnabled(node) { - if err := appendTask(task.TaskTypeApplySidecarCert, - &task.ApplySidecarCertParams{NodeName: node.Name, Namespace: node.Namespace}); err != nil { - return nil, err - } + // kube-rbac-proxy authz ConfigMap is controller-owned; the + // TLS Secret itself is operator-provisioned externally and + // gated on via the SidecarTLSSecretReady condition before + // this plan is built. if err := appendTask(task.TaskTypeApplyRBACProxyConfig, &task.ApplyRBACProxyConfigParams{NodeName: node.Name, Namespace: node.Namespace}); err != nil { return nil, err @@ -187,7 +187,11 @@ func buildGenesisPlan(node *seiv1alpha1.SeiNode) (*seiv1alpha1.TaskPlan, error) prog := []string{task.TaskTypeEnsureDataPVC} if noderesource.SidecarTLSEnabled(node) { - prog = append(prog, task.TaskTypeApplySidecarCert, task.TaskTypeApplyRBACProxyConfig) + // kube-rbac-proxy authz ConfigMap is controller-owned; the + // TLS Secret itself is operator-provisioned externally and + // gated on via the SidecarTLSSecretReady condition before + // this plan is built. + prog = append(prog, task.TaskTypeApplyRBACProxyConfig) } prog = append(prog, task.TaskTypeApplyStatefulSet, diff --git a/internal/planner/planner.go b/internal/planner/planner.go index d6fe745..5ffd397 100644 --- a/internal/planner/planner.go +++ b/internal/planner/planner.go @@ -134,6 +134,18 @@ func (p *NodeResolver) ResolvePlan(ctx context.Context, node *seiv1alpha1.SeiNod handleTerminalPlan(ctx, node) + // Gate init-plan creation on sidecar TLS readiness. The controller's + // preflight method (reconcileSidecarTLSReady) sets this condition + // before ResolvePlan runs. Mirrors the not-yet-ready behavior of + // referenced Secrets: stay in Pending until the operator provisions + // a valid Secret. Steady-state (Running) plans bypass this gate so + // observability stays live and image-drift plans still build. + if noderesource.SidecarTLSEnabled(node) && + node.Status.Phase != seiv1alpha1.PhaseRunning && + !sidecarTLSSecretReady(node) { + return nil + } + mode, err := plannerForMode(node) if err != nil { return err @@ -159,6 +171,13 @@ func (p *NodeResolver) ResolvePlan(ctx context.Context, node *seiv1alpha1.SeiNod return nil } +// sidecarTLSSecretReady returns true iff the SidecarTLSSecretReady +// condition is present and True. Missing/False both gate plan creation. +func sidecarTLSSecretReady(node *seiv1alpha1.SeiNode) bool { + cond := meta.FindStatusCondition(node.Status.Conditions, seiv1alpha1.ConditionSidecarTLSSecretReady) + return cond != nil && cond.Status == metav1.ConditionTrue +} + // handleTerminalPlan handles completed or failed plans: clears conditions // and nils the plan so the planner can build the next one if needed. func handleTerminalPlan(ctx context.Context, node *seiv1alpha1.SeiNode) { @@ -529,9 +548,11 @@ func buildBasePlan( prog = append(prog, task.TaskTypeValidateOperatorKeyring) } if noderesource.SidecarTLSEnabled(node) { - // Emit Cert + ConfigMap before pod schedules. Cert-manager - // is async; kubelet retries Secret mounts. - prog = append(prog, task.TaskTypeApplySidecarCert, task.TaskTypeApplyRBACProxyConfig) + // kube-rbac-proxy authz ConfigMap is controller-owned; the + // TLS Secret itself is operator-provisioned externally and + // gated on via the SidecarTLSSecretReady condition before + // this plan is built. + prog = append(prog, task.TaskTypeApplyRBACProxyConfig) } prog = append(prog, task.TaskTypeApplyStatefulSet, task.TaskTypeApplyService) prog = append(prog, sidecarProg...) @@ -574,8 +595,6 @@ func paramsForTaskType( return &task.ApplyServiceParams{NodeName: node.Name, Namespace: node.Namespace} case task.TaskTypeApplyRBACProxyConfig: return &task.ApplyRBACProxyConfigParams{NodeName: node.Name, Namespace: node.Namespace} - case task.TaskTypeApplySidecarCert: - return &task.ApplySidecarCertParams{NodeName: node.Name, Namespace: node.Namespace} case task.TaskTypeReplacePod: return &task.ReplacePodParams{NodeName: node.Name, Namespace: node.Namespace} case task.TaskTypeObserveImage: diff --git a/internal/planner/sidecar_tls_test.go b/internal/planner/sidecar_tls_test.go index 9496424..1315784 100644 --- a/internal/planner/sidecar_tls_test.go +++ b/internal/planner/sidecar_tls_test.go @@ -25,7 +25,7 @@ func TestSidecarURLForNode_TLSRoutesThroughProxyHTTPS(t *testing.T) { node := &seiv1alpha1.SeiNode{ ObjectMeta: metav1.ObjectMeta{Name: testValidatorName, Namespace: "sei"}, Spec: seiv1alpha1.SeiNodeSpec{Sidecar: &seiv1alpha1.SidecarConfig{ - TLS: &seiv1alpha1.SidecarTLSSpec{IssuerName: "ca", IssuerKind: "ClusterIssuer"}, + TLS: &seiv1alpha1.SidecarTLSSpec{SecretName: testValidatorName + "-tls"}, }}, } got := SidecarURLForNode(node) @@ -40,7 +40,7 @@ const ( testSeidImage = "seid:v6.4.1" ) -func TestValidatorPlanner_BuildPlan_NoSidecarTLSTasksByDefault(t *testing.T) { +func TestValidatorPlanner_BuildPlan_NoRBACProxyConfigTaskByDefault(t *testing.T) { node := &seiv1alpha1.SeiNode{ ObjectMeta: metav1.ObjectMeta{Name: testValidatorName, Namespace: sourceChainID}, Spec: seiv1alpha1.SeiNodeSpec{ @@ -53,10 +53,6 @@ func TestValidatorPlanner_BuildPlan_NoSidecarTLSTasksByDefault(t *testing.T) { if err != nil { t.Fatalf("BuildPlan: %v", err) } - if idx := indexOfTaskType(plan, task.TaskTypeApplySidecarCert); idx >= 0 { - t.Fatalf("plan must not contain %s without spec.sidecar.tls; got %v", - task.TaskTypeApplySidecarCert, taskTypes(plan)) - } if idx := indexOfTaskType(plan, task.TaskTypeApplyRBACProxyConfig); idx >= 0 { t.Fatalf("plan must not contain %s without spec.sidecar.tls; got %v", task.TaskTypeApplyRBACProxyConfig, taskTypes(plan)) @@ -64,13 +60,10 @@ func TestValidatorPlanner_BuildPlan_NoSidecarTLSTasksByDefault(t *testing.T) { } func tlsSpec() *seiv1alpha1.SidecarTLSSpec { - return &seiv1alpha1.SidecarTLSSpec{ - IssuerName: "validator-ca", - IssuerKind: "ClusterIssuer", - } + return &seiv1alpha1.SidecarTLSSpec{SecretName: testValidatorName + "-tls"} } -func TestValidatorPlanner_BuildPlan_SidecarTLSTasksSequencedBeforeStatefulSet(t *testing.T) { +func TestValidatorPlanner_BuildPlan_RBACProxyConfigSequencedBeforeStatefulSet(t *testing.T) { node := &seiv1alpha1.SeiNode{ ObjectMeta: metav1.ObjectMeta{Name: testValidatorName, Namespace: sourceChainID}, Spec: seiv1alpha1.SeiNodeSpec{ @@ -84,31 +77,31 @@ func TestValidatorPlanner_BuildPlan_SidecarTLSTasksSequencedBeforeStatefulSet(t if err != nil { t.Fatalf("BuildPlan: %v", err) } - assertTLSTasksBeforeStatefulSet(t, plan) + assertRBACProxyConfigBeforeStatefulSet(t, plan) } -// assertTLSTasksBeforeStatefulSet covers the invariant that the TLS -// side-resources (Certificate, ConfigMap) are emitted before the -// StatefulSet apply. The pod-spec includes the rbac-proxy ConfigMap -// + TLS Secret as required volumes; if those don't exist the pod -// stays Pending. Regression-locks the cursor-bot HIGH from PR #223. -func assertTLSTasksBeforeStatefulSet(t *testing.T, plan *seiv1alpha1.TaskPlan) { +// assertRBACProxyConfigBeforeStatefulSet locks the invariant that the +// kube-rbac-proxy authz ConfigMap is emitted before the StatefulSet +// apply. The pod-spec mounts the ConfigMap; if it doesn't exist the pod +// stays Pending. (Under the externalized-Secret design the TLS material +// itself is operator-provisioned and gated via the +// SidecarTLSSecretReady condition; only the authz ConfigMap is in-plan.) +func assertRBACProxyConfigBeforeStatefulSet(t *testing.T, plan *seiv1alpha1.TaskPlan) { t.Helper() - certIdx := indexOfTaskType(plan, task.TaskTypeApplySidecarCert) cfgIdx := indexOfTaskType(plan, task.TaskTypeApplyRBACProxyConfig) stsIdx := indexOfTaskType(plan, task.TaskTypeApplyStatefulSet) - if certIdx < 0 || cfgIdx < 0 { - t.Fatalf("plan must contain both %s and %s; got %v", - task.TaskTypeApplySidecarCert, task.TaskTypeApplyRBACProxyConfig, taskTypes(plan)) + if cfgIdx < 0 { + t.Fatalf("plan must contain %s; got %v", + task.TaskTypeApplyRBACProxyConfig, taskTypes(plan)) } - if certIdx >= stsIdx || cfgIdx >= stsIdx { - t.Fatalf("expected sidecar-cert(%d), rbac-proxy-config(%d) < apply-statefulset(%d); got %v", - certIdx, cfgIdx, stsIdx, taskTypes(plan)) + if cfgIdx >= stsIdx { + t.Fatalf("expected rbac-proxy-config(%d) < apply-statefulset(%d); got %v", + cfgIdx, stsIdx, taskTypes(plan)) } } -func TestBuildGenesisPlan_SidecarTLSTasksSequencedBeforeStatefulSet(t *testing.T) { +func TestBuildGenesisPlan_RBACProxyConfigSequencedBeforeStatefulSet(t *testing.T) { node := &seiv1alpha1.SeiNode{ ObjectMeta: metav1.ObjectMeta{Name: testValidatorName, Namespace: sourceChainID}, Spec: seiv1alpha1.SeiNodeSpec{ @@ -122,5 +115,5 @@ func TestBuildGenesisPlan_SidecarTLSTasksSequencedBeforeStatefulSet(t *testing.T if err != nil { t.Fatalf("buildGenesisPlan: %v", err) } - assertTLSTasksBeforeStatefulSet(t, plan) + assertRBACProxyConfigBeforeStatefulSet(t, plan) } diff --git a/internal/task/apply_sidecar_cert.go b/internal/task/apply_sidecar_cert.go deleted file mode 100644 index d2ef3e1..0000000 --- a/internal/task/apply_sidecar_cert.go +++ /dev/null @@ -1,70 +0,0 @@ -package task - -import ( - "context" - "encoding/json" - "fmt" - - ctrl "sigs.k8s.io/controller-runtime" - "sigs.k8s.io/controller-runtime/pkg/client" - - seiv1alpha1 "github.com/sei-protocol/sei-k8s-controller/api/v1alpha1" - "github.com/sei-protocol/sei-k8s-controller/internal/noderesource" -) - -const TaskTypeApplySidecarCert = "apply-sidecar-cert" - -type ApplySidecarCertParams struct { - NodeName string `json:"nodeName"` - Namespace string `json:"namespace"` -} - -type applySidecarCertExecution struct { - taskBase - params ApplySidecarCertParams - cfg ExecutionConfig -} - -func deserializeApplySidecarCert(id string, params json.RawMessage, cfg ExecutionConfig) (TaskExecution, error) { - var p ApplySidecarCertParams - if len(params) > 0 { - if err := json.Unmarshal(params, &p); err != nil { - return nil, fmt.Errorf("deserializing apply-sidecar-cert params: %w", err) - } - } - return &applySidecarCertExecution{ - taskBase: taskBase{id: id, status: ExecutionRunning}, - params: p, - cfg: cfg, - }, nil -} - -func (e *applySidecarCertExecution) Execute(ctx context.Context) error { - node, err := ResourceAs[*seiv1alpha1.SeiNode](e.cfg) - if err != nil { - return err - } - - desired := noderesource.GenerateSidecarCertificate(node) - if desired == nil { - e.complete() - return nil - } - if err := ctrl.SetControllerReference(node, desired, e.cfg.Scheme); err != nil { - return fmt.Errorf("setting owner reference on sidecar Certificate: %w", err) - } - - // ForceOwnership: our reconcile wins on field conflicts. cert-manager - // owns .status (separate field manager); we set only .spec. - //nolint:staticcheck // unstructured Patch + Apply is the documented path for non-typed CRDs - if err := e.cfg.KubeClient.Patch(ctx, desired, client.Apply, fieldOwner, client.ForceOwnership); err != nil { - return fmt.Errorf("applying sidecar Certificate: %w", err) - } - - e.complete() - return nil -} - -func (e *applySidecarCertExecution) Status(_ context.Context) ExecutionStatus { - return e.DefaultStatus() -} diff --git a/internal/task/task.go b/internal/task/task.go index cc03c27..ff3b7ef 100644 --- a/internal/task/task.go +++ b/internal/task/task.go @@ -219,7 +219,6 @@ var registry = map[string]taskDeserializer{ TaskTypeApplyStatefulSet: deserializeApplyStatefulSet, TaskTypeApplyService: deserializeApplyService, TaskTypeApplyRBACProxyConfig: deserializeApplyRBACProxyConfig, - TaskTypeApplySidecarCert: deserializeApplySidecarCert, TaskTypeReplacePod: deserializeReplacePod, TaskTypeObserveImage: deserializeObserveImage, TaskTypeValidateSigningKey: deserializeValidateSigningKey, diff --git a/manifests/sei.io_seinodedeployments.yaml b/manifests/sei.io_seinodedeployments.yaml index 329ad8d..fcfaaf5 100644 --- a/manifests/sei.io_seinodedeployments.yaml +++ b/manifests/sei.io_seinodedeployments.yaml @@ -612,24 +612,25 @@ spec: tls: description: |- TLS, if set, fronts the sidecar API with kube-rbac-proxy on - :8443 backed by a cert-manager-issued cert. Init-only: - toggling on a Running SeiNode requires recreation. See seictl#165. + :8443 using TLS material from a Secret in the SeiNode's + namespace. The Secret is operator-provisioned (e.g., via a + cert-manager Certificate in the platform GitOps repo); this + controller does not create it. + + Immutable post-creation. Toggling TLS on an existing SeiNode is a + delete + recreate operation; data persists via the SeiNode's PVC + retention mechanism. properties: - issuerKind: - default: ClusterIssuer - description: IssuerKind is "Issuer" (namespaced) or - "ClusterIssuer". - enum: - - Issuer - - ClusterIssuer - type: string - issuerName: + secretName: description: |- - IssuerName references a cert-manager Issuer or ClusterIssuer - that signs the proxy's serving certificate. + SecretName is a kubernetes.io/tls Secret in the SeiNode's + namespace. The cert SANs must include the DNS names published + in status.sidecarTLS.requiredDNSNames; the controller validates + this before allowing the pod to schedule. + minLength: 1 type: string required: - - issuerName + - secretName type: object type: object validator: @@ -921,6 +922,11 @@ spec: - message: peers is required when replayer mode is set rule: '!has(self.replayer) || (has(self.peers) && size(self.peers) > 0)' + - message: spec.sidecar.tls is immutable; delete + recreate the + SeiNode to change TLS configuration + rule: '(!has(oldSelf.sidecar) || !has(oldSelf.sidecar.tls)) + ? true : (has(self.sidecar) && has(self.sidecar.tls) && self.sidecar.tls + == oldSelf.sidecar.tls)' required: - spec type: object diff --git a/manifests/sei.io_seinodes.yaml b/manifests/sei.io_seinodes.yaml index dad2a1c..626abc4 100644 --- a/manifests/sei.io_seinodes.yaml +++ b/manifests/sei.io_seinodes.yaml @@ -467,23 +467,25 @@ spec: tls: description: |- TLS, if set, fronts the sidecar API with kube-rbac-proxy on - :8443 backed by a cert-manager-issued cert. Init-only: - toggling on a Running SeiNode requires recreation. See seictl#165. + :8443 using TLS material from a Secret in the SeiNode's + namespace. The Secret is operator-provisioned (e.g., via a + cert-manager Certificate in the platform GitOps repo); this + controller does not create it. + + Immutable post-creation. Toggling TLS on an existing SeiNode is a + delete + recreate operation; data persists via the SeiNode's PVC + retention mechanism. properties: - issuerKind: - default: ClusterIssuer - description: IssuerKind is "Issuer" (namespaced) or "ClusterIssuer". - enum: - - Issuer - - ClusterIssuer - type: string - issuerName: + secretName: description: |- - IssuerName references a cert-manager Issuer or ClusterIssuer - that signs the proxy's serving certificate. + SecretName is a kubernetes.io/tls Secret in the SeiNode's + namespace. The cert SANs must include the DNS names published + in status.sidecarTLS.requiredDNSNames; the controller validates + this before allowing the pod to schedule. + minLength: 1 type: string required: - - issuerName + - secretName type: object type: object validator: @@ -769,6 +771,11 @@ spec: - message: peers is required when replayer mode is set rule: '!has(self.replayer) || (has(self.peers) && size(self.peers) > 0)' + - message: spec.sidecar.tls is immutable; delete + recreate the SeiNode + to change TLS configuration + rule: '(!has(oldSelf.sidecar) || !has(oldSelf.sidecar.tls)) ? true : + (has(self.sidecar) && has(self.sidecar.tls) && self.sidecar.tls == + oldSelf.sidecar.tls)' status: description: SeiNodeStatus defines the observed state of a SeiNode. properties: @@ -996,6 +1003,27 @@ spec: items: type: string type: array + sidecarTLS: + description: |- + SidecarTLS is set whenever spec.sidecar.tls is non-nil. Publishes + the contract platform tooling must satisfy when provisioning the + TLS Secret. Machine-readable replacement for naming-convention docs. + properties: + requiredDNSNames: + description: |- + RequiredDNSNames is the SAN list the cert in SecretName must + include. Derived from the SeiNode's headless service DNS. + items: + type: string + type: array + secretName: + description: SecretName mirrors spec.sidecar.tls.secretName for + visibility. + type: string + required: + - requiredDNSNames + - secretName + type: object type: object type: object served: true From 28d89be064e58216d6cd579d16c0adaca61f90b4 Mon Sep 17 00:00:00 2001 From: bdchatham Date: Sat, 16 May 2026 11:47:11 -0700 Subject: [PATCH 2/6] =?UTF-8?q?fixup(seinode):=20cross-review=20findings?= =?UTF-8?q?=20=E2=80=94=20tighten=20CEL=20+=20reason=20prefixes=20+=20doc?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Addresses platform-engineer cross-review against the implementation commit. One blocker (CEL too permissive), rest are non-blocker / ergonomics fixups. - api: tighten the spec.sidecar.tls immutability CEL rule. The prior shape short-circuited to true whenever oldSelf.sidecar or oldSelf.sidecar.tls was unset, which silently permitted enabling TLS on an existing Running SeiNode via spec mutation. Under the new rule, TLS state at creation is final: a SeiNode that started without TLS stays without TLS for its lifetime, and a SeiNode that started with TLS stays at the same TLS spec for its lifetime. Matches LLD §0.1(b). - api: prefix reason string values (TLSSecretReady, TLSSecretNotFound, TLSSecretMalformed, TLSSecretSANsMismatch) to match the existing SigningKey/NodeKey/OperatorKeyring family so SRE tooling can grep across condition types consistently. - task: prefix the transient-error message in validateTLSSecret so an operator who sees Reason=NotFound but finds the Secret present can disambiguate "transient error" from "Secret missing" without grepping controller logs. - api: doc-comment on SidecarTLSStatus.RequiredDNSNames notes the names are pre-computable from the target spec; published for verification, not as a prerequisite handshake. - noderesource: doc-comment on SidecarTLSSecretName notes callers should gate on SidecarTLSEnabled before relying on the empty return. - test: clarify the TransitionEnabledToDisabled test comment now that the CEL rule actually blocks the mutation in prod. Co-Authored-By: Claude Opus 4.7 (1M context) --- api/v1alpha1/seinode_types.go | 20 ++++++++++++------- config/crd/sei.io_seinodedeployments.yaml | 4 ++-- config/crd/sei.io_seinodes.yaml | 12 +++++++---- .../controller/node/preflight_sidecar_tls.go | 9 +++++---- .../node/preflight_sidecar_tls_test.go | 14 +++++++++---- internal/noderesource/noderesource.go | 3 ++- manifests/sei.io_seinodedeployments.yaml | 4 ++-- manifests/sei.io_seinodes.yaml | 12 +++++++---- 8 files changed, 50 insertions(+), 28 deletions(-) diff --git a/api/v1alpha1/seinode_types.go b/api/v1alpha1/seinode_types.go index 41f4c75..51edff4 100644 --- a/api/v1alpha1/seinode_types.go +++ b/api/v1alpha1/seinode_types.go @@ -10,7 +10,7 @@ import ( // the populated field determines the node's operating mode. // +kubebuilder:validation:XValidation:rule="(has(self.fullNode) ? 1 : 0) + (has(self.archive) ? 1 : 0) + (has(self.replayer) ? 1 : 0) + (has(self.validator) ? 1 : 0) == 1",message="exactly one of fullNode, archive, replayer, or validator must be set" // +kubebuilder:validation:XValidation:rule="!has(self.replayer) || (has(self.peers) && size(self.peers) > 0)",message="peers is required when replayer mode is set" -// +kubebuilder:validation:XValidation:rule="(!has(oldSelf.sidecar) || !has(oldSelf.sidecar.tls)) ? true : (has(self.sidecar) && has(self.sidecar.tls) && self.sidecar.tls == oldSelf.sidecar.tls)",message="spec.sidecar.tls is immutable; delete + recreate the SeiNode to change TLS configuration" +// +kubebuilder:validation:XValidation:rule="(!has(oldSelf.sidecar) || !has(oldSelf.sidecar.tls)) ? (!has(self.sidecar) || !has(self.sidecar.tls)) : (has(self.sidecar) && has(self.sidecar.tls) && self.sidecar.tls == oldSelf.sidecar.tls)",message="spec.sidecar.tls is immutable; delete + recreate the SeiNode to change TLS configuration" type SeiNodeSpec struct { // ChainID of the chain this node belongs to. // +kubebuilder:validation:MinLength=1 @@ -311,12 +311,14 @@ const ( ReasonOperatorKeyringInvalid = "OperatorKeyringInvalid" // terminal: fail the plan ) -// Reasons for the SidecarTLSSecretReady condition. +// Reasons for the SidecarTLSSecretReady condition. String values are +// prefixed to match the existing SigningKey/NodeKey/OperatorKeyring +// reason convention so SRE tooling can grep across condition types. const ( - ReasonTLSSecretReady = "Ready" // Secret present, well-formed, SANs match required DNS names - ReasonTLSSecretNotFound = "NotFound" // Secret not found in the SeiNode's namespace - ReasonTLSSecretMalformed = "Malformed" // Wrong type, empty tls.crt/tls.key, or unparseable cert - ReasonTLSSecretSANsMismatch = "SANsMismatch" // Cert parses but cert.DNSNames does not include required SANs + ReasonTLSSecretReady = "TLSSecretReady" // Secret present, well-formed, SANs match required DNS names + ReasonTLSSecretNotFound = "TLSSecretNotFound" // Secret not found in the SeiNode's namespace + ReasonTLSSecretMalformed = "TLSSecretMalformed" // Wrong type, empty tls.crt/tls.key, or unparseable cert + ReasonTLSSecretSANsMismatch = "TLSSecretSANsMismatch" // Cert parses but cert.DNSNames does not include required SANs ) // SeiNodeStatus defines the observed state of a SeiNode. @@ -374,7 +376,11 @@ type SidecarTLSStatus struct { SecretName string `json:"secretName"` // RequiredDNSNames is the SAN list the cert in SecretName must - // include. Derived from the SeiNode's headless service DNS. + // include. Derived from the SeiNode's headless service DNS: + // {name}.{namespace}.svc.cluster.local and + // {name}-0.{name}.{namespace}.svc.cluster.local. Pre-computable + // by platform tooling from the target spec; published here for + // verification rather than as a prerequisite handshake. RequiredDNSNames []string `json:"requiredDNSNames"` } diff --git a/config/crd/sei.io_seinodedeployments.yaml b/config/crd/sei.io_seinodedeployments.yaml index fcfaaf5..d9e3437 100644 --- a/config/crd/sei.io_seinodedeployments.yaml +++ b/config/crd/sei.io_seinodedeployments.yaml @@ -925,8 +925,8 @@ spec: - message: spec.sidecar.tls is immutable; delete + recreate the SeiNode to change TLS configuration rule: '(!has(oldSelf.sidecar) || !has(oldSelf.sidecar.tls)) - ? true : (has(self.sidecar) && has(self.sidecar.tls) && self.sidecar.tls - == oldSelf.sidecar.tls)' + ? (!has(self.sidecar) || !has(self.sidecar.tls)) : (has(self.sidecar) + && has(self.sidecar.tls) && self.sidecar.tls == oldSelf.sidecar.tls)' required: - spec type: object diff --git a/config/crd/sei.io_seinodes.yaml b/config/crd/sei.io_seinodes.yaml index 626abc4..c354bbc 100644 --- a/config/crd/sei.io_seinodes.yaml +++ b/config/crd/sei.io_seinodes.yaml @@ -773,9 +773,9 @@ spec: 0)' - message: spec.sidecar.tls is immutable; delete + recreate the SeiNode to change TLS configuration - rule: '(!has(oldSelf.sidecar) || !has(oldSelf.sidecar.tls)) ? true : - (has(self.sidecar) && has(self.sidecar.tls) && self.sidecar.tls == - oldSelf.sidecar.tls)' + rule: '(!has(oldSelf.sidecar) || !has(oldSelf.sidecar.tls)) ? (!has(self.sidecar) + || !has(self.sidecar.tls)) : (has(self.sidecar) && has(self.sidecar.tls) + && self.sidecar.tls == oldSelf.sidecar.tls)' status: description: SeiNodeStatus defines the observed state of a SeiNode. properties: @@ -1012,7 +1012,11 @@ spec: requiredDNSNames: description: |- RequiredDNSNames is the SAN list the cert in SecretName must - include. Derived from the SeiNode's headless service DNS. + include. Derived from the SeiNode's headless service DNS: + {name}.{namespace}.svc.cluster.local and + {name}-0.{name}.{namespace}.svc.cluster.local. Pre-computable + by platform tooling from the target spec; published here for + verification rather than as a prerequisite handshake. items: type: string type: array diff --git a/internal/controller/node/preflight_sidecar_tls.go b/internal/controller/node/preflight_sidecar_tls.go index 256ca48..9048498 100644 --- a/internal/controller/node/preflight_sidecar_tls.go +++ b/internal/controller/node/preflight_sidecar_tls.go @@ -94,11 +94,12 @@ func validateTLSSecret( return seiv1alpha1.ReasonTLSSecretNotFound, fmt.Sprintf("secret %q not found in namespace %q", name, node.Namespace) } - // Transient errors (network, RBAC) surface as NotFound — the next - // reconcile retries. Distinguishing them in the condition would - // add a reason without operator-actionable signal. + // Transient errors (network, RBAC) surface under the NotFound reason + // (the next reconcile retries). Message is prefixed so an operator + // who finds the Secret present can disambiguate without grepping + // controller logs. return seiv1alpha1.ReasonTLSSecretNotFound, - fmt.Sprintf("getting Secret %q: %v", name, err) + fmt.Sprintf("transient error getting Secret %q: %v", name, err) } if secret.Type != corev1.SecretTypeTLS { diff --git a/internal/controller/node/preflight_sidecar_tls_test.go b/internal/controller/node/preflight_sidecar_tls_test.go index 0d9b9d6..2588206 100644 --- a/internal/controller/node/preflight_sidecar_tls_test.go +++ b/internal/controller/node/preflight_sidecar_tls_test.go @@ -256,7 +256,7 @@ func TestReconcileSidecarTLSReady_TLSDisabled_ClearsConditionAndStatus(t *testin // Seed stale state to verify it's cleared. node.Status.SidecarTLS = &seiv1alpha1.SidecarTLSStatus{SecretName: "stale", RequiredDNSNames: []string{"stale.example"}} apimeta.SetStatusCondition(&node.Status.Conditions, metav1.Condition{ - Type: seiv1alpha1.ConditionSidecarTLSSecretReady, Status: metav1.ConditionTrue, Reason: "Ready", + Type: seiv1alpha1.ConditionSidecarTLSSecretReady, Status: metav1.ConditionTrue, Reason: seiv1alpha1.ReasonTLSSecretReady, }) r := tlsReconciler(t) @@ -326,9 +326,15 @@ func TestReconcileSidecarTLSReady_TransitionEnabledToDisabled_Cleans(t *testing. r.reconcileSidecarTLSReady(context.Background(), node) g.Expect(node.Status.SidecarTLS).NotTo(BeNil()) - // Operator drops spec.sidecar.tls (would be CEL-blocked in prod; this - // exercises the reconcile branch's defensive cleanup for the same-name - // transition that envtests reach via separate-object lifecycle). + // Exercises the same-in-memory-object cleanup path used when + // reconciling a fresh SeiNode after the prior one was deleted + + // recreated under the same name. CEL blocks in-place spec mutation + // in prod (see SeiNodeSpec XValidation rule); this test simulates + // what the controller sees on first reconcile of the recreated + // SeiNode if the prior controller-resident object was somehow stale. + // Cert/key pairing is intentionally NOT validated by the preflight + // (kube-rbac-proxy detects mismatch at bind time); per LLD §2 we + // validate cert SHAPE, not crypto consistency. node.Spec.Sidecar = nil r.reconcileSidecarTLSReady(context.Background(), node) diff --git a/internal/noderesource/noderesource.go b/internal/noderesource/noderesource.go index 2c0010c..c6ad067 100644 --- a/internal/noderesource/noderesource.go +++ b/internal/noderesource/noderesource.go @@ -953,7 +953,8 @@ func SidecarTLSEnabled(node *seiv1alpha1.SeiNode) bool { // SidecarTLSSecretName returns the operator-provisioned kubernetes.io/tls // Secret name from spec.sidecar.tls.secretName. The controller does not // create this Secret; platform tooling provisions it ahead of SeiNode -// creation. Returns empty when TLS is disabled. +// creation. Returns empty when TLS is disabled — callers should gate on +// SidecarTLSEnabled before relying on the return value. func SidecarTLSSecretName(node *seiv1alpha1.SeiNode) string { if !SidecarTLSEnabled(node) { return "" diff --git a/manifests/sei.io_seinodedeployments.yaml b/manifests/sei.io_seinodedeployments.yaml index fcfaaf5..d9e3437 100644 --- a/manifests/sei.io_seinodedeployments.yaml +++ b/manifests/sei.io_seinodedeployments.yaml @@ -925,8 +925,8 @@ spec: - message: spec.sidecar.tls is immutable; delete + recreate the SeiNode to change TLS configuration rule: '(!has(oldSelf.sidecar) || !has(oldSelf.sidecar.tls)) - ? true : (has(self.sidecar) && has(self.sidecar.tls) && self.sidecar.tls - == oldSelf.sidecar.tls)' + ? (!has(self.sidecar) || !has(self.sidecar.tls)) : (has(self.sidecar) + && has(self.sidecar.tls) && self.sidecar.tls == oldSelf.sidecar.tls)' required: - spec type: object diff --git a/manifests/sei.io_seinodes.yaml b/manifests/sei.io_seinodes.yaml index 626abc4..c354bbc 100644 --- a/manifests/sei.io_seinodes.yaml +++ b/manifests/sei.io_seinodes.yaml @@ -773,9 +773,9 @@ spec: 0)' - message: spec.sidecar.tls is immutable; delete + recreate the SeiNode to change TLS configuration - rule: '(!has(oldSelf.sidecar) || !has(oldSelf.sidecar.tls)) ? true : - (has(self.sidecar) && has(self.sidecar.tls) && self.sidecar.tls == - oldSelf.sidecar.tls)' + rule: '(!has(oldSelf.sidecar) || !has(oldSelf.sidecar.tls)) ? (!has(self.sidecar) + || !has(self.sidecar.tls)) : (has(self.sidecar) && has(self.sidecar.tls) + && self.sidecar.tls == oldSelf.sidecar.tls)' status: description: SeiNodeStatus defines the observed state of a SeiNode. properties: @@ -1012,7 +1012,11 @@ spec: requiredDNSNames: description: |- RequiredDNSNames is the SAN list the cert in SecretName must - include. Derived from the SeiNode's headless service DNS. + include. Derived from the SeiNode's headless service DNS: + {name}.{namespace}.svc.cluster.local and + {name}-0.{name}.{namespace}.svc.cluster.local. Pre-computable + by platform tooling from the target spec; published here for + verification rather than as a prerequisite handshake. items: type: string type: array From 1364b2bdc28317c88042db70724657d5fb6a8145 Mon Sep 17 00:00:00 2001 From: bdchatham Date: Sun, 17 May 2026 14:20:24 -0700 Subject: [PATCH 3/6] fixup(seinode): trim verbose comments Strip the speculative / multi-paragraph / cross-referencing comments added in earlier commits. Keep what describes present behavior; drop what describes intent, rationale, or future state. Co-Authored-By: Claude Opus 4.7 (1M context) --- api/v1alpha1/common_types.go | 14 ++----- api/v1alpha1/seinode_types.go | 36 ++++++----------- config/crd/sei.io_seinodedeployments.yaml | 13 ++----- config/crd/sei.io_seinodes.yaml | 30 ++++---------- internal/controller/node/controller.go | 3 -- .../controller/node/preflight_sidecar_tls.go | 39 ++++--------------- .../node/preflight_sidecar_tls_test.go | 11 +----- internal/noderesource/noderesource.go | 7 +--- internal/planner/bootstrap.go | 8 ---- internal/planner/planner.go | 15 ++----- manifests/sei.io_seinodedeployments.yaml | 13 ++----- manifests/sei.io_seinodes.yaml | 30 ++++---------- 12 files changed, 55 insertions(+), 164 deletions(-) diff --git a/api/v1alpha1/common_types.go b/api/v1alpha1/common_types.go index 63ab6b3..a1ea02f 100644 --- a/api/v1alpha1/common_types.go +++ b/api/v1alpha1/common_types.go @@ -192,16 +192,10 @@ type SidecarConfig struct { // +optional Resources *corev1.ResourceRequirements `json:"resources,omitempty"` - // TLS, if set, fronts the sidecar API with kube-rbac-proxy on - // :8443 using TLS material from a Secret in the SeiNode's - // namespace. The Secret is operator-provisioned (e.g., via a - // cert-manager Certificate in the platform GitOps repo); this - // controller does not create it. - // - // Immutable post-creation. Toggling TLS on an existing SeiNode is a - // delete + recreate operation; data persists via the SeiNode's PVC - // retention mechanism. - // + // TLS, if set, fronts the sidecar API with kube-rbac-proxy on :8443 + // using TLS material from a Secret in the SeiNode's namespace. The + // Secret is operator-provisioned; this controller does not create + // it. Immutable. // +optional TLS *SidecarTLSSpec `json:"tls,omitempty"` } diff --git a/api/v1alpha1/seinode_types.go b/api/v1alpha1/seinode_types.go index 51edff4..23078aa 100644 --- a/api/v1alpha1/seinode_types.go +++ b/api/v1alpha1/seinode_types.go @@ -275,11 +275,9 @@ const ( // spec.validator.operatorKeyring. ConditionOperatorKeyringReady = "OperatorKeyringReady" - // ConditionSidecarTLSSecretReady indicates whether the externally- - // provisioned TLS Secret referenced by spec.sidecar.tls.secretName - // is present, well-formed, and has SANs matching the required DNS - // names. Mirrors SigningKeyReady / NodeKeyReady / OperatorKeyringReady. - // Only set on SeiNodes with spec.sidecar.tls. + // ConditionSidecarTLSSecretReady reports whether the Secret named + // in spec.sidecar.tls.secretName is present, well-formed, and has + // SANs matching status.sidecarTLS.requiredDNSNames. ConditionSidecarTLSSecretReady = "SidecarTLSSecretReady" ) @@ -311,14 +309,12 @@ const ( ReasonOperatorKeyringInvalid = "OperatorKeyringInvalid" // terminal: fail the plan ) -// Reasons for the SidecarTLSSecretReady condition. String values are -// prefixed to match the existing SigningKey/NodeKey/OperatorKeyring -// reason convention so SRE tooling can grep across condition types. +// Reasons for the SidecarTLSSecretReady condition. const ( - ReasonTLSSecretReady = "TLSSecretReady" // Secret present, well-formed, SANs match required DNS names - ReasonTLSSecretNotFound = "TLSSecretNotFound" // Secret not found in the SeiNode's namespace - ReasonTLSSecretMalformed = "TLSSecretMalformed" // Wrong type, empty tls.crt/tls.key, or unparseable cert - ReasonTLSSecretSANsMismatch = "TLSSecretSANsMismatch" // Cert parses but cert.DNSNames does not include required SANs + ReasonTLSSecretReady = "TLSSecretReady" // ready + ReasonTLSSecretNotFound = "TLSSecretNotFound" // Secret missing + ReasonTLSSecretMalformed = "TLSSecretMalformed" // wrong type, empty tls.crt/tls.key, or unparseable cert + ReasonTLSSecretSANsMismatch = "TLSSecretSANsMismatch" // cert.DNSNames missing one or more required SANs ) // SeiNodeStatus defines the observed state of a SeiNode. @@ -362,25 +358,17 @@ type SeiNodeStatus struct { // +optional ExternalAddress string `json:"externalAddress,omitempty"` - // SidecarTLS is set whenever spec.sidecar.tls is non-nil. Publishes - // the contract platform tooling must satisfy when provisioning the - // TLS Secret. Machine-readable replacement for naming-convention docs. + // SidecarTLS is set when spec.sidecar.tls is non-nil. // +optional SidecarTLS *SidecarTLSStatus `json:"sidecarTLS,omitempty"` } -// SidecarTLSStatus declares the controller's expectations of the -// referenced TLS Secret. +// SidecarTLSStatus is the controller's view of the referenced TLS Secret. type SidecarTLSStatus struct { - // SecretName mirrors spec.sidecar.tls.secretName for visibility. + // SecretName mirrors spec.sidecar.tls.secretName. SecretName string `json:"secretName"` - // RequiredDNSNames is the SAN list the cert in SecretName must - // include. Derived from the SeiNode's headless service DNS: - // {name}.{namespace}.svc.cluster.local and - // {name}-0.{name}.{namespace}.svc.cluster.local. Pre-computable - // by platform tooling from the target spec; published here for - // verification rather than as a prerequisite handshake. + // RequiredDNSNames is the SAN list the cert in SecretName must include. RequiredDNSNames []string `json:"requiredDNSNames"` } diff --git a/config/crd/sei.io_seinodedeployments.yaml b/config/crd/sei.io_seinodedeployments.yaml index d9e3437..671c58f 100644 --- a/config/crd/sei.io_seinodedeployments.yaml +++ b/config/crd/sei.io_seinodedeployments.yaml @@ -611,15 +611,10 @@ spec: type: object tls: description: |- - TLS, if set, fronts the sidecar API with kube-rbac-proxy on - :8443 using TLS material from a Secret in the SeiNode's - namespace. The Secret is operator-provisioned (e.g., via a - cert-manager Certificate in the platform GitOps repo); this - controller does not create it. - - Immutable post-creation. Toggling TLS on an existing SeiNode is a - delete + recreate operation; data persists via the SeiNode's PVC - retention mechanism. + TLS, if set, fronts the sidecar API with kube-rbac-proxy on :8443 + using TLS material from a Secret in the SeiNode's namespace. The + Secret is operator-provisioned; this controller does not create + it. Immutable. properties: secretName: description: |- diff --git a/config/crd/sei.io_seinodes.yaml b/config/crd/sei.io_seinodes.yaml index c354bbc..421e1c4 100644 --- a/config/crd/sei.io_seinodes.yaml +++ b/config/crd/sei.io_seinodes.yaml @@ -466,15 +466,10 @@ spec: type: object tls: description: |- - TLS, if set, fronts the sidecar API with kube-rbac-proxy on - :8443 using TLS material from a Secret in the SeiNode's - namespace. The Secret is operator-provisioned (e.g., via a - cert-manager Certificate in the platform GitOps repo); this - controller does not create it. - - Immutable post-creation. Toggling TLS on an existing SeiNode is a - delete + recreate operation; data persists via the SeiNode's PVC - retention mechanism. + TLS, if set, fronts the sidecar API with kube-rbac-proxy on :8443 + using TLS material from a Secret in the SeiNode's namespace. The + Secret is operator-provisioned; this controller does not create + it. Immutable. properties: secretName: description: |- @@ -1004,25 +999,16 @@ spec: type: string type: array sidecarTLS: - description: |- - SidecarTLS is set whenever spec.sidecar.tls is non-nil. Publishes - the contract platform tooling must satisfy when provisioning the - TLS Secret. Machine-readable replacement for naming-convention docs. + description: SidecarTLS is set when spec.sidecar.tls is non-nil. properties: requiredDNSNames: - description: |- - RequiredDNSNames is the SAN list the cert in SecretName must - include. Derived from the SeiNode's headless service DNS: - {name}.{namespace}.svc.cluster.local and - {name}-0.{name}.{namespace}.svc.cluster.local. Pre-computable - by platform tooling from the target spec; published here for - verification rather than as a prerequisite handshake. + description: RequiredDNSNames is the SAN list the cert in SecretName + must include. items: type: string type: array secretName: - description: SecretName mirrors spec.sidecar.tls.secretName for - visibility. + description: SecretName mirrors spec.sidecar.tls.secretName. type: string required: - requiredDNSNames diff --git a/internal/controller/node/controller.go b/internal/controller/node/controller.go index 2557099..5822f9f 100644 --- a/internal/controller/node/controller.go +++ b/internal/controller/node/controller.go @@ -43,9 +43,6 @@ type PlatformConfig = platform.Config // SeiNodeReconciler reconciles a SeiNode object. type SeiNodeReconciler struct { client.Client - // APIReader bypasses the controller-runtime cache. Used by preflight - // validation that must observe newly-provisioned Secrets without - // waiting for cache propagation. APIReader client.Reader Scheme *runtime.Scheme Recorder record.EventRecorder diff --git a/internal/controller/node/preflight_sidecar_tls.go b/internal/controller/node/preflight_sidecar_tls.go index 9048498..276ea93 100644 --- a/internal/controller/node/preflight_sidecar_tls.go +++ b/internal/controller/node/preflight_sidecar_tls.go @@ -18,20 +18,10 @@ import ( "github.com/sei-protocol/sei-k8s-controller/internal/noderesource" ) -// reconcileSidecarTLSReady is the steady-state preflight branch for the -// externally-provisioned sidecar TLS Secret. Mirrors the validate-signing-key -// / validate-node-key / validate-operator-keyring pre-flight checks, but -// runs as a controller method (not a plan task) so it gates plan creation -// rather than plan execution. -// -// When TLS is disabled the condition and status struct are cleared. -// When TLS is enabled the controller publishes status.sidecarTLS with the -// required DNS names and sets SidecarTLSSecretReady according to the -// Secret's presence + cert validity. -// -// All mutations are in-memory; the caller's Status().Patch flushes them. -// No error return: every check resolves to a condition reason rather than a -// reconcile failure, so the caller always proceeds to the rest of reconcile. +// reconcileSidecarTLSReady publishes status.sidecarTLS and sets the +// SidecarTLSSecretReady condition based on the referenced Secret's +// presence + cert validity. Clears both when TLS is disabled. +// Mutations are in-memory; caller's Status().Patch flushes. func (r *SeiNodeReconciler) reconcileSidecarTLSReady(ctx context.Context, node *seiv1alpha1.SeiNode) { if !noderesource.SidecarTLSEnabled(node) { apimeta.RemoveStatusCondition(&node.Status.Conditions, seiv1alpha1.ConditionSidecarTLSSecretReady) @@ -59,9 +49,8 @@ func (r *SeiNodeReconciler) reconcileSidecarTLSReady(ctx context.Context, node * }) } -// requiredDNSNames returns the SAN list the operator-provisioned cert must -// include. Derived from the SeiNode's headless service DNS — the names -// kube-rbac-proxy will be reached on from inside the cluster. +// requiredDNSNames returns the headless service + pod-0 DNS names the +// proxy is reached on from inside the cluster. func requiredDNSNames(node *seiv1alpha1.SeiNode) []string { return []string{ fmt.Sprintf("%s.%s.svc.cluster.local", node.Name, node.Namespace), @@ -69,16 +58,8 @@ func requiredDNSNames(node *seiv1alpha1.SeiNode) []string { } } -// validateTLSSecret reads the referenced Secret via the supplied reader -// (typically the controller's APIReader to bypass the cache) and returns -// the appropriate SidecarTLSSecretReady reason + message. -// -// Reasons: -// - Ready: Secret type kubernetes.io/tls, tls.crt/tls.key non-empty, -// cert parses, cert.DNSNames is a superset of required. -// - NotFound: Secret absent from the SeiNode's namespace. -// - Malformed: wrong Secret type, empty tls.crt/tls.key, or unparseable cert. -// - SANsMismatch: cert parses but does not cover the required DNS names. +// validateTLSSecret returns the SidecarTLSSecretReady reason + message +// for the Secret named in spec.sidecar.tls.secretName. func validateTLSSecret( ctx context.Context, reader client.Reader, @@ -94,10 +75,6 @@ func validateTLSSecret( return seiv1alpha1.ReasonTLSSecretNotFound, fmt.Sprintf("secret %q not found in namespace %q", name, node.Namespace) } - // Transient errors (network, RBAC) surface under the NotFound reason - // (the next reconcile retries). Message is prefixed so an operator - // who finds the Secret present can disambiguate without grepping - // controller logs. return seiv1alpha1.ReasonTLSSecretNotFound, fmt.Sprintf("transient error getting Secret %q: %v", name, err) } diff --git a/internal/controller/node/preflight_sidecar_tls_test.go b/internal/controller/node/preflight_sidecar_tls_test.go index 2588206..21c20f8 100644 --- a/internal/controller/node/preflight_sidecar_tls_test.go +++ b/internal/controller/node/preflight_sidecar_tls_test.go @@ -326,15 +326,8 @@ func TestReconcileSidecarTLSReady_TransitionEnabledToDisabled_Cleans(t *testing. r.reconcileSidecarTLSReady(context.Background(), node) g.Expect(node.Status.SidecarTLS).NotTo(BeNil()) - // Exercises the same-in-memory-object cleanup path used when - // reconciling a fresh SeiNode after the prior one was deleted + - // recreated under the same name. CEL blocks in-place spec mutation - // in prod (see SeiNodeSpec XValidation rule); this test simulates - // what the controller sees on first reconcile of the recreated - // SeiNode if the prior controller-resident object was somehow stale. - // Cert/key pairing is intentionally NOT validated by the preflight - // (kube-rbac-proxy detects mismatch at bind time); per LLD §2 we - // validate cert SHAPE, not crypto consistency. + // CEL blocks the in-place mutation in prod; this exercises the + // defensive cleanup branch. node.Spec.Sidecar = nil r.reconcileSidecarTLSReady(context.Background(), node) diff --git a/internal/noderesource/noderesource.go b/internal/noderesource/noderesource.go index c6ad067..424ef3c 100644 --- a/internal/noderesource/noderesource.go +++ b/internal/noderesource/noderesource.go @@ -950,11 +950,8 @@ func SidecarTLSEnabled(node *seiv1alpha1.SeiNode) bool { return node.Spec.Sidecar != nil && node.Spec.Sidecar.TLS != nil } -// SidecarTLSSecretName returns the operator-provisioned kubernetes.io/tls -// Secret name from spec.sidecar.tls.secretName. The controller does not -// create this Secret; platform tooling provisions it ahead of SeiNode -// creation. Returns empty when TLS is disabled — callers should gate on -// SidecarTLSEnabled before relying on the return value. +// SidecarTLSSecretName returns spec.sidecar.tls.secretName, or empty +// when TLS is disabled. Callers gate on SidecarTLSEnabled. func SidecarTLSSecretName(node *seiv1alpha1.SeiNode) string { if !SidecarTLSEnabled(node) { return "" diff --git a/internal/planner/bootstrap.go b/internal/planner/bootstrap.go index 9482079..be1853b 100644 --- a/internal/planner/bootstrap.go +++ b/internal/planner/bootstrap.go @@ -99,10 +99,6 @@ func buildBootstrapPlan( // Phase 4: Create production StatefulSet and Service (after bootstrap teardown frees the PVC) if noderesource.SidecarTLSEnabled(node) { - // kube-rbac-proxy authz ConfigMap is controller-owned; the - // TLS Secret itself is operator-provisioned externally and - // gated on via the SidecarTLSSecretReady condition before - // this plan is built. if err := appendTask(task.TaskTypeApplyRBACProxyConfig, &task.ApplyRBACProxyConfigParams{NodeName: node.Name, Namespace: node.Namespace}); err != nil { return nil, err @@ -187,10 +183,6 @@ func buildGenesisPlan(node *seiv1alpha1.SeiNode) (*seiv1alpha1.TaskPlan, error) prog := []string{task.TaskTypeEnsureDataPVC} if noderesource.SidecarTLSEnabled(node) { - // kube-rbac-proxy authz ConfigMap is controller-owned; the - // TLS Secret itself is operator-provisioned externally and - // gated on via the SidecarTLSSecretReady condition before - // this plan is built. prog = append(prog, task.TaskTypeApplyRBACProxyConfig) } prog = append(prog, diff --git a/internal/planner/planner.go b/internal/planner/planner.go index 5ffd397..4ea4bc9 100644 --- a/internal/planner/planner.go +++ b/internal/planner/planner.go @@ -134,12 +134,8 @@ func (p *NodeResolver) ResolvePlan(ctx context.Context, node *seiv1alpha1.SeiNod handleTerminalPlan(ctx, node) - // Gate init-plan creation on sidecar TLS readiness. The controller's - // preflight method (reconcileSidecarTLSReady) sets this condition - // before ResolvePlan runs. Mirrors the not-yet-ready behavior of - // referenced Secrets: stay in Pending until the operator provisions - // a valid Secret. Steady-state (Running) plans bypass this gate so - // observability stays live and image-drift plans still build. + // Gate init plans on the TLS Secret. Running-phase plans bypass + // so image-drift rollouts proceed independently. if noderesource.SidecarTLSEnabled(node) && node.Status.Phase != seiv1alpha1.PhaseRunning && !sidecarTLSSecretReady(node) { @@ -171,8 +167,7 @@ func (p *NodeResolver) ResolvePlan(ctx context.Context, node *seiv1alpha1.SeiNod return nil } -// sidecarTLSSecretReady returns true iff the SidecarTLSSecretReady -// condition is present and True. Missing/False both gate plan creation. +// sidecarTLSSecretReady returns true iff the condition is present and True. func sidecarTLSSecretReady(node *seiv1alpha1.SeiNode) bool { cond := meta.FindStatusCondition(node.Status.Conditions, seiv1alpha1.ConditionSidecarTLSSecretReady) return cond != nil && cond.Status == metav1.ConditionTrue @@ -548,10 +543,6 @@ func buildBasePlan( prog = append(prog, task.TaskTypeValidateOperatorKeyring) } if noderesource.SidecarTLSEnabled(node) { - // kube-rbac-proxy authz ConfigMap is controller-owned; the - // TLS Secret itself is operator-provisioned externally and - // gated on via the SidecarTLSSecretReady condition before - // this plan is built. prog = append(prog, task.TaskTypeApplyRBACProxyConfig) } prog = append(prog, task.TaskTypeApplyStatefulSet, task.TaskTypeApplyService) diff --git a/manifests/sei.io_seinodedeployments.yaml b/manifests/sei.io_seinodedeployments.yaml index d9e3437..671c58f 100644 --- a/manifests/sei.io_seinodedeployments.yaml +++ b/manifests/sei.io_seinodedeployments.yaml @@ -611,15 +611,10 @@ spec: type: object tls: description: |- - TLS, if set, fronts the sidecar API with kube-rbac-proxy on - :8443 using TLS material from a Secret in the SeiNode's - namespace. The Secret is operator-provisioned (e.g., via a - cert-manager Certificate in the platform GitOps repo); this - controller does not create it. - - Immutable post-creation. Toggling TLS on an existing SeiNode is a - delete + recreate operation; data persists via the SeiNode's PVC - retention mechanism. + TLS, if set, fronts the sidecar API with kube-rbac-proxy on :8443 + using TLS material from a Secret in the SeiNode's namespace. The + Secret is operator-provisioned; this controller does not create + it. Immutable. properties: secretName: description: |- diff --git a/manifests/sei.io_seinodes.yaml b/manifests/sei.io_seinodes.yaml index c354bbc..421e1c4 100644 --- a/manifests/sei.io_seinodes.yaml +++ b/manifests/sei.io_seinodes.yaml @@ -466,15 +466,10 @@ spec: type: object tls: description: |- - TLS, if set, fronts the sidecar API with kube-rbac-proxy on - :8443 using TLS material from a Secret in the SeiNode's - namespace. The Secret is operator-provisioned (e.g., via a - cert-manager Certificate in the platform GitOps repo); this - controller does not create it. - - Immutable post-creation. Toggling TLS on an existing SeiNode is a - delete + recreate operation; data persists via the SeiNode's PVC - retention mechanism. + TLS, if set, fronts the sidecar API with kube-rbac-proxy on :8443 + using TLS material from a Secret in the SeiNode's namespace. The + Secret is operator-provisioned; this controller does not create + it. Immutable. properties: secretName: description: |- @@ -1004,25 +999,16 @@ spec: type: string type: array sidecarTLS: - description: |- - SidecarTLS is set whenever spec.sidecar.tls is non-nil. Publishes - the contract platform tooling must satisfy when provisioning the - TLS Secret. Machine-readable replacement for naming-convention docs. + description: SidecarTLS is set when spec.sidecar.tls is non-nil. properties: requiredDNSNames: - description: |- - RequiredDNSNames is the SAN list the cert in SecretName must - include. Derived from the SeiNode's headless service DNS: - {name}.{namespace}.svc.cluster.local and - {name}-0.{name}.{namespace}.svc.cluster.local. Pre-computable - by platform tooling from the target spec; published here for - verification rather than as a prerequisite handshake. + description: RequiredDNSNames is the SAN list the cert in SecretName + must include. items: type: string type: array secretName: - description: SecretName mirrors spec.sidecar.tls.secretName for - visibility. + description: SecretName mirrors spec.sidecar.tls.secretName. type: string required: - requiredDNSNames From e3fd5fc6588a3fd421e890140d3ec626452ada88 Mon Sep 17 00:00:00 2001 From: bdchatham Date: Sun, 17 May 2026 14:32:32 -0700 Subject: [PATCH 4/6] fixup(seinode): gate all plans on TLS Secret, not just init MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The prior Phase != Running carve-out let image-drift NodeUpdate plans proceed when SidecarTLSSecretReady=False. That plan cycles the pod, and a cycled pod with a missing/mis-SAN'd Secret won't mount cleanly — so the bypass would land the fleet in a worse state than blocking. Drop it. Updates LLD §4 to match. Co-Authored-By: Claude Opus 4.7 (1M context) --- docs/design-seinode-sidecar-tls-toggle-lld.md | 4 ++-- internal/planner/planner.go | 8 +++----- 2 files changed, 5 insertions(+), 7 deletions(-) diff --git a/docs/design-seinode-sidecar-tls-toggle-lld.md b/docs/design-seinode-sidecar-tls-toggle-lld.md index 985d0c8..d3294c6 100644 --- a/docs/design-seinode-sidecar-tls-toggle-lld.md +++ b/docs/design-seinode-sidecar-tls-toggle-lld.md @@ -197,9 +197,9 @@ There is no `WaitForSidecarTLSSecret` task in the plan — its job is absorbed i If an operator attempts to mutate `spec.sidecar.tls`, the CRD CEL rejects the API request — no controller code runs. -If the externally-provisioned Secret rotates (cert-manager renewal): kube-rbac-proxy's existing `--tls-reload-interval=30s` flag picks up the new material from the same Secret mount; no pod restart needed; no controller action. The pre-flight validation re-runs on each reconcile and continues stamping `SidecarTLSSecretReady=True` as long as the new cert still has matching SANs. +If the externally-provisioned Secret rotates in place (cert-manager renewal with the same SANs): kube-rbac-proxy's existing `--tls-reload-interval=30s` flag picks up the new material from the same Secret mount; no pod restart needed; no controller action. Pre-flight re-runs each reconcile and continues stamping `SidecarTLSSecretReady=True`. -If the Secret SAN coverage *changes* such that the contract breaks (e.g., wrong SANs after a misconfigured re-issuance): pre-flight flips the condition to `SidecarTLSSecretReady=False, Reason=SANsMismatch`. The Running node continues to serve traffic with the now-wrong cert (kube-rbac-proxy doesn't care about controller-side validation); operators get a visible signal via the condition and can fix the cert. The pod doesn't cycle; this is observability, not enforcement. +If the Secret SAN coverage breaks (wrong SANs after a misconfigured re-issuance, Secret deleted, etc.): pre-flight flips the condition to `SidecarTLSSecretReady=False`. Plan creation is gated — any plan that fires under a broken Secret would eventually cycle the pod (image-drift NodeUpdate plans always do; even a mark-ready replan ultimately retries the sidecar HTTP call through the proxy), and a cycled pod with a missing or mis-SAN'd Secret won't bind cleanly. The Running node keeps serving on whatever cert kube-rbac-proxy is already bound to (no controller action can force a reload-from-bad-Secret), so the user-visible effect is "running pod stays as-is, no new rollouts until the operator fixes the Secret." ## 5. Operator workflow for "enable TLS on existing fleet" diff --git a/internal/planner/planner.go b/internal/planner/planner.go index 4ea4bc9..08a7b25 100644 --- a/internal/planner/planner.go +++ b/internal/planner/planner.go @@ -134,11 +134,9 @@ func (p *NodeResolver) ResolvePlan(ctx context.Context, node *seiv1alpha1.SeiNod handleTerminalPlan(ctx, node) - // Gate init plans on the TLS Secret. Running-phase plans bypass - // so image-drift rollouts proceed independently. - if noderesource.SidecarTLSEnabled(node) && - node.Status.Phase != seiv1alpha1.PhaseRunning && - !sidecarTLSSecretReady(node) { + // Gate plan creation on the TLS Secret. Any plan (init or image- + // drift rollout) eventually cycles the pod, which mounts the Secret. + if noderesource.SidecarTLSEnabled(node) && !sidecarTLSSecretReady(node) { return nil } From 769b1aed02091ce0bd80c118ef8d05833727ff99 Mon Sep 17 00:00:00 2001 From: bdchatham Date: Sun, 17 May 2026 14:45:05 -0700 Subject: [PATCH 5/6] fixup(seinode): round-3 cross-review findings MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Three blocker fixes + non-blocker test/observability adds + nit cleanup, synthesized across platform + kubernetes + security cross-review: Blockers: - Switch preflight read from APIReader to cached Client. Inherits the manager's existing Secret informer watch (same pattern signing-key / node-key / operator-keyring preflights use), which closes the "Pending node never requeues" gap — when an operator provisions or fixes the Secret, the informer fires a reconcile event automatically. APIReader was vestigial from the pre-externalized-cert design. - Drop the leftover cert-manager.io/certificates kubebuilder:rbac marker. The controller no longer creates Certificates under the externalized-cert design; granting unused write on a cluster-scoped CRD undercuts the LLD's blast-radius reduction. - Add ReasonTLSSecretUnavailable for non-IsNotFound errors from reader.Get. Conflating transient API errors with TLSSecretNotFound broke the dashboard signal — operators paging on NotFound would find the Secret present and burn time on a misread. Non-blocker adds: - Planner-level test for the e3fd5fc gate: blocks-when-False, blocks-when-absent, allows-when-True, not-evaluated-when-disabled, blocks-image-drift regression case. - Event emission on ConditionSidecarTLSSecretReady transitions (SidecarTLSSecretNotReady / SidecarTLSSecretReady), mirroring the existing SidecarReadinessLost/Restored pattern so operators see the signal at the kubectl-get-events layer. - Doc-comment on validateTLSSecret making the cert.DNSNames-only contract explicit (Subject.CN, IP SANs, URI SANs do not satisfy). Nits: - Drop stale "(Exact CEL pending...)" parenthetical from LLD §1; the rule is shipped and reflected. - Remove now-unused APIReader field from SeiNodeReconciler struct + cmd/main.go wiring + three test setups. Deferred (tracked): - #260 — PEM/x509 parse hardening (security non-blocker; bounded by Secret-write RBAC, defense in depth). - #261 — Alert when TLS Secret deleted while pod is still running (observability; controller publishes the signal, AlertManager rules live in the platform repo). Co-Authored-By: Claude Opus 4.7 (1M context) --- api/v1alpha1/seinode_types.go | 1 + cmd/main.go | 11 ++- config/rbac/role.yaml | 11 --- docs/design-seinode-sidecar-tls-toggle-lld.md | 4 +- internal/controller/node/controller.go | 21 ++++- .../controller/node/plan_execution_test.go | 20 +++-- .../controller/node/preflight_sidecar_tls.go | 13 ++- .../node/preflight_sidecar_tls_test.go | 2 +- internal/controller/node/reconciler_test.go | 11 ++- internal/planner/sidecar_tls_test.go | 79 +++++++++++++++++++ manifests/role.yaml | 11 --- 11 files changed, 130 insertions(+), 54 deletions(-) diff --git a/api/v1alpha1/seinode_types.go b/api/v1alpha1/seinode_types.go index 23078aa..e9444c7 100644 --- a/api/v1alpha1/seinode_types.go +++ b/api/v1alpha1/seinode_types.go @@ -313,6 +313,7 @@ const ( const ( ReasonTLSSecretReady = "TLSSecretReady" // ready ReasonTLSSecretNotFound = "TLSSecretNotFound" // Secret missing + ReasonTLSSecretUnavailable = "TLSSecretUnavailable" // transient: API error reading the Secret ReasonTLSSecretMalformed = "TLSSecretMalformed" // wrong type, empty tls.crt/tls.key, or unparseable cert ReasonTLSSecretSANsMismatch = "TLSSecretSANsMismatch" // cert.DNSNames missing one or more required SANs ) diff --git a/cmd/main.go b/cmd/main.go index 077b9c5..ed67e56 100644 --- a/cmd/main.go +++ b/cmd/main.go @@ -188,12 +188,11 @@ func main() { //nolint:staticcheck // TODO: migrate to GetEventRecorder (new events API) nodeRecorder := mgr.GetEventRecorderFor("seinode-controller") if err := (&nodecontroller.SeiNodeReconciler{ - Client: kc, - APIReader: mgr.GetAPIReader(), - Scheme: mgr.GetScheme(), - Recorder: nodeRecorder, - Platform: platformCfg, - Planner: &planner.NodeResolver{BuildSidecarClient: buildSidecarClient}, + Client: kc, + Scheme: mgr.GetScheme(), + Recorder: nodeRecorder, + Platform: platformCfg, + Planner: &planner.NodeResolver{BuildSidecarClient: buildSidecarClient}, PlanExecutor: &planner.Executor[*seiv1alpha1.SeiNode]{ ConfigFor: func(_ context.Context, node *seiv1alpha1.SeiNode) task.ExecutionConfig { return task.ExecutionConfig{ diff --git a/config/rbac/role.yaml b/config/rbac/role.yaml index 108e18d..025cbd1 100644 --- a/config/rbac/role.yaml +++ b/config/rbac/role.yaml @@ -67,17 +67,6 @@ rules: - patch - update - watch -- apiGroups: - - cert-manager.io - resources: - - certificates - verbs: - - create - - get - - list - - patch - - update - - watch - apiGroups: - gateway.networking.k8s.io resources: diff --git a/docs/design-seinode-sidecar-tls-toggle-lld.md b/docs/design-seinode-sidecar-tls-toggle-lld.md index d3294c6..6bc5816 100644 --- a/docs/design-seinode-sidecar-tls-toggle-lld.md +++ b/docs/design-seinode-sidecar-tls-toggle-lld.md @@ -74,11 +74,9 @@ CRD-level immutability: ```go // SeiNodeSpec -// +kubebuilder:validation:XValidation:rule="!has(oldSelf.sidecar.tls) || (has(self.sidecar.tls) && self.sidecar.tls == oldSelf.sidecar.tls)",message="spec.sidecar.tls is immutable; delete + recreate the SeiNode to change TLS configuration" +// +kubebuilder:validation:XValidation:rule="(!has(oldSelf.sidecar) || !has(oldSelf.sidecar.tls)) ? (!has(self.sidecar) || !has(self.sidecar.tls)) : (has(self.sidecar) && has(self.sidecar.tls) && self.sidecar.tls == oldSelf.sidecar.tls)",message="spec.sidecar.tls is immutable; delete + recreate the SeiNode to change TLS configuration" ``` -(Exact CEL pending — the rule needs to handle nil sidecar gracefully. May land at the `SidecarConfig` level instead.) - Status additions: ```go diff --git a/internal/controller/node/controller.go b/internal/controller/node/controller.go index 5822f9f..f8793b3 100644 --- a/internal/controller/node/controller.go +++ b/internal/controller/node/controller.go @@ -43,7 +43,6 @@ type PlatformConfig = platform.Config // SeiNodeReconciler reconciles a SeiNode object. type SeiNodeReconciler struct { client.Client - APIReader client.Reader Scheme *runtime.Scheme Recorder record.EventRecorder Platform PlatformConfig @@ -63,7 +62,6 @@ type SeiNodeReconciler struct { // +kubebuilder:rbac:groups="",resources=events,verbs=create;patch // +kubebuilder:rbac:groups="",resources=secrets,verbs=get;list;watch // +kubebuilder:rbac:groups="",resources=configmaps,verbs=get;list;watch;create;update;patch;delete -// +kubebuilder:rbac:groups=cert-manager.io,resources=certificates,verbs=get;list;watch;create;update;patch // Reconcile drives the SeiNode lifecycle. All status mutations after the // finalizer are accumulated in-memory and flushed in a single status patch. @@ -101,6 +99,7 @@ func (r *SeiNodeReconciler) Reconcile(ctx context.Context, req ctrl.Request) (ct statusBase := client.MergeFromWithOptions(before, client.MergeFromWithOptimisticLock{}) observedPhase := node.Status.Phase prevSidecar := apimeta.FindStatusCondition(node.Status.Conditions, seiv1alpha1.ConditionSidecarReady) + prevTLSSecret := apimeta.FindStatusCondition(node.Status.Conditions, seiv1alpha1.ConditionSidecarTLSSecretReady) if err := r.reconcilePeers(ctx, node); err != nil { return ctrl.Result{}, fmt.Errorf("reconciling peers: %w", err) @@ -114,6 +113,7 @@ func (r *SeiNodeReconciler) Reconcile(ctx context.Context, req ctrl.Request) (ct } r.emitSidecarReadinessEvent(node, prevSidecar) + r.emitSidecarTLSSecretEvent(node, prevTLSSecret) var result ctrl.Result var execErr error @@ -258,3 +258,20 @@ func (r *SeiNodeReconciler) emitSidecarReadinessEvent(node *seiv1alpha1.SeiNode, "sidecar Healthz returned 200; mark-ready gate is open") } } + +func (r *SeiNodeReconciler) emitSidecarTLSSecretEvent(node *seiv1alpha1.SeiNode, prev *metav1.Condition) { + cur := apimeta.FindStatusCondition(node.Status.Conditions, seiv1alpha1.ConditionSidecarTLSSecretReady) + if cur == nil { + return + } + switch { + case cur.Status == metav1.ConditionFalse && + (prev == nil || prev.Status != metav1.ConditionFalse): + r.Recorder.Eventf(node, corev1.EventTypeWarning, "SidecarTLSSecretNotReady", + "sidecar TLS Secret %q: %s", node.Spec.Sidecar.TLS.SecretName, cur.Message) + case cur.Status == metav1.ConditionTrue && + prev != nil && prev.Status == metav1.ConditionFalse: + r.Recorder.Event(node, corev1.EventTypeNormal, "SidecarTLSSecretReady", + "sidecar TLS Secret validated; plan gate is open") + } +} diff --git a/internal/controller/node/plan_execution_test.go b/internal/controller/node/plan_execution_test.go index c6d657e..e541541 100644 --- a/internal/controller/node/plan_execution_test.go +++ b/internal/controller/node/plan_execution_test.go @@ -119,11 +119,10 @@ func newProgressionReconciler(t *testing.T, mock *mockSidecarClient, objs ...cli WithStatusSubresource(&seiv1alpha1.SeiNode{}). Build() r := &SeiNodeReconciler{ - Client: c, - APIReader: c, - Scheme: s, - Recorder: record.NewFakeRecorder(100), - Platform: platformtest.Config(), + Client: c, + Scheme: s, + Recorder: record.NewFakeRecorder(100), + Platform: platformtest.Config(), Planner: &planner.NodeResolver{ BuildSidecarClient: func(_ *seiv1alpha1.SeiNode) (task.SidecarClient, error) { return mock, nil }, }, @@ -786,12 +785,11 @@ func TestReconcileInitializing_SidecarClientError_Requeues(t *testing.T) { Build() r := &SeiNodeReconciler{ - Client: c, - APIReader: c, - Scheme: s, - Recorder: record.NewFakeRecorder(100), - Platform: platformtest.Config(), - Planner: &planner.NodeResolver{}, + Client: c, + Scheme: s, + Recorder: record.NewFakeRecorder(100), + Platform: platformtest.Config(), + Planner: &planner.NodeResolver{}, PlanExecutor: &planner.Executor[*seiv1alpha1.SeiNode]{ ConfigFor: func(_ context.Context, n *seiv1alpha1.SeiNode) task.ExecutionConfig { return task.ExecutionConfig{ diff --git a/internal/controller/node/preflight_sidecar_tls.go b/internal/controller/node/preflight_sidecar_tls.go index 276ea93..29bdf34 100644 --- a/internal/controller/node/preflight_sidecar_tls.go +++ b/internal/controller/node/preflight_sidecar_tls.go @@ -22,6 +22,11 @@ import ( // SidecarTLSSecretReady condition based on the referenced Secret's // presence + cert validity. Clears both when TLS is disabled. // Mutations are in-memory; caller's Status().Patch flushes. +// +// Reads via the cached client so Secret informer events requeue the +// SeiNode automatically — operator-provisioned cert appearing after a +// SeiNode was created (and gated as Pending) wakes reconciliation +// without a separate poll loop. func (r *SeiNodeReconciler) reconcileSidecarTLSReady(ctx context.Context, node *seiv1alpha1.SeiNode) { if !noderesource.SidecarTLSEnabled(node) { apimeta.RemoveStatusCondition(&node.Status.Conditions, seiv1alpha1.ConditionSidecarTLSSecretReady) @@ -35,7 +40,7 @@ func (r *SeiNodeReconciler) reconcileSidecarTLSReady(ctx context.Context, node * RequiredDNSNames: required, } - reason, msg := validateTLSSecret(ctx, r.APIReader, node, required) + reason, msg := validateTLSSecret(ctx, r.Client, node, required) status := metav1.ConditionFalse if reason == seiv1alpha1.ReasonTLSSecretReady { status = metav1.ConditionTrue @@ -59,7 +64,9 @@ func requiredDNSNames(node *seiv1alpha1.SeiNode) []string { } // validateTLSSecret returns the SidecarTLSSecretReady reason + message -// for the Secret named in spec.sidecar.tls.secretName. +// for the Secret named in spec.sidecar.tls.secretName. Required SANs +// must appear in cert.DNSNames; Subject.CommonName, IP SANs, and URI +// SANs do not satisfy the contract. func validateTLSSecret( ctx context.Context, reader client.Reader, @@ -75,7 +82,7 @@ func validateTLSSecret( return seiv1alpha1.ReasonTLSSecretNotFound, fmt.Sprintf("secret %q not found in namespace %q", name, node.Namespace) } - return seiv1alpha1.ReasonTLSSecretNotFound, + return seiv1alpha1.ReasonTLSSecretUnavailable, fmt.Sprintf("transient error getting Secret %q: %v", name, err) } diff --git a/internal/controller/node/preflight_sidecar_tls_test.go b/internal/controller/node/preflight_sidecar_tls_test.go index 21c20f8..0769091 100644 --- a/internal/controller/node/preflight_sidecar_tls_test.go +++ b/internal/controller/node/preflight_sidecar_tls_test.go @@ -247,7 +247,7 @@ func tlsReconciler(t *testing.T, objs ...client.Object) *SeiNodeReconciler { t.Fatal(err) } c := fake.NewClientBuilder().WithScheme(s).WithObjects(objs...).Build() - return &SeiNodeReconciler{Client: c, APIReader: c, Scheme: s} + return &SeiNodeReconciler{Client: c, Scheme: s} } func TestReconcileSidecarTLSReady_TLSDisabled_ClearsConditionAndStatus(t *testing.T) { diff --git a/internal/controller/node/reconciler_test.go b/internal/controller/node/reconciler_test.go index b0a65e6..65c7c0f 100644 --- a/internal/controller/node/reconciler_test.go +++ b/internal/controller/node/reconciler_test.go @@ -45,12 +45,11 @@ func newNodeReconciler(t *testing.T, objs ...client.Object) (*SeiNodeReconciler, Build() mock := &mockSidecarClient{} r := &SeiNodeReconciler{ - Client: c, - APIReader: c, - Scheme: s, - Recorder: record.NewFakeRecorder(100), - Platform: platformtest.Config(), - Planner: &planner.NodeResolver{}, + Client: c, + Scheme: s, + Recorder: record.NewFakeRecorder(100), + Platform: platformtest.Config(), + Planner: &planner.NodeResolver{}, PlanExecutor: &planner.Executor[*seiv1alpha1.SeiNode]{ ConfigFor: func(_ context.Context, node *seiv1alpha1.SeiNode) task.ExecutionConfig { return task.ExecutionConfig{ diff --git a/internal/planner/sidecar_tls_test.go b/internal/planner/sidecar_tls_test.go index 1315784..954b744 100644 --- a/internal/planner/sidecar_tls_test.go +++ b/internal/planner/sidecar_tls_test.go @@ -4,6 +4,8 @@ import ( "fmt" "testing" + . "github.com/onsi/gomega" + apimeta "k8s.io/apimachinery/pkg/api/meta" metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" seiv1alpha1 "github.com/sei-protocol/sei-k8s-controller/api/v1alpha1" @@ -117,3 +119,80 @@ func TestBuildGenesisPlan_RBACProxyConfigSequencedBeforeStatefulSet(t *testing.T } assertRBACProxyConfigBeforeStatefulSet(t, plan) } + +// --- ResolvePlan TLS gating tests (locks e3fd5fc behavior) --- + +func tlsGatedNode() *seiv1alpha1.SeiNode { + return &seiv1alpha1.SeiNode{ + ObjectMeta: metav1.ObjectMeta{Name: testValidatorName, Namespace: sourceChainID}, + Spec: seiv1alpha1.SeiNodeSpec{ + ChainID: sourceChainID, + Image: testSeidImage, + Validator: &seiv1alpha1.ValidatorSpec{}, + Sidecar: &seiv1alpha1.SidecarConfig{TLS: tlsSpec()}, + }, + } +} + +func setTLSSecretReady(node *seiv1alpha1.SeiNode, status metav1.ConditionStatus, reason string) { + apimeta.SetStatusCondition(&node.Status.Conditions, metav1.Condition{ + Type: seiv1alpha1.ConditionSidecarTLSSecretReady, Status: status, Reason: reason, + }) +} + +func TestResolvePlan_TLSGate_BlocksWhenConditionFalse(t *testing.T) { + g := NewWithT(t) + node := tlsGatedNode() + setTLSSecretReady(node, metav1.ConditionFalse, seiv1alpha1.ReasonTLSSecretNotFound) + + err := (&NodeResolver{}).ResolvePlan(t.Context(), node) + g.Expect(err).NotTo(HaveOccurred()) + g.Expect(node.Status.Plan).To(BeNil(), "plan must not be built when TLS Secret is not ready") +} + +func TestResolvePlan_TLSGate_BlocksWhenConditionAbsent(t *testing.T) { + g := NewWithT(t) + node := tlsGatedNode() + // Condition never set — gate treats missing == false. + + err := (&NodeResolver{}).ResolvePlan(t.Context(), node) + g.Expect(err).NotTo(HaveOccurred()) + g.Expect(node.Status.Plan).To(BeNil(), "plan must not be built when TLS condition is absent") +} + +func TestResolvePlan_TLSGate_AllowsWhenConditionTrue(t *testing.T) { + g := NewWithT(t) + node := tlsGatedNode() + setTLSSecretReady(node, metav1.ConditionTrue, seiv1alpha1.ReasonTLSSecretReady) + + err := (&NodeResolver{}).ResolvePlan(t.Context(), node) + g.Expect(err).NotTo(HaveOccurred()) + g.Expect(node.Status.Plan).NotTo(BeNil(), "plan should build when TLS Secret is Ready") +} + +func TestResolvePlan_TLSGate_NotEvaluatedWhenTLSDisabled(t *testing.T) { + g := NewWithT(t) + node := tlsGatedNode() + node.Spec.Sidecar = nil // TLS not enabled — gate is bypassed regardless of condition state + + err := (&NodeResolver{}).ResolvePlan(t.Context(), node) + g.Expect(err).NotTo(HaveOccurred()) + g.Expect(node.Status.Plan).NotTo(BeNil(), "plan should build for TLS-disabled SeiNode") +} + +func TestResolvePlan_TLSGate_BlocksImageDrift(t *testing.T) { + // Regression for the pre-e3fd5fc carve-out: image drift used to bypass + // the gate. With the carve-out dropped, image drift on a Running node + // with broken TLS must also be blocked. + g := NewWithT(t) + node := tlsGatedNode() + node.Status.Phase = seiv1alpha1.PhaseRunning + node.Status.CurrentImage = "old" + node.Spec.Image = "new" // image drift + setTLSSecretReady(node, metav1.ConditionFalse, seiv1alpha1.ReasonTLSSecretSANsMismatch) + + err := (&NodeResolver{}).ResolvePlan(t.Context(), node) + g.Expect(err).NotTo(HaveOccurred()) + g.Expect(node.Status.Plan).To(BeNil(), + "image-drift NodeUpdate plan must not build while TLS Secret is broken — a cycled pod would mount the broken cert") +} diff --git a/manifests/role.yaml b/manifests/role.yaml index 108e18d..025cbd1 100644 --- a/manifests/role.yaml +++ b/manifests/role.yaml @@ -67,17 +67,6 @@ rules: - patch - update - watch -- apiGroups: - - cert-manager.io - resources: - - certificates - verbs: - - create - - get - - list - - patch - - update - - watch - apiGroups: - gateway.networking.k8s.io resources: From d0889c12cf4a22195766457c5b021d52506362f6 Mon Sep 17 00:00:00 2001 From: bdchatham Date: Sun, 17 May 2026 14:49:49 -0700 Subject: [PATCH 6/6] fixup(seinode): actually close the Pending-requeue gap MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Prior commit overstated the cached-client switch. Cached Client.Get keeps reads consistent with the manager's cache, but the SeiNode builder only Watches() SeiNode + StatefulSet + Job + Service + PVC — no Secret watch. Secret changes therefore do NOT enqueue SeiNode reconciles. A Pending node blocked on SidecarTLSSecretReady would have waited on the informer resync (~10h) until either a spec change or the resync fired. Add an explicit requeue on statusPollInterval when TLS is enabled and the condition is not True. Matches the existing Running-phase steady- state poll pattern. Also corrects the now-misleading code comment on reconcileSidecarTLSReady. Co-Authored-By: Claude Opus 4.7 (1M context) --- internal/controller/node/controller.go | 10 ++++++++++ internal/controller/node/preflight_sidecar_tls.go | 7 +++---- 2 files changed, 13 insertions(+), 4 deletions(-) diff --git a/internal/controller/node/controller.go b/internal/controller/node/controller.go index f8793b3..771e8d4 100644 --- a/internal/controller/node/controller.go +++ b/internal/controller/node/controller.go @@ -173,6 +173,16 @@ func (r *SeiNodeReconciler) Reconcile(ctx context.Context, req ctrl.Request) (ct return ctrl.Result{RequeueAfter: statusPollInterval}, nil } + // Pending nodes blocked on the SidecarTLSSecretReady gate poll for + // the Secret appearing. Without this the controller would wait on the + // informer resync (~10h) since the builder does not Watches() Secrets. + if noderesource.SidecarTLSEnabled(node) { + c := apimeta.FindStatusCondition(node.Status.Conditions, seiv1alpha1.ConditionSidecarTLSSecretReady) + if c == nil || c.Status != metav1.ConditionTrue { + return ctrl.Result{RequeueAfter: statusPollInterval}, nil + } + } + return result, nil } diff --git a/internal/controller/node/preflight_sidecar_tls.go b/internal/controller/node/preflight_sidecar_tls.go index 29bdf34..575cec7 100644 --- a/internal/controller/node/preflight_sidecar_tls.go +++ b/internal/controller/node/preflight_sidecar_tls.go @@ -23,10 +23,9 @@ import ( // presence + cert validity. Clears both when TLS is disabled. // Mutations are in-memory; caller's Status().Patch flushes. // -// Reads via the cached client so Secret informer events requeue the -// SeiNode automatically — operator-provisioned cert appearing after a -// SeiNode was created (and gated as Pending) wakes reconciliation -// without a separate poll loop. +// Reads via the cached client. Reconcile.statusPollInterval covers the +// "Secret arrives after SeiNode" case — the controller does not Watches() +// Secrets, so an explicit requeue loop drives convergence. func (r *SeiNodeReconciler) reconcileSidecarTLSReady(ctx context.Context, node *seiv1alpha1.SeiNode) { if !noderesource.SidecarTLSEnabled(node) { apimeta.RemoveStatusCondition(&node.Status.Conditions, seiv1alpha1.ConditionSidecarTLSSecretReady)