From 9f3111722acbd7ec64de8e074dfdcbe08a37f98b Mon Sep 17 00:00:00 2001 From: Juan Antonio Osorio Date: Tue, 7 Apr 2026 09:16:18 +0300 Subject: [PATCH] Add OpenTelemetry tracing spans to VM startup pipeline Instrument the critical path through microvm.Run() with OTel trace spans so consumers can identify performance bottlenecks. When no TracerProvider is configured (the default), all tracing is no-op with zero overhead. Spans added: - microvm.Run (root) with image/name/cpus/memory attributes - microvm.Preflight, microvm.ImagePull, microvm.RootfsClone - microvm.RootfsHooks, microvm.BackendPrepare, microvm.NetworkStart - microvm.VMSpawn, microvm.PostBoot - microvm.image.CacheLookup/Fetch/Extract/CacheStore (image/pull.go) - microvm.SSHWaitReady with per-probe events (ssh/client.go) - microvm.preflight.RunAll + per-check spans (preflight/checker.go) - microvm.backend.Start + ResolveRuntime/ResolveFirmware (backend.go) Co-Authored-By: Claude Opus 4.6 (1M context) --- go.mod | 16 +--- go.sum | 63 +------------- hypervisor/libkrun/backend.go | 24 ++++++ image/pull.go | 38 +++++++++ microvm.go | 149 ++++++++++++++++++++++++++-------- preflight/checker.go | 24 +++++- ssh/client.go | 26 +++++- 7 files changed, 230 insertions(+), 110 deletions(-) diff --git a/go.mod b/go.mod index fef0ba0..08d43bf 100644 --- a/go.mod +++ b/go.mod @@ -17,16 +17,16 @@ require ( github.com/sirupsen/logrus v1.9.4 github.com/stretchr/testify v1.11.1 github.com/vishvananda/netlink v1.3.1 + go.opentelemetry.io/otel v1.40.0 + go.opentelemetry.io/otel/trace v1.40.0 golang.org/x/crypto v0.49.0 golang.org/x/sync v0.20.0 golang.org/x/sys v0.42.0 ) require ( - github.com/Azure/go-ansiterm v0.0.0-20250102033503-faa5f7b0171c // indirect github.com/Microsoft/go-winio v0.6.2 // indirect github.com/apparentlymart/go-cidr v1.1.0 // indirect - github.com/cenkalti/backoff/v4 v4.3.0 // indirect github.com/cespare/xxhash/v2 v2.3.0 // indirect github.com/containerd/errdefs v1.0.0 // indirect github.com/containerd/errdefs/pkg v0.3.0 // indirect @@ -34,8 +34,6 @@ require ( github.com/davecgh/go-spew v1.1.1 // indirect github.com/distribution/reference v0.6.0 // indirect github.com/docker/cli v29.3.1+incompatible // indirect - github.com/docker/distribution v2.8.3+incompatible // indirect - github.com/docker/docker v28.5.2+incompatible // indirect github.com/docker/docker-credential-helpers v0.9.3 // indirect github.com/docker/go-connections v0.6.0 // indirect github.com/docker/go-units v0.5.0 // indirect @@ -50,32 +48,22 @@ require ( github.com/moby/docker-image-spec v1.3.1 // indirect github.com/moby/moby/api v1.54.0 // indirect github.com/moby/moby/client v0.3.0 // indirect - github.com/moby/sys/sequential v0.6.0 // indirect github.com/opencontainers/go-digest v1.0.0 // indirect github.com/opencontainers/image-spec v1.1.1 // indirect github.com/pierrec/lz4/v4 v4.1.14 // indirect - github.com/pkg/errors v0.9.1 // indirect github.com/pmezard/go-difflib v1.0.0 // indirect github.com/u-root/uio v0.0.0-20240224005618-d2acac8f3701 // indirect github.com/vbatts/tar-split v0.12.2 // indirect github.com/vishvananda/netns v0.0.5 // indirect go.opentelemetry.io/auto/sdk v1.2.1 // indirect go.opentelemetry.io/contrib/instrumentation/net/http/otelhttp v0.61.0 // indirect - go.opentelemetry.io/otel v1.40.0 // indirect - go.opentelemetry.io/otel/exporters/otlp/otlptrace v1.40.0 // indirect go.opentelemetry.io/otel/metric v1.40.0 // indirect go.opentelemetry.io/otel/sdk v1.40.0 // indirect go.opentelemetry.io/otel/sdk/metric v1.40.0 // indirect - go.opentelemetry.io/otel/trace v1.40.0 // indirect - go.opentelemetry.io/proto/otlp v1.9.0 // indirect golang.org/x/mod v0.34.0 // indirect golang.org/x/net v0.52.0 // indirect golang.org/x/time v0.11.0 // indirect golang.org/x/tools v0.43.0 // indirect - google.golang.org/genproto/googleapis/api v0.0.0-20260209200024-4cfbd4190f57 // indirect - google.golang.org/genproto/googleapis/rpc v0.0.0-20260209200024-4cfbd4190f57 // indirect - google.golang.org/grpc v1.78.0 // indirect - google.golang.org/protobuf v1.36.11 // indirect gopkg.in/yaml.v3 v3.0.1 // indirect gvisor.dev/gvisor v0.0.0-20240916094835-a174eb65023f // indirect ) diff --git a/go.sum b/go.sum index 37a1f8f..fadce72 100644 --- a/go.sum +++ b/go.sum @@ -1,12 +1,8 @@ -github.com/Azure/go-ansiterm v0.0.0-20250102033503-faa5f7b0171c h1:udKWzYgxTojEKWjV8V+WSxDXJ4NFATAsZjh8iIbsQIg= -github.com/Azure/go-ansiterm v0.0.0-20250102033503-faa5f7b0171c/go.mod h1:xomTg63KZ2rFqZQzSB4Vz2SUXa1BpHTVz9L5PTmPC4E= github.com/Microsoft/go-winio v0.6.2 h1:F2VQgta7ecxGYO8k3ZZz3RS8fVIXVxONVUPlNERoyfY= github.com/Microsoft/go-winio v0.6.2/go.mod h1:yd8OoFMLzJbo9gZq8j5qaps8bJ9aShtEA8Ipt1oGCvU= github.com/apparentlymart/go-cidr v1.1.0 h1:2mAhrMoF+nhXqxTzSZMUzDHkLjmIHC+Zzn4tdgBZjnU= github.com/apparentlymart/go-cidr v1.1.0/go.mod h1:EBcsNrHc3zQeuaeCeCtQruQm+n9/YjEn/vI25Lg7Gwc= github.com/armon/go-proxyproto v0.0.0-20210323213023-7e956b284f0a/go.mod h1:QmP9hvJ91BbJmGVGSbutW19IC0Q9phDCLGaomwTJbgU= -github.com/cenkalti/backoff/v4 v4.3.0 h1:MyRJ/UdXutAwSAT+s3wNd7MfTIcy71VQueUuFK343L8= -github.com/cenkalti/backoff/v4 v4.3.0/go.mod h1:Y3VNntkOUPxTVeUxJ/G5vcM//AlwfmyYozVcomhLiZE= github.com/cenkalti/backoff/v5 v5.0.3 h1:ZN+IMa753KfX5hd8vVaMixjnqRZ3y8CuJKRKj1xcsSM= github.com/cenkalti/backoff/v5 v5.0.3/go.mod h1:rkhZdG3JZukswDf7f0cwqPNk4K0sa+F97BxZthm/crw= github.com/cespare/xxhash/v2 v2.3.0 h1:UL815xU9SqsFlibzuggzjXhog7bL6oX9BbNZnL2UFvs= @@ -15,8 +11,6 @@ github.com/containerd/errdefs v1.0.0 h1:tg5yIfIlQIrxYtu9ajqY42W3lpS19XqdxRQeEwYG github.com/containerd/errdefs v1.0.0/go.mod h1:+YBYIdtsnF4Iw6nWZhJcqGSg/dwvV7tyJ/kCkyJ2k+M= github.com/containerd/errdefs/pkg v0.3.0 h1:9IKJ06FvyNlexW690DXuQNx2KA2cUJXx151Xdx3ZPPE= github.com/containerd/errdefs/pkg v0.3.0/go.mod h1:NJw6s9HwNuRhnjJhM7pylWwMyAkmCQvQ4GpJHEqRLVk= -github.com/containerd/log v0.1.0 h1:TCJt7ioM2cr/tfR8GPbGf9/VRAX8D2B4PjzCpfX540I= -github.com/containerd/log v0.1.0/go.mod h1:VRRf09a7mHDIRezVKTRCrOq78v577GXq3bSa3EhrzVo= github.com/containerd/stargz-snapshotter/estargz v0.18.2 h1:yXkZFYIzz3eoLwlTUZKz2iQ4MrckBxJjkmD16ynUTrw= github.com/containerd/stargz-snapshotter/estargz v0.18.2/go.mod h1:XyVU5tcJ3PRpkA9XS2T5us6Eg35yM0214Y+wvrZTBrY= github.com/containers/gvisor-tap-vsock v0.8.8 h1:5FznbOYMIuaCv8B6zQ7M6wjqP63Lasy0A6GpViEnjTg= @@ -27,20 +21,10 @@ github.com/davecgh/go-spew v1.1.1 h1:vj9j/u1bqnvCEfJOwUhtlOARqs3+rkHYY13jYWTU97c github.com/davecgh/go-spew v1.1.1/go.mod h1:J7Y8YcW2NihsgmVo/mv3lAwl/skON4iLHjSsI+c5H38= github.com/distribution/reference v0.6.0 h1:0IXCQ5g4/QMHHkarYzh5l+u8T3t73zM5QvfrDyIgxBk= github.com/distribution/reference v0.6.0/go.mod h1:BbU0aIcezP1/5jX/8MP0YiH4SdvB5Y4f/wlDRiLyi3E= -github.com/docker/cli v29.2.1+incompatible h1:n3Jt0QVCN65eiVBoUTZQM9mcQICCJt3akW4pKAbKdJg= -github.com/docker/cli v29.2.1+incompatible/go.mod h1:JLrzqnKDaYBop7H2jaqPtU4hHvMKP+vjCwu2uszcLI8= -github.com/docker/cli v29.3.0+incompatible h1:z3iWveU7h19Pqx7alZES8j+IeFQZ1lhTwb2F+V9SVvk= -github.com/docker/cli v29.3.0+incompatible/go.mod h1:JLrzqnKDaYBop7H2jaqPtU4hHvMKP+vjCwu2uszcLI8= github.com/docker/cli v29.3.1+incompatible h1:M04FDj2TRehDacrosh7Vlkgc7AuQoWloQkf1PA5hmoI= github.com/docker/cli v29.3.1+incompatible/go.mod h1:JLrzqnKDaYBop7H2jaqPtU4hHvMKP+vjCwu2uszcLI8= -github.com/docker/distribution v2.8.3+incompatible h1:AtKxIZ36LoNK51+Z6RpzLpddBirtxJnzDrHLEKxTAYk= -github.com/docker/distribution v2.8.3+incompatible/go.mod h1:J2gT2udsDAN96Uj4KfcMRqY0/ypR+oyYUYmja8H+y+w= -github.com/docker/docker v28.5.2+incompatible h1:DBX0Y0zAjZbSrm1uzOkdr1onVghKaftjlSWt4AFexzM= -github.com/docker/docker v28.5.2+incompatible/go.mod h1:eEKB0N0r5NX/I1kEveEz05bcu8tLC/8azJZsviup8Sk= github.com/docker/docker-credential-helpers v0.9.3 h1:gAm/VtF9wgqJMoxzT3Gj5p4AqIjCBS4wrsOh9yRqcz8= github.com/docker/docker-credential-helpers v0.9.3/go.mod h1:x+4Gbw9aGmChi3qTLZj8Dfn0TD20M/fuWy0E5+WDeCo= -github.com/docker/go-connections v0.5.0 h1:USnMq7hx7gwdVZq1L49hLXaFtUdTADjXGp+uj1Br63c= -github.com/docker/go-connections v0.5.0/go.mod h1:ov60Kzw0kKElRwhNs9UlUHAE/F9Fe6GLaXnqyDdmEXc= github.com/docker/go-connections v0.6.0 h1:LlMG9azAe1TqfR7sO+NJttz1gy6KO7VJBh+pMmjSD94= github.com/docker/go-connections v0.6.0/go.mod h1:AahvXYshr6JgfUJGdDCs2b5EZG/vmaMAntpSFH5BFKE= github.com/docker/go-units v0.5.0 h1:69rxXcBk27SvSaaxTtLh/8llcHD8vYHT7WSdRZ/jvr4= @@ -64,26 +48,18 @@ github.com/google/btree v1.1.2 h1:xf4v41cLI2Z6FxbKm+8Bu+m8ifhj15JuZ9sa0jZCMUU= github.com/google/btree v1.1.2/go.mod h1:qOPhT0dTNdNzV6Z/lhRX0YXUafgPLFUh+gZMl761Gm4= github.com/google/go-cmp v0.7.0 h1:wk8382ETsv4JYUZwIsn6YpYiWiBsYLSJiTsyBybVuN8= github.com/google/go-cmp v0.7.0/go.mod h1:pXiqmnSA92OHEEa9HXL2W4E7lf9JzCmGVUdgjX3N/iU= -github.com/google/go-containerregistry v0.21.2 h1:vYaMU4nU55JJGFC9JR/s8NZcTjbE9DBBbvusTW9NeS0= -github.com/google/go-containerregistry v0.21.2/go.mod h1:ctO5aCaewH4AK1AumSF5DPW+0+R+d2FmylMJdp5G7p0= -github.com/google/go-containerregistry v0.21.3 h1:Xr+yt3VvwOOn/5nJzd7UoOhwPGiPkYW0zWDLLUXqAi4= -github.com/google/go-containerregistry v0.21.3/go.mod h1:D5ZrJF1e6dMzvInpBPuMCX0FxURz7GLq2rV3Us9aPkc= github.com/google/go-containerregistry v0.21.4 h1:VrhlIQtdhE6riZW//MjPrcJ1snAjPoCCpPHqGOygrv8= github.com/google/go-containerregistry v0.21.4/go.mod h1:kxgc23zQ2qMY/hAKt0wCbB/7tkeovAP2mE2ienynJUw= github.com/google/gopacket v1.1.19 h1:ves8RnFZPGiFnTS0uPQStjwru6uO6h+nlr9j6fL7kF8= github.com/google/gopacket v1.1.19/go.mod h1:iJ8V8n6KS+z2U1A8pUwu8bW5SyEMkXJB8Yo/Vo+TKTo= github.com/google/uuid v1.6.0 h1:NIvaJDMOsjHA8n1jAhLSgzrAzy1Hgr+hNrb57e+94F0= github.com/google/uuid v1.6.0/go.mod h1:TIyPZe4MgqvfeYDBFedMoGGpEw/LqOeaOT+nhxU+yHo= -github.com/grpc-ecosystem/grpc-gateway/v2 v2.27.2 h1:8Tjv8EJ+pM1xP8mK6egEbD1OgnVTyacbefKhmbLhIhU= -github.com/grpc-ecosystem/grpc-gateway/v2 v2.27.2/go.mod h1:pkJQ2tZHJ0aFOVEEot6oZmaVEZcRme73eIFmhiVuRWs= github.com/inetaf/tcpproxy v0.0.0-20250222171855-c4b9df066048 h1:jaqViOFFlZtkAwqvwZN+id37fosQqR5l3Oki9Dk4hz8= github.com/inetaf/tcpproxy v0.0.0-20250222171855-c4b9df066048/go.mod h1:Di7LXRyUcnvAcLicFhtM9/MlZl/TNgRSDHORM2c6CMI= github.com/insomniacslk/dhcp v0.0.0-20240710054256-ddd8a41251c9 h1:LZJWucZz7ztCqY6Jsu7N9g124iJ2kt/O62j3+UchZFg= github.com/insomniacslk/dhcp v0.0.0-20240710054256-ddd8a41251c9/go.mod h1:KclMyHxX06VrVr0DJmeFSUb1ankt7xTfoOA35pCkoic= github.com/josharian/native v1.1.0 h1:uuaP0hAbW7Y4l0ZRQ6C9zfb7Mg1mbFKry/xzDAfmtLA= github.com/josharian/native v1.1.0/go.mod h1:7X/raswPFr05uY3HiLlYeyQntB6OO7E/d2Cu7qoaN2w= -github.com/klauspost/compress v1.18.4 h1:RPhnKRAQ4Fh8zU2FY/6ZFDwTVTxgJ/EMydqSTzE9a2c= -github.com/klauspost/compress v1.18.4/go.mod h1:R0h/fSBs8DE4ENlcrlib3PsXS61voFxhIs2DeRhCvJ4= github.com/klauspost/compress v1.18.5 h1:/h1gH5Ce+VWNLSWqPzOVn6XBO+vJbCNGvjoaGBFW2IE= github.com/klauspost/compress v1.18.5/go.mod h1:cwPg85FWrGar70rWktvGQj8/hthj3wpl0PGDogxkrSQ= github.com/kr/pretty v0.3.1 h1:flRD4NNwYAUpkphVc1HcthR4KEIFJ65n8Mw5qdRn3LE= @@ -104,14 +80,6 @@ github.com/moby/moby/api v1.54.0 h1:7kbUgyiKcoBhm0UrWbdrMs7RX8dnwzURKVbZGy2GnL0= github.com/moby/moby/api v1.54.0/go.mod h1:8mb+ReTlisw4pS6BRzCMts5M49W5M7bKt1cJy/YbAqc= github.com/moby/moby/client v0.3.0 h1:UUGL5okry+Aomj3WhGt9Aigl3ZOxZGqR7XPo+RLPlKs= github.com/moby/moby/client v0.3.0/go.mod h1:HJgFbJRvogDQjbM8fqc1MCEm4mIAGMLjXbgwoZp6jCQ= -github.com/moby/sys/atomicwriter v0.1.0 h1:kw5D/EqkBwsBFi0ss9v1VG3wIkVhzGvLklJ+w3A14Sw= -github.com/moby/sys/atomicwriter v0.1.0/go.mod h1:Ul8oqv2ZMNHOceF643P6FKPXeCmYtlQMvpizfsSoaWs= -github.com/moby/sys/sequential v0.6.0 h1:qrx7XFUd/5DxtqcoH1h438hF5TmOvzC/lspjy7zgvCU= -github.com/moby/sys/sequential v0.6.0/go.mod h1:uyv8EUTrca5PnDsdMGXhZe6CCe8U/UiTWd+lL+7b/Ko= -github.com/moby/term v0.0.0-20221205130635-1aeaba878587 h1:HfkjXDfhgVaN5rmueG8cL8KKeFNecRCXFhaJ2qZ5SKA= -github.com/moby/term v0.0.0-20221205130635-1aeaba878587/go.mod h1:8FzsFHVUBGZdbDsJw/ot+X+d5HLUbvklYLJ9uGfcI3Y= -github.com/morikuni/aec v1.0.0 h1:nP9CBfwrvYnBRgY6qfDQkygYDmYwOilePFkwzv4dU8A= -github.com/morikuni/aec v1.0.0/go.mod h1:BbKIizmSmc5MMPqRYbxO4ZU0S0+P200+tUnFx7PXmsc= github.com/nxadm/tail v1.4.8 h1:nPr65rt6Y5JFSKQO7qToXr7pePgD6Gwiw05lkbyAQTE= github.com/nxadm/tail v1.4.8/go.mod h1:+ncqLTQzXmGhMZNUePPaPqPvBxHAIsmXswZKocGu+AU= github.com/onsi/ginkgo v1.16.5 h1:8xi0RTUf59SOSfEtZMvwTvXYMzG4gV23XVHOZiXNtnE= @@ -124,8 +92,6 @@ github.com/opencontainers/image-spec v1.1.1 h1:y0fUlFfIZhPF1W537XOLg0/fcx6zcHCJw github.com/opencontainers/image-spec v1.1.1/go.mod h1:qpqAh3Dmcf36wStyyWU+kCeDgrGnAve2nCC8+7h8Q0M= github.com/pierrec/lz4/v4 v4.1.14 h1:+fL8AQEZtz/ijeNnpduH0bROTu0O3NZAlPjQxGn8LwE= github.com/pierrec/lz4/v4 v4.1.14/go.mod h1:gZWDp/Ze/IJXGXf23ltt2EXimqmTUXEy0GFuRQyBid4= -github.com/pkg/errors v0.9.1 h1:FEBLx1zS214owpjy7qsBeixbURkuhQAwrK5UwLGTwt4= -github.com/pkg/errors v0.9.1/go.mod h1:bwawxfHBFNV+L2hUp1rHADufV3IMtnDRdf1r5NINEl0= github.com/pmezard/go-difflib v1.0.0 h1:4DBwDE0NGyQoBHbLQYPwSUPoCMWR5BEzIk/f1lZbAQM= github.com/pmezard/go-difflib v1.0.0/go.mod h1:iKH77koFhYxTK1pcRnkKkqfTogsbg7gZNVY4sRDYZ/4= github.com/rogpeppe/go-internal v1.14.1 h1:UQB4HGPB6osV0SQTLymcB4TgvyWu6ZyliaW0tI/otEQ= @@ -148,10 +114,6 @@ go.opentelemetry.io/contrib/instrumentation/net/http/otelhttp v0.61.0 h1:F7Jx+6h go.opentelemetry.io/contrib/instrumentation/net/http/otelhttp v0.61.0/go.mod h1:UHB22Z8QsdRDrnAtX4PntOl36ajSxcdUMt1sF7Y6E7Q= go.opentelemetry.io/otel v1.40.0 h1:oA5YeOcpRTXq6NN7frwmwFR0Cn3RhTVZvXsP4duvCms= go.opentelemetry.io/otel v1.40.0/go.mod h1:IMb+uXZUKkMXdPddhwAHm6UfOwJyh4ct1ybIlV14J0g= -go.opentelemetry.io/otel/exporters/otlp/otlptrace v1.40.0 h1:QKdN8ly8zEMrByybbQgv8cWBcdAarwmIPZ6FThrWXJs= -go.opentelemetry.io/otel/exporters/otlp/otlptrace v1.40.0/go.mod h1:bTdK1nhqF76qiPoCCdyFIV+N/sRHYXYCTQc+3VCi3MI= -go.opentelemetry.io/otel/exporters/otlp/otlptrace/otlptracehttp v1.33.0 h1:wpMfgF8E1rkrT1Z6meFh1NDtownE9Ii3n3X2GJYjsaU= -go.opentelemetry.io/otel/exporters/otlp/otlptrace/otlptracehttp v1.33.0/go.mod h1:wAy0T/dUbs468uOlkT31xjvqQgEVXv58BRFWEgn5v/0= go.opentelemetry.io/otel/metric v1.40.0 h1:rcZe317KPftE2rstWIBitCdVp89A2HqjkxR3c11+p9g= go.opentelemetry.io/otel/metric v1.40.0/go.mod h1:ib/crwQH7N3r5kfiBZQbwrTge743UDc7DTFVZrrXnqc= go.opentelemetry.io/otel/sdk v1.40.0 h1:KHW/jUzgo6wsPh9At46+h4upjtccTmuZCFAc9OJ71f8= @@ -160,8 +122,6 @@ go.opentelemetry.io/otel/sdk/metric v1.40.0 h1:mtmdVqgQkeRxHgRv4qhyJduP3fYJRMX4A go.opentelemetry.io/otel/sdk/metric v1.40.0/go.mod h1:4Z2bGMf0KSK3uRjlczMOeMhKU2rhUqdWNoKcYrtcBPg= go.opentelemetry.io/otel/trace v1.40.0 h1:WA4etStDttCSYuhwvEa8OP8I5EWu24lkOzp+ZYblVjw= go.opentelemetry.io/otel/trace v1.40.0/go.mod h1:zeAhriXecNGP/s2SEG3+Y8X9ujcJOTqQ5RgdEJcawiA= -go.opentelemetry.io/proto/otlp v1.9.0 h1:l706jCMITVouPOqEnii2fIAuO3IVGBRPV5ICjceRb/A= -go.opentelemetry.io/proto/otlp v1.9.0/go.mod h1:xE+Cx5E/eEHw+ISFkwPLwCZefwVjY+pqKg1qcK03+/4= go.yaml.in/yaml/v3 v3.0.4 h1:tfq32ie2Jv2UxXFdLJdh3jXuOzWiL1fo0bu/FbuKpbc= go.yaml.in/yaml/v3 v3.0.4/go.mod h1:DhzuOOF2ATzADvBadXxruRBLzYTpT36CKvDb3+aBEFg= golang.org/x/crypto v0.0.0-20190308221718-c2843e01d9a2/go.mod h1:djNgcEr1/C05ACkg1iLfiJU5Ep61QUkGW8qpdssI0+w= @@ -170,14 +130,10 @@ golang.org/x/crypto v0.49.0 h1:+Ng2ULVvLHnJ/ZFEq4KdcDd/cfjrrjjNSXNzxg0Y4U4= golang.org/x/crypto v0.49.0/go.mod h1:ErX4dUh2UM+CFYiXZRTcMpEcN8b/1gxEuv3nODoYtCA= golang.org/x/lint v0.0.0-20200302205851-738671d3881b/go.mod h1:3xt1FjdF8hUf6vQPIChWIBhFzV8gjjsPE/fR3IyQdNY= golang.org/x/mod v0.1.1-0.20191105210325-c90efee705ee/go.mod h1:QqPTAvyqsEbceGzBzNggFXnrqF1CaUcvgkdR5Ot7KZg= -golang.org/x/mod v0.33.0 h1:tHFzIWbBifEmbwtGz65eaWyGiGZatSrT9prnU8DbVL8= -golang.org/x/mod v0.33.0/go.mod h1:swjeQEj+6r7fODbD2cqrnje9PnziFuw4bmLbBZFrQ5w= golang.org/x/mod v0.34.0 h1:xIHgNUUnW6sYkcM5Jleh05DvLOtwc6RitGHbDk4akRI= golang.org/x/mod v0.34.0/go.mod h1:ykgH52iCZe79kzLLMhyCUzhMci+nQj+0XkbXpNYtVjY= golang.org/x/net v0.0.0-20190404232315-eb5bcb51f2a3/go.mod h1:t9HGtf8HONx5eT2rtn7q6eTqICYqUVnKs3thJo3Qplg= golang.org/x/net v0.0.0-20190620200207-3b0461eec859/go.mod h1:z5CRVTTTmAJ677TzLLGU+0bjPO0LkuOLi4/5GtJWs/s= -golang.org/x/net v0.51.0 h1:94R/GTO7mt3/4wIKpcR5gkGmRLOuE/2hNGeWq/GBIFo= -golang.org/x/net v0.51.0/go.mod h1:aamm+2QF5ogm02fjy5Bb7CQ0WMt1/WVM7FtyaTLlA9Y= golang.org/x/net v0.52.0 h1:He/TN1l0e4mmR3QqHMT2Xab3Aj3L9qjbhRm78/6jrW0= golang.org/x/net v0.52.0/go.mod h1:R1MAz7uMZxVMualyPXb+VaqGSa3LIaUqk0eEt3w36Sw= golang.org/x/sync v0.0.0-20190423024810-112230192c58/go.mod h1:RxMgew5VJxzue5/jJTE5uejpjVlOe/izrB70Jof72aM= @@ -185,7 +141,6 @@ golang.org/x/sync v0.20.0 h1:e0PTpb7pjO8GAtTs2dQ6jYa5BWYlMuX047Dco/pItO4= golang.org/x/sync v0.20.0/go.mod h1:9xrNwdLfx4jkKbNva9FpL6vEN7evnE43NNNJQ2LF3+0= golang.org/x/sys v0.0.0-20190215142949-d0b11bdaac8a/go.mod h1:STP8DvDyc/dI5b8T5hshtkjS+E42TnysNCUPdjciGhY= golang.org/x/sys v0.0.0-20190412213103-97732733099d/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs= -golang.org/x/sys v0.0.0-20210616094352-59db8d763f22/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg= golang.org/x/sys v0.2.0/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg= golang.org/x/sys v0.10.0/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg= golang.org/x/sys v0.42.0 h1:omrd2nAlyT5ESRdCLYdm3+fMfNFE/+Rf4bDIQImRJeo= @@ -195,24 +150,12 @@ golang.org/x/term v0.41.0/go.mod h1:3pfBgksrReYfZ5lvYM0kSO0LIkAl4Yl2bXOkKP7Ec2A= golang.org/x/text v0.3.0/go.mod h1:NqM8EUOU14njkJ3fqMW+pc6Ldnwhi/IjpwHt7yyuwOQ= golang.org/x/text v0.35.0 h1:JOVx6vVDFokkpaq1AEptVzLTpDe9KGpj5tR4/X+ybL8= golang.org/x/text v0.35.0/go.mod h1:khi/HExzZJ2pGnjenulevKNX1W67CUy0AsXcNubPGCA= -golang.org/x/time v0.5.0 h1:o7cqy6amK/52YcAKIPlM3a+Fpj35zvRj2TP+e1xFSfk= -golang.org/x/time v0.5.0/go.mod h1:3BpzKBy/shNhVucY/MWOyx10tF3SFh9QdLuxbVysPQM= golang.org/x/time v0.11.0 h1:/bpjEDfN9tkoN/ryeYHnv5hcMlc8ncjMcM4XBk5NWV0= golang.org/x/time v0.11.0/go.mod h1:CDIdPxbZBQxdj6cxyCIdrNogrJKMJ7pr37NYpMcMDSg= golang.org/x/tools v0.0.0-20200130002326-2f3ba24bd6e7/go.mod h1:TB2adYChydJhpapKDTa4BR/hXlZSLoq2Wpct/0txZ28= -golang.org/x/tools v0.42.0 h1:uNgphsn75Tdz5Ji2q36v/nsFSfR/9BRFvqhGBaJGd5k= -golang.org/x/tools v0.42.0/go.mod h1:Ma6lCIwGZvHK6XtgbswSoWroEkhugApmsXyrUmBhfr0= golang.org/x/tools v0.43.0 h1:12BdW9CeB3Z+J/I/wj34VMl8X+fEXBxVR90JeMX5E7s= golang.org/x/tools v0.43.0/go.mod h1:uHkMso649BX2cZK6+RpuIPXS3ho2hZo4FVwfoy1vIk0= golang.org/x/xerrors v0.0.0-20191011141410-1b5146add898/go.mod h1:I/5z698sn9Ka8TeJc9MKroUUfqBBauWjQqLJ2OPfmY0= -google.golang.org/genproto/googleapis/api v0.0.0-20260209200024-4cfbd4190f57 h1:JLQynH/LBHfCTSbDWl+py8C+Rg/k1OVH3xfcaiANuF0= -google.golang.org/genproto/googleapis/api v0.0.0-20260209200024-4cfbd4190f57/go.mod h1:kSJwQxqmFXeo79zOmbrALdflXQeAYcUbgS7PbpMknCY= -google.golang.org/genproto/googleapis/rpc v0.0.0-20260209200024-4cfbd4190f57 h1:mWPCjDEyshlQYzBpMNHaEof6UX1PmHcaUODUywQ0uac= -google.golang.org/genproto/googleapis/rpc v0.0.0-20260209200024-4cfbd4190f57/go.mod h1:j9x/tPzZkyxcgEFkiKEEGxfvyumM01BEtsW8xzOahRQ= -google.golang.org/grpc v1.78.0 h1:K1XZG/yGDJnzMdd/uZHAkVqJE+xIDOcmdSFZkBUicNc= -google.golang.org/grpc v1.78.0/go.mod h1:I47qjTo4OKbMkjA/aOOwxDIiPSBofUtQUI5EfpWvW7U= -google.golang.org/protobuf v1.36.11 h1:fV6ZwhNocDyBLK0dj+fg8ektcVegBBuEolpbTQyBNVE= -google.golang.org/protobuf v1.36.11/go.mod h1:HTf+CrKn2C3g5S8VImy6tdcUvCska2kB7j23XfzDpco= gopkg.in/check.v1 v0.0.0-20161208181325-20d25e280405/go.mod h1:Co6ibVJAznAaIkqp8huTwlJQCZ016jof/cbN4VW5Yz0= gopkg.in/check.v1 v1.0.0-20201130134442-10cb98267c6c h1:Hei/4ADfdWqJk1ZMxUNpqntNwaWcugrBjAiHlqqRiVk= gopkg.in/check.v1 v1.0.0-20201130134442-10cb98267c6c/go.mod h1:JHkPIbrfpd72SG/EVd6muEfDQjcINNoR0C8j2r3qZ4Q= @@ -220,7 +163,9 @@ gopkg.in/tomb.v1 v1.0.0-20141024135613-dd632973f1e7 h1:uRGJdciOHaEIrze2W8Q3AKkep gopkg.in/tomb.v1 v1.0.0-20141024135613-dd632973f1e7/go.mod h1:dt/ZhP58zS4L8KSrWDmTeBkI65Dw0HsyUHuEVlX15mw= gopkg.in/yaml.v3 v3.0.1 h1:fxVm/GzAzEWqLHuvctI91KS9hhNmmWOoWu0XTYJS7CA= gopkg.in/yaml.v3 v3.0.1/go.mod h1:K4uyk7z7BCEPqu6E+C64Yfv1cQ7kz7rIZviUmN+EgEM= -gotest.tools/v3 v3.4.0 h1:ZazjZUfuVeZGLAmlKKuyv3IKP5orXcwtOwDQH6YVr6o= -gotest.tools/v3 v3.4.0/go.mod h1:CtbdzLSsqVhDgMtKsx03ird5YTGB3ar27v0u/yKBW5g= +gotest.tools/v3 v3.5.2 h1:7koQfIKdy+I8UTetycgUqXWSDwpgv193Ka+qRsmBY8Q= +gotest.tools/v3 v3.5.2/go.mod h1:LtdLGcnqToBH83WByAAi/wiwSFCArdFIUV/xxN4pcjA= gvisor.dev/gvisor v0.0.0-20240916094835-a174eb65023f h1:O2w2DymsOlM/nv2pLNWCMCYOldgBBMkD7H0/prN5W2k= gvisor.dev/gvisor v0.0.0-20240916094835-a174eb65023f/go.mod h1:sxc3Uvk/vHcd3tj7/DHVBoR5wvWT/MmRq2pj7HRJnwU= +pgregory.net/rapid v1.2.0 h1:keKAYRcjm+e1F0oAuU5F5+YPAWcyxNNRK2wud503Gnk= +pgregory.net/rapid v1.2.0/go.mod h1:PY5XlDGj0+V1FCq0o192FdRhpKHGTRIWBgqjDBTrq04= diff --git a/hypervisor/libkrun/backend.go b/hypervisor/libkrun/backend.go index 15e6ee1..c2aea5e 100644 --- a/hypervisor/libkrun/backend.go +++ b/hypervisor/libkrun/backend.go @@ -9,6 +9,9 @@ import ( "os" "path/filepath" + "go.opentelemetry.io/otel" + "go.opentelemetry.io/otel/codes" + "github.com/stacklok/go-microvm/extract" "github.com/stacklok/go-microvm/hypervisor" "github.com/stacklok/go-microvm/image" @@ -111,7 +114,13 @@ func (b *Backend) validate() error { // Start launches the VM via the go-microvm-runner subprocess. func (b *Backend) Start(ctx context.Context, cfg hypervisor.VMConfig) (hypervisor.VMHandle, error) { + tracer := otel.Tracer("github.com/stacklok/go-microvm") + ctx, span := tracer.Start(ctx, "microvm.backend.Start") + defer span.End() + if err := b.validate(); err != nil { + span.RecordError(err) + span.SetStatus(codes.Error, err.Error()) return nil, err } @@ -119,21 +128,33 @@ func (b *Backend) Start(ctx context.Context, cfg hypervisor.VMConfig) (hyperviso libDir := b.libDir if b.runtime != nil { + _, rtSpan := tracer.Start(ctx, "microvm.backend.ResolveRuntime") runtimeDir, err := b.runtime.Ensure(ctx, b.cacheDir) if err != nil { + rtSpan.RecordError(err) + rtSpan.SetStatus(codes.Error, err.Error()) + rtSpan.End() return nil, fmt.Errorf("resolve runtime: %w", err) } candidate := filepath.Join(runtimeDir, extract.RunnerBinaryName) if _, err := os.Stat(candidate); err != nil { + rtSpan.RecordError(err) + rtSpan.SetStatus(codes.Error, err.Error()) + rtSpan.End() return nil, fmt.Errorf("resolve runtime: %s not found at %s: %w", extract.RunnerBinaryName, candidate, err) } runnerPath = candidate libDir = runtimeDir + rtSpan.End() } if b.firmware != nil { + _, fwSpan := tracer.Start(ctx, "microvm.backend.ResolveFirmware") fwDir, err := b.firmware.Ensure(ctx, b.cacheDir) if err != nil { + fwSpan.RecordError(err) + fwSpan.SetStatus(codes.Error, err.Error()) + fwSpan.End() return nil, fmt.Errorf("resolve firmware: %w", err) } if libDir != "" { @@ -141,6 +162,7 @@ func (b *Backend) Start(ctx context.Context, cfg hypervisor.VMConfig) (hyperviso } else { libDir = fwDir } + fwSpan.End() } var netSocket string @@ -170,6 +192,8 @@ func (b *Backend) Start(ctx context.Context, cfg hypervisor.VMConfig) (hyperviso proc, err := spawner.Spawn(ctx, runCfg) if err != nil { + span.RecordError(err) + span.SetStatus(codes.Error, err.Error()) return nil, fmt.Errorf("spawn runner: %w", err) } diff --git a/image/pull.go b/image/pull.go index 00e3cb6..6ceb6e7 100644 --- a/image/pull.go +++ b/image/pull.go @@ -17,6 +17,11 @@ import ( "sync/atomic" "syscall" + "go.opentelemetry.io/otel" + "go.opentelemetry.io/otel/attribute" + "go.opentelemetry.io/otel/codes" + "go.opentelemetry.io/otel/trace" + "github.com/google/go-containerregistry/pkg/name" v1 "github.com/google/go-containerregistry/pkg/v1" "github.com/google/go-containerregistry/pkg/v1/mutate" @@ -40,6 +45,8 @@ func Pull(ctx context.Context, imageRef string, cache *Cache) (*RootFS, error) { // which tries the local Docker/Podman daemon first before falling back to // remote registry pull. func PullWithFetcher(ctx context.Context, imageRef string, cache *Cache, fetcher ImageFetcher) (*RootFS, error) { + tracer := otel.Tracer("github.com/stacklok/go-microvm") + if fetcher == nil { fetcher = NewLocalThenRemoteFetcher() } @@ -54,15 +61,24 @@ func PullWithFetcher(ctx context.Context, imageRef string, cache *Cache, fetcher // because daemon.Image() does a full "docker save" export. if cached := cache.LookupRef(ref.String()); cached != nil { slog.Debug("using ref-indexed cache hit", "ref", ref.String(), "path", cached.Path) + _, span := tracer.Start(ctx, "microvm.image.CacheLookup", + trace.WithAttributes(attribute.Bool("microvm.image.cache_hit", true))) + span.End() return cached, nil } slog.Debug("pulling image", "ref", ref.String()) + // Fetch image from daemon or registry. + _, fetchSpan := tracer.Start(ctx, "microvm.image.Fetch") img, err := fetcher.Pull(ctx, ref.String()) if err != nil { + fetchSpan.RecordError(err) + fetchSpan.SetStatus(codes.Error, err.Error()) + fetchSpan.End() return nil, fmt.Errorf("pull image %q: %w", imageRef, err) } + fetchSpan.End() // Compute the manifest digest for cache keying. digest, err := img.Digest() @@ -78,6 +94,9 @@ func PullWithFetcher(ctx context.Context, imageRef string, cache *Cache, fetcher if cache != nil { if cachedPath, ok := cache.Get(digestStr); ok { slog.Debug("using cached rootfs", "path", cachedPath) + _, span := tracer.Start(ctx, "microvm.image.CacheLookup", + trace.WithAttributes(attribute.Bool("microvm.image.cache_hit", true))) + span.End() ociCfg, err := extractOCIConfig(img) if err != nil { return nil, fmt.Errorf("extract OCI config: %w", err) @@ -109,26 +128,40 @@ func PullWithFetcher(ctx context.Context, imageRef string, cache *Cache, fetcher // Extract the filesystem. When a cache is available, use layered // extraction to benefit from per-layer caching. Falls back to flat // extraction if layered extraction fails. + _, extractSpan := tracer.Start(ctx, "microvm.image.Extract") if cache != nil { lc := cache.LayerCache() + extractSpan.SetAttributes(attribute.Bool("microvm.image.layered", true)) if err := extractImageLayered(ctx, img, tmpDir, lc); err != nil { slog.Warn("layered extraction failed, falling back to flat extraction", "err", err) + extractSpan.SetAttributes(attribute.Bool("microvm.image.layered", false)) // Clean tmpDir contents before retrying with flat extraction. _ = os.RemoveAll(tmpDir) if tmpDir, err = cache.TempDir(); err != nil { + extractSpan.RecordError(err) + extractSpan.SetStatus(codes.Error, err.Error()) + extractSpan.End() return nil, fmt.Errorf("create temp dir for rootfs: %w", err) } if err := extractImage(ctx, img, tmpDir); err != nil { _ = os.RemoveAll(tmpDir) + extractSpan.RecordError(err) + extractSpan.SetStatus(codes.Error, err.Error()) + extractSpan.End() return nil, fmt.Errorf("extract image layers: %w", err) } } } else { + extractSpan.SetAttributes(attribute.Bool("microvm.image.layered", false)) if err := extractImage(ctx, img, tmpDir); err != nil { _ = os.RemoveAll(tmpDir) + extractSpan.RecordError(err) + extractSpan.SetStatus(codes.Error, err.Error()) + extractSpan.End() return nil, fmt.Errorf("extract image layers: %w", err) } } + extractSpan.End() // Ensure the rootfs root directory itself is world-accessible and has // the override_stat xattr. The root dir is created by os.MkdirTemp @@ -144,8 +177,12 @@ func PullWithFetcher(ctx context.Context, imageRef string, cache *Cache, fetcher // modify the rootfs in place without a COW clone. rootfsPath := tmpDir if cache != nil { + _, cacheSpan := tracer.Start(ctx, "microvm.image.CacheStore") if err := cache.Put(digestStr, tmpDir); err != nil { _ = os.RemoveAll(tmpDir) + cacheSpan.RecordError(err) + cacheSpan.SetStatus(codes.Error, err.Error()) + cacheSpan.End() return nil, fmt.Errorf("cache rootfs: %w", err) } // After Put, the canonical path is in the cache. @@ -154,6 +191,7 @@ func PullWithFetcher(ctx context.Context, imageRef string, cache *Cache, fetcher } // Record ref→digest mapping and OCI config for next-run fast path. cache.StoreRef(ref.String(), digestStr, ociCfg) + cacheSpan.End() } return &RootFS{Path: rootfsPath, Config: ociCfg}, nil diff --git a/microvm.go b/microvm.go index 8f39c03..32158cb 100644 --- a/microvm.go +++ b/microvm.go @@ -27,6 +27,11 @@ import ( "syscall" "time" + "go.opentelemetry.io/otel" + "go.opentelemetry.io/otel/attribute" + "go.opentelemetry.io/otel/codes" + "go.opentelemetry.io/otel/trace" + "github.com/stacklok/go-microvm/guest/vmconfig" "github.com/stacklok/go-microvm/hooks" "github.com/stacklok/go-microvm/hypervisor" @@ -42,10 +47,24 @@ import ( // point for the happy path. For more control, use [Create] followed by // explicit lifecycle management. func Run(ctx context.Context, imageRef string, opts ...Option) (*VM, error) { + tracer := otel.Tracer("github.com/stacklok/go-microvm") + ctx, rootSpan := tracer.Start(ctx, "microvm.Run", + trace.WithAttributes( + attribute.String("microvm.image", imageRef), + )) + defer rootSpan.End() + cfg := defaultConfig() for _, opt := range opts { opt.apply(cfg) } + + rootSpan.SetAttributes( + attribute.String("microvm.name", cfg.name), + attribute.Int("microvm.cpus", int(cfg.cpus)), + attribute.Int("microvm.memory_mib", int(cfg.memory)), + ) + if cfg.cleanDataDir { if err := cleanDataDir(cfg); err != nil { return nil, err @@ -87,23 +106,40 @@ func Run(ctx context.Context, imageRef string, opts ...Option) (*VM, error) { } // 1. Preflight checks. - slog.Debug("running preflight checks") - if err := cfg.preflight.RunAll(ctx); err != nil { - return nil, fmt.Errorf("preflight: %w", err) + { + ctx, span := tracer.Start(ctx, "microvm.Preflight") + slog.Debug("running preflight checks") + if err := cfg.preflight.RunAll(ctx); err != nil { + span.RecordError(err) + span.SetStatus(codes.Error, err.Error()) + span.End() + return nil, fmt.Errorf("preflight: %w", err) + } + span.End() } // 2. Obtain rootfs: use pre-built path or pull OCI image. var rootfs *image.RootFS - if cfg.rootfsPath != "" { - slog.Debug("using pre-built rootfs", "path", cfg.rootfsPath) - rootfs = &image.RootFS{Path: cfg.rootfsPath, Config: nil} - } else { - slog.Debug("pulling image", "ref", imageRef) - var err error - rootfs, err = image.PullWithFetcher(ctx, imageRef, cfg.imageCache, cfg.imageFetcher) - if err != nil { - return nil, fmt.Errorf("pull image: %w", err) + { + ctx, span := tracer.Start(ctx, "microvm.ImagePull", + trace.WithAttributes(attribute.String("microvm.image_ref", imageRef))) + if cfg.rootfsPath != "" { + slog.Debug("using pre-built rootfs", "path", cfg.rootfsPath) + span.SetAttributes(attribute.Bool("microvm.image.prebuilt", true)) + rootfs = &image.RootFS{Path: cfg.rootfsPath, Config: nil} + } else { + slog.Debug("pulling image", "ref", imageRef) + var err error + rootfs, err = image.PullWithFetcher(ctx, imageRef, cfg.imageCache, cfg.imageFetcher) + if err != nil { + span.RecordError(err) + span.SetStatus(codes.Error, err.Error()) + span.End() + return nil, fmt.Errorf("pull image: %w", err) + } + span.SetAttributes(attribute.Bool("microvm.image.from_cache", rootfs.FromCache)) } + span.End() } // 2b. COW-clone cached rootfs so hooks and PrepareRootFS never @@ -111,29 +147,45 @@ func Run(ctx context.Context, imageRef string, opts ...Option) (*VM, error) { // writes .krun_config.json into the rootfs, so we must always // clone — not just when hooks are present. if rootfs.FromCache { + _, span := tracer.Start(ctx, "microvm.RootfsClone") workDir := filepath.Join(cfg.dataDir, "rootfs-work") if err := rootfspkg.CloneDir(rootfs.Path, workDir); err != nil { + span.RecordError(err) + span.SetStatus(codes.Error, err.Error()) + span.End() return nil, fmt.Errorf("clone rootfs: %w", err) } rootfs = &image.RootFS{Path: workDir, Config: rootfs.Config} + span.End() } // 3. Run rootfs hooks (no-op on happy path). - for _, hook := range cfg.rootfsHooks { - if err := hook(rootfs.Path, rootfs.Config); err != nil { - return nil, fmt.Errorf("rootfs hook: %w", err) + { + _, span := tracer.Start(ctx, "microvm.RootfsHooks", + trace.WithAttributes(attribute.Int("microvm.hook_count", len(cfg.rootfsHooks)))) + for _, hook := range cfg.rootfsHooks { + if err := hook(rootfs.Path, rootfs.Config); err != nil { + span.RecordError(err) + span.SetStatus(codes.Error, err.Error()) + span.End() + return nil, fmt.Errorf("rootfs hook: %w", err) + } } - } - // 3b. Inject VM config for the guest init (e.g. /tmp size, mount flags). - // Only written when non-default values are configured, keeping the - // file absent for callers that rely on built-in defaults. - guestVMCfg := buildVMConfig(cfg) - if guestVMCfg.TmpSizeMiB > 0 || len(guestVMCfg.VirtioFSMounts) > 0 { - vmCfgHook := hooks.InjectVMConfig(guestVMCfg) - if err := vmCfgHook(rootfs.Path, rootfs.Config); err != nil { - return nil, fmt.Errorf("inject vm config: %w", err) + // 3b. Inject VM config for the guest init (e.g. /tmp size, mount flags). + // Only written when non-default values are configured, keeping the + // file absent for callers that rely on built-in defaults. + guestVMCfg := buildVMConfig(cfg) + if guestVMCfg.TmpSizeMiB > 0 || len(guestVMCfg.VirtioFSMounts) > 0 { + vmCfgHook := hooks.InjectVMConfig(guestVMCfg) + if err := vmCfgHook(rootfs.Path, rootfs.Config); err != nil { + span.RecordError(err) + span.SetStatus(codes.Error, err.Error()) + span.End() + return nil, fmt.Errorf("inject vm config: %w", err) + } } + span.End() } // 4. Prepare rootfs via backend. @@ -142,12 +194,22 @@ func Run(ctx context.Context, imageRef string, opts ...Option) (*VM, error) { backend = libkrun.NewBackend() } initCfg := buildInitConfig(rootfs.Config, cfg) - preparedPath, err := backend.PrepareRootFS(ctx, rootfs.Path, initCfg) - if err != nil { - return nil, fmt.Errorf("prepare rootfs: %w", err) - } - if !isWithin(rootfs.Path, preparedPath) && preparedPath != rootfs.Path { - return nil, fmt.Errorf("backend returned rootfs path outside original: %s", preparedPath) + var preparedPath string + { + _, span := tracer.Start(ctx, "microvm.BackendPrepare") + var err error + preparedPath, err = backend.PrepareRootFS(ctx, rootfs.Path, initCfg) + if err != nil { + span.RecordError(err) + span.SetStatus(codes.Error, err.Error()) + span.End() + return nil, fmt.Errorf("prepare rootfs: %w", err) + } + if !isWithin(rootfs.Path, preparedPath) && preparedPath != rootfs.Path { + span.End() + return nil, fmt.Errorf("backend returned rootfs path outside original: %s", preparedPath) + } + span.End() } // 5. Start networking. @@ -160,15 +222,21 @@ func Run(ctx context.Context, imageRef string, opts ...Option) (*VM, error) { // external provider and pass its socket path to the runner instead. var netSocket string if cfg.netProvider != nil { + _, span := tracer.Start(ctx, "microvm.NetworkStart") slog.Debug("starting custom network provider") netCfg := cfg.buildNetConfig() if err := cfg.netProvider.Start(ctx, netCfg); err != nil { + span.RecordError(err) + span.SetStatus(codes.Error, err.Error()) + span.End() return nil, fmt.Errorf("networking: %w", err) } netSocket = cfg.netProvider.SocketPath() + span.End() } // 6. Start VM via backend. + _, vmSpawnSpan := tracer.Start(ctx, "microvm.VMSpawn") slog.Debug("starting VM") var netEndpoint hypervisor.NetEndpoint if netSocket != "" { @@ -192,8 +260,12 @@ func Run(ctx context.Context, imageRef string, opts ...Option) (*VM, error) { if cfg.netProvider != nil { cfg.netProvider.Stop() } + vmSpawnSpan.RecordError(err) + vmSpawnSpan.SetStatus(codes.Error, err.Error()) + vmSpawnSpan.End() return nil, fmt.Errorf("spawn vm: %w", err) } + vmSpawnSpan.End() vm := &VM{ name: cfg.name, @@ -229,13 +301,20 @@ func Run(ctx context.Context, imageRef string, opts ...Option) (*VM, error) { } // 7. Post-boot hooks (no-op on happy path). - for _, hook := range cfg.postBootHooks { - if err := hook(ctx, vm); err != nil { - if stopErr := vm.Stop(ctx); stopErr != nil { - slog.Warn("failed to stop VM after post-boot hook failure", "error", stopErr) + { + _, span := tracer.Start(ctx, "microvm.PostBoot") + for _, hook := range cfg.postBootHooks { + if err := hook(ctx, vm); err != nil { + span.RecordError(err) + span.SetStatus(codes.Error, err.Error()) + span.End() + if stopErr := vm.Stop(ctx); stopErr != nil { + slog.Warn("failed to stop VM after post-boot hook failure", "error", stopErr) + } + return nil, fmt.Errorf("post-boot hook: %w", err) } - return nil, fmt.Errorf("post-boot hook: %w", err) } + span.End() } slog.Info("VM running", "name", cfg.name, "id", handle.ID()) diff --git a/preflight/checker.go b/preflight/checker.go index b7bcfde..16bbfed 100644 --- a/preflight/checker.go +++ b/preflight/checker.go @@ -14,6 +14,11 @@ import ( "errors" "fmt" "log/slog" + + "go.opentelemetry.io/otel" + "go.opentelemetry.io/otel/attribute" + "go.opentelemetry.io/otel/codes" + "go.opentelemetry.io/otel/trace" ) // Check represents a single preflight verification. @@ -71,9 +76,20 @@ func (c *checker) Register(check Check) { // are collected and returned as a combined error. Non-required check failures // are logged as warnings but do not cause RunAll to return an error. func (c *checker) RunAll(ctx context.Context) error { + tracer := otel.Tracer("github.com/stacklok/go-microvm") + ctx, span := tracer.Start(ctx, "microvm.preflight.RunAll", + trace.WithAttributes(attribute.Int("preflight.check_count", len(c.checks)))) + defer span.End() + var errs []error for _, check := range c.checks { + _, checkSpan := tracer.Start(ctx, "microvm.preflight.Check", + trace.WithAttributes( + attribute.String("preflight.check.name", check.Name), + attribute.Bool("preflight.check.required", check.Required), + )) + slog.Debug("running preflight check", "name", check.Name, "description", check.Description, @@ -81,7 +97,9 @@ func (c *checker) RunAll(ctx context.Context) error { ) if err := check.Run(ctx); err != nil { + checkSpan.RecordError(err) if check.Required { + checkSpan.SetStatus(codes.Error, err.Error()) slog.Error("preflight check failed", "name", check.Name, "error", err, @@ -96,10 +114,14 @@ func (c *checker) RunAll(ctx context.Context) error { } else { slog.Debug("preflight check passed", "name", check.Name) } + checkSpan.End() } if len(errs) > 0 { - return fmt.Errorf("preflight checks failed: %w", errors.Join(errs...)) + combinedErr := fmt.Errorf("preflight checks failed: %w", errors.Join(errs...)) + span.RecordError(combinedErr) + span.SetStatus(codes.Error, combinedErr.Error()) + return combinedErr } return nil diff --git a/ssh/client.go b/ssh/client.go index be67f05..2ad51f2 100644 --- a/ssh/client.go +++ b/ssh/client.go @@ -14,6 +14,11 @@ import ( "strings" "time" + "go.opentelemetry.io/otel" + "go.opentelemetry.io/otel/attribute" + "go.opentelemetry.io/otel/codes" + "go.opentelemetry.io/otel/trace" + "golang.org/x/crypto/ssh" ) @@ -212,6 +217,15 @@ func (c *Client) CopyFrom(ctx context.Context, remotePath, localPath string) err // or the context is cancelled. This is used to wait for the guest VM's // SSH server to come up after boot. func (c *Client) WaitForReady(ctx context.Context) error { + tracer := otel.Tracer("github.com/stacklok/go-microvm") + ctx, span := tracer.Start(ctx, "microvm.SSHWaitReady", + trace.WithAttributes( + attribute.String("ssh.host", c.host), + attribute.Int("ssh.port", int(c.port)), + attribute.String("ssh.user", c.user), + )) + defer span.End() + slog.Info("waiting for SSH to become ready", "host", c.host, "port", c.port, @@ -221,15 +235,25 @@ func (c *Client) WaitForReady(ctx context.Context) error { ticker := time.NewTicker(sshWaitPollInterval) defer ticker.Stop() + probeCount := 0 for { select { case <-ctx.Done(): - return fmt.Errorf("context cancelled waiting for SSH: %w", ctx.Err()) + err := fmt.Errorf("context cancelled waiting for SSH: %w", ctx.Err()) + span.RecordError(err) + span.SetStatus(codes.Error, err.Error()) + return err case <-ticker.C: + probeCount++ if err := c.probe(ctx); err != nil { slog.Debug("SSH not ready yet", "error", err) + span.AddEvent("ssh.probe_failed", trace.WithAttributes( + attribute.Int("ssh.probe_count", probeCount), + attribute.String("error", err.Error()), + )) continue } + span.SetAttributes(attribute.Int("ssh.probes_total", probeCount)) slog.Info("SSH is ready", "host", c.host, "port", c.port,