adobe · mgar · Jun 9, 2026 · Jun 9, 2026 · Jun 10, 2026
diff --git a/README.md b/README.md
@@ -92,9 +92,9 @@ AIO_TELEMETRY_DISABLED=yes aio app deploy
 
 Telemetry is **best-effort**: events are not persisted when the proxy is down or the network fails.
 
-On **`postrun`**, any in-memory metrics from earlier hooks in the same command are merged with the `postrun` metric and the combined batch is handed off to a **fire-and-forget detached subprocess** (`src/flush-worker.js`). The parent CLI spawns the worker and immediately `unref()`s it, so the CLI can exit without waiting for the HTTP POST. If the POST fails (network error or non-2xx response), the batch is dropped; telemetry must not block or slow normal CLI use.
+A flush happens on a **terminal event** — `postrun` (command succeeded), `command-error` (command threw), or `command-not-found`. oclif runs `postrun` only on success, so `command-error`/`command-not-found` must flush on their own; otherwise error telemetry (the most valuable signal) would be buffered and silently dropped on exit. On a terminal event, any in-memory metrics from earlier hooks in the same command are merged with the terminal metric and the combined batch is handed off to a **fire-and-forget detached subprocess** (`src/flush-worker.js`). The parent CLI spawns the worker and immediately `unref()`s it, so the CLI can exit without waiting for the HTTP POST. If the POST fails (network error or non-2xx response), the batch is dropped; telemetry must not block or slow normal CLI use.
 
-Non-`postrun` events (for example `command-error`, `telemetry-notice`) are held in an **in-memory buffer** until that flush. If the process exits before `postrun` (crash, `SIGKILL`), buffered events are lost. The buffer is cleared when telemetry is disabled or when `init` runs again (new command session).
+Non-terminal events (for example `telemetry-notice`) are held in an **in-memory buffer** until the next terminal event flushes them. If the process exits before any terminal event (crash, `SIGKILL`), buffered events are lost. The buffer is cleared when telemetry is disabled or when `init` runs again (new command session).
 
 ## Agent detection
 
@@ -134,7 +134,7 @@ Example shape of the metric payload:
     "value": 1,
     "timestamp": 1673404918437,
     "attributes": {
-      "eventType": "postrun",
+      "eventName": "postrun",
       "eventData": "{}",
       "cliVersion": "@adobe/aio-cli@11.0.2",
       "clientId": 264421030538,

diff --git a/src/telemetry-lib.js b/src/telemetry-lib.js
@@ -26,9 +26,18 @@ function isEnvTelemetryDisabled () {
 
 let isDisabledForCommand = false
 
-/** Metrics for non-`postrun` events in the current command; merged into one POST on `postrun`. */
+/** Metrics for non-terminal events in the current command; merged into the POST on a terminal event. */
 const pendingCommandMetrics = []
 
+/** Events with no `postrun` after them; each flushes immediately so error telemetry isn't lost. */
+const TERMINAL_EVENTS = ['postrun', 'command-error', 'command-not-found']
+
+/** Events that mean the command did not succeed (so `commandSuccess` is false). */
+const FAILED_COMMAND_EVENTS = ['command-error', 'command-not-found']
+
+/** Set by `command-not-found` so we can drop the host's immediate `command-error` rethrow of the same typo. */
+let commandNotFoundFired = false
+
 /**
  * Detects GitHub Copilot Chat on PATH via the extension id in globalStorage paths (any OS path separator).
  *
@@ -173,9 +182,10 @@ function formatEventDataAttribute (eventData) {
 }
 
 /**
- * Records a telemetry event. Non-`postrun` metrics are held in memory and sent in a single
- * batched POST when `postrun` runs. When enabled, the flush worker is detached so the CLI
- * never waits on the network; failed deliveries are dropped (no disk queue).
+ * Records a telemetry event. Non-terminal metrics are held in memory and sent in a single
+ * batched POST on the next terminal event (`postrun`, `command-error`, `command-not-found`).
+ * When enabled, the flush worker is detached so the CLI never waits on the network; failed
+ * deliveries are dropped (no disk queue).
  *
  * @param {string} eventType prerun, postrun, command-error, command-not-found, telemetry
  * @param {object|string|number|undefined} [rawEventData] Optional hook payload (e.g. `{ message }` on errors).
@@ -204,26 +214,38 @@ async function trackEvent (eventType, rawEventData = {}) {
       value: 1,
       timestamp,
       attributes: {
-        eventType,
+        // NB: the wire attribute is `eventName`, not `eventType` — `eventType` is a New Relic
+        // reserved word and is dropped on Metric API ingest, so it is unqueryable in NRQL.
+        eventName: eventType,
         eventData: formatEventDataAttribute(eventData),
         cliVersion: rootCliVersion,
         clientId,
         command: prerunEvent.command,
         commandDuration: timestamp - prerunEvent.start,
         commandFlags: prerunEvent.flags.toString(),
-        commandSuccess: eventType !== 'command-error',
+        commandSuccess: !FAILED_COMMAND_EVENTS.includes(eventType),
         nodeVersion: process.version,
         osNameVersion,
         invocation_context: /* istanbul ignore next */ invocationContext.isAgent ? 'agent' : 'human',
         agent_name: /* istanbul ignore next */ invocationContext.agentName || 'unknown'
       }
     }
 
-    if (eventType !== 'postrun') {
+    // Non-terminal events buffer; terminal events flush (oclif runs no postrun after error/not-found).
+    if (!TERMINAL_EVENTS.includes(eventType)) {
       pendingCommandMetrics.push(metric)
       return
     }
 
+    // A typo fires command-not-found, then the host rethrows it as command-error; drop that rethrow.
+    if (eventType === 'command-not-found') {
+      commandNotFoundFired = true
+    } else if (eventType === 'command-error' && commandNotFoundFired) {
+      commandNotFoundFired = false
+      debug('dropping command-error that rethrows a command-not-found (same typo)')
+      return
+    }
+
     const mergedMetrics = [...pendingCommandMetrics, metric]
     pendingCommandMetrics.length = 0
     const mergedBody = JSON.stringify([{ metrics: mergedMetrics }])
@@ -253,12 +275,15 @@ async function trackEvent (eventType, rawEventData = {}) {
  */
 function trackPrerun (command, flags, start) {
   prerunEvent = { command, flags, start }
+  // A real command is now running (e.g. an accepted "did you mean" suggestion), so its errors count.
+  commandNotFoundFired = false
 }
 
 module.exports = {
   getInvocationContext,
   init: (versionString, binName, remoteConf = {}) => {
     pendingCommandMetrics.length = 0
+    commandNotFoundFired = false
     global.commandHookStartTime = Date.now()
     rootCliVersion = versionString
     postUrl = remoteConf.postUrl || process.env.AIO_TELEMETRY_POST_URL || DEFAULT_TELEMETRY_POST_URL

diff --git a/test/flush-worker.test.js b/test/flush-worker.test.js
@@ -16,7 +16,7 @@ const fetch = createFetch()
 const { main } = require('../src/flush-worker')
 
 const PROXY = 'https://telemetry-proxy.example/api/v1/web/dx-excshell-1/telemetry'
-const METRIC = { name: 'aio.cli.telemetry', type: 'gauge', value: 1, timestamp: 1000, attributes: { eventType: 'postrun' } }
+const METRIC = { name: 'aio.cli.telemetry', type: 'gauge', value: 1, timestamp: 1000, attributes: { eventName: 'postrun' } }
 const BODY = JSON.stringify([{ metrics: [METRIC] }])
 const flushArg = (body = BODY, extraHeaders) => {
   const base = { body, postUrl: PROXY }

diff --git a/test/hooks.test.js b/test/hooks.test.js
@@ -41,32 +41,32 @@ describe('hook interfaces', () => {
     noticeSpy.mockRestore()
   })
 
-  test('command-error', async () => {
+  // oclif does not run `postrun` after a command throws, so the error hook itself must flush.
+  test('command-error flushes immediately (no postrun follows an error)', async () => {
     config.get.mockImplementation((key) => (String(key).includes('optOut') ? false : 'clientid'))
     telemetryLib.init('name@0.0.1', 'aio', mockPackageJson.aioTelemetry)
     const hook = require('../src/hooks/command-error')
     expect(typeof hook).toBe('function')
     await hook({ message: 'msg' })
-    expect(spawn).not.toHaveBeenCalled()
-    await telemetryLib.trackEvent('postrun')
     expect(spawn).toHaveBeenCalledTimes(1)
     const flushPayload = JSON.parse(spawn.mock.calls[0][1][1])
     const body = JSON.parse(flushPayload.body)
-    expect(body[0].metrics.map((m) => m.attributes.eventType)).toEqual(['command-error', 'postrun'])
+    expect(body[0].metrics.map((m) => m.attributes.eventName)).toEqual(['command-error'])
+    expect(body[0].metrics[0].attributes.commandSuccess).toBe(false)
   })
 
-  test('command-not-found', async () => {
+  // command-not-found is terminal too: no command runs, so no postrun.
+  test('command-not-found flushes immediately', async () => {
     config.get.mockImplementation((key) => (String(key).includes('optOut') ? false : 'clientid'))
     telemetryLib.init('name@0.0.1', 'aio', mockPackageJson.aioTelemetry)
     const hook = require('../src/hooks/command-not-found')
     expect(typeof hook).toBe('function')
     await hook({ id: 'id' })
-    expect(spawn).not.toHaveBeenCalled()
-    await telemetryLib.trackEvent('postrun')
     expect(spawn).toHaveBeenCalledTimes(1)
     const flushPayloadNf = JSON.parse(spawn.mock.calls[0][1][1])
     const bodyNf = JSON.parse(flushPayloadNf.body)
-    expect(bodyNf[0].metrics.map((m) => m.attributes.eventType)).toEqual(['command-not-found', 'postrun'])
+    expect(bodyNf[0].metrics.map((m) => m.attributes.eventName)).toEqual(['command-not-found'])
+    expect(bodyNf[0].metrics[0].attributes.commandSuccess).toBe(false)
   })
 
   test('init shows one-time notice on first run', async () => {
@@ -83,7 +83,7 @@ describe('hook interfaces', () => {
     expect(spawn).toHaveBeenCalledTimes(1)
     const flushPayloadAcc = JSON.parse(spawn.mock.calls[0][1][1])
     const bodyAcc = JSON.parse(flushPayloadAcc.body)
-    expect(bodyAcc[0].metrics.map((m) => m.attributes.eventType)).toEqual(['telemetry-notice', 'postrun'])
+    expect(bodyAcc[0].metrics.map((m) => m.attributes.eventName)).toEqual(['telemetry-notice', 'postrun'])
     expect(bodyAcc[0].metrics[0].attributes.eventData).toBe('shown')
     process.env = preEnv
   })
@@ -158,7 +158,7 @@ describe('hook interfaces', () => {
     expect(spawn).toHaveBeenCalledTimes(1)
     const flushPayloadCe = JSON.parse(spawn.mock.calls[0][1][1])
     const bodyCe = JSON.parse(flushPayloadCe.body)
-    expect(bodyCe[0].metrics.map((m) => m.attributes.eventType)).toEqual(['telemetry-custom-event', 'postrun'])
+    expect(bodyCe[0].metrics.map((m) => m.attributes.eventName)).toEqual(['telemetry-custom-event', 'postrun'])
   })
 
   test('postrun', async () => {
@@ -170,7 +170,7 @@ describe('hook interfaces', () => {
     expect(spawn).toHaveBeenCalledTimes(1)
     const flushPayload = JSON.parse(spawn.mock.calls[0][1][1])
     expect(flushPayload.headers).toBeUndefined()
-    expect(flushPayload.body).toContain('"eventType":"postrun"')
+    expect(flushPayload.body).toContain('"eventName":"postrun"')
   })
 
   /**

diff --git a/test/telemetry-lib.test.js b/test/telemetry-lib.test.js
@@ -78,8 +78,77 @@ describe('telemetry-lib', () => {
     const flushPayload = JSON.parse(spawn.mock.calls[0][1][1])
     const body = JSON.parse(flushPayload.body)
     expect(body[0].metrics).toHaveLength(2)
-    expect(body[0].metrics[0].attributes.eventType).toBe('telemetry-custom-event')
-    expect(body[0].metrics[1].attributes.eventType).toBe('postrun')
+    expect(body[0].metrics[0].attributes.eventName).toBe('telemetry-custom-event')
+    expect(body[0].metrics[1].attributes.eventName).toBe('postrun')
+  })
+
+  test('terminal command-error flushes buffered events without a postrun', async () => {
+    config.get.mockReturnValue('clientidxyz')
+    telemetryLib.init('a@4', 'binErr', {})
+    await telemetryLib.trackEvent('telemetry-custom-event')
+    expect(spawn).not.toHaveBeenCalled()
+    await telemetryLib.trackEvent('command-error', { message: 'boom' })
+    expect(spawn).toHaveBeenCalledTimes(1)
+    const flushPayload = JSON.parse(spawn.mock.calls[0][1][1])
+    const body = JSON.parse(flushPayload.body)
+    expect(body[0].metrics.map((m) => m.attributes.eventName)).toEqual(['telemetry-custom-event', 'command-error'])
+  })
+
+  test('a typo (command-not-found then host command-error) is recorded as a single command-not-found', async () => {
+    config.get.mockReturnValue('clientidxyz')
+    telemetryLib.init('a@4', 'binOnce', {})
+    // a typo fires command-not-found (from oclif) and then command-error (rethrown by the host)
+    await telemetryLib.trackEvent('command-not-found', 'bogus')
+    await telemetryLib.trackEvent('command-error', { message: 'Run aio help' })
+    expect(spawn).toHaveBeenCalledTimes(1)
+    const flushPayload = JSON.parse(spawn.mock.calls[0][1][1])
+    const body = JSON.parse(flushPayload.body)
+    expect(body[0].metrics.map((m) => m.attributes.eventName)).toEqual(['command-not-found'])
+  })
+
+  test('only the first command-error after a not-found is suppressed; a later one still flushes', async () => {
+    config.get.mockReturnValue('clientidxyz')
+    telemetryLib.init('a@4', 'binReset', {})
+    await telemetryLib.trackEvent('command-not-found', 'bogus') // flush #1
+    await telemetryLib.trackEvent('command-error', { message: 'Run aio help' }) // dropped: the typo rethrow
+    await telemetryLib.trackEvent('command-error', { message: 'unrelated later error' }) // flush #2
+    expect(spawn).toHaveBeenCalledTimes(2)
+    expect(JSON.parse(JSON.parse(spawn.mock.calls[0][1][1]).body)[0].metrics.map((m) => m.attributes.eventName)).toEqual(['command-not-found'])
+    expect(JSON.parse(JSON.parse(spawn.mock.calls[1][1][1]).body)[0].metrics.map((m) => m.attributes.eventName)).toEqual(['command-error'])
+  })
+
+  test('"Did you mean ...? Yes": not-found then the suggested command runs and its postrun flushes', async () => {
+    config.get.mockReturnValue('clientidxyz')
+    telemetryLib.init('a@4', 'binYes', {})
+    await telemetryLib.trackEvent('command-not-found', 'telemetryd') // flush #1
+    telemetryLib.trackPrerun('telemetry', [], Date.now()) // suggestion accepted -> command runs
+    await telemetryLib.trackEvent('postrun') // flush #2
+    expect(spawn).toHaveBeenCalledTimes(2)
+    expect(JSON.parse(JSON.parse(spawn.mock.calls[1][1][1]).body)[0].metrics.map((m) => m.attributes.eventName)).toEqual(['postrun'])
+  })
+
+  test('"Did you mean ...? Yes" then the suggested command errors: that command-error is NOT masked', async () => {
+    config.get.mockReturnValue('clientidxyz')
+    telemetryLib.init('a@4', 'binYesErr', {})
+    await telemetryLib.trackEvent('command-not-found', 'telemetryd') // flush #1
+    telemetryLib.trackPrerun('telemetry', [], Date.now()) // suggestion accepted -> command runs (clears the flag)
+    await telemetryLib.trackEvent('command-error', { message: 'the real command failed' }) // flush #2
+    expect(spawn).toHaveBeenCalledTimes(2)
+    expect(JSON.parse(JSON.parse(spawn.mock.calls[1][1][1]).body)[0].metrics.map((m) => m.attributes.eventName)).toEqual(['command-error'])
+  })
+
+  test('nested commands in one process each flush (no masking): two postruns => two flushes', async () => {
+    // A command that calls config.runCommand re-fires prerun/postrun for the child; init runs once.
+    // The child postrun must NOT suppress the parent terminal event.
+    config.get.mockReturnValue('clientidxyz')
+    telemetryLib.init('a@4', 'binNested', {})
+    telemetryLib.trackPrerun('app:clean', [], Date.now())
+    await telemetryLib.trackEvent('postrun') // child (e.g. app:clean) succeeds
+    telemetryLib.trackPrerun('app:undeploy', [], Date.now())
+    await telemetryLib.trackEvent('command-error', { message: 'parent failed after child succeeded' })
+    expect(spawn).toHaveBeenCalledTimes(2)
+    expect(JSON.parse(JSON.parse(spawn.mock.calls[0][1][1]).body)[0].metrics.map((m) => m.attributes.eventName)).toEqual(['postrun'])
+    expect(JSON.parse(JSON.parse(spawn.mock.calls[1][1][1]).body)[0].metrics.map((m) => m.attributes.eventName)).toEqual(['command-error'])
   })
 
   test('postrun adds durationMs to eventData when the hook omits payload and prerunTimer is set', async () => {