From 86d1e872df39067d5da645477d417bb17e04e2fd Mon Sep 17 00:00:00 2001
From: Douglas Q Hawkins
Date: Tue, 9 Jun 2026 13:06:31 -0400
Subject: [PATCH 01/35] WIP: tagId-routed TagMap experiment (KnownTags + tests/benchmarks)

Experimental branch for the known-tag tagId routing idea: KnownTags maps a 64-bit tagId ([63-48 globalSerial][47-32 fieldPos][31-0 nameHash]) to tag names via a registered Resolver, so TagMap can route known tags by slot instead of hashing strings.

INCOMPLETE / DOES NOT COMPILE YET. These files depend on TagMap.java changes that were lost from the working tree (uncommitted, never committed on any branch, not stashed, not in the TracerProto prototype):

- TagMap.Entry.tagId field (read as entry.tagId / status.tagId in TagMapTagIdTest)
- TagMap.set(long tagId, String) / ledger().set(long, ...) overload (used by the test and TagMapInsertionBenchmark via readMap.set(IDS[i], VALUES[i]))

KnownTags.java itself is self-contained. To make this branch build, the TagMap.Entry.tagId field and the set-by-id ledger path must be reconstructed (see KnownTags' bit-layout doc and TagMapTagIdTest for the expected API).

Based on master to keep the experiment independent of the CSS v1.3.0 stack.

Co-Authored-By: Claude Sonnet 4.6 --- .../api/TagMapInsertionBaselineBenchmark.java | 69 +++++++ .../trace/api/TagMapInsertionBenchmark.java | 152 ++++++++++++++++ .../java/datadog/trace/api/KnownTags.java | 66 +++++++ .../datadog/trace/api/TagMapTagIdTest.java | 168 ++++++++++++++++++ 4 files changed, 455 insertions(+) create mode 100644 internal-api/src/jmh/java/datadog/trace/api/TagMapInsertionBaselineBenchmark.java create mode 100644 internal-api/src/jmh/java/datadog/trace/api/TagMapInsertionBenchmark.java create mode 100644 internal-api/src/main/java/datadog/trace/api/KnownTags.java create mode 100644 internal-api/src/test/java/datadog/trace/api/TagMapTagIdTest.java diff --git a/internal-api/src/jmh/java/datadog/trace/api/TagMapInsertionBaselineBenchmark.java b/internal-api/src/jmh/java/datadog/trace/api/TagMapInsertionBaselineBenchmark.java new file mode 100644 index 00000000000..26e49376e16 --- /dev/null +++ b/internal-api/src/jmh/java/datadog/trace/api/TagMapInsertionBaselineBenchmark.java @@ -0,0 +1,69 @@ +package datadog.trace.api; + +import java.util.concurrent.TimeUnit; +import org.openjdk.jmh.annotations.Benchmark; +import org.openjdk.jmh.annotations.BenchmarkMode; +import org.openjdk.jmh.annotations.Fork; +import org.openjdk.jmh.annotations.Level; +import org.openjdk.jmh.annotations.Measurement; +import org.openjdk.jmh.annotations.Mode; +import org.openjdk.jmh.annotations.OutputTimeUnit; +import org.openjdk.jmh.annotations.Scope; +import org.openjdk.jmh.annotations.Setup; +import org.openjdk.jmh.annotations.State; +import org.openjdk.jmh.annotations.Threads; +import org.openjdk.jmh.annotations.Warmup; +import org.openjdk.jmh.infra.Blackhole; + +/** + * Master-equivalent control for {@link TagMapInsertionBenchmark}: string insertion / lookup with NO + * {@link KnownTags.Resolver} registered, so every tag uses the hash buckets (no slot routing, no + * keyOf). 
This mirrors how master behaves and isolates the comparison "automatic insertion by id + * (this branch) vs the pre-feature string baseline". + * + *

Runs in its own benchmark class so each method's fork has no resolver registered (the resolver + * is global static; {@code TagMapInsertionBenchmark} registers one in its own forks). + */ +@BenchmarkMode(Mode.Throughput) +@OutputTimeUnit(TimeUnit.SECONDS) +@Fork(1) +@Warmup(iterations = 3) +@Measurement(iterations = 5) +@Threads(1) +@State(Scope.Benchmark) +public class TagMapInsertionBaselineBenchmark { + // same tag set as TagMapInsertionBenchmark for an apples-to-apples comparison + static final String[] NAMES = TagMapInsertionBenchmark.NAMES; + + static final Object[] VALUES = new Object[NAMES.length]; + + TagMap readMap; + + @Setup(Level.Trial) + public void setup() { + KnownTags.register(null); // no resolver: pure string / bucket path, like master + for (int i = 0; i < NAMES.length; ++i) { + VALUES[i] = "value-" + i; + } + this.readMap = TagMap.create(); + for (int i = 0; i < NAMES.length; ++i) { + this.readMap.set(NAMES[i], VALUES[i]); + } + } + + @Benchmark + public TagMap insertByString_noResolver() { + TagMap map = TagMap.create(); + for (int i = 0; i < NAMES.length; ++i) { + map.set(NAMES[i], VALUES[i]); + } + return map; + } + + @Benchmark + public void getByString_noResolver(Blackhole bh) { + for (int i = 0; i < NAMES.length; ++i) { + bh.consume(this.readMap.getEntry(NAMES[i])); + } + } +} diff --git a/internal-api/src/jmh/java/datadog/trace/api/TagMapInsertionBenchmark.java b/internal-api/src/jmh/java/datadog/trace/api/TagMapInsertionBenchmark.java new file mode 100644 index 00000000000..c39fb9dc379 --- /dev/null +++ b/internal-api/src/jmh/java/datadog/trace/api/TagMapInsertionBenchmark.java @@ -0,0 +1,152 @@ +package datadog.trace.api; + +import datadog.trace.api.TagMap.Entry; +import java.util.concurrent.TimeUnit; +import org.openjdk.jmh.annotations.Benchmark; +import org.openjdk.jmh.annotations.BenchmarkMode; +import org.openjdk.jmh.annotations.Fork; +import org.openjdk.jmh.annotations.Level; +import org.openjdk.jmh.annotations.Measurement; 
+import org.openjdk.jmh.annotations.Mode; +import org.openjdk.jmh.annotations.OutputTimeUnit; +import org.openjdk.jmh.annotations.Scope; +import org.openjdk.jmh.annotations.Setup; +import org.openjdk.jmh.annotations.State; +import org.openjdk.jmh.annotations.TearDown; +import org.openjdk.jmh.annotations.Threads; +import org.openjdk.jmh.annotations.Warmup; +import org.openjdk.jmh.infra.Blackhole; + +/** + * Compares tag insertion / lookup by generated tag id vs by string name, with a {@link + * KnownTags.Resolver} registered (the production configuration once code generation is live). + * + *

Placed in {@code datadog.trace.api} so it can build tag ids with the same {@code nameHash} the + * runtime uses ({@link TagMap.Entry#_hash}); a mismatch would only matter on the bucket-fallback + * path, but keeping it exact makes the comparison faithful. + * + *

The tags use distinct {@code fieldPos} values (no collisions), so every known tag lands in its + * positional slot. byId skips string hashing and the keyOf round-trip entirely; byString pays + * keyOf(name) to resolve the id before slotting. + */ +@BenchmarkMode(Mode.Throughput) +@OutputTimeUnit(TimeUnit.SECONDS) +@Fork(1) +@Warmup(iterations = 3) +@Measurement(iterations = 5) +@Threads(1) +@State(Scope.Benchmark) +public class TagMapInsertionBenchmark { + // a representative HTTP-server-ish tag set + static final String[] NAMES = { + "http.request.method", + "http.response.status_code", + "http.route", + "url.path", + "url.scheme", + "server.address", + "server.port", + "client.address", + "network.protocol.version", + "user_agent.original", + "span.kind", + "component", + "language", + "error", + "resource.name", + "service.name", + "operation.name", + "env", + }; + + static final long[] IDS = new long[NAMES.length]; + static final Object[] VALUES = new Object[NAMES.length]; + + // a pre-populated (slotted) map for the read benchmarks; built in setup once IDS exist + TagMap readMap; + + static int nameHash(String tag) { + int hash = tag.hashCode(); + return hash == 0 ? 0xDD06 : hash ^ (hash >>> 16); + } + + @Setup(Level.Trial) + public void setup() { + for (int i = 0; i < NAMES.length; ++i) { + // globalSerial = i + 1 (unique, non-zero); fieldPos = i (distinct - no collisions) + IDS[i] = ((long) (i + 1) << 48) | ((long) i << 32) | (nameHash(NAMES[i]) & 0xFFFFFFFFL); + VALUES[i] = "value-" + i; + } + // Representative resolver: nameOf is a dense array index by globalSerial; keyOf is a hash-table + // lookup (a stand-in for a generated minimal-perfect-hash / open-addressed name->id table). + // A linear scan here would make insertByString look artificially bad and misrepresent the cost. 
+ final java.util.HashMap nameToId = new java.util.HashMap<>(NAMES.length * 2); + for (int i = 0; i < NAMES.length; ++i) { + nameToId.put(NAMES[i], IDS[i]); + } + KnownTags.register( + new KnownTags.Resolver() { + @Override + public String nameOf(long tagId) { + int globalSerial = (int) (tagId >>> 48); + return (globalSerial >= 1 && globalSerial <= NAMES.length) + ? NAMES[globalSerial - 1] + : null; + } + + @Override + public long keyOf(String name) { + Long id = nameToId.get(name); + return id == null ? 0L : id; + } + }); + // sanity: assert _hash matches our nameHash so string lookups hit the same bucket if they ever + // fall through (they shouldn't here, but keep the comparison honest) + if (Entry._hash(NAMES[0]) != nameHash(NAMES[0])) { + throw new IllegalStateException("nameHash mismatch with TagMap.Entry._hash"); + } + + // pre-populate the read map by id (entries land in their slots) + this.readMap = TagMap.create(); + for (int i = 0; i < IDS.length; ++i) { + this.readMap.set(IDS[i], VALUES[i]); + } + } + + @TearDown(Level.Trial) + public void tearDown() { + KnownTags.register(null); + } + + @Benchmark + public TagMap insertById() { + TagMap map = TagMap.create(); + for (int i = 0; i < IDS.length; ++i) { + map.set(IDS[i], VALUES[i]); + } + return map; + } + + @Benchmark + public TagMap insertByString() { + TagMap map = TagMap.create(); + for (int i = 0; i < NAMES.length; ++i) { + map.set(NAMES[i], VALUES[i]); + } + return map; + } + + @Benchmark + public void getById(Blackhole bh) { + for (int i = 0; i < IDS.length; ++i) { + bh.consume(this.readMap.getEntry(IDS[i])); + } + } + + @Benchmark + public void getByString(Blackhole bh) { + for (int i = 0; i < NAMES.length; ++i) { + bh.consume(this.readMap.getEntry(NAMES[i])); + } + } +} diff --git a/internal-api/src/main/java/datadog/trace/api/KnownTags.java b/internal-api/src/main/java/datadog/trace/api/KnownTags.java new file mode 100644 index 00000000000..170631c5b7e --- /dev/null +++ 
b/internal-api/src/main/java/datadog/trace/api/KnownTags.java @@ -0,0 +1,66 @@ +package datadog.trace.api; + +/** + * Registry for generated tag ID ↔ name resolution. The code generator populates this at tracer init + * via {@link #register(Resolver)}. Once registered, HotSpot CHA devirtualizes and inlines the + * resolver's switch, making {@link #nameOf}/{@link #keyOf} effectively zero-overhead. + */ +public final class KnownTags { + // Plain (non-volatile) fast-path flag: false until a resolver is ever registered. A plain read is + // free and hoistable, unlike a volatile read of `resolver` (costly on weak memory models such as + // ARM). A stale `false` is benign — callers treat the tag as unknown and use the hash buckets, + // which is correct, just unoptimized; the next read after publication takes the slot path. + private static boolean active; + + private static volatile Resolver resolver; + + /** Fast-path gate: true once a resolver has been registered. */ + public static boolean isActive() { + return active; + } + + /* + * tagId bit layout: [63-48 globalSerial] [47-32 fieldPos] [31-0 nameHash]. + * globalSerial is globally unique per known tag; fieldPos is the slot within a single span + * type's positional table (layout-relative — only meaningful within its own Prototype); nameHash + * is TagMap.Entry#_hash(name) and is layout-independent. Unknown (string-only) tags have the + * upper 32 bits zero. NOTE: TagMap.Entry decodes nameHash inline as (int) tagId on its hot path, + * so the low-32 encoding here must stay in sync with that. 
+ */ + public static int globalSerial(long tagId) { + return (int) (tagId >>> 48); + } + + public static int fieldPos(long tagId) { + return (int) ((tagId >>> 32) & 0xFFFF); + } + + public static int nameHash(long tagId) { + return (int) tagId; + } + + public interface Resolver { + String nameOf(long tagId); + + long keyOf(String name); + } + + public static void register(Resolver resolver) { + KnownTags.resolver = resolver; // volatile write publishes the resolver + KnownTags.active = (resolver != null); // plain write; readers re-read resolver volatile anyway + } + + public static String nameOf(long tagId) { + if (!active) return null; + Resolver r = resolver; + return r != null ? r.nameOf(tagId) : null; + } + + public static long keyOf(String name) { + if (!active) return 0L; + Resolver r = resolver; + return r != null ? r.keyOf(name) : 0L; + } + + private KnownTags() {} +} diff --git a/internal-api/src/test/java/datadog/trace/api/TagMapTagIdTest.java b/internal-api/src/test/java/datadog/trace/api/TagMapTagIdTest.java new file mode 100644 index 00000000000..f9ad0867779 --- /dev/null +++ b/internal-api/src/test/java/datadog/trace/api/TagMapTagIdTest.java @@ -0,0 +1,168 @@ +package datadog.trace.api; + +import static org.junit.jupiter.api.Assertions.assertEquals; +import static org.junit.jupiter.api.Assertions.assertNotNull; +import static org.junit.jupiter.api.Assertions.assertNull; +import static org.junit.jupiter.api.Assertions.assertSame; +import static org.junit.jupiter.api.Assertions.assertTrue; + +import datadog.trace.api.TagMap.Entry; +import java.util.HashMap; +import java.util.Map; +import org.junit.jupiter.api.AfterEach; +import org.junit.jupiter.api.BeforeEach; +import org.junit.jupiter.api.Test; + +/** + * Exercises the tag-id keyed {@code set}/{@code getEntry} surface on {@link TagMap} and {@link + * TagMap.Ledger}. + * + *

In this (buckets-only) phase a tag-id keyed write builds a tag-id-bearing {@link Entry} and + * stores it in the regular hash buckets. The entry carries its full identity (globalSerial / + * fieldPos / nameHash) and resolves its name lazily via {@link KnownTags}, so it remains findable + * by string name once a {@link KnownTags.Resolver} is registered. + */ +public class TagMapTagIdTest { + // Test tags: name -> (globalSerial, fieldPos). nameHash is derived from Entry._hash(name) so the + // tag-id-bearing entry lands in the same hash bucket a string-keyed entry would. + static final String HTTP_METHOD = "http.request.method"; + static final String HTTP_STATUS = "http.response.status_code"; + static final String DB_SYSTEM = "db.system"; + + static final long HTTP_METHOD_ID = tagId(1, 2, HTTP_METHOD); + static final long HTTP_STATUS_ID = tagId(2, 5, HTTP_STATUS); + static final long DB_SYSTEM_ID = tagId(3, 0, DB_SYSTEM); + + static long tagId(int globalSerial, int fieldPos, String name) { + long nameHash = Entry._hash(name) & 0xFFFFFFFFL; + return ((long) globalSerial << 48) | ((long) fieldPos << 32) | nameHash; + } + + @BeforeEach + public void registerResolver() { + Map nameById = new HashMap<>(); + Map idByName = new HashMap<>(); + for (long id : new long[] {HTTP_METHOD_ID, HTTP_STATUS_ID, DB_SYSTEM_ID}) { + // resolve name from the tag's own definition above + String name = + id == HTTP_METHOD_ID ? HTTP_METHOD : id == HTTP_STATUS_ID ? HTTP_STATUS : DB_SYSTEM; + nameById.put(id, name); + idByName.put(name, id); + } + KnownTags.register( + new KnownTags.Resolver() { + @Override + public String nameOf(long tagId) { + return nameById.get(tagId); + } + + @Override + public long keyOf(String name) { + Long id = idByName.get(name); + return id == null ? 
0L : id; + } + }); + } + + @AfterEach + public void clearResolver() { + KnownTags.register(null); + } + + @Test + public void setById_findableByIdAndName() { + TagMap map = TagMap.create(); + map.set(HTTP_METHOD_ID, "GET"); + + // findable by tag id + Entry byId = map.getEntry(HTTP_METHOD_ID); + assertNotNull(byId); + assertEquals("GET", byId.stringValue()); + + // findable by the resolved string name (read-path unification) + Entry byName = map.getEntry(HTTP_METHOD); + assertSame(byId, byName); + assertEquals("GET", map.get(HTTP_METHOD)); + } + + @Test + public void setById_preservesIdentityOnEntry() { + TagMap map = TagMap.create(); + map.set(HTTP_STATUS_ID, 404); + + Entry entry = map.getEntry(HTTP_STATUS_ID); + assertNotNull(entry); + // globalSerial survives on the stored entry + assertEquals(2, KnownTags.globalSerial(entry.tagId)); + assertEquals(5, KnownTags.fieldPos(entry.tagId)); + // name resolves lazily from the id + assertEquals(HTTP_STATUS, entry.tag()); + } + + @Test + public void setById_typedValues() { + TagMap map = TagMap.create(); + map.set(HTTP_STATUS_ID, 500); + + Entry entry = map.getEntry(HTTP_STATUS_ID); + assertTrue(entry.is(Entry.INT)); + assertEquals(500, entry.intValue()); + assertEquals(500, map.getInt(HTTP_STATUS)); + } + + @Test + public void setById_overwriteSameTag() { + TagMap map = TagMap.create(); + map.set(HTTP_METHOD_ID, "GET"); + map.set(HTTP_METHOD_ID, "POST"); + + assertEquals("POST", map.get(HTTP_METHOD)); + assertEquals(1, map.size()); + } + + @Test + public void setByName_findableById() { + TagMap map = TagMap.create(); + // string write of a known tag is still findable through the id read path (resolves to the + // same name and hash bucket) + map.set(DB_SYSTEM, "postgresql"); + + Entry byId = map.getEntry(DB_SYSTEM_ID); + assertNotNull(byId); + assertEquals("postgresql", byId.stringValue()); + } + + @Test + public void getEntryById_missingReturnsNull() { + TagMap map = TagMap.create(); + 
assertNull(map.getEntry(HTTP_METHOD_ID)); + } + + @Test + public void ledger_setById_buildsMap() { + TagMap map = + TagMap.ledger() + .set(HTTP_METHOD_ID, "GET") + .set(HTTP_STATUS_ID, 204) + .set(DB_SYSTEM_ID, "mysql") + .build(); + + assertEquals("GET", map.get(HTTP_METHOD)); + assertEquals(204, map.getInt(HTTP_STATUS)); + assertEquals("mysql", map.get(DB_SYSTEM)); + + // tag id survives the ledger -> build path + Entry status = map.getEntry(HTTP_STATUS_ID); + assertEquals(2, KnownTags.globalSerial(status.tagId)); + assertEquals(204, status.intValue()); + } + + @Test + public void ledger_mixedIdAndName() { + TagMap map = TagMap.ledger().set(HTTP_METHOD_ID, "PUT").set(DB_SYSTEM, "redis").build(); + + assertEquals("PUT", map.get(HTTP_METHOD)); + assertEquals("redis", map.get(DB_SYSTEM)); + assertEquals(2, map.size()); + } +} From f6f4f9ea9046424c4b7954c17c9d1df08b27666e Mon Sep 17 00:00:00 2001 From: Douglas Q Hawkins Date: Tue, 9 Jun 2026 13:47:31 -0400 Subject: [PATCH 02/35] Recover TagMap tagId/slot-routing integration from stash 602e6c47 MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Restores the load-bearing TagMap.java changes that were lost from the working tree (session 94f3bac2): set(long tagId,...) overload family, getEntry(long), tagId-encoded Entry factories, knownEntries slot routing in getAndSet with occupant first-writer-wins + collidedSlots bitmask, and setKnown/setInBuckets extraction. Also recovers the matching TagMapFuzzTest.java setById/putAllLedgerById coverage and the internal-api jmh property config (jmhInclude/jmhWarmup/jmhIterations/jmhFork) for the insertion benchmarks. Extracted from stash@{0} (602e6c47, "WIP on css-ring-buffer-v2") which bundled this work alongside unrelated CSS ring-buffer changes; only the TagMap-related files were taken. Combined with KnownTags.java + TagMapTagIdTest.java + the two insertion benchmarks already on this branch, the integration compiles (main+test+jmh). 
KNOWN WIP GAP: TagMapTest.fromMapImmutable_empty NPEs — a slot-aware op dereferences the lazily-allocated knownEntries array on an empty/immutable map without a null guard. TagMapTagIdTest and TagMapFuzzTest pass. Co-Authored-By: Claude Sonnet 4.6 --- internal-api/build.gradle.kts | 12 + .../main/java/datadog/trace/api/TagMap.java | 505 ++++++++++++++++-- .../datadog/trace/api/TagMapFuzzTest.java | 136 ++++- 3 files changed, 619 insertions(+), 34 deletions(-) diff --git a/internal-api/build.gradle.kts b/internal-api/build.gradle.kts index fc95dd9e1f1..2e76d189eb8 100644 --- a/internal-api/build.gradle.kts +++ b/internal-api/build.gradle.kts @@ -282,4 +282,16 @@ dependencies { jmh { jmhVersion = libs.versions.jmh.get() duplicateClassesStrategy = DuplicatesStrategy.EXCLUDE + if (project.hasProperty("jmhInclude")) { + includes.set(listOf(project.property("jmhInclude") as String)) + } + if (project.hasProperty("jmhWarmup")) { + warmupIterations.set((project.property("jmhWarmup") as String).toInt()) + } + if (project.hasProperty("jmhIterations")) { + iterations.set((project.property("jmhIterations") as String).toInt()) + } + if (project.hasProperty("jmhFork")) { + fork.set((project.property("jmhFork") as String).toInt()) + } } diff --git a/internal-api/src/main/java/datadog/trace/api/TagMap.java b/internal-api/src/main/java/datadog/trace/api/TagMap.java index 95f676245ef..7ec78bd7e64 100644 --- a/internal-api/src/main/java/datadog/trace/api/TagMap.java +++ b/internal-api/src/main/java/datadog/trace/api/TagMap.java @@ -71,6 +71,10 @@ static TagMap create(int size) { return TagMapFactory.INSTANCE.create(size); } + static TagMap create(TagMap.Prototype proto) { + return new OptimizedTagMap(proto); + } + /** Creates a new TagMap.Ledger */ static Ledger ledger() { return new Ledger(); @@ -172,6 +176,46 @@ static Ledger ledger(int size) { void set(EntryReader newEntry); + /* + * Tag-id keyed variants. 
The tagId encodes the tag's identity (see KnownTags); generated + * instrumentation uses these to avoid hashing tag-name strings. The default implementations + * resolve the name via KnownTags.nameOf and delegate to the string-keyed methods; OptimizedTagMap + * overrides them to build tag-id-bearing entries directly. Requires a registered + * KnownTags.Resolver to resolve the tag name. + */ + default void set(long tagId, Object value) { + this.set(KnownTags.nameOf(tagId), value); + } + + default void set(long tagId, CharSequence value) { + this.set(KnownTags.nameOf(tagId), value); + } + + default void set(long tagId, boolean value) { + this.set(KnownTags.nameOf(tagId), value); + } + + default void set(long tagId, int value) { + this.set(KnownTags.nameOf(tagId), value); + } + + default void set(long tagId, long value) { + this.set(KnownTags.nameOf(tagId), value); + } + + default void set(long tagId, float value) { + this.set(KnownTags.nameOf(tagId), value); + } + + default void set(long tagId, double value) { + this.set(KnownTags.nameOf(tagId), value); + } + + default Entry getEntry(long tagId) { + String name = KnownTags.nameOf(tagId); + return name == null ? null : this.getEntry(name); + } + /** sets the value while returning the prior Entry */ Entry getAndSet(String tag, Object value); @@ -284,18 +328,40 @@ public static final EntryRemoval newRemoval(String tag) { return new EntryRemoval(tag); } - final String tag; + // tagId encoding: bits 63-48 = globalSerial (0 for unknown tags), bits 47-32 = fieldPos, + // bits 31-0 = nameHash (_hash(tagName)). String-constructed entries have upper 32 bits zero + // with the hash lazily populated on first hash(). tagId-constructed entries have all bits set + // at construction; their tag name is resolved lazily via tag(). + long tagId; + + // Non-volatile: for tagId-constructed entries the name is resolved lazily and cached here. 
+ // A benign race may cause multiple threads to re-resolve, but KnownTags.nameOf returns the + // same interned constant each time, so the extra lookup is harmless. + String tag; EntryChange(String tag) { this.tag = tag; + this.tagId = 0; // nameHash populated lazily in hash() } - public final String tag() { + EntryChange(long tagId) { + this.tagId = tagId; + this.tag = null; // resolved lazily via tag() + } + + public String tag() { return this.tag; } public final boolean matches(String tag) { - return (this.tag == tag) || this.tag.equals(tag); + // Read the field directly for the common (string-constructed) case so this stays inlinable. + // Only tagId-constructed entries with an unresolved name fall back to the virtual tag(). + String myTag = this.tag; + if (myTag == null) { + myTag = this.tag(); + if (myTag == null) return false; + } + return (myTag == tag) || myTag.equals(tag); } public abstract boolean isRemoval(); @@ -465,11 +531,33 @@ static Entry newDoubleEntry(String tag, Double box) { return new Entry(tag, DOUBLE, double2Prim(box.doubleValue()), box); } - /* - * hash is stored in line for fast handling of Entry-s coming from another TagMap - * However, hash is lazily computed using the same trick as {@link java.lang.String}. 
- */ - int lazyTagHash; + static Entry newAnyEntry(long tagId, Object value) { + return new Entry(tagId, ANY, 0L, value); + } + + static Entry newObjectEntry(long tagId, Object value) { + return new Entry(tagId, OBJECT, 0L, value); + } + + static Entry newBooleanEntry(long tagId, boolean value) { + return new Entry(tagId, BOOLEAN, boolean2Prim(value), Boolean.valueOf(value)); + } + + static Entry newIntEntry(long tagId, int value) { + return new Entry(tagId, INT, int2Prim(value), null); + } + + static Entry newLongEntry(long tagId, long value) { + return new Entry(tagId, LONG, long2Prim(value), null); + } + + static Entry newFloatEntry(long tagId, float value) { + return new Entry(tagId, FLOAT, float2Prim(value), null); + } + + static Entry newDoubleEntry(long tagId, double value) { + return new Entry(tagId, DOUBLE, double2Prim(value), null); + } // To optimize construction of Entry around boxed primitives and Object entries, // no type checks are done during construction. @@ -493,24 +581,35 @@ static Entry newDoubleEntry(String tag, Double box) { private Entry(String tag, byte type, long prim, Object obj) { super(tag); - this.lazyTagHash = 0; // lazily computed + this.rawType = type; + this.rawPrim = prim; + this.rawObj = obj; + } + private Entry(long tagId, byte type, long prim, Object obj) { + super(tagId); this.rawType = type; this.rawPrim = prim; this.rawObj = obj; } int hash() { - // If value of hash read in this thread is zero, then hash is computed. 
- // hash is not held as a volatile, since this computation can safely be repeated as any time - int hash = this.lazyTagHash; + int hash = (int) this.tagId; if (hash != 0) return hash; - hash = _hash(this.tag); - this.lazyTagHash = hash; + this.tagId = hash & 0xFFFFFFFFL; return hash; } + @Override + public String tag() { + String name = this.tag; + if (name != null) return name; + name = KnownTags.nameOf(this.tagId); + if (name != null) this.tag = name; + return name; + } + @Override public Entry entry() { return this; @@ -965,6 +1064,19 @@ static int _hash(String tag) { } } + /** + * Per-span-type factory for an {@link OptimizedTagMap} backed by a positional {@link Entry} + * array. Known tags for the span type are stored directly at their {@code fieldPos} slot (O(1), + * no hashing); unexpected tags fall back to the hash buckets. + * + *

For now this just vends a blank positional {@code Entry[]} sized for the span type's layout. + * Later it will stamp out a prepopulated template (constant-valued entries copied per span) and + * may cache shared Entry instances for common values. + */ + abstract class Prototype { + public abstract Entry[] createKnownEntries(); + } + /* * An in-order ledger of changes to be made to a TagMap. * Ledger can also serves as a builder for TagMap-s via build & buildImmutable. @@ -1028,6 +1140,36 @@ public Ledger set(String tag, double value) { return this.recordEntry(Entry.newDoubleEntry(tag, value)); } + // Tag-id keyed variants — record a tag-id-bearing Entry. build()/fill() shares the Entry + // object, so the tagId survives into the built map. + public Ledger set(long tagId, Object value) { + return this.recordEntry(Entry.newAnyEntry(tagId, value)); + } + + public Ledger set(long tagId, CharSequence value) { + return this.recordEntry(Entry.newObjectEntry(tagId, value)); + } + + public Ledger set(long tagId, boolean value) { + return this.recordEntry(Entry.newBooleanEntry(tagId, value)); + } + + public Ledger set(long tagId, int value) { + return this.recordEntry(Entry.newIntEntry(tagId, value)); + } + + public Ledger set(long tagId, long value) { + return this.recordEntry(Entry.newLongEntry(tagId, value)); + } + + public Ledger set(long tagId, float value) { + return this.recordEntry(Entry.newFloatEntry(tagId, value)); + } + + public Ledger set(long tagId, double value) { + return this.recordEntry(Entry.newDoubleEntry(tagId, value)); + } + public Ledger set(Entry entry) { return this.recordEntry(entry); } @@ -1256,15 +1398,39 @@ final class OptimizedTagMap implements TagMap { // e.g. size 0 will not work, it results in ArrayIndexOutOfBoundsException, but size 1 does static final OptimizedTagMap EMPTY = new OptimizedTagMap(new Object[1], 0); + // Default capacity for the lazily-allocated knownEntries array (one slot per fieldPos). 
Known + // tags' fieldPos values are small (a span type carries well under this many tags); a tagId whose + // fieldPos is >= the array length simply falls back to the hash buckets. + static final int KNOWN_ENTRIES_CAPACITY = 32; + private final Object[] buckets; private int size; private boolean frozen; + // Positional store for known tags, indexed by fieldPos. Lazily allocated on the first known-tag + // write (or supplied up front by a Prototype). A known tag claims its slot first-writer-wins; + // colliding tags (a different globalSerial already owns the slot) fall back to the hash buckets. + // Entries are self-describing (carry their tagId), so a bucketed tag still serializes correctly. + private TagMap.Entry[] knownEntries; + + // Bitmask of fieldPos slots that have ever had a collision (a known tag diverted to the buckets + // because a different tag owned the slot). Used to detect when claiming a freed slot might + // orphan a stale bucket copy of the same tag. Bit N covers slot N (capacity <= 32). + private int collidedSlots; + public OptimizedTagMap() { // needs to be a power of 2 for bucket masking calculation to work as intended this.buckets = new Object[1 << 4]; this.size = 0; this.frozen = false; + this.knownEntries = null; + } + + public OptimizedTagMap(TagMap.Prototype proto) { + this.buckets = new Object[1 << 4]; + this.size = 0; + this.frozen = false; + this.knownEntries = proto.createKnownEntries(); } /** Used for inexpensive immutable */ @@ -1272,6 +1438,7 @@ private OptimizedTagMap(Object[] buckets, int size) { this.buckets = buckets; this.size = size; this.frozen = true; + this.knownEntries = null; } @Override @@ -1397,6 +1564,27 @@ public Set> entrySet() { @Override public Entry getEntry(String tag) { + // Known tags live in their slot; resolve identity and check there first. keyOf is a no-op + // until a resolver is registered, so this is just a hash-bucket lookup in the common case. 
+ long tagId = KnownTags.keyOf(tag); + if (tagId != 0L) { + Entry slot = this.knownGet(tagId); + if (slot != null) return slot; + } + return this.getEntryFromBuckets(tag); + } + + @Override + public Entry getEntry(long tagId) { + Entry slot = this.knownGet(tagId); + if (slot != null) return slot; + + // not slotted (unknown tag id, or it collided into the buckets) - look up by resolved name + String name = KnownTags.nameOf(tagId); + return name == null ? null : this.getEntryFromBuckets(name); + } + + private Entry getEntryFromBuckets(String tag) { Object[] thisBuckets = this.buckets; int hash = TagMap.Entry._hash(tag); @@ -1464,15 +1652,151 @@ public void set(String tag, double value) { this.getAndSet(Entry.newDoubleEntry(tag, value)); } + // Tag-id keyed setters. Build a tag-id-bearing Entry (carrying globalSerial/fieldPos/nameHash) + // and pass it to getAndSet, which routes known tags to their knownEntries slot and otherwise + // uses the hash buckets. The Entry resolves its tag name lazily via KnownTags, so it remains + // findable by string name and serializes correctly once a KnownTags.Resolver is registered. 
+ @Override + public void set(long tagId, Object value) { + this.getAndSet(Entry.newAnyEntry(tagId, value)); + } + + @Override + public void set(long tagId, CharSequence value) { + this.getAndSet(Entry.newObjectEntry(tagId, value)); + } + + @Override + public void set(long tagId, boolean value) { + this.getAndSet(Entry.newBooleanEntry(tagId, value)); + } + + @Override + public void set(long tagId, int value) { + this.getAndSet(Entry.newIntEntry(tagId, value)); + } + + @Override + public void set(long tagId, long value) { + this.getAndSet(Entry.newLongEntry(tagId, value)); + } + + @Override + public void set(long tagId, float value) { + this.getAndSet(Entry.newFloatEntry(tagId, value)); + } + + @Override + public void set(long tagId, double value) { + this.getAndSet(Entry.newDoubleEntry(tagId, value)); + } + + // Returns the slot entry for tagId if a known tag owns its fieldPos slot, else null. + private Entry knownGet(long tagId) { + Entry[] known = this.knownEntries; + if (known == null) return null; + + int globalSerial = KnownTags.globalSerial(tagId); + if (globalSerial == 0) return null; + + int pos = KnownTags.fieldPos(tagId); + if (pos >= known.length) return null; + + Entry occupant = known[pos]; + return (occupant != null && KnownTags.globalSerial(occupant.tagId) == globalSerial) + ? occupant + : null; + } + + // Clears and returns the slot entry for tagId if a known tag owns its slot, else null. 
+ private Entry knownRemove(long tagId) { + Entry[] known = this.knownEntries; + if (known == null) return null; + + int globalSerial = KnownTags.globalSerial(tagId); + if (globalSerial == 0) return null; + + int pos = KnownTags.fieldPos(tagId); + if (pos >= known.length) return null; + + Entry occupant = known[pos]; + if (occupant != null && KnownTags.globalSerial(occupant.tagId) == globalSerial) { + known[pos] = null; + this.size -= 1; + return occupant; + } + return null; + } + @Override public Entry getAndSet(Entry newEntry) { this.checkWriteAccess(); + // Resolve the entry's identity. A tag-id-constructed entry already carries its globalSerial; a + // string-constructed entry may be a known tag — resolve via KnownTags and upgrade its tagId so + // it routes to (and is recognized in) its slot. keyOf is a no-op until a resolver is + // registered. The slot handling lives in setKnown so this hot method stays small and inlinable. + long tagId = newEntry.tagId; + int globalSerial = KnownTags.globalSerial(tagId); + if (globalSerial == 0 && KnownTags.isActive()) { + long resolved = KnownTags.keyOf(newEntry.tag()); + if (resolved != 0L) { + newEntry.tagId = resolved; + globalSerial = KnownTags.globalSerial(resolved); + } + } + + if (globalSerial != 0) { + return this.setKnown(newEntry, globalSerial); + } + return this.setInBuckets(newEntry); + } + + // Routes a known tag to its positional slot (first-writer-wins, same-tag overwrite). On collision + // (a different tag owns the slot) or out-of-range fieldPos, falls back to the hash buckets. 
+ private Entry setKnown(Entry newEntry, int globalSerial) { + int pos = KnownTags.fieldPos(newEntry.tagId); + if (pos < KNOWN_ENTRIES_CAPACITY) { + Entry[] known = this.knownEntries; + if (known == null) { + known = this.knownEntries = new Entry[KNOWN_ENTRIES_CAPACITY]; + } + if (pos < known.length) { + Entry occupant = known[pos]; + if (occupant == null) { + // claim the empty slot + Entry prev = null; + if ((this.collidedSlots & (1 << pos)) != 0) { + // this slot previously collided, so a stale copy of this tag may be orphaned in the + // buckets (the slot was freed by a remove). Evict it to avoid a slot+bucket duplicate. + prev = this.removeFromBuckets(newEntry.tag(), newEntry.hash()); + } + known[pos] = newEntry; + this.size += 1; // if prev != null, removeFromBuckets already decremented -> net no change + return prev; + } else if (KnownTags.globalSerial(occupant.tagId) == globalSerial) { + // same tag - overwrite in place, no size change + known[pos] = newEntry; + return occupant; + } + // a different known tag owns this slot - record the collision and fall to the buckets + this.collidedSlots |= (1 << pos); + } + } + return this.setInBuckets(newEntry); + } + + private Entry setInBuckets(Entry newEntry) { Object[] thisBuckets = this.buckets; int newHash = newEntry.hash(); int bucketIndex = newHash & (thisBuckets.length - 1); + // Use the resolved accessor, not the raw field: a tag-id-constructed entry has a null tag + // field until its name is lazily resolved. For string-constructed entries this is just a field + // read. 
+ String newTag = newEntry.tag(); + Object bucket = thisBuckets[bucketIndex]; if (bucket == null) { thisBuckets[bucketIndex] = newEntry; @@ -1481,7 +1805,7 @@ public Entry getAndSet(Entry newEntry) { return null; } else if (bucket instanceof Entry) { Entry existingEntry = (Entry) bucket; - if (existingEntry.matches(newEntry.tag)) { + if (existingEntry.matches(newTag)) { thisBuckets[bucketIndex] = newEntry; // replaced existing entry - no size change @@ -1496,7 +1820,7 @@ public Entry getAndSet(Entry newEntry) { } else if (bucket instanceof BucketGroup) { BucketGroup lastGroup = (BucketGroup) bucket; - BucketGroup containingGroup = lastGroup.findContainingGroupInChain(newHash, newEntry.tag); + BucketGroup containingGroup = lastGroup.findContainingGroupInChain(newHash, newTag); if (containingGroup != null) { // replaced existing entry - no size change return containingGroup._replace(newHash, newEntry); @@ -1584,12 +1908,24 @@ public void putAll(TagMap that) { private void putAllOptimizedMap(OptimizedTagMap that) { if (this.size == 0) { + // empty dest: clone source buckets + slots wholesale (no duplication possible) this.putAllIntoEmptyMap(that); + } else if (this.knownEntries != null || that.knownEntries != null) { + // slots in play with a non-empty dest: the fast bucket-aligned merge could place a known tag + // into a bucket while dest already holds it in a slot (or vice versa). Route every source + // entry through getAndSet so slot/bucket placement stays consistent. getAndSet is + // order-independent for collisions. 
+ this.putAllByEntry(that); } else { this.putAllMerge(that); } } + private void putAllByEntry(OptimizedTagMap that) { + // ctx-passing forEach avoids a capturing lambda allocation + that.forEach(this, (dest, reader) -> dest.getAndSet(reader.entry())); + } + private void putAllMerge(OptimizedTagMap that) { Object[] thisBuckets = this.buckets; Object[] thatBuckets = that.buckets; @@ -1728,10 +2064,25 @@ private void putAllIntoEmptyMap(OptimizedTagMap that) { thisBuckets[i] = thatBucket; } } + + // dest is empty, so the source's positional slots transfer directly (entries are shared, as + // with buckets above). size is copied wholesale below and already accounts for slot entries. + if (that.knownEntries != null) { + this.knownEntries = that.knownEntries.clone(); + } + this.collidedSlots = that.collidedSlots; + this.size = that.size; } public void fillMap(Map map) { + Entry[] known = this.knownEntries; + if (known != null) { + for (Entry slotEntry : known) { + if (slotEntry != null) map.put(slotEntry.tag(), slotEntry.objectValue()); + } + } + Object[] thisBuckets = this.buckets; for (int i = 0; i < thisBuckets.length; ++i) { @@ -1740,7 +2091,7 @@ public void fillMap(Map map) { if (thisBucket instanceof Entry) { Entry thisEntry = (Entry) thisBucket; - map.put(thisEntry.tag, thisEntry.objectValue()); + map.put(thisEntry.tag(), thisEntry.objectValue()); } else if (thisBucket instanceof BucketGroup) { BucketGroup thisGroup = (BucketGroup) thisBucket; @@ -1750,6 +2101,13 @@ public void fillMap(Map map) { } public void fillStringMap(Map stringMap) { + Entry[] known = this.knownEntries; + if (known != null) { + for (Entry slotEntry : known) { + if (slotEntry != null) stringMap.put(slotEntry.tag(), slotEntry.stringValue()); + } + } + Object[] thisBuckets = this.buckets; for (int i = 0; i < thisBuckets.length; ++i) { @@ -1758,7 +2116,7 @@ public void fillStringMap(Map stringMap) { if (thisBucket instanceof Entry) { Entry thisEntry = (Entry) thisBucket; - 
stringMap.put(thisEntry.tag, thisEntry.stringValue()); + stringMap.put(thisEntry.tag(), thisEntry.stringValue()); } else if (thisBucket instanceof BucketGroup) { BucketGroup thisGroup = (BucketGroup) thisBucket; @@ -1783,9 +2141,21 @@ public boolean remove(String tag) { public Entry getAndRemove(String tag) { this.checkWriteAccess(); + // known tags live in their slot - clear there first + long tagId = KnownTags.keyOf(tag); + if (tagId != 0L) { + Entry slotEntry = this.knownRemove(tagId); + if (slotEntry != null) return slotEntry; + } + + return this.removeFromBuckets(tag, TagMap.Entry._hash(tag)); + } + + // Removes tag from the hash buckets (only), decrementing size if found. Returns the removed entry + // or null. + private Entry removeFromBuckets(String tag, int hash) { Object[] thisBuckets = this.buckets; - int hash = TagMap.Entry._hash(tag); int bucketIndex = hash & (thisBuckets.length - 1); Object bucket = thisBuckets[bucketIndex]; @@ -1846,6 +2216,13 @@ public Stream stream() { @Override public void forEach(Consumer consumer) { + Entry[] known = this.knownEntries; + if (known != null) { + for (Entry slotEntry : known) { + if (slotEntry != null) consumer.accept(slotEntry); + } + } + Object[] thisBuckets = this.buckets; for (int i = 0; i < thisBuckets.length; ++i) { @@ -1865,6 +2242,13 @@ public void forEach(Consumer consumer) { @Override public void forEach(T thisObj, BiConsumer consumer) { + Entry[] known = this.knownEntries; + if (known != null) { + for (Entry slotEntry : known) { + if (slotEntry != null) consumer.accept(thisObj, slotEntry); + } + } + Object[] thisBuckets = this.buckets; for (int i = 0; i < thisBuckets.length; ++i) { @@ -1885,6 +2269,13 @@ public void forEach(T thisObj, BiConsumer con @Override public void forEach( T thisObj, U otherObj, TriConsumer consumer) { + Entry[] known = this.knownEntries; + if (known != null) { + for (Entry slotEntry : known) { + if (slotEntry != null) consumer.accept(thisObj, otherObj, slotEntry); + } + } + 
Object[] thisBuckets = this.buckets; for (int i = 0; i < thisBuckets.length; ++i) { @@ -1906,7 +2297,11 @@ public void clear() { this.checkWriteAccess(); Arrays.fill(this.buckets, null); + if (this.knownEntries != null) { + Arrays.fill(this.knownEntries, null); + } this.size = 0; + this.collidedSlots = 0; } public OptimizedTagMap freeze() { @@ -1928,6 +2323,21 @@ void checkIntegrity() { // That was done to avoid the extra static initialization needed for an assertion // While that's probably an unnecessary optimization, this method is only called in tests + Entry[] known = this.knownEntries; + if (known != null) { + for (int i = 0; i < known.length; ++i) { + Entry slotEntry = known[i]; + if (slotEntry == null) continue; + + if (KnownTags.globalSerial(slotEntry.tagId) == 0) { + throw new IllegalStateException("slotted entry without globalSerial"); + } + if (KnownTags.fieldPos(slotEntry.tagId) != i) { + throw new IllegalStateException("incorrect slot"); + } + } + } + Object[] thisBuckets = this.buckets; for (int i = 0; i < thisBuckets.length; ++i) { @@ -1970,9 +2380,16 @@ void checkIntegrity() { } int computeSize() { - Object[] thisBuckets = this.buckets; - int size = 0; + + Entry[] known = this.knownEntries; + if (known != null) { + for (Entry slotEntry : known) { + if (slotEntry != null) size += 1; + } + } + + Object[] thisBuckets = this.buckets; for (int i = 0; i < thisBuckets.length; ++i) { Object curBucket = thisBuckets[i]; @@ -1987,6 +2404,13 @@ int computeSize() { } boolean checkIfEmpty() { + Entry[] known = this.knownEntries; + if (known != null) { + for (Entry slotEntry : known) { + if (slotEntry != null) return false; + } + } + Object[] thisBuckets = this.buckets; for (int i = 0; i < thisBuckets.length; ++i) { @@ -2082,16 +2506,19 @@ String toInternalString() { } abstract static class IteratorBase { + private final Entry[] knownEntries; private final Object[] buckets; private Entry nextEntry; + private int knownIndex = -1; private int bucketIndex = -1; 
private BucketGroup group = null; private int groupIndex = 0; IteratorBase(OptimizedTagMap map) { + this.knownEntries = map.knownEntries; this.buckets = map.buckets; } @@ -2131,6 +2558,15 @@ final Entry nextEntryOrNull() { } private final Entry advance() { + // drain the positional known-entries slots first + Entry[] known = this.knownEntries; + if (known != null) { + for (++this.knownIndex; this.knownIndex < known.length; ++this.knownIndex) { + Entry slotEntry = known[this.knownIndex]; + if (slotEntry != null) return slotEntry; + } + } + while (this.bucketIndex < this.buckets.length) { if (this.group != null) { for (++this.groupIndex; this.groupIndex < BucketGroup.LEN; ++this.groupIndex) { @@ -2429,18 +2865,21 @@ Entry replaceInChain(int hash, Entry entry) { Entry _replace(int hash, Entry entry) { // if ( this._mayContain(hash) ) return null; + // resolved accessor, not the raw field: tag-id entries have a null tag field until resolved + String tag = entry.tag(); + // first check to see if the item is already present Entry prevEntry = null; - if (this.hash0 == hash && this.entry0.matches(entry.tag)) { + if (this.hash0 == hash && this.entry0.matches(tag)) { prevEntry = this.entry0; this.entry0 = entry; - } else if (this.hash1 == hash && this.entry1.matches(entry.tag)) { + } else if (this.hash1 == hash && this.entry1.matches(tag)) { prevEntry = this.entry1; this.entry1 = entry; - } else if (this.hash2 == hash && this.entry2.matches(entry.tag)) { + } else if (this.hash2 == hash && this.entry2.matches(tag)) { prevEntry = this.entry2; this.entry2 = entry; - } else if (this.hash3 == hash && this.entry3.matches(entry.tag)) { + } else if (this.hash3 == hash && this.entry3.matches(tag)) { prevEntry = this.entry3; this.entry3 = entry; } @@ -2571,16 +3010,16 @@ void fillMapFromChain(Map map) { void _fillMap(Map map) { Entry entry0 = this.entry0; - if (entry0 != null) map.put(entry0.tag, entry0.objectValue()); + if (entry0 != null) map.put(entry0.tag(), entry0.objectValue()); 
Entry entry1 = this.entry1; - if (entry1 != null) map.put(entry1.tag, entry1.objectValue()); + if (entry1 != null) map.put(entry1.tag(), entry1.objectValue()); Entry entry2 = this.entry2; - if (entry2 != null) map.put(entry2.tag, entry2.objectValue()); + if (entry2 != null) map.put(entry2.tag(), entry2.objectValue()); Entry entry3 = this.entry3; - if (entry3 != null) map.put(entry3.tag, entry3.objectValue()); + if (entry3 != null) map.put(entry3.tag(), entry3.objectValue()); } void fillStringMapFromChain(Map map) { @@ -2591,16 +3030,16 @@ void fillStringMapFromChain(Map map) { void _fillStringMap(Map map) { Entry entry0 = this.entry0; - if (entry0 != null) map.put(entry0.tag, entry0.stringValue()); + if (entry0 != null) map.put(entry0.tag(), entry0.stringValue()); Entry entry1 = this.entry1; - if (entry1 != null) map.put(entry1.tag, entry1.stringValue()); + if (entry1 != null) map.put(entry1.tag(), entry1.stringValue()); Entry entry2 = this.entry2; - if (entry2 != null) map.put(entry2.tag, entry2.stringValue()); + if (entry2 != null) map.put(entry2.tag(), entry2.stringValue()); Entry entry3 = this.entry3; - if (entry3 != null) map.put(entry3.tag, entry3.stringValue()); + if (entry3 != null) map.put(entry3.tag(), entry3.stringValue()); } BucketGroup cloneChain() { diff --git a/internal-api/src/test/java/datadog/trace/api/TagMapFuzzTest.java b/internal-api/src/test/java/datadog/trace/api/TagMapFuzzTest.java index 48254ae9bd1..27205261dd3 100644 --- a/internal-api/src/test/java/datadog/trace/api/TagMapFuzzTest.java +++ b/internal-api/src/test/java/datadog/trace/api/TagMapFuzzTest.java @@ -13,6 +13,8 @@ import java.util.Map; import java.util.concurrent.ThreadLocalRandom; import java.util.function.Supplier; +import org.junit.jupiter.api.AfterAll; +import org.junit.jupiter.api.BeforeAll; import org.junit.jupiter.api.Test; public final class TagMapFuzzTest { @@ -20,6 +22,45 @@ public final class TagMapFuzzTest { static final int MAX_NUM_ACTIONS = 32; static final int 
MIN_NUM_ACTIONS = 8; + // Closed-form KnownTags resolver for the fuzz keys ("key-0".."key-(NUM_KEYS-1)"). Lets the + // tag-id keyed actions (setById / putAllLedgerById) resolve their names so id-bearing entries + // unify with string-keyed entries in the buckets and remain findable by name. + @BeforeAll + static void registerResolver() { + KnownTags.register( + new KnownTags.Resolver() { + @Override + public String nameOf(long tagId) { + int globalSerial = (int) (tagId >>> 48); + return globalSerial == 0 ? null : "key-" + (globalSerial - 1); + } + + @Override + public long keyOf(String name) { + return isFuzzKey(name) ? tagIdOf(name) : 0L; + } + }); + } + + @AfterAll + static void clearResolver() { + KnownTags.register(null); + } + + static boolean isFuzzKey(String name) { + return name != null && name.startsWith("key-"); + } + + static long tagIdOf(String key) { + int n = Integer.parseInt(key.substring("key-".length())); + long nameHash = TagMap.Entry._hash(key) & 0xFFFFFFFFL; + // globalSerial = n + 1 (non-zero, unique per key); fieldPos spreads keys across the slot array + // (n % CAPACITY), so distinct keys occupy distinct slots AND keys that share a fieldPos collide + // (first-writer-wins -> the rest fall to buckets), exercising both paths. 
+ int fieldPos = n % OptimizedTagMap.KNOWN_ENTRIES_CAPACITY; + return ((long) (n + 1) << 48) | ((long) fieldPos << 32) | nameHash; + } + @Test void test() { test(generateTest()); @@ -958,7 +999,8 @@ public static final MapAction randomAction() { return randomChoice( () -> putAll(randomKeysAndValues()), () -> putAllTagMap(randomKeysAndValues()), - () -> putAllLedger(randomKeysAndValues())); + () -> putAllLedger(randomKeysAndValues()), + () -> putAllLedgerById(randomKeysAndValues())); case 2: return randomChoice( @@ -970,6 +1012,7 @@ public static final MapAction randomAction() { return randomChoice( () -> put(randomKey(), randomValue()), () -> set(randomKey(), randomValue()), + () -> setById(randomKey(), randomValue()), () -> getAndSet(randomKey(), randomValue())); } } @@ -982,6 +1025,10 @@ public static final MapAction set(String key, String value) { return new Set(key, value); } + public static final MapAction setById(String key, String value) { + return new SetById(key, value); + } + public static final MapAction getAndSet(String key, String value) { return new GetAndSet(key, value); } @@ -998,6 +1045,10 @@ public static final MapAction putAllLedger(String... keysAndValues) { return new PutAllLedger(keysAndValues); } + public static final MapAction putAllLedgerById(String... keysAndValues) { + return new PutAllLedgerById(keysAndValues); + } + public static final MapAction clear() { return Clear.INSTANCE; } @@ -1123,6 +1174,17 @@ static final TagMap.Ledger ledgerOf(String... keysAndValues) { return ledger; } + static final TagMap.Ledger ledgerByIdOf(String... 
keysAndValues) { + TagMap.Ledger ledger = TagMap.ledger(); + for (int i = 0; i < keysAndValues.length; i += 2) { + String key = keysAndValues[i]; + String value = keysAndValues[i + 1]; + + ledger.set(tagIdOf(key), value); + } + return ledger; + } + static final class TestCase { final List actions; @@ -1286,6 +1348,41 @@ public String toString() { } } + static final class SetById extends BasicAction { + final String key; + final String value; + + SetById(String key, String value) { + this.key = key; + this.value = value; + } + + @Override + protected void _applyToTestMap(TagMap testMap) { + testMap.set(tagIdOf(this.key), this.value); + } + + @Override + protected void _applyToExpectedMap(Map expectedMap) { + expectedMap.put(this.key, this.value); + } + + @Override + public void verifyTestMap(TagMap testMap) { + // findable by name (read-path unification) ... + assertEquals(this.value, testMap.get(this.key)); + // ... and by tag id + TagMap.Entry byId = testMap.getEntry(tagIdOf(this.key)); + assertNotNull(byId); + assertEquals(this.value, byId.objectValue()); + } + + @Override + public String toString() { + return String.format("setById(%s,%s)", literal(this.key), literal(this.value)); + } + } + static final class GetAndSet extends ReturningAction { final String key; final String value; @@ -1428,6 +1525,43 @@ public String toString() { } } + static final class PutAllLedgerById extends BasicAction { + final String[] keysAndValues; + final TagMap.Ledger ledger; + + PutAllLedgerById(String... 
keysAndValues) { + this.keysAndValues = keysAndValues; + this.ledger = ledgerByIdOf(keysAndValues); + } + + @Override + protected void _applyToTestMap(TagMap testMap) { + this.ledger.fill(testMap); + } + + @Override + protected void _applyToExpectedMap(Map expectedMap) { + for (TagMap.EntryChange change : this.ledger) { + // ledgerByIdOf doesn't produce removals, so this cast is safe + TagMap.Entry entry = (TagMap.Entry) change; + expectedMap.put(entry.tag(), entry.objectValue()); + } + } + + @Override + public void verifyTestMap(TagMap expectedMap) { + // ledger may contain multiple updates of the same key; compare against a built map + for (TagMap.EntryReader entry : this.ledger.buildImmutable()) { + assertEquals(entry.objectValue(), expectedMap.get(entry.tag()), "key=" + entry.tag()); + } + } + + @Override + public String toString() { + return String.format("putAllLedgerById(%s)", literalVarArgs(this.keysAndValues)); + } + } + static final class Remove extends BasicReturningAction { final String key; From 027404f63e0662fd16f438adca272bacd7405bac Mon Sep 17 00:00:00 2001 From: Douglas Q Hawkins Date: Tue, 9 Jun 2026 15:51:30 -0400 Subject: [PATCH 03/35] Fix TagMap.EMPTY static-init-order NPE; add seedable TagMapFuzzTest MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit TagMap.EMPTY (interface field, computed via the factory) could capture null when OptimizedTagMap initialized first: TagMap.<clinit> runs during OptimizedTagMap.<clinit> before its static fields are assigned, so the factory returned the not-yet-set OptimizedTagMap.EMPTY. Empty-ledger buildImmutable() and fromMapImmutable(empty) then returned null -> NPE (flaky by class-load order). Fixed with an initialization-on-demand holder (OptimizedTagMap.empty() -> EmptyHolder.EMPTY) so the empty instance initializes independently of order. 
Also make TagMapFuzzTest reproducible: -Ddatadog.tagmap.fuzz.seed=<seed> (random and logged when unset) + a SeedReporter TestWatcher that prints the seed and a reproduce command on failure, plus -Ddatadog.tagmap.fuzz.iterations=<n> to run many sequences per JVM for hunting rare cases. This combination caught the EMPTY bug deterministically. TagMapEmptyInitTest guards the init order. The set(long)/getEntry(long) methods are now abstract (with LegacyTagMap impls) rather than default — explicit per implementation. Co-Authored-By: Claude Opus 4.8 --- internal-api/build.gradle.kts | 11 ++ .../main/java/datadog/trace/api/TagMap.java | 107 ++++++++++++------ .../trace/api/TagMapEmptyInitTest.java | 28 +++++ .../datadog/trace/api/TagMapFuzzTest.java | 98 ++++++++++++---- 4 files changed, 190 insertions(+), 54 deletions(-) create mode 100644 internal-api/src/test/java/datadog/trace/api/TagMapEmptyInitTest.java diff --git a/internal-api/build.gradle.kts b/internal-api/build.gradle.kts index 2e76d189eb8..28662725bbd 100644 --- a/internal-api/build.gradle.kts +++ b/internal-api/build.gradle.kts @@ -279,6 +279,17 @@ dependencies { testImplementation(libs.bundles.mockito) } +// Forward TagMapFuzzTest knobs (datadog.tagmap.fuzz.seed / .iterations) to the forked test JVM, so +// a failing run can be reproduced via -Ddatadog.tagmap.fuzz.seed=<seed> (forked test JVMs don't +// inherit -D from the Gradle invocation). 
+tasks.withType().configureEach { + System.getProperties().stringPropertyNames().forEach { + if (it.startsWith("datadog.tagmap.fuzz.")) { + systemProperty(it, System.getProperty(it)) + } + } +} + jmh { jmhVersion = libs.versions.jmh.get() duplicateClassesStrategy = DuplicatesStrategy.EXCLUDE diff --git a/internal-api/src/main/java/datadog/trace/api/TagMap.java b/internal-api/src/main/java/datadog/trace/api/TagMap.java index 7ec78bd7e64..b159a7410d8 100644 --- a/internal-api/src/main/java/datadog/trace/api/TagMap.java +++ b/internal-api/src/main/java/datadog/trace/api/TagMap.java @@ -178,43 +178,25 @@ static Ledger ledger(int size) { /* * Tag-id keyed variants. The tagId encodes the tag's identity (see KnownTags); generated - * instrumentation uses these to avoid hashing tag-name strings. The default implementations - * resolve the name via KnownTags.nameOf and delegate to the string-keyed methods; OptimizedTagMap - * overrides them to build tag-id-bearing entries directly. Requires a registered - * KnownTags.Resolver to resolve the tag name. + * instrumentation uses these to avoid hashing tag-name strings. OptimizedTagMap routes these to + * its positional slots; LegacyTagMap resolves the name via KnownTags.nameOf and delegates to the + * string-keyed methods. (Abstract rather than default to keep the two implementations explicit.) 
*/ - default void set(long tagId, Object value) { - this.set(KnownTags.nameOf(tagId), value); - } + void set(long tagId, Object value); - default void set(long tagId, CharSequence value) { - this.set(KnownTags.nameOf(tagId), value); - } + void set(long tagId, CharSequence value); - default void set(long tagId, boolean value) { - this.set(KnownTags.nameOf(tagId), value); - } + void set(long tagId, boolean value); - default void set(long tagId, int value) { - this.set(KnownTags.nameOf(tagId), value); - } + void set(long tagId, int value); - default void set(long tagId, long value) { - this.set(KnownTags.nameOf(tagId), value); - } + void set(long tagId, long value); - default void set(long tagId, float value) { - this.set(KnownTags.nameOf(tagId), value); - } + void set(long tagId, float value); - default void set(long tagId, double value) { - this.set(KnownTags.nameOf(tagId), value); - } + void set(long tagId, double value); - default Entry getEntry(long tagId) { - String name = KnownTags.nameOf(tagId); - return name == null ? null : this.getEntry(name); - } + Entry getEntry(long tagId); /** sets the value while returning the prior Entry */ Entry getAndSet(String tag, Object value); @@ -1348,7 +1330,7 @@ public OptimizedTagMap create(int size) { @Override public OptimizedTagMap empty() { - return OptimizedTagMap.EMPTY; + return OptimizedTagMap.empty(); } } @@ -1393,10 +1375,24 @@ public LegacyTagMap empty() { * removed from the collision chain. */ final class OptimizedTagMap implements TagMap { - // Using special constructor that creates a frozen view of an existing array - // Bucket calculation requires that array length is a power of 2 - // e.g. size 0 will not work, it results in ArrayIndexOutOfBoundsException, but size 1 does - static final OptimizedTagMap EMPTY = new OptimizedTagMap(new Object[1], 0); + // The shared empty (frozen) instance, via an initialization-on-demand holder. 
+ // + // It must NOT be a direct static field of OptimizedTagMap: TagMap.EMPTY is computed in + // TagMap.<clinit> through the factory (which returns this empty instance), and TagMap.<clinit> + // can run *during* OptimizedTagMap.<clinit> (before its static fields are assigned). A direct + // field would still be null at that point, so TagMap.EMPTY would capture null. A separate holder + // class initializes independently, so the factory always gets a valid instance regardless of + // which class is touched first. + // + // Special constructor creates a frozen view of an existing array; bucket calculation requires a + // power-of-2 length (size 0 fails with AIOOBE, size 1 works). + static OptimizedTagMap empty() { + return EmptyHolder.EMPTY; + } + + private static final class EmptyHolder { + static final OptimizedTagMap EMPTY = new OptimizedTagMap(new Object[1], 0); + } // Default capacity for the lazily-allocated knownEntries array (one slot per fieldPos). Known // tags' fieldPos values are small (a span type carries well under this many tags); a tagId whose @@ -3587,6 +3583,49 @@ public void set(TagMap.EntryReader newEntryReader) { this.put(newEntryReader.tag(), newEntryReader.objectValue()); } + // Tag-id keyed variants: LegacyTagMap is name-keyed, so resolve the name via KnownTags and + // delegate to the string-keyed methods. Requires a registered KnownTags.Resolver. 
+ @Override + public void set(long tagId, Object value) { + this.set(KnownTags.nameOf(tagId), value); + } + + @Override + public void set(long tagId, CharSequence value) { + this.set(KnownTags.nameOf(tagId), value); + } + + @Override + public void set(long tagId, boolean value) { + this.set(KnownTags.nameOf(tagId), value); + } + + @Override + public void set(long tagId, int value) { + this.set(KnownTags.nameOf(tagId), value); + } + + @Override + public void set(long tagId, long value) { + this.set(KnownTags.nameOf(tagId), value); + } + + @Override + public void set(long tagId, float value) { + this.set(KnownTags.nameOf(tagId), value); + } + + @Override + public void set(long tagId, double value) { + this.set(KnownTags.nameOf(tagId), value); + } + + @Override + public TagMap.Entry getEntry(long tagId) { + String name = KnownTags.nameOf(tagId); + return name == null ? null : this.getEntry(name); + } + @Override public Object put(String key, Object value) { this.checkWriteAccess(); diff --git a/internal-api/src/test/java/datadog/trace/api/TagMapEmptyInitTest.java b/internal-api/src/test/java/datadog/trace/api/TagMapEmptyInitTest.java new file mode 100644 index 00000000000..3ec4f6c6641 --- /dev/null +++ b/internal-api/src/test/java/datadog/trace/api/TagMapEmptyInitTest.java @@ -0,0 +1,28 @@ +package datadog.trace.api; + +import static org.junit.jupiter.api.Assertions.assertNotNull; + +import java.util.Collections; +import org.junit.jupiter.api.Test; + +/** + * Diagnostic: is TagMap.EMPTY null when OptimizedTagMap is the first TagMap-related class touched + * in the JVM? Forked so nothing else initializes TagMap first. 
+ */ +public class TagMapEmptyInitTest { + @Test + void emptyNotNull_whenOptimizedInitsFirst() { + // force OptimizedTagMap to initialize before the TagMap interface + OptimizedTagMap m = new OptimizedTagMap(); + m.set("x", "y"); + + System.out.println("OptimizedTagMap.EMPTY=" + OptimizedTagMap.EMPTY); + System.out.println("TagMap.EMPTY=" + TagMap.EMPTY); + + assertNotNull(OptimizedTagMap.EMPTY, "OptimizedTagMap.EMPTY null"); + assertNotNull(TagMap.EMPTY, "TagMap.EMPTY null"); + assertNotNull( + TagMap.fromMapImmutable(Collections.emptyMap()), "fromMapImmutable(empty) returned null"); + assertNotNull(TagMap.ledger().buildImmutable(), "empty ledger buildImmutable returned null"); + } +} diff --git a/internal-api/src/test/java/datadog/trace/api/TagMapFuzzTest.java b/internal-api/src/test/java/datadog/trace/api/TagMapFuzzTest.java index 27205261dd3..f0b8ef3698e 100644 --- a/internal-api/src/test/java/datadog/trace/api/TagMapFuzzTest.java +++ b/internal-api/src/test/java/datadog/trace/api/TagMapFuzzTest.java @@ -11,17 +11,66 @@ import java.util.HashMap; import java.util.List; import java.util.Map; -import java.util.concurrent.ThreadLocalRandom; +import java.util.Random; import java.util.function.Supplier; import org.junit.jupiter.api.AfterAll; import org.junit.jupiter.api.BeforeAll; +import org.junit.jupiter.api.BeforeEach; import org.junit.jupiter.api.Test; +import org.junit.jupiter.api.TestInfo; +import org.junit.jupiter.api.extension.ExtendWith; +import org.junit.jupiter.api.extension.ExtensionContext; +import org.junit.jupiter.api.extension.TestWatcher; +@ExtendWith(TagMapFuzzTest.SeedReporter.class) public final class TagMapFuzzTest { static final int NUM_KEYS = 128; static final int MAX_NUM_ACTIONS = 32; static final int MIN_NUM_ACTIONS = 8; + // Seedable RNG for reproducibility. Each test reseeds in @BeforeEach: from + // -Ddatadog.tagmap.fuzz.seed when set, else a fresh random seed (always logged). 
On failure + // SeedReporter reprints the seed + reproduce command. Static because the random* helpers are + // static; the fuzz tests are single-threaded. + static final String SEED_PROPERTY = "datadog.tagmap.fuzz.seed"; + static Random rng; + static long currentSeed; + + @BeforeEach + void seedRng(TestInfo info) { + String prop = System.getProperty(SEED_PROPERTY); + currentSeed = (prop != null) ? Long.parseLong(prop.trim()) : new Random().nextLong(); + rng = new Random(currentSeed); + System.out.println( + info.getDisplayName() + + " seed=" + + currentSeed + + " (reproduce with -D" + + SEED_PROPERTY + + "=" + + currentSeed + + ")"); + } + + static final class SeedReporter implements TestWatcher { + @Override + public void testFailed(ExtensionContext ctx, Throwable cause) { + System.err.println( + "TagMapFuzzTest." + + ctx.getDisplayName() + + " FAILED with seed=" + + currentSeed + + "\n reproduce: ./gradlew :internal-api:test --tests \"" + + ctx.getRequiredTestClass().getName() + + "." + + ctx.getDisplayName() + + "\" -D" + + SEED_PROPERTY + + "=" + + currentSeed); + } + } + // Closed-form KnownTags resolver for the fuzz keys ("key-0".."key-(NUM_KEYS-1)"). Lets the // tag-id keyed actions (setById / putAllLedgerById) resolve their names so id-bearing entries // unify with string-keyed entries in the buckets and remain findable by name. @@ -61,26 +110,36 @@ static long tagIdOf(String key) { return ((long) (n + 1) << 48) | ((long) fieldPos << 32) | nameHash; } + // Number of random sequences per @Test run. Default 1 (fast CI); crank via + // -Ddatadog.tagmap.fuzz.iterations=N to hunt rare cases. Deterministic under a fixed seed. 
+ static int iterations() { + return Integer.getInteger("datadog.tagmap.fuzz.iterations", 1); + } + @Test void test() { - test(generateTest()); + for (int i = 0, n = iterations(); i < n; ++i) { + test(generateTest()); + } } @Test void testMerge() { - TestCase mapACase = generateTest(); - TestCase mapBCase = generateTest(); + for (int i = 0, n = iterations(); i < n; ++i) { + TestCase mapACase = generateTest(); + TestCase mapBCase = generateTest(); - OptimizedTagMap tagMapA = test(mapACase); - OptimizedTagMap tagMapB = test(mapBCase); + OptimizedTagMap tagMapA = test(mapACase); + OptimizedTagMap tagMapB = test(mapBCase); - HashMap hashMapA = new HashMap<>(tagMapA); - HashMap hashMapB = new HashMap<>(tagMapB); + HashMap hashMapA = new HashMap<>(tagMapA); + HashMap hashMapB = new HashMap<>(tagMapB); - tagMapA.putAll(tagMapB); - hashMapA.putAll(hashMapB); + tagMapA.putAll(tagMapB); + hashMapA.putAll(hashMapB); - assertMapEquals(hashMapA, tagMapA); + assertMapEquals(hashMapA, tagMapA); + } } @Test @@ -975,8 +1034,7 @@ public static final OptimizedTagMap test(TestCase test) { } public static final TestCase generateTest() { - int numActions = - ThreadLocalRandom.current().nextInt(MAX_NUM_ACTIONS - MIN_NUM_ACTIONS) + MIN_NUM_ACTIONS; + int numActions = rng.nextInt(MAX_NUM_ACTIONS - MIN_NUM_ACTIONS) + MIN_NUM_ACTIONS; return generateTest(numActions); } @@ -989,7 +1047,7 @@ public static final TestCase generateTest(int size) { } public static final MapAction randomAction() { - float actionSelector = ThreadLocalRandom.current().nextFloat(); + float actionSelector = rng.nextFloat(); switch (randomChoice(0.02, 0.1, 0.2)) { case 0: @@ -1083,11 +1141,11 @@ static final void assertMapEquals(Map expected, OptimizedTagMap } static final float randomFloat() { - return ThreadLocalRandom.current().nextFloat(); + return rng.nextFloat(); } static final int randomChoice(int numChoices) { - return ThreadLocalRandom.current().nextInt(numChoices); + return rng.nextInt(numChoices); } static 
final T randomChoice(Supplier... choiceSuppliers) { @@ -1097,7 +1155,7 @@ static final T randomChoice(Supplier... choiceSuppliers) { } static final int randomChoice(double... proportions) { - double selector = ThreadLocalRandom.current().nextDouble(); + double selector = rng.nextDouble(); for (int i = 0; i < proportions.length; ++i) { if (selector < proportions[i]) return i; @@ -1108,15 +1166,15 @@ static final int randomChoice(double... proportions) { } static final String randomKey() { - return "key-" + ThreadLocalRandom.current().nextInt(NUM_KEYS); + return "key-" + rng.nextInt(NUM_KEYS); } static final String randomValue() { - return "values-" + ThreadLocalRandom.current().nextInt(); + return "values-" + rng.nextInt(); } static final String[] randomKeysAndValues() { - int numEntries = ThreadLocalRandom.current().nextInt(NUM_KEYS); + int numEntries = rng.nextInt(NUM_KEYS); String[] keysAndValues = new String[numEntries << 1]; for (int i = 0; i < keysAndValues.length; i += 2) { From c89fad95a3ed2905a6ad6d1f9dddacf10f4c396e Mon Sep 17 00:00:00 2001 From: Douglas Q Hawkins Date: Tue, 9 Jun 2026 16:00:23 -0400 Subject: [PATCH 04/35] Add Entry concurrency tests for tag-id lazy name/hash resolution MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Tag-id-constructed entries resolve their name lazily from the tagId via KnownTags on first tag()/getKey(), caching into the non-volatile `tag` field — a benign race. Run tag-id entries (Object/int/boolean) plus matches() through the existing shuffled multi-threaded harness so 4 threads resolve concurrently; assert all agree on the same interned constant and that hash() equals the tagId's nameHash. Also stress a string entry's lazy hash() now that it writes into the low 32 bits of `tagId` (formerly a separate int lazyTagHash). 
Co-Authored-By: Claude Opus 4.8 --- .../datadog/trace/api/TagMapEntryTest.java | 125 ++++++++++++++++++ 1 file changed, 125 insertions(+) diff --git a/internal-api/src/test/java/datadog/trace/api/TagMapEntryTest.java b/internal-api/src/test/java/datadog/trace/api/TagMapEntryTest.java index e7c483e80ec..3c66a212b44 100644 --- a/internal-api/src/test/java/datadog/trace/api/TagMapEntryTest.java +++ b/internal-api/src/test/java/datadog/trace/api/TagMapEntryTest.java @@ -20,6 +20,8 @@ import java.util.concurrent.ThreadFactory; import java.util.function.Function; import java.util.function.Supplier; +import org.junit.jupiter.api.AfterAll; +import org.junit.jupiter.api.BeforeAll; import org.junit.jupiter.api.DisplayName; import org.junit.jupiter.api.Test; import org.junit.jupiter.params.ParameterizedTest; @@ -550,6 +552,129 @@ public void removalChange() { assertTrue(removalChange.isRemoval()); } + // --------------------------------------------------------------------------------------------- + // Tag-id-constructed entries: the name is resolved lazily from the tagId via KnownTags on first + // tag()/getKey(). That resolution (and the cache write to the volatile-free `tag` field) is a + // benign race — these tests run tag-id entries through the existing multi-threaded harness so 4 + // threads resolve the name concurrently; all must agree, and on the same interned constant. 
+ // --------------------------------------------------------------------------------------------- + + static final String[] TAG_NAMES = {"tag.alpha", "tag.beta", "tag.gamma"}; + + static long tagId(int serial, int fieldPos, String name) { + long nameHash = TagMap.Entry._hash(name) & 0xFFFFFFFFL; + return ((long) serial << 48) | ((long) fieldPos << 32) | nameHash; + } + + @BeforeAll + static void registerResolver() { + KnownTags.register( + new KnownTags.Resolver() { + @Override + public String nameOf(long tagId) { + int serial = (int) (tagId >>> 48); + // returns the same interned constant each call, so racing resolutions agree by identity + return (serial >= 1 && serial <= TAG_NAMES.length) ? TAG_NAMES[serial - 1] : null; + } + + @Override + public long keyOf(String name) { + for (int i = 0; i < TAG_NAMES.length; ++i) { + if (TAG_NAMES[i].equals(name)) return tagId(i + 1, i, TAG_NAMES[i]); + } + return 0L; + } + }); + } + + @AfterAll + static void clearResolver() { + KnownTags.register(null); + } + + // resolved name must be the exact interned constant, and hash() must equal the tagId's low 32 + // bits (nameHash) — both stressed concurrently by the shared-entry multi-threaded harness. 
+ static Check checkResolvedTagId(long id, String name, TagMap.Entry entry) { + return multiCheck( + checkKey(name, entry), + checkTrue(() -> entry.tag() == name, "tag() returns interned constant"), + checkEquals((int) (id & 0xFFFFFFFFL), () -> entry.hash(), "Entry::hash == nameHash")); + } + + @Test + @DisplayName("tag-id entry: Object resolves name lazily under race") + public void tagIdEntryObject() { + long id = tagId(1, 0, TAG_NAMES[0]); + test( + () -> TagMap.Entry.newAnyEntry(id, "bar"), + TagMap.Entry.ANY, + (entry) -> + multiCheck( + checkResolvedTagId(id, TAG_NAMES[0], entry), + checkValue("bar", entry), + checkTrue(entry::isObject), + checkType(TagMap.Entry.OBJECT, entry))); + } + + @Test + @DisplayName("tag-id entry: int resolves name lazily under race") + public void tagIdEntryInt() { + long id = tagId(2, 1, TAG_NAMES[1]); + test( + () -> TagMap.Entry.newIntEntry(id, 42), + TagMap.Entry.INT, + (entry) -> + multiCheck( + checkResolvedTagId(id, TAG_NAMES[1], entry), + checkValue(42, entry), + checkIsNumericPrimitive(entry), + checkType(TagMap.Entry.INT, entry))); + } + + @Test + @DisplayName("tag-id entry: boolean resolves name lazily under race") + public void tagIdEntryBoolean() { + long id = tagId(3, 2, TAG_NAMES[2]); + test( + () -> TagMap.Entry.newBooleanEntry(id, true), + TagMap.Entry.BOOLEAN, + (entry) -> + multiCheck( + checkResolvedTagId(id, TAG_NAMES[2], entry), + checkValue(true, entry), + checkType(TagMap.Entry.BOOLEAN, entry))); + } + + @Test + @DisplayName("string entry: lazy hash() under race") + public void stringEntryLazyHash() { + // string-constructed entry computes hash() lazily, writing into the low 32 bits of the `tagId` + // field (formerly a separate `int lazyTagHash`). Stress concurrent first-resolution. 
+ String name = "some.unknown.tag.name"; + test( + () -> TagMap.Entry.newObjectEntry(name, "v"), + TagMap.Entry.OBJECT, + (entry) -> + multiCheck( + checkEquals(TagMap.Entry._hash(name), () -> entry.hash(), "lazy hash()"), + checkKey(name, entry), + checkValue("v", entry))); + } + + @Test + @DisplayName("tag-id entry: matches() resolves the name under race") + public void tagIdEntryMatches() { + long id = tagId(1, 0, TAG_NAMES[0]); + test( + () -> TagMap.Entry.newObjectEntry(id, "bar"), + TagMap.Entry.OBJECT, + (entry) -> + multiCheck( + checkTrue(() -> entry.matches(TAG_NAMES[0]), "matches(name)"), + checkFalse(() -> entry.matches("nope"), "!matches(other)"), + checkKey(TAG_NAMES[0], entry))); + } + static final int NUM_THREADS = 4; static final ExecutorService EXECUTOR = Executors.newFixedThreadPool( From 9f8b2265dad012f845949b2114922be5038a00cd Mon Sep 17 00:00:00 2001 From: Douglas Q Hawkins Date: Tue, 9 Jun 2026 16:05:53 -0400 Subject: [PATCH 05/35] Add KnownTags.tagId(serial, fieldPos, name) encoder; dedupe callers MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The tagId bit-packing was duplicated in four places (fuzz/unit/Entry tests and the insertion benchmark, which even hand-rolled the name hash). Add a single KnownTags.tagId(globalSerial, fieldPos, name) factory — the inverse of the existing globalSerial/fieldPos/nameHash extractors — that computes nameHash via the runtime's TagMap.Entry._hash so the low 32 bits always match Entry.hash(). Intended for the code generator and tests. Route all callers through it and add an encoder/decoder round-trip test. 
Co-Authored-By: Claude Opus 4.8 --- .../trace/api/TagMapInsertionBenchmark.java | 18 +++--------------- .../main/java/datadog/trace/api/KnownTags.java | 12 ++++++++++++ .../datadog/trace/api/TagMapEntryTest.java | 3 +-- .../java/datadog/trace/api/TagMapFuzzTest.java | 4 +--- .../datadog/trace/api/TagMapTagIdTest.java | 11 +++++++++-- 5 files changed, 26 insertions(+), 22 deletions(-) diff --git a/internal-api/src/jmh/java/datadog/trace/api/TagMapInsertionBenchmark.java b/internal-api/src/jmh/java/datadog/trace/api/TagMapInsertionBenchmark.java index c39fb9dc379..e75a7468d96 100644 --- a/internal-api/src/jmh/java/datadog/trace/api/TagMapInsertionBenchmark.java +++ b/internal-api/src/jmh/java/datadog/trace/api/TagMapInsertionBenchmark.java @@ -1,6 +1,5 @@ package datadog.trace.api; -import datadog.trace.api.TagMap.Entry; import java.util.concurrent.TimeUnit; import org.openjdk.jmh.annotations.Benchmark; import org.openjdk.jmh.annotations.BenchmarkMode; @@ -21,9 +20,8 @@ * Compares tag insertion / lookup by generated tag id vs by string name, with a {@link * KnownTags.Resolver} registered (the production configuration once code generation is live). * - *

<p>Placed in {@code datadog.trace.api} so it can build tag ids with the same {@code nameHash} the - * runtime uses ({@link TagMap.Entry#_hash}); a mismatch would only matter on the bucket-fallback - * path, but keeping it exact makes the comparison faithful. + *

<p>Tag ids are built via {@link KnownTags#tagId} (which uses the runtime's own name hash), so the + * comparison is faithful even on the bucket-fallback path. * *

The tags use distinct {@code fieldPos} values (no collisions), so every known tag lands in its * positional slot. byId skips string hashing and the keyOf round-trip entirely; byString pays @@ -65,16 +63,11 @@ public class TagMapInsertionBenchmark { // a pre-populated (slotted) map for the read benchmarks; built in setup once IDS exist TagMap readMap; - static int nameHash(String tag) { - int hash = tag.hashCode(); - return hash == 0 ? 0xDD06 : hash ^ (hash >>> 16); - } - @Setup(Level.Trial) public void setup() { for (int i = 0; i < NAMES.length; ++i) { // globalSerial = i + 1 (unique, non-zero); fieldPos = i (distinct - no collisions) - IDS[i] = ((long) (i + 1) << 48) | ((long) i << 32) | (nameHash(NAMES[i]) & 0xFFFFFFFFL); + IDS[i] = KnownTags.tagId(i + 1, i, NAMES[i]); VALUES[i] = "value-" + i; } // Representative resolver: nameOf is a dense array index by globalSerial; keyOf is a hash-table @@ -100,11 +93,6 @@ public long keyOf(String name) { return id == null ? 0L : id; } }); - // sanity: assert _hash matches our nameHash so string lookups hit the same bucket if they ever - // fall through (they shouldn't here, but keep the comparison honest) - if (Entry._hash(NAMES[0]) != nameHash(NAMES[0])) { - throw new IllegalStateException("nameHash mismatch with TagMap.Entry._hash"); - } // pre-populate the read map by id (entries land in their slots) this.readMap = TagMap.create(); diff --git a/internal-api/src/main/java/datadog/trace/api/KnownTags.java b/internal-api/src/main/java/datadog/trace/api/KnownTags.java index 170631c5b7e..547f490a051 100644 --- a/internal-api/src/main/java/datadog/trace/api/KnownTags.java +++ b/internal-api/src/main/java/datadog/trace/api/KnownTags.java @@ -39,6 +39,18 @@ public static int nameHash(long tagId) { return (int) tagId; } + /** + * Builds a tagId from its parts: {@code globalSerial} (globally unique per known tag), {@code + * fieldPos} (the tag's slot within its span type's positional table), and the tag {@code name} + * (whose 
hash is computed via the same function the runtime uses, so the low 32 bits match {@link + * TagMap.Entry#hash()}). Inverse of {@link #globalSerial}/{@link #fieldPos}/{@link #nameHash}. + * Intended for the code generator and tests. + */ + public static long tagId(int globalSerial, int fieldPos, String name) { + long nameHash = TagMap.Entry._hash(name) & 0xFFFFFFFFL; + return ((long) globalSerial << 48) | ((long) (fieldPos & 0xFFFF) << 32) | nameHash; + } + public interface Resolver { String nameOf(long tagId); diff --git a/internal-api/src/test/java/datadog/trace/api/TagMapEntryTest.java b/internal-api/src/test/java/datadog/trace/api/TagMapEntryTest.java index 3c66a212b44..cfa119d10a5 100644 --- a/internal-api/src/test/java/datadog/trace/api/TagMapEntryTest.java +++ b/internal-api/src/test/java/datadog/trace/api/TagMapEntryTest.java @@ -562,8 +562,7 @@ public void removalChange() { static final String[] TAG_NAMES = {"tag.alpha", "tag.beta", "tag.gamma"}; static long tagId(int serial, int fieldPos, String name) { - long nameHash = TagMap.Entry._hash(name) & 0xFFFFFFFFL; - return ((long) serial << 48) | ((long) fieldPos << 32) | nameHash; + return KnownTags.tagId(serial, fieldPos, name); } @BeforeAll diff --git a/internal-api/src/test/java/datadog/trace/api/TagMapFuzzTest.java b/internal-api/src/test/java/datadog/trace/api/TagMapFuzzTest.java index f0b8ef3698e..6f29327c2cf 100644 --- a/internal-api/src/test/java/datadog/trace/api/TagMapFuzzTest.java +++ b/internal-api/src/test/java/datadog/trace/api/TagMapFuzzTest.java @@ -102,12 +102,10 @@ static boolean isFuzzKey(String name) { static long tagIdOf(String key) { int n = Integer.parseInt(key.substring("key-".length())); - long nameHash = TagMap.Entry._hash(key) & 0xFFFFFFFFL; // globalSerial = n + 1 (non-zero, unique per key); fieldPos spreads keys across the slot array // (n % CAPACITY), so distinct keys occupy distinct slots AND keys that share a fieldPos collide // (first-writer-wins -> the rest fall to buckets), 
exercising both paths. - int fieldPos = n % OptimizedTagMap.KNOWN_ENTRIES_CAPACITY; - return ((long) (n + 1) << 48) | ((long) fieldPos << 32) | nameHash; + return KnownTags.tagId(n + 1, n % OptimizedTagMap.KNOWN_ENTRIES_CAPACITY, key); } // Number of random sequences per @Test run. Default 1 (fast CI); crank via diff --git a/internal-api/src/test/java/datadog/trace/api/TagMapTagIdTest.java b/internal-api/src/test/java/datadog/trace/api/TagMapTagIdTest.java index f9ad0867779..7fddda69434 100644 --- a/internal-api/src/test/java/datadog/trace/api/TagMapTagIdTest.java +++ b/internal-api/src/test/java/datadog/trace/api/TagMapTagIdTest.java @@ -34,8 +34,15 @@ public class TagMapTagIdTest { static final long DB_SYSTEM_ID = tagId(3, 0, DB_SYSTEM); static long tagId(int globalSerial, int fieldPos, String name) { - long nameHash = Entry._hash(name) & 0xFFFFFFFFL; - return ((long) globalSerial << 48) | ((long) fieldPos << 32) | nameHash; + return KnownTags.tagId(globalSerial, fieldPos, name); + } + + @Test + public void tagId_roundTripsThroughExtractors() { + long id = KnownTags.tagId(7, 13, "some.tag.name"); + assertEquals(7, KnownTags.globalSerial(id)); + assertEquals(13, KnownTags.fieldPos(id)); + assertEquals(Entry._hash("some.tag.name"), KnownTags.nameHash(id)); } @BeforeEach From c42402d5a35abb99ccff8e91706d8250da7da92a Mon Sep 17 00:00:00 2001 From: Douglas Q Hawkins Date: Tue, 9 Jun 2026 17:45:27 -0400 Subject: [PATCH 06/35] Add KnownTags globalSerial reserve partition for virtual/special tags MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Establishes the boundary FIRST_STORED_SERIAL=256: globalSerials [1,256) are reserved for "virtual" tags that are specially handled (redirected to span fields or processed by the tag interceptor) and NOT stored in the TagMap — hand-assigned in tracer core; [256,..) are generated convention tags that ARE stored (slotted/bucketed); 0 stays unknown/string-only. 
Adds isReserved()/ isStored() so setTag(long) can classify a tag by an O(1) range check (its "needsIntercept by id") before routing to the interceptor vs the slot/bucket store. Both core and the code generator agree on this boundary. Co-Authored-By: Claude Opus 4.8 --- .../java/datadog/trace/api/KnownTags.java | 21 +++++++++++++++++++ 1 file changed, 21 insertions(+) diff --git a/internal-api/src/main/java/datadog/trace/api/KnownTags.java b/internal-api/src/main/java/datadog/trace/api/KnownTags.java index 547f490a051..539c5b17e7e 100644 --- a/internal-api/src/main/java/datadog/trace/api/KnownTags.java +++ b/internal-api/src/main/java/datadog/trace/api/KnownTags.java @@ -39,6 +39,27 @@ public static int nameHash(long tagId) { return (int) tagId; } + /** + * globalSerial partition. {@code [1, FIRST_STORED_SERIAL)} is reserved for "virtual" tags that + * are specially handled (redirected to span fields or processed by the tag interceptor) and are + * NOT stored in the TagMap — these are hand-assigned in tracer core. {@code [FIRST_STORED_SERIAL, + * ..]} is for generated convention tags that ARE stored (slotted/bucketed). {@code globalSerial + * == 0} means unknown / string-only. Both core and the code generator must agree on this + * boundary. + */ + public static final int FIRST_STORED_SERIAL = 256; + + /** True if the tagId names a reserved "virtual"/specially-handled tag (not stored in the map). */ + public static boolean isReserved(long tagId) { + int globalSerial = globalSerial(tagId); + return globalSerial > 0 && globalSerial < FIRST_STORED_SERIAL; + } + + /** True if the tagId names a generated, map-stored (slotted/bucketed) tag. 
*/ + public static boolean isStored(long tagId) { + return globalSerial(tagId) >= FIRST_STORED_SERIAL; + } + /** * Builds a tagId from its parts: {@code globalSerial} (globally unique per known tag), {@code * fieldPos} (the tag's slot within its span type's positional table), and the tag {@code name} From 93263f1c5183f8bd0e004ccc0a3ed769cde7b6a3 Mon Sep 17 00:00:00 2001 From: Douglas Q Hawkins Date: Tue, 9 Jun 2026 17:53:48 -0400 Subject: [PATCH 07/35] Wire tag-id setTag into DDSpanContext (slice A): stored + intercepted paths MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Connect the tag-id fast path into the span layer: - CoreTagIds: hand-assigned tag-id constants for core tags + a KnownTags.Resolver that registers on class init (so id resolution is live before the first span). PARENT_ID is a stored tag (serial >= FIRST_STORED_SERIAL); ERROR is a reserved virtual tag (serial < FIRST_STORED_SERIAL, fieldPos sentinel so it never slots). - DDSpanContext.setTag(long, Object): O(1) range-check routing — reserved tags go to the interceptor via id dispatch, stored tags go straight to the map by id (slot/bucket), bypassing the per-tag interceptor string switch. - TagInterceptor.interceptTag(span, long, value): int-switch on globalSerial (ERROR), falling back to the string path by resolved name for other reserved ids. - Migrate the constructor's PARENT_ID set to the id; drop the now-unused import. Tests: PARENT_ID set-by-id is findable/serialized as _dd.parent_id; ERROR set-by-id sets the error flag and is not stored. Existing DDSpanContext/serialization/tracer/ interceptor suites pass with the resolver now registered tracer-wide. 
Co-Authored-By: Claude Opus 4.8 --- .../java/datadog/trace/core/CoreTagIds.java | 65 +++++++++++++++++++ .../datadog/trace/core/DDSpanContext.java | 31 ++++++++- .../core/taginterceptor/TagInterceptor.java | 18 +++++ .../datadog/trace/core/DDSpanContextTest.java | 28 ++++++++ 4 files changed, 140 insertions(+), 2 deletions(-) create mode 100644 dd-trace-core/src/main/java/datadog/trace/core/CoreTagIds.java diff --git a/dd-trace-core/src/main/java/datadog/trace/core/CoreTagIds.java b/dd-trace-core/src/main/java/datadog/trace/core/CoreTagIds.java new file mode 100644 index 00000000000..1390022f2f1 --- /dev/null +++ b/dd-trace-core/src/main/java/datadog/trace/core/CoreTagIds.java @@ -0,0 +1,65 @@ +package datadog.trace.core; + +import datadog.trace.api.DDTags; +import datadog.trace.api.KnownTags; +import datadog.trace.bootstrap.instrumentation.api.Tags; + +/** + * Hand-assigned tag-id constants for tracer-core tags, plus the {@link KnownTags.Resolver} that + * resolves them. + * + *

<p>Reserved serials {@code [1, KnownTags.FIRST_STORED_SERIAL)} name "virtual" tags handled by the + * tag interceptor / span fields and are NOT stored in the {@code TagMap}; their {@code fieldPos} is + * a sentinel ({@link #RESERVED_FIELD_POS}) that is out of slot range, so any incidental store + * routes to the hash buckets rather than a positional slot. Serials {@code >= FIRST_STORED_SERIAL} + * name stored tags that slot/bucket normally. + * + *

The resolver registers on class initialization, so simply referencing any constant here makes + * tag-id resolution live before the first span is built. + */ +public final class CoreTagIds { + // sentinel fieldPos for reserved (non-stored) tags: >= TagMap KNOWN_ENTRIES_CAPACITY, so set() + // can never place them in a positional slot + static final int RESERVED_FIELD_POS = 0xFFFF; + + // ---- reserved / virtual (tag-interceptor handled, not stored) ---- + public static final int ERROR_SERIAL = 1; + public static final long ERROR = KnownTags.tagId(ERROR_SERIAL, RESERVED_FIELD_POS, Tags.ERROR); + + // ---- stored (slotted / bucketed) ---- + public static final int PARENT_ID_SERIAL = KnownTags.FIRST_STORED_SERIAL; + public static final long PARENT_ID = KnownTags.tagId(PARENT_ID_SERIAL, 0, DDTags.PARENT_ID); + + static final KnownTags.Resolver RESOLVER = + new KnownTags.Resolver() { + @Override + public String nameOf(long tagId) { + switch (KnownTags.globalSerial(tagId)) { + case ERROR_SERIAL: + return Tags.ERROR; + case PARENT_ID_SERIAL: + return DDTags.PARENT_ID; + default: + return null; + } + } + + @Override + public long keyOf(String name) { + switch (name) { + case Tags.ERROR: + return ERROR; + case DDTags.PARENT_ID: + return PARENT_ID; + default: + return 0L; + } + } + }; + + static { + KnownTags.register(RESOLVER); + } + + private CoreTagIds() {} +} diff --git a/dd-trace-core/src/main/java/datadog/trace/core/DDSpanContext.java b/dd-trace-core/src/main/java/datadog/trace/core/DDSpanContext.java index e7038db5dbe..48eea7f9339 100644 --- a/dd-trace-core/src/main/java/datadog/trace/core/DDSpanContext.java +++ b/dd-trace-core/src/main/java/datadog/trace/core/DDSpanContext.java @@ -1,6 +1,5 @@ package datadog.trace.core; -import static datadog.trace.api.DDTags.PARENT_ID; import static datadog.trace.api.DDTags.SPAN_LINKS; import static datadog.trace.api.cache.RadixTreeCache.HTTP_STATUSES; import static datadog.trace.bootstrap.instrumentation.api.ErrorPriorities.UNSET; 
@@ -11,6 +10,7 @@ import datadog.trace.api.DDTags; import datadog.trace.api.DDTraceId; import datadog.trace.api.Functions; +import datadog.trace.api.KnownTags; import datadog.trace.api.ProcessTags; import datadog.trace.api.TagMap; import datadog.trace.api.cache.DDCache; @@ -385,7 +385,7 @@ public DDSpanContext( if (samplingPriority != PrioritySampling.UNSET) { setSamplingPriority(samplingPriority, SamplingMechanism.UNKNOWN); } - setTag(PARENT_ID, this.propagationTags.getLastParentId()); + setTag(CoreTagIds.PARENT_ID, this.propagationTags.getLastParentId()); } @Override @@ -901,6 +901,33 @@ public void setTag(final String tag, final String value) { } } + /** + * Sets a tag by its generated tag id. Reserved "virtual" tags (interceptor-handled, not stored) + * are routed to the interceptor via an id dispatch; stored tags go straight to the map (slot or + * bucket) keyed by id, bypassing the per-tag interceptor string switch. The id classification is + * a single range check (see {@link KnownTags#isReserved}). 
+ */ + public void setTag(final long tagId, final Object value) { + if (null == value) { + String name = KnownTags.nameOf(tagId); + if (name != null) { + removeTag(name); + } + return; + } + if (KnownTags.isReserved(tagId)) { + if (!tagInterceptor.interceptTag(this, tagId, value)) { + synchronized (unsafeTags) { + unsafeTags.set(tagId, value); + } + } + } else { + synchronized (unsafeTags) { + unsafeTags.set(tagId, value); + } + } + } + public void setTag(TagMap.EntryReader entry) { if (entry == null) { return; diff --git a/dd-trace-core/src/main/java/datadog/trace/core/taginterceptor/TagInterceptor.java b/dd-trace-core/src/main/java/datadog/trace/core/taginterceptor/TagInterceptor.java index 64bf017e9db..1473f894e93 100644 --- a/dd-trace-core/src/main/java/datadog/trace/core/taginterceptor/TagInterceptor.java +++ b/dd-trace-core/src/main/java/datadog/trace/core/taginterceptor/TagInterceptor.java @@ -24,6 +24,7 @@ import datadog.trace.api.Config; import datadog.trace.api.ConfigDefaults; import datadog.trace.api.DDTags; +import datadog.trace.api.KnownTags; import datadog.trace.api.Pair; import datadog.trace.api.TagMap; import datadog.trace.api.config.GeneralConfig; @@ -36,6 +37,7 @@ import datadog.trace.bootstrap.instrumentation.api.Tags; import datadog.trace.bootstrap.instrumentation.api.URIUtils; import datadog.trace.bootstrap.instrumentation.api.UTF8BytesString; +import datadog.trace.core.CoreTagIds; import datadog.trace.core.DDSpanContext; import java.net.URI; import java.util.Map; @@ -131,6 +133,22 @@ public boolean needsIntercept(String tag) { } } + /** + * Id-dispatched variant of {@link #interceptTag(DDSpanContext, String, Object)}: switches on the + * tagId's globalSerial (an int) instead of the tag-name string. Used by {@code + * DDSpanContext.setTag(long, Object)} for reserved (virtual) tags. Falls back to the string path + * for any reserved id without a dedicated case. 
+ */ + public boolean interceptTag(DDSpanContext span, long tagId, Object value) { + switch (KnownTags.globalSerial(tagId)) { + case CoreTagIds.ERROR_SERIAL: + return interceptError(span, value); + default: + String name = KnownTags.nameOf(tagId); + return name != null && interceptTag(span, name, value); + } + } + public boolean interceptTag(DDSpanContext span, String tag, Object value) { switch (tag) { case DDTags.RESOURCE_NAME: diff --git a/dd-trace-core/src/test/java/datadog/trace/core/DDSpanContextTest.java b/dd-trace-core/src/test/java/datadog/trace/core/DDSpanContextTest.java index 4185c9acdab..79c91a0465b 100644 --- a/dd-trace-core/src/test/java/datadog/trace/core/DDSpanContextTest.java +++ b/dd-trace-core/src/test/java/datadog/trace/core/DDSpanContextTest.java @@ -24,6 +24,7 @@ import static org.junit.jupiter.api.Assertions.assertFalse; import static org.junit.jupiter.api.Assertions.assertInstanceOf; import static org.junit.jupiter.api.Assertions.assertNull; +import static org.junit.jupiter.api.Assertions.assertTrue; import static org.mockito.Mockito.mock; import static org.mockito.Mockito.times; import static org.mockito.Mockito.verify; @@ -70,6 +71,33 @@ void setup() { .build(); } + @Test + void setTagById_storedTagResolvesByName() { + AgentSpan span = tracer.buildSpan("datadog", "fakeOperation").start(); + DDSpanContext context = (DDSpanContext) span.context(); + + // PARENT_ID is a stored tag (serial >= FIRST_STORED_SERIAL): set by id, it lands in the map and + // is findable / serialized by its resolved name. 
+ context.setTag(CoreTagIds.PARENT_ID, "p123"); + assertEquals("p123", context.getTags().get(DDTags.PARENT_ID)); + + span.finish(); + } + + @Test + void setTagById_reservedTagIsIntercepted() { + AgentSpan span = tracer.buildSpan("datadog", "fakeOperation").start(); + DDSpanContext context = (DDSpanContext) span.context(); + + // ERROR is a reserved (virtual) tag: setting it by id dispatches through the interceptor + // (id-keyed), which sets the error flag and does NOT store an "error" tag. + context.setTag(CoreTagIds.ERROR, true); + assertTrue(context.getErrorFlag()); + assertNull(context.getTags().get(Tags.ERROR)); + + span.finish(); + } + @ParameterizedTest @ValueSource(strings = {DDTags.SERVICE_NAME, DDTags.RESOURCE_NAME, DDTags.SPAN_TYPE, "some.tag"}) void nullValuesForTagsDeleteExistingTags(String name) throws Exception { From b3a506813a72308d9345f4aba4af5f399a6b32eb Mon Sep 17 00:00:00 2001 From: Douglas Q Hawkins Date: Tue, 9 Jun 2026 18:07:07 -0400 Subject: [PATCH 08/35] Add tag-id keyed removal to TagMap and TagMap.Ledger - Move the lazy tagId->name resolution into the base EntryChange.tag() (final) and drop Entry's override, so EntryRemoval resolves its name from a tagId too. - EntryChange.newRemoval(long) / EntryRemoval(long) carry a tag id. - TagMap.remove(long)/getAndRemove(long): OptimizedTagMap clears the slot by id (knownRemove) then falls back to the resolved-name bucket lookup; LegacyTagMap resolves the name and delegates. - Ledger.remove(long) records an id-keyed removal; fill() replays id removals via map.remove(long) (slot-aware, no name round-trip), string removals by name. Tests: unit coverage for remove/getAndRemove by id (slot clear, prior value, string-set-then-remove-by-id, ledger remove-by-id) plus a fuzz removeById action woven into the random mix (exercises slot-clear + collided-slot reclaim); ~48k seeded sequences clean. 
Co-Authored-By: Claude Opus 4.8 --- .../main/java/datadog/trace/api/TagMap.java | 76 ++++++++++++++++--- .../datadog/trace/api/TagMapFuzzTest.java | 38 ++++++++++ .../datadog/trace/api/TagMapTagIdTest.java | 49 ++++++++++++ 3 files changed, 151 insertions(+), 12 deletions(-) diff --git a/internal-api/src/main/java/datadog/trace/api/TagMap.java b/internal-api/src/main/java/datadog/trace/api/TagMap.java index b159a7410d8..27e8cdda7b1 100644 --- a/internal-api/src/main/java/datadog/trace/api/TagMap.java +++ b/internal-api/src/main/java/datadog/trace/api/TagMap.java @@ -253,6 +253,12 @@ static Ledger ledger(int size) { */ Entry getAndRemove(String tag); + /** Tag-id keyed removal (no prior value). See {@link #remove(String)}. */ + boolean remove(long tagId); + + /** Tag-id keyed removal returning the prior Entry. See {@link #getAndRemove(String)}. */ + Entry getAndRemove(long tagId); + /** Returns a mutable copy of this TagMap */ TagMap copy(); @@ -310,6 +316,10 @@ public static final EntryRemoval newRemoval(String tag) { return new EntryRemoval(tag); } + public static final EntryRemoval newRemoval(long tagId) { + return new EntryRemoval(tagId); + } + // tagId encoding: bits 63-48 = globalSerial (0 for unknown tags), bits 47-32 = fieldPos, // bits 31-0 = nameHash (_hash(tagName)). String-constructed entries have upper 32 bits zero // with the hash lazily populated on first hash(). tagId-constructed entries have all bits set @@ -331,8 +341,15 @@ public static final EntryRemoval newRemoval(String tag) { this.tag = null; // resolved lazily via tag() } - public String tag() { - return this.tag; + // For tagId-constructed changes (entries and removals) the name is resolved lazily from the + // tagId via KnownTags and cached. Benign race: KnownTags.nameOf returns the same interned + // constant, so re-resolution is harmless. 
+ public final String tag() { + String name = this.tag; + if (name != null) return name; + name = KnownTags.nameOf(this.tagId); + if (name != null) this.tag = name; + return name; } public final boolean matches(String tag) { @@ -354,6 +371,10 @@ final class EntryRemoval extends EntryChange { super(tag); } + EntryRemoval(long tagId) { + super(tagId); + } + @Override public boolean isRemoval() { return true; @@ -583,15 +604,6 @@ int hash() { return hash; } - @Override - public String tag() { - String name = this.tag; - if (name != null) return name; - name = KnownTags.nameOf(this.tagId); - if (name != null) this.tag = name; - return name; - } - @Override public Entry entry() { return this; @@ -1160,6 +1172,10 @@ public Ledger remove(String tag) { return this.recordRemoval(EntryChange.newRemoval(tag)); } + public Ledger remove(long tagId) { + return this.recordRemoval(EntryChange.newRemoval(tagId)); + } + private Ledger recordEntry(Entry entry) { this.recordChange(entry); return this; @@ -1243,7 +1259,12 @@ void fill(TagMap map) { EntryChange change = entryChanges[i]; if (change.isRemoval()) { - map.remove(change.tag()); + // route tag-id removals by id (slot-aware, no name round-trip); string removals by name + if (KnownTags.globalSerial(change.tagId) != 0) { + map.remove(change.tagId); + } else { + map.remove(change.tag()); + } } else { map.set((Entry) change); } @@ -2133,6 +2154,24 @@ public boolean remove(String tag) { return (this.getAndRemove(tag) != null); } + @Override + public boolean remove(long tagId) { + return (this.getAndRemove(tagId) != null); + } + + @Override + public Entry getAndRemove(long tagId) { + this.checkWriteAccess(); + + // known tags live in their slot - clear there first (by id, no name needed) + Entry slotEntry = this.knownRemove(tagId); + if (slotEntry != null) return slotEntry; + + // otherwise it may have collided into the buckets - look up by resolved name + String name = KnownTags.nameOf(tagId); + return name == null ? 
null : this.removeFromBuckets(name, KnownTags.nameHash(tagId)); + } + @Override public Entry getAndRemove(String tag) { this.checkWriteAccess(); @@ -3426,6 +3465,19 @@ public TagMap.Entry getAndRemove(String tag) { return prior == null ? null : TagMap.Entry.newAnyEntry(tag, prior); } + // Tag-id keyed removals: LegacyTagMap is name-keyed, so resolve the name and delegate. + @Override + public boolean remove(long tagId) { + String name = KnownTags.nameOf(tagId); + return name != null && this.remove(name); + } + + @Override + public TagMap.Entry getAndRemove(long tagId) { + String name = KnownTags.nameOf(tagId); + return name == null ? null : this.getAndRemove(name); + } + @Override public Object getObject(String tag) { return this.get(tag); diff --git a/internal-api/src/test/java/datadog/trace/api/TagMapFuzzTest.java b/internal-api/src/test/java/datadog/trace/api/TagMapFuzzTest.java index 6f29327c2cf..deb705d9b4d 100644 --- a/internal-api/src/test/java/datadog/trace/api/TagMapFuzzTest.java +++ b/internal-api/src/test/java/datadog/trace/api/TagMapFuzzTest.java @@ -1062,6 +1062,7 @@ public static final MapAction randomAction() { return randomChoice( () -> remove(randomKey()), () -> removeLight(randomKey()), + () -> removeById(randomKey()), () -> getAndRemove(randomKey())); default: @@ -1117,6 +1118,10 @@ public static final MapAction removeLight(String key) { return new RemoveLight(key); } + public static final MapAction removeById(String key) { + return new RemoveById(key); + } + public static final MapAction getAndRemove(String key) { return new GetAndRemove(key); } @@ -1679,6 +1684,39 @@ public String toString() { } } + static final class RemoveById extends ReturningAction { + final String key; + + RemoveById(String key) { + this.key = key; + } + + @Override + protected Boolean _applyToTestMap(TagMap testMap) { + return testMap.remove(tagIdOf(this.key)); + } + + @Override + protected Object _applyToExpectedMap(Map expectedMap) { + return 
expectedMap.remove(this.key); + } + + @Override + protected void _verifyResults(Object expected, Boolean actual) { + assertEquals((expected != null), actual); + } + + @Override + public void verifyTestMap(TagMap testMap) { + assertFalse(testMap.containsKey(this.key)); + } + + @Override + public String toString() { + return String.format("removeById(%s)", literal(this.key)); + } + } + static final class GetAndRemove extends ReturningAction { final String key; diff --git a/internal-api/src/test/java/datadog/trace/api/TagMapTagIdTest.java b/internal-api/src/test/java/datadog/trace/api/TagMapTagIdTest.java index 7fddda69434..1e3db09c392 100644 --- a/internal-api/src/test/java/datadog/trace/api/TagMapTagIdTest.java +++ b/internal-api/src/test/java/datadog/trace/api/TagMapTagIdTest.java @@ -1,6 +1,7 @@ package datadog.trace.api; import static org.junit.jupiter.api.Assertions.assertEquals; +import static org.junit.jupiter.api.Assertions.assertFalse; import static org.junit.jupiter.api.Assertions.assertNotNull; import static org.junit.jupiter.api.Assertions.assertNull; import static org.junit.jupiter.api.Assertions.assertSame; @@ -172,4 +173,52 @@ public void ledger_mixedIdAndName() { assertEquals("redis", map.get(DB_SYSTEM)); assertEquals(2, map.size()); } + + @Test + public void removeById_clearsAndReportsSize() { + TagMap map = TagMap.create(); + map.set(HTTP_METHOD_ID, "GET"); + assertEquals(1, map.size()); + + assertTrue(map.remove(HTTP_METHOD_ID)); + assertNull(map.getEntry(HTTP_METHOD_ID)); + assertNull(map.getEntry(HTTP_METHOD)); + assertEquals(0, map.size()); + assertFalse(map.remove(HTTP_METHOD_ID)); // already gone + } + + @Test + public void getAndRemoveById_returnsPrior() { + TagMap map = TagMap.create(); + map.set(HTTP_STATUS_ID, 404); + + Entry prior = map.getAndRemove(HTTP_STATUS_ID); + assertNotNull(prior); + assertEquals(404, prior.intValue()); + assertNull(map.getEntry(HTTP_STATUS_ID)); + } + + @Test + public void removeById_removesStringSetTag() { + // 
set by name, removed by id (id resolves to the same tag, slot or bucket) + TagMap map = TagMap.create(); + map.set(DB_SYSTEM, "postgresql"); + + assertTrue(map.remove(DB_SYSTEM_ID)); + assertNull(map.get(DB_SYSTEM)); + } + + @Test + public void ledger_removeById() { + TagMap map = + TagMap.ledger() + .set(HTTP_METHOD_ID, "GET") + .set(DB_SYSTEM_ID, "mysql") + .remove(DB_SYSTEM_ID) + .build(); + + assertEquals("GET", map.get(HTTP_METHOD)); + assertNull(map.get(DB_SYSTEM)); + assertEquals(1, map.size()); + } } From 69353ec2314ee5263e549ba9d46f2a51bebb33dc Mon Sep 17 00:00:00 2001 From: Douglas Q Hawkins Date: Tue, 9 Jun 2026 20:12:42 -0400 Subject: [PATCH 09/35] Pre-build id-bearing shared Entries for common tags (InternalTagsAdder) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Handle the per-span "common" tags (base.service / version) via the tag-id fast path. These values are fixed for the tracer's life, so build their TagMap.Entry once and share across every span (Entry is immutable + safe to share) — dropping InternalTagsAdder's per-span Entry allocation to zero (cf. PR #11555, the string-keyed precursor), and making the entries tag-id-bearing so they also land in their positional slot. - TagMap.Entry.create(long, Object)/create(long, CharSequence): tag-id keyed, null/empty-rejecting factories mirroring the String create(). - CoreTagIds.BASE_SERVICE / VERSION (stored range) + resolver entries. - InternalTagsAdder prebuilds baseServiceEntry/versionEntry in its ctor and set()s the shared entry; empty DD_SERVICE early-returns (regression test added). 
Co-Authored-By: Claude Opus 4.8 --- .../java/datadog/trace/core/CoreTagIds.java | 16 ++++++++++ .../core/tagprocessor/InternalTagsAdder.java | 29 +++++++++++++------ .../tagprocessor/InternalTagsAdderTest.java | 13 +++++++++ .../main/java/datadog/trace/api/TagMap.java | 12 ++++++++ 4 files changed, 61 insertions(+), 9 deletions(-) diff --git a/dd-trace-core/src/main/java/datadog/trace/core/CoreTagIds.java b/dd-trace-core/src/main/java/datadog/trace/core/CoreTagIds.java index 1390022f2f1..85c4b3a2c33 100644 --- a/dd-trace-core/src/main/java/datadog/trace/core/CoreTagIds.java +++ b/dd-trace-core/src/main/java/datadog/trace/core/CoreTagIds.java @@ -30,6 +30,14 @@ public final class CoreTagIds { public static final int PARENT_ID_SERIAL = KnownTags.FIRST_STORED_SERIAL; public static final long PARENT_ID = KnownTags.tagId(PARENT_ID_SERIAL, 0, DDTags.PARENT_ID); + // common (process-constant) tags added by InternalTagsAdder to ~every span + public static final int BASE_SERVICE_SERIAL = KnownTags.FIRST_STORED_SERIAL + 1; + public static final long BASE_SERVICE = + KnownTags.tagId(BASE_SERVICE_SERIAL, 1, DDTags.BASE_SERVICE); + + public static final int VERSION_SERIAL = KnownTags.FIRST_STORED_SERIAL + 2; + public static final long VERSION = KnownTags.tagId(VERSION_SERIAL, 2, Tags.VERSION); + static final KnownTags.Resolver RESOLVER = new KnownTags.Resolver() { @Override @@ -39,6 +47,10 @@ public String nameOf(long tagId) { return Tags.ERROR; case PARENT_ID_SERIAL: return DDTags.PARENT_ID; + case BASE_SERVICE_SERIAL: + return DDTags.BASE_SERVICE; + case VERSION_SERIAL: + return Tags.VERSION; default: return null; } @@ -51,6 +63,10 @@ public long keyOf(String name) { return ERROR; case DDTags.PARENT_ID: return PARENT_ID; + case DDTags.BASE_SERVICE: + return BASE_SERVICE; + case Tags.VERSION: + return VERSION; default: return 0L; } diff --git a/dd-trace-core/src/main/java/datadog/trace/core/tagprocessor/InternalTagsAdder.java 
b/dd-trace-core/src/main/java/datadog/trace/core/tagprocessor/InternalTagsAdder.java index 68b13d19faf..3a68a45ffed 100644 --- a/dd-trace-core/src/main/java/datadog/trace/core/tagprocessor/InternalTagsAdder.java +++ b/dd-trace-core/src/main/java/datadog/trace/core/tagprocessor/InternalTagsAdder.java @@ -2,37 +2,48 @@ import static datadog.trace.bootstrap.instrumentation.api.Tags.VERSION; -import datadog.trace.api.DDTags; import datadog.trace.api.TagMap; import datadog.trace.bootstrap.instrumentation.api.AppendableSpanLinks; import datadog.trace.bootstrap.instrumentation.api.UTF8BytesString; +import datadog.trace.core.CoreTagIds; import datadog.trace.core.DDSpanContext; import javax.annotation.Nullable; public final class InternalTagsAdder extends TagsPostProcessor { private final UTF8BytesString ddService; - private final UTF8BytesString version; + + // base.service / version are fixed for the life of the tracer, so their TagMap.Entry objects are + // pre-built once and shared across every span (Entry is immutable and safe to share between + // maps). + // The entries are tag-id-bearing (CoreTagIds), so they also land in their positional slot. null + // when the corresponding value is absent/empty. See PR #11555 for the string-keyed precursor. + @Nullable private final TagMap.Entry baseServiceEntry; + @Nullable private final TagMap.Entry versionEntry; public InternalTagsAdder(@Nullable final String ddService, @Nullable final String version) { this.ddService = ddService != null ? UTF8BytesString.create(ddService) : null; - this.version = version != null && !version.isEmpty() ? UTF8BytesString.create(version) : null; + this.baseServiceEntry = TagMap.Entry.create(CoreTagIds.BASE_SERVICE, this.ddService); + this.versionEntry = + version != null && !version.isEmpty() + ? 
TagMap.Entry.create(CoreTagIds.VERSION, UTF8BytesString.create(version)) + : null; } @Override public void processTags( TagMap unsafeTags, DDSpanContext spanContext, AppendableSpanLinks spanLinks) { - if (spanContext == null || ddService == null) { + if (spanContext == null || ddService == null || ddService.length() == 0) { return; } if (!ddService.toString().equalsIgnoreCase(spanContext.getServiceName())) { - // service name != DD_SERVICE - unsafeTags.set(DDTags.BASE_SERVICE, ddService); + // service name != DD_SERVICE + unsafeTags.set(baseServiceEntry); } else { // as per config consistency, the version tag is added across tracers only if - // the service name is DD_SERVICE and version tag is not manually set - if (version != null && !unsafeTags.containsKey(VERSION)) { - unsafeTags.set(VERSION, version); + // the service name is DD_SERVICE and version tag is not manually set + if (versionEntry != null && !unsafeTags.containsKey(VERSION)) { + unsafeTags.set(versionEntry); } } } diff --git a/dd-trace-core/src/test/java/datadog/trace/core/tagprocessor/InternalTagsAdderTest.java b/dd-trace-core/src/test/java/datadog/trace/core/tagprocessor/InternalTagsAdderTest.java index ea3798a4427..a914b78fd28 100644 --- a/dd-trace-core/src/test/java/datadog/trace/core/tagprocessor/InternalTagsAdderTest.java +++ b/dd-trace-core/src/test/java/datadog/trace/core/tagprocessor/InternalTagsAdderTest.java @@ -14,6 +14,7 @@ import datadog.trace.test.util.DDJavaSpecification; import java.util.Collections; import java.util.Objects; +import org.junit.jupiter.api.Test; import org.tabletest.junit.TableTest; class InternalTagsAdderTest extends DDJavaSpecification { @@ -67,4 +68,16 @@ void shouldAddVersionWhenDdServiceEqualsServiceNameAndVersionSet( verify(spanContext, times(1)).getServiceName(); assertEquals(expected, Objects.toString(unsafeTags.get(VERSION), null)); } + + // Regression: empty DD_SERVICE is treated the same as unset — processTags exits early and writes + // no tags, regardless 
of the span's service name (the prebuilt base.service entry is null). + @Test + void emptyDdServiceWritesNoTags() { + InternalTagsAdder adder = new InternalTagsAdder("", "1.0"); + DDSpanContext spanContext = mock(DDSpanContext.class); + + TagMap tags = TagMap.fromMap(Collections.emptyMap()); + adder.processTags(tags, spanContext, link -> {}); + assertTrue(tags.isEmpty()); + } } diff --git a/internal-api/src/main/java/datadog/trace/api/TagMap.java b/internal-api/src/main/java/datadog/trace/api/TagMap.java index 27e8cdda7b1..d00e80a03ef 100644 --- a/internal-api/src/main/java/datadog/trace/api/TagMap.java +++ b/internal-api/src/main/java/datadog/trace/api/TagMap.java @@ -455,6 +455,18 @@ public static final Entry create(String tag, CharSequence value) { : TagMap.Entry.newObjectEntry(tag, value); } + /** Tag-id keyed {@link #create(String, Object)}: null value yields null. */ + public static final Entry create(long tagId, Object value) { + return (value == null) ? null : TagMap.Entry.newAnyEntry(tagId, value); + } + + /** Tag-id keyed {@link #create(String, CharSequence)}: null/empty value yields null. */ + public static final Entry create(long tagId, CharSequence value) { + return (value == null || value.length() == 0) + ? null + : TagMap.Entry.newObjectEntry(tagId, value); + } + public static final Entry create(String tag, boolean value) { return TagMap.Entry.newBooleanEntry(tag, value); } From 9dc21abaa35793513ef00a63f6454495601ebda1 Mon Sep 17 00:00:00 2001 From: Douglas Q Hawkins Date: Tue, 9 Jun 2026 20:48:54 -0400 Subject: [PATCH 10/35] Id build-time-known common tags (env, DJM/DSM enabled) for the common slot layout MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit env and the product-mixin flags _dd.djm.enabled / _dd.dsm.enabled are build-time-known constant tags merged into defaultSpanTags (CoreTracer. withTracerTags). 
Hand-assign tag ids for them (stored range, CoreTagIds) so they occupy the shared global slot layout: defaultSpanTags slots them on build, and they merge into each span's slots via the existing slot-aware merge — sharing entries, no per-span placement, no common prototype / construction change. Runtime-configured user tags keep no id and ride in the buckets, per the rule that only agent-build-time-known tags get slots. Co-Authored-By: Claude Opus 4.8 --- .../java/datadog/trace/core/CoreTagIds.java | 24 +++++++++++++++++++ .../datadog/trace/core/DDSpanContextTest.java | 20 ++++++++++++++++ 2 files changed, 44 insertions(+) diff --git a/dd-trace-core/src/main/java/datadog/trace/core/CoreTagIds.java b/dd-trace-core/src/main/java/datadog/trace/core/CoreTagIds.java index 85c4b3a2c33..2228968cebf 100644 --- a/dd-trace-core/src/main/java/datadog/trace/core/CoreTagIds.java +++ b/dd-trace-core/src/main/java/datadog/trace/core/CoreTagIds.java @@ -38,6 +38,18 @@ public final class CoreTagIds { public static final int VERSION_SERIAL = KnownTags.FIRST_STORED_SERIAL + 2; public static final long VERSION = KnownTags.tagId(VERSION_SERIAL, 2, Tags.VERSION); + // build-time-known constant tags merged into defaultSpanTags (see CoreTracer.withTracerTags). + // "env" is a base-mixin tag; the *_ENABLED flags are product-mixin tags. Hand-assigned for now. 
+ public static final String ENV = "env"; + public static final int ENV_SERIAL = KnownTags.FIRST_STORED_SERIAL + 3; + public static final long ENV_ID = KnownTags.tagId(ENV_SERIAL, 3, ENV); + + public static final int DJM_ENABLED_SERIAL = KnownTags.FIRST_STORED_SERIAL + 4; + public static final long DJM_ENABLED = KnownTags.tagId(DJM_ENABLED_SERIAL, 4, DDTags.DJM_ENABLED); + + public static final int DSM_ENABLED_SERIAL = KnownTags.FIRST_STORED_SERIAL + 5; + public static final long DSM_ENABLED = KnownTags.tagId(DSM_ENABLED_SERIAL, 5, DDTags.DSM_ENABLED); + static final KnownTags.Resolver RESOLVER = new KnownTags.Resolver() { @Override @@ -51,6 +63,12 @@ public String nameOf(long tagId) { return DDTags.BASE_SERVICE; case VERSION_SERIAL: return Tags.VERSION; + case ENV_SERIAL: + return ENV; + case DJM_ENABLED_SERIAL: + return DDTags.DJM_ENABLED; + case DSM_ENABLED_SERIAL: + return DDTags.DSM_ENABLED; default: return null; } @@ -67,6 +85,12 @@ public long keyOf(String name) { return BASE_SERVICE; case Tags.VERSION: return VERSION; + case ENV: + return ENV_ID; + case DDTags.DJM_ENABLED: + return DJM_ENABLED; + case DDTags.DSM_ENABLED: + return DSM_ENABLED; default: return 0L; } diff --git a/dd-trace-core/src/test/java/datadog/trace/core/DDSpanContextTest.java b/dd-trace-core/src/test/java/datadog/trace/core/DDSpanContextTest.java index 79c91a0465b..90a98c5e2a2 100644 --- a/dd-trace-core/src/test/java/datadog/trace/core/DDSpanContextTest.java +++ b/dd-trace-core/src/test/java/datadog/trace/core/DDSpanContextTest.java @@ -23,6 +23,7 @@ import static org.junit.jupiter.api.Assertions.assertEquals; import static org.junit.jupiter.api.Assertions.assertFalse; import static org.junit.jupiter.api.Assertions.assertInstanceOf; +import static org.junit.jupiter.api.Assertions.assertNotNull; import static org.junit.jupiter.api.Assertions.assertNull; import static org.junit.jupiter.api.Assertions.assertTrue; import static org.mockito.Mockito.mock; @@ -98,6 +99,25 @@ void 
setTagById_reservedTagIsIntercepted() { span.finish(); } + @Test + void commonTags_slotByNameViaCommonLayout() { + // env / product flags are build-time-known common tags (CoreTagIds). Set by name they resolve + // to their id and land in the common slot layout, and remain findable by both name and id. + AgentSpan span = tracer.buildSpan("datadog", "fakeOperation").start(); + DDSpanContext context = (DDSpanContext) span.context(); + + context.setTag("env", "prod"); + context.setTag(DDTags.DJM_ENABLED, 1); + + assertEquals("prod", context.getTags().get("env")); + assertEquals(1, context.getTags().get(DDTags.DJM_ENABLED)); + // proves they occupy the shared slot layout (findable by id) + assertNotNull(context.getTags().getEntry(CoreTagIds.ENV_ID)); + assertNotNull(context.getTags().getEntry(CoreTagIds.DJM_ENABLED)); + + span.finish(); + } + @ParameterizedTest @ValueSource(strings = {DDTags.SERVICE_NAME, DDTags.RESOURCE_NAME, DDTags.SPAN_TYPE, "some.tag"}) void nullValuesForTagsDeleteExistingTags(String name) throws Exception { From 1a0d13e705d08be72e1f4bc5fc45870eb0877b7b Mon Sep 17 00:00:00 2001 From: Douglas Q Hawkins Date: Tue, 9 Jun 2026 21:04:34 -0400 Subject: [PATCH 11/35] Drop unused Prototype; size knownEntries to the provider's max slot MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The common-layout + fast-merge approach doesn't need a per-map prototype, so remove the now-unused scaffolding (Prototype, createKnownEntries, TagMap.create(Prototype), OptimizedTagMap(Prototype)) — recoverable from history when template-stamping is revisited. Replace the hardcoded KNOWN_ENTRIES_CAPACITY=32 with the registered provider's slot count: KnownTags.Resolver now declares slotCount() (= max stored fieldPos + 1), captured once at registration as a dynamic constant (KnownTags.slotCount()), and OptimizedTagMap sizes knownEntries to exactly that. 
CoreTagIds reports 6 (its stored tags occupy fieldPos 0..5); reserved tags keep their out-of-range sentinel and never slot. Resolvers in the tests/benchmark declare their own slot counts. Co-Authored-By: Claude Opus 4.8 --- .../java/datadog/trace/core/CoreTagIds.java | 12 +++++- .../trace/api/TagMapInsertionBenchmark.java | 5 +++ .../java/datadog/trace/api/KnownTags.java | 24 ++++++++--- .../main/java/datadog/trace/api/TagMap.java | 42 ++++--------------- .../datadog/trace/api/TagMapEntryTest.java | 5 +++ .../datadog/trace/api/TagMapFuzzTest.java | 15 +++++-- .../datadog/trace/api/TagMapTagIdTest.java | 5 +++ 7 files changed, 63 insertions(+), 45 deletions(-) diff --git a/dd-trace-core/src/main/java/datadog/trace/core/CoreTagIds.java b/dd-trace-core/src/main/java/datadog/trace/core/CoreTagIds.java index 2228968cebf..15171833c74 100644 --- a/dd-trace-core/src/main/java/datadog/trace/core/CoreTagIds.java +++ b/dd-trace-core/src/main/java/datadog/trace/core/CoreTagIds.java @@ -18,10 +18,13 @@ * tag-id resolution live before the first span is built. */ public final class CoreTagIds { - // sentinel fieldPos for reserved (non-stored) tags: >= TagMap KNOWN_ENTRIES_CAPACITY, so set() - // can never place them in a positional slot + // sentinel fieldPos for reserved (non-stored) tags: far beyond SLOT_COUNT, so set() can never + // place them in a positional slot static final int RESERVED_FIELD_POS = 0xFFFF; + // slot count = (max stored fieldPos) + 1. Stored tags use fieldPos 0..5 (PARENT_ID..DSM_ENABLED). 
+ static final int SLOT_COUNT = 6; + // ---- reserved / virtual (tag-interceptor handled, not stored) ---- public static final int ERROR_SERIAL = 1; public static final long ERROR = KnownTags.tagId(ERROR_SERIAL, RESERVED_FIELD_POS, Tags.ERROR); @@ -74,6 +77,11 @@ public String nameOf(long tagId) { } } + @Override + public int slotCount() { + return SLOT_COUNT; + } + @Override public long keyOf(String name) { switch (name) { diff --git a/internal-api/src/jmh/java/datadog/trace/api/TagMapInsertionBenchmark.java b/internal-api/src/jmh/java/datadog/trace/api/TagMapInsertionBenchmark.java index e75a7468d96..50ad76869b5 100644 --- a/internal-api/src/jmh/java/datadog/trace/api/TagMapInsertionBenchmark.java +++ b/internal-api/src/jmh/java/datadog/trace/api/TagMapInsertionBenchmark.java @@ -92,6 +92,11 @@ public long keyOf(String name) { Long id = nameToId.get(name); return id == null ? 0L : id; } + + @Override + public int slotCount() { + return NAMES.length; // fieldPos 0..NAMES.length-1 + } }); // pre-populate the read map by id (entries land in their slots) diff --git a/internal-api/src/main/java/datadog/trace/api/KnownTags.java b/internal-api/src/main/java/datadog/trace/api/KnownTags.java index 539c5b17e7e..062fa155737 100644 --- a/internal-api/src/main/java/datadog/trace/api/KnownTags.java +++ b/internal-api/src/main/java/datadog/trace/api/KnownTags.java @@ -21,11 +21,11 @@ public static boolean isActive() { /* * tagId bit layout: [63-48 globalSerial] [47-32 fieldPos] [31-0 nameHash]. - * globalSerial is globally unique per known tag; fieldPos is the slot within a single span - * type's positional table (layout-relative — only meaningful within its own Prototype); nameHash - * is TagMap.Entry#_hash(name) and is layout-independent. Unknown (string-only) tags have the - * upper 32 bits zero. NOTE: TagMap.Entry decodes nameHash inline as (int) tagId on its hot path, - * so the low-32 encoding here must stay in sync with that. 
+ * globalSerial is globally unique per known tag; fieldPos is its slot in the global positional + * layout (TagMap.knownEntries index); nameHash is TagMap.Entry#_hash(name) and is + * layout-independent. Unknown (string-only) tags have the upper 32 bits zero. NOTE: TagMap.Entry + * decodes nameHash inline as (int) tagId on its hot path, so the low-32 encoding here must stay + * in sync with that. */ public static int globalSerial(long tagId) { return (int) (tagId >>> 48); @@ -72,14 +72,28 @@ public static long tagId(int globalSerial, int fieldPos, String name) { return ((long) globalSerial << 48) | ((long) (fieldPos & 0xFFFF) << 32) | nameHash; } + // Number of positional slots in the global layout = (max stored fieldPos) + 1, declared by the + // registered provider. Captured once at registration and read as a dynamic constant; TagMap sizes + // its knownEntries array to exactly this rather than a hardcoded max. 0 when no resolver. + private static int slotCount; + + /** Slot count of the registered provider (max stored fieldPos + 1); 0 if none. */ + public static int slotCount() { + return slotCount; + } + public interface Resolver { String nameOf(long tagId); long keyOf(String name); + + /** Number of positional slots this provider uses: (max stored fieldPos) + 1. */ + int slotCount(); } public static void register(Resolver resolver) { KnownTags.resolver = resolver; // volatile write publishes the resolver + KnownTags.slotCount = (resolver != null) ? 
resolver.slotCount() : 0; KnownTags.active = (resolver != null); // plain write; readers re-read resolver volatile anyway } diff --git a/internal-api/src/main/java/datadog/trace/api/TagMap.java b/internal-api/src/main/java/datadog/trace/api/TagMap.java index d00e80a03ef..1d90ebfe54f 100644 --- a/internal-api/src/main/java/datadog/trace/api/TagMap.java +++ b/internal-api/src/main/java/datadog/trace/api/TagMap.java @@ -71,10 +71,6 @@ static TagMap create(int size) { return TagMapFactory.INSTANCE.create(size); } - static TagMap create(TagMap.Prototype proto) { - return new OptimizedTagMap(proto); - } - /** Creates a new TagMap.Ledger */ static Ledger ledger() { return new Ledger(); @@ -1070,19 +1066,6 @@ static int _hash(String tag) { } } - /** - * Per-span-type factory for an {@link OptimizedTagMap} backed by a positional {@link Entry} - * array. Known tags for the span type are stored directly at their {@code fieldPos} slot (O(1), - * no hashing); unexpected tags fall back to the hash buckets. - * - *

For now this just vends a blank positional {@code Entry[]} sized for the span type's layout. - * Later it will stamp out a prepopulated template (constant-valued entries copied per span) and - * may cache shared Entry instances for common values. - */ - abstract class Prototype { - public abstract Entry[] createKnownEntries(); - } - /* * An in-order ledger of changes to be made to a TagMap. * Ledger can also serves as a builder for TagMap-s via build & buildImmutable. @@ -1427,19 +1410,14 @@ private static final class EmptyHolder { static final OptimizedTagMap EMPTY = new OptimizedTagMap(new Object[1], 0); } - // Default capacity for the lazily-allocated knownEntries array (one slot per fieldPos). Known - // tags' fieldPos values are small (a span type carries well under this many tags); a tagId whose - // fieldPos is >= the array length simply falls back to the hash buckets. - static final int KNOWN_ENTRIES_CAPACITY = 32; - private final Object[] buckets; private int size; private boolean frozen; // Positional store for known tags, indexed by fieldPos. Lazily allocated on the first known-tag - // write (or supplied up front by a Prototype). A known tag claims its slot first-writer-wins; - // colliding tags (a different globalSerial already owns the slot) fall back to the hash buckets. - // Entries are self-describing (carry their tagId), so a bucketed tag still serializes correctly. + // write. A known tag claims its slot first-writer-wins; colliding tags (a different globalSerial + // already owns the slot) fall back to the hash buckets. Entries are self-describing (carry their + // tagId), so a bucketed tag still serializes correctly. 
private TagMap.Entry[] knownEntries; // Bitmask of fieldPos slots that have ever had a collision (a known tag diverted to the buckets @@ -1455,13 +1433,6 @@ public OptimizedTagMap() { this.knownEntries = null; } - public OptimizedTagMap(TagMap.Prototype proto) { - this.buckets = new Object[1 << 4]; - this.size = 0; - this.frozen = false; - this.knownEntries = proto.createKnownEntries(); - } - /** Used for inexpensive immutable */ private OptimizedTagMap(Object[] buckets, int size) { this.buckets = buckets; @@ -1785,10 +1756,13 @@ public Entry getAndSet(Entry newEntry) { // (a different tag owns the slot) or out-of-range fieldPos, falls back to the hash buckets. private Entry setKnown(Entry newEntry, int globalSerial) { int pos = KnownTags.fieldPos(newEntry.tagId); - if (pos < KNOWN_ENTRIES_CAPACITY) { + // knownEntries is sized to the registered provider's slot count (max stored fieldPos + 1); a + // larger fieldPos (e.g. a reserved tag's sentinel) routes to the buckets. + int slotCount = KnownTags.slotCount(); + if (pos < slotCount) { Entry[] known = this.knownEntries; if (known == null) { - known = this.knownEntries = new Entry[KNOWN_ENTRIES_CAPACITY]; + known = this.knownEntries = new Entry[slotCount]; } if (pos < known.length) { Entry occupant = known[pos]; diff --git a/internal-api/src/test/java/datadog/trace/api/TagMapEntryTest.java b/internal-api/src/test/java/datadog/trace/api/TagMapEntryTest.java index cfa119d10a5..be18fd25183 100644 --- a/internal-api/src/test/java/datadog/trace/api/TagMapEntryTest.java +++ b/internal-api/src/test/java/datadog/trace/api/TagMapEntryTest.java @@ -583,6 +583,11 @@ public long keyOf(String name) { } return 0L; } + + @Override + public int slotCount() { + return TAG_NAMES.length; // fieldPos 0..TAG_NAMES.length-1 + } }); } diff --git a/internal-api/src/test/java/datadog/trace/api/TagMapFuzzTest.java b/internal-api/src/test/java/datadog/trace/api/TagMapFuzzTest.java index deb705d9b4d..4ffeea90c7d 100644 --- 
a/internal-api/src/test/java/datadog/trace/api/TagMapFuzzTest.java +++ b/internal-api/src/test/java/datadog/trace/api/TagMapFuzzTest.java @@ -74,6 +74,10 @@ public void testFailed(ExtensionContext ctx, Throwable cause) { // Closed-form KnownTags resolver for the fuzz keys ("key-0".."key-(NUM_KEYS-1)"). Lets the // tag-id keyed actions (setById / putAllLedgerById) resolve their names so id-bearing entries // unify with string-keyed entries in the buckets and remain findable by name. + // slot count for the fuzz layout; fieldPos = n % SLOT_COUNT so keys spread across slots and some + // collide (first-writer-wins -> the rest fall to buckets), exercising both paths. + static final int SLOT_COUNT = 32; + @BeforeAll static void registerResolver() { KnownTags.register( @@ -88,6 +92,11 @@ public String nameOf(long tagId) { public long keyOf(String name) { return isFuzzKey(name) ? tagIdOf(name) : 0L; } + + @Override + public int slotCount() { + return SLOT_COUNT; + } }); } @@ -102,10 +111,8 @@ static boolean isFuzzKey(String name) { static long tagIdOf(String key) { int n = Integer.parseInt(key.substring("key-".length())); - // globalSerial = n + 1 (non-zero, unique per key); fieldPos spreads keys across the slot array - // (n % CAPACITY), so distinct keys occupy distinct slots AND keys that share a fieldPos collide - // (first-writer-wins -> the rest fall to buckets), exercising both paths. - return KnownTags.tagId(n + 1, n % OptimizedTagMap.KNOWN_ENTRIES_CAPACITY, key); + // globalSerial = n + 1 (non-zero, unique per key); fieldPos = n % SLOT_COUNT + return KnownTags.tagId(n + 1, n % SLOT_COUNT, key); } // Number of random sequences per @Test run. 
Default 1 (fast CI); crank via diff --git a/internal-api/src/test/java/datadog/trace/api/TagMapTagIdTest.java b/internal-api/src/test/java/datadog/trace/api/TagMapTagIdTest.java index 1e3db09c392..086faa3d848 100644 --- a/internal-api/src/test/java/datadog/trace/api/TagMapTagIdTest.java +++ b/internal-api/src/test/java/datadog/trace/api/TagMapTagIdTest.java @@ -69,6 +69,11 @@ public long keyOf(String name) { Long id = idByName.get(name); return id == null ? 0L : id; } + + @Override + public int slotCount() { + return 6; // max stored fieldPos (HTTP_STATUS=5) + 1 + } }); } From 6f348e9bc807600be9d32a1bad45c4db188c2447 Mon Sep 17 00:00:00 2001 From: Douglas Q Hawkins Date: Tue, 9 Jun 2026 21:45:11 -0400 Subject: [PATCH 12/35] Route non-intercepted post-processor tags through the tag-id slot path MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Migrate the post-processors that set non-intercepted, stored common tags to id-keyed writes (the lower-friction interceptor surface — they operate on the TagMap directly, no AgentSpan/MutableSpan change): - RemoteHostnameAdder: _dd.tracer_host (its cached shared Entry is now id-bearing) - IntegrationAdder: _dd.integration - ServiceNameSourceAdder: _dd.svc_src Hand-assign their ids in CoreTagIds (stored range, fieldPos 6..8; SLOT_COUNT 9) + resolver entries. These tags now occupy the shared slot layout; since they're id'd, any string set of the same tag also slots via keyOf (unification). 
Co-Authored-By: Claude Opus 4.8 --- .../java/datadog/trace/core/CoreTagIds.java | 29 +++++++++++++++++-- .../core/tagprocessor/IntegrationAdder.java | 7 ++--- .../tagprocessor/RemoteHostnameAdder.java | 4 +-- .../tagprocessor/ServiceNameSourceAdder.java | 7 ++--- 4 files changed, 35 insertions(+), 12 deletions(-) diff --git a/dd-trace-core/src/main/java/datadog/trace/core/CoreTagIds.java b/dd-trace-core/src/main/java/datadog/trace/core/CoreTagIds.java index 15171833c74..c1f38dae2e0 100644 --- a/dd-trace-core/src/main/java/datadog/trace/core/CoreTagIds.java +++ b/dd-trace-core/src/main/java/datadog/trace/core/CoreTagIds.java @@ -22,8 +22,8 @@ public final class CoreTagIds { // place them in a positional slot static final int RESERVED_FIELD_POS = 0xFFFF; - // slot count = (max stored fieldPos) + 1. Stored tags use fieldPos 0..5 (PARENT_ID..DSM_ENABLED). - static final int SLOT_COUNT = 6; + // slot count = (max stored fieldPos) + 1. Stored tags use fieldPos 0..8. + static final int SLOT_COUNT = 9; // ---- reserved / virtual (tag-interceptor handled, not stored) ---- public static final int ERROR_SERIAL = 1; @@ -53,6 +53,19 @@ public final class CoreTagIds { public static final int DSM_ENABLED_SERIAL = KnownTags.FIRST_STORED_SERIAL + 5; public static final long DSM_ENABLED = KnownTags.tagId(DSM_ENABLED_SERIAL, 5, DDTags.DSM_ENABLED); + // common tags added by the tag post-processors (RemoteHostnameAdder / IntegrationAdder / + // ServiceNameSourceAdder). Not intercepted; stored. 
+ public static final int TRACER_HOST_SERIAL = KnownTags.FIRST_STORED_SERIAL + 6; + public static final long TRACER_HOST_ID = + KnownTags.tagId(TRACER_HOST_SERIAL, 6, DDTags.TRACER_HOST); + + public static final int INTEGRATION_SERIAL = KnownTags.FIRST_STORED_SERIAL + 7; + public static final long INTEGRATION_ID = + KnownTags.tagId(INTEGRATION_SERIAL, 7, DDTags.DD_INTEGRATION); + + public static final int SVC_SRC_SERIAL = KnownTags.FIRST_STORED_SERIAL + 8; + public static final long SVC_SRC_ID = KnownTags.tagId(SVC_SRC_SERIAL, 8, DDTags.DD_SVC_SRC); + static final KnownTags.Resolver RESOLVER = new KnownTags.Resolver() { @Override @@ -72,6 +85,12 @@ public String nameOf(long tagId) { return DDTags.DJM_ENABLED; case DSM_ENABLED_SERIAL: return DDTags.DSM_ENABLED; + case TRACER_HOST_SERIAL: + return DDTags.TRACER_HOST; + case INTEGRATION_SERIAL: + return DDTags.DD_INTEGRATION; + case SVC_SRC_SERIAL: + return DDTags.DD_SVC_SRC; default: return null; } @@ -99,6 +118,12 @@ public long keyOf(String name) { return DJM_ENABLED; case DDTags.DSM_ENABLED: return DSM_ENABLED; + case DDTags.TRACER_HOST: + return TRACER_HOST_ID; + case DDTags.DD_INTEGRATION: + return INTEGRATION_ID; + case DDTags.DD_SVC_SRC: + return SVC_SRC_ID; default: return 0L; } diff --git a/dd-trace-core/src/main/java/datadog/trace/core/tagprocessor/IntegrationAdder.java b/dd-trace-core/src/main/java/datadog/trace/core/tagprocessor/IntegrationAdder.java index 0aabbc29c47..29d88771357 100644 --- a/dd-trace-core/src/main/java/datadog/trace/core/tagprocessor/IntegrationAdder.java +++ b/dd-trace-core/src/main/java/datadog/trace/core/tagprocessor/IntegrationAdder.java @@ -1,9 +1,8 @@ package datadog.trace.core.tagprocessor; -import static datadog.trace.api.DDTags.DD_INTEGRATION; - import datadog.trace.api.TagMap; import datadog.trace.bootstrap.instrumentation.api.AppendableSpanLinks; +import datadog.trace.core.CoreTagIds; import datadog.trace.core.DDSpanContext; public class IntegrationAdder extends 
TagsPostProcessor { @@ -12,9 +11,9 @@ public void processTags( TagMap unsafeTags, DDSpanContext spanContext, AppendableSpanLinks spanLinks) { final CharSequence instrumentationName = spanContext.getIntegrationName(); if (instrumentationName != null) { - unsafeTags.set(DD_INTEGRATION, instrumentationName); + unsafeTags.set(CoreTagIds.INTEGRATION_ID, instrumentationName); } else { - unsafeTags.remove(DD_INTEGRATION); + unsafeTags.remove(CoreTagIds.INTEGRATION_ID); } } } diff --git a/dd-trace-core/src/main/java/datadog/trace/core/tagprocessor/RemoteHostnameAdder.java b/dd-trace-core/src/main/java/datadog/trace/core/tagprocessor/RemoteHostnameAdder.java index bc0939a74cb..708ed926e0c 100644 --- a/dd-trace-core/src/main/java/datadog/trace/core/tagprocessor/RemoteHostnameAdder.java +++ b/dd-trace-core/src/main/java/datadog/trace/core/tagprocessor/RemoteHostnameAdder.java @@ -1,8 +1,8 @@ package datadog.trace.core.tagprocessor; -import datadog.trace.api.DDTags; import datadog.trace.api.TagMap; import datadog.trace.bootstrap.instrumentation.api.AppendableSpanLinks; +import datadog.trace.core.CoreTagIds; import datadog.trace.core.DDSpanContext; import java.util.function.Supplier; @@ -33,7 +33,7 @@ public void processTags( return; } - TagMap.Entry newEntry = TagMap.Entry.create(DDTags.TRACER_HOST, hostname); + TagMap.Entry newEntry = TagMap.Entry.create(CoreTagIds.TRACER_HOST_ID, hostname); unsafeTags.set(newEntry); this.cachedHostEntry = newEntry; } diff --git a/dd-trace-core/src/main/java/datadog/trace/core/tagprocessor/ServiceNameSourceAdder.java b/dd-trace-core/src/main/java/datadog/trace/core/tagprocessor/ServiceNameSourceAdder.java index 4b081889039..5c157e4e1e0 100644 --- a/dd-trace-core/src/main/java/datadog/trace/core/tagprocessor/ServiceNameSourceAdder.java +++ b/dd-trace-core/src/main/java/datadog/trace/core/tagprocessor/ServiceNameSourceAdder.java @@ -1,9 +1,8 @@ package datadog.trace.core.tagprocessor; -import static datadog.trace.api.DDTags.DD_SVC_SRC; - import 
datadog.trace.api.TagMap; import datadog.trace.bootstrap.instrumentation.api.AppendableSpanLinks; +import datadog.trace.core.CoreTagIds; import datadog.trace.core.DDSpanContext; public class ServiceNameSourceAdder extends TagsPostProcessor { @@ -12,9 +11,9 @@ public void processTags( TagMap unsafeTags, DDSpanContext spanContext, AppendableSpanLinks spanLinks) { final CharSequence serviceNameSource = spanContext.getServiceNameSource(); if (serviceNameSource != null) { - unsafeTags.set(DD_SVC_SRC, serviceNameSource); + unsafeTags.set(CoreTagIds.SVC_SRC_ID, serviceNameSource); } else { - unsafeTags.remove(DD_SVC_SRC); + unsafeTags.remove(CoreTagIds.SVC_SRC_ID); } } } From 8c742b8807987dc74df1c0d01063ca2a83cbde8f Mon Sep 17 00:00:00 2001 From: Douglas Q Hawkins Date: Tue, 9 Jun 2026 21:56:22 -0400 Subject: [PATCH 13/35] Route peer.service + HTTP-endpoint post-processor tags through tag-id slots MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Migrate the two remaining stored-tag post-processors to id-keyed access: - PeerServiceCalculator: reads peer.service via getEntry(id); writes peer.service and _dd.peer.service.remapped_from via set(long,...) (was Map put which bypassed the interceptor anyway — recalculation, no behavior change). - HttpEndpointPostProcessor: reads http.method/http.route/http.url via getEntry(id). Hand-assign their ids in CoreTagIds (stored range, fieldPos 9..13; SLOT_COUNT 14) + resolver entries. peer.service / http.method / http.url are intercepted-but- stored: the string set-path still runs the interceptor side-effect then slots via keyOf, so these id reads find the same entry. http.route is not intercepted. 
Co-Authored-By: Claude Opus 4.8 --- .../java/datadog/trace/core/CoreTagIds.java | 46 ++++++++++++++++++- .../HttpEndpointPostProcessor.java | 16 ++++--- .../tagprocessor/PeerServiceCalculator.java | 16 ++++--- 3 files changed, 63 insertions(+), 15 deletions(-) diff --git a/dd-trace-core/src/main/java/datadog/trace/core/CoreTagIds.java b/dd-trace-core/src/main/java/datadog/trace/core/CoreTagIds.java index c1f38dae2e0..730d6409d73 100644 --- a/dd-trace-core/src/main/java/datadog/trace/core/CoreTagIds.java +++ b/dd-trace-core/src/main/java/datadog/trace/core/CoreTagIds.java @@ -22,8 +22,8 @@ public final class CoreTagIds { // place them in a positional slot static final int RESERVED_FIELD_POS = 0xFFFF; - // slot count = (max stored fieldPos) + 1. Stored tags use fieldPos 0..8. - static final int SLOT_COUNT = 9; + // slot count = (max stored fieldPos) + 1. Stored tags use fieldPos 0..13. + static final int SLOT_COUNT = 14; // ---- reserved / virtual (tag-interceptor handled, not stored) ---- public static final int ERROR_SERIAL = 1; @@ -66,6 +66,28 @@ public final class CoreTagIds { public static final int SVC_SRC_SERIAL = KnownTags.FIRST_STORED_SERIAL + 8; public static final long SVC_SRC_ID = KnownTags.tagId(SVC_SRC_SERIAL, 8, DDTags.DD_SVC_SRC); + // peer.service tags, read/written by PeerServiceCalculator (post-processor; id-keyed get/set that + // bypass the interceptor). peer.service is intercepted on the set-path but STORED, so it slots. + public static final int PEER_SERVICE_SERIAL = KnownTags.FIRST_STORED_SERIAL + 9; + public static final long PEER_SERVICE = + KnownTags.tagId(PEER_SERVICE_SERIAL, 9, Tags.PEER_SERVICE); + + public static final int PEER_SERVICE_REMAPPED_FROM_SERIAL = KnownTags.FIRST_STORED_SERIAL + 10; + public static final long PEER_SERVICE_REMAPPED_FROM = + KnownTags.tagId(PEER_SERVICE_REMAPPED_FROM_SERIAL, 10, DDTags.PEER_SERVICE_REMAPPED_FROM); + + // HTTP tags read by HttpEndpointPostProcessor. 
http.method/http.url are intercepted-but-stored + // (interceptTag side-effects then returns false → stored); http.route is not intercepted. All + // stored, so the string set-path slots them via keyOf and the id reads here find them. + public static final int HTTP_METHOD_SERIAL = KnownTags.FIRST_STORED_SERIAL + 11; + public static final long HTTP_METHOD = KnownTags.tagId(HTTP_METHOD_SERIAL, 11, Tags.HTTP_METHOD); + + public static final int HTTP_ROUTE_SERIAL = KnownTags.FIRST_STORED_SERIAL + 12; + public static final long HTTP_ROUTE = KnownTags.tagId(HTTP_ROUTE_SERIAL, 12, Tags.HTTP_ROUTE); + + public static final int HTTP_URL_SERIAL = KnownTags.FIRST_STORED_SERIAL + 13; + public static final long HTTP_URL = KnownTags.tagId(HTTP_URL_SERIAL, 13, Tags.HTTP_URL); + static final KnownTags.Resolver RESOLVER = new KnownTags.Resolver() { @Override @@ -91,6 +113,16 @@ public String nameOf(long tagId) { return DDTags.DD_INTEGRATION; case SVC_SRC_SERIAL: return DDTags.DD_SVC_SRC; + case PEER_SERVICE_SERIAL: + return Tags.PEER_SERVICE; + case PEER_SERVICE_REMAPPED_FROM_SERIAL: + return DDTags.PEER_SERVICE_REMAPPED_FROM; + case HTTP_METHOD_SERIAL: + return Tags.HTTP_METHOD; + case HTTP_ROUTE_SERIAL: + return Tags.HTTP_ROUTE; + case HTTP_URL_SERIAL: + return Tags.HTTP_URL; default: return null; } @@ -124,6 +156,16 @@ public long keyOf(String name) { return INTEGRATION_ID; case DDTags.DD_SVC_SRC: return SVC_SRC_ID; + case Tags.PEER_SERVICE: + return PEER_SERVICE; + case DDTags.PEER_SERVICE_REMAPPED_FROM: + return PEER_SERVICE_REMAPPED_FROM; + case Tags.HTTP_METHOD: + return HTTP_METHOD; + case Tags.HTTP_ROUTE: + return HTTP_ROUTE; + case Tags.HTTP_URL: + return HTTP_URL; default: return 0L; } diff --git a/dd-trace-core/src/main/java/datadog/trace/core/tagprocessor/HttpEndpointPostProcessor.java b/dd-trace-core/src/main/java/datadog/trace/core/tagprocessor/HttpEndpointPostProcessor.java index c2e0dd72761..07900f369d2 100644 --- 
a/dd-trace-core/src/main/java/datadog/trace/core/tagprocessor/HttpEndpointPostProcessor.java +++ b/dd-trace-core/src/main/java/datadog/trace/core/tagprocessor/HttpEndpointPostProcessor.java @@ -1,13 +1,10 @@ package datadog.trace.core.tagprocessor; -import static datadog.trace.bootstrap.instrumentation.api.Tags.HTTP_METHOD; -import static datadog.trace.bootstrap.instrumentation.api.Tags.HTTP_ROUTE; -import static datadog.trace.bootstrap.instrumentation.api.Tags.HTTP_URL; - import datadog.trace.api.TagMap; import datadog.trace.api.endpoint.EndpointResolver; import datadog.trace.api.internal.VisibleForTesting; import datadog.trace.bootstrap.instrumentation.api.AppendableSpanLinks; +import datadog.trace.core.CoreTagIds; import datadog.trace.core.DDSpanContext; import org.slf4j.Logger; import org.slf4j.LoggerFactory; @@ -61,16 +58,21 @@ public void processTags( return; } - if (unsafeTags.getObject(HTTP_METHOD) == null) { + if (unsafeTags.getEntry(CoreTagIds.HTTP_METHOD) == null) { return; } try { - String httpRoute = unsafeTags.getString(HTTP_ROUTE); - String httpUrl = unsafeTags.getString(HTTP_URL); + String httpRoute = stringValue(unsafeTags, CoreTagIds.HTTP_ROUTE); + String httpUrl = stringValue(unsafeTags, CoreTagIds.HTTP_URL); endpointResolver.resolveEndpoint(unsafeTags, httpRoute, httpUrl); } catch (Throwable t) { log.debug("Error processing HTTP endpoint for span {}", spanContext.getSpanId(), t); } } + + private static String stringValue(TagMap unsafeTags, long tagId) { + TagMap.Entry entry = unsafeTags.getEntry(tagId); + return entry == null ? 
null : entry.stringValue(); + } } diff --git a/dd-trace-core/src/main/java/datadog/trace/core/tagprocessor/PeerServiceCalculator.java b/dd-trace-core/src/main/java/datadog/trace/core/tagprocessor/PeerServiceCalculator.java index 198e2c78f1c..0f3f8e6d670 100644 --- a/dd-trace-core/src/main/java/datadog/trace/core/tagprocessor/PeerServiceCalculator.java +++ b/dd-trace-core/src/main/java/datadog/trace/core/tagprocessor/PeerServiceCalculator.java @@ -1,13 +1,12 @@ package datadog.trace.core.tagprocessor; import datadog.trace.api.Config; -import datadog.trace.api.DDTags; import datadog.trace.api.TagMap; import datadog.trace.api.internal.VisibleForTesting; import datadog.trace.api.naming.NamingSchema; import datadog.trace.api.naming.SpanNaming; import datadog.trace.bootstrap.instrumentation.api.AppendableSpanLinks; -import datadog.trace.bootstrap.instrumentation.api.Tags; +import datadog.trace.core.CoreTagIds; import datadog.trace.core.DDSpanContext; import java.util.Map; import javax.annotation.Nonnull; @@ -35,7 +34,7 @@ public PeerServiceCalculator() { @Override public void processTags( TagMap unsafeTags, DDSpanContext spanContext, AppendableSpanLinks spanLinks) { - Object peerService = unsafeTags.getObject(Tags.PEER_SERVICE); + Object peerService = peerService(unsafeTags); // the user set it if (peerService != null) { if (canRemap) { @@ -46,18 +45,23 @@ public void processTags( // calculate the defaults (if any) peerServiceNaming.tags(unsafeTags); // only remap if the mapping is not empty (saves one get) - remapPeerService(unsafeTags, canRemap ? unsafeTags.getObject(Tags.PEER_SERVICE) : null); + remapPeerService(unsafeTags, canRemap ? peerService(unsafeTags) : null); return; } // we have no peer.service and we do not compute defaults. Leave the map untouched } + private static Object peerService(TagMap unsafeTags) { + TagMap.Entry entry = unsafeTags.getEntry(CoreTagIds.PEER_SERVICE); + return entry == null ? 
null : entry.objectValue(); + } + private void remapPeerService(TagMap unsafeTags, Object value) { if (value != null) { String mapped = peerServiceMapping.get(value); if (mapped != null) { - unsafeTags.put(Tags.PEER_SERVICE, mapped); - unsafeTags.put(DDTags.PEER_SERVICE_REMAPPED_FROM, value); + unsafeTags.set(CoreTagIds.PEER_SERVICE, mapped); + unsafeTags.set(CoreTagIds.PEER_SERVICE_REMAPPED_FROM, value); } } } From 6874f08a298fd393cb10f4baba8f9f5b932ec352 Mon Sep 17 00:00:00 2001 From: Douglas Q Hawkins Date: Tue, 9 Jun 2026 22:05:50 -0400 Subject: [PATCH 14/35] Add KnownTags.NO_SLOT sentinel + tagId(serial, name) for unslotted stored tags MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Codify the "id but no fast slot" tier: a tag can carry a stable id (so keyOf/ nameOf unify it with its string form) while deliberately not owning a positional slot, so it lives in the hash buckets and doesn't widen knownEntries[] for every span. This is how narrow/low-priority tags get ids without slot bloat. - KnownTags.NO_SLOT (0xFFFF): canonical out-of-slot-range fieldPos sentinel. The existing routing already buckets any fieldPos >= slotCount() (setKnown/knownGet/ knownRemove), so no engine change is needed — only a named encoding. - KnownTags.tagId(serial, name): overload stamping NO_SLOT. - KnownTags.isUnslotted(tagId): stored serial + NO_SLOT. - CoreTagIds: drop the local RESERVED_FIELD_POS constant; ERROR now uses the tagId(serial, name) overload. No tag reassignments. - TagMapTagIdTest: unslotted-tier coverage (set/get/remove by id + string, NO_SLOT survives on the stored entry, unification both directions). 
Co-Authored-By: Claude Opus 4.8 --- .../java/datadog/trace/core/CoreTagIds.java | 12 ++-- .../java/datadog/trace/api/KnownTags.java | 33 +++++++++ .../datadog/trace/api/TagMapTagIdTest.java | 68 +++++++++++++++++-- 3 files changed, 99 insertions(+), 14 deletions(-) diff --git a/dd-trace-core/src/main/java/datadog/trace/core/CoreTagIds.java b/dd-trace-core/src/main/java/datadog/trace/core/CoreTagIds.java index 730d6409d73..f35721115a3 100644 --- a/dd-trace-core/src/main/java/datadog/trace/core/CoreTagIds.java +++ b/dd-trace-core/src/main/java/datadog/trace/core/CoreTagIds.java @@ -10,24 +10,20 @@ * *

Reserved serials {@code [1, KnownTags.FIRST_STORED_SERIAL)} name "virtual" tags handled by the * tag interceptor / span fields and are NOT stored in the {@code TagMap}; their {@code fieldPos} is - * a sentinel ({@link #RESERVED_FIELD_POS}) that is out of slot range, so any incidental store - * routes to the hash buckets rather than a positional slot. Serials {@code >= FIRST_STORED_SERIAL} - * name stored tags that slot/bucket normally. + * the {@link KnownTags#NO_SLOT} sentinel that is out of slot range, so any incidental store routes + * to the hash buckets rather than a positional slot. Serials {@code >= FIRST_STORED_SERIAL} name + * stored tags that slot/bucket normally (or, with {@code NO_SLOT}, are stored bucket-only). * *

The resolver registers on class initialization, so simply referencing any constant here makes * tag-id resolution live before the first span is built. */ public final class CoreTagIds { - // sentinel fieldPos for reserved (non-stored) tags: far beyond SLOT_COUNT, so set() can never - // place them in a positional slot - static final int RESERVED_FIELD_POS = 0xFFFF; - // slot count = (max stored fieldPos) + 1. Stored tags use fieldPos 0..13. static final int SLOT_COUNT = 14; // ---- reserved / virtual (tag-interceptor handled, not stored) ---- public static final int ERROR_SERIAL = 1; - public static final long ERROR = KnownTags.tagId(ERROR_SERIAL, RESERVED_FIELD_POS, Tags.ERROR); + public static final long ERROR = KnownTags.tagId(ERROR_SERIAL, Tags.ERROR); // ---- stored (slotted / bucketed) ---- public static final int PARENT_ID_SERIAL = KnownTags.FIRST_STORED_SERIAL; diff --git a/internal-api/src/main/java/datadog/trace/api/KnownTags.java b/internal-api/src/main/java/datadog/trace/api/KnownTags.java index 062fa155737..73ee7ee2e9a 100644 --- a/internal-api/src/main/java/datadog/trace/api/KnownTags.java +++ b/internal-api/src/main/java/datadog/trace/api/KnownTags.java @@ -60,6 +60,30 @@ public static boolean isStored(long tagId) { return globalSerial(tagId) >= FIRST_STORED_SERIAL; } + /** + * Sentinel {@code fieldPos} meaning "no positional slot". It is the maximum value the 16-bit + * fieldPos field can hold, so it always compares {@code >= slotCount()} and routes to the hash + * buckets rather than the fast positional array. Two kinds of tagId use it: + * + *

+ * <ul>
+ * <li>Reserved/virtual tags ({@code globalSerial < FIRST_STORED_SERIAL}) — not stored at all; + * the sentinel just guarantees an incidental store never lands in a slot.
+ * <li>Unslotted stored tags ({@code globalSerial >= FIRST_STORED_SERIAL}) — "low-priority" tags + * that get a stable id (and so {@code keyOf}/{@code nameOf} unification with their string + * form) but are deliberately not given a slot, so they live in the buckets and don't widen + * {@code knownEntries[]} for every span. {@code getEntry(long)} for these resolves the name + * and rehashes — the cost of not owning a slot. + * </ul>
+ */ + public static final int NO_SLOT = 0xFFFF; + + /** + * True if the tagId names a stored tag that deliberately has no positional slot (bucket-only). + */ + public static boolean isUnslotted(long tagId) { + return isStored(tagId) && fieldPos(tagId) == NO_SLOT; + } + /** * Builds a tagId from its parts: {@code globalSerial} (globally unique per known tag), {@code * fieldPos} (the tag's slot within its span type's positional table), and the tag {@code name} @@ -72,6 +96,15 @@ public static long tagId(int globalSerial, int fieldPos, String name) { return ((long) globalSerial << 48) | ((long) (fieldPos & 0xFFFF) << 32) | nameHash; } + /** + * Builds a tagId with no positional slot ({@code fieldPos == }{@link #NO_SLOT}). Use for reserved + * "virtual" tags and for "low-priority" stored tags that get a stable id but are intentionally + * kept out of the fast slot array (they route to the hash buckets). See {@link #NO_SLOT}. + */ + public static long tagId(int globalSerial, String name) { + return tagId(globalSerial, NO_SLOT, name); + } + // Number of positional slots in the global layout = (max stored fieldPos) + 1, declared by the // registered provider. Captured once at registration and read as a dynamic constant; TagMap sizes // its knownEntries array to exactly this rather than a hardcoded max. 0 when no resolver. 
diff --git a/internal-api/src/test/java/datadog/trace/api/TagMapTagIdTest.java b/internal-api/src/test/java/datadog/trace/api/TagMapTagIdTest.java index 086faa3d848..94a01ae9470 100644 --- a/internal-api/src/test/java/datadog/trace/api/TagMapTagIdTest.java +++ b/internal-api/src/test/java/datadog/trace/api/TagMapTagIdTest.java @@ -29,10 +29,16 @@ public class TagMapTagIdTest { static final String HTTP_METHOD = "http.request.method"; static final String HTTP_STATUS = "http.response.status_code"; static final String DB_SYSTEM = "db.system"; + // a "low-priority" stored tag: has an id, but deliberately no positional slot (NO_SLOT) so it + // lives in the hash buckets rather than widening knownEntries[]. + static final String MESSAGING_SYSTEM = "messaging.system"; static final long HTTP_METHOD_ID = tagId(1, 2, HTTP_METHOD); static final long HTTP_STATUS_ID = tagId(2, 5, HTTP_STATUS); static final long DB_SYSTEM_ID = tagId(3, 0, DB_SYSTEM); + // stored-range serial (>= FIRST_STORED_SERIAL) so it is an *unslotted stored* tag, not a reserved + static final long MESSAGING_SYSTEM_ID = + KnownTags.tagId(KnownTags.FIRST_STORED_SERIAL + 4, MESSAGING_SYSTEM); static long tagId(int globalSerial, int fieldPos, String name) { return KnownTags.tagId(globalSerial, fieldPos, name); @@ -50,12 +56,12 @@ public void tagId_roundTripsThroughExtractors() { public void registerResolver() { Map nameById = new HashMap<>(); Map idByName = new HashMap<>(); - for (long id : new long[] {HTTP_METHOD_ID, HTTP_STATUS_ID, DB_SYSTEM_ID}) { - // resolve name from the tag's own definition above - String name = - id == HTTP_METHOD_ID ? HTTP_METHOD : id == HTTP_STATUS_ID ? 
HTTP_STATUS : DB_SYSTEM; - nameById.put(id, name); - idByName.put(name, id); + nameById.put(HTTP_METHOD_ID, HTTP_METHOD); + nameById.put(HTTP_STATUS_ID, HTTP_STATUS); + nameById.put(DB_SYSTEM_ID, DB_SYSTEM); + nameById.put(MESSAGING_SYSTEM_ID, MESSAGING_SYSTEM); + for (Map.Entry e : nameById.entrySet()) { + idByName.put(e.getValue(), e.getKey()); } KnownTags.register( new KnownTags.Resolver() { @@ -226,4 +232,54 @@ public void ledger_removeById() { assertNull(map.get(DB_SYSTEM)); assertEquals(1, map.size()); } + + @Test + public void noSlotOverload_stampsNoSlotSentinel() { + long id = KnownTags.tagId(KnownTags.FIRST_STORED_SERIAL + 4, MESSAGING_SYSTEM); + assertEquals(KnownTags.FIRST_STORED_SERIAL + 4, KnownTags.globalSerial(id)); + assertEquals(KnownTags.NO_SLOT, KnownTags.fieldPos(id)); + assertEquals(Entry._hash(MESSAGING_SYSTEM), KnownTags.nameHash(id)); + // a stored serial + NO_SLOT fieldPos == an unslotted (bucket-only) stored tag + assertTrue(KnownTags.isStored(id)); + assertTrue(KnownTags.isUnslotted(id)); + } + + @Test + public void unslotted_setFindableByIdAndName() { + TagMap map = TagMap.create(); + map.set(MESSAGING_SYSTEM_ID, "kafka"); + + Entry byId = map.getEntry(MESSAGING_SYSTEM_ID); + assertNotNull(byId); + assertEquals("kafka", byId.stringValue()); + // NO_SLOT survives on the stored entry — it lives in the buckets, not a slot + assertEquals(KnownTags.NO_SLOT, KnownTags.fieldPos(byId.tagId)); + assertEquals(MESSAGING_SYSTEM, byId.tag()); + + // string read of the same tag unifies with the id-stored entry + assertSame(byId, map.getEntry(MESSAGING_SYSTEM)); + assertEquals("kafka", map.get(MESSAGING_SYSTEM)); + } + + @Test + public void unslotted_stringSetFindableById() { + TagMap map = TagMap.create(); + map.set(MESSAGING_SYSTEM, "rabbitmq"); + + Entry byId = map.getEntry(MESSAGING_SYSTEM_ID); + assertNotNull(byId); + assertEquals("rabbitmq", byId.stringValue()); + } + + @Test + public void unslotted_removeById() { + TagMap map = TagMap.create(); 
+ map.set(MESSAGING_SYSTEM_ID, "kafka"); + assertEquals(1, map.size()); + + assertTrue(map.remove(MESSAGING_SYSTEM_ID)); + assertNull(map.getEntry(MESSAGING_SYSTEM_ID)); + assertNull(map.get(MESSAGING_SYSTEM)); + assertEquals(0, map.size()); + } } From bb95fec66cf796263d59d40befa28bd9c3d17b6c Mon Sep 17 00:00:00 2001 From: Douglas Q Hawkins Date: Tue, 9 Jun 2026 22:31:33 -0400 Subject: [PATCH 15/35] Slice B: AgentSpan.setTag(long) + id-key BaseDecorator.onPeerConnection MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Wire the tag-id fast-path up through the span layer and migrate the first real decorator, so we can measure the id+slot path on a real span. - AgentSpan: new default setTag(long tagId, Object) that resolves the id to its name and delegates to setTag(String, Object) — zero blast radius for the dozen other AgentSpan implementors (OTSpan/OtelSpan/Spark/etc). DDSpan overrides it to take the fast-path via the already-present DDSpanContext.setTag(long, Object). - Relocate the hand-assigned tag-id registry from dd-trace-core CoreTagIds to internal-api as KnownTagIds, so both core AND instrumentation (decorators, which only see internal-api) reference one registry — the single source of truth the eventual codegen will replace. Updated all core/test references. - KnownTagIds: slot peer.hostname/peer.ipv4/peer.ipv6 (non-intercepted, common on client/producer spans), SLOT_COUNT 17. - BaseDecorator.onPeerConnection: set peer.hostname/ipv4/ipv6 by id. peer.port left on the string path (int overload; deferred). Updated the inherited Spock onPeerConnection expectations (minimal Groovy edit, per decision; full groovy->java migration of the decorator test hierarchy deferred). - PeerConnectionBenchmark (jmh): measures onPeerConnection on a real DDSpan for the string-vs-id A/B. 
tag: ai generated tag: no release note Co-Authored-By: Claude Opus 4.8 --- .../decorator/PeerConnectionBenchmark.java | 97 +++++++++++++++++++ .../decorator/BaseDecorator.java | 7 +- .../decorator/BaseDecoratorTest.groovy | 7 +- .../main/java/datadog/trace/core/DDSpan.java | 6 ++ .../datadog/trace/core/DDSpanContext.java | 3 +- .../core/taginterceptor/TagInterceptor.java | 4 +- .../HttpEndpointPostProcessor.java | 8 +- .../core/tagprocessor/IntegrationAdder.java | 6 +- .../core/tagprocessor/InternalTagsAdder.java | 8 +- .../tagprocessor/PeerServiceCalculator.java | 8 +- .../tagprocessor/RemoteHostnameAdder.java | 4 +- .../tagprocessor/ServiceNameSourceAdder.java | 6 +- .../datadog/trace/core/DDSpanContextTest.java | 11 ++- .../java/datadog/trace/api/KnownTagIds.java | 44 +++++++-- .../instrumentation/api/AgentSpan.java | 12 +++ 15 files changed, 188 insertions(+), 43 deletions(-) create mode 100644 dd-java-agent/agent-bootstrap/src/jmh/java/datadog/trace/bootstrap/instrumentation/decorator/PeerConnectionBenchmark.java rename dd-trace-core/src/main/java/datadog/trace/core/CoreTagIds.java => internal-api/src/main/java/datadog/trace/api/KnownTagIds.java (80%) diff --git a/dd-java-agent/agent-bootstrap/src/jmh/java/datadog/trace/bootstrap/instrumentation/decorator/PeerConnectionBenchmark.java b/dd-java-agent/agent-bootstrap/src/jmh/java/datadog/trace/bootstrap/instrumentation/decorator/PeerConnectionBenchmark.java new file mode 100644 index 00000000000..c7738d44192 --- /dev/null +++ b/dd-java-agent/agent-bootstrap/src/jmh/java/datadog/trace/bootstrap/instrumentation/decorator/PeerConnectionBenchmark.java @@ -0,0 +1,97 @@ +package datadog.trace.bootstrap.instrumentation.decorator; + +import static java.util.concurrent.TimeUnit.NANOSECONDS; +import static java.util.concurrent.TimeUnit.SECONDS; + +import datadog.trace.api.GlobalTracer; +import datadog.trace.bootstrap.instrumentation.api.AgentSpan; +import datadog.trace.common.writer.Writer; +import 
datadog.trace.core.CoreTracer; +import datadog.trace.core.DDSpan; +import java.net.InetAddress; +import java.net.InetSocketAddress; +import java.util.List; +import org.openjdk.jmh.annotations.Benchmark; +import org.openjdk.jmh.annotations.BenchmarkMode; +import org.openjdk.jmh.annotations.Fork; +import org.openjdk.jmh.annotations.Level; +import org.openjdk.jmh.annotations.Measurement; +import org.openjdk.jmh.annotations.Mode; +import org.openjdk.jmh.annotations.OutputTimeUnit; +import org.openjdk.jmh.annotations.Scope; +import org.openjdk.jmh.annotations.Setup; +import org.openjdk.jmh.annotations.State; +import org.openjdk.jmh.annotations.Warmup; + +/** + * Measures {@link BaseDecorator#onPeerConnection} on a real {@link DDSpan}. This is the + * tag-id-keyed fast-path (peer.hostname / peer.ipv4) end-to-end through the span/context/TagMap + * layers: compare this branch (id-keyed, slotted) against the prior commit (string-keyed, bucketed) + * by running the same benchmark on each. + */ +@State(Scope.Benchmark) +@Warmup(iterations = 3, time = 5, timeUnit = SECONDS) +@Measurement(iterations = 5, time = 5, timeUnit = SECONDS) +@BenchmarkMode(Mode.AverageTime) +@OutputTimeUnit(NANOSECONDS) +@Fork(value = 1) +public class PeerConnectionBenchmark { + + BenchmarkDecorator decorator; + InetSocketAddress connection; + AgentSpan span; + + @Setup(Level.Trial) + public void setUp() throws Exception { + CoreTracer tracer = + CoreTracer.builder().strictTraceWrites(true).writer(new NoOpWriter()).build(); + GlobalTracer.forceRegister(tracer); + decorator = new BenchmarkDecorator(); + span = tracer.startSpan("benchmark", "peer.connection"); + // resolved IPv4 address carrying an explicit host name, so onPeerConnection exercises + // peer.hostname + peer.ipv4 without triggering a reverse-DNS lookup. 
+ InetAddress address = InetAddress.getByAddress("benchmark.host", new byte[] {10, 0, 0, 1}); + connection = new InetSocketAddress(address, 8080); + } + + @Benchmark + public AgentSpan onPeerConnection() { + return decorator.onPeerConnection(span, connection); + } + + static final class BenchmarkDecorator extends BaseDecorator { + @Override + protected String[] instrumentationNames() { + return new String[] {"benchmark"}; + } + + @Override + protected CharSequence spanType() { + return "benchmark"; + } + + @Override + protected CharSequence component() { + return "benchmark"; + } + } + + private static final class NoOpWriter implements Writer { + @Override + public void write(final List trace) {} + + @Override + public void start() {} + + @Override + public boolean flush() { + return false; + } + + @Override + public void close() {} + + @Override + public void incrementDropCounts(final int spanCount) {} + } +} diff --git a/dd-java-agent/agent-bootstrap/src/main/java/datadog/trace/bootstrap/instrumentation/decorator/BaseDecorator.java b/dd-java-agent/agent-bootstrap/src/main/java/datadog/trace/bootstrap/instrumentation/decorator/BaseDecorator.java index 6a8767e523f..8841dca9519 100644 --- a/dd-java-agent/agent-bootstrap/src/main/java/datadog/trace/bootstrap/instrumentation/decorator/BaseDecorator.java +++ b/dd-java-agent/agent-bootstrap/src/main/java/datadog/trace/bootstrap/instrumentation/decorator/BaseDecorator.java @@ -7,6 +7,7 @@ import datadog.trace.api.Config; import datadog.trace.api.DDTags; import datadog.trace.api.Functions; +import datadog.trace.api.KnownTagIds; import datadog.trace.api.TagMap; import datadog.trace.api.cache.QualifiedClassNameCache; import datadog.trace.bootstrap.instrumentation.api.AgentScope; @@ -165,12 +166,12 @@ public AgentSpan onPeerConnection(AgentSpan span, InetAddress remoteAddress, boo if (remoteAddress != null) { String ip = remoteAddress.getHostAddress(); if (resolved && Config.get().isPeerHostNameEnabled()) { - 
span.setTag(Tags.PEER_HOSTNAME, hostName(remoteAddress, ip)); + span.setTag(KnownTagIds.PEER_HOSTNAME, hostName(remoteAddress, ip)); } if (remoteAddress instanceof Inet4Address) { - span.setTag(Tags.PEER_HOST_IPV4, ip); + span.setTag(KnownTagIds.PEER_HOST_IPV4, ip); } else if (remoteAddress instanceof Inet6Address) { - span.setTag(Tags.PEER_HOST_IPV6, ip); + span.setTag(KnownTagIds.PEER_HOST_IPV6, ip); } } return span; diff --git a/dd-java-agent/agent-bootstrap/src/test/groovy/datadog/trace/bootstrap/instrumentation/decorator/BaseDecoratorTest.groovy b/dd-java-agent/agent-bootstrap/src/test/groovy/datadog/trace/bootstrap/instrumentation/decorator/BaseDecoratorTest.groovy index 354a9c6bc4f..9ddae8d9f46 100644 --- a/dd-java-agent/agent-bootstrap/src/test/groovy/datadog/trace/bootstrap/instrumentation/decorator/BaseDecoratorTest.groovy +++ b/dd-java-agent/agent-bootstrap/src/test/groovy/datadog/trace/bootstrap/instrumentation/decorator/BaseDecoratorTest.groovy @@ -1,5 +1,6 @@ package datadog.trace.bootstrap.instrumentation.decorator +import datadog.trace.api.KnownTagIds import datadog.trace.api.TagMap import datadog.trace.bootstrap.instrumentation.api.AgentSpan import datadog.trace.bootstrap.instrumentation.api.AgentSpanContext @@ -52,14 +53,14 @@ class BaseDecoratorTest extends DDSpecification { then: if (!connection.isUnresolved()) { - 1 * span.setTag(Tags.PEER_HOSTNAME, connection.hostName) + 1 * span.setTag(KnownTagIds.PEER_HOSTNAME, connection.hostName) } 1 * span.setTag(Tags.PEER_PORT, connection.port) if (connection.address instanceof Inet4Address) { - 1 * span.setTag(Tags.PEER_HOST_IPV4, connection.address.hostAddress) + 1 * span.setTag(KnownTagIds.PEER_HOST_IPV4, connection.address.hostAddress) } if (connection.address instanceof Inet6Address) { - 1 * span.setTag(Tags.PEER_HOST_IPV6, connection.address.hostAddress) + 1 * span.setTag(KnownTagIds.PEER_HOST_IPV6, connection.address.hostAddress) } 0 * _ diff --git 
a/dd-trace-core/src/main/java/datadog/trace/core/DDSpan.java b/dd-trace-core/src/main/java/datadog/trace/core/DDSpan.java index 8ffcc77b49c..5480314d52d 100644 --- a/dd-trace-core/src/main/java/datadog/trace/core/DDSpan.java +++ b/dd-trace-core/src/main/java/datadog/trace/core/DDSpan.java @@ -512,6 +512,12 @@ public DDSpan setTag(final String tag, final Object value) { return this; } + @Override + public DDSpan setTag(final long tagId, final Object value) { + context.setTag(tagId, value); + return this; + } + @Override public AgentSpan setAllTags(Map map) { context.setAllTags(map); diff --git a/dd-trace-core/src/main/java/datadog/trace/core/DDSpanContext.java b/dd-trace-core/src/main/java/datadog/trace/core/DDSpanContext.java index 48eea7f9339..cd8b0288fd2 100644 --- a/dd-trace-core/src/main/java/datadog/trace/core/DDSpanContext.java +++ b/dd-trace-core/src/main/java/datadog/trace/core/DDSpanContext.java @@ -10,6 +10,7 @@ import datadog.trace.api.DDTags; import datadog.trace.api.DDTraceId; import datadog.trace.api.Functions; +import datadog.trace.api.KnownTagIds; import datadog.trace.api.KnownTags; import datadog.trace.api.ProcessTags; import datadog.trace.api.TagMap; @@ -385,7 +386,7 @@ public DDSpanContext( if (samplingPriority != PrioritySampling.UNSET) { setSamplingPriority(samplingPriority, SamplingMechanism.UNKNOWN); } - setTag(CoreTagIds.PARENT_ID, this.propagationTags.getLastParentId()); + setTag(KnownTagIds.PARENT_ID, this.propagationTags.getLastParentId()); } @Override diff --git a/dd-trace-core/src/main/java/datadog/trace/core/taginterceptor/TagInterceptor.java b/dd-trace-core/src/main/java/datadog/trace/core/taginterceptor/TagInterceptor.java index 1473f894e93..ae9f7eca4ec 100644 --- a/dd-trace-core/src/main/java/datadog/trace/core/taginterceptor/TagInterceptor.java +++ b/dd-trace-core/src/main/java/datadog/trace/core/taginterceptor/TagInterceptor.java @@ -24,6 +24,7 @@ import datadog.trace.api.Config; import datadog.trace.api.ConfigDefaults; import 
datadog.trace.api.DDTags; +import datadog.trace.api.KnownTagIds; import datadog.trace.api.KnownTags; import datadog.trace.api.Pair; import datadog.trace.api.TagMap; @@ -37,7 +38,6 @@ import datadog.trace.bootstrap.instrumentation.api.Tags; import datadog.trace.bootstrap.instrumentation.api.URIUtils; import datadog.trace.bootstrap.instrumentation.api.UTF8BytesString; -import datadog.trace.core.CoreTagIds; import datadog.trace.core.DDSpanContext; import java.net.URI; import java.util.Map; @@ -141,7 +141,7 @@ public boolean needsIntercept(String tag) { */ public boolean interceptTag(DDSpanContext span, long tagId, Object value) { switch (KnownTags.globalSerial(tagId)) { - case CoreTagIds.ERROR_SERIAL: + case KnownTagIds.ERROR_SERIAL: return interceptError(span, value); default: String name = KnownTags.nameOf(tagId); diff --git a/dd-trace-core/src/main/java/datadog/trace/core/tagprocessor/HttpEndpointPostProcessor.java b/dd-trace-core/src/main/java/datadog/trace/core/tagprocessor/HttpEndpointPostProcessor.java index 07900f369d2..7955593f220 100644 --- a/dd-trace-core/src/main/java/datadog/trace/core/tagprocessor/HttpEndpointPostProcessor.java +++ b/dd-trace-core/src/main/java/datadog/trace/core/tagprocessor/HttpEndpointPostProcessor.java @@ -1,10 +1,10 @@ package datadog.trace.core.tagprocessor; +import datadog.trace.api.KnownTagIds; import datadog.trace.api.TagMap; import datadog.trace.api.endpoint.EndpointResolver; import datadog.trace.api.internal.VisibleForTesting; import datadog.trace.bootstrap.instrumentation.api.AppendableSpanLinks; -import datadog.trace.core.CoreTagIds; import datadog.trace.core.DDSpanContext; import org.slf4j.Logger; import org.slf4j.LoggerFactory; @@ -58,13 +58,13 @@ public void processTags( return; } - if (unsafeTags.getEntry(CoreTagIds.HTTP_METHOD) == null) { + if (unsafeTags.getEntry(KnownTagIds.HTTP_METHOD) == null) { return; } try { - String httpRoute = stringValue(unsafeTags, CoreTagIds.HTTP_ROUTE); - String httpUrl = 
stringValue(unsafeTags, CoreTagIds.HTTP_URL); + String httpRoute = stringValue(unsafeTags, KnownTagIds.HTTP_ROUTE); + String httpUrl = stringValue(unsafeTags, KnownTagIds.HTTP_URL); endpointResolver.resolveEndpoint(unsafeTags, httpRoute, httpUrl); } catch (Throwable t) { log.debug("Error processing HTTP endpoint for span {}", spanContext.getSpanId(), t); diff --git a/dd-trace-core/src/main/java/datadog/trace/core/tagprocessor/IntegrationAdder.java b/dd-trace-core/src/main/java/datadog/trace/core/tagprocessor/IntegrationAdder.java index 29d88771357..c4babce9941 100644 --- a/dd-trace-core/src/main/java/datadog/trace/core/tagprocessor/IntegrationAdder.java +++ b/dd-trace-core/src/main/java/datadog/trace/core/tagprocessor/IntegrationAdder.java @@ -1,8 +1,8 @@ package datadog.trace.core.tagprocessor; +import datadog.trace.api.KnownTagIds; import datadog.trace.api.TagMap; import datadog.trace.bootstrap.instrumentation.api.AppendableSpanLinks; -import datadog.trace.core.CoreTagIds; import datadog.trace.core.DDSpanContext; public class IntegrationAdder extends TagsPostProcessor { @@ -11,9 +11,9 @@ public void processTags( TagMap unsafeTags, DDSpanContext spanContext, AppendableSpanLinks spanLinks) { final CharSequence instrumentationName = spanContext.getIntegrationName(); if (instrumentationName != null) { - unsafeTags.set(CoreTagIds.INTEGRATION_ID, instrumentationName); + unsafeTags.set(KnownTagIds.INTEGRATION_ID, instrumentationName); } else { - unsafeTags.remove(CoreTagIds.INTEGRATION_ID); + unsafeTags.remove(KnownTagIds.INTEGRATION_ID); } } } diff --git a/dd-trace-core/src/main/java/datadog/trace/core/tagprocessor/InternalTagsAdder.java b/dd-trace-core/src/main/java/datadog/trace/core/tagprocessor/InternalTagsAdder.java index 3a68a45ffed..1ee17e829e3 100644 --- a/dd-trace-core/src/main/java/datadog/trace/core/tagprocessor/InternalTagsAdder.java +++ b/dd-trace-core/src/main/java/datadog/trace/core/tagprocessor/InternalTagsAdder.java @@ -2,10 +2,10 @@ import static 
datadog.trace.bootstrap.instrumentation.api.Tags.VERSION; +import datadog.trace.api.KnownTagIds; import datadog.trace.api.TagMap; import datadog.trace.bootstrap.instrumentation.api.AppendableSpanLinks; import datadog.trace.bootstrap.instrumentation.api.UTF8BytesString; -import datadog.trace.core.CoreTagIds; import datadog.trace.core.DDSpanContext; import javax.annotation.Nullable; @@ -15,17 +15,17 @@ public final class InternalTagsAdder extends TagsPostProcessor { // base.service / version are fixed for the life of the tracer, so their TagMap.Entry objects are // pre-built once and shared across every span (Entry is immutable and safe to share between // maps). - // The entries are tag-id-bearing (CoreTagIds), so they also land in their positional slot. null + // The entries are tag-id-bearing (KnownTagIds), so they also land in their positional slot. null // when the corresponding value is absent/empty. See PR #11555 for the string-keyed precursor. @Nullable private final TagMap.Entry baseServiceEntry; @Nullable private final TagMap.Entry versionEntry; public InternalTagsAdder(@Nullable final String ddService, @Nullable final String version) { this.ddService = ddService != null ? UTF8BytesString.create(ddService) : null; - this.baseServiceEntry = TagMap.Entry.create(CoreTagIds.BASE_SERVICE, this.ddService); + this.baseServiceEntry = TagMap.Entry.create(KnownTagIds.BASE_SERVICE, this.ddService); this.versionEntry = version != null && !version.isEmpty() - ? TagMap.Entry.create(CoreTagIds.VERSION, UTF8BytesString.create(version)) + ? 
TagMap.Entry.create(KnownTagIds.VERSION, UTF8BytesString.create(version)) : null; } diff --git a/dd-trace-core/src/main/java/datadog/trace/core/tagprocessor/PeerServiceCalculator.java b/dd-trace-core/src/main/java/datadog/trace/core/tagprocessor/PeerServiceCalculator.java index 0f3f8e6d670..bc17f6ec04c 100644 --- a/dd-trace-core/src/main/java/datadog/trace/core/tagprocessor/PeerServiceCalculator.java +++ b/dd-trace-core/src/main/java/datadog/trace/core/tagprocessor/PeerServiceCalculator.java @@ -1,12 +1,12 @@ package datadog.trace.core.tagprocessor; import datadog.trace.api.Config; +import datadog.trace.api.KnownTagIds; import datadog.trace.api.TagMap; import datadog.trace.api.internal.VisibleForTesting; import datadog.trace.api.naming.NamingSchema; import datadog.trace.api.naming.SpanNaming; import datadog.trace.bootstrap.instrumentation.api.AppendableSpanLinks; -import datadog.trace.core.CoreTagIds; import datadog.trace.core.DDSpanContext; import java.util.Map; import javax.annotation.Nonnull; @@ -52,7 +52,7 @@ public void processTags( } private static Object peerService(TagMap unsafeTags) { - TagMap.Entry entry = unsafeTags.getEntry(CoreTagIds.PEER_SERVICE); + TagMap.Entry entry = unsafeTags.getEntry(KnownTagIds.PEER_SERVICE); return entry == null ? 
null : entry.objectValue(); } @@ -60,8 +60,8 @@ private void remapPeerService(TagMap unsafeTags, Object value) { if (value != null) { String mapped = peerServiceMapping.get(value); if (mapped != null) { - unsafeTags.set(CoreTagIds.PEER_SERVICE, mapped); - unsafeTags.set(CoreTagIds.PEER_SERVICE_REMAPPED_FROM, value); + unsafeTags.set(KnownTagIds.PEER_SERVICE, mapped); + unsafeTags.set(KnownTagIds.PEER_SERVICE_REMAPPED_FROM, value); } } } diff --git a/dd-trace-core/src/main/java/datadog/trace/core/tagprocessor/RemoteHostnameAdder.java b/dd-trace-core/src/main/java/datadog/trace/core/tagprocessor/RemoteHostnameAdder.java index 708ed926e0c..245a3f2a18f 100644 --- a/dd-trace-core/src/main/java/datadog/trace/core/tagprocessor/RemoteHostnameAdder.java +++ b/dd-trace-core/src/main/java/datadog/trace/core/tagprocessor/RemoteHostnameAdder.java @@ -1,8 +1,8 @@ package datadog.trace.core.tagprocessor; +import datadog.trace.api.KnownTagIds; import datadog.trace.api.TagMap; import datadog.trace.bootstrap.instrumentation.api.AppendableSpanLinks; -import datadog.trace.core.CoreTagIds; import datadog.trace.core.DDSpanContext; import java.util.function.Supplier; @@ -33,7 +33,7 @@ public void processTags( return; } - TagMap.Entry newEntry = TagMap.Entry.create(CoreTagIds.TRACER_HOST_ID, hostname); + TagMap.Entry newEntry = TagMap.Entry.create(KnownTagIds.TRACER_HOST_ID, hostname); unsafeTags.set(newEntry); this.cachedHostEntry = newEntry; } diff --git a/dd-trace-core/src/main/java/datadog/trace/core/tagprocessor/ServiceNameSourceAdder.java b/dd-trace-core/src/main/java/datadog/trace/core/tagprocessor/ServiceNameSourceAdder.java index 5c157e4e1e0..0a72d02c73e 100644 --- a/dd-trace-core/src/main/java/datadog/trace/core/tagprocessor/ServiceNameSourceAdder.java +++ b/dd-trace-core/src/main/java/datadog/trace/core/tagprocessor/ServiceNameSourceAdder.java @@ -1,8 +1,8 @@ package datadog.trace.core.tagprocessor; +import datadog.trace.api.KnownTagIds; import datadog.trace.api.TagMap; import 
datadog.trace.bootstrap.instrumentation.api.AppendableSpanLinks; -import datadog.trace.core.CoreTagIds; import datadog.trace.core.DDSpanContext; public class ServiceNameSourceAdder extends TagsPostProcessor { @@ -11,9 +11,9 @@ public void processTags( TagMap unsafeTags, DDSpanContext spanContext, AppendableSpanLinks spanLinks) { final CharSequence serviceNameSource = spanContext.getServiceNameSource(); if (serviceNameSource != null) { - unsafeTags.set(CoreTagIds.SVC_SRC_ID, serviceNameSource); + unsafeTags.set(KnownTagIds.SVC_SRC_ID, serviceNameSource); } else { - unsafeTags.remove(CoreTagIds.SVC_SRC_ID); + unsafeTags.remove(KnownTagIds.SVC_SRC_ID); } } } diff --git a/dd-trace-core/src/test/java/datadog/trace/core/DDSpanContextTest.java b/dd-trace-core/src/test/java/datadog/trace/core/DDSpanContextTest.java index 90a98c5e2a2..03af9b571c3 100644 --- a/dd-trace-core/src/test/java/datadog/trace/core/DDSpanContextTest.java +++ b/dd-trace-core/src/test/java/datadog/trace/core/DDSpanContextTest.java @@ -33,6 +33,7 @@ import datadog.trace.api.DDTags; import datadog.trace.api.DDTraceId; +import datadog.trace.api.KnownTagIds; import datadog.trace.api.internal.TraceSegment; import datadog.trace.bootstrap.instrumentation.api.AgentSpan; import datadog.trace.bootstrap.instrumentation.api.AgentSpanContext; @@ -79,7 +80,7 @@ void setTagById_storedTagResolvesByName() { // PARENT_ID is a stored tag (serial >= FIRST_STORED_SERIAL): set by id, it lands in the map and // is findable / serialized by its resolved name. - context.setTag(CoreTagIds.PARENT_ID, "p123"); + context.setTag(KnownTagIds.PARENT_ID, "p123"); assertEquals("p123", context.getTags().get(DDTags.PARENT_ID)); span.finish(); @@ -92,7 +93,7 @@ void setTagById_reservedTagIsIntercepted() { // ERROR is a reserved (virtual) tag: setting it by id dispatches through the interceptor // (id-keyed), which sets the error flag and does NOT store an "error" tag. 
- context.setTag(CoreTagIds.ERROR, true); + context.setTag(KnownTagIds.ERROR, true); assertTrue(context.getErrorFlag()); assertNull(context.getTags().get(Tags.ERROR)); @@ -101,7 +102,7 @@ void setTagById_reservedTagIsIntercepted() { @Test void commonTags_slotByNameViaCommonLayout() { - // env / product flags are build-time-known common tags (CoreTagIds). Set by name they resolve + // env / product flags are build-time-known common tags (KnownTagIds). Set by name they resolve // to their id and land in the common slot layout, and remain findable by both name and id. AgentSpan span = tracer.buildSpan("datadog", "fakeOperation").start(); DDSpanContext context = (DDSpanContext) span.context(); @@ -112,8 +113,8 @@ void commonTags_slotByNameViaCommonLayout() { assertEquals("prod", context.getTags().get("env")); assertEquals(1, context.getTags().get(DDTags.DJM_ENABLED)); // proves they occupy the shared slot layout (findable by id) - assertNotNull(context.getTags().getEntry(CoreTagIds.ENV_ID)); - assertNotNull(context.getTags().getEntry(CoreTagIds.DJM_ENABLED)); + assertNotNull(context.getTags().getEntry(KnownTagIds.ENV_ID)); + assertNotNull(context.getTags().getEntry(KnownTagIds.DJM_ENABLED)); span.finish(); } diff --git a/dd-trace-core/src/main/java/datadog/trace/core/CoreTagIds.java b/internal-api/src/main/java/datadog/trace/api/KnownTagIds.java similarity index 80% rename from dd-trace-core/src/main/java/datadog/trace/core/CoreTagIds.java rename to internal-api/src/main/java/datadog/trace/api/KnownTagIds.java index f35721115a3..357d136ff53 100644 --- a/dd-trace-core/src/main/java/datadog/trace/core/CoreTagIds.java +++ b/internal-api/src/main/java/datadog/trace/api/KnownTagIds.java @@ -1,12 +1,12 @@ -package datadog.trace.core; +package datadog.trace.api; -import datadog.trace.api.DDTags; -import datadog.trace.api.KnownTags; import datadog.trace.bootstrap.instrumentation.api.Tags; /** - * Hand-assigned tag-id constants for tracer-core tags, plus the {@link 
KnownTags.Resolver} that - * resolves them. + * Hand-assigned tag-id constants for well-known tags, plus the {@link KnownTags.Resolver} that + * resolves them. This is the single registry shared by the tracer core and by instrumentation + * (decorators) — it lives in {@code internal-api} so both layers can reference the ids; the + * eventual code generator will replace the hand assignment here. * *

<p>Reserved serials {@code [1, KnownTags.FIRST_STORED_SERIAL)} name "virtual" tags handled by the * tag interceptor / span fields and are NOT stored in the {@code TagMap}; their {@code fieldPos} is @@ -17,9 +17,9 @@ *

<p>The resolver registers on class initialization, so simply referencing any constant here makes * tag-id resolution live before the first span is built. */ -public final class CoreTagIds { - // slot count = (max stored fieldPos) + 1. Stored tags use fieldPos 0..13. - static final int SLOT_COUNT = 14; +public final class KnownTagIds { + // slot count = (max stored fieldPos) + 1. Stored tags use fieldPos 0..16. + static final int SLOT_COUNT = 17; // ---- reserved / virtual (tag-interceptor handled, not stored) ---- public static final int ERROR_SERIAL = 1; @@ -84,6 +84,20 @@ public final class CoreTagIds { public static final int HTTP_URL_SERIAL = KnownTags.FIRST_STORED_SERIAL + 13; public static final long HTTP_URL = KnownTags.tagId(HTTP_URL_SERIAL, 13, Tags.HTTP_URL); + // peer connection tags set by BaseDecorator.onPeerConnection on ~every client/producer span. + // Not intercepted; stored. Slotted (common across client instrumentations). + public static final int PEER_HOSTNAME_SERIAL = KnownTags.FIRST_STORED_SERIAL + 14; + public static final long PEER_HOSTNAME = + KnownTags.tagId(PEER_HOSTNAME_SERIAL, 14, Tags.PEER_HOSTNAME); + + public static final int PEER_HOST_IPV4_SERIAL = KnownTags.FIRST_STORED_SERIAL + 15; + public static final long PEER_HOST_IPV4 = + KnownTags.tagId(PEER_HOST_IPV4_SERIAL, 15, Tags.PEER_HOST_IPV4); + + public static final int PEER_HOST_IPV6_SERIAL = KnownTags.FIRST_STORED_SERIAL + 16; + public static final long PEER_HOST_IPV6 = + KnownTags.tagId(PEER_HOST_IPV6_SERIAL, 16, Tags.PEER_HOST_IPV6); + static final KnownTags.Resolver RESOLVER = new KnownTags.Resolver() { @Override @@ -119,6 +133,12 @@ public String nameOf(long tagId) { return Tags.HTTP_ROUTE; case HTTP_URL_SERIAL: return Tags.HTTP_URL; + case PEER_HOSTNAME_SERIAL: + return Tags.PEER_HOSTNAME; + case PEER_HOST_IPV4_SERIAL: + return Tags.PEER_HOST_IPV4; + case PEER_HOST_IPV6_SERIAL: + return Tags.PEER_HOST_IPV6; default: return null; } @@ -162,6 +182,12 @@ public long keyOf(String
name) { return HTTP_ROUTE; case Tags.HTTP_URL: return HTTP_URL; + case Tags.PEER_HOSTNAME: + return PEER_HOSTNAME; + case Tags.PEER_HOST_IPV4: + return PEER_HOST_IPV4; + case Tags.PEER_HOST_IPV6: + return PEER_HOST_IPV6; default: return 0L; } @@ -172,5 +198,5 @@ public long keyOf(String name) { KnownTags.register(RESOLVER); } - private CoreTagIds() {} + private KnownTagIds() {} } diff --git a/internal-api/src/main/java/datadog/trace/bootstrap/instrumentation/api/AgentSpan.java b/internal-api/src/main/java/datadog/trace/bootstrap/instrumentation/api/AgentSpan.java index 99c90b53b30..348a870b76b 100644 --- a/internal-api/src/main/java/datadog/trace/bootstrap/instrumentation/api/AgentSpan.java +++ b/internal-api/src/main/java/datadog/trace/bootstrap/instrumentation/api/AgentSpan.java @@ -83,6 +83,18 @@ default boolean isValid() { AgentSpan setTag(String key, Object value); + /** + * Sets a tag by its generated tag id (see {@link datadog.trace.api.KnownTags}). The default + * resolves the id to its name and delegates to {@link #setTag(String, Object)}, so every span + * implementation works unchanged; mutable spans backed by {@code DDSpanContext} override this to + * take the id fast-path (no name hashing / interceptor string switch). If the id cannot be + * resolved (no resolver registered) the tag is left unchanged. + */ + default AgentSpan setTag(long tagId, Object value) { + String name = datadog.trace.api.KnownTags.nameOf(tagId); + return name == null ? this : setTag(name, value); + } + /** entry may be null - in which case the tags remained unchanged */ AgentSpan setTag(TagMap.EntryReader entry); From 4e33fe08a5ff271d6b11781acf25031c34f1c119 Mon Sep 17 00:00:00 2001 From: Douglas Q Hawkins Date: Tue, 9 Jun 2026 22:43:19 -0400 Subject: [PATCH 16/35] Add typed setTag(long, ...) 
overloads; id-key peer.port MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Mirror the String-keyed primitive setters on the tag-id surface so numeric/boolean tags id-key without boxing: - AgentSpan: default setTag(long, {CharSequence,boolean,int,long,float,double}) resolving name -> string path (zero blast radius); DDSpan overrides each to context.setTag(tagId, value). - DDSpanContext: typed setTag(long, ...) routing like setTag(long, Object) — reserved -> interceptor (boxes only on that rare path), else store by id (no box). - KnownTagIds: slot peer.port (SLOT_COUNT 18). - BaseDecorator.setPeerPort(int/String) now id-keys peer.port; updated the inherited Spock PEER_PORT expectations across the decorator test hierarchy. All peer.* tags are non-intercepted (case b), so this preserves behavior. tag: ai generated tag: no release note Co-Authored-By: Claude Opus 4.8 --- .../decorator/BaseDecorator.java | 4 +- .../decorator/BaseDecoratorTest.groovy | 2 +- .../decorator/HttpClientDecoratorTest.groovy | 6 +- .../decorator/HttpServerDecoratorTest.groovy | 4 +- .../UrlConnectionDecoratorTest.groovy | 4 +- .../main/java/datadog/trace/core/DDSpan.java | 36 ++++++++ .../datadog/trace/core/DDSpanContext.java | 92 +++++++++++++++++++ .../java/datadog/trace/api/KnownTagIds.java | 11 ++- .../instrumentation/api/AgentSpan.java | 30 ++++++ 9 files changed, 180 insertions(+), 9 deletions(-) diff --git a/dd-java-agent/agent-bootstrap/src/main/java/datadog/trace/bootstrap/instrumentation/decorator/BaseDecorator.java b/dd-java-agent/agent-bootstrap/src/main/java/datadog/trace/bootstrap/instrumentation/decorator/BaseDecorator.java index 8841dca9519..bd930a03e67 100644 --- a/dd-java-agent/agent-bootstrap/src/main/java/datadog/trace/bootstrap/instrumentation/decorator/BaseDecorator.java +++ b/dd-java-agent/agent-bootstrap/src/main/java/datadog/trace/bootstrap/instrumentation/decorator/BaseDecorator.java @@ -178,14 +178,14 @@ public AgentSpan 
onPeerConnection(AgentSpan span, InetAddress remoteAddress, boo } public AgentSpan setPeerPort(AgentSpan span, String port) { - span.setTag(Tags.PEER_PORT, port); + span.setTag(KnownTagIds.PEER_PORT, (CharSequence) port); return span; } public AgentSpan setPeerPort(AgentSpan span, int port) { if (port > UNSET_PORT) { - span.setTag(Tags.PEER_PORT, port); + span.setTag(KnownTagIds.PEER_PORT, port); } return span; } diff --git a/dd-java-agent/agent-bootstrap/src/test/groovy/datadog/trace/bootstrap/instrumentation/decorator/BaseDecoratorTest.groovy b/dd-java-agent/agent-bootstrap/src/test/groovy/datadog/trace/bootstrap/instrumentation/decorator/BaseDecoratorTest.groovy index 9ddae8d9f46..bffa7de056e 100644 --- a/dd-java-agent/agent-bootstrap/src/test/groovy/datadog/trace/bootstrap/instrumentation/decorator/BaseDecoratorTest.groovy +++ b/dd-java-agent/agent-bootstrap/src/test/groovy/datadog/trace/bootstrap/instrumentation/decorator/BaseDecoratorTest.groovy @@ -55,7 +55,7 @@ class BaseDecoratorTest extends DDSpecification { if (!connection.isUnresolved()) { 1 * span.setTag(KnownTagIds.PEER_HOSTNAME, connection.hostName) } - 1 * span.setTag(Tags.PEER_PORT, connection.port) + 1 * span.setTag(KnownTagIds.PEER_PORT, connection.port) if (connection.address instanceof Inet4Address) { 1 * span.setTag(KnownTagIds.PEER_HOST_IPV4, connection.address.hostAddress) } diff --git a/dd-java-agent/agent-bootstrap/src/test/groovy/datadog/trace/bootstrap/instrumentation/decorator/HttpClientDecoratorTest.groovy b/dd-java-agent/agent-bootstrap/src/test/groovy/datadog/trace/bootstrap/instrumentation/decorator/HttpClientDecoratorTest.groovy index 1bc83457bd0..3c1881397a9 100644 --- a/dd-java-agent/agent-bootstrap/src/test/groovy/datadog/trace/bootstrap/instrumentation/decorator/HttpClientDecoratorTest.groovy +++ b/dd-java-agent/agent-bootstrap/src/test/groovy/datadog/trace/bootstrap/instrumentation/decorator/HttpClientDecoratorTest.groovy @@ -1,5 +1,7 @@ package 
datadog.trace.bootstrap.instrumentation.decorator +import datadog.trace.api.KnownTagIds + import datadog.trace.api.DDTags import datadog.trace.api.appsec.HttpClientRequest import datadog.trace.api.config.AppSecConfig @@ -69,7 +71,7 @@ class HttpClientDecoratorTest extends ClientDecoratorTest { 1 * span.setTag(DDTags.HTTP_QUERY, null) 1 * span.setTag(DDTags.HTTP_FRAGMENT, null) 1 * span.setTag(Tags.PEER_HOSTNAME, req.url.host) - 1 * span.setTag(Tags.PEER_PORT, req.url.port) + 1 * span.setTag(KnownTagIds.PEER_PORT, req.url.port) 1 * span.setResourceName({ it as String == req.method.toUpperCase() + " " + req.path }, ResourceNamePriorities.HTTP_PATH_NORMALIZER) if (renameService) { 1 * span.setServiceName(req.url.host, _) @@ -107,7 +109,7 @@ class HttpClientDecoratorTest extends ClientDecoratorTest { 1 * span.setTag(Tags.PEER_HOSTNAME, hostname) } if (port) { - 1 * span.setTag(Tags.PEER_PORT, port) + 1 * span.setTag(KnownTagIds.PEER_PORT, port) } if (url != null) { 1 * span.setResourceName({ it as String == expectedPath }, ResourceNamePriorities.HTTP_PATH_NORMALIZER) diff --git a/dd-java-agent/agent-bootstrap/src/test/groovy/datadog/trace/bootstrap/instrumentation/decorator/HttpServerDecoratorTest.groovy b/dd-java-agent/agent-bootstrap/src/test/groovy/datadog/trace/bootstrap/instrumentation/decorator/HttpServerDecoratorTest.groovy index da411dc2431..afd2dd5e882 100644 --- a/dd-java-agent/agent-bootstrap/src/test/groovy/datadog/trace/bootstrap/instrumentation/decorator/HttpServerDecoratorTest.groovy +++ b/dd-java-agent/agent-bootstrap/src/test/groovy/datadog/trace/bootstrap/instrumentation/decorator/HttpServerDecoratorTest.groovy @@ -1,5 +1,7 @@ package datadog.trace.bootstrap.instrumentation.decorator +import datadog.trace.api.KnownTagIds + import datadog.trace.api.DDTags import datadog.trace.api.TraceConfig @@ -229,7 +231,7 @@ class HttpServerDecoratorTest extends ServerDecoratorTest { } 1 * this.span.setTag(Tags.HTTP_FORWARDED_PORT, "123") if (conn?.port) { - 1 * 
this.span.setTag(Tags.PEER_PORT, conn.port) + 1 * this.span.setTag(KnownTagIds.PEER_PORT, conn.port) } 1 * this.span.setTag(Tags.HTTP_USER_AGENT, "some-user-agent") _ * this.span.getRequestContext() >> null diff --git a/dd-java-agent/agent-bootstrap/src/test/groovy/datadog/trace/bootstrap/instrumentation/decorator/UrlConnectionDecoratorTest.groovy b/dd-java-agent/agent-bootstrap/src/test/groovy/datadog/trace/bootstrap/instrumentation/decorator/UrlConnectionDecoratorTest.groovy index b31ab85d2d1..74bc461c73f 100644 --- a/dd-java-agent/agent-bootstrap/src/test/groovy/datadog/trace/bootstrap/instrumentation/decorator/UrlConnectionDecoratorTest.groovy +++ b/dd-java-agent/agent-bootstrap/src/test/groovy/datadog/trace/bootstrap/instrumentation/decorator/UrlConnectionDecoratorTest.groovy @@ -1,5 +1,7 @@ package datadog.trace.bootstrap.instrumentation.decorator +import datadog.trace.api.KnownTagIds + import datadog.trace.api.DDSpanTypes import datadog.trace.bootstrap.instrumentation.api.Tags import datadog.trace.bootstrap.instrumentation.api.UTF8BytesString @@ -20,7 +22,7 @@ class UrlConnectionDecoratorTest extends ClientDecoratorTest { 1 * span.setTag(Tags.PEER_HOSTNAME, hostname) } if (port) { - 1 * span.setTag(Tags.PEER_PORT, port) + 1 * span.setTag(KnownTagIds.PEER_PORT, port) } 0 * _ diff --git a/dd-trace-core/src/main/java/datadog/trace/core/DDSpan.java b/dd-trace-core/src/main/java/datadog/trace/core/DDSpan.java index 5480314d52d..bbc0e08d708 100644 --- a/dd-trace-core/src/main/java/datadog/trace/core/DDSpan.java +++ b/dd-trace-core/src/main/java/datadog/trace/core/DDSpan.java @@ -518,6 +518,42 @@ public DDSpan setTag(final long tagId, final Object value) { return this; } + @Override + public DDSpan setTag(final long tagId, final CharSequence value) { + context.setTag(tagId, value); + return this; + } + + @Override + public DDSpan setTag(final long tagId, final boolean value) { + context.setTag(tagId, value); + return this; + } + + @Override + public DDSpan 
setTag(final long tagId, final int value) { + context.setTag(tagId, value); + return this; + } + + @Override + public DDSpan setTag(final long tagId, final long value) { + context.setTag(tagId, value); + return this; + } + + @Override + public DDSpan setTag(final long tagId, final float value) { + context.setTag(tagId, value); + return this; + } + + @Override + public DDSpan setTag(final long tagId, final double value) { + context.setTag(tagId, value); + return this; + } + @Override public AgentSpan setAllTags(Map map) { context.setAllTags(map); diff --git a/dd-trace-core/src/main/java/datadog/trace/core/DDSpanContext.java b/dd-trace-core/src/main/java/datadog/trace/core/DDSpanContext.java index cd8b0288fd2..b7cc6b7bcb6 100644 --- a/dd-trace-core/src/main/java/datadog/trace/core/DDSpanContext.java +++ b/dd-trace-core/src/main/java/datadog/trace/core/DDSpanContext.java @@ -929,6 +929,98 @@ public void setTag(final long tagId, final Object value) { } } + public void setTag(final long tagId, final CharSequence value) { + if (null == value) { + String name = KnownTags.nameOf(tagId); + if (name != null) { + removeTag(name); + } + return; + } + if (KnownTags.isReserved(tagId)) { + if (!tagInterceptor.interceptTag(this, tagId, value)) { + synchronized (unsafeTags) { + unsafeTags.set(tagId, value); + } + } + } else { + synchronized (unsafeTags) { + unsafeTags.set(tagId, value); + } + } + } + + public void setTag(final long tagId, final boolean value) { + if (KnownTags.isReserved(tagId)) { + // boxes on the (rare) reserved/intercepted path only + if (!tagInterceptor.interceptTag(this, tagId, value)) { + synchronized (unsafeTags) { + unsafeTags.set(tagId, value); + } + } + } else { + synchronized (unsafeTags) { + unsafeTags.set(tagId, value); + } + } + } + + public void setTag(final long tagId, final int value) { + if (KnownTags.isReserved(tagId)) { + if (!tagInterceptor.interceptTag(this, tagId, value)) { + synchronized (unsafeTags) { + unsafeTags.set(tagId, value); + } + } 
+ } else { + synchronized (unsafeTags) { + unsafeTags.set(tagId, value); + } + } + } + + public void setTag(final long tagId, final long value) { + if (KnownTags.isReserved(tagId)) { + if (!tagInterceptor.interceptTag(this, tagId, value)) { + synchronized (unsafeTags) { + unsafeTags.set(tagId, value); + } + } + } else { + synchronized (unsafeTags) { + unsafeTags.set(tagId, value); + } + } + } + + public void setTag(final long tagId, final float value) { + if (KnownTags.isReserved(tagId)) { + if (!tagInterceptor.interceptTag(this, tagId, value)) { + synchronized (unsafeTags) { + unsafeTags.set(tagId, value); + } + } + } else { + synchronized (unsafeTags) { + unsafeTags.set(tagId, value); + } + } + } + + public void setTag(final long tagId, final double value) { + if (KnownTags.isReserved(tagId)) { + if (!tagInterceptor.interceptTag(this, tagId, value)) { + synchronized (unsafeTags) { + unsafeTags.set(tagId, value); + } + } + } else { + synchronized (unsafeTags) { + unsafeTags.set(tagId, value); + } + } + } + public void setTag(TagMap.EntryReader entry) { if (entry == null) { return; diff --git a/internal-api/src/main/java/datadog/trace/api/KnownTagIds.java b/internal-api/src/main/java/datadog/trace/api/KnownTagIds.java index 357d136ff53..c9e71f6d8e4 100644 --- a/internal-api/src/main/java/datadog/trace/api/KnownTagIds.java +++ b/internal-api/src/main/java/datadog/trace/api/KnownTagIds.java @@ -18,8 +18,8 @@ * tag-id resolution live before the first span is built. */ public final class KnownTagIds { - // slot count = (max stored fieldPos) + 1. Stored tags use fieldPos 0..16. - static final int SLOT_COUNT = 17; + // slot count = (max stored fieldPos) + 1. Stored tags use fieldPos 0..17. 
+ static final int SLOT_COUNT = 18; // ---- reserved / virtual (tag-interceptor handled, not stored) ---- public static final int ERROR_SERIAL = 1; @@ -98,6 +98,9 @@ public final class KnownTagIds { public static final long PEER_HOST_IPV6 = KnownTags.tagId(PEER_HOST_IPV6_SERIAL, 16, Tags.PEER_HOST_IPV6); + public static final int PEER_PORT_SERIAL = KnownTags.FIRST_STORED_SERIAL + 17; + public static final long PEER_PORT = KnownTags.tagId(PEER_PORT_SERIAL, 17, Tags.PEER_PORT); + static final KnownTags.Resolver RESOLVER = new KnownTags.Resolver() { @Override @@ -139,6 +142,8 @@ public String nameOf(long tagId) { return Tags.PEER_HOST_IPV4; case PEER_HOST_IPV6_SERIAL: return Tags.PEER_HOST_IPV6; + case PEER_PORT_SERIAL: + return Tags.PEER_PORT; default: return null; } @@ -188,6 +193,8 @@ public long keyOf(String name) { return PEER_HOST_IPV4; case Tags.PEER_HOST_IPV6: return PEER_HOST_IPV6; + case Tags.PEER_PORT: + return PEER_PORT; default: return 0L; } diff --git a/internal-api/src/main/java/datadog/trace/bootstrap/instrumentation/api/AgentSpan.java b/internal-api/src/main/java/datadog/trace/bootstrap/instrumentation/api/AgentSpan.java index 348a870b76b..a10a4645edb 100644 --- a/internal-api/src/main/java/datadog/trace/bootstrap/instrumentation/api/AgentSpan.java +++ b/internal-api/src/main/java/datadog/trace/bootstrap/instrumentation/api/AgentSpan.java @@ -95,6 +95,36 @@ default AgentSpan setTag(long tagId, Object value) { return name == null ? this : setTag(name, value); } + default AgentSpan setTag(long tagId, CharSequence value) { + String name = datadog.trace.api.KnownTags.nameOf(tagId); + return name == null ? this : setTag(name, value); + } + + default AgentSpan setTag(long tagId, boolean value) { + String name = datadog.trace.api.KnownTags.nameOf(tagId); + return name == null ? this : setTag(name, value); + } + + default AgentSpan setTag(long tagId, int value) { + String name = datadog.trace.api.KnownTags.nameOf(tagId); + return name == null ? 
this : setTag(name, value); + } + + default AgentSpan setTag(long tagId, long value) { + String name = datadog.trace.api.KnownTags.nameOf(tagId); + return name == null ? this : setTag(name, value); + } + + default AgentSpan setTag(long tagId, float value) { + String name = datadog.trace.api.KnownTags.nameOf(tagId); + return name == null ? this : setTag(name, value); + } + + default AgentSpan setTag(long tagId, double value) { + String name = datadog.trace.api.KnownTags.nameOf(tagId); + return name == null ? this : setTag(name, value); + } + /** entry may be null - in which case the tags remained unchanged */ AgentSpan setTag(TagMap.EntryReader entry); From f60d90673aa4310d0207588881c1f50c8705de9c Mon Sep 17 00:00:00 2001 From: Douglas Q Hawkins Date: Tue, 9 Jun 2026 22:56:57 -0400 Subject: [PATCH 17/35] Make setTag(long) interception-safe (3-case) via an INTERCEPTED bit in the id MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Close the latent regression in the id set-path: setTag(long) previously only routed reserved serials to the interceptor, so id-setting an intercepted-but- stored tag (http.method/url, peer.service) would silently skip its side-effect. - KnownTags: encode an INTERCEPTED flag in the tagId sign bit (bit 63), so the check is a single `tagId < 0` (isIntercepted) — fast, matching "most sets are by id". globalSerial now masks to 15 bits. Helper KnownTags.intercepted(id). - KnownTagIds: flag the intercepted ids (ERROR, HTTP_METHOD, HTTP_URL, PEER_SERVICE); leave non-intercepted ids (peer.*, base.service, http.route, …) clear so they keep the fast store path. - DDSpanContext.setTag(long, …): 3-case routing — (a) reserved + (c) intercepted- stored -> interceptor (then store if not handled); (b) non-intercepted -> store by id directly. 
- TagInterceptor.interceptTag(long): dispatched on serial; specialized cases for hot tags (ERROR), default resolves the name and runs the proven string interception, so behavior matches the string set-path exactly. - Tests: reflective consistency guard (every id's INTERCEPTED bit must agree with needsIntercept(name)) + behavioral proof that id-setting peer.service runs the interceptor side-effect and stores. tag: ai generated tag: no release note Co-Authored-By: Claude Opus 4.8 --- .../datadog/trace/core/DDSpanContext.java | 23 ++++++------ .../core/taginterceptor/TagInterceptor.java | 10 +++--- .../datadog/trace/core/DDSpanContextTest.java | 17 +++++++++ .../taginterceptor/TagInterceptorTest.java | 34 ++++++++++++++++++ .../java/datadog/trace/api/KnownTagIds.java | 11 +++--- .../java/datadog/trace/api/KnownTags.java | 35 +++++++++++++++---- 6 files changed, 104 insertions(+), 26 deletions(-) diff --git a/dd-trace-core/src/main/java/datadog/trace/core/DDSpanContext.java b/dd-trace-core/src/main/java/datadog/trace/core/DDSpanContext.java index b7cc6b7bcb6..20c46b73951 100644 --- a/dd-trace-core/src/main/java/datadog/trace/core/DDSpanContext.java +++ b/dd-trace-core/src/main/java/datadog/trace/core/DDSpanContext.java @@ -903,10 +903,11 @@ public void setTag(final String tag, final String value) { } /** - * Sets a tag by its generated tag id. Reserved "virtual" tags (interceptor-handled, not stored) - * are routed to the interceptor via an id dispatch; stored tags go straight to the map (slot or - * bucket) keyed by id, bypassing the per-tag interceptor string switch. The id classification is - * a single range check (see {@link KnownTags#isReserved}). + * Sets a tag by its generated tag id. Three cases, classified by a single sign test on the id + * ({@link KnownTags#isIntercepted}): (a) reserved "virtual" tags and (c) intercepted-but-stored + * tags (e.g. 
http.method) are routed to the interceptor via an id dispatch, then stored if the + * interceptor didn't fully handle them; (b) non-intercepted stored tags go straight to the map + * (slot or bucket) keyed by id, bypassing the per-tag interceptor string switch. */ public void setTag(final long tagId, final Object value) { if (null == value) { @@ -916,7 +917,7 @@ public void setTag(final long tagId, final Object value) { } return; } - if (KnownTags.isReserved(tagId)) { + if (KnownTags.isIntercepted(tagId)) { if (!tagInterceptor.interceptTag(this, tagId, value)) { synchronized (unsafeTags) { unsafeTags.set(tagId, value); @@ -937,7 +938,7 @@ public void setTag(final long tagId, final CharSequence value) { } return; } - if (KnownTags.isReserved(tagId)) { + if (KnownTags.isIntercepted(tagId)) { if (!tagInterceptor.interceptTag(this, tagId, value)) { synchronized (unsafeTags) { unsafeTags.set(tagId, value); @@ -951,7 +952,7 @@ public void setTag(final long tagId, final CharSequence value) { } public void setTag(final long tagId, final boolean value) { - if (KnownTags.isReserved(tagId)) { + if (KnownTags.isIntercepted(tagId)) { // boxes on the (rare) reserved/intercepted path only if (!tagInterceptor.interceptTag(this, tagId, value)) { synchronized (unsafeTags) { @@ -966,7 +967,7 @@ public void setTag(final long tagId, final boolean value) { } public void setTag(final long tagId, final int value) { - if (KnownTags.isReserved(tagId)) { + if (KnownTags.isIntercepted(tagId)) { if (!tagInterceptor.interceptTag(this, tagId, value)) { synchronized (unsafeTags) { unsafeTags.set(tagId, value); @@ -980,7 +981,7 @@ public void setTag(final long tagId, final int value) { } public void setTag(final long tagId, final long value) { - if (KnownTags.isReserved(tagId)) { + if (KnownTags.isIntercepted(tagId)) { if (!tagInterceptor.interceptTag(this, tagId, value)) { synchronized (unsafeTags) { unsafeTags.set(tagId, value); @@ -994,7 +995,7 @@ public void setTag(final long tagId, final long 
value) { } public void setTag(final long tagId, final float value) { - if (KnownTags.isReserved(tagId)) { + if (KnownTags.isIntercepted(tagId)) { if (!tagInterceptor.interceptTag(this, tagId, value)) { synchronized (unsafeTags) { unsafeTags.set(tagId, value); @@ -1008,7 +1009,7 @@ public void setTag(final long tagId, final float value) { } public void setTag(final long tagId, final double value) { - if (KnownTags.isReserved(tagId)) { + if (KnownTags.isIntercepted(tagId)) { if (!tagInterceptor.interceptTag(this, tagId, value)) { synchronized (unsafeTags) { unsafeTags.set(tagId, value); diff --git a/dd-trace-core/src/main/java/datadog/trace/core/taginterceptor/TagInterceptor.java b/dd-trace-core/src/main/java/datadog/trace/core/taginterceptor/TagInterceptor.java index ae9f7eca4ec..2099bbd248d 100644 --- a/dd-trace-core/src/main/java/datadog/trace/core/taginterceptor/TagInterceptor.java +++ b/dd-trace-core/src/main/java/datadog/trace/core/taginterceptor/TagInterceptor.java @@ -134,10 +134,12 @@ public boolean needsIntercept(String tag) { } /** - * Id-dispatched variant of {@link #interceptTag(DDSpanContext, String, Object)}: switches on the - * tagId's globalSerial (an int) instead of the tag-name string. Used by {@code - * DDSpanContext.setTag(long, Object)} for reserved (virtual) tags. Falls back to the string path - * for any reserved id without a dedicated case. + * Id-dispatched (fast) variant of {@link #interceptTag(DDSpanContext, String, Object)}: switches + * on the tagId's globalSerial (an int) instead of the tag-name string. Used by {@code + * DDSpanContext.setTag(long, Object)} for any {@link KnownTags#isIntercepted} id — reserved + * "virtual" tags AND intercepted-but-stored tags (e.g. http.method/url, peer.service). Hot tags + * get a dedicated case; the default falls back to resolving the name and running the (slower) + * string interception, so behavior matches the string set-path exactly. 
*/ public boolean interceptTag(DDSpanContext span, long tagId, Object value) { switch (KnownTags.globalSerial(tagId)) { diff --git a/dd-trace-core/src/test/java/datadog/trace/core/DDSpanContextTest.java b/dd-trace-core/src/test/java/datadog/trace/core/DDSpanContextTest.java index 03af9b571c3..9193d29dbd1 100644 --- a/dd-trace-core/src/test/java/datadog/trace/core/DDSpanContextTest.java +++ b/dd-trace-core/src/test/java/datadog/trace/core/DDSpanContextTest.java @@ -100,6 +100,23 @@ void setTagById_reservedTagIsIntercepted() { span.finish(); } + @Test + void setTagById_interceptedButStoredTagRunsInterceptor() { + AgentSpan span = tracer.buildSpan("datadog", "fakeOperation").start(); + DDSpanContext context = (DDSpanContext) span.context(); + + // peer.service is intercepted-but-stored (case c): setting it by id must run the interceptor + // side-effect (which records the peer.service source) AND store the value, exactly like the + // string set-path. The id carries the INTERCEPTED flag so setTag(long) routes through the + // interceptor. + context.setTag(KnownTagIds.PEER_SERVICE, "my-remote-svc"); + + assertEquals(Tags.PEER_SERVICE, context.getTags().get(DDTags.PEER_SERVICE_SOURCE)); + assertEquals("my-remote-svc", context.getTags().get(Tags.PEER_SERVICE)); + + span.finish(); + } + @Test void commonTags_slotByNameViaCommonLayout() { // env / product flags are build-time-known common tags (KnownTagIds). 
Set by name they resolve diff --git a/dd-trace-core/src/test/java/datadog/trace/core/taginterceptor/TagInterceptorTest.java b/dd-trace-core/src/test/java/datadog/trace/core/taginterceptor/TagInterceptorTest.java index 42a8b80e054..a63a10db1c7 100644 --- a/dd-trace-core/src/test/java/datadog/trace/core/taginterceptor/TagInterceptorTest.java +++ b/dd-trace-core/src/test/java/datadog/trace/core/taginterceptor/TagInterceptorTest.java @@ -5,6 +5,7 @@ import static datadog.trace.junit.utils.config.WithConfigExtension.injectSysConfig; import static org.junit.jupiter.api.Assertions.assertEquals; import static org.junit.jupiter.api.Assertions.assertFalse; +import static org.junit.jupiter.api.Assertions.assertNotNull; import static org.junit.jupiter.api.Assertions.assertNull; import static org.junit.jupiter.api.Assertions.assertTrue; import static org.junit.jupiter.params.provider.Arguments.arguments; @@ -16,6 +17,8 @@ import datadog.trace.api.DDSpanTypes; import datadog.trace.api.DDTags; +import datadog.trace.api.KnownTagIds; +import datadog.trace.api.KnownTags; import datadog.trace.api.ProductTraceSource; import datadog.trace.api.remoteconfig.ServiceNameCollector; import datadog.trace.api.remoteconfig.ServiceNameCollectorTestBridge; @@ -688,6 +691,37 @@ void whenInterceptServletContextExtraServiceProviderIsCalled(String value, Strin } } + @Test + void knownTagIdInterceptedFlagMatchesNameBasedNeedsIntercept() throws Exception { + // No-regression guard: the INTERCEPTED bit baked into each KnownTagIds id must agree with the + // interceptor's name-based needsIntercept(name). If a new id is added (or interception of a + // name changes) without keeping the flag in sync, DDSpanContext.setTag(long) would either skip + // a needed interception or pointlessly intercept — this catches the drift. 
+ RuleFlags ruleFlags = mock(RuleFlags.class); + when(ruleFlags.isEnabled(any())).thenReturn(true); + TagInterceptor interceptor = new TagInterceptor(ruleFlags); + + int checked = 0; + for (java.lang.reflect.Field field : KnownTagIds.class.getDeclaredFields()) { + if (field.getType() != long.class) { + continue; // ids are the long constants; skip *_SERIAL ints, ENV string, etc. + } + long tagId = field.getLong(null); + String name = KnownTags.nameOf(tagId); + assertNotNull(name, "id " + field.getName() + " should resolve to a name"); + assertEquals( + interceptor.needsIntercept(name), + KnownTags.isIntercepted(tagId), + "INTERCEPTED flag for " + + field.getName() + + " (\"" + + name + + "\") disagrees with needsIntercept"); + checked++; + } + assertTrue(checked > 0, "expected to check at least one tag id"); + } + @Test void whenInterceptsProductTraceSourcePropagationTagUpdatePropagatedTraceSourceIsCalled() { RuleFlags ruleFlags = mock(RuleFlags.class); diff --git a/internal-api/src/main/java/datadog/trace/api/KnownTagIds.java b/internal-api/src/main/java/datadog/trace/api/KnownTagIds.java index c9e71f6d8e4..5de47effbef 100644 --- a/internal-api/src/main/java/datadog/trace/api/KnownTagIds.java +++ b/internal-api/src/main/java/datadog/trace/api/KnownTagIds.java @@ -22,8 +22,9 @@ public final class KnownTagIds { static final int SLOT_COUNT = 18; // ---- reserved / virtual (tag-interceptor handled, not stored) ---- + // Reserved tags are always intercepted -> set the INTERCEPTED flag. public static final int ERROR_SERIAL = 1; - public static final long ERROR = KnownTags.tagId(ERROR_SERIAL, Tags.ERROR); + public static final long ERROR = KnownTags.intercepted(KnownTags.tagId(ERROR_SERIAL, Tags.ERROR)); // ---- stored (slotted / bucketed) ---- public static final int PARENT_ID_SERIAL = KnownTags.FIRST_STORED_SERIAL; @@ -66,7 +67,7 @@ public final class KnownTagIds { // bypass the interceptor). peer.service is intercepted on the set-path but STORED, so it slots. 
public static final int PEER_SERVICE_SERIAL = KnownTags.FIRST_STORED_SERIAL + 9; public static final long PEER_SERVICE = - KnownTags.tagId(PEER_SERVICE_SERIAL, 9, Tags.PEER_SERVICE); + KnownTags.intercepted(KnownTags.tagId(PEER_SERVICE_SERIAL, 9, Tags.PEER_SERVICE)); public static final int PEER_SERVICE_REMAPPED_FROM_SERIAL = KnownTags.FIRST_STORED_SERIAL + 10; public static final long PEER_SERVICE_REMAPPED_FROM = @@ -76,13 +77,15 @@ public final class KnownTagIds { // (interceptTag side-effects then returns false → stored); http.route is not intercepted. All // stored, so the string set-path slots them via keyOf and the id reads here find them. public static final int HTTP_METHOD_SERIAL = KnownTags.FIRST_STORED_SERIAL + 11; - public static final long HTTP_METHOD = KnownTags.tagId(HTTP_METHOD_SERIAL, 11, Tags.HTTP_METHOD); + public static final long HTTP_METHOD = + KnownTags.intercepted(KnownTags.tagId(HTTP_METHOD_SERIAL, 11, Tags.HTTP_METHOD)); public static final int HTTP_ROUTE_SERIAL = KnownTags.FIRST_STORED_SERIAL + 12; public static final long HTTP_ROUTE = KnownTags.tagId(HTTP_ROUTE_SERIAL, 12, Tags.HTTP_ROUTE); public static final int HTTP_URL_SERIAL = KnownTags.FIRST_STORED_SERIAL + 13; - public static final long HTTP_URL = KnownTags.tagId(HTTP_URL_SERIAL, 13, Tags.HTTP_URL); + public static final long HTTP_URL = + KnownTags.intercepted(KnownTags.tagId(HTTP_URL_SERIAL, 13, Tags.HTTP_URL)); // peer connection tags set by BaseDecorator.onPeerConnection on ~every client/producer span. // Not intercepted; stored. Slotted (common across client instrumentations). 
diff --git a/internal-api/src/main/java/datadog/trace/api/KnownTags.java b/internal-api/src/main/java/datadog/trace/api/KnownTags.java index 73ee7ee2e9a..41167077b87 100644 --- a/internal-api/src/main/java/datadog/trace/api/KnownTags.java +++ b/internal-api/src/main/java/datadog/trace/api/KnownTags.java @@ -20,15 +20,36 @@ public static boolean isActive() { } /* - * tagId bit layout: [63-48 globalSerial] [47-32 fieldPos] [31-0 nameHash]. - * globalSerial is globally unique per known tag; fieldPos is its slot in the global positional - * layout (TagMap.knownEntries index); nameHash is TagMap.Entry#_hash(name) and is - * layout-independent. Unknown (string-only) tags have the upper 32 bits zero. NOTE: TagMap.Entry - * decodes nameHash inline as (int) tagId on its hot path, so the low-32 encoding here must stay - * in sync with that. + * tagId bit layout: [63 intercepted] [62-48 globalSerial (15 bits)] [47-32 fieldPos] + * [31-0 nameHash]. Bit 63 (the sign bit) marks a tag the tag interceptor must see, so the check + * is a single {@code tagId < 0}. globalSerial is globally unique per known tag; fieldPos is its + * slot in the global positional layout (TagMap.knownEntries index); nameHash is + * TagMap.Entry#_hash(name) and is layout-independent. Unknown (string-only) tags have the upper + * 32 bits zero. NOTE: TagMap.Entry decodes nameHash inline as (int) tagId on its hot path, so the + * low-32 encoding here must stay in sync with that. */ public static int globalSerial(long tagId) { - return (int) (tagId >>> 48); + return (int) ((tagId >>> 48) & 0x7FFF); + } + + /** + * Flag bit (the sign bit) marking a tag the tag interceptor must process — reserved/"virtual" + * tags AND intercepted-but-stored tags (e.g. http.method, which the interceptor side-effects and + * also stores). Encoded in the id so {@code DDSpanContext.setTag(long)} can route with a single + * sign test ({@link #isIntercepted}) instead of resolving the name. 
Non-intercepted tags (peer.*, + * base.service, …) leave it clear and take the fast store path. Must agree with the interceptor's + * name-based {@code needsIntercept} for every assigned id. + */ + public static final long INTERCEPTED = Long.MIN_VALUE; // 1L << 63 + + /** True if the tagId is flagged for tag-interceptor processing. */ + public static boolean isIntercepted(long tagId) { + return tagId < 0L; + } + + /** Returns the tagId with the {@link #INTERCEPTED} flag set. */ + public static long intercepted(long tagId) { + return tagId | INTERCEPTED; } public static int fieldPos(long tagId) { From b80c1892957aa25207e15567a5418817ff2e1ee5 Mon Sep 17 00:00:00 2001 From: Douglas Q Hawkins Date: Tue, 9 Jun 2026 23:18:54 -0400 Subject: [PATCH 18/35] Specialize hot interceptTag(long) arms (http.method/url, peer.service) The id interceptor dispatch previously resolved nameOf -> string switch for every intercepted tag except ERROR. Give the hot tags dedicated serial-keyed arms so the id path is fully string-free (no name resolution, no string switch): - HTTP_METHOD_SERIAL / HTTP_URL_SERIAL: the serial already distinguishes the two, so the url-as-resource rule is invoked with the known name constant directly. - PEER_SERVICE_SERIAL: mirrors the Tags.PEER_SERVICE string arm (sets the peer.service source, then interceptServiceName). Other intercepted ids still fall back to the name path, so behavior is unchanged. Test: urlAsResourceNameRuleViaTagId drives the http.method/url arms end-to-end and asserts the same resource name as the string path. 
tag: ai generated tag: no release note Co-Authored-By: Claude Opus 4.8 --- .../core/taginterceptor/TagInterceptor.java | 12 ++++++++++++ .../core/taginterceptor/TagInterceptorTest.java | 17 +++++++++++++++++ 2 files changed, 29 insertions(+) diff --git a/dd-trace-core/src/main/java/datadog/trace/core/taginterceptor/TagInterceptor.java b/dd-trace-core/src/main/java/datadog/trace/core/taginterceptor/TagInterceptor.java index 2099bbd248d..d32a37c9ca5 100644 --- a/dd-trace-core/src/main/java/datadog/trace/core/taginterceptor/TagInterceptor.java +++ b/dd-trace-core/src/main/java/datadog/trace/core/taginterceptor/TagInterceptor.java @@ -142,9 +142,21 @@ public boolean needsIntercept(String tag) { * string interception, so behavior matches the string set-path exactly. */ public boolean interceptTag(DDSpanContext span, long tagId, Object value) { + // Hot intercepted tags get a dedicated arm so the id path is fully string-free (no nameOf, no + // string switch). The serial already distinguishes http.method from http.url, so the + // url-as-resource rule is called with the known name constant directly. Any other intercepted + // id falls back to resolving the name and running the string interception (same behavior). 
switch (KnownTags.globalSerial(tagId)) { case KnownTagIds.ERROR_SERIAL: return interceptError(span, value); + case KnownTagIds.HTTP_METHOD_SERIAL: + return interceptUrlResourceAsNameRule(span, HTTP_METHOD, value); + case KnownTagIds.HTTP_URL_SERIAL: + return interceptUrlResourceAsNameRule(span, HTTP_URL, value); + case KnownTagIds.PEER_SERVICE_SERIAL: + // mirrors the Tags.PEER_SERVICE arm of the string switch + span.setTag(DDTags.PEER_SERVICE_SOURCE, Tags.PEER_SERVICE); + return interceptServiceName(PEER_SERVICE, span, value); default: String name = KnownTags.nameOf(tagId); return name != null && interceptTag(span, name, value); diff --git a/dd-trace-core/src/test/java/datadog/trace/core/taginterceptor/TagInterceptorTest.java b/dd-trace-core/src/test/java/datadog/trace/core/taginterceptor/TagInterceptorTest.java index a63a10db1c7..8e2b925b35b 100644 --- a/dd-trace-core/src/test/java/datadog/trace/core/taginterceptor/TagInterceptorTest.java +++ b/dd-trace-core/src/test/java/datadog/trace/core/taginterceptor/TagInterceptorTest.java @@ -633,6 +633,23 @@ void urlAsResourceNameRuleSetsTheResourceName( } } + @Test + void urlAsResourceNameRuleViaTagId() { + // Drives the specialized HTTP_METHOD_SERIAL / HTTP_URL_SERIAL arms of interceptTag(long): + // setting http.method + http.url BY ID must run the same url-as-resource rule as the string + // path and produce the same resource name. 
+ CoreTracer tracer = tracerBuilder().writer(new ListWriter()).build(); + + AgentSpan span = tracer.buildSpan("datadog", "fakeOperation").start(); + try { + span.setTag(KnownTagIds.HTTP_METHOD, "POST"); + span.setTag(KnownTagIds.HTTP_URL, "/with-method"); + assertEquals("POST /with-method", span.getResourceName().toString()); + } finally { + span.finish(); + } + } + @Test void whenUserSetsPeerServiceTheSourceShouldBePeerService() { CoreTracer tracer = tracerBuilder().writer(new ListWriter()).build(); From 0f4733fe2772a303c839f778a278bbdddd6427e5 Mon Sep 17 00:00:00 2001 From: Douglas Q Hawkins Date: Tue, 9 Jun 2026 23:22:28 -0400 Subject: [PATCH 19/35] Id-key http.method/http.url in HttpServerDecorator.onRequest MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The two highest-volume server-span tags now set by id. Both are intercepted-but- stored (case c): setTag(long) routes them through the (now specialized) id interceptor arm, which runs the url-as-resource rule and stores them in their slots — identical behavior to the string path, verified by urlAsResourceNameRuleViaTagId. Scope note: http.status is not a setTag (it's span.setHttpStatusCode, already a dedicated fast field); span.kind uses a cached TagMap.Entry + setSpanKindOrdinal fast field and is deferred (needs id-aware Entry-path interception). Updated the HttpServerDecoratorTest Spock HTTP_METHOD/HTTP_URL expectations. 
tag: ai generated tag: no release note Co-Authored-By: Claude Opus 4.8 --- .../decorator/HttpServerDecorator.java | 8 +++++--- .../decorator/HttpServerDecoratorTest.groovy | 12 ++++++------ 2 files changed, 11 insertions(+), 9 deletions(-) diff --git a/dd-java-agent/agent-bootstrap/src/main/java/datadog/trace/bootstrap/instrumentation/decorator/HttpServerDecorator.java b/dd-java-agent/agent-bootstrap/src/main/java/datadog/trace/bootstrap/instrumentation/decorator/HttpServerDecorator.java index 267d0149c3c..c47c5d6d792 100644 --- a/dd-java-agent/agent-bootstrap/src/main/java/datadog/trace/bootstrap/instrumentation/decorator/HttpServerDecorator.java +++ b/dd-java-agent/agent-bootstrap/src/main/java/datadog/trace/bootstrap/instrumentation/decorator/HttpServerDecorator.java @@ -13,6 +13,7 @@ import datadog.context.propagation.Propagators; import datadog.trace.api.Config; import datadog.trace.api.DDTags; +import datadog.trace.api.KnownTagIds; import datadog.trace.api.datastreams.DataStreamsTransactionExtractor; import datadog.trace.api.datastreams.DataStreamsTransactionTracker; import datadog.trace.api.function.TriConsumer; @@ -314,7 +315,7 @@ public AgentSpan onRequest( if (request != null) { String method = method(request); - span.setTag(Tags.HTTP_METHOD, method); + span.setTag(KnownTagIds.HTTP_METHOD, method); // Copy of HttpClientDecorator url handling try { @@ -326,9 +327,10 @@ public AgentSpan onRequest( String path = encoded ? 
url.rawPath() : url.path(); if (valid) { span.setTag( - Tags.HTTP_URL, URIUtils.lazyValidURL(url.scheme(), url.host(), url.port(), path)); + KnownTagIds.HTTP_URL, + URIUtils.lazyValidURL(url.scheme(), url.host(), url.port(), path)); } else if (supportsRaw) { - span.setTag(Tags.HTTP_URL, URIUtils.lazyInvalidUrl(url.raw())); + span.setTag(KnownTagIds.HTTP_URL, URIUtils.lazyInvalidUrl(url.raw())); } if (extracted != null && extracted.getXForwardedHost() != null) { span.setTag(Tags.HTTP_HOSTNAME, extracted.getXForwardedHost()); diff --git a/dd-java-agent/agent-bootstrap/src/test/groovy/datadog/trace/bootstrap/instrumentation/decorator/HttpServerDecoratorTest.groovy b/dd-java-agent/agent-bootstrap/src/test/groovy/datadog/trace/bootstrap/instrumentation/decorator/HttpServerDecoratorTest.groovy index afd2dd5e882..782f7e6241d 100644 --- a/dd-java-agent/agent-bootstrap/src/test/groovy/datadog/trace/bootstrap/instrumentation/decorator/HttpServerDecoratorTest.groovy +++ b/dd-java-agent/agent-bootstrap/src/test/groovy/datadog/trace/bootstrap/instrumentation/decorator/HttpServerDecoratorTest.groovy @@ -72,10 +72,10 @@ class HttpServerDecoratorTest extends ServerDecoratorTest { then: if (req) { - 1 * this.span.setTag(Tags.HTTP_METHOD, "test-method") + 1 * this.span.setTag(KnownTagIds.HTTP_METHOD, "test-method") 1 * this.span.setTag(DDTags.HTTP_QUERY, _) 1 * this.span.setTag(DDTags.HTTP_FRAGMENT, _) - 1 * this.span.setTag(Tags.HTTP_URL, {it.toString() == url}) + 1 * this.span.setTag(KnownTagIds.HTTP_URL, {it.toString() == url}) 1 * this.span.setTag(Tags.HTTP_HOSTNAME, req.url.host) 2 * this.span.getRequestContext() 1 * this.span.setResourceName({ it as String == req.method.toUpperCase() + " " + req.path }, ResourceNamePriorities.HTTP_PATH_NORMALIZER) @@ -105,7 +105,7 @@ class HttpServerDecoratorTest extends ServerDecoratorTest { then: if (expectedUrl) { - 1 * this.span.setTag(Tags.HTTP_URL, {it.toString() == expectedUrl}) + 1 * this.span.setTag(KnownTagIds.HTTP_URL, 
{it.toString() == expectedUrl}) 2 * this.span.getRequestContext() } if (expectedUrl && tagQueryString) { @@ -121,7 +121,7 @@ class HttpServerDecoratorTest extends ServerDecoratorTest { 1 * this.span.getRequestContext() 1 * this.span.setResourceName({ it as String == expectedPath }) } - 1 * this.span.setTag(Tags.HTTP_METHOD, null) + 1 * this.span.setTag(KnownTagIds.HTTP_METHOD, null) _ * this.span.getLocalRootSpan() >> this.span 0 * _ @@ -155,13 +155,13 @@ class HttpServerDecoratorTest extends ServerDecoratorTest { decorator.onRequest(this.span, null, req, root()) then: - 1 * this.span.setTag(Tags.HTTP_URL, {it.toString() == expectedUrl}) + 1 * this.span.setTag(KnownTagIds.HTTP_URL, {it.toString() == expectedUrl}) 1 * this.span.setTag(Tags.HTTP_HOSTNAME, req.url.host) 1 * this.span.setTag(DDTags.HTTP_QUERY, expectedQuery) 1 * this.span.setTag(DDTags.HTTP_FRAGMENT, null) 2 * this.span.getRequestContext() 1 * this.span.setResourceName({ it as String == expectedResource }, ResourceNamePriorities.HTTP_PATH_NORMALIZER) - 1 * this.span.setTag(Tags.HTTP_METHOD, null) + 1 * this.span.setTag(KnownTagIds.HTTP_METHOD, null) _ * this.span.getLocalRootSpan() >> this.span 0 * _ From 42ac743c650f376a1baf349edf160f1beaec4941 Mon Sep 17 00:00:00 2001 From: Douglas Q Hawkins Date: Wed, 10 Jun 2026 07:00:56 -0400 Subject: [PATCH 20/35] Register slotted ids for universal + JDBC tags (petclinic hot path) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Petclinic span/tag capture shows the macro levers are component + span.kind (every span) and db.type/instance/user/operation/pool.name (58% of spans, JDBC) — none yet slotted. Register them (plus language) as slotted ids so the existing string/cached-Entry decorator sets are upgraded into positional slots via keyOf on store — no decorator or test changes needed. span.kind flagged INTERCEPTED (the consistency guard enforces it). SLOT_COUNT 18 -> 26. 
This is the registration-only step (slot storage benefit); decorator-level id set(long) migration can follow if the macro signal warrants. tag: ai generated tag: no release note Co-Authored-By: Claude Opus 4.8 --- .../java/datadog/trace/api/KnownTagIds.java | 67 ++++++++++++++++++- 1 file changed, 65 insertions(+), 2 deletions(-) diff --git a/internal-api/src/main/java/datadog/trace/api/KnownTagIds.java b/internal-api/src/main/java/datadog/trace/api/KnownTagIds.java index 5de47effbef..f7e420e06df 100644 --- a/internal-api/src/main/java/datadog/trace/api/KnownTagIds.java +++ b/internal-api/src/main/java/datadog/trace/api/KnownTagIds.java @@ -18,8 +18,8 @@ * tag-id resolution live before the first span is built. */ public final class KnownTagIds { - // slot count = (max stored fieldPos) + 1. Stored tags use fieldPos 0..17. - static final int SLOT_COUNT = 18; + // slot count = (max stored fieldPos) + 1. Stored tags use fieldPos 0..25. + static final int SLOT_COUNT = 26; // ---- reserved / virtual (tag-interceptor handled, not stored) ---- // Reserved tags are always intercepted -> set the INTERCEPTED flag. @@ -104,6 +104,37 @@ public final class KnownTagIds { public static final int PEER_PORT_SERIAL = KnownTags.FIRST_STORED_SERIAL + 17; public static final long PEER_PORT = KnownTags.tagId(PEER_PORT_SERIAL, 17, Tags.PEER_PORT); + // Universal decorator tags — set on ~every span (component/span.kind via Base/Server/Client + // decorators, language via ServerDecorator). span.kind is intercepted (setSpanKindOrdinal). 
+ public static final int COMPONENT_SERIAL = KnownTags.FIRST_STORED_SERIAL + 18; + public static final long COMPONENT = KnownTags.tagId(COMPONENT_SERIAL, 18, Tags.COMPONENT); + + public static final int SPAN_KIND_SERIAL = KnownTags.FIRST_STORED_SERIAL + 19; + public static final long SPAN_KIND = + KnownTags.intercepted(KnownTags.tagId(SPAN_KIND_SERIAL, 19, Tags.SPAN_KIND)); + + public static final int LANGUAGE_SERIAL = KnownTags.FIRST_STORED_SERIAL + 20; + public static final long LANGUAGE = KnownTags.tagId(LANGUAGE_SERIAL, 20, DDTags.LANGUAGE_TAG_KEY); + + // JDBC / database-client tags — set on every db span (58% of petclinic spans). Not intercepted + // (only db.statement is, and that's handled separately). + public static final int DB_TYPE_SERIAL = KnownTags.FIRST_STORED_SERIAL + 21; + public static final long DB_TYPE = KnownTags.tagId(DB_TYPE_SERIAL, 21, Tags.DB_TYPE); + + public static final int DB_INSTANCE_SERIAL = KnownTags.FIRST_STORED_SERIAL + 22; + public static final long DB_INSTANCE = KnownTags.tagId(DB_INSTANCE_SERIAL, 22, Tags.DB_INSTANCE); + + public static final int DB_USER_SERIAL = KnownTags.FIRST_STORED_SERIAL + 23; + public static final long DB_USER = KnownTags.tagId(DB_USER_SERIAL, 23, Tags.DB_USER); + + public static final int DB_OPERATION_SERIAL = KnownTags.FIRST_STORED_SERIAL + 24; + public static final long DB_OPERATION = + KnownTags.tagId(DB_OPERATION_SERIAL, 24, Tags.DB_OPERATION); + + public static final int DB_POOL_NAME_SERIAL = KnownTags.FIRST_STORED_SERIAL + 25; + public static final long DB_POOL_NAME = + KnownTags.tagId(DB_POOL_NAME_SERIAL, 25, Tags.DB_POOL_NAME); + static final KnownTags.Resolver RESOLVER = new KnownTags.Resolver() { @Override @@ -147,6 +178,22 @@ public String nameOf(long tagId) { return Tags.PEER_HOST_IPV6; case PEER_PORT_SERIAL: return Tags.PEER_PORT; + case COMPONENT_SERIAL: + return Tags.COMPONENT; + case SPAN_KIND_SERIAL: + return Tags.SPAN_KIND; + case LANGUAGE_SERIAL: + return DDTags.LANGUAGE_TAG_KEY; + case 
DB_TYPE_SERIAL: + return Tags.DB_TYPE; + case DB_INSTANCE_SERIAL: + return Tags.DB_INSTANCE; + case DB_USER_SERIAL: + return Tags.DB_USER; + case DB_OPERATION_SERIAL: + return Tags.DB_OPERATION; + case DB_POOL_NAME_SERIAL: + return Tags.DB_POOL_NAME; default: return null; } @@ -198,6 +245,22 @@ public long keyOf(String name) { return PEER_HOST_IPV6; case Tags.PEER_PORT: return PEER_PORT; + case Tags.COMPONENT: + return COMPONENT; + case Tags.SPAN_KIND: + return SPAN_KIND; + case DDTags.LANGUAGE_TAG_KEY: + return LANGUAGE; + case Tags.DB_TYPE: + return DB_TYPE; + case Tags.DB_INSTANCE: + return DB_INSTANCE; + case Tags.DB_USER: + return DB_USER; + case Tags.DB_OPERATION: + return DB_OPERATION; + case Tags.DB_POOL_NAME: + return DB_POOL_NAME; default: return 0L; } From 0af252995bacbddada886cd7e20d8b8513885d59 Mon Sep 17 00:00:00 2001 From: Douglas Q Hawkins Date: Wed, 10 Jun 2026 08:28:58 -0400 Subject: [PATCH 21/35] Add tag-conventions.yaml: span-type inheritance + product mixins (draft spec) Language-agnostic declaration spec for the AttributeValueTable / codegen work: - structural span types compose via `extends` (multiple parents allowed); the `base` root holds common tags (incl. error, process-constants) implicitly in every span. peer.* lives in a `client` abstract layer (open question, noted). - product/enrichment mixins (profiling, dsm, appsec, ci_visibility) compose on the side via `applies: all | [types]`, gated by `enabled_by`. - tag fields carry logical type + OTel `aliases`; tracer-impl hints (slot, intercepted, source) are marked separately for the cross-language split. Reconciled from the TracerProto OTel-convention hierarchy + the tags PetClinic actually emits. Draft input for the design doc; not wired into the build. 
tag: no release note Co-Authored-By: Claude Opus 4.8 --- tag-conventions.yaml | 159 +++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 159 insertions(+) create mode 100644 tag-conventions.yaml diff --git a/tag-conventions.yaml b/tag-conventions.yaml new file mode 100644 index 00000000000..84b0057fbb4 --- /dev/null +++ b/tag-conventions.yaml @@ -0,0 +1,159 @@ +# Tag conventions — span-type inheritance + product mixins (draft) +# --------------------------------------------------------------------------- +# Source: the OTel-convention prototype (DatadogBase / Http / Client / DataSource +# hierarchy in TracerProto) reconciled with the tags Spring PetClinic emits. +# Language-agnostic spec the code generator consumes to emit per-language tag-id +# constants, the id<->name resolver, and the slot layout. +# +# TWO COMPOSITION MECHANISMS +# inheritance (`extends`) — the structural "is-a" chain of span types that +# carries COMMON tags down to specific types. The root (`base`) is implicitly +# in every span. `extends` may list MULTIPLE parents (e.g. an http client span +# is both `http` and `client`). This is core tracing structure. +# +# mixins (top-level `mixins:` section, attached via `applies`) — PRODUCT / enrichment overlays composed ON THE SIDE, +# orthogonal to the span-type chain: CI Visibility, AppSec (ASM), Data Streams +# (DSM), profiling, etc. Each is gated by its product being enabled and may +# apply to all spans (implicitly) or to specific span types. +# +# resolved tags(span_type) = own tags +# + tags inherited up every `extends` parent (incl. base) +# + tags from product mixins whose `applies` matches +# +# `abstract: true` marks a structural layer that is never a span on its own +# (base, client, http) — it exists only to be extended. 
+# +# tag fields: +# tag canonical tag name written to the span +# type string | int | long | boolean | double (language-agnostic) +# required required | conditional | recommended | optional | opt_in +# aliases other names resolving to the same tag (e.g. OTel semconv names) +# -- tracer-implementation hints (layered out of the shared spec cross-language) -- +# slot true (default) = fast positional slot; false = id-only, lives in buckets +# intercepted true = setting it triggers tracer side-effects (must run the interceptor) +# source core (tracer sets once per span) | inst (instrumentation) [default inst] +# --------------------------------------------------------------------------- + +# ===== Structural span types (inheritance) ================================== +span_types: + + # ---- root: common tags, implicitly in EVERY span ---- + base: + abstract: true + tags: + - { tag: component, type: string, required: required } + - { tag: span.kind, type: string, required: required, intercepted: true } # otel: kind + - { tag: _dd.base_service, type: string, required: required, source: core } + - { tag: version, type: string, required: recommended, source: core } + - { tag: env, type: string, required: recommended, source: core } + - { tag: language, type: string, required: required, source: core } + - { tag: runtime-id, type: string, required: required, source: core } + - { tag: _dd.integration, type: string, required: optional, source: core, slot: false } + - { tag: _dd.git.commit.sha, type: string, required: optional, source: core, slot: false } + - { tag: _dd.git.repository_url, type: string, required: optional, source: core, slot: false } + # error enrichment — any span can fail; present only on failure, hence slot:false + - { tag: error.type, type: string, required: recommended, slot: false } + - { tag: error.message, type: string, required: recommended, slot: false } + - { tag: error.stack, type: string, required: recommended, slot: false } + + # ---- 
client/outbound: anything that calls a remote peer (proto: Client) ---- + # OPEN QUESTION: is "peer" best as this structural abstract type (an outbound span + # is-a client), or folded directly into db.client/http.client, or something else? + # Kept as a structural layer for now; revisit. + client: + abstract: true + extends: base + tags: + - { tag: peer.service, type: string, required: recommended, intercepted: true } + - { tag: _dd.peer.service.source, type: string, required: recommended } + - { tag: peer.hostname, type: string, required: recommended } + - { tag: peer.ipv4, type: string } + - { tag: peer.ipv6, type: string } + - { tag: peer.port, type: int } + + # ---- HTTP family ---- + http: + abstract: true + extends: base + tags: + - { tag: http.method, type: string, required: required, intercepted: true, aliases: [http.request.method] } + - { tag: http.status_code, type: int, required: conditional, aliases: [http.response.status_code] } + - { tag: network.protocol.version, type: string, required: recommended, slot: false } + + http.server: # servlet.request + extends: http + tags: + - { tag: http.url, type: string, required: required, intercepted: true, aliases: [url.full] } + - { tag: http.route, type: string, required: conditional } + - { tag: http.hostname, type: string, required: required, aliases: [server.address] } + - { tag: http.useragent, type: string, required: recommended, slot: false } + - { tag: http.query.string, type: string, required: recommended, slot: false, aliases: [url.query] } + - { tag: servlet.path, type: string, required: optional, slot: false } + - { tag: servlet.context, type: string, required: optional, slot: false, intercepted: true } + + http.client: + extends: [ http, client ] # multiple inheritance: an http span AND a peer client + tags: + - { tag: http.url, type: string, required: required, intercepted: true, aliases: [url.full] } + - { tag: http.resend_count, type: int, required: recommended, slot: false } + + # ---- 
Database client (h2.query, jdbc) ---- + db.client: + extends: client + tags: + - { tag: db.type, type: string, required: required, aliases: [db.system] } + - { tag: db.instance, type: string, required: recommended } + - { tag: db.operation, type: string, required: recommended, aliases: [db.operation.name] } + - { tag: db.user, type: string, required: recommended } + - { tag: db.pool.name, type: string, required: optional } + - { tag: db.statement, type: string, required: recommended, slot: false, intercepted: true, aliases: [db.query.text] } + + # ---- View render (response.render) ---- + view.render: + extends: base + tags: + - { tag: view.name, type: string, required: recommended, slot: false } + +# ===== Product / enrichment mixins (on the side) ============================ +# Composed orthogonally when the product is enabled. `applies: all` => overlaid on +# every span (implicitly); `applies: [types]` => only those span types. +# Tag lists below are ILLUSTRATIVE — each product team owns the authoritative set. 
+mixins: + + profiling: + enabled_by: dd.profiling.enabled + applies: all + tags: + - { tag: _dd.profiling.enabled, type: boolean, source: core } + + dsm: # Data Streams Monitoring + enabled_by: dd.data.streams.enabled + applies: all + tags: + - { tag: _dd.dsm.enabled, type: boolean, source: core } + # + pathway / checkpoint tags on messaging spans (owned by DSM) + + appsec: # ASM + enabled_by: dd.appsec.enabled + applies: [ http.server ] + tags: + - { tag: _dd.appsec.enabled, type: boolean, source: core, slot: false } + # + appsec event / threat tags (owned by AppSec) + + ci_visibility: # CI Visibility (test spans) + enabled_by: dd.civisibility.enabled + applies: [ test ] # a `test` span type (not modeled here yet) + tags: + - { tag: test.name, type: string, slot: false } + - { tag: test.suite, type: string, slot: false } + - { tag: test.status, type: string, slot: false } + - { tag: test.framework, type: string, slot: false } + +# --------------------------------------------------------------------------- +# Notes +# - `span.kind` enumerates: server | client | producer | consumer | internal | broker. +# - "virtual" tags (sampling.priority, resource.name, service, manual.keep/drop, +# span.type, measured, origin, analytics.sample_rate, error flag) are handled +# entirely by the interceptor / span fields and are NOT stored; they'd be declared +# with `virtual: true` (reserved-tier id, no slot). Omitted here for now. 
+# --------------------------------------------------------------------------- From 5893920ce7f6a2937b98e2a4bf5024c899461cf5 Mon Sep 17 00:00:00 2001 From: Douglas Q Hawkins Date: Wed, 10 Jun 2026 08:33:13 -0400 Subject: [PATCH 22/35] Add AttributeValueTable design doc (draft) Storage (typed arrays: byte[] types / long[] prims / Object[] objs by slot, no per-tag Entry), write/read paths, the no-Entry serialize cursor (the real alloc win), API-compat plan, and how product mixins interact with the fixed layout (unslotted vs composed-at-registration vs per-span-type). Measurement: standalone JMH first (vs OptimizedTagMap, -prof gc), then integrate + petclinic A/B. tag: no release note Co-Authored-By: Claude Opus 4.8 --- attribute-value-table-design.md | 145 ++++++++++++++++++++++++++++++++ 1 file changed, 145 insertions(+) create mode 100644 attribute-value-table-design.md diff --git a/attribute-value-table-design.md b/attribute-value-table-design.md new file mode 100644 index 00000000000..44c5655d3cd --- /dev/null +++ b/attribute-value-table-design.md @@ -0,0 +1,145 @@ +# AttributeValueTable — design + +Branch: `dougqh/attribute-value-table` (off `dougqh/tagmap-tagid-experiment`) + +## Goal + +Eliminate the **per-tag `TagMap$Entry` allocation** — the #1 remaining tracer allocator +(~1.1% of process allocation in the PetClinic JFR, even after the tag-id work). The tag-id +fast-path made tag *placement* fast (positional slot vs hash bucket) but still allocates one +`Entry` wrapper per tag set, and keeps it alive until serialize. + +**Idea:** for known (slotted) tags, store the *values* positionally in typed arrays — no +`Entry` object per tag. A span's known tags never materialize an `Entry`; the serializer reads +`(name, type, value)` straight from the arrays. + +This is the runtime counterpart to the [`tag-conventions.yaml`](tag-conventions.yaml) spec: the +generator assigns each known tag a `fieldPos`, and the `AttributeValueTable` is indexed by it. 
+ +## Storage + +One global slot layout (today's model — all span types share the slot numbering; per-span-type +layouts are an open question, below). Sized to `KnownTags.slotCount()`. + +``` +byte[] types // per slot: UNSET=0 | OBJECT | CHARSEQUENCE | BOOLEAN | INT | LONG | FLOAT | DOUBLE +long[] prims // per slot: boolean(0/1) / int / long / float-bits / double-bits (no boxing) +Object[] objs // per slot: String/CharSequence/Object values; null for primitive slots +``` + +- **Presence** = `types[slot] != UNSET`. `size` maintained as a counter (+ bucket count). +- **Lazily allocated** on first known-tag write (like today's `knownEntries`). +- **Primitive arrays optional:** `prims` can be allocated lazily — most tags are strings (`objs`), + so a primitive-free span never allocates `prims`. +- **Unknown tags** (no slot: `globalSerial == 0` or `fieldPos == NO_SLOT`) fall back to the + existing hash buckets, which still use `Entry`. These are the minority (dynamic / rare tags); + the common known tags are the ones we de-allocate. + +Memory: trades *N* per-tag `Entry` objects (N = known tags on the span) for up to 3 fixed +per-span arrays. Net win when a span carries more than ~2–3 known tags (PetClinic spans carry +5–10) and especially on the serialize path (zero transient `Entry`). + +## Write path + +``` +set(long id, value): + pos = fieldPos(id) + if pos < slotCount: types[pos]=T; prims[pos]=packed OR objs[pos]=value // no Entry + else: bucketSet(id, value) // Entry (unknown) + +set(String name, value): + id = keyOf(name) // resolver + if id != 0: set(id, value) // known string set ALSO avoids Entry now + else: bucketSet(name,...) // unknown +``` + +Interception (the 3-case routing in `DDSpanContext.setTag`) is unchanged and sits *above* this — +the table is just the storage the non-intercepted / post-interceptor write lands in. 
+ +## Read path + +- **Typed getters** (`getString(id)`, `getInt(id)`, `getBoolean(id)`…) read the arrays directly — + no allocation (boxing only if `getObject` is called on a primitive slot). +- **`getEntry(id)` / `getEntry(String)`** (API compat): materialize an `Entry` *on demand* from the + slot — allocation happens only when a caller explicitly asks for an `Entry`, which is rare on the + hot path now that typed getters and the serializer cursor exist. + +## The payoff: no-`Entry` serialize + +The real allocation win needs the **serializer to consume the table without materializing `Entry`**. +Add a no-alloc cursor: + +``` +forEachKnown(visitor): // visitor.accept(name, type, primValue, objValue) + for pos in 0..slotCount: + if types[pos] != UNSET: + visitor.accept(slotName(pos), types[pos], prims[pos], objs[pos]) +``` + +`slotName(pos)` comes from the layout (generated `String[] slotNames`, or +`KnownTags.Resolver.nameOfSlot(int)`). The msgpack `TraceMapper` is adapted to write +`(name, typed value)` from this cursor instead of iterating `Entry` objects. Unknown/bucket tags +are iterated separately (they still have `Entry`s). Result: a span's known tags serialize with +**zero `Entry` allocation**. + +## How product mixins interact with the layout + +The structural inheritance (`base → http → http.server`) is the **stable, build-time** part of +the layout — those tags map to fixed slots. Product mixins (profiling, dsm, appsec, ci) are the +**dynamic** part: present only when enabled, attached by `applies`. So the question is whether/how +they consume slots. Three models: + +1. **Unslotted product tags (recommended first).** Mixin tags are `slot: false` → keep an id but + live in the buckets. AVT slot layout = structural tags only. Products add few tags, so the bucket + cost is negligible, and **disabled products cost zero** per-span array space. Clean split: + *structure → slots, enrichment → buckets*; preserves the fixed-layout property. + +2. 
**Layout composed at registration.** `slotCount`/layout is assembled at init from + `structural + enabled products`. An `applies: all` product (profiling/dsm) extends the universal + slot region — its tag gets a slot on every span *only when enabled*; disabled products contribute + nothing. Fits the existing model (`slotCount` is a dynamic constant captured at resolver + registration; codegen emits each mixin's slot contribution, runtime concatenates the enabled + ones). `applies: [types]` products don't fit a single global layout (they'd waste global slots on + other types). + +3. **Per-span-type layouts.** Each type's AVT = resolved structural tags + product tags whose + `applies` matches. Tightest; `applies: [http.server]` appends appsec's slots to exactly that type. + Requires the span type at creation — the bigger change. + +`applies` is exactly the composer's signal: `all` → universal-region candidate; `[types]` → per-type +(or bucket). **Recommendation: model #1 for the experiment** (products don't perturb the AVT layout), +promote a hot `applies: all` product via #2 if measurement warrants, treat #3 as the long-term tight +design alongside span-type-at-creation. + +## API-compat strategy + +`TagMap` is a large `Entry`-centric interface. Plan: +1. Implement `AttributeValueTable` as an alternative storage *inside* `OptimizedTagMap` + (replace the `Entry[] knownEntries` with the three arrays), rather than a new top-level type — + keeps the whole interface working. +2. Slot get/set/remove/iterate operate on the arrays; bucket paths unchanged. +3. `Entry`-returning methods materialize lazily. +4. Add the `forEachKnown` cursor and wire the serializer. + +## Open questions + +1. **Global vs per-span-type layout.** Global (today) is simplest and needs no span-type at + creation, but sizes every span's arrays to the union of all known tags (~40+ with full + conventions). 
Per-span-type layout (from the YAML) is tighter but requires knowing the span type + when the span is created — a bigger change. *Recommend: start global, measure, then evaluate + per-type.* +2. **Serializer integration depth.** The `forEachKnown` cursor is the crux of the allocation win; + without it we only save on the write path. Worth doing for the real number. +3. **Primitive packing layout.** Single `long[] prims` + `byte[] types`, vs a tagged `Object[]` + with boxing — measure whether the extra arrays pay off vs just `Object[]` + box. +4. **`Ledger` / builder path** — how accumulated changes apply to arrays. +5. **Memory floor for tiny spans** — spans with 1–2 known tags: do the 3 arrays cost more than they + save? (lazy `prims`, and a small-size threshold, mitigate this.) + +## How we'll measure + +Per the agreed plan: **standalone JMH first** — `AttributeValueTable` vs `OptimizedTagMap` on a +realistic PetClinic-like tag set (component, span.kind, db.*, http.*), measuring throughput and +**allocation (`-prof gc`)**. Expect ~zero `Entry` allocs for known tags vs N today. If promising, +integrate (incl. the serializer cursor) and re-run the PetClinic CPU/alloc A/B with the existing +harness. From 1497f0de83ca294a310348d93069c72299076be0 Mon Sep 17 00:00:00 2001 From: Douglas Q Hawkins Date: Wed, 10 Jun 2026 09:11:49 -0400 Subject: [PATCH 23/35] Refine AttributeValueTable design: interface, EntryReader reads, resolver-driven MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit - AttributeValueTable is an interface; array/segment-backed impl ships first, a codegen POJO-per-span-type impl can replace it later (same opaque contract). - set(long,..)->boolean: false on no-slot OR type-mismatch -> caller buckets (Entry). 
- reads route through get(long)->EntryReader (flyweight, EntryReadingHelper pattern; coercion via TagValueConversions; materialize via existing EntryReader.entry()) — no separate typed getters, no bespoke visitor; reuses the Iterable serialize path unchanged. - drop the separate Layout abstraction — consult KnownTags.Resolver directly, extended with typeOf (type-reject) + tagIdAt (iteration). static per-slot type => no per-span type array. - product mixins = lazily-allocated segments (fieldPos = [segment][offset]). tag: no release note Co-Authored-By: Claude Opus 4.8 --- attribute-value-table-design.md | 199 ++++++++++++++++++++++++-------- 1 file changed, 152 insertions(+), 47 deletions(-) diff --git a/attribute-value-table-design.md b/attribute-value-table-design.md index 44c5655d3cd..d3223dfc572 100644 --- a/attribute-value-table-design.md +++ b/attribute-value-table-design.md @@ -16,41 +16,69 @@ fast-path made tag *placement* fast (positional slot vs hash bucket) but still a This is the runtime counterpart to the [`tag-conventions.yaml`](tag-conventions.yaml) spec: the generator assigns each known tag a `fieldPos`, and the `AttributeValueTable` is indexed by it. -## Storage +## An interface, not one storage scheme -One global slot layout (today's model — all span types share the slot numbering; per-span-type -layouts are an open question, below). Sized to `KnownTags.slotCount()`. +`AttributeValueTable` is an **interface**. The opaque `set(long)→boolean` / `get(long)→EntryReader` +contract leaks nothing about storage, so the same interface can be satisfied by either backing: + +- **Array/segment-backed** (generic, resolver-driven) — the measurable first impl; no codegen. +- **POJO-backed** (codegen, per span type) — a generated class with real typed fields + generated + `set`/`get` switches. Densest and most JIT-friendly (fields inline, no bounds checks); type-reject + falls out for free (a wrong-type `set` finds no matching field → returns `false`). 
Lazily-created + mixin sub-POJOs for products. + +Callers (`OptimizedTagMap`) are impl-agnostic — the array impl ships first, the POJO impl can replace +it per span type later with no caller change. + +## Storage (array-backed impl) + +Slots are organized into lazily-allocated **segments** (segment 0 = structural, 1+ = product mixins; +see "How product mixins interact"). Each slot's **type is declared by the resolver** (`typeOf`, from the YAML `type:`) and is therefore static — the table stores no per-span type array. Per segment: ``` -byte[] types // per slot: UNSET=0 | OBJECT | CHARSEQUENCE | BOOLEAN | INT | LONG | FLOAT | DOUBLE -long[] prims // per slot: boolean(0/1) / int / long / float-bits / double-bits (no boxing) -Object[] objs // per slot: String/CharSequence/Object values; null for primitive slots +long present // presence bitmask (long[] if the segment has > 64 slots) — 0 is a valid primitive +Object[] objs // object/CharSequence-typed slots +long[] prims // primitive-typed slots: boolean(0/1)/int/long/float-bits/double-bits (no boxing) ``` -- **Presence** = `types[slot] != UNSET`. `size` maintained as a counter (+ bucket count). -- **Lazily allocated** on first known-tag write (like today's `knownEntries`). -- **Primitive arrays optional:** `prims` can be allocated lazily — most tags are strings (`objs`), - so a primitive-free span never allocates `prims`. -- **Unknown tags** (no slot: `globalSerial == 0` or `fieldPos == NO_SLOT`) fall back to the - existing hash buckets, which still use `Entry`. These are the minority (dynamic / rare tags); - the common known tags are the ones we de-allocate. +- **Type is static per slot** (`KnownTags` `typeOf`); the flyweight reader derives `type()` from it. +- **Lazily allocated** per segment on first write; `objs`/`prims` each lazy (a primitive-free segment + never allocates `prims`). `size` = popcount(present) + bucket count. 
+- **Unknown tags** (`globalSerial == 0`, `fieldPos == NO_SLOT`) and **type mismatches** fall back to + the hash buckets (still `Entry`) — the minority; correctly-typed known tags are what we de-allocate. + +### Type discipline + +The resolver declares each slot's type (`typeOf`). `set` accepts a value only if it matches; otherwise it returns +`false` and the caller buckets it as a normal `Entry`. Slots stay mono-typed (so type need not be +stored per span) and off-type writes degrade gracefully instead of corrupting a slot. Type *coercion +on read* (e.g. int → string for serialization) is `EntryReader`'s job (via `TagValueConversions`), +not a widening of the stored value. -Memory: trades *N* per-tag `Entry` objects (N = known tags on the span) for up to 3 fixed -per-span arrays. Net win when a span carries more than ~2–3 known tags (PetClinic spans carry -5–10) and especially on the serialize path (zero transient `Entry`). +### Reuse (read side already exists) + +The flyweight reader is the `EntryReadingHelper` pattern already used by `LegacyTagMap`: a reusable +`EntryReader` repositioned per slot, coercion delegated to `TagValueConversions`, and +`EntryReader.entry()` for materialize-on-demand. No new reader, visitor, or coercion code. + +Memory: trades *N* per-tag `Entry` objects (N = known tags on the span) for a small presence bitmask +plus up to two lazily-allocated arrays per occupied segment. Net win when a span carries more than +~2–3 known tags (PetClinic spans carry 5–10) and especially on the serialize path (zero transient +`Entry`). ## Write path ``` -set(long id, value): +table.set(long id, value): // returns true iff stored in a slot pos = fieldPos(id) - if pos < slotCount: types[pos]=T; prims[pos]=packed OR objs[pos]=value // no Entry - else: bucketSet(id, value) // Entry (unknown) - -set(String name, value): - id = keyOf(name) // resolver - if id != 0: set(id, value) // known string set ALSO avoids Entry now - else: bucketSet(name,...) 
// unknown + if pos < slotCount && typeMatches(id, value): + present |= bit(pos); prims[pos]=packed OR objs[pos]=value // no Entry + return true + return false // no slot or wrong type + +// caller (OptimizedTagMap): +if (!table.set(id, value)) setInBuckets(id, value) // Entry (unknown / off-type) +// string set: id = keyOf(name); known -> table.set; else -> buckets ``` Interception (the 3-case routing in `DDSpanContext.setTag`) is unchanged and sits *above* this — @@ -58,29 +86,20 @@ the table is just the storage the non-intercepted / post-interceptor write lands ## Read path -- **Typed getters** (`getString(id)`, `getInt(id)`, `getBoolean(id)`…) read the arrays directly — - no allocation (boxing only if `getObject` is called on a primitive slot). -- **`getEntry(id)` / `getEntry(String)`** (API compat): materialize an `Entry` *on demand* from the - slot — allocation happens only when a caller explicitly asks for an `Entry`, which is rare on the - hot path now that typed getters and the serializer cursor exist. +All reads go through **`get(long) → EntryReader`** (a repositioned flyweight, the `EntryReadingHelper` +pattern). `EntryReader`'s own accessors + `TagValueConversions` provide value reads and type coercion +(e.g. int → string for serialization) in one place — so there are no separate typed getters on the +table, and slot-stored vs bucket-stored values coerce identically. Materialize a retainable `Entry` +only when a caller needs to hold one, via the existing `EntryReader.entry()`. ## The payoff: no-`Entry` serialize -The real allocation win needs the **serializer to consume the table without materializing `Entry`**. -Add a no-alloc cursor: - -``` -forEachKnown(visitor): // visitor.accept(name, type, primValue, objValue) - for pos in 0..slotCount: - if types[pos] != UNSET: - visitor.accept(slotName(pos), types[pos], prims[pos], objs[pos]) -``` - -`slotName(pos)` comes from the layout (generated `String[] slotNames`, or -`KnownTags.Resolver.nameOfSlot(int)`). 
The msgpack `TraceMapper` is adapted to write -`(name, typed value)` from this cursor instead of iterating `Entry` objects. Unknown/bucket tags -are iterated separately (they still have `Entry`s). Result: a span's known tags serialize with -**zero `Entry` allocation**. +`TagMap` is already `Iterable` and the msgpack `TraceMapper` already consumes +`EntryReader` — so the table reuses that contract with **no serializer change and no bespoke visitor**. +`iterator()` walks occupied slots and yields the repositioned flyweight `EntryReader` (name via +`resolver.tagIdAt(pos)` → `nameOf`, value from the arrays). `OptimizedTagMap` chains the table's slot +readers then its bucket `Entry`s (also `EntryReader`s). Result: a span's known tags serialize with +**zero `Entry` allocation**; only unknown/bucket tags retain `Entry`s. ## How product mixins interact with the layout @@ -107,9 +126,95 @@ they consume slots. Three models: Requires the span type at creation — the bigger change. `applies` is exactly the composer's signal: `all` → universal-region candidate; `[types]` → per-type -(or bucket). **Recommendation: model #1 for the experiment** (products don't perturb the AVT layout), -promote a hot `applies: all` product via #2 if measurement warrants, treat #3 as the long-term tight -design alongside span-type-at-creation. +(or bucket). + +### Recommended: lazy per-mixin segments + +Because `set(tagId)→bool` / `get(tagId)→EntryReader` hide the storage strategy from callers, the table +can organize itself as **segments**, each lazily allocated: + +``` +segment 0 = structural tags (base + the span type's inherited/own tags) — the common case +segment 1+ = one per product mixin (profiling, dsm, appsec, ci, …) — allocated on first touch +``` + +- The `fieldPos` field partitions into `[segment : 4][offset : 12]`, so a `tagId` names its segment + and intra-segment offset directly — no extra lookup. 
+- `set` routes to `Segment[segOf(fieldPos)]`, allocating a mixin segment **on its first touch on this + span**; a span that never sets a product's tag never allocates that segment. +- `get`/iteration walk segment 0 + whatever mixin segments exist. + +This beats the three models above: product tags get positional, no-`Entry` storage *when present* +(unlike "always bucket"), with zero per-span cost *when absent* — decided **per span**, with no +registration-time composition and no need for the span type at creation. Each mixin = a segment in +codegen; `applies` tells codegen which span types can light up which segments; structural inheritance +is segment 0. Cost: one extra indirection (segment index + null-check) on `set`/`get`; the common +path (segment 0 only) is a single array deref either way. + +## API + +`AttributeValueTable` is the **slotted-only** store; `OptimizedTagMap` owns the hash buckets and +the composition. The key shape: **`set` returns whether it stored the value** — a `false` tells the +caller to place it in the buckets. The table knows nothing about buckets; routing is explicit and +the "did it slot?" check happens once, inside `set`. + +The table consults the registered `KnownTags.Resolver` directly (like `OptimizedTagMap` already uses +`KnownTags.slotCount()`) — no separate `Layout` object. The resolver gains two additions the codegen +already knows: `typeOf(long)` (for type-reject + the reader's `type()`) and `tagIdAt(int fieldPos)` +(only for iteration, to name a slot walked by index). 
+ +```java +public final class AttributeValueTable { // backed by KnownTags.Resolver (global layout) + + // write: @return true if stored in a slot; false => caller must bucket it + public boolean set(long tagId, CharSequence value); + public boolean set(long tagId, Object value); + public boolean set(long tagId, boolean value); + public boolean set(long tagId, int value); + public boolean set(long tagId, long value); + public boolean set(long tagId, float value); + public boolean set(long tagId, double value); + + public boolean remove(long tagId); // @return true if a slot was cleared + public void clear(); + + public boolean contains(long tagId); + public int size(); + + // read: returns a FLYWEIGHT EntryReader positioned at the slot (or null if absent). + // EntryReader's own type()/objectValue()/typed-value accessors cover value reads, so no + // separate getString/getInt/... and no separate Visitor are needed. + // NOTE: transient view — valid until the next table op; not retainable. + public TagMap.EntryReader get(long tagId); + + // iteration yields the repositioned flyweight EntryReader -> plugs into the existing + // Iterable<EntryReader> serialize path with ZERO per-tag allocation. + public Iterator<TagMap.EntryReader> iterator(); +} +``` + +Read model: `TagMap` is already `Iterable<EntryReader>` and the msgpack writer already consumes +`EntryReader`, so the table reuses that contract — no bespoke visitor and no separate typed getters +(`EntryReader`'s own coercion covers reads, shared via `TagValueConversions`). `get`/`iterator` +return a **flyweight** `EntryReader` (the `EntryReadingHelper` pattern — one reusable cursor +repositioned per slot), so no `Entry` per tag. `OptimizedTagMap`'s iterator chains the table's slot +readers then its bucket `Entry`s (also `EntryReader`s) — uniform. Materialize a retainable `Entry` +via the existing `EntryReader.entry()` when a caller needs to hold it (the flyweight is otherwise a +transient view, valid until the next table op). 
+ +Composition + the three tiers: + +```java +// OptimizedTagMap.set(long id, value) +if (!table.set(id, value)) setInBuckets(id, value); +// slotted known -> table stores, returns true +// unslotted known -> table returns false -> bucket (id-bearing Entry) +// unknown (keyOf==0)-> caller buckets directly +``` + +(Open: add a `getAndSet`-style variant only if a caller needs the prior value; `set->boolean` +covers the common write path.) ## API-compat strategy From 3bf6f92f19df56bbafe1d0ac59d7943c8b02e2b5 Mon Sep 17 00:00:00 2001 From: Douglas Q Hawkins Date: Wed, 10 Jun 2026 09:18:52 -0400 Subject: [PATCH 24/35] Add perf trade-off section to AttributeValueTable design Honest accounting: write path + allocation/GC improve; read/serialize carries some intrinsic extra CPU per tag for a generic layout-driven store (flyweight + array read + name resolve + coercion) that only POJOs (generated fields) fully recover. Net likely neutral-to-positive pre-POJO (cheap frequent writes, single serialize pass, lower GC). Measurement plan upgraded to a three-way JMH incl. a hand-written POJO to confirm the codegen endgame before building the generator. tag: no release note Co-Authored-By: Claude Opus 4.8 --- attribute-value-table-design.md | 32 +++++++++++++++++++++++++++----- 1 file changed, 27 insertions(+), 5 deletions(-) diff --git a/attribute-value-table-design.md b/attribute-value-table-design.md index d3223dfc572..c72b62473b7 100644 --- a/attribute-value-table-design.md +++ b/attribute-value-table-design.md @@ -241,10 +241,32 @@ covers the common write path.) 5. **Memory floor for tiny spans** — spans with 1–2 known tags: do the 3 arrays cost more than they save? (lazy `prims`, and a small-size threshold, mitigate this.) +## Performance: the trade, eyes open + +- **Write path (frequent): better** — set a bit + write one array slot, no per-tag `Entry`. +- **Allocation / GC: better** — removes the 1.1% `Entry` lever; less GC (CPU the profile attributes + elsewhere). 
With lazy `prims`, a typical (string-heavy) span allocates fewer objects than today. +- **Read / serialize: some extra CPU per tag** — flyweight reposition + array read + name resolve + + coercion dispatch, vs today's `Entry` that caches name and typed value. **This is intrinsic to a + generic, layout-driven store** — you cannot match direct-field access without generating the fields. + Mitigations (static `slotNames` index, lean flyweight, near-no-op coercion when the stored type + matches) narrow it but do not erase it. + +Why it's acceptable: the array-backed impl accepts that small read cost as the **price of generality** +(any tag, no codegen, no span-type-at-creation); **POJOs recover it for hot span types** on the same +interface. You pay the indirection only where you haven't specialized — i.e. where you don't care. +The net is likely neutral-to-positive even pre-POJO (cheaper frequent writes + lower GC; serialize is +a single pass per span); POJOs make it clearly positive where it counts. + ## How we'll measure -Per the agreed plan: **standalone JMH first** — `AttributeValueTable` vs `OptimizedTagMap` on a -realistic PetClinic-like tag set (component, span.kind, db.*, http.*), measuring throughput and -**allocation (`-prof gc`)**. Expect ~zero `Entry` allocs for known tags vs N today. If promising, -integrate (incl. the serializer cursor) and re-run the PetClinic CPU/alloc A/B with the existing -harness. +**Standalone JMH first, three-way**, on a realistic PetClinic-like tag set (component, span.kind, +db.*, http.*), measuring throughput and **allocation (`-prof gc`)**: +1. today's `OptimizedTagMap` (`Entry[]`) — the baseline, +2. array-backed `AttributeValueTable` — does it regress read CPU? how much alloc does it save? +3. a **hand-written POJO** for one span type (e.g. `db.client`) — confirms the codegen endgame wins + enough to justify building the generator. + +If array-backed is promising (or break-even on CPU with the alloc win), integrate it (incl. 
the +`EntryReader` serialize path) and re-run the PetClinic CPU/alloc A/B with the existing harness; build +codegen POJOs for the hot span types once the hand-POJO confirms the payoff. From 883b34e4436f6bfca0fc5343207de19aa2dbf154 Mon Sep 17 00:00:00 2001 From: Douglas Q Hawkins Date: Wed, 10 Jun 2026 09:27:28 -0400 Subject: [PATCH 25/35] Switch AttributeValueTable to dense parallel arrays (ids[] + values[]) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Replace the positional-by-fieldPos + segments/bitmask scheme with a dense association list of only the tags present: long[] ids + Object[] values. - Mixins need no special machinery — a product tag is just another (id,value) pair; the list holds only what's set. Dropped the segment/[segment][offset] scheme entirely. - id is stored => iteration names via nameOf(ids[i]); no fieldPos reverse lookup. Resolver needs only typeOf added (type-reject + reader type()). - Maps directly onto the existing EntryReadingHelper flyweight + TagValueConversions. - Trade-offs: O(n) scan (fine for small spans) and boxing of fresh per-span primitives (status_code/port). Prebuilt primitive entries are NOT a loss — Entry caches its box, so storing objectValue() reuses the shared box (0 per-span alloc). Parallel long[] prims is a deferred hatch if primitive-heavy spans show it. tag: no release note Co-Authored-By: Claude Opus 4.8 --- attribute-value-table-design.md | 153 +++++++++++++------------------- 1 file changed, 64 insertions(+), 89 deletions(-) diff --git a/attribute-value-table-design.md b/attribute-value-table-design.md index c72b62473b7..91814db8972 100644 --- a/attribute-value-table-design.md +++ b/attribute-value-table-design.md @@ -30,51 +30,66 @@ contract leaks nothing about storage, so the same interface can be satisfied by Callers (`OptimizedTagMap`) are impl-agnostic — the array impl ships first, the POJO impl can replace it per span type later with no caller change. 
-## Storage (array-backed impl) +## Storage (array-backed impl) — dense parallel arrays -Slots are organized into lazily-allocated **segments** (segment 0 = structural, 1+ = product mixins; -see "How product mixins interact"). Each slot's **type is declared by the resolver** (`typeOf`, from the YAML `type:`) and is therefore static — the table stores no per-span type array. Per segment: +A **dense association list of only the tags actually present** — not arrays sized to the slot count: ``` -long present // presence bitmask (long[] if the segment has > 64 slots) — 0 is a valid primitive -Object[] objs // object/CharSequence-typed slots -long[] prims // primitive-typed slots: boolean(0/1)/int/long/float-bits/double-bits (no boxing) +long[] ids // the tag id of each present known tag, in insertion order +Object[] values // its value (boxed if primitive) +int size // number of used entries (arrays grow as needed) ``` -- **Type is static per slot** (`KnownTags` `typeOf`); the flyweight reader derives `type()` from it. -- **Lazily allocated** per segment on first write; `objs`/`prims` each lazy (a primitive-free segment - never allocates `prims`). `size` = popcount(present) + bucket count. -- **Unknown tags** (`globalSerial == 0`, `fieldPos == NO_SLOT`) and **type mismatches** fall back to - the hash buckets (still `Entry`) — the minority; correctly-typed known tags are what we de-allocate. +- **`set(id, v)`**: scan `ids[0..size)` for a match (overwrite) else append. No `Entry`. Returns + `true` (stored) — unknown ids / type mismatches return `false` and the caller buckets them. +- **`get(id)`**: scan `ids` for the match → flyweight `EntryReader` over `(nameOf(id), values[i])`. +- **iterate/serialize**: dense walk of `ids[0..size)`; name = `nameOf(ids[i])`, value = `values[i]`. +- **Unknown tags** (`globalSerial == 0`) and **type mismatches** fall back to the hash buckets (still + `Entry`) — the minority. 
+ +Why dense rather than positional-by-`fieldPos`: +- **Mixins need no special machinery** — a product tag is just another `(id, value)` pair; the list + holds only what's set, so disabled/unused products cost nothing. No segments, presence bitmask, or + `fieldPos` partition. +- **The id is stored**, so iteration names a tag directly (`nameOf`) — no `fieldPos → id` reverse map. +- **Maps onto `EntryReadingHelper`** (already in `LegacyTagMap`): a reusable `EntryReader` holding + `(tag, Object value)` with coercion via `TagValueConversions` and `EntryReader.entry()` to + materialize. The flyweight per index is `(nameOf(ids[i]), values[i])`. Almost nothing new. +- **`fieldPos` stops mattering** for the generic store — identity + name come from the id; positional + field layout is the POJO specialization's concern only. + +Trade-offs (accepted): **O(n) scan** instead of O(1)-by-position — fine in the small-map regime +(spans carry ~5–15 tags; a packed `long[]` scan is cache-friendly, and the common path is set-once + +one dense serialize pass). **Boxing** of the few primitive tags (status_code, port) — most tags are +strings (no box), and a boxed `Integer` is smaller than the `Entry` it replaces, so still a net win; +if a primitive-heavy span type shows it in the JMH, add a parallel `long[] prims` aligned with `ids` +(value in `prims` when primitive, `values[i] = null`). + +Prebuilt/shared `Entry`s holding a primitive are **not** a loss: `Entry` caches its boxed value, so a +write sourced from a prebuilt `EntryReader` stores that *shared* box (`entry.objectValue()`) — zero +per-span allocation, same as today. The only residual boxing is a **fresh, per-span-varying primitive** +set via the typed `set(long, int/...)` overloads (status_code, port) — a tiny small-box cost, +removable later via the parallel `long[] prims` if it ever shows. So no real regression. ### Type discipline -The resolver declares each slot's type (`typeOf`). 
`set` accepts a value only if it matches; otherwise it returns -`false` and the caller buckets it as a normal `Entry`. Slots stay mono-typed (so type need not be -stored per span) and off-type writes degrade gracefully instead of corrupting a slot. Type *coercion -on read* (e.g. int → string for serialization) is `EntryReader`'s job (via `TagValueConversions`), -not a widening of the stored value. +The resolver declares each tag's type (`typeOf`). `set` accepts a value only if it matches; otherwise +it returns `false` and the caller buckets it as a normal `Entry`. Off-type writes degrade gracefully +instead of corrupting the slot. Type *coercion on read* (e.g. int → string for serialization) is +`EntryReader`'s job (via `TagValueConversions`), not a widening of the stored value. -### Reuse (read side already exists) - -The flyweight reader is the `EntryReadingHelper` pattern already used by `LegacyTagMap`: a reusable -`EntryReader` repositioned per slot, coercion delegated to `TagValueConversions`, and -`EntryReader.entry()` for materialize-on-demand. No new reader, visitor, or coercion code. - -Memory: trades *N* per-tag `Entry` objects (N = known tags on the span) for a small presence bitmask -plus up to two lazily-allocated arrays per occupied segment. Net win when a span carries more than -~2–3 known tags (PetClinic spans carry 5–10) and especially on the serialize path (zero transient -`Entry`). +Memory: trades *N* per-tag `Entry` objects for two arrays (`ids` + `values`) sized to the tags +present, plus a box per primitive tag. Net win when a span carries more than ~2–3 known tags +(PetClinic spans carry 5–10), and especially on the serialize path (zero transient `Entry`). 
## Write path ``` -table.set(long id, value): // returns true iff stored in a slot - pos = fieldPos(id) - if pos < slotCount && typeMatches(id, value): - present |= bit(pos); prims[pos]=packed OR objs[pos]=value // no Entry - return true - return false // no slot or wrong type +table.set(long id, value): // returns true iff stored + if globalSerial(id) == 0 || !typeMatches(id, value): return false // unknown / wrong type + for i in 0..size: // small-n linear scan + if ids[i] == id: values[i] = value; return true // overwrite + ids[size] = id; values[size] = value; size++; return true // append, no Entry // caller (OptimizedTagMap): if (!table.set(id, value)) setInBuckets(id, value) // Entry (unknown / off-type) @@ -96,60 +111,20 @@ only when a caller needs to hold one, via the existing `EntryReader.entry()`. `TagMap` is already `Iterable` and the msgpack `TraceMapper` already consumes `EntryReader` — so the table reuses that contract with **no serializer change and no bespoke visitor**. -`iterator()` walks occupied slots and yields the repositioned flyweight `EntryReader` (name via -`resolver.tagIdAt(pos)` → `nameOf`, value from the arrays). `OptimizedTagMap` chains the table's slot -readers then its bucket `Entry`s (also `EntryReader`s). Result: a span's known tags serialize with -**zero `Entry` allocation**; only unknown/bucket tags retain `Entry`s. - -## How product mixins interact with the layout - -The structural inheritance (`base → http → http.server`) is the **stable, build-time** part of -the layout — those tags map to fixed slots. Product mixins (profiling, dsm, appsec, ci) are the -**dynamic** part: present only when enabled, attached by `applies`. So the question is whether/how -they consume slots. Three models: - -1. **Unslotted product tags (recommended first).** Mixin tags are `slot: false` → keep an id but - live in the buckets. AVT slot layout = structural tags only. 
Products add few tags, so the bucket - cost is negligible, and **disabled products cost zero** per-span array space. Clean split: - *structure → slots, enrichment → buckets*; preserves the fixed-layout property. - -2. **Layout composed at registration.** `slotCount`/layout is assembled at init from - `structural + enabled products`. An `applies: all` product (profiling/dsm) extends the universal - slot region — its tag gets a slot on every span *only when enabled*; disabled products contribute - nothing. Fits the existing model (`slotCount` is a dynamic constant captured at resolver - registration; codegen emits each mixin's slot contribution, runtime concatenates the enabled - ones). `applies: [types]` products don't fit a single global layout (they'd waste global slots on - other types). - -3. **Per-span-type layouts.** Each type's AVT = resolved structural tags + product tags whose - `applies` matches. Tightest; `applies: [http.server]` appends appsec's slots to exactly that type. - Requires the span type at creation — the bigger change. - -`applies` is exactly the composer's signal: `all` → universal-region candidate; `[types]` → per-type -(or bucket). - -### Recommended: lazy per-mixin segments - -Because `set(tagId)→bool` / `get(tagId)→EntryReader` hide the storage strategy from callers, the table -can organize itself as **segments**, each lazily allocated: - -``` -segment 0 = structural tags (base + the span type's inherited/own tags) — the common case -segment 1+ = one per product mixin (profiling, dsm, appsec, ci, …) — allocated on first touch -``` +`iterator()` does a dense walk of `ids[0..size)` and yields the repositioned flyweight `EntryReader` +(name = `nameOf(ids[i])`, value = `values[i]`). `OptimizedTagMap` chains the table's readers then its +bucket `Entry`s (also `EntryReader`s). Result: a span's known tags serialize with **zero `Entry` +allocation**; only unknown/bucket tags retain `Entry`s. 
-- The `fieldPos` field partitions into `[segment : 4][offset : 12]`, so a `tagId` names its segment - and intra-segment offset directly — no extra lookup. -- `set` routes to `Segment[segOf(fieldPos)]`, allocating a mixin segment **on its first touch on this - span**; a span that never sets a product's tag never allocates that segment. -- `get`/iteration walk segment 0 + whatever mixin segments exist. +## How product mixins interact -This beats the three models above: product tags get positional, no-`Entry` storage *when present* -(unlike "always bucket"), with zero per-span cost *when absent* — decided **per span**, with no -registration-time composition and no need for the span type at creation. Each mixin = a segment in -codegen; `applies` tells codegen which span types can light up which segments; structural inheritance -is segment 0. Cost: one extra indirection (segment index + null-check) on `set`/`get`; the common -path (segment 0 only) is a single array deref either way. +The dense representation makes this nearly a non-question: **a product tag is just another `(id, value)` +pair**. The list holds only the tags actually set, so a span that doesn't trigger profiling/dsm/appsec +simply has none of their pairs — zero cost, decided per span, with no segments, no layout composition, +and no need for the span type at creation. `applies` stays a *codegen* concern (which span types may +emit which product tags / whether a product tag earns a stable id at all); it no longer shapes the +runtime storage. (The earlier positional-segment scheme — `fieldPos = [segment][offset]`, lazily +allocated per mixin — is moot under dense arrays and was dropped.) ## API @@ -159,9 +134,9 @@ caller to place it in the buckets. The table knows nothing about buckets; routin the "did it slot?" check happens once, inside `set`. The table consults the registered `KnownTags.Resolver` directly (like `OptimizedTagMap` already uses -`KnownTags.slotCount()`) — no separate `Layout` object. 
The resolver gains two additions the codegen -already knows: `typeOf(long)` (for type-reject + the reader's `type()`) and `tagIdAt(int fieldPos)` -(only for iteration, to name a slot walked by index). +`KnownTags.slotCount()`) — no separate `Layout` object. The dense store needs only **one** addition +the codegen already knows: `typeOf(long)` (for type-reject + the reader's `type()`). No reverse +`fieldPos → id` lookup is needed — the id is stored, so iteration names a tag via `nameOf(ids[i])`. ```java public final class AttributeValueTable { // backed by KnownTags.Resolver (global layout) @@ -182,7 +157,7 @@ public final class AttributeValueTable { // backed by KnownTags.Resolver public boolean contains(long tagId); public int size(); - // read: returns a FLYWEIGHT EntryReader positioned at the slot (or null if absent). + // read: returns a FLYWEIGHT EntryReader positioned at the matching entry (or null if absent). // EntryReader's own type()/objectValue()/ accessors cover value reads, so no // separate getString/getInt/... and no separate Visitor are needed. // NOTE: transient view — valid until the next table op; not retainable. @@ -198,7 +173,7 @@ Read model: `TagMap` is already `Iterable` and the msgpack writer a `EntryReader`, so the table reuses that contract — no bespoke visitor and no separate typed getters (`EntryReader`'s own coercion covers reads, shared via `TagValueConversions`). `get`/`iterator` return a **flyweight** `EntryReader` (the `EntryReadingHelper` pattern — one reusable cursor -repositioned per slot), so no `Entry` per tag. `OptimizedTagMap`'s iterator chains the table's slot +repositioned per entry), so no `Entry` per tag. `OptimizedTagMap`'s iterator chains the table's readers then its bucket `Entry`s (also `EntryReader`s) — uniform. Materialize a retainable `Entry` via the existing `EntryReader.entry()` when a caller needs to hold it (the flyweight is otherwise a transient view, valid until the next table op). 
From 40e5e4ed513f9b06cfd95662c6157ee37fee4701 Mon Sep 17 00:00:00 2001 From: Douglas Q Hawkins Date: Wed, 10 Jun 2026 09:29:36 -0400 Subject: [PATCH 26/35] Drop the parallel-prims hatch from AttributeValueTable design A parallel long[] prims adds a whole extra per-span array + per-entry type tracking, costing more than the few small boxes it saves -> rejected. Single Object[] values, box the few fresh primitives (prebuilt-primitive entries reuse Entry's cached box, so 0 per-span alloc there). Cleaned stale open-questions/perf references to prims/segments/positional. tag: no release note Co-Authored-By: Claude Opus 4.8 --- attribute-value-table-design.md | 51 ++++++++++++++++----------------- 1 file changed, 25 insertions(+), 26 deletions(-) diff --git a/attribute-value-table-design.md b/attribute-value-table-design.md index 91814db8972..4286475c796 100644 --- a/attribute-value-table-design.md +++ b/attribute-value-table-design.md @@ -61,15 +61,16 @@ Why dense rather than positional-by-`fieldPos`: Trade-offs (accepted): **O(n) scan** instead of O(1)-by-position — fine in the small-map regime (spans carry ~5–15 tags; a packed `long[]` scan is cache-friendly, and the common path is set-once + one dense serialize pass). **Boxing** of the few primitive tags (status_code, port) — most tags are -strings (no box), and a boxed `Integer` is smaller than the `Entry` it replaces, so still a net win; -if a primitive-heavy span type shows it in the JMH, add a parallel `long[] prims` aligned with `ids` -(value in `prims` when primitive, `values[i] = null`). +strings (no box), and a boxed `Integer` is smaller than the `Entry` it replaces, so still a net win. -Prebuilt/shared `Entry`s holding a primitive are **not** a loss: `Entry` caches its boxed value, so a -write sourced from a prebuilt `EntryReader` stores that *shared* box (`entry.objectValue()`) — zero -per-span allocation, same as today. 
The only residual boxing is a **fresh, per-span-varying primitive** -set via the typed `set(long, int/...)` overloads (status_code, port) — a tiny small-box cost, -removable later via the parallel `long[] prims` if it ever shows. So no real regression. +A parallel `long[] prims` to avoid that boxing was **considered and rejected**: it adds a whole extra +per-span array *and* per-entry type tracking (which array holds the value), which costs more than the +handful of small boxes it would save. Single `Object[] values`, box the few fresh primitives. + +Prebuilt/shared `Entry`s holding a primitive are **not** a loss either: `Entry` caches its boxed value, +so a write sourced from a prebuilt `EntryReader` stores that *shared* box (`entry.objectValue()`) — +zero per-span allocation, same as today. So the only boxing is a fresh, per-span-varying primitive set +via the typed `set(long, int/...)` overloads — negligible. No real regression. ### Type discipline @@ -203,29 +204,27 @@ covers the common write path.) ## Open questions -1. **Global vs per-span-type layout.** Global (today) is simplest and needs no span-type at - creation, but sizes every span's arrays to the union of all known tags (~40+ with full - conventions). Per-span-type layout (from the YAML) is tighter but requires knowing the span type - when the span is created — a bigger change. *Recommend: start global, measure, then evaluate - per-type.* -2. **Serializer integration depth.** The `forEachKnown` cursor is the crux of the allocation win; - without it we only save on the write path. Worth doing for the real number. -3. **Primitive packing layout.** Single `long[] prims` + `byte[] types`, vs a tagged `Object[]` - with boxing — measure whether the extra arrays pay off vs just `Object[]` + box. -4. **`Ledger` / builder path** — how accumulated changes apply to arrays. -5. **Memory floor for tiny spans** — spans with 1–2 known tags: do the 3 arrays cost more than they - save? 
(lazy `prims`, and a small-size threshold, mitigate this.) +1. **Initial array capacity / growth.** Starting size for `ids`/`values` and growth policy (spans + carry ~5–15 tags; pick a sensible default to avoid resizes without over-allocating tiny spans). +2. **`Ledger` / builder path** — how accumulated changes apply to the dense arrays. +3. **Scan vs index at larger N.** If some span types carry many tags, confirm the linear scan still + wins; otherwise a small index is an option (but adds cost the dense form is trying to avoid). + +Resolved during design: dense parallel arrays over positional-by-`fieldPos` (mixins become plain +pairs); single `Object[] values` over a parallel `long[] prims` (the extra array + type tracking +cost more than the few boxes); reads/serialize via the existing `EntryReader` rather than a bespoke +visitor; no separate `Layout` (consult the resolver, + `typeOf`). ## Performance: the trade, eyes open -- **Write path (frequent): better** — set a bit + write one array slot, no per-tag `Entry`. +- **Write path (frequent): better** — scan + append into `ids`/`values`, no per-tag `Entry`. - **Allocation / GC: better** — removes the 1.1% `Entry` lever; less GC (CPU the profile attributes - elsewhere). With lazy `prims`, a typical (string-heavy) span allocates fewer objects than today. -- **Read / serialize: some extra CPU per tag** — flyweight reposition + array read + name resolve + + elsewhere). A typical (string-heavy) span allocates two arrays instead of N `Entry`s. +- **Read / serialize: some extra CPU per tag** — flyweight reposition + array read + `nameOf` + coercion dispatch, vs today's `Entry` that caches name and typed value. **This is intrinsic to a - generic, layout-driven store** — you cannot match direct-field access without generating the fields. - Mitigations (static `slotNames` index, lean flyweight, near-no-op coercion when the stored type - matches) narrow it but do not erase it. 
+ generic store** — you cannot match direct-field access without generating the fields (the POJO + endgame). Mitigations (lean flyweight, near-no-op coercion when the stored type matches) narrow it + but do not erase it. Why it's acceptable: the array-backed impl accepts that small read cost as the **price of generality** (any tag, no codegen, no span-type-at-creation); **POJOs recover it for hot span types** on the same From bd375d7aed37700c713f2f8725eeecbd233ac863 Mon Sep 17 00:00:00 2001 From: Douglas Q Hawkins Date: Wed, 10 Jun 2026 09:30:43 -0400 Subject: [PATCH 27/35] Final consistency pass on AttributeValueTable doc (dense arrays throughout) Fix lingering positional/segment/forEachKnown references in the intro, impl list, and API-compat strategy to match the dense (id,value) array design. tag: no release note Co-Authored-By: Claude Opus 4.8 --- attribute-value-table-design.md | 14 +++++++------- 1 file changed, 7 insertions(+), 7 deletions(-) diff --git a/attribute-value-table-design.md b/attribute-value-table-design.md index 4286475c796..9167a88459b 100644 --- a/attribute-value-table-design.md +++ b/attribute-value-table-design.md @@ -9,7 +9,7 @@ Eliminate the **per-tag `TagMap$Entry` allocation** — the #1 remaining tracer fast-path made tag *placement* fast (positional slot vs hash bucket) but still allocates one `Entry` wrapper per tag set, and keeps it alive until serialize. -**Idea:** for known (slotted) tags, store the *values* positionally in typed arrays — no +**Idea:** for known tags, store the *values* in a dense `(id, value)` pair array — no `Entry` object per tag. A span's known tags never materialize an `Entry`; the serializer reads `(name, type, value)` straight from the arrays. @@ -21,7 +21,7 @@ generator assigns each known tag a `fieldPos`, and the `AttributeValueTable` is `AttributeValueTable` is an **interface**. 
The opaque `set(long)→boolean` / `get(long)→EntryReader` contract leaks nothing about storage, so the same interface can be satisfied by either backing: -- **Array/segment-backed** (generic, resolver-driven) — the measurable first impl; no codegen. +- **Array-backed** (generic, resolver-driven, dense `(id, value)` arrays) — the measurable first impl; no codegen. - **POJO-backed** (codegen, per span type) — a generated class with real typed fields + generated `set`/`get` switches. Densest and most JIT-friendly (fields inline, no bounds checks); type-reject falls out for free (a wrong-type `set` finds no matching field → returns `false`). Lazily-created @@ -196,11 +196,11 @@ covers the common write path.) `TagMap` is a large `Entry`-centric interface. Plan: 1. Implement `AttributeValueTable` as an alternative storage *inside* `OptimizedTagMap` - (replace the `Entry[] knownEntries` with the three arrays), rather than a new top-level type — - keeps the whole interface working. -2. Slot get/set/remove/iterate operate on the arrays; bucket paths unchanged. -3. `Entry`-returning methods materialize lazily. -4. Add the `forEachKnown` cursor and wire the serializer. + (replace the `Entry[] knownEntries` with the dense `ids`/`values` arrays), rather than a new + top-level type — keeps the whole interface working. +2. Known-tag get/set/remove/iterate operate on the dense arrays; bucket paths unchanged. +3. `Entry`-returning methods materialize lazily via `EntryReader.entry()`. +4. Reuse the existing `Iterable` serialize path (flyweight per entry) — no new cursor. 
## Open questions From fc2f28a719a0f66a1d74e6e655efd6ba914e158e Mon Sep 17 00:00:00 2001 From: Douglas Q Hawkins Date: Wed, 10 Jun 2026 09:41:37 -0400 Subject: [PATCH 28/35] Reframe design around phasing: phase 1 = in-place dense storage in OptimizedTagMap MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Phase 1 replaces OptimizedTagMap's Entry[] knownEntries with dense long[] ids + Object[] values in place — no new type/interface/codegen; it also removes the positional collision machinery (collidedSlots, occupancy, bucket-eviction). AttributeValueTable (interface + codegen POJO) is demoted to phase 2, extracted from the working dense impl when warranted. tag: no release note Co-Authored-By: Claude Opus 4.8 --- attribute-value-table-design.md | 33 +++++++++++++++++++-------------- 1 file changed, 19 insertions(+), 14 deletions(-) diff --git a/attribute-value-table-design.md b/attribute-value-table-design.md index 9167a88459b..f8a43343104 100644 --- a/attribute-value-table-design.md +++ b/attribute-value-table-design.md @@ -1,4 +1,4 @@ -# AttributeValueTable — design +# Dense known-tag storage (a.k.a. AttributeValueTable) — design Branch: `dougqh/attribute-value-table` (off `dougqh/tagmap-tagid-experiment`) @@ -13,24 +13,29 @@ fast-path made tag *placement* fast (positional slot vs hash bucket) but still a `Entry` object per tag. A span's known tags never materialize an `Entry`; the serializer reads `(name, type, value)` straight from the arrays. -This is the runtime counterpart to the [`tag-conventions.yaml`](tag-conventions.yaml) spec: the -generator assigns each known tag a `fieldPos`, and the `AttributeValueTable` is indexed by it. +## Phasing -## An interface, not one storage scheme +- **Phase 1 (this design): replace `OptimizedTagMap`'s `Entry[] knownEntries` in place** with dense + `long[] ids` + `Object[] values`. No new type, no interface, no codegen. 
This is purely an internal + storage change to one class, and it *removes* machinery as much as it adds (see below). It's the + measurable step that kills the per-tag `Entry` for known tags. +- **Phase 2 (later, if warranted): extract an `AttributeValueTable` interface + a codegen POJO** per + hot span type (real typed fields, no bounds checks, type-reject for free). Extracting the interface + from a *working* dense impl is an easy refactor — and we'll know its true shape from having built + it, rather than guessing now. The `set(long)→boolean` / `get(long)→EntryReader` contract below is + where that interface is headed; in phase 1 it's just how `OptimizedTagMap` works internally. -`AttributeValueTable` is an **interface**. The opaque `set(long)→boolean` / `get(long)→EntryReader` -contract leaks nothing about storage, so the same interface can be satisfied by either backing: +Everything below describes **phase 1** unless marked otherwise. -- **Array-backed** (generic, resolver-driven, dense `(id, value)` arrays) — the measurable first impl; no codegen. -- **POJO-backed** (codegen, per span type) — a generated class with real typed fields + generated - `set`/`get` switches. Densest and most JIT-friendly (fields inline, no bounds checks); type-reject - falls out for free (a wrong-type `set` finds no matching field → returns `false`). Lazily-created - mixin sub-POJOs for products. +## What phase 1 removes -Callers (`OptimizedTagMap`) are impl-agnostic — the array impl ships first, the POJO impl can replace -it per span type later with no caller change. +Replacing the positional `Entry[] knownEntries` with a dense scan-by-id store deletes the collision +machinery the positional slot model needed: first-writer-wins occupancy, the `collidedSlots` bitmask, +and bucket-eviction-on-reclaim. Dense `(id, value)` pairs have no positional collisions — you match by +id. 
`fieldPos`/`slotCount` stop mattering for storage (identity, name, and hash all come from the id); +they stay in the tagId for the eventual POJO but the dense store ignores them. -## Storage (array-backed impl) — dense parallel arrays +## Storage — dense parallel arrays A **dense association list of only the tags actually present** — not arrays sized to the slot count: From a58dc0af89b84246b66816dda3f62d28870958b6 Mon Sep 17 00:00:00 2001 From: Douglas Q Hawkins Date: Wed, 10 Jun 2026 10:08:13 -0400 Subject: [PATCH 29/35] Add AttrStoreBenchmark: three-way dense vs Entry[] vs POJO (+ jmhProfilers hook) Validates phase 1. db.client-like tag set, build + build-iterate, gc profiler: build: current 14.99M ops/s 752 B/op | dense 21.32M(+42%) 248 B(-67%) | pojo 38.5M 64 B buildIter: current 8.37M ops/s 720 B/op | dense 10.19M(+22%) 224 B(-69%) | pojo 25.9M ~0* Dense beats Entry[] on BOTH throughput and allocation (no read-path regression); POJO is the ~2-3x / near-zero-alloc endgame. (* pojo buildIter ~0 = escape-analysis scalar replacement; escaping it allocs ~64 B/op.) Also: -PjmhProfilers hook in internal-api build.gradle.kts. 
tag: no release note Co-Authored-By: Claude Opus 4.8 --- internal-api/build.gradle.kts | 3 + .../datadog/trace/api/AttrStoreBenchmark.java | 226 ++++++++++++++++++ 2 files changed, 229 insertions(+) create mode 100644 internal-api/src/jmh/java/datadog/trace/api/AttrStoreBenchmark.java diff --git a/internal-api/build.gradle.kts b/internal-api/build.gradle.kts index 28662725bbd..89b5b17e958 100644 --- a/internal-api/build.gradle.kts +++ b/internal-api/build.gradle.kts @@ -305,4 +305,7 @@ jmh { if (project.hasProperty("jmhFork")) { fork.set((project.property("jmhFork") as String).toInt()) } + if (project.hasProperty("jmhProfilers")) { + profilers.set((project.property("jmhProfilers") as String).split(",").toList()) + } } diff --git a/internal-api/src/jmh/java/datadog/trace/api/AttrStoreBenchmark.java b/internal-api/src/jmh/java/datadog/trace/api/AttrStoreBenchmark.java new file mode 100644 index 00000000000..3314302c89e --- /dev/null +++ b/internal-api/src/jmh/java/datadog/trace/api/AttrStoreBenchmark.java @@ -0,0 +1,226 @@ +package datadog.trace.api; + +import java.util.concurrent.TimeUnit; +import org.openjdk.jmh.annotations.Benchmark; +import org.openjdk.jmh.annotations.BenchmarkMode; +import org.openjdk.jmh.annotations.Fork; +import org.openjdk.jmh.annotations.Measurement; +import org.openjdk.jmh.annotations.Mode; +import org.openjdk.jmh.annotations.OutputTimeUnit; +import org.openjdk.jmh.annotations.Scope; +import org.openjdk.jmh.annotations.Setup; +import org.openjdk.jmh.annotations.State; +import org.openjdk.jmh.annotations.TearDown; +import org.openjdk.jmh.annotations.Threads; +import org.openjdk.jmh.annotations.Warmup; +import org.openjdk.jmh.infra.Blackhole; + +/** + * Phase-1 validation: store a span's known tags three ways and measure throughput + allocation + * ({@code -prof gc}). Models the real lifecycle — set N tags, then iterate once (serialize). + * + *

<ol>
    + *
<li>{@code current}: today's {@link TagMap} (OptimizedTagMap, Entry[] knownEntries) — one + * {@code Entry} allocated per tag. + *
<li>{@code dense}: dense {@code long[] ids + Object[] values} — no per-tag Entry (boxes the one + * int tag). The phase-1 design. + *
<li>{@code pojo}: a hand-written class with typed fields — the phase-2 codegen endgame (no Entry, + * no boxing, no arrays-per-tag). + * </ol>
+ * + * Tag set is db.client-like (the dominant PetClinic span): 11 strings + 1 int. + */ +@BenchmarkMode(Mode.Throughput) +@OutputTimeUnit(TimeUnit.SECONDS) +@Fork(1) +@Warmup(iterations = 3) +@Measurement(iterations = 5) +@Threads(1) +@State(Scope.Benchmark) +public class AttrStoreBenchmark { + static final String[] NAMES = { + "component", "span.kind", "language", "_dd.base_service", + "db.type", "db.instance", "db.operation", "db.user", "db.pool.name", + "peer.hostname", "peer.ipv4", "peer.port", // last is the int + }; + static final int PORT_IDX = 11; + static final int N = NAMES.length; + + static final long[] IDS = new long[N]; + static final Object[] VALUES = new Object[N]; // string values; port is boxed Integer + + @Setup + public void setup() { + for (int i = 0; i < N; ++i) { + IDS[i] = KnownTags.tagId(i + 1, i, NAMES[i]); // serial=i+1, fieldPos=i + VALUES[i] = (i == PORT_IDX) ? Integer.valueOf(5432) : ("value-" + i); + } + final java.util.HashMap<String, Long> nameToId = new java.util.HashMap<>(N * 2); + for (int i = 0; i < N; ++i) { + nameToId.put(NAMES[i], IDS[i]); + } + KnownTags.register( + new KnownTags.Resolver() { + @Override + public String nameOf(long tagId) { + int gs = (int) ((tagId >>> 48) & 0x7FFF); + return (gs >= 1 && gs <= N) ? NAMES[gs - 1] : null; + } + + @Override + public long keyOf(String name) { + Long id = nameToId.get(name); + return id == null ?
0L : id; + } + + @Override + public int slotCount() { + return N; + } + }); + } + + @TearDown + public void tearDown() { + KnownTags.register(null); + } + + // ---------- current: OptimizedTagMap (Entry per tag) ---------- + @Benchmark + public TagMap build_current() { + TagMap map = TagMap.create(); + for (int i = 0; i < N; ++i) { + map.set(IDS[i], VALUES[i]); + } + return map; + } + + @Benchmark + public void buildIter_current(Blackhole bh) { + TagMap map = TagMap.create(); + for (int i = 0; i < N; ++i) { + map.set(IDS[i], VALUES[i]); + } + for (TagMap.EntryReader e : map) { + bh.consume(e.tag()); + bh.consume(e.objectValue()); + } + } + + // ---------- dense: long[] ids + Object[] values ---------- + @Benchmark + public DenseStore build_dense() { + DenseStore s = new DenseStore(); + for (int i = 0; i < N; ++i) { + s.set(IDS[i], VALUES[i]); + } + return s; + } + + @Benchmark + public void buildIter_dense(Blackhole bh) { + DenseStore s = new DenseStore(); + for (int i = 0; i < N; ++i) { + s.set(IDS[i], VALUES[i]); + } + for (int i = 0; i < s.size; ++i) { + bh.consume(KnownTags.nameOf(s.ids[i])); + bh.consume(s.values[i]); + } + } + + // ---------- pojo: typed fields ---------- + @Benchmark + public DbPojo build_pojo() { + DbPojo p = new DbPojo(); + for (int i = 0; i < N; ++i) { + if (i == PORT_IDX) { + p.set(IDS[i], 5432); + } else { + p.set(IDS[i], VALUES[i]); + } + } + return p; + } + + @Benchmark + public void buildIter_pojo(Blackhole bh) { + DbPojo p = new DbPojo(); + for (int i = 0; i < N; ++i) { + if (i == PORT_IDX) { + p.set(IDS[i], 5432); + } else { + p.set(IDS[i], VALUES[i]); + } + } + p.iterate(bh); + } + + /** Dense (id, value) store — phase-1 design. 
*/ + static final class DenseStore { + long[] ids = new long[16]; + Object[] values = new Object[16]; + int size; + + void set(long id, Object v) { + for (int i = 0; i < size; ++i) { + if (ids[i] == id) { + values[i] = v; + return; + } + } + if (size == ids.length) { + ids = java.util.Arrays.copyOf(ids, size * 2); + values = java.util.Arrays.copyOf(values, size * 2); + } + ids[size] = id; + values[size] = v; + size++; + } + } + + /** Hand-written POJO — phase-2 codegen endgame. serial = fieldPos+1 here. */ + static final class DbPojo { + String component, spanKind, language, baseService, dbType, dbInstance, dbOperation, dbUser, + dbPoolName, peerHostname, peerIpv4; + int peerPort; + + void set(long id, Object v) { + switch ((int) ((id >>> 48) & 0x7FFF)) { + case 1: component = (String) v; break; + case 2: spanKind = (String) v; break; + case 3: language = (String) v; break; + case 4: baseService = (String) v; break; + case 5: dbType = (String) v; break; + case 6: dbInstance = (String) v; break; + case 7: dbOperation = (String) v; break; + case 8: dbUser = (String) v; break; + case 9: dbPoolName = (String) v; break; + case 10: peerHostname = (String) v; break; + case 11: peerIpv4 = (String) v; break; + default: /* off-type / unknown -> would bucket */ break; + } + } + + void set(long id, int v) { + if (((int) ((id >>> 48) & 0x7FFF)) == 12) { + peerPort = v; + } + } + + void iterate(Blackhole bh) { + bh.consume(component); + bh.consume(spanKind); + bh.consume(language); + bh.consume(baseService); + bh.consume(dbType); + bh.consume(dbInstance); + bh.consume(dbOperation); + bh.consume(dbUser); + bh.consume(dbPoolName); + bh.consume(peerHostname); + bh.consume(peerIpv4); + bh.consume(peerPort); + } + } +} From bf9d743e9a8fda29375998bf5f8bb85d603556f4 Mon Sep 17 00:00:00 2001 From: Douglas Q Hawkins Date: Wed, 10 Jun 2026 10:36:32 -0400 Subject: [PATCH 30/35] Phase 1: dense known-tag storage in OptimizedTagMap (no per-tag Entry) Replace OptimizedTagMap's positional 
Entry[] knownEntries + collidedSlots collision machinery with a dense store: long[] knownIds + Object[] knownValues + int knownCount. Known tags (globalSerial != 0) scan-by-id to overwrite or append (grow from 8); unknown tags stay in the hash buckets. Deletes all collision logic (first-writer-wins, collidedSlots, bucket-eviction); fieldPos/slotCount no longer size storage. - Setting a known tag allocates NO Entry. getEntry/getAndSet/getAndRemove materialize on demand (explicit calls); knownRemove is O(1) swap-remove. - Iteration/forEach/serialize yield a REUSED flyweight EntryReader (EntryReadingHelper) for dense entries -> zero per-tag Entry on the serialize path; chained with bucket Entry-s. The one retention site (entrySet -> new HashMap<>(tagMap)) materializes Map.Entry via .mapEntry() in a dedicated EntriesIterator; no other consumer retains the reader. - Tests: TagMap* incl fuzz (50k iters) + DDSpanSerializationTest + dd-trace-core span/interceptor/tagprocessor suites all green. TagMapTagIdTest assertSame-> value-equality (getEntry now materializes fresh per call). Validated by AttrStoreBenchmark: dense beats the old Entry[] on throughput AND allocation (~+22-42% / -67-69%). tag: ai generated tag: no release note Co-Authored-By: Claude Opus 4.8 --- .../datadog/trace/api/AttrStoreBenchmark.java | 79 +++- .../main/java/datadog/trace/api/TagMap.java | 389 ++++++++++-------- .../datadog/trace/api/TagMapTagIdTest.java | 17 +- 3 files changed, 284 insertions(+), 201 deletions(-) diff --git a/internal-api/src/jmh/java/datadog/trace/api/AttrStoreBenchmark.java b/internal-api/src/jmh/java/datadog/trace/api/AttrStoreBenchmark.java index 3314302c89e..bc7488eb4c6 100644 --- a/internal-api/src/jmh/java/datadog/trace/api/AttrStoreBenchmark.java +++ b/internal-api/src/jmh/java/datadog/trace/api/AttrStoreBenchmark.java @@ -24,8 +24,8 @@ * {@code Entry} allocated per tag. *
<li>{@code dense}: dense {@code long[] ids + Object[] values} — no per-tag Entry (boxes the one * int tag). The phase-1 design. - *
<li>{@code pojo}: a hand-written class with typed fields — the phase-2 codegen endgame (no Entry, - * no boxing, no arrays-per-tag). + *
<li>{@code pojo}: a hand-written class with typed fields — the phase-2 codegen endgame (no + * Entry, no boxing, no arrays-per-tag). * </ol> * * Tag set is db.client-like (the dominant PetClinic span): 11 strings + 1 int. @@ -39,9 +39,18 @@ @State(Scope.Benchmark) public class AttrStoreBenchmark { static final String[] NAMES = { - "component", "span.kind", "language", "_dd.base_service", - "db.type", "db.instance", "db.operation", "db.user", "db.pool.name", - "peer.hostname", "peer.ipv4", "peer.port", // last is the int + "component", + "span.kind", + "language", + "_dd.base_service", + "db.type", + "db.instance", + "db.operation", + "db.user", + "db.pool.name", + "peer.hostname", + "peer.ipv4", + "peer.port", // last is the int }; static final int PORT_IDX = 11; static final int N = NAMES.length; @@ -181,24 +190,56 @@ void set(long id, Object v) { /** Hand-written POJO — phase-2 codegen endgame. serial = fieldPos+1 here. */ static final class DbPojo { - String component, spanKind, language, baseService, dbType, dbInstance, dbOperation, dbUser, - dbPoolName, peerHostname, peerIpv4; + String component, + spanKind, + language, + baseService, + dbType, + dbInstance, + dbOperation, + dbUser, + dbPoolName, + peerHostname, + peerIpv4; int peerPort; void set(long id, Object v) { switch ((int) ((id >>> 48) & 0x7FFF)) { - case 1: component = (String) v; break; - case 2: spanKind = (String) v; break; - case 3: language = (String) v; break; - case 4: baseService = (String) v; break; - case 5: dbType = (String) v; break; - case 6: dbInstance = (String) v; break; - case 7: dbOperation = (String) v; break; - case 8: dbUser = (String) v; break; - case 9: dbPoolName = (String) v; break; - case 10: peerHostname = (String) v; break; - case 11: peerIpv4 = (String) v; break; - default: /* off-type / unknown -> would bucket */ break; + case 1: + component = (String) v; + break; + case 2: + spanKind = (String) v; + break; + case 3: + language = (String) v; + break; + case 4: + baseService =
(String) v; + break; + case 5: + dbType = (String) v; + break; + case 6: + dbInstance = (String) v; + break; + case 7: + dbOperation = (String) v; + break; + case 8: + dbUser = (String) v; + break; + case 9: + dbPoolName = (String) v; + break; + case 10: + peerHostname = (String) v; + break; + case 11: + peerIpv4 = (String) v; + break; + default: /* off-type / unknown -> would bucket */ + break; } } diff --git a/internal-api/src/main/java/datadog/trace/api/TagMap.java b/internal-api/src/main/java/datadog/trace/api/TagMap.java index 1d90ebfe54f..6d8705376dc 100644 --- a/internal-api/src/main/java/datadog/trace/api/TagMap.java +++ b/internal-api/src/main/java/datadog/trace/api/TagMap.java @@ -1414,23 +1414,26 @@ private static final class EmptyHolder { private int size; private boolean frozen; - // Positional store for known tags, indexed by fieldPos. Lazily allocated on the first known-tag - // write. A known tag claims its slot first-writer-wins; colliding tags (a different globalSerial - // already owns the slot) fall back to the hash buckets. Entries are self-describing (carry their - // tagId), so a bucketed tag still serializes correctly. - private TagMap.Entry[] knownEntries; - - // Bitmask of fieldPos slots that have ever had a collision (a known tag diverted to the buckets - // because a different tag owned the slot). Used to detect when claiming a freed slot might - // orphan a stale bucket copy of the same tag. Bit N covers slot N (capacity <= 32). - private int collidedSlots; + // Dense store for known tags (any tag with a non-zero globalSerial), kept in insertion order. + // Lazily allocated on the first known-tag write. Parallel arrays: knownIds[i] is the tagId of the + // i-th present known tag and knownValues[i] its value (Object; primitives boxed). On set we + // linear-scan [0, knownCount) by globalSerial; a match overwrites, otherwise we append (growing + // the arrays). There are no positional collisions: every known tag simply gets a dense slot. 
+ // Unknown tags (globalSerial == 0) still live in the hash buckets. + private long[] knownIds; + private Object[] knownValues; + private int knownCount; + + private static final int KNOWN_INITIAL_CAPACITY = 8; public OptimizedTagMap() { // needs to be a power of 2 for bucket masking calculation to work as intended this.buckets = new Object[1 << 4]; this.size = 0; this.frozen = false; - this.knownEntries = null; + this.knownIds = null; + this.knownValues = null; + this.knownCount = 0; } /** Used for inexpensive immutable */ @@ -1438,7 +1441,9 @@ private OptimizedTagMap(Object[] buckets, int size) { this.buckets = buckets; this.size = size; this.frozen = true; - this.knownEntries = null; + this.knownIds = null; + this.knownValues = null; + this.knownCount = 0; } @Override @@ -1564,22 +1569,22 @@ public Set> entrySet() { @Override public Entry getEntry(String tag) { - // Known tags live in their slot; resolve identity and check there first. keyOf is a no-op + // Known tags live in the dense store; resolve identity and check there first. keyOf is a no-op // until a resolver is registered, so this is just a hash-bucket lookup in the common case. long tagId = KnownTags.keyOf(tag); if (tagId != 0L) { - Entry slot = this.knownGet(tagId); - if (slot != null) return slot; + Entry known = this.knownGet(tagId); + if (known != null) return known; } return this.getEntryFromBuckets(tag); } @Override public Entry getEntry(long tagId) { - Entry slot = this.knownGet(tagId); - if (slot != null) return slot; + Entry known = this.knownGet(tagId); + if (known != null) return known; - // not slotted (unknown tag id, or it collided into the buckets) - look up by resolved name + // not a known tag (unknown tag id) - look up by resolved name String name = KnownTags.nameOf(tagId); return name == null ? null : this.getEntryFromBuckets(name); } @@ -1652,10 +1657,10 @@ public void set(String tag, double value) { this.getAndSet(Entry.newDoubleEntry(tag, value)); } - // Tag-id keyed setters. 
Build a tag-id-bearing Entry (carrying globalSerial/fieldPos/nameHash) - // and store it in the hash buckets like any other entry; positional knownEntries routing comes - // in a later PR. The Entry resolves its tag name lazily via KnownTags, so it remains findable by - // string name and serializes correctly once a KnownTags.Resolver is registered. + // Tag-id keyed setters. Build a tag-id-bearing Entry (carrying globalSerial/fieldPos/nameHash); + // getAndSet routes known tags (non-zero globalSerial) into the dense store and everything else to + // the hash buckets. The Entry resolves its tag name lazily via KnownTags, so it remains findable + // by string name and serializes correctly once a KnownTags.Resolver is registered. @Override public void set(long tagId, Object value) { this.getAndSet(Entry.newAnyEntry(tagId, value)); @@ -1691,41 +1696,47 @@ public void set(long tagId, double value) { this.getAndSet(Entry.newDoubleEntry(tagId, value)); } - // Returns the slot entry for tagId if a known tag owns its fieldPos slot, else null. - private Entry knownGet(long tagId) { - Entry[] known = this.knownEntries; - if (known == null) return null; - + // Returns the dense index of the known tag matching tagId (by globalSerial), or -1 if absent. + private int knownIndexOf(long tagId) { int globalSerial = KnownTags.globalSerial(tagId); - if (globalSerial == 0) return null; + if (globalSerial == 0) return -1; - int pos = KnownTags.fieldPos(tagId); - if (pos >= known.length) return null; - - Entry occupant = known[pos]; - return (occupant != null && KnownTags.globalSerial(occupant.tagId) == globalSerial) - ? occupant - : null; + long[] ids = this.knownIds; + int count = this.knownCount; + for (int i = 0; i < count; ++i) { + if (KnownTags.globalSerial(ids[i]) == globalSerial) return i; + } + return -1; } - // Clears and returns the slot entry for tagId if a known tag owns its slot, else null. 
- private Entry knownRemove(long tagId) { - Entry[] known = this.knownEntries; - if (known == null) return null; + // Materializes a real Entry for the dense entry at index i (carrying the stored tagId so it + // resolves its name and serializes correctly). + private Entry knownEntryAt(int i) { + return Entry.newAnyEntry(this.knownIds[i], this.knownValues[i]); + } - int globalSerial = KnownTags.globalSerial(tagId); - if (globalSerial == 0) return null; + // Returns a materialized entry for tagId if a known tag with that globalSerial is present, else + // null. (Explicit getEntry path - materializing here is fine, this is not iteration.) + private Entry knownGet(long tagId) { + int i = this.knownIndexOf(tagId); + return i < 0 ? null : this.knownEntryAt(i); + } - int pos = KnownTags.fieldPos(tagId); - if (pos >= known.length) return null; + // Removes and returns (materialized) the known tag matching tagId, else null. Compacts the dense + // store by swapping the last element into the removed slot (order need not be stable on remove). + private Entry knownRemove(long tagId) { + int i = this.knownIndexOf(tagId); + if (i < 0) return null; - Entry occupant = known[pos]; - if (occupant != null && KnownTags.globalSerial(occupant.tagId) == globalSerial) { - known[pos] = null; - this.size -= 1; - return occupant; - } - return null; + Entry removed = this.knownEntryAt(i); + int last = this.knownCount - 1; + this.knownIds[i] = this.knownIds[last]; + this.knownValues[i] = this.knownValues[last]; + this.knownIds[last] = 0L; + this.knownValues[last] = null; + this.knownCount = last; + this.size -= 1; + return removed; } @Override @@ -1752,41 +1763,35 @@ public Entry getAndSet(Entry newEntry) { return this.setInBuckets(newEntry); } - // Routes a known tag to its positional slot (first-writer-wins, same-tag overwrite). On collision - // (a different tag owns the slot) or out-of-range fieldPos, falls back to the hash buckets. 
+ // Stores a known tag in the dense store: linear-scan by globalSerial and overwrite on a match, + // otherwise append (growing the parallel arrays). Returns the prior entry (materialized) or null. private Entry setKnown(Entry newEntry, int globalSerial) { - int pos = KnownTags.fieldPos(newEntry.tagId); - // knownEntries is sized to the registered provider's slot count (max stored fieldPos + 1); a - // larger fieldPos (e.g. a reserved tag's sentinel) routes to the buckets. - int slotCount = KnownTags.slotCount(); - if (pos < slotCount) { - Entry[] known = this.knownEntries; - if (known == null) { - known = this.knownEntries = new Entry[slotCount]; - } - if (pos < known.length) { - Entry occupant = known[pos]; - if (occupant == null) { - // claim the empty slot - Entry prev = null; - if ((this.collidedSlots & (1 << pos)) != 0) { - // this slot previously collided, so a stale copy of this tag may be orphaned in the - // buckets (the slot was freed by a remove). Evict it to avoid a slot+bucket duplicate. 
- prev = this.removeFromBuckets(newEntry.tag(), newEntry.hash()); - } - known[pos] = newEntry; - this.size += 1; // if prev != null, removeFromBuckets already decremented -> net no change - return prev; - } else if (KnownTags.globalSerial(occupant.tagId) == globalSerial) { - // same tag - overwrite in place, no size change - known[pos] = newEntry; - return occupant; - } - // a different known tag owns this slot - record the collision and fall to the buckets - this.collidedSlots |= (1 << pos); - } - } - return this.setInBuckets(newEntry); + long[] ids = this.knownIds; + int count = this.knownCount; + for (int i = 0; i < count; ++i) { + if (KnownTags.globalSerial(ids[i]) == globalSerial) { + // same tag - overwrite in place, no size change + Entry prev = this.knownEntryAt(i); + this.knownIds[i] = newEntry.tagId; + this.knownValues[i] = newEntry.objectValue(); + return prev; + } + } + + // append - grow if necessary + if (ids == null) { + ids = this.knownIds = new long[KNOWN_INITIAL_CAPACITY]; + this.knownValues = new Object[KNOWN_INITIAL_CAPACITY]; + } else if (count == ids.length) { + int newCapacity = count << 1; + ids = this.knownIds = Arrays.copyOf(ids, newCapacity); + this.knownValues = Arrays.copyOf(this.knownValues, newCapacity); + } + ids[count] = newEntry.tagId; + this.knownValues[count] = newEntry.objectValue(); + this.knownCount = count + 1; + this.size += 1; + return null; } private Entry setInBuckets(Entry newEntry) { @@ -1911,13 +1916,11 @@ public void putAll(TagMap that) { private void putAllOptimizedMap(OptimizedTagMap that) { if (this.size == 0) { - // empty dest: clone source buckets + slots wholesale (no duplication possible) + // empty dest: clone source buckets + dense store wholesale (no duplication possible) this.putAllIntoEmptyMap(that); - } else if (this.knownEntries != null || that.knownEntries != null) { - // slots in play with a non-empty dest: the fast bucket-aligned merge could place a known tag - // into a bucket while dest already 
holds it in a slot (or vice versa). Route every source - // entry through getAndSet so slot/bucket placement stays consistent. getAndSet is - // order-independent for collisions. + } else if (this.knownCount != 0 || that.knownCount != 0) { + // known tags in play with a non-empty dest: route every source entry through getAndSet so the + // dense store stays deduplicated against this map's existing entries. this.putAllByEntry(that); } else { this.putAllMerge(that); @@ -2068,22 +2071,23 @@ private void putAllIntoEmptyMap(OptimizedTagMap that) { } } - // dest is empty, so the source's positional slots transfer directly (entries are shared, as - // with buckets above). size is copied wholesale below and already accounts for slot entries. - if (that.knownEntries != null) { - this.knownEntries = that.knownEntries.clone(); + // dest is empty, so the source's dense store transfers directly (values are shared, as with + // buckets above). size is copied wholesale below and already accounts for known entries. 
+ if (that.knownCount != 0) { + this.knownIds = that.knownIds.clone(); + this.knownValues = that.knownValues.clone(); + this.knownCount = that.knownCount; } - this.collidedSlots = that.collidedSlots; this.size = that.size; } public void fillMap(Map map) { - Entry[] known = this.knownEntries; - if (known != null) { - for (Entry slotEntry : known) { - if (slotEntry != null) map.put(slotEntry.tag(), slotEntry.objectValue()); - } + long[] ids = this.knownIds; + Object[] values = this.knownValues; + int count = this.knownCount; + for (int i = 0; i < count; ++i) { + map.put(KnownTags.nameOf(ids[i]), values[i]); } Object[] thisBuckets = this.buckets; @@ -2104,11 +2108,11 @@ public void fillMap(Map map) { } public void fillStringMap(Map stringMap) { - Entry[] known = this.knownEntries; - if (known != null) { - for (Entry slotEntry : known) { - if (slotEntry != null) stringMap.put(slotEntry.tag(), slotEntry.stringValue()); - } + long[] ids = this.knownIds; + Object[] values = this.knownValues; + int count = this.knownCount; + for (int i = 0; i < count; ++i) { + stringMap.put(KnownTags.nameOf(ids[i]), TagValueConversions.toString(values[i])); } Object[] thisBuckets = this.buckets; @@ -2237,10 +2241,14 @@ public Stream stream() { @Override public void forEach(Consumer consumer) { - Entry[] known = this.knownEntries; - if (known != null) { - for (Entry slotEntry : known) { - if (slotEntry != null) consumer.accept(slotEntry); + long[] ids = this.knownIds; + Object[] values = this.knownValues; + int count = this.knownCount; + if (count != 0) { + EntryReadingHelper reader = new EntryReadingHelper(); + for (int i = 0; i < count; ++i) { + reader.set(KnownTags.nameOf(ids[i]), values[i]); + consumer.accept(reader); } } @@ -2263,10 +2271,14 @@ public void forEach(Consumer consumer) { @Override public void forEach(T thisObj, BiConsumer consumer) { - Entry[] known = this.knownEntries; - if (known != null) { - for (Entry slotEntry : known) { - if (slotEntry != null) 
consumer.accept(thisObj, slotEntry); + long[] ids = this.knownIds; + Object[] values = this.knownValues; + int count = this.knownCount; + if (count != 0) { + EntryReadingHelper reader = new EntryReadingHelper(); + for (int i = 0; i < count; ++i) { + reader.set(KnownTags.nameOf(ids[i]), values[i]); + consumer.accept(thisObj, reader); } } @@ -2290,10 +2302,14 @@ public void forEach(T thisObj, BiConsumer con @Override public void forEach( T thisObj, U otherObj, TriConsumer consumer) { - Entry[] known = this.knownEntries; - if (known != null) { - for (Entry slotEntry : known) { - if (slotEntry != null) consumer.accept(thisObj, otherObj, slotEntry); + long[] ids = this.knownIds; + Object[] values = this.knownValues; + int count = this.knownCount; + if (count != 0) { + EntryReadingHelper reader = new EntryReadingHelper(); + for (int i = 0; i < count; ++i) { + reader.set(KnownTags.nameOf(ids[i]), values[i]); + consumer.accept(thisObj, otherObj, reader); } } @@ -2318,11 +2334,12 @@ public void clear() { this.checkWriteAccess(); Arrays.fill(this.buckets, null); - if (this.knownEntries != null) { - Arrays.fill(this.knownEntries, null); + if (this.knownCount != 0) { + Arrays.fill(this.knownIds, 0, this.knownCount, 0L); + Arrays.fill(this.knownValues, 0, this.knownCount, null); + this.knownCount = 0; } this.size = 0; - this.collidedSlots = 0; } public OptimizedTagMap freeze() { @@ -2344,17 +2361,17 @@ void checkIntegrity() { // That was done to avoid the extra static initialization needed for an assertion // While that's probably an unnecessary optimization, this method is only called in tests - Entry[] known = this.knownEntries; - if (known != null) { - for (int i = 0; i < known.length; ++i) { - Entry slotEntry = known[i]; - if (slotEntry == null) continue; - - if (KnownTags.globalSerial(slotEntry.tagId) == 0) { - throw new IllegalStateException("slotted entry without globalSerial"); - } - if (KnownTags.fieldPos(slotEntry.tagId) != i) { - throw new 
IllegalStateException("incorrect slot"); + long[] ids = this.knownIds; + int knownCount = this.knownCount; + for (int i = 0; i < knownCount; ++i) { + long id = ids[i]; + if (KnownTags.globalSerial(id) == 0) { + throw new IllegalStateException("known entry without globalSerial"); + } + // no duplicate globalSerials in the dense store + for (int j = i + 1; j < knownCount; ++j) { + if (KnownTags.globalSerial(ids[j]) == KnownTags.globalSerial(id)) { + throw new IllegalStateException("duplicate known entry"); } } } @@ -2401,14 +2418,7 @@ void checkIntegrity() { } int computeSize() { - int size = 0; - - Entry[] known = this.knownEntries; - if (known != null) { - for (Entry slotEntry : known) { - if (slotEntry != null) size += 1; - } - } + int size = this.knownCount; Object[] thisBuckets = this.buckets; for (int i = 0; i < thisBuckets.length; ++i) { @@ -2425,12 +2435,7 @@ int computeSize() { } boolean checkIfEmpty() { - Entry[] known = this.knownEntries; - if (known != null) { - for (Entry slotEntry : known) { - if (slotEntry != null) return false; - } - } + if (this.knownCount != 0) return false; Object[] thisBuckets = this.buckets; @@ -2527,10 +2532,19 @@ String toInternalString() { } abstract static class IteratorBase { - private final Entry[] knownEntries; + private final long[] knownIds; + private final Object[] knownValues; + private final int knownCount; private final Object[] buckets; - private Entry nextEntry; + // Reused flyweight reader for dense (known) entries - no per-tag Entry allocation. Lazily + // created on the first dense entry. Consumers that need to RETAIN a yielded reader across + // iteration steps must capture a copy via reader.entry(). + private EntryReadingHelper knownReader; + + // The pending reader (either the reused flyweight for dense entries, or a bucket Entry which is + // itself an EntryReader). null means none pending. 
+ private EntryReader nextReader; private int knownIndex = -1; private int bucketIndex = -1; @@ -2539,53 +2553,61 @@ abstract static class IteratorBase { private int groupIndex = 0; IteratorBase(OptimizedTagMap map) { - this.knownEntries = map.knownEntries; + this.knownIds = map.knownIds; + this.knownValues = map.knownValues; + this.knownCount = map.knownCount; this.buckets = map.buckets; } public final boolean hasNext() { - if (this.nextEntry != null) return true; - - while (this.bucketIndex < this.buckets.length) { - this.nextEntry = this.advance(); - if (this.nextEntry != null) return true; - } + if (this.nextReader != null) return true; - return false; + this.nextReader = this.advance(); + return this.nextReader != null; } - final Entry nextEntryOrThrowNoSuchElement() { - if (this.nextEntry != null) { - Entry nextEntry = this.nextEntry; - this.nextEntry = null; - return nextEntry; + final EntryReader nextEntryOrThrowNoSuchElement() { + if (this.nextReader != null) { + EntryReader nextReader = this.nextReader; + this.nextReader = null; + return nextReader; } if (this.hasNext()) { - return this.nextEntry; + EntryReader nextReader = this.nextReader; + this.nextReader = null; + return nextReader; } else { throw new NoSuchElementException(); } } - final Entry nextEntryOrNull() { - if (this.nextEntry != null) { - Entry nextEntry = this.nextEntry; - this.nextEntry = null; - return nextEntry; + final EntryReader nextEntryOrNull() { + if (this.nextReader != null) { + EntryReader nextReader = this.nextReader; + this.nextReader = null; + return nextReader; } - return this.hasNext() ? 
this.nextEntry : null; + if (this.hasNext()) { + EntryReader nextReader = this.nextReader; + this.nextReader = null; + return nextReader; + } + return null; } - private final Entry advance() { - // drain the positional known-entries slots first - Entry[] known = this.knownEntries; - if (known != null) { - for (++this.knownIndex; this.knownIndex < known.length; ++this.knownIndex) { - Entry slotEntry = known[this.knownIndex]; - if (slotEntry != null) return slotEntry; + private final EntryReader advance() { + // drain the dense known entries first, via the reused flyweight reader + if (this.knownIndex + 1 < this.knownCount) { + ++this.knownIndex; + EntryReadingHelper reader = this.knownReader; + if (reader == null) { + reader = this.knownReader = new EntryReadingHelper(); } + reader.set( + KnownTags.nameOf(this.knownIds[this.knownIndex]), this.knownValues[this.knownIndex]); + return reader; } while (this.bucketIndex < this.buckets.length) { @@ -3118,9 +3140,22 @@ public boolean isEmpty() { @Override public Iterator> iterator() { - @SuppressWarnings({"rawtypes", "unchecked"}) - Iterator> iter = (Iterator) this.map.iterator(); - return iter; + return new EntriesIterator(this.map); + } + } + + // Map.Entry view over the iterator. Dense entries are yielded as a reused flyweight EntryReader + // (not a Map.Entry), so materialize a real Map.Entry per element here via mapEntry(). Bucket + // Entry-s are themselves Map.Entry, so mapEntry() returns them directly without allocating. 
+ static final class EntriesIterator extends IteratorBase + implements Iterator> { + EntriesIterator(OptimizedTagMap map) { + super(map); + } + + @Override + public Map.Entry next() { + return this.nextEntryOrThrowNoSuchElement().mapEntry(); } } diff --git a/internal-api/src/test/java/datadog/trace/api/TagMapTagIdTest.java b/internal-api/src/test/java/datadog/trace/api/TagMapTagIdTest.java index 94a01ae9470..ba716f3c935 100644 --- a/internal-api/src/test/java/datadog/trace/api/TagMapTagIdTest.java +++ b/internal-api/src/test/java/datadog/trace/api/TagMapTagIdTest.java @@ -4,7 +4,6 @@ import static org.junit.jupiter.api.Assertions.assertFalse; import static org.junit.jupiter.api.Assertions.assertNotNull; import static org.junit.jupiter.api.Assertions.assertNull; -import static org.junit.jupiter.api.Assertions.assertSame; import static org.junit.jupiter.api.Assertions.assertTrue; import datadog.trace.api.TagMap.Entry; @@ -98,9 +97,13 @@ public void setById_findableByIdAndName() { assertNotNull(byId); assertEquals("GET", byId.stringValue()); - // findable by the resolved string name (read-path unification) + // findable by the resolved string name (read-path unification). getEntry materializes a fresh + // Entry from the dense store on each call, so identity is not preserved across calls; the + // logical entry (tag + value) is. 
Entry byName = map.getEntry(HTTP_METHOD); - assertSame(byId, byName); + assertNotNull(byName); + assertEquals(byId.tag(), byName.tag()); + assertEquals(byId.stringValue(), byName.stringValue()); assertEquals("GET", map.get(HTTP_METHOD)); } @@ -256,8 +259,12 @@ public void unslotted_setFindableByIdAndName() { assertEquals(KnownTags.NO_SLOT, KnownTags.fieldPos(byId.tagId)); assertEquals(MESSAGING_SYSTEM, byId.tag()); - // string read of the same tag unifies with the id-stored entry - assertSame(byId, map.getEntry(MESSAGING_SYSTEM)); + // string read of the same tag unifies with the id-stored entry (logically; getEntry + // materializes a fresh Entry per call so identity is not preserved) + Entry byName = map.getEntry(MESSAGING_SYSTEM); + assertNotNull(byName); + assertEquals(byId.tag(), byName.tag()); + assertEquals(byId.stringValue(), byName.stringValue()); assertEquals("kafka", map.get(MESSAGING_SYSTEM)); } From aac8c00ebbfee8ad60083b3fadf0399cf3650211 Mon Sep 17 00:00:00 2001 From: Douglas Q Hawkins Date: Wed, 10 Jun 2026 11:04:00 -0400 Subject: [PATCH 31/35] No-Entry set/get on the dense store (set is the app-thread hot path) Stop allocating a transient Entry when a value lands in the flat (dense) store: - putKnownValue(long,Object): dense write core (scan-by-globalSerial -> overwrite or append/grow), builds no Entry, stores the value reference. - typed set(long,...) route to putKnownValue for known ids (strings/objects by reference = zero alloc; primitives boxed once, no Entry); globalSerial==0 -> bucket. set(String,...) resolve keyOf first: known -> putKnownValue, else bucket Entry (name preserved; no-resolver -> keyOf 0 -> bucket, unchanged). set(EntryReader) stores the reader's value via putKnownValue for known tags. setKnown(Entry) refactored to materialize prior only for getAndSet's return. - typed getters (getString/getInt/getBoolean/...) read knownValues directly via TagValueConversions, no Entry materialized; miss -> bucket path unchanged. 
getEntry still materializes (contract). Behavior note: getBoolean of a known NON-numeric object now coerces via TagValueConversions.toBoolean (-> false) vs the old ANY-Entry (value != null -> true). No production caller reads such a tag as boolean; boolean tags are stored as Boolean (fast path, identical). Acceptable. All TagMap*/fuzz/serialization/dd-trace-core suites green (force re-run). Benchmark pending. tag: ai generated tag: no release note Co-Authored-By: Claude Opus 4.8 --- .../main/java/datadog/trace/api/TagMap.java | 204 ++++++++++++++---- 1 file changed, 168 insertions(+), 36 deletions(-) diff --git a/internal-api/src/main/java/datadog/trace/api/TagMap.java b/internal-api/src/main/java/datadog/trace/api/TagMap.java index 6d8705376dc..ddbcc79c57f 100644 --- a/internal-api/src/main/java/datadog/trace/api/TagMap.java +++ b/internal-api/src/main/java/datadog/trace/api/TagMap.java @@ -1469,15 +1469,30 @@ public Object get(Object tag) { return this.getObject((String) tag); } + // No-alloc dense lookup for the typed getters: for a known tag (keyOf resolves to a non-zero id), + // returns the raw stored value (primitives pre-boxed) so the caller can coerce it via + // TagValueConversions WITHOUT materializing an Entry. Returns null when the tag is not a present + // known tag (caller then falls back to the bucket lookup). Stored values are never null. + private Object knownRawValue(String tag) { + long tagId = KnownTags.keyOf(tag); + if (tagId == 0L) return null; + int i = this.knownIndexOf(tagId); + return i < 0 ? null : this.knownValues[i]; + } + /** Provides the corresponding entry value as an Object - boxing if necessary */ public Object getObject(String tag) { - Entry entry = this.getEntry(tag); + Object known = this.knownRawValue(tag); + if (known != null) return known; + Entry entry = this.getEntryFromBuckets(tag); return entry == null ? 
null : entry.objectValue(); } /** Provides the corresponding entry value as a String - calling toString if necessary */ public String getString(String tag) { - Entry entry = this.getEntry(tag); + Object known = this.knownRawValue(tag); + if (known != null) return TagValueConversions.toString(known); + Entry entry = this.getEntryFromBuckets(tag); return entry == null ? null : entry.stringValue(); } @@ -1486,7 +1501,9 @@ public boolean getBoolean(String tag) { } public boolean getBooleanOrDefault(String tag, boolean defaultValue) { - Entry entry = this.getEntry(tag); + Object known = this.knownRawValue(tag); + if (known != null) return TagValueConversions.toBoolean(known); + Entry entry = this.getEntryFromBuckets(tag); return entry == null ? defaultValue : entry.booleanValue(); } @@ -1495,7 +1512,9 @@ public int getInt(String tag) { } public int getIntOrDefault(String tag, int defaultValue) { - Entry entry = this.getEntry(tag); + Object known = this.knownRawValue(tag); + if (known != null) return TagValueConversions.toInt(known); + Entry entry = this.getEntryFromBuckets(tag); return entry == null ? defaultValue : entry.intValue(); } @@ -1504,7 +1523,9 @@ public long getLong(String tag) { } public long getLongOrDefault(String tag, long defaultValue) { - Entry entry = this.getEntry(tag); + Object known = this.knownRawValue(tag); + if (known != null) return TagValueConversions.toLong(known); + Entry entry = this.getEntryFromBuckets(tag); return entry == null ? defaultValue : entry.longValue(); } @@ -1513,7 +1534,9 @@ public float getFloat(String tag) { } public float getFloatOrDefault(String tag, float defaultValue) { - Entry entry = this.getEntry(tag); + Object known = this.knownRawValue(tag); + if (known != null) return TagValueConversions.toFloat(known); + Entry entry = this.getEntryFromBuckets(tag); return entry == null ? 
defaultValue : entry.floatValue(); } @@ -1522,7 +1545,9 @@ public double getDouble(String tag) { } public double getDoubleOrDefault(String tag, double defaultValue) { - Entry entry = this.getEntry(tag); + Object known = this.knownRawValue(tag); + if (known != null) return TagValueConversions.toDouble(known); + Entry entry = this.getEntryFromBuckets(tag); return entry == null ? defaultValue : entry.doubleValue(); } @@ -1619,81 +1644,177 @@ public Object put(String tag, Object value) { @Override public void set(TagMap.EntryReader newEntryReader) { - this.getAndSet(newEntryReader.entry()); + this.checkWriteAccess(); + // Cached-entry path (e.g. decorator componentEntry). entry() returns the reader's own Entry (no + // NEW allocation for a real Entry), carrying any id-encoded globalSerial. For a known tag we + // store its value in the dense store directly; otherwise the reader's entry goes to the bucket + // path. keyOf is a no-op until a resolver is registered (string-only entries keep their name). + Entry entry = newEntryReader.entry(); + long tagId = entry.tagId; + if (KnownTags.globalSerial(tagId) == 0 && KnownTags.isActive()) { + long resolved = KnownTags.keyOf(entry.tag()); + if (resolved != 0L) tagId = resolved; + } + if (KnownTags.globalSerial(tagId) != 0) { + this.putKnownValue(tagId, entry.objectValue()); + } else { + this.setInBuckets(entry); + } } + // String-keyed setters. Resolve the tag identity once: a registered KnownTags resolver maps known + // tag names to a non-zero id (carrying globalSerial), letting us store the value in the dense + // store with NO Entry allocation. Until a resolver is registered keyOf returns 0 and we fall back + // to the bucket path, preserving the name-keyed Entry behavior. 
@Override public void set(String tag, Object value) { - this.getAndSet(Entry.newAnyEntry(tag, value)); + this.checkWriteAccess(); + long id = KnownTags.keyOf(tag); + if (id != 0L) { + this.putKnownValue(id, value); + } else { + this.setInBuckets(Entry.newAnyEntry(tag, value)); + } } @Override public void set(String tag, CharSequence value) { - this.getAndSet(Entry.newObjectEntry(tag, value)); + this.checkWriteAccess(); + long id = KnownTags.keyOf(tag); + if (id != 0L) { + this.putKnownValue(id, value); + } else { + this.setInBuckets(Entry.newObjectEntry(tag, value)); + } } @Override public void set(String tag, boolean value) { - this.getAndSet(Entry.newBooleanEntry(tag, value)); + this.checkWriteAccess(); + long id = KnownTags.keyOf(tag); + if (id != 0L) { + this.putKnownValue(id, Boolean.valueOf(value)); + } else { + this.setInBuckets(Entry.newBooleanEntry(tag, value)); + } } @Override public void set(String tag, int value) { - this.getAndSet(Entry.newIntEntry(tag, value)); + this.checkWriteAccess(); + long id = KnownTags.keyOf(tag); + if (id != 0L) { + this.putKnownValue(id, Integer.valueOf(value)); + } else { + this.setInBuckets(Entry.newIntEntry(tag, value)); + } } @Override public void set(String tag, long value) { - this.getAndSet(Entry.newLongEntry(tag, value)); + this.checkWriteAccess(); + long id = KnownTags.keyOf(tag); + if (id != 0L) { + this.putKnownValue(id, Long.valueOf(value)); + } else { + this.setInBuckets(Entry.newLongEntry(tag, value)); + } } @Override public void set(String tag, float value) { - this.getAndSet(Entry.newFloatEntry(tag, value)); + this.checkWriteAccess(); + long id = KnownTags.keyOf(tag); + if (id != 0L) { + this.putKnownValue(id, Float.valueOf(value)); + } else { + this.setInBuckets(Entry.newFloatEntry(tag, value)); + } } @Override public void set(String tag, double value) { - this.getAndSet(Entry.newDoubleEntry(tag, value)); + this.checkWriteAccess(); + long id = KnownTags.keyOf(tag); + if (id != 0L) { + this.putKnownValue(id, 
Double.valueOf(value)); + } else { + this.setInBuckets(Entry.newDoubleEntry(tag, value)); + } } - // Tag-id keyed setters. Build a tag-id-bearing Entry (carrying globalSerial/fieldPos/nameHash); - // getAndSet routes known tags (non-zero globalSerial) into the dense store and everything else to - // the hash buckets. The Entry resolves its tag name lazily via KnownTags, so it remains findable - // by string name and serializes correctly once a KnownTags.Resolver is registered. + // Tag-id keyed setters. The id already carries the globalSerial, so a known tag (non-zero + // globalSerial) goes straight into the dense store via putKnownValue with NO Entry allocation + // (strings/objects by reference; primitives boxed once). An id without a globalSerial is not a + // known tag — fall back to the bucket path, which builds an Entry that resolves its name lazily. @Override public void set(long tagId, Object value) { - this.getAndSet(Entry.newAnyEntry(tagId, value)); + this.checkWriteAccess(); + if (KnownTags.globalSerial(tagId) != 0) { + this.putKnownValue(tagId, value); + } else { + this.setInBuckets(Entry.newAnyEntry(tagId, value)); + } } @Override public void set(long tagId, CharSequence value) { - this.getAndSet(Entry.newObjectEntry(tagId, value)); + this.checkWriteAccess(); + if (KnownTags.globalSerial(tagId) != 0) { + this.putKnownValue(tagId, value); + } else { + this.setInBuckets(Entry.newObjectEntry(tagId, value)); + } } @Override public void set(long tagId, boolean value) { - this.getAndSet(Entry.newBooleanEntry(tagId, value)); + this.checkWriteAccess(); + if (KnownTags.globalSerial(tagId) != 0) { + this.putKnownValue(tagId, Boolean.valueOf(value)); + } else { + this.setInBuckets(Entry.newBooleanEntry(tagId, value)); + } } @Override public void set(long tagId, int value) { - this.getAndSet(Entry.newIntEntry(tagId, value)); + this.checkWriteAccess(); + if (KnownTags.globalSerial(tagId) != 0) { + this.putKnownValue(tagId, Integer.valueOf(value)); + } else { + 
this.setInBuckets(Entry.newIntEntry(tagId, value)); + } } @Override public void set(long tagId, long value) { - this.getAndSet(Entry.newLongEntry(tagId, value)); + this.checkWriteAccess(); + if (KnownTags.globalSerial(tagId) != 0) { + this.putKnownValue(tagId, Long.valueOf(value)); + } else { + this.setInBuckets(Entry.newLongEntry(tagId, value)); + } } @Override public void set(long tagId, float value) { - this.getAndSet(Entry.newFloatEntry(tagId, value)); + this.checkWriteAccess(); + if (KnownTags.globalSerial(tagId) != 0) { + this.putKnownValue(tagId, Float.valueOf(value)); + } else { + this.setInBuckets(Entry.newFloatEntry(tagId, value)); + } } @Override public void set(long tagId, double value) { - this.getAndSet(Entry.newDoubleEntry(tagId, value)); + this.checkWriteAccess(); + if (KnownTags.globalSerial(tagId) != 0) { + this.putKnownValue(tagId, Double.valueOf(value)); + } else { + this.setInBuckets(Entry.newDoubleEntry(tagId, value)); + } } // Returns the dense index of the known tag matching tagId (by globalSerial), or -1 if absent. @@ -1763,18 +1884,20 @@ public Entry getAndSet(Entry newEntry) { return this.setInBuckets(newEntry); } - // Stores a known tag in the dense store: linear-scan by globalSerial and overwrite on a match, - // otherwise append (growing the parallel arrays). Returns the prior entry (materialized) or null. - private Entry setKnown(Entry newEntry, int globalSerial) { + // Dense-write core: store a known tag's value WITHOUT constructing an Entry. tagId must carry a + // non-zero globalSerial. Linear-scan [0, knownCount) by globalSerial; a match overwrites the + // stored id+value in place (no size change), otherwise we append (growing the parallel arrays). + // Primitives are expected pre-boxed by the caller; strings/objects are stored by reference. 
+ private void putKnownValue(long tagId, Object value) { + int globalSerial = KnownTags.globalSerial(tagId); long[] ids = this.knownIds; int count = this.knownCount; for (int i = 0; i < count; ++i) { if (KnownTags.globalSerial(ids[i]) == globalSerial) { // same tag - overwrite in place, no size change - Entry prev = this.knownEntryAt(i); - this.knownIds[i] = newEntry.tagId; - this.knownValues[i] = newEntry.objectValue(); - return prev; + this.knownIds[i] = tagId; + this.knownValues[i] = value; + return; } } @@ -1787,11 +1910,20 @@ private Entry setKnown(Entry newEntry, int globalSerial) { ids = this.knownIds = Arrays.copyOf(ids, newCapacity); this.knownValues = Arrays.copyOf(this.knownValues, newCapacity); } - ids[count] = newEntry.tagId; - this.knownValues[count] = newEntry.objectValue(); + ids[count] = tagId; + this.knownValues[count] = value; this.knownCount = count + 1; this.size += 1; - return null; + } + + // Stores a known tag in the dense store and returns the prior entry (materialized) or null. Used + // by the prior-returning getAndSet path; the void set(...) paths call putKnownValue directly to + // avoid even this Entry-materialization of the prior. + private Entry setKnown(Entry newEntry, int globalSerial) { + int i = this.knownIndexOf(newEntry.tagId); + Entry prev = (i < 0) ? null : this.knownEntryAt(i); + this.putKnownValue(newEntry.tagId, newEntry.objectValue()); + return prev; } private Entry setInBuckets(Entry newEntry) { From bf8783eb80a4e2867cf5f5eb6b53c870f78e7a65 Mon Sep 17 00:00:00 2001 From: Douglas Q Hawkins Date: Wed, 10 Jun 2026 12:56:59 -0400 Subject: [PATCH 32/35] Lazily allocate OptimizedTagMap buckets (all-known spans allocate none) The bucket array (Object[1<<4], for unknown/globalSerial==0 tags) was eagerly allocated + zeroed in every map's constructor, wasted on the common all-known span that never buckets anything. 
Now `buckets` starts null and is allocated only on the first unknown-tag insertion (setInBuckets / putAll); every read/scan/size/merge/iterate path treats null as empty. EMPTY and clear() carry null buckets. A span whose tags are all known (the dense path) now allocates no bucket array at all. TagMap* incl fuzz (empty/all-known/all-unknown/mixed/putAll/iterate) + serialization + dd-trace-core suites green (force re-run). tag: ai generated tag: no release note Co-Authored-By: Claude Opus 4.8 --- .../main/java/datadog/trace/api/TagMap.java | 81 +++++++++++++------ 1 file changed, 58 insertions(+), 23 deletions(-) diff --git a/internal-api/src/main/java/datadog/trace/api/TagMap.java b/internal-api/src/main/java/datadog/trace/api/TagMap.java index ddbcc79c57f..6e155181395 100644 --- a/internal-api/src/main/java/datadog/trace/api/TagMap.java +++ b/internal-api/src/main/java/datadog/trace/api/TagMap.java @@ -1407,10 +1407,13 @@ static OptimizedTagMap empty() { } private static final class EmptyHolder { - static final OptimizedTagMap EMPTY = new OptimizedTagMap(new Object[1], 0); + static final OptimizedTagMap EMPTY = new OptimizedTagMap(null, 0); } - private final Object[] buckets; + // Hash buckets for unknown tags (globalSerial == 0). Lazily allocated on the first unknown-tag + // insertion; an all-known map never allocates it. A null buckets array means "no bucketed + // entries" and must be treated as empty everywhere it is read/scanned. 
+ private Object[] buckets; private int size; private boolean frozen; @@ -1427,8 +1430,8 @@ private static final class EmptyHolder { private static final int KNOWN_INITIAL_CAPACITY = 8; public OptimizedTagMap() { - // needs to be a power of 2 for bucket masking calculation to work as intended - this.buckets = new Object[1 << 4]; + // buckets stay null until the first unknown-tag insertion (see setInBuckets) + this.buckets = null; this.size = 0; this.frozen = false; this.knownIds = null; @@ -1616,6 +1619,7 @@ public Entry getEntry(long tagId) { private Entry getEntryFromBuckets(String tag) { Object[] thisBuckets = this.buckets; + if (thisBuckets == null) return null; int hash = TagMap.Entry._hash(tag); int bucketIndex = hash & (thisBuckets.length - 1); @@ -1928,6 +1932,11 @@ private Entry setKnown(Entry newEntry, int globalSerial) { private Entry setInBuckets(Entry newEntry) { Object[] thisBuckets = this.buckets; + if (thisBuckets == null) { + // first unknown-tag insertion - lazily allocate the bucket array + // needs to be a power of 2 for bucket masking calculation to work as intended + thisBuckets = this.buckets = new Object[1 << 4]; + } int newHash = newEntry.hash(); int bucketIndex = newHash & (thisBuckets.length - 1); @@ -2065,8 +2074,16 @@ private void putAllByEntry(OptimizedTagMap that) { } private void putAllMerge(OptimizedTagMap that) { - Object[] thisBuckets = this.buckets; Object[] thatBuckets = that.buckets; + // nothing bucketed in the source - nothing to merge + if (thatBuckets == null) return; + + Object[] thisBuckets = this.buckets; + if (thisBuckets == null) { + // dest has no bucket array yet - nothing has been bucketed into it (putAllMerge is only + // reached when both maps have knownCount == 0); allocate one to receive the source's buckets + thisBuckets = this.buckets = new Object[1 << 4]; + } // Since TagMap-s don't support expansion, buckets are perfectly aligned // Check against both thisBuckets.length && thatBuckets.length is to help the 
JIT do bound check @@ -2182,24 +2199,32 @@ private void putAllMerge(OptimizedTagMap that) { * Specially optimized version of putAll for the common case of destination map being empty */ private void putAllIntoEmptyMap(OptimizedTagMap that) { - Object[] thisBuckets = this.buckets; Object[] thatBuckets = that.buckets; - // Check against both thisBuckets.length && thatBuckets.length is to help the JIT do bound check - // elimination - for (int i = 0; i < thisBuckets.length && i < thatBuckets.length; ++i) { - Object thatBucket = thatBuckets[i]; + // source has bucketed entries - lazily allocate dest buckets and clone them in. A source with + // null buckets leaves dest buckets null (still empty until something buckets). + if (thatBuckets != null) { + Object[] thisBuckets = this.buckets; + if (thisBuckets == null) { + thisBuckets = this.buckets = new Object[1 << 4]; + } - // faster to explicitly null check first, then do instanceof - if (thatBucket == null) { - // do nothing - } else if (thatBucket instanceof BucketGroup) { - // if it is a BucketGroup, then need to clone - BucketGroup thatGroup = (BucketGroup) thatBucket; + // Check against both thisBuckets.length && thatBuckets.length is to help the JIT do bound + // check elimination + for (int i = 0; i < thisBuckets.length && i < thatBuckets.length; ++i) { + Object thatBucket = thatBuckets[i]; - thisBuckets[i] = thatGroup.cloneChain(); - } else { // if ( thatBucket instanceof Entry ) - thisBuckets[i] = thatBucket; + // faster to explicitly null check first, then do instanceof + if (thatBucket == null) { + // do nothing + } else if (thatBucket instanceof BucketGroup) { + // if it is a BucketGroup, then need to clone + BucketGroup thatGroup = (BucketGroup) thatBucket; + + thisBuckets[i] = thatGroup.cloneChain(); + } else { // if ( thatBucket instanceof Entry ) + thisBuckets[i] = thatBucket; + } } } @@ -2223,6 +2248,7 @@ public void fillMap(Map map) { } Object[] thisBuckets = this.buckets; + if (thisBuckets == null) 
return; for (int i = 0; i < thisBuckets.length; ++i) { Object thisBucket = thisBuckets[i]; @@ -2248,6 +2274,7 @@ public void fillStringMap(Map stringMap) { } Object[] thisBuckets = this.buckets; + if (thisBuckets == null) return; for (int i = 0; i < thisBuckets.length; ++i) { Object thisBucket = thisBuckets[i]; @@ -2312,6 +2339,7 @@ public Entry getAndRemove(String tag) { // or null. private Entry removeFromBuckets(String tag, int hash) { Object[] thisBuckets = this.buckets; + if (thisBuckets == null) return null; int bucketIndex = hash & (thisBuckets.length - 1); @@ -2385,6 +2413,7 @@ public void forEach(Consumer consumer) { } Object[] thisBuckets = this.buckets; + if (thisBuckets == null) return; for (int i = 0; i < thisBuckets.length; ++i) { Object thisBucket = thisBuckets[i]; @@ -2415,6 +2444,7 @@ public void forEach(T thisObj, BiConsumer con } Object[] thisBuckets = this.buckets; + if (thisBuckets == null) return; for (int i = 0; i < thisBuckets.length; ++i) { Object thisBucket = thisBuckets[i]; @@ -2446,6 +2476,7 @@ public void forEach( } Object[] thisBuckets = this.buckets; + if (thisBuckets == null) return; for (int i = 0; i < thisBuckets.length; ++i) { Object thisBucket = thisBuckets[i]; @@ -2465,7 +2496,8 @@ public void forEach( public void clear() { this.checkWriteAccess(); - Arrays.fill(this.buckets, null); + // drop the bucket array entirely - it will be lazily re-allocated on the next unknown-tag write + this.buckets = null; if (this.knownCount != 0) { Arrays.fill(this.knownIds, 0, this.knownCount, 0L); Arrays.fill(this.knownValues, 0, this.knownCount, null); @@ -2510,7 +2542,7 @@ void checkIntegrity() { Object[] thisBuckets = this.buckets; - for (int i = 0; i < thisBuckets.length; ++i) { + for (int i = 0; thisBuckets != null && i < thisBuckets.length; ++i) { Object thisBucket = thisBuckets[i]; if (thisBucket instanceof Entry) { @@ -2553,7 +2585,7 @@ int computeSize() { int size = this.knownCount; Object[] thisBuckets = this.buckets; - for (int i = 0; 
i < thisBuckets.length; ++i) { + for (int i = 0; thisBuckets != null && i < thisBuckets.length; ++i) { Object curBucket = thisBuckets[i]; if (curBucket instanceof Entry) { @@ -2570,6 +2602,7 @@ boolean checkIfEmpty() { if (this.knownCount != 0) return false; Object[] thisBuckets = this.buckets; + if (thisBuckets == null) return true; for (int i = 0; i < thisBuckets.length; ++i) { Object curBucket = thisBuckets[i]; @@ -2643,7 +2676,7 @@ String toInternalString() { Object[] thisBuckets = this.buckets; StringBuilder ledger = new StringBuilder(128); - for (int i = 0; i < thisBuckets.length; ++i) { + for (int i = 0; thisBuckets != null && i < thisBuckets.length; ++i) { ledger.append('[').append(i).append("] = "); Object thisBucket = thisBuckets[i]; @@ -2742,6 +2775,8 @@ private final EntryReader advance() { return reader; } + if (this.buckets == null) return null; + while (this.bucketIndex < this.buckets.length) { if (this.group != null) { for (++this.groupIndex; this.groupIndex < BucketGroup.LEN; ++this.groupIndex) { From 0838d1e29f6fa66b3aa146e329aff65f025c1e3c Mon Sep 17 00:00:00 2001 From: Douglas Q Hawkins Date: Wed, 17 Jun 2026 00:40:17 -0400 Subject: [PATCH 33/35] Dense positional known-tag storage + id-keyed reads + integration migration MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit - OptimizedTagMap: positional dense store (knownValues[fieldPos], no linear scan, no per-tag Entry); id-keyed value reads getObject(long)/getString(long); cache resolved tagId on set(EntryReader) so shared cached entries skip keyOf. - TagSet: generic open-addressed string set; keyOf resolved through it. - Migrate hot decorators to setTag(long): span.kind/language/component, db.*, http.route — to skip the name->id keyOf on those set-sites. - Benchmarks: TagSet (SetBenchmark/KeyOfBenchmark), AttrStore (+ -prof gc), TagMapInsertion (id vs name insert/read). 
Co-Authored-By: Claude Opus 4.8 --- .../decorator/BaseDecorator.java | 4 +- .../decorator/ClientDecorator.java | 3 +- .../decorator/DatabaseClientDecorator.java | 11 +- .../decorator/ServerDecorator.java | 6 +- .../decorator/http/HttpResourceDecorator.java | 4 +- .../instrumentation/jdbc/JDBCDecorator.java | 3 +- .../datadog/trace/api/AttrStoreBenchmark.java | 36 +- .../api/TagMapInsertionBaselineBenchmark.java | 2 +- .../trace/api/TagMapInsertionBenchmark.java | 111 +++-- .../datadog/trace/util/KeyOfBenchmark.java | 255 +++++++++++ .../java/datadog/trace/util/SetBenchmark.java | 170 ++++++-- .../java/datadog/trace/api/KnownTagIds.java | 139 +++--- .../main/java/datadog/trace/api/TagMap.java | 409 ++++++++++-------- .../main/java/datadog/trace/util/TagSet.java | 142 ++++++ .../java/datadog/trace/util/TagSetTest.java | 102 +++++ tag-conventions.yaml | 117 +++-- 16 files changed, 1106 insertions(+), 408 deletions(-) create mode 100644 internal-api/src/jmh/java/datadog/trace/util/KeyOfBenchmark.java create mode 100644 internal-api/src/main/java/datadog/trace/util/TagSet.java create mode 100644 internal-api/src/test/java/datadog/trace/util/TagSetTest.java diff --git a/dd-java-agent/agent-bootstrap/src/main/java/datadog/trace/bootstrap/instrumentation/decorator/BaseDecorator.java b/dd-java-agent/agent-bootstrap/src/main/java/datadog/trace/bootstrap/instrumentation/decorator/BaseDecorator.java index bd930a03e67..bcd352786af 100644 --- a/dd-java-agent/agent-bootstrap/src/main/java/datadog/trace/bootstrap/instrumentation/decorator/BaseDecorator.java +++ b/dd-java-agent/agent-bootstrap/src/main/java/datadog/trace/bootstrap/instrumentation/decorator/BaseDecorator.java @@ -13,7 +13,6 @@ import datadog.trace.bootstrap.instrumentation.api.AgentScope; import datadog.trace.bootstrap.instrumentation.api.AgentSpan; import datadog.trace.bootstrap.instrumentation.api.ErrorPriorities; -import datadog.trace.bootstrap.instrumentation.api.Tags; import java.lang.reflect.Method; import 
java.net.Inet4Address; import java.net.Inet6Address; @@ -82,7 +81,8 @@ protected final TagMap.Entry componentEntry() { // This approach while more complicated doesn't have any field initialization ordering issues. TagMap.Entry componentEntry = cachedComponentEntry; if (componentEntry == null) { - cachedComponentEntry = componentEntry = TagMap.Entry.create(Tags.COMPONENT, component()); + cachedComponentEntry = + componentEntry = TagMap.Entry.create(KnownTagIds.COMPONENT, component()); } return componentEntry; } diff --git a/dd-java-agent/agent-bootstrap/src/main/java/datadog/trace/bootstrap/instrumentation/decorator/ClientDecorator.java b/dd-java-agent/agent-bootstrap/src/main/java/datadog/trace/bootstrap/instrumentation/decorator/ClientDecorator.java index 99dec2dbc08..48687afe130 100644 --- a/dd-java-agent/agent-bootstrap/src/main/java/datadog/trace/bootstrap/instrumentation/decorator/ClientDecorator.java +++ b/dd-java-agent/agent-bootstrap/src/main/java/datadog/trace/bootstrap/instrumentation/decorator/ClientDecorator.java @@ -1,5 +1,6 @@ package datadog.trace.bootstrap.instrumentation.decorator; +import datadog.trace.api.KnownTagIds; import datadog.trace.api.TagMap; import datadog.trace.bootstrap.instrumentation.api.AgentSpan; import datadog.trace.bootstrap.instrumentation.api.Tags; @@ -22,7 +23,7 @@ private final TagMap.Entry spanKindEntry() { // decided to be cautious here, too. 
TagMap.Entry kindEntry = cachedSpanKindEntry; if (kindEntry == null) { - cachedSpanKindEntry = kindEntry = TagMap.Entry.create(Tags.SPAN_KIND, spanKind()); + cachedSpanKindEntry = kindEntry = TagMap.Entry.create(KnownTagIds.SPAN_KIND, spanKind()); } return kindEntry; } diff --git a/dd-java-agent/agent-bootstrap/src/main/java/datadog/trace/bootstrap/instrumentation/decorator/DatabaseClientDecorator.java b/dd-java-agent/agent-bootstrap/src/main/java/datadog/trace/bootstrap/instrumentation/decorator/DatabaseClientDecorator.java index 7336a059bdc..b1ddc014314 100644 --- a/dd-java-agent/agent-bootstrap/src/main/java/datadog/trace/bootstrap/instrumentation/decorator/DatabaseClientDecorator.java +++ b/dd-java-agent/agent-bootstrap/src/main/java/datadog/trace/bootstrap/instrumentation/decorator/DatabaseClientDecorator.java @@ -2,10 +2,10 @@ import static datadog.trace.api.gateway.Events.EVENTS; import static datadog.trace.bootstrap.instrumentation.api.ServiceNameSources.DB_CLIENT_SPLIT_BY_HOST; -import static datadog.trace.bootstrap.instrumentation.api.Tags.DB_TYPE; import datadog.appsec.api.blocking.BlockingException; import datadog.trace.api.Config; +import datadog.trace.api.KnownTagIds; import datadog.trace.api.cache.DDCache; import datadog.trace.api.cache.DDCaches; import datadog.trace.api.gateway.BlockResponseFunction; @@ -16,7 +16,6 @@ import datadog.trace.api.naming.SpanNaming; import datadog.trace.bootstrap.instrumentation.api.AgentSpan; import datadog.trace.bootstrap.instrumentation.api.AgentTracer; -import datadog.trace.bootstrap.instrumentation.api.Tags; import datadog.trace.bootstrap.instrumentation.api.UTF8BytesString; import java.util.function.BiConsumer; import java.util.function.BiFunction; @@ -70,11 +69,11 @@ public String getDbType() { */ public AgentSpan onConnection(final AgentSpan span, final CONNECTION connection) { if (connection != null) { - span.setTag(Tags.DB_USER, dbUser(connection)); + span.setTag(KnownTagIds.DB_USER, dbUser(connection)); 
onInstance(span, dbInstance(connection)); CharSequence hostName = dbHostname(connection); if (hostName != null) { - span.setTag(Tags.PEER_HOSTNAME, hostName); + span.setTag(KnownTagIds.PEER_HOSTNAME, hostName); if (Config.get().isDbClientSplitByHost()) { span.setServiceName(hostName.toString(), DB_CLIENT_SPLIT_BY_HOST); @@ -86,7 +85,7 @@ public AgentSpan onConnection(final AgentSpan span, final CONNECTION connection) protected AgentSpan onInstance(final AgentSpan span, final String dbInstance) { if (dbInstance != null) { - span.setTag(Tags.DB_INSTANCE, dbInstance); + span.setTag(KnownTagIds.DB_INSTANCE, dbInstance); String serviceName = dbClientService(dbInstance); if (null != serviceName) { span.setServiceName(serviceName, component()); @@ -149,7 +148,7 @@ public void onRawStatement(AgentSpan span, String sql) { protected void processDatabaseType(AgentSpan span, String dbType) { final NamingEntry namingEntry = CACHE.computeIfAbsent(dbType, NamingEntry::new); - span.setTag(DB_TYPE, namingEntry.dbType); + span.setTag(KnownTagIds.DB_TYPE, namingEntry.dbType); postProcessServiceAndOperationName(span, namingEntry); if (Config.get().isAppSecRaspEnabled() && dbType != null) { diff --git a/dd-java-agent/agent-bootstrap/src/main/java/datadog/trace/bootstrap/instrumentation/decorator/ServerDecorator.java b/dd-java-agent/agent-bootstrap/src/main/java/datadog/trace/bootstrap/instrumentation/decorator/ServerDecorator.java index 20b11038ffd..d7a79f0cbd6 100644 --- a/dd-java-agent/agent-bootstrap/src/main/java/datadog/trace/bootstrap/instrumentation/decorator/ServerDecorator.java +++ b/dd-java-agent/agent-bootstrap/src/main/java/datadog/trace/bootstrap/instrumentation/decorator/ServerDecorator.java @@ -1,15 +1,17 @@ package datadog.trace.bootstrap.instrumentation.decorator; import datadog.trace.api.DDTags; +import datadog.trace.api.KnownTagIds; import datadog.trace.api.TagMap; import datadog.trace.bootstrap.instrumentation.api.AgentSpan; import 
datadog.trace.bootstrap.instrumentation.api.Tags; public abstract class ServerDecorator extends BaseDecorator { + // id-keyed cached entries (set on every server span) so set() skips keyOf - see KnownTagIds private static final TagMap.Entry SPAN_KIND_ENTRY = - TagMap.Entry.create(Tags.SPAN_KIND, Tags.SPAN_KIND_SERVER); + TagMap.Entry.create(KnownTagIds.SPAN_KIND, Tags.SPAN_KIND_SERVER); private static final TagMap.Entry LANG_ENTRY = - TagMap.Entry.create(DDTags.LANGUAGE_TAG_KEY, DDTags.LANGUAGE_TAG_VALUE); + TagMap.Entry.create(KnownTagIds.LANGUAGE, DDTags.LANGUAGE_TAG_VALUE); @Override public AgentSpan afterStart(final AgentSpan span) { diff --git a/dd-java-agent/agent-bootstrap/src/main/java/datadog/trace/bootstrap/instrumentation/decorator/http/HttpResourceDecorator.java b/dd-java-agent/agent-bootstrap/src/main/java/datadog/trace/bootstrap/instrumentation/decorator/http/HttpResourceDecorator.java index 1458bd04eb1..c88d37d0aa7 100644 --- a/dd-java-agent/agent-bootstrap/src/main/java/datadog/trace/bootstrap/instrumentation/decorator/http/HttpResourceDecorator.java +++ b/dd-java-agent/agent-bootstrap/src/main/java/datadog/trace/bootstrap/instrumentation/decorator/http/HttpResourceDecorator.java @@ -1,10 +1,10 @@ package datadog.trace.bootstrap.instrumentation.decorator.http; import datadog.trace.api.Config; +import datadog.trace.api.KnownTagIds; import datadog.trace.api.normalize.HttpResourceNames; import datadog.trace.bootstrap.instrumentation.api.AgentSpan; import datadog.trace.bootstrap.instrumentation.api.ResourceNamePriorities; -import datadog.trace.bootstrap.instrumentation.api.Tags; import datadog.trace.bootstrap.instrumentation.api.URIUtils; import datadog.trace.bootstrap.instrumentation.api.UTF8BytesString; @@ -42,7 +42,7 @@ public final AgentSpan withRoute( if (encoded) { routeTag = URIUtils.decode(route.toString()); } - span.setTag(Tags.HTTP_ROUTE, routeTag); + span.setTag(KnownTagIds.HTTP_ROUTE, routeTag); if 
(Config.get().isHttpServerRouteBasedNaming()) { final CharSequence resourceName = HttpResourceNames.join(method, route); span.setResourceName(resourceName, ResourceNamePriorities.HTTP_FRAMEWORK_ROUTE); diff --git a/dd-java-agent/instrumentation/jdbc/src/main/java/datadog/trace/instrumentation/jdbc/JDBCDecorator.java b/dd-java-agent/instrumentation/jdbc/src/main/java/datadog/trace/instrumentation/jdbc/JDBCDecorator.java index b6455d74372..b81bc1c69b1 100644 --- a/dd-java-agent/instrumentation/jdbc/src/main/java/datadog/trace/instrumentation/jdbc/JDBCDecorator.java +++ b/dd-java-agent/instrumentation/jdbc/src/main/java/datadog/trace/instrumentation/jdbc/JDBCDecorator.java @@ -11,6 +11,7 @@ import datadog.trace.api.BaseHash; import datadog.trace.api.Config; import datadog.trace.api.DDTraceId; +import datadog.trace.api.KnownTagIds; import datadog.trace.api.naming.SpanNaming; import datadog.trace.api.propagation.W3CTraceParent; import datadog.trace.api.telemetry.LogCollector; @@ -277,7 +278,7 @@ private AgentSpan withQueryInfo(AgentSpan span, DBQueryInfo info, CharSequence c span.setResourceName(DB_QUERY); } span.context().setIntegrationName(component); - return span.setTag(Tags.COMPONENT, component); + return span.setTag(KnownTagIds.COMPONENT, component); } public boolean isOracle(final DBInfo dbInfo) { diff --git a/internal-api/src/jmh/java/datadog/trace/api/AttrStoreBenchmark.java b/internal-api/src/jmh/java/datadog/trace/api/AttrStoreBenchmark.java index bc7488eb4c6..80745ef691b 100644 --- a/internal-api/src/jmh/java/datadog/trace/api/AttrStoreBenchmark.java +++ b/internal-api/src/jmh/java/datadog/trace/api/AttrStoreBenchmark.java @@ -1,5 +1,6 @@ package datadog.trace.api; +import datadog.trace.util.TagSet; import java.util.concurrent.TimeUnit; import org.openjdk.jmh.annotations.Benchmark; import org.openjdk.jmh.annotations.BenchmarkMode; @@ -16,16 +17,24 @@ import org.openjdk.jmh.infra.Blackhole; /** - * Phase-1 validation: store a span's known tags three ways and 
measure throughput + allocation - * ({@code -prof gc}). Models the real lifecycle — set N tags, then iterate once (serialize). + * How much headroom is left in the dense known-tag store? Builds a span's known tags three ways and + * measures throughput (+ allocation via {@code -prof gc}). Models the real lifecycle — set N tags, + * then iterate once (serialize). + * + *

    Phase 1 (dense storage inside {@code OptimizedTagMap}) has already landed, so {@code current} + * below is the LIVE dense store, NOT the old Entry-per-tag design. The Entry[]->dense win is + * evidenced elsewhere (petclinic CPU/req, JFR); this benchmark now isolates what's LEFT to chase: * *

      - *
    1. {@code current}: today's {@link TagMap} (OptimizedTagMap, Entry[] knownEntries) — one - * {@code Entry} allocated per tag. - *
    2. {@code dense}: dense {@code long[] ids + Object[] values} — no per-tag Entry (boxes the one - * int tag). The phase-1 design. - *
    3. {@code pojo}: a hand-written class with typed fields — the phase-2 codegen endgame (no - * Entry, no boxing, no arrays-per-tag). + *
    4. {@code current}: the live {@link TagMap} ({@code OptimizedTagMap}) — already dense ({@code + * long[] knownIds + Object[] knownValues}, no per-tag {@code Entry}), plus the full TagMap + * machinery (size bookkeeping, lazy buckets, keyOf upgrade path). + *
    5. {@code dense}: a bare {@code long[] ids + Object[] values} store — strips the TagMap + * machinery, so {@code current} vs {@code dense} measures that overhead. Both box the one + * int. + *
    6. {@code pojo}: a hand-written class with typed fields — the codegen endgame (no {@code + * Entry}, no boxing, no arrays-per-tag); shows the ceiling {@code dense}->{@code pojo} + * buys. *
    * * Tag set is db.client-like (the dominant PetClinic span): 11 strings + 1 int. @@ -35,7 +44,7 @@ @Fork(1) @Warmup(iterations = 3) @Measurement(iterations = 5) -@Threads(1) +@Threads(8) @State(Scope.Benchmark) public class AttrStoreBenchmark { static final String[] NAMES = { @@ -64,9 +73,10 @@ public void setup() { IDS[i] = KnownTags.tagId(i + 1, i, NAMES[i]); // serial=i+1, fieldPos=i VALUES[i] = (i == PORT_IDX) ? Integer.valueOf(5432) : ("value-" + i); } - final java.util.HashMap nameToId = new java.util.HashMap<>(N * 2); + final TagSet.Data nameTable = TagSet.Support.create(NAMES); + final long[] slotIds = new long[nameTable.names.length]; for (int i = 0; i < N; ++i) { - nameToId.put(NAMES[i], IDS[i]); + slotIds[TagSet.Support.indexOf(nameTable.hashes, nameTable.names, NAMES[i])] = IDS[i]; } KnownTags.register( new KnownTags.Resolver() { @@ -78,8 +88,8 @@ public String nameOf(long tagId) { @Override public long keyOf(String name) { - Long id = nameToId.get(name); - return id == null ? 0L : id; + int slot = TagSet.Support.indexOf(nameTable.hashes, nameTable.names, name); + return slot < 0 ? 
0L : slotIds[slot]; } @Override diff --git a/internal-api/src/jmh/java/datadog/trace/api/TagMapInsertionBaselineBenchmark.java b/internal-api/src/jmh/java/datadog/trace/api/TagMapInsertionBaselineBenchmark.java index 26e49376e16..9ddbe041480 100644 --- a/internal-api/src/jmh/java/datadog/trace/api/TagMapInsertionBaselineBenchmark.java +++ b/internal-api/src/jmh/java/datadog/trace/api/TagMapInsertionBaselineBenchmark.java @@ -29,7 +29,7 @@ @Fork(1) @Warmup(iterations = 3) @Measurement(iterations = 5) -@Threads(1) +@Threads(8) @State(Scope.Benchmark) public class TagMapInsertionBaselineBenchmark { // same tag set as TagMapInsertionBenchmark for an apples-to-apples comparison diff --git a/internal-api/src/jmh/java/datadog/trace/api/TagMapInsertionBenchmark.java b/internal-api/src/jmh/java/datadog/trace/api/TagMapInsertionBenchmark.java index 50ad76869b5..f3a324bb550 100644 --- a/internal-api/src/jmh/java/datadog/trace/api/TagMapInsertionBenchmark.java +++ b/internal-api/src/jmh/java/datadog/trace/api/TagMapInsertionBenchmark.java @@ -1,5 +1,6 @@ package datadog.trace.api; +import datadog.trace.util.TagSet; import java.util.concurrent.TimeUnit; import org.openjdk.jmh.annotations.Benchmark; import org.openjdk.jmh.annotations.BenchmarkMode; @@ -11,7 +12,6 @@ import org.openjdk.jmh.annotations.Scope; import org.openjdk.jmh.annotations.Setup; import org.openjdk.jmh.annotations.State; -import org.openjdk.jmh.annotations.TearDown; import org.openjdk.jmh.annotations.Threads; import org.openjdk.jmh.annotations.Warmup; import org.openjdk.jmh.infra.Blackhole; @@ -23,16 +23,44 @@ *

    Tag ids are built via {@link KnownTags#tagId} (which uses the runtime's own name hash), so the * comparison is faithful even on the bucket-fallback path. * - *

    The tags use distinct {@code fieldPos} values (no collisions), so every known tag lands in its - * positional slot. byId skips string hashing and the keyOf round-trip entirely; byString pays - * keyOf(name) to resolve the id before slotting. + *

    byId stores straight into the dense known-tag store at its positional slot ({@code + * knownValues[fieldPos]}, O(1), no scan); byString pays {@code keyOf(name)} to resolve the id first + * (via the real {@link datadog.trace.util.TagSet} table) and then slots it the same way. The bucket + * baseline (no resolver, master-equivalent) is {@link TagMapInsertionBaselineBenchmark}. + * + * + * Apple M1 Max (10 core) - 8 threads - 1 fork - Java 8 (Zulu 8.0.382) - positional dense store + * + * Benchmark Mode Cnt Score Error Units + * insertById thrpt 5 126235943.1 ± 11653584.6 ops/s + * insertByString thrpt 5 57355057.5 ± 2976623.2 ops/s + * getObjectById thrpt 5 129726670.1 ± 10877596.1 ops/s + * getObjectByString thrpt 5 73544340.8 ± 1349944.7 ops/s + * getEntryById thrpt 5 129117822.8 ± 16455290.0 ops/s + * getEntryByString thrpt 5 73422181.5 ± 2210885.4 ops/s + * baseline.insertByString_noResolver thrpt 5 43334158.2 ± 4699836.5 ops/s (master path) + * baseline.getByString_noResolver thrpt 5 107969497.0 ± 7160811.9 ops/s (master path) + * + * + *

      <ul> + *
    <li>getObject by id vs by name: 129.7M vs 73.5M (~1.77x) — the common read. The whole gap + * is {@code keyOf}; both hit the slot and return the raw value with no Entry. Id-keyed value + * reads win. + *
    <li>getObject ~= getEntry (130M ~= 129M): the Entry "materialization penalty" vanishes for + * value use — escape analysis scalar-replaces the transient Entry when the caller consumes its + * value rather than retaining it, so {@code getEntry} needs no replacement here. (getEntryReader + * was measured and dropped: its eager name resolution made it the slowest read.) + *
    <li>insertById ~3x the bucket baseline (126M vs 43M) — O(1) positional claim + no per-tag + * Entry; insertByString +32% (57M vs 43M) even paying {@code keyOf}, so the former + * name-insert regression is gone. + * </ul>
    */ @BenchmarkMode(Mode.Throughput) @OutputTimeUnit(TimeUnit.SECONDS) @Fork(1) @Warmup(iterations = 3) @Measurement(iterations = 5) -@Threads(1) +@Threads(8) @State(Scope.Benchmark) public class TagMapInsertionBenchmark { // a representative HTTP-server-ish tag set @@ -57,25 +85,24 @@ public class TagMapInsertionBenchmark { "env", }; + // globalSerial = i + 1 (unique, non-zero); fieldPos = i (the positional slot in the dense store) static final long[] IDS = new long[NAMES.length]; static final Object[] VALUES = new Object[NAMES.length]; - // a pre-populated (slotted) map for the read benchmarks; built in setup once IDS exist - TagMap readMap; - - @Setup(Level.Trial) - public void setup() { + static { for (int i = 0; i < NAMES.length; ++i) { - // globalSerial = i + 1 (unique, non-zero); fieldPos = i (distinct - no collisions) IDS[i] = KnownTags.tagId(i + 1, i, NAMES[i]); VALUES[i] = "value-" + i; } - // Representative resolver: nameOf is a dense array index by globalSerial; keyOf is a hash-table - // lookup (a stand-in for a generated minimal-perfect-hash / open-addressed name->id table). - // A linear scan here would make insertByString look artificially bad and misrepresent the cost. - final java.util.HashMap nameToId = new java.util.HashMap<>(NAMES.length * 2); + // Register the resolver at CLASS INIT, not in @Setup: a benchmark-class @Setup and the + // per-thread ReadMap @Setup have no guaranteed cross-scope ordering, but class init does (any + // access to IDS triggers it before ReadMap.build runs). Process-global for this benchmark fork. + // nameOf is a dense array index by globalSerial; keyOf goes through the real open-addressed + // TagSet table (the algorithm KnownTagIds uses in production). 
+ final TagSet.Data nameTable = TagSet.Support.create(NAMES); + final long[] slotIds = new long[nameTable.names.length]; for (int i = 0; i < NAMES.length; ++i) { - nameToId.put(NAMES[i], IDS[i]); + slotIds[TagSet.Support.indexOf(nameTable.hashes, nameTable.names, NAMES[i])] = IDS[i]; } KnownTags.register( new KnownTags.Resolver() { @@ -89,8 +116,8 @@ public String nameOf(long tagId) { @Override public long keyOf(String name) { - Long id = nameToId.get(name); - return id == null ? 0L : id; + int slot = TagSet.Support.indexOf(nameTable.hashes, nameTable.names, name); + return slot < 0 ? 0L : slotIds[slot]; } @Override @@ -98,17 +125,23 @@ public int slotCount() { return NAMES.length; // fieldPos 0..NAMES.length-1 } }); - - // pre-populate the read map by id (entries land in their slots) - this.readMap = TagMap.create(); - for (int i = 0; i < IDS.length; ++i) { - this.readMap.set(IDS[i], VALUES[i]); - } } - @TearDown(Level.Trial) - public void tearDown() { - KnownTags.register(null); + /** + * Pre-populated read map, PER-THREAD (Scope.Thread): each thread owns its map AND its reused + * reader flyweight, so getEntryReader doesn't contend on a shared flyweight under @Threads(8). 
+ */ + @State(Scope.Thread) + public static class ReadMap { + OptimizedTagMap map; + + @Setup(Level.Trial) + public void build() { + this.map = (OptimizedTagMap) TagMap.create(); + for (int i = 0; i < IDS.length; ++i) { + this.map.set(IDS[i], VALUES[i]); + } + } } @Benchmark @@ -129,17 +162,33 @@ public TagMap insertByString() { return map; } + // ---- value reads (getObject - raw value, no Entry; the common read) ---- + @Benchmark + public void getObjectById(ReadMap rm, Blackhole bh) { + for (int i = 0; i < IDS.length; ++i) { + bh.consume(rm.map.getObject(IDS[i])); + } + } + + @Benchmark + public void getObjectByString(ReadMap rm, Blackhole bh) { + for (int i = 0; i < NAMES.length; ++i) { + bh.consume(rm.map.getObject(NAMES[i])); + } + } + + // ---- entry reads (materializes an Entry per call; EA elides it for transient value use) ---- @Benchmark - public void getById(Blackhole bh) { + public void getEntryById(ReadMap rm, Blackhole bh) { for (int i = 0; i < IDS.length; ++i) { - bh.consume(this.readMap.getEntry(IDS[i])); + bh.consume(rm.map.getEntry(IDS[i]).objectValue()); } } @Benchmark - public void getByString(Blackhole bh) { + public void getEntryByString(ReadMap rm, Blackhole bh) { for (int i = 0; i < NAMES.length; ++i) { - bh.consume(this.readMap.getEntry(NAMES[i])); + bh.consume(rm.map.getEntry(NAMES[i]).objectValue()); } } } diff --git a/internal-api/src/jmh/java/datadog/trace/util/KeyOfBenchmark.java b/internal-api/src/jmh/java/datadog/trace/util/KeyOfBenchmark.java new file mode 100644 index 00000000000..48021829c7d --- /dev/null +++ b/internal-api/src/jmh/java/datadog/trace/util/KeyOfBenchmark.java @@ -0,0 +1,255 @@ +package datadog.trace.util; + +import java.util.HashMap; +import java.util.Map; +import java.util.function.Supplier; +import org.openjdk.jmh.annotations.Benchmark; +import org.openjdk.jmh.annotations.Fork; +import org.openjdk.jmh.annotations.Measurement; +import org.openjdk.jmh.annotations.Scope; +import org.openjdk.jmh.annotations.State; 
+import org.openjdk.jmh.annotations.Threads; +import org.openjdk.jmh.annotations.Warmup; + +/** + * name -> id resolution shootout (the {@code keyOf} path), built on the generic {@link TagSet}. + * + *
      <ul> + *
    <li>tagSet — {@code TagSet.Support.indexOf} over static-final {@code int[] + * hashes} / {@code String[] names} (refs fold to constants) + a parallel {@code long[] ids}. + *
    <li>tagSet_throughClass — same, but via a {@code TagSet} instance (an + * instance-field load of hashes/names) — isolates the wrapper indirection vs static fields. + *
    <li>hashMap — {@code HashMap<String, Long>} (boxes the value). + *
    <li>switch — hand-written string {@code switch} (the thing keyOf replaces). At 16 cases + * it inlines fine; the at-scale degradation (hundreds of cases over FreqInlineSize) shows up + * against the real generated keyOf, not here. + * </ul>
    + * + *

    Two term flavors: interned (realistic — instrumentation passes string literals → the + * {@code ==} fast path in eq) and copies (non-interned → forces {@code String.equals}). + * Terms are hit-dominated. + * + * + * Apple M1 Max (10 core) - 8 threads (per-thread state) - 2 forks - Java 8 (Zulu 8.0.382) + * + * Benchmark Mode Cnt Score Error Units + * KeyOfBenchmark.aa_baseline_termSelection thrpt 6 2743246161.5 ± 29519843.7 ops/s + * KeyOfBenchmark.tagSet thrpt 6 2275407420.3 ± 35217527.6 ops/s + * KeyOfBenchmark.tagSet_throughClass thrpt 6 2036161909.9 ± 49813775.7 ops/s + * KeyOfBenchmark.hashMap thrpt 6 1889985340.4 ± 46434121.2 ops/s + * KeyOfBenchmark.switch_ thrpt 6 1132557957.9 ±128775728.2 ops/s + * // copies (non-interned): tagSet 1843M, tagSet_throughClass 1708M, hashMap 1593M, switch_ 1137M + * + * + *

      <ul> + *
    <li>tagSet ~2x the switch (2275M vs 1133M) at only 16 cases — the gap widens toward the + * generated hundreds, where the switch exceeds the inline budget. The keyOf swap's win. + *
    <li>tagSet ~20% over HashMap (2275M vs 1890M). + *
    <li>static ~12% over the instance (tagSet 2275M vs tagSet_throughClass 2036M) — folded + * static-final arrays beat the instance-field load; pull {@code Data} into your own statics. + *
    <li>The switch is interning-insensitive (1133≈1137, dispatch-bound); hash contenders gain + * ~16-19% interned via the {@code ==} fast path. + * </ul>
    + */ +@Fork(2) +@Warmup(iterations = 2) +@Measurement(iterations = 3) +@Threads(8) +@State(Scope.Thread) +public class KeyOfBenchmark { + static final long UNKNOWN = 0L; + + static final String[] NAMES_IN = { + "span.type", "component", "span.kind", "db.type", "db.instance", "db.statement", + "peer.hostname", "peer.port", "http.method", "http.route", "http.status_code", "http.url", + "error", "resource", "service", "operation" + }; + + /** ids parallel to NAMES_IN — id == index+1, matched across all contenders. */ + static final long[] IDS_IN = + init( + () -> { + long[] ids = new long[NAMES_IN.length]; + for (int j = 0; j < ids.length; j++) { + ids[j] = j + 1L; + } + return ids; + }); + + // fastest path: build once, pull into static final so the refs fold + static final int[] HASHES; + static final String[] NAMES; + static final long[] IDS; + + static { + TagSet.Data data = TagSet.Support.create(NAMES_IN); + long[] ids = new long[data.names.length]; + for (int j = 0; j < NAMES_IN.length; j++) { + ids[TagSet.Support.indexOf(data.hashes, data.names, NAMES_IN[j])] = IDS_IN[j]; + } + HASHES = data.hashes; + NAMES = data.names; + IDS = ids; + } + + static final Map HASH_MAP = + init( + () -> { + Map m = new HashMap<>(NAMES_IN.length * 2); + for (int j = 0; j < NAMES_IN.length; j++) { + m.put(NAMES_IN[j], IDS_IN[j]); + } + return m; + }); + + /** Convenience instance — the through-the-class path (instance-field loads vs folded statics). 
*/ + static final TagSet TAG_SET = TagSet.of(NAMES_IN); + + // hit-dominated, two misses; interned and non-interned copies + static final String[] TERMS = { + "span.type", "component", "span.kind", "db.type", "db.instance", "db.statement", + "peer.hostname", "peer.port", "http.method", "http.route", "http.status_code", "http.url", + "error", "resource", "service", "operation", "unknown.tag", "custom.attr" + }; + + static final String[] TERM_COPIES = + init( + () -> { + String[] copies = new String[TERMS.length]; + for (int i = 0; i < TERMS.length; i++) { + copies[i] = new String(TERMS[i]); // defeat interning + } + return copies; + }); + + int termIndex = 0; // per-thread (Scope.Thread) — no shared-counter contention under @Threads(8) + + String nextTerm() { + int i = termIndex + 1; + if (i >= TERMS.length) { + i = 0; + } + termIndex = i; + return TERMS[i]; + } + + String nextTermCopy() { + int i = termIndex + 1; + if (i >= TERM_COPIES.length) { + i = 0; + } + termIndex = i; + return TERM_COPIES[i]; + } + + static T init(Supplier supplier) { + return supplier.get(); + } + + // ---- resolvers ---- + static long tagSetKeyOf(String t) { + int slot = TagSet.Support.indexOf(HASHES, NAMES, t); // folded static-final refs + return slot < 0 ? UNKNOWN : IDS[slot]; + } + + static long tagSetThroughClassKeyOf(String t) { + int slot = TAG_SET.indexOf(t); // instance-field load of hashes/names + return slot < 0 ? UNKNOWN : IDS[slot]; + } + + static long hashMapKeyOf(String t) { + Long v = HASH_MAP.get(t); + return v == null ? 
UNKNOWN : v.longValue(); + } + + static long switchKeyOf(String t) { + switch (t) { + case "span.type": + return 1L; + case "component": + return 2L; + case "span.kind": + return 3L; + case "db.type": + return 4L; + case "db.instance": + return 5L; + case "db.statement": + return 6L; + case "peer.hostname": + return 7L; + case "peer.port": + return 8L; + case "http.method": + return 9L; + case "http.route": + return 10L; + case "http.status_code": + return 11L; + case "http.url": + return 12L; + case "error": + return 13L; + case "resource": + return 14L; + case "service": + return 15L; + case "operation": + return 16L; + default: + return UNKNOWN; + } + } + + // ---- interned terms (realistic) ---- + @Benchmark + public String aa_baseline_termSelection() { + return nextTerm(); + } + + @Benchmark + public long tagSet() { + return tagSetKeyOf(nextTerm()); + } + + @Benchmark + public long tagSet_throughClass() { + return tagSetThroughClassKeyOf(nextTerm()); + } + + @Benchmark + public long hashMap() { + return hashMapKeyOf(nextTerm()); + } + + @Benchmark + public long switch_() { + return switchKeyOf(nextTerm()); + } + + // ---- non-interned copies (forces equals) ---- + @Benchmark + public String aa_baseline_termSelectionCopy() { + return nextTermCopy(); + } + + @Benchmark + public long tagSet_copy() { + return tagSetKeyOf(nextTermCopy()); + } + + @Benchmark + public long tagSet_throughClass_copy() { + return tagSetThroughClassKeyOf(nextTermCopy()); + } + + @Benchmark + public long hashMap_copy() { + return hashMapKeyOf(nextTermCopy()); + } + + @Benchmark + public long switch_copy() { + return switchKeyOf(nextTermCopy()); + } +} diff --git a/internal-api/src/jmh/java/datadog/trace/util/SetBenchmark.java b/internal-api/src/jmh/java/datadog/trace/util/SetBenchmark.java index 144e4748400..eec1bbee95e 100644 --- a/internal-api/src/jmh/java/datadog/trace/util/SetBenchmark.java +++ b/internal-api/src/jmh/java/datadog/trace/util/SetBenchmark.java @@ -1,44 +1,62 @@ package 
datadog.trace.util; import java.util.Arrays; -import java.util.Collections; import java.util.HashSet; import java.util.TreeSet; -import java.util.concurrent.ThreadLocalRandom; import java.util.function.Supplier; import org.openjdk.jmh.annotations.Benchmark; import org.openjdk.jmh.annotations.Fork; import org.openjdk.jmh.annotations.Measurement; +import org.openjdk.jmh.annotations.Scope; +import org.openjdk.jmh.annotations.State; import org.openjdk.jmh.annotations.Threads; import org.openjdk.jmh.annotations.Warmup; /** - * + * Ways to represent a small set of strings and test membership, split into hit and miss lookups + * (different cost shapes per structure). Lookups are interned (the {@code ==} fast path); misses + * are short and never present. Per-thread state ({@code @State(Scope.Thread)}) keeps the rotation + * counter off the shared-write path under {@code @Threads(8)} — an earlier shared-counter version + * capped the fast structures at a ~1.4B contention ceiling (since superseded by the numbers below). * *
      <ul> - * Benchmark showing possible ways to represent and check if a set includes an elememt... - *
    <li>(RECOMMENDED) HashSet - on par with TreeSet - idiomatic - *
    <li>(RECOMMENDED) TreeMap - on par with HashSet - better solution if custom comparator is - * needed (see CaseInsensitiveMapBenchmark) - *
    <li>array - slower than HashSet - *
    <li>sortedArray - slowest - slower than array for common case of small arrays + *
    <li>tagSetSupport (static) is the fastest membership path — 2336M hit / 2170M miss. It + * beats the TagSet instance ({@code tagSet_*}) by ~7% (hit) to ~12% (miss): the instance pays + * an instance-field load of hashes/names, while {@code Support.indexOf} over {@code static + * final} arrays lets the refs fold to constants. Matches KeyOfBenchmark's ~12% static-vs- + * instance gap. So when the set is fixed, pull {@code Data} into your own static finals. + *
    <li>vs HashSet — the static path is ~12% faster on hit and ~par on miss. But HashSet was + * noisy here (±22% error) while TagSet was tight (±2-7%), so TagSet also wins on + * predictability — and is allocation-free and positional-capable. + *
    <li>array / sortedArray / treeSet cluster ~0.65-1.0B — they compare/scan per element, so they + * slow on miss (hit early-exits; miss does the full scan / binary descent / tree walk). + * TreeSet is NOT uniquely slowest — worth it only for a custom comparator (case-insensitive, + * dodging {@code toLowerCase}), not speed. * </ul>
    * * - * MacBook M1 - 8 threads - Java 21 - * 1/3 not found rate + * Apple M1 Max (10 core) - 8 threads (per-thread state) - 2 forks - Java 8 (Zulu 8.0.382) * - * Benchmark Mode Cnt Score Error Units - * SetBenchmark.contains_array thrpt 6 645561886.327 ± 100781717.494 ops/s - * SetBenchmark.contains_hashSet thrpt 6 1536236680.235 ± 114966961.506 ops/s - * SetBenchmark.contains_sortedArray thrpt 6 571476939.441 ± 21334620.460 ops/s - * SetBenchmark.contains_treeSet thrpt 6 1557663759.411 ± 95343683.124 ops/s + * Benchmark Mode Cnt Score Error Units + * SetBenchmark.array_hit thrpt 6 995578895.732 ± 73709080.997 ops/s + * SetBenchmark.array_miss thrpt 6 649860848.470 ± 32489300.626 ops/s + * SetBenchmark.hashSet_hit thrpt 6 2081738804.271 ± 464349157.190 ops/s + * SetBenchmark.hashSet_miss thrpt 6 2136501411.026 ± 474132929.024 ops/s + * SetBenchmark.sortedArray_hit thrpt 6 837595967.794 ± 113538780.712 ops/s + * SetBenchmark.sortedArray_miss thrpt 6 692064118.699 ± 25752553.077 ops/s + * SetBenchmark.tagSet_hit thrpt 6 2184722734.028 ± 61054981.099 ops/s + * SetBenchmark.tagSet_miss thrpt 6 1933588009.009 ± 159869680.982 ops/s + * SetBenchmark.tagSetSupport_hit thrpt 6 2335685599.706 ± 52460762.937 ops/s + * SetBenchmark.tagSetSupport_miss thrpt 6 2169715463.018 ± 141321499.862 ops/s + * SetBenchmark.treeSet_hit thrpt 6 798251906.675 ± 39041398.413 ops/s + * SetBenchmark.treeSet_miss thrpt 6 667078954.487 ± 56517120.187 ops/s * */ @Fork(2) @Warmup(iterations = 2) @Measurement(iterations = 3) @Threads(8) +@State(Scope.Thread) public class SetBenchmark { static final String[] STRINGS = new String[] { @@ -60,45 +78,60 @@ static T init(Supplier supplier) { return supplier.get(); } - static final String[] LOOKUPS = + /** Present in the set (interned). */ + static final String[] HITS = STRINGS; + + /** Never present. 
*/ + static final String[] MISSES = init( () -> { - String[] lookups = Arrays.copyOf(STRINGS, STRINGS.length * 10); - - for (int i = 0; i < STRINGS.length; ++i) { - lookups[STRINGS.length + i] = new String(STRINGS[i]); + String[] misses = new String[STRINGS.length * 4]; + for (int i = 0; i < misses.length; ++i) { + misses[i] = "dne-" + i; } - - // 2 / 3 of the key look-ups miss the set - for (int i = STRINGS.length * 2; i < lookups.length; ++i) { - lookups[i] = "dne-" + ThreadLocalRandom.current().nextInt(); - } - - Collections.shuffle(Arrays.asList(lookups)); - return lookups; + return misses; }); - static int sharedLookupIndex = 0; + int hitIndex = 0; // per-thread (Scope.Thread) — no shared-counter contention under @Threads(8) + int missIndex = 0; - static String nextString() { - int localIndex = ++sharedLookupIndex; - if (localIndex >= LOOKUPS.length) { - sharedLookupIndex = localIndex = 0; + String nextHit() { + int i = hitIndex + 1; + if (i >= HITS.length) { + i = 0; } - return LOOKUPS[localIndex]; + hitIndex = i; + return HITS[i]; + } + + String nextMiss() { + int i = missIndex + 1; + if (i >= MISSES.length) { + i = 0; + } + missIndex = i; + return MISSES[i]; } static final String[] ARRAY = STRINGS; - @Benchmark - public boolean contains_array() { - String needle = nextString(); + static boolean arrayContains(String needle) { for (String str : ARRAY) { if (needle.equals(str)) return true; } return false; } + @Benchmark + public boolean array_hit() { + return arrayContains(nextHit()); + } + + @Benchmark + public boolean array_miss() { + return arrayContains(nextMiss()); + } + static final String[] SORTED_ARRAY = init( () -> { @@ -108,21 +141,70 @@ public boolean contains_array() { }); @Benchmark - public boolean contains_sortedArray() { - return (Arrays.binarySearch(SORTED_ARRAY, nextString()) != -1); + public boolean sortedArray_hit() { + return Arrays.binarySearch(SORTED_ARRAY, nextHit()) >= 0; + } + + @Benchmark + public boolean sortedArray_miss() { + 
return Arrays.binarySearch(SORTED_ARRAY, nextMiss()) >= 0; } static final HashSet HASH_SET = new HashSet<>(Arrays.asList(STRINGS)); @Benchmark - public boolean contains_hashSet() { - return HASH_SET.contains(nextString()); + public boolean hashSet_hit() { + return HASH_SET.contains(nextHit()); + } + + @Benchmark + public boolean hashSet_miss() { + return HASH_SET.contains(nextMiss()); } static final TreeSet TREE_SET = new TreeSet<>(Arrays.asList(STRINGS)); @Benchmark - public boolean contains_treeSet() { - return HASH_SET.contains(nextString()); + public boolean treeSet_hit() { + return TREE_SET.contains(nextHit()); + } + + @Benchmark + public boolean treeSet_miss() { + return TREE_SET.contains(nextMiss()); + } + + static final TagSet TAG_SET = TagSet.of(STRINGS); + + @Benchmark + public boolean tagSet_hit() { + return TAG_SET.contains(nextHit()); + } + + @Benchmark + public boolean tagSet_miss() { + return TAG_SET.contains(nextMiss()); + } + + // The static Support path: hashes/names built once into static-final arrays (refs fold to + // constants) and probed directly via Support.indexOf -- vs tagSet_* above, which loads them + // through a TagSet instance. Mirrors KeyOfBenchmark's tagSet (static) vs tagSet_throughClass. 
+ static final int[] SUPPORT_HASHES; + static final String[] SUPPORT_NAMES; + + static { + TagSet.Data data = TagSet.Support.create(STRINGS); + SUPPORT_HASHES = data.hashes; + SUPPORT_NAMES = data.names; + } + + @Benchmark + public boolean tagSetSupport_hit() { + return TagSet.Support.indexOf(SUPPORT_HASHES, SUPPORT_NAMES, nextHit()) >= 0; + } + + @Benchmark + public boolean tagSetSupport_miss() { + return TagSet.Support.indexOf(SUPPORT_HASHES, SUPPORT_NAMES, nextMiss()) >= 0; } } diff --git a/internal-api/src/main/java/datadog/trace/api/KnownTagIds.java b/internal-api/src/main/java/datadog/trace/api/KnownTagIds.java index f7e420e06df..3802fd955af 100644 --- a/internal-api/src/main/java/datadog/trace/api/KnownTagIds.java +++ b/internal-api/src/main/java/datadog/trace/api/KnownTagIds.java @@ -1,6 +1,7 @@ package datadog.trace.api; import datadog.trace.bootstrap.instrumentation.api.Tags; +import datadog.trace.util.TagSet; /** * Hand-assigned tag-id constants for well-known tags, plus the {@link KnownTags.Resolver} that @@ -135,6 +136,84 @@ public final class KnownTagIds { public static final long DB_POOL_NAME = KnownTags.tagId(DB_POOL_NAME_SERIAL, 25, Tags.DB_POOL_NAME); + // Open-addressed name -> id table backing keyOf (data, not a switch): scales flat as the known + // set grows, where a generated switch eventually falls off the inline threshold. KEYOF_NAMES and + // KEYOF_VALUES are parallel; the table places names by hash and a parallel ids[] by slot. 
+ private static final String[] KEYOF_NAMES = { + Tags.ERROR, + DDTags.PARENT_ID, + DDTags.BASE_SERVICE, + Tags.VERSION, + ENV, + DDTags.DJM_ENABLED, + DDTags.DSM_ENABLED, + DDTags.TRACER_HOST, + DDTags.DD_INTEGRATION, + DDTags.DD_SVC_SRC, + Tags.PEER_SERVICE, + DDTags.PEER_SERVICE_REMAPPED_FROM, + Tags.HTTP_METHOD, + Tags.HTTP_ROUTE, + Tags.HTTP_URL, + Tags.PEER_HOSTNAME, + Tags.PEER_HOST_IPV4, + Tags.PEER_HOST_IPV6, + Tags.PEER_PORT, + Tags.COMPONENT, + Tags.SPAN_KIND, + DDTags.LANGUAGE_TAG_KEY, + Tags.DB_TYPE, + Tags.DB_INSTANCE, + Tags.DB_USER, + Tags.DB_OPERATION, + Tags.DB_POOL_NAME, + }; + + private static final long[] KEYOF_VALUES = { + ERROR, + PARENT_ID, + BASE_SERVICE, + VERSION, + ENV_ID, + DJM_ENABLED, + DSM_ENABLED, + TRACER_HOST_ID, + INTEGRATION_ID, + SVC_SRC_ID, + PEER_SERVICE, + PEER_SERVICE_REMAPPED_FROM, + HTTP_METHOD, + HTTP_ROUTE, + HTTP_URL, + PEER_HOSTNAME, + PEER_HOST_IPV4, + PEER_HOST_IPV6, + PEER_PORT, + COMPONENT, + SPAN_KIND, + LANGUAGE, + DB_TYPE, + DB_INSTANCE, + DB_USER, + DB_OPERATION, + DB_POOL_NAME, + }; + + private static final int[] KEYOF_HASHES; + private static final String[] KEYOF_KEYS; + private static final long[] KEYOF_IDS; + + static { + TagSet.Data data = TagSet.Support.create(KEYOF_NAMES); + long[] ids = new long[data.names.length]; + for (int j = 0; j < KEYOF_NAMES.length; j++) { + ids[TagSet.Support.indexOf(data.hashes, data.names, KEYOF_NAMES[j])] = KEYOF_VALUES[j]; + } + KEYOF_HASHES = data.hashes; + KEYOF_KEYS = data.names; + KEYOF_IDS = ids; + } + static final KnownTags.Resolver RESOLVER = new KnownTags.Resolver() { @Override @@ -206,64 +285,8 @@ public int slotCount() { @Override public long keyOf(String name) { - switch (name) { - case Tags.ERROR: - return ERROR; - case DDTags.PARENT_ID: - return PARENT_ID; - case DDTags.BASE_SERVICE: - return BASE_SERVICE; - case Tags.VERSION: - return VERSION; - case ENV: - return ENV_ID; - case DDTags.DJM_ENABLED: - return DJM_ENABLED; - case DDTags.DSM_ENABLED: - return 
DSM_ENABLED; - case DDTags.TRACER_HOST: - return TRACER_HOST_ID; - case DDTags.DD_INTEGRATION: - return INTEGRATION_ID; - case DDTags.DD_SVC_SRC: - return SVC_SRC_ID; - case Tags.PEER_SERVICE: - return PEER_SERVICE; - case DDTags.PEER_SERVICE_REMAPPED_FROM: - return PEER_SERVICE_REMAPPED_FROM; - case Tags.HTTP_METHOD: - return HTTP_METHOD; - case Tags.HTTP_ROUTE: - return HTTP_ROUTE; - case Tags.HTTP_URL: - return HTTP_URL; - case Tags.PEER_HOSTNAME: - return PEER_HOSTNAME; - case Tags.PEER_HOST_IPV4: - return PEER_HOST_IPV4; - case Tags.PEER_HOST_IPV6: - return PEER_HOST_IPV6; - case Tags.PEER_PORT: - return PEER_PORT; - case Tags.COMPONENT: - return COMPONENT; - case Tags.SPAN_KIND: - return SPAN_KIND; - case DDTags.LANGUAGE_TAG_KEY: - return LANGUAGE; - case Tags.DB_TYPE: - return DB_TYPE; - case Tags.DB_INSTANCE: - return DB_INSTANCE; - case Tags.DB_USER: - return DB_USER; - case Tags.DB_OPERATION: - return DB_OPERATION; - case Tags.DB_POOL_NAME: - return DB_POOL_NAME; - default: - return 0L; - } + int slot = TagSet.Support.indexOf(KEYOF_HASHES, KEYOF_KEYS, name); + return slot < 0 ? 0L : KEYOF_IDS[slot]; } }; diff --git a/internal-api/src/main/java/datadog/trace/api/TagMap.java b/internal-api/src/main/java/datadog/trace/api/TagMap.java index 6e155181395..06742a10a0c 100644 --- a/internal-api/src/main/java/datadog/trace/api/TagMap.java +++ b/internal-api/src/main/java/datadog/trace/api/TagMap.java @@ -1417,18 +1417,21 @@ private static final class EmptyHolder { private int size; private boolean frozen; - // Dense store for known tags (any tag with a non-zero globalSerial), kept in insertion order. - // Lazily allocated on the first known-tag write. Parallel arrays: knownIds[i] is the tagId of the - // i-th present known tag and knownValues[i] its value (Object; primitives boxed). On set we - // linear-scan [0, knownCount) by globalSerial; a match overwrites, otherwise we append (growing - // the arrays). 
There are no positional collisions: every known tag simply gets a dense slot. - // Unknown tags (globalSerial == 0) still live in the hash buckets. + // Positional store for known tags, indexed directly by fieldPos (no linear scan). Lazily + // allocated + // on the first slotted write, sized to KnownTags.slotCount() and never grown. Parallel arrays: + // knownIds[p] is the tagId occupying slot p (0L = empty) and knownValues[p] its value (Object; + // primitives boxed; null = empty). knownCount is the number of OCCUPIED slots. A known tag claims + // its slot first-writer-wins by occupant: an empty slot is claimed, the same globalSerial + // overwrites in place, and a DIFFERENT globalSerial already holding the slot (a fieldPos + // conflict) + // routes to the hash buckets instead. Unslotted known tags (fieldPos >= slotCount(), e.g. + // NO_SLOT) + // and unknown tags (globalSerial == 0) also live in the hash buckets. private long[] knownIds; private Object[] knownValues; private int knownCount; - private static final int KNOWN_INITIAL_CAPACITY = 8; - public OptimizedTagMap() { // buckets stay null until the first unknown-tag insertion (see setInBuckets) this.buckets = null; @@ -1617,6 +1620,32 @@ public Entry getEntry(long tagId) { return name == null ? null : this.getEntryFromBuckets(name); } + // Mirrors knownRawValue(String) but skips keyOf - an id already carries its slot (fieldPos), so + // this is a direct positional dense read with no name resolution and no Entry. + private Object knownRawValue(long tagId) { + int i = this.knownIndexOf(tagId); + return i < 0 ? null : this.knownValues[i]; + } + + // Value read by id - dense fast path (no keyOf, no Entry), bucket fallback by resolved name. + public Object getObject(long tagId) { + Object known = this.knownRawValue(tagId); + if (known != null) return known; + String name = KnownTags.nameOf(tagId); + if (name == null) return null; + Entry entry = this.getEntryFromBuckets(name); + return entry == null ? 
null : entry.objectValue(); + } + + public String getString(long tagId) { + Object known = this.knownRawValue(tagId); + if (known != null) return TagValueConversions.toString(known); + String name = KnownTags.nameOf(tagId); + if (name == null) return null; + Entry entry = this.getEntryFromBuckets(name); + return entry == null ? null : entry.stringValue(); + } + private Entry getEntryFromBuckets(String tag) { Object[] thisBuckets = this.buckets; if (thisBuckets == null) return null; @@ -1657,13 +1686,19 @@ public void set(TagMap.EntryReader newEntryReader) { long tagId = entry.tagId; if (KnownTags.globalSerial(tagId) == 0 && KnownTags.isActive()) { long resolved = KnownTags.keyOf(entry.tag()); - if (resolved != 0L) tagId = resolved; + if (resolved != 0L) { + // Cache the resolved id back onto the entry so a SHARED cached entry (a decorator's + // SPAN_KIND_ENTRY / componentEntry, reused across every span) pays keyOf only on its first + // set, not on every span. Mirrors the write-back in getAndSet. Benign race on the shared + // entry: all writers store the same resolved id. + entry.tagId = resolved; + tagId = resolved; + } } - if (KnownTags.globalSerial(tagId) != 0) { - this.putKnownValue(tagId, entry.objectValue()); - } else { - this.setInBuckets(entry); + if (KnownTags.globalSerial(tagId) != 0 && this.trySetKnownSlot(tagId, entry.objectValue())) { + return; } + this.setInBuckets(entry); } // String-keyed setters. 
Resolve the tag identity once: a registered KnownTags resolver maps known @@ -1674,192 +1709,151 @@ public void set(TagMap.EntryReader newEntryReader) { public void set(String tag, Object value) { this.checkWriteAccess(); long id = KnownTags.keyOf(tag); - if (id != 0L) { - this.putKnownValue(id, value); - } else { - this.setInBuckets(Entry.newAnyEntry(tag, value)); - } + if (id != 0L && this.trySetKnownSlot(id, value)) return; + this.setInBuckets(Entry.newAnyEntry(tag, value)); } @Override public void set(String tag, CharSequence value) { this.checkWriteAccess(); long id = KnownTags.keyOf(tag); - if (id != 0L) { - this.putKnownValue(id, value); - } else { - this.setInBuckets(Entry.newObjectEntry(tag, value)); - } + if (id != 0L && this.trySetKnownSlot(id, value)) return; + this.setInBuckets(Entry.newObjectEntry(tag, value)); } @Override public void set(String tag, boolean value) { this.checkWriteAccess(); long id = KnownTags.keyOf(tag); - if (id != 0L) { - this.putKnownValue(id, Boolean.valueOf(value)); - } else { - this.setInBuckets(Entry.newBooleanEntry(tag, value)); - } + if (id != 0L && this.trySetKnownSlot(id, Boolean.valueOf(value))) return; + this.setInBuckets(Entry.newBooleanEntry(tag, value)); } @Override public void set(String tag, int value) { this.checkWriteAccess(); long id = KnownTags.keyOf(tag); - if (id != 0L) { - this.putKnownValue(id, Integer.valueOf(value)); - } else { - this.setInBuckets(Entry.newIntEntry(tag, value)); - } + if (id != 0L && this.trySetKnownSlot(id, Integer.valueOf(value))) return; + this.setInBuckets(Entry.newIntEntry(tag, value)); } @Override public void set(String tag, long value) { this.checkWriteAccess(); long id = KnownTags.keyOf(tag); - if (id != 0L) { - this.putKnownValue(id, Long.valueOf(value)); - } else { - this.setInBuckets(Entry.newLongEntry(tag, value)); - } + if (id != 0L && this.trySetKnownSlot(id, Long.valueOf(value))) return; + this.setInBuckets(Entry.newLongEntry(tag, value)); } @Override public void set(String 
tag, float value) { this.checkWriteAccess(); long id = KnownTags.keyOf(tag); - if (id != 0L) { - this.putKnownValue(id, Float.valueOf(value)); - } else { - this.setInBuckets(Entry.newFloatEntry(tag, value)); - } + if (id != 0L && this.trySetKnownSlot(id, Float.valueOf(value))) return; + this.setInBuckets(Entry.newFloatEntry(tag, value)); } @Override public void set(String tag, double value) { this.checkWriteAccess(); long id = KnownTags.keyOf(tag); - if (id != 0L) { - this.putKnownValue(id, Double.valueOf(value)); - } else { - this.setInBuckets(Entry.newDoubleEntry(tag, value)); - } + if (id != 0L && this.trySetKnownSlot(id, Double.valueOf(value))) return; + this.setInBuckets(Entry.newDoubleEntry(tag, value)); } // Tag-id keyed setters. The id already carries the globalSerial, so a known tag (non-zero - // globalSerial) goes straight into the dense store via putKnownValue with NO Entry allocation - // (strings/objects by reference; primitives boxed once). An id without a globalSerial is not a - // known tag — fall back to the bucket path, which builds an Entry that resolves its name lazily. + // globalSerial) goes straight into its positional slot via trySetKnownSlot with NO Entry + // allocation (strings/objects by reference; primitives boxed once). An id without a globalSerial, + // an unslotted id, or a fieldPos conflict falls back to the bucket path, which builds an Entry + // that resolves its name lazily. 
@Override public void set(long tagId, Object value) { this.checkWriteAccess(); - if (KnownTags.globalSerial(tagId) != 0) { - this.putKnownValue(tagId, value); - } else { - this.setInBuckets(Entry.newAnyEntry(tagId, value)); - } + if (KnownTags.globalSerial(tagId) != 0 && this.trySetKnownSlot(tagId, value)) return; + this.setInBuckets(Entry.newAnyEntry(tagId, value)); } @Override public void set(long tagId, CharSequence value) { this.checkWriteAccess(); - if (KnownTags.globalSerial(tagId) != 0) { - this.putKnownValue(tagId, value); - } else { - this.setInBuckets(Entry.newObjectEntry(tagId, value)); - } + if (KnownTags.globalSerial(tagId) != 0 && this.trySetKnownSlot(tagId, value)) return; + this.setInBuckets(Entry.newObjectEntry(tagId, value)); } @Override public void set(long tagId, boolean value) { this.checkWriteAccess(); - if (KnownTags.globalSerial(tagId) != 0) { - this.putKnownValue(tagId, Boolean.valueOf(value)); - } else { - this.setInBuckets(Entry.newBooleanEntry(tagId, value)); - } + if (KnownTags.globalSerial(tagId) != 0 && this.trySetKnownSlot(tagId, Boolean.valueOf(value))) + return; + this.setInBuckets(Entry.newBooleanEntry(tagId, value)); } @Override public void set(long tagId, int value) { this.checkWriteAccess(); - if (KnownTags.globalSerial(tagId) != 0) { - this.putKnownValue(tagId, Integer.valueOf(value)); - } else { - this.setInBuckets(Entry.newIntEntry(tagId, value)); - } + if (KnownTags.globalSerial(tagId) != 0 && this.trySetKnownSlot(tagId, Integer.valueOf(value))) + return; + this.setInBuckets(Entry.newIntEntry(tagId, value)); } @Override public void set(long tagId, long value) { this.checkWriteAccess(); - if (KnownTags.globalSerial(tagId) != 0) { - this.putKnownValue(tagId, Long.valueOf(value)); - } else { - this.setInBuckets(Entry.newLongEntry(tagId, value)); - } + if (KnownTags.globalSerial(tagId) != 0 && this.trySetKnownSlot(tagId, Long.valueOf(value))) + return; + this.setInBuckets(Entry.newLongEntry(tagId, value)); } @Override public 
void set(long tagId, float value) { this.checkWriteAccess(); - if (KnownTags.globalSerial(tagId) != 0) { - this.putKnownValue(tagId, Float.valueOf(value)); - } else { - this.setInBuckets(Entry.newFloatEntry(tagId, value)); - } + if (KnownTags.globalSerial(tagId) != 0 && this.trySetKnownSlot(tagId, Float.valueOf(value))) + return; + this.setInBuckets(Entry.newFloatEntry(tagId, value)); } @Override public void set(long tagId, double value) { this.checkWriteAccess(); - if (KnownTags.globalSerial(tagId) != 0) { - this.putKnownValue(tagId, Double.valueOf(value)); - } else { - this.setInBuckets(Entry.newDoubleEntry(tagId, value)); - } + if (KnownTags.globalSerial(tagId) != 0 && this.trySetKnownSlot(tagId, Double.valueOf(value))) + return; + this.setInBuckets(Entry.newDoubleEntry(tagId, value)); } - // Returns the dense index of the known tag matching tagId (by globalSerial), or -1 if absent. + // Returns the slot index (== fieldPos) holding the known tag matching tagId, or -1 if the slot is + // empty or occupied by a different globalSerial (a conflict, which lives in the buckets instead). private int knownIndexOf(long tagId) { - int globalSerial = KnownTags.globalSerial(tagId); - if (globalSerial == 0) return -1; - + int s = KnownTags.globalSerial(tagId); + if (s == 0) return -1; + int p = KnownTags.fieldPos(tagId); long[] ids = this.knownIds; - int count = this.knownCount; - for (int i = 0; i < count; ++i) { - if (KnownTags.globalSerial(ids[i]) == globalSerial) return i; - } - return -1; + if (ids == null || p < 0 || p >= ids.length) return -1; + return (KnownTags.globalSerial(ids[p]) == s) ? p : -1; } - // Materializes a real Entry for the dense entry at index i (carrying the stored tagId so it + // Materializes a real Entry for the dense entry at slot p (carrying the stored tagId so it // resolves its name and serializes correctly). 
- private Entry knownEntryAt(int i) { - return Entry.newAnyEntry(this.knownIds[i], this.knownValues[i]); + private Entry knownEntryAt(int p) { + return Entry.newAnyEntry(this.knownIds[p], this.knownValues[p]); } - // Returns a materialized entry for tagId if a known tag with that globalSerial is present, else - // null. (Explicit getEntry path - materializing here is fine, this is not iteration.) + // Returns a materialized entry for tagId if its slot holds that known tag, else null. (Explicit + // getEntry path - materializing here is fine, this is not iteration.) private Entry knownGet(long tagId) { - int i = this.knownIndexOf(tagId); - return i < 0 ? null : this.knownEntryAt(i); + int p = this.knownIndexOf(tagId); + return p < 0 ? null : this.knownEntryAt(p); } - // Removes and returns (materialized) the known tag matching tagId, else null. Compacts the dense - // store by swapping the last element into the removed slot (order need not be stable on remove). + // Removes and returns (materialized) the known tag from its slot, else null. Clears the slot in + // place; conflicting tags that live in the buckets are NOT promoted back into the freed slot. private Entry knownRemove(long tagId) { - int i = this.knownIndexOf(tagId); - if (i < 0) return null; - - Entry removed = this.knownEntryAt(i); - int last = this.knownCount - 1; - this.knownIds[i] = this.knownIds[last]; - this.knownValues[i] = this.knownValues[last]; - this.knownIds[last] = 0L; - this.knownValues[last] = null; - this.knownCount = last; + int p = this.knownIndexOf(tagId); + if (p < 0) return null; + + Entry removed = this.knownEntryAt(p); + this.knownIds[p] = 0L; + this.knownValues[p] = null; + this.knownCount -= 1; this.size -= 1; return removed; } @@ -1888,45 +1882,85 @@ public Entry getAndSet(Entry newEntry) { return this.setInBuckets(newEntry); } - // Dense-write core: store a known tag's value WITHOUT constructing an Entry. tagId must carry a - // non-zero globalSerial. 
Linear-scan [0, knownCount) by globalSerial; a match overwrites the - // stored id+value in place (no size change), otherwise we append (growing the parallel arrays). - // Primitives are expected pre-boxed by the caller; strings/objects are stored by reference. - private void putKnownValue(long tagId, Object value) { - int globalSerial = KnownTags.globalSerial(tagId); + // Positional dense-write core: store a known tag's value WITHOUT constructing an Entry, indexed + // directly by fieldPos. tagId must carry a non-zero globalSerial. Returns true if the value was + // stored in its slot (claimed or overwritten in place), or false if it could not be slotted — + // unslotted (fieldPos >= slotCount()) or a fieldPos CONFLICT with a different globalSerial + // already + // holding the slot — in which case the caller must route it to the buckets. Primitives are + // expected pre-boxed by the caller; strings/objects are stored by reference. + private boolean trySetKnownSlot(long tagId, Object value) { + int slotCount = KnownTags.slotCount(); + if (slotCount == 0) return false; + int p = KnownTags.fieldPos(tagId); + if (p < 0 || p >= slotCount) return false; // unslotted (e.g. NO_SLOT) -> buckets + long[] ids = this.knownIds; - int count = this.knownCount; - for (int i = 0; i < count; ++i) { - if (KnownTags.globalSerial(ids[i]) == globalSerial) { - // same tag - overwrite in place, no size change - this.knownIds[i] = tagId; - this.knownValues[i] = value; - return; + if (ids == null) { + ids = this.knownIds = new long[slotCount]; + this.knownValues = new Object[slotCount]; + } + + long occupant = ids[p]; + if (occupant == 0L) { + // empty slot - claim it. A tag that previously lost the slot race is parked in the buckets; + // if + // the slot has since freed up it may now claim the slot, so evict any stale bucketed copy to + // keep a single source of truth. buckets is null in the all-known case, so this is free + // there. 
+ if (this.buckets != null) { + this.evictFromBuckets(tagId); } + ids[p] = tagId; + this.knownValues[p] = value; + this.knownCount += 1; + this.size += 1; + return true; + } + if (KnownTags.globalSerial(occupant) == KnownTags.globalSerial(tagId)) { + // same tag - overwrite in place, no size change (refresh the id to keep the name current) + ids[p] = tagId; + this.knownValues[p] = value; + return true; } + // a different globalSerial already holds this slot - conflict, route to buckets + return false; + } - // append - grow if necessary - if (ids == null) { - ids = this.knownIds = new long[KNOWN_INITIAL_CAPACITY]; - this.knownValues = new Object[KNOWN_INITIAL_CAPACITY]; - } else if (count == ids.length) { - int newCapacity = count << 1; - ids = this.knownIds = Arrays.copyOf(ids, newCapacity); - this.knownValues = Arrays.copyOf(this.knownValues, newCapacity); - } - ids[count] = tagId; - this.knownValues[count] = value; - this.knownCount = count + 1; - this.size += 1; - } - - // Stores a known tag in the dense store and returns the prior entry (materialized) or null. Used - // by the prior-returning getAndSet path; the void set(...) paths call putKnownValue directly to - // avoid even this Entry-materialization of the prior. + // Removes a stale bucketed copy of tagId (by resolved name), decrementing size if found. Used + // when + // a tag reclaims a freed slot to avoid a slot+bucket duplicate. Returns the removed entry or + // null. + private Entry evictFromBuckets(long tagId) { + String name = KnownTags.nameOf(tagId); + if (name == null) return null; + return this.removeFromBuckets(name, KnownTags.nameHash(tagId)); + } + + // Stores a known tag (slotting it if possible) and returns the prior entry (materialized) or + // null. + // Used by the prior-returning getAndSet path. On a fieldPos conflict or an unslotted tag, routes + // the value to the buckets and returns the prior bucketed entry (if any). 
private Entry setKnown(Entry newEntry, int globalSerial) { - int i = this.knownIndexOf(newEntry.tagId); - Entry prev = (i < 0) ? null : this.knownEntryAt(i); - this.putKnownValue(newEntry.tagId, newEntry.objectValue()); + long tagId = newEntry.tagId; + int p = this.knownIndexOf(tagId); + if (p >= 0) { + // slot already holds this tag - overwrite in place, returning the prior slot value + Entry prev = this.knownEntryAt(p); + this.knownIds[p] = tagId; + this.knownValues[p] = newEntry.objectValue(); + return prev; + } + // not in its slot. The only possible prior with this name is a bucketed copy (either a current + // conflict victim or one parked there before its slot freed up). Capture+remove it: it is both + // the prior to return AND, for the claim case, the duplicate that must be evicted. + Entry prev = (this.buckets != null) ? this.evictFromBuckets(tagId) : null; + if (this.trySetKnownSlot(tagId, newEntry.objectValue())) { + // claimed/overwrote a slot; evictFromBuckets already accounted for any removed bucketed copy + return prev; + } + // conflict / unslotted - the value goes (back) into the buckets, keyed by its resolved name + this.setInBuckets(newEntry); return prev; } @@ -2242,9 +2276,11 @@ private void putAllIntoEmptyMap(OptimizedTagMap that) { public void fillMap(Map map) { long[] ids = this.knownIds; Object[] values = this.knownValues; - int count = this.knownCount; - for (int i = 0; i < count; ++i) { - map.put(KnownTags.nameOf(ids[i]), values[i]); + if (ids != null) { + for (int i = 0; i < ids.length; ++i) { + if (ids[i] == 0L) continue; + map.put(KnownTags.nameOf(ids[i]), values[i]); + } } Object[] thisBuckets = this.buckets; @@ -2268,9 +2304,11 @@ public void fillMap(Map map) { public void fillStringMap(Map stringMap) { long[] ids = this.knownIds; Object[] values = this.knownValues; - int count = this.knownCount; - for (int i = 0; i < count; ++i) { - stringMap.put(KnownTags.nameOf(ids[i]), TagValueConversions.toString(values[i])); + if (ids != null) 
{ + for (int i = 0; i < ids.length; ++i) { + if (ids[i] == 0L) continue; + stringMap.put(KnownTags.nameOf(ids[i]), TagValueConversions.toString(values[i])); + } } Object[] thisBuckets = this.buckets; @@ -2403,10 +2441,10 @@ public Stream stream() { public void forEach(Consumer consumer) { long[] ids = this.knownIds; Object[] values = this.knownValues; - int count = this.knownCount; - if (count != 0) { + if (ids != null && this.knownCount != 0) { EntryReadingHelper reader = new EntryReadingHelper(); - for (int i = 0; i < count; ++i) { + for (int i = 0; i < ids.length; ++i) { + if (ids[i] == 0L) continue; reader.set(KnownTags.nameOf(ids[i]), values[i]); consumer.accept(reader); } @@ -2434,10 +2472,10 @@ public void forEach(Consumer consumer) { public void forEach(T thisObj, BiConsumer consumer) { long[] ids = this.knownIds; Object[] values = this.knownValues; - int count = this.knownCount; - if (count != 0) { + if (ids != null && this.knownCount != 0) { EntryReadingHelper reader = new EntryReadingHelper(); - for (int i = 0; i < count; ++i) { + for (int i = 0; i < ids.length; ++i) { + if (ids[i] == 0L) continue; reader.set(KnownTags.nameOf(ids[i]), values[i]); consumer.accept(thisObj, reader); } @@ -2466,10 +2504,10 @@ public void forEach( T thisObj, U otherObj, TriConsumer consumer) { long[] ids = this.knownIds; Object[] values = this.knownValues; - int count = this.knownCount; - if (count != 0) { + if (ids != null && this.knownCount != 0) { EntryReadingHelper reader = new EntryReadingHelper(); - for (int i = 0; i < count; ++i) { + for (int i = 0; i < ids.length; ++i) { + if (ids[i] == 0L) continue; reader.set(KnownTags.nameOf(ids[i]), values[i]); consumer.accept(thisObj, otherObj, reader); } @@ -2498,9 +2536,9 @@ public void clear() { // drop the bucket array entirely - it will be lazily re-allocated on the next unknown-tag write this.buckets = null; - if (this.knownCount != 0) { - Arrays.fill(this.knownIds, 0, this.knownCount, 0L); - Arrays.fill(this.knownValues, 
0, this.knownCount, null); + if (this.knownIds != null && this.knownCount != 0) { + Arrays.fill(this.knownIds, 0L); + Arrays.fill(this.knownValues, null); this.knownCount = 0; } this.size = 0; @@ -2526,19 +2564,22 @@ void checkIntegrity() { // While that's probably an unnecessary optimization, this method is only called in tests long[] ids = this.knownIds; - int knownCount = this.knownCount; - for (int i = 0; i < knownCount; ++i) { + int occupied = 0; + for (int i = 0; ids != null && i < ids.length; ++i) { long id = ids[i]; + if (id == 0L) continue; // empty slot + occupied += 1; if (KnownTags.globalSerial(id) == 0) { throw new IllegalStateException("known entry without globalSerial"); } - // no duplicate globalSerials in the dense store - for (int j = i + 1; j < knownCount; ++j) { - if (KnownTags.globalSerial(ids[j]) == KnownTags.globalSerial(id)) { - throw new IllegalStateException("duplicate known entry"); - } + // positional invariant: a tag occupies the slot equal to its fieldPos + if (KnownTags.fieldPos(id) != i) { + throw new IllegalStateException("known entry in wrong slot"); } } + if (occupied != this.knownCount) { + throw new IllegalStateException("incorrect knownCount"); + } Object[] thisBuckets = this.buckets; @@ -2699,7 +2740,6 @@ String toInternalString() { abstract static class IteratorBase { private final long[] knownIds; private final Object[] knownValues; - private final int knownCount; private final Object[] buckets; // Reused flyweight reader for dense (known) entries - no per-tag Entry allocation. 
Lazily @@ -2720,7 +2760,6 @@ abstract static class IteratorBase { IteratorBase(OptimizedTagMap map) { this.knownIds = map.knownIds; this.knownValues = map.knownValues; - this.knownCount = map.knownCount; this.buckets = map.buckets; } @@ -2763,16 +2802,18 @@ final EntryReader nextEntryOrNull() { } private final EntryReader advance() { - // drain the dense known entries first, via the reused flyweight reader - if (this.knownIndex + 1 < this.knownCount) { - ++this.knownIndex; - EntryReadingHelper reader = this.knownReader; - if (reader == null) { - reader = this.knownReader = new EntryReadingHelper(); + // drain the dense known entries first, via the reused flyweight reader (skip empty slots) + long[] ids = this.knownIds; + if (ids != null) { + while (++this.knownIndex < ids.length) { + if (ids[this.knownIndex] == 0L) continue; + EntryReadingHelper reader = this.knownReader; + if (reader == null) { + reader = this.knownReader = new EntryReadingHelper(); + } + reader.set(KnownTags.nameOf(ids[this.knownIndex]), this.knownValues[this.knownIndex]); + return reader; } - reader.set( - KnownTags.nameOf(this.knownIds[this.knownIndex]), this.knownValues[this.knownIndex]); - return reader; } if (this.buckets == null) return null; diff --git a/internal-api/src/main/java/datadog/trace/util/TagSet.java b/internal-api/src/main/java/datadog/trace/util/TagSet.java new file mode 100644 index 00000000000..570f3c7e003 --- /dev/null +++ b/internal-api/src/main/java/datadog/trace/util/TagSet.java @@ -0,0 +1,142 @@ +package datadog.trace.util; + +/** + * Flat open-addressed name set. Generic — it knows only names. + * + *

<p>Three ways to use it, trading convenience for indirection: + * + *
<ul> + *
<li>{@link Support} — static algorithm over raw arrays. Keep the arrays in your own + * (ideally {@code static final}) fields and the JIT folds the refs to constants. The fastest + * path; nothing to dereference. + *
<li>{@link Data} — a build-time carrier for the placed {@code {hashes, names}} returned + * by {@link Support#create}. Pull its fields into your own and discard it. + *
<li>The {@code TagSet} instance ({@link #of}) — a convenience wrapper holding the + * arrays; {@link #indexOf}/{@link #contains} delegate to {@link Support}. Costs an + * instance-field load per call (the indirection the static path removes) — fine off the hot + * path. + *
</ul> + * + *
<p>Consumers attach their own parallel payload arrays (ids, values, ...) sized to {@link #slots} + * and indexed by the slot {@code indexOf} returns. + * + *

<p>Hash value 0 is the empty sentinel: {@link Support#hash} never returns 0, so {@code hashes[i] + * == 0} unambiguously means an empty slot. + */ +public final class TagSet { + private final int[] hashes; + private final String[] names; + public final int slots; // == hashes.length + + private TagSet(int[] hashes, String[] names) { + this.hashes = hashes; + this.names = names; + this.slots = hashes.length; + } + + /** + * Convenience instance — wraps the placed arrays. For the hot path prefer raw {@link Support}. + */ + public static TagSet of(String... names) { + Data data = Support.create(names); + return new TagSet(data.hashes, data.names); + } + + /** Slot of {@code name}, or -1. Delegates to {@link Support} on the instance's arrays. */ + public int indexOf(String name) { + return Support.indexOf(this.hashes, this.names, name); + } + + public boolean contains(String name) { + return indexOf(name) >= 0; + } + + /** Table size — allocate parallel payload arrays of this length. */ + public int slots() { + return this.slots; + } + + /** Build-time carrier. Pull the fields into your own (static final) fields; don't keep this. */ + public static final class Data { + public final int[] hashes; + public final String[] names; + + Data(int[] hashes, String[] names) { + this.hashes = hashes; + this.names = names; + } + } + + /** Static algorithm over raw arrays. Query helpers take raw arrays, never a Data or a TagSet. */ + public static final class Support { + private Support() {} + + /** Spread of String.hashCode; 0 reserved as the empty sentinel. */ + public static int hash(String name) { + int h = name.hashCode(); // cached on String -> field load + return h == 0 ? 0xDD06 : h ^ (h >>> 16); + } + + /** Power-of-two size, 2x-oversized so load factor stays <= 0.5. */ + public static int tableSizeFor(int n) { + int size = 1; + while (size <= n) { + size <<= 1; + } + return size << 1; + } + + /** Build the placed table.
Returns a Data carrier; pull its arrays into your own fields. */ + public static Data create(String... names) { + int size = tableSizeFor(names.length); + int[] hashes = new int[size]; + String[] placed = new String[size]; + for (String name : names) { + put(hashes, placed, name, hash(name)); + } + return new Data(hashes, placed); + } + + /** Build-time placement. Returns the slot. */ + public static int put(int[] hashes, String[] names, String name, int h) { + final int mask = hashes.length - 1; + int i = h & mask; + for (int probes = 0; probes <= mask; probes++, i = (i + 1) & mask) { + if (hashes[i] == 0) { + hashes[i] = h; + names[i] = name; + return i; + } + if (hashes[i] == h && eq(names[i], name)) { + return i; // already present + } + } + throw new IllegalStateException("table full"); // impossible at LF <= 0.5 + } + + /** Probe; returns the slot or -1. Raw arrays — no Data, no instance. */ + public static int indexOf(int[] hashes, String[] names, String name, int h) { + final int mask = hashes.length - 1; + int i = h & mask; + for (int probes = 0; probes <= mask; probes++, i = (i + 1) & mask) { + int sh = hashes[i]; + if (sh == 0) { + return -1; + } + if (sh == h && eq(names[i], name)) { + return i; + } + } + return -1; + } + + public static int indexOf(int[] hashes, String[] names, String name) { + return indexOf(hashes, names, name, hash(name)); + } + + // `a` is a stored name on an occupied slot (never null); `b` is a non-null query. 
+ private static boolean eq(String a, String b) { + return a == b || a.equals(b); // interned literals hit the == fast path + } + } +} diff --git a/internal-api/src/test/java/datadog/trace/util/TagSetTest.java b/internal-api/src/test/java/datadog/trace/util/TagSetTest.java new file mode 100644 index 00000000000..e5ca5ff41e5 --- /dev/null +++ b/internal-api/src/test/java/datadog/trace/util/TagSetTest.java @@ -0,0 +1,102 @@ +package datadog.trace.util; + +import static org.junit.jupiter.api.Assertions.assertEquals; +import static org.junit.jupiter.api.Assertions.assertFalse; +import static org.junit.jupiter.api.Assertions.assertNotEquals; +import static org.junit.jupiter.api.Assertions.assertThrows; +import static org.junit.jupiter.api.Assertions.assertTrue; + +import datadog.trace.util.TagSet.Data; +import datadog.trace.util.TagSet.Support; +import org.junit.jupiter.api.Test; + +class TagSetTest { + + @Test + void hash_spread_and_zeroSentinel() { + // "".hashCode() == 0 -> remapped to the non-zero sentinel so 0 can mean "empty slot" + assertEquals(0xDD06, Support.hash("")); + + int raw = "foo".hashCode(); + assertEquals(raw ^ (raw >>> 16), Support.hash("foo")); + assertNotEquals(0, Support.hash("foo")); + } + + @Test + void tableSizeFor_isPow2_andOversized() { + assertEquals(2, Support.tableSizeFor(0)); + assertEquals(4, Support.tableSizeFor(1)); + assertEquals(8, Support.tableSizeFor(3)); + assertEquals(16, Support.tableSizeFor(4)); + } + + @Test + void instance_contains_internedAndCopy_andMiss() { + TagSet set = TagSet.of("foo", "bar", "baz"); + + assertEquals(8, set.slots()); // 3 names -> tableSizeFor(3) == 8 + + assertTrue(set.contains("foo")); // interned literal -> == fast path in eq + assertTrue(set.contains(new String("bar"))); // non-interned -> .equals path + assertFalse(set.contains("nope")); + + assertTrue(set.indexOf("baz") >= 0); + assertEquals(-1, set.indexOf("nope")); + } + + @Test + void support_create_then_indexOf() { + Data d = 
Support.create("x", "y"); + + int slot = Support.indexOf(d.hashes, d.names, "x"); // 3-arg overload computes the hash + assertTrue(slot >= 0); + assertEquals("x", d.names[slot]); + + assertEquals(-1, Support.indexOf(d.hashes, d.names, "q")); + } + + /** Controlled hashes force collision, linear-probe wraparound, and the already-present path. */ + @Test + void put_and_indexOf_collisionAndWraparound() { + int[] hashes = new int[4]; // mask = 3 + String[] names = new String[4]; + + assertEquals(3, Support.put(hashes, names, "a", 7)); // 7 & 3 == 3 + assertEquals(0, Support.put(hashes, names, "b", 7)); // collides at 3, probes (3+1)&3 == 0 + assertEquals(3, Support.put(hashes, names, "a", 7)); // already present -> existing slot + + assertEquals(3, Support.indexOf(hashes, names, "a", 7)); // direct hit + assertEquals(0, Support.indexOf(hashes, names, "b", 7)); // hit after collision + wraparound + assertEquals( + -1, Support.indexOf(hashes, names, "c", 7)); // miss after probing 3 -> 0 -> 1(empty) + assertEquals(-1, Support.indexOf(hashes, names, "z", 6)); // 6 & 3 == 2, empty -> immediate miss + } + + @Test + void put_throwsWhenFull() { + int[] hashes = new int[2]; // mask = 1 + String[] names = new String[2]; + + Support.put(hashes, names, "a", 4); // 4 & 1 == 0 + Support.put(hashes, names, "b", 5); // 5 & 1 == 1 + + // both slots occupied, no match -> probe exhausts -> throw + assertThrows(IllegalStateException.class, () -> Support.put(hashes, names, "c", 6)); + } + + /** The documented usage: build a TagSet, attach a parallel payload indexed by slot. 
*/ + @Test + void parallelPayloadBySlot() { + String[] names = {"a", "b", "c"}; + Data d = Support.create(names); + + long[] ids = new long[d.names.length]; + for (int j = 0; j < names.length; j++) { + ids[Support.indexOf(d.hashes, d.names, names[j])] = j + 1L; + } + + assertEquals(1L, ids[Support.indexOf(d.hashes, d.names, "a")]); + assertEquals(2L, ids[Support.indexOf(d.hashes, d.names, "b")]); + assertEquals(3L, ids[Support.indexOf(d.hashes, d.names, "c")]); + } +} diff --git a/tag-conventions.yaml b/tag-conventions.yaml index 84b0057fbb4..1ee73279326 100644 --- a/tag-conventions.yaml +++ b/tag-conventions.yaml @@ -1,43 +1,41 @@ -# Tag conventions — span-type inheritance + product mixins (draft) +# Tag conventions — structural inheritance + mixins (pull via include / push via applies) # --------------------------------------------------------------------------- -# Source: the OTel-convention prototype (DatadogBase / Http / Client / DataSource -# hierarchy in TracerProto) reconciled with the tags Spring PetClinic emits. # Language-agnostic spec the code generator consumes to emit per-language tag-id -# constants, the id<->name resolver, and the slot layout. +# constants, the id<->name resolver, and the slot layout. Reconciled from the +# OTel-convention prototype + the tags Spring PetClinic emits. # -# TWO COMPOSITION MECHANISMS -# inheritance (`extends`) — the structural "is-a" chain of span types that -# carries COMMON tags down to specific types. The root (`base`) is implicitly -# in every span. `extends` may list MULTIPLE parents (e.g. an http client span -# is both `http` and `client`). This is core tracing structure. +# THREE COMPOSITION MECHANISMS (we use all three; each models a different relationship) +# extends — STRUCTURAL "is-a" inheritance between span types (single concept, may be +# multiple). `http.server` is-a `http` is-a `base`. The root `base` is +# implicitly in every span. Abstract layers (`abstract: true`) exist only to +# be extended. 
Carries the common tags down the chain. # -# mixins (`mixins: [..]`) — PRODUCT / enrichment overlays composed ON THE SIDE, -# orthogonal to the span-type chain: CI Visibility, AppSec (ASM), Data Streams -# (DSM), profiling, etc. Each is gated by its product being enabled and may -# apply to all spans (implicitly) or to specific span types. +# mixins are reusable tag bundles attached TWO ways — BOTH are needed: +# include: a span type PULLS a mixin in (`include: [peer]`). Use for a bundle that is +# intrinsic to that type and declared locally on it; core-owned. (has-a) +# applies: a mixin PUSHES itself onto span types (`applies: all | [types]`). Use for +# cross-cutting / product bundles (profiling, appsec, dsm, ci) that attach +# without the span type knowing — so a product team owns its own attachment +# and never edits the core span_types. Gated by `enabled_by`. # # resolved tags(span_type) = own tags -# + tags inherited up every `extends` parent (incl. base) -# + tags from product mixins whose `applies` matches +# + tags up the `extends` chain (incl. base) +# + tags of every mixin the type `include`s +# + tags of every mixin whose `applies` matches the type # -# `abstract: true` marks a structural layer that is never a span on its own -# (base, client, http) — it exists only to be extended. +# When to choose: extends = identity; include = a capability the type intrinsically has +# (pull, core-owned, local to the type); applies = orthogonal/optional enrichment a +# bundle projects onto spans (push, product-owned, config-gated). # -# tag fields: -# tag canonical tag name written to the span -# type string | int | long | boolean | double (language-agnostic) -# required required | conditional | recommended | optional | opt_in -# aliases other names resolving to the same tag (e.g. 
OTel semconv names) -# -- tracer-implementation hints (layered out of the shared spec cross-language) -- -# slot true (default) = fast positional slot; false = id-only, lives in buckets -# intercepted true = setting it triggers tracer side-effects (must run the interceptor) -# source core (tracer sets once per span) | inst (instrumentation) [default inst] +# tag fields: tag | type (string|int|long|boolean|double) | required (required|conditional| +# recommended|optional|opt_in) | aliases. Impl hints (split out cross-language): +# slot (true default; false = id-only, lives in buckets) | intercepted | source (core|inst). # --------------------------------------------------------------------------- -# ===== Structural span types (inheritance) ================================== +# ===== Structural span types (extends) ===================================== span_types: - # ---- root: common tags, implicitly in EVERY span ---- + # root: common tags, implicitly in EVERY span base: abstract: true tags: @@ -56,22 +54,6 @@ span_types: - { tag: error.message, type: string, required: recommended, slot: false } - { tag: error.stack, type: string, required: recommended, slot: false } - # ---- client/outbound: anything that calls a remote peer (proto: Client) ---- - # OPEN QUESTION: is "peer" best as this structural abstract type (an outbound span - # is-a client), or folded directly into db.client/http.client, or something else? - # Kept as a structural layer for now; revisit.
- client: - abstract: true - extends: base - tags: - - { tag: peer.service, type: string, required: recommended, intercepted: true } - - { tag: _dd.peer.service.source, type: string, required: recommended } - - { tag: peer.hostname, type: string, required: recommended } - - { tag: peer.ipv4, type: string } - - { tag: peer.ipv6, type: string } - - { tag: peer.port, type: int } - - # ---- HTTP family ---- http: abstract: true extends: base @@ -92,14 +74,15 @@ span_types: - { tag: servlet.context, type: string, required: optional, slot: false, intercepted: true } http.client: - extends: [ http, client ] # multiple inheritance: an http span AND a peer client + extends: http + include: [ peer ] # PULL: an http client intrinsically has a remote peer tags: - { tag: http.url, type: string, required: required, intercepted: true, aliases: [url.full] } - { tag: http.resend_count, type: int, required: recommended, slot: false } - # ---- Database client (h2.query, jdbc) ---- - db.client: - extends: client + db.client: # h2.query / jdbc + extends: base + include: [ peer ] # PULL tags: - { tag: db.type, type: string, required: required, aliases: [db.system] } - { tag: db.instance, type: string, required: recommended } @@ -108,18 +91,27 @@ span_types: - { tag: db.pool.name, type: string, required: optional } - { tag: db.statement, type: string, required: recommended, slot: false, intercepted: true, aliases: [db.query.text] } - # ---- View render (response.render) ---- - view.render: + view.render: # response.render extends: base tags: - { tag: view.name, type: string, required: recommended, slot: false } -# ===== Product / enrichment mixins (on the side) ============================ -# Composed orthogonally when the product is enabled. `applies: all` => overlaid on -# every span (implicitly); `applies: [types]` => only those span types. -# Tag lists below are ILLUSTRATIVE — each product team owns the authoritative set. 
+# ===== Mixins (reusable bundles) =========================================== +# Attached by include (pull, on a span type above) and/or by applies (push, here). mixins: + # peer — outbound/remote-peer capability. PULLED via `include` by client span types + # (it's intrinsic to them, core-owned), so no `applies`. + peer: + tags: + - { tag: peer.service, type: string, required: recommended, intercepted: true } + - { tag: _dd.peer.service.source, type: string, required: recommended } + - { tag: peer.hostname, type: string, required: recommended } + - { tag: peer.ipv4, type: string } + - { tag: peer.ipv6, type: string } + - { tag: peer.port, type: int } + + # products — PUSH via applies, gated by enabled_by; owned by the product teams. profiling: enabled_by: dd.profiling.enabled applies: all @@ -131,18 +123,16 @@ mixins: applies: all tags: - { tag: _dd.dsm.enabled, type: boolean, source: core } - # + pathway / checkpoint tags on messaging spans (owned by DSM) - appsec: # ASM + appsec: # ASM — entry/web spans enabled_by: dd.appsec.enabled applies: [ http.server ] tags: - { tag: _dd.appsec.enabled, type: boolean, source: core, slot: false } - # + appsec event / threat tags (owned by AppSec) - ci_visibility: # CI Visibility (test spans) + ci_visibility: # test spans (a `test` span type, not modeled here yet) enabled_by: dd.civisibility.enabled - applies: [ test ] # a `test` span type (not modeled here yet) + applies: [ test ] tags: - { tag: test.name, type: string, slot: false } - { tag: test.suite, type: string, slot: false } @@ -151,9 +141,10 @@ mixins: # --------------------------------------------------------------------------- # Notes +# - A mixin may be both included AND applied (different span types reach it different ways); +# the resolver de-dups so a tag pulled in twice is one slot. # - `span.kind` enumerates: server | client | producer | consumer | internal | broker. 
-# - "virtual" tags (sampling.priority, resource.name, service, manual.keep/drop, -# span.type, measured, origin, analytics.sample_rate, error flag) are handled -# entirely by the interceptor / span fields and are NOT stored; they'd be declared -# with `virtual: true` (reserved-tier id, no slot). Omitted here for now. +# - "virtual" tags (sampling.priority, resource.name, service, manual.keep/drop, span.type, +# measured, origin, analytics.sample_rate) are interceptor/span-field handled, NOT stored; +# they'd carry `virtual: true` (reserved-tier id, no slot). Omitted here. # --------------------------------------------------------------------------- From bd27072cc34ab2d139ce763dab1e511ab83d75d4 Mon Sep 17 00:00:00 2001 From: Douglas Q Hawkins Date: Wed, 17 Jun 2026 06:43:47 -0400 Subject: [PATCH 34/35] Rename TagMapInsertionBenchmark->TagMapAccessBenchmark; fuzz source-safety on merge - TagMapInsertion{,Baseline}Benchmark -> TagMapAccess{,Baseline}Benchmark (it covers reads now, not just insertion). - TagMapFuzzTest.testMerge: after putAll, assert the SOURCE is unchanged, and stays unchanged after the dest is independently mutated (guards against the dest sharing a mutable BucketGroup chain with the source). 
Co-Authored-By: Claude Opus 4.8 --- ...ava => TagMapAccessBaselineBenchmark.java} | 10 +++---- ...chmark.java => TagMapAccessBenchmark.java} | 27 +++++++++---------- .../datadog/trace/api/TagMapFuzzTest.java | 13 +++++++++ 3 files changed, 31 insertions(+), 19 deletions(-) rename internal-api/src/jmh/java/datadog/trace/api/{TagMapInsertionBaselineBenchmark.java => TagMapAccessBaselineBenchmark.java} (83%) rename internal-api/src/jmh/java/datadog/trace/api/{TagMapInsertionBenchmark.java => TagMapAccessBenchmark.java} (88%) diff --git a/internal-api/src/jmh/java/datadog/trace/api/TagMapInsertionBaselineBenchmark.java b/internal-api/src/jmh/java/datadog/trace/api/TagMapAccessBaselineBenchmark.java similarity index 83% rename from internal-api/src/jmh/java/datadog/trace/api/TagMapInsertionBaselineBenchmark.java rename to internal-api/src/jmh/java/datadog/trace/api/TagMapAccessBaselineBenchmark.java index 9ddbe041480..72eeccb1fda 100644 --- a/internal-api/src/jmh/java/datadog/trace/api/TagMapInsertionBaselineBenchmark.java +++ b/internal-api/src/jmh/java/datadog/trace/api/TagMapAccessBaselineBenchmark.java @@ -16,13 +16,13 @@ import org.openjdk.jmh.infra.Blackhole; /** - * Master-equivalent control for {@link TagMapInsertionBenchmark}: string insertion / lookup with NO + * Master-equivalent control for {@link TagMapAccessBenchmark}: string insertion / lookup with NO * {@link KnownTags.Resolver} registered, so every tag uses the hash buckets (no slot routing, no * keyOf). This mirrors how master behaves and isolates the comparison "automatic insertion by id * (this branch) vs the pre-feature string baseline". * *

    Runs in its own benchmark class so each method's fork has no resolver registered (the resolver - * is global static; {@code TagMapInsertionBenchmark} registers one in its own forks). + * is global static; {@code TagMapAccessBenchmark} registers one in its own forks). */ @BenchmarkMode(Mode.Throughput) @OutputTimeUnit(TimeUnit.SECONDS) @@ -31,9 +31,9 @@ @Measurement(iterations = 5) @Threads(8) @State(Scope.Benchmark) -public class TagMapInsertionBaselineBenchmark { - // same tag set as TagMapInsertionBenchmark for an apples-to-apples comparison - static final String[] NAMES = TagMapInsertionBenchmark.NAMES; +public class TagMapAccessBaselineBenchmark { + // same tag set as TagMapAccessBenchmark for an apples-to-apples comparison + static final String[] NAMES = TagMapAccessBenchmark.NAMES; static final Object[] VALUES = new Object[NAMES.length]; diff --git a/internal-api/src/jmh/java/datadog/trace/api/TagMapInsertionBenchmark.java b/internal-api/src/jmh/java/datadog/trace/api/TagMapAccessBenchmark.java similarity index 88% rename from internal-api/src/jmh/java/datadog/trace/api/TagMapInsertionBenchmark.java rename to internal-api/src/jmh/java/datadog/trace/api/TagMapAccessBenchmark.java index f3a324bb550..ecd1a1244b4 100644 --- a/internal-api/src/jmh/java/datadog/trace/api/TagMapInsertionBenchmark.java +++ b/internal-api/src/jmh/java/datadog/trace/api/TagMapAccessBenchmark.java @@ -26,9 +26,7 @@ *

    byId stores straight into the dense known-tag store at its positional slot ({@code * knownValues[fieldPos]}, O(1), no scan); byString pays {@code keyOf(name)} to resolve the id first * (via the real {@link datadog.trace.util.TagSet} table) and then slots it the same way. The bucket - * baseline (no resolver, master-equivalent) is {@link TagMapInsertionBaselineBenchmark}. - * - * + * baseline (no resolver, master-equivalent) is {@link TagMapAccessBaselineBenchmark}. * Apple M1 Max (10 core) - 8 threads - 1 fork - Java 8 (Zulu 8.0.382) - positional dense store * * Benchmark Mode Cnt Score Error Units @@ -43,16 +41,17 @@ * * *

      - *
    • getObject by id vs by name: 129.7M vs 73.5M (~1.77x) — the common read. The whole gap - * is {@code keyOf}; both hit the slot and return the raw value with no Entry. Id-keyed value - * reads win. - *
    • getObject ~= getEntry (130M ~= 129M): the Entry "materialization penalty" vanishes for - * value use — escape analysis scalar-replaces the transient Entry when the caller consumes its - * value rather than retaining it, so {@code getEntry} needs no replacement here. (getEntryReader - * was measured and dropped: its eager name resolution made it the slowest read.) - *
    • insertById ~3x the bucket baseline (126M vs 43M) — O(1) positional claim + no per-tag - * Entry; insertByString +32% (57M vs 43M) even paying {@code keyOf}, so the former - * name-insert regression is gone. + *
    • getObject by id vs by name: 129.7M vs 73.5M (~1.77x) — the common read. The whole + * gap is {@code keyOf}; both hit the slot and return the raw value with no Entry. Id-keyed + * value reads win. + *
    • getObject ~= getEntry (130M ~= 129M): the Entry "materialization penalty" vanishes + * for value use — escape analysis scalar-replaces the transient Entry when the caller + * consumes its value rather than retaining it, so {@code getEntry} needs no replacement here. + * (getEntryReader was measured and dropped: its eager name resolution made it the slowest + * read.) + *
    • insertById ~3x the bucket baseline (126M vs 43M) — O(1) positional claim + no + * per-tag Entry; insertByString +32% (57M vs 43M) even paying {@code keyOf}, so the + * former name-insert regression is gone. *
    */ @BenchmarkMode(Mode.Throughput) @@ -62,7 +61,7 @@ @Measurement(iterations = 5) @Threads(8) @State(Scope.Benchmark) -public class TagMapInsertionBenchmark { +public class TagMapAccessBenchmark { // a representative HTTP-server-ish tag set static final String[] NAMES = { "http.request.method", diff --git a/internal-api/src/test/java/datadog/trace/api/TagMapFuzzTest.java b/internal-api/src/test/java/datadog/trace/api/TagMapFuzzTest.java index 4ffeea90c7d..d6c2c52637e 100644 --- a/internal-api/src/test/java/datadog/trace/api/TagMapFuzzTest.java +++ b/internal-api/src/test/java/datadog/trace/api/TagMapFuzzTest.java @@ -144,6 +144,19 @@ void testMerge() { hashMapA.putAll(hashMapB); assertMapEquals(hashMapA, tagMapA); + + // The merge must not mutate the source, AND must not leave the dest sharing a mutable + // BucketGroup chain with it (cloneChain must deep-copy). So the source must stay intact both + // right after the merge and after the dest is independently mutated - mutating a shared chain + // would corrupt the source. + assertMapEquals(hashMapB, tagMapB); + for (int j = 0; j < 16; ++j) { + tagMapA.set("merge-probe-" + j, "probe-" + j); + } + for (String key : hashMapB.keySet()) { + tagMapA.remove(key); + } + assertMapEquals(hashMapB, tagMapB); } } From edbaf77c4cd5c91ff1da7b4a952f2756139a53bc Mon Sep 17 00:00:00 2001 From: Douglas Q Hawkins Date: Wed, 17 Jun 2026 06:44:51 -0400 Subject: [PATCH 35/35] Spotless formatting --- .../src/jmh/java/datadog/trace/util/KeyOfBenchmark.java | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/internal-api/src/jmh/java/datadog/trace/util/KeyOfBenchmark.java b/internal-api/src/jmh/java/datadog/trace/util/KeyOfBenchmark.java index 48021829c7d..aee20e94755 100644 --- a/internal-api/src/jmh/java/datadog/trace/util/KeyOfBenchmark.java +++ b/internal-api/src/jmh/java/datadog/trace/util/KeyOfBenchmark.java @@ -27,9 +27,7 @@ * *

    Two term flavors: interned (realistic — instrumentation passes string literals → the * {@code ==} fast path in eq) and copies (non-interned → forces {@code String.equals}). - * Terms are hit-dominated. - * - * + * Terms are hit-dominated. * Apple M1 Max (10 core) - 8 threads (per-thread state) - 2 forks - Java 8 (Zulu 8.0.382) * * Benchmark Mode Cnt Score Error Units