chore(storage): support DISABLE_BUCKET_MD_IN_OTEL environment variable flag#17248
chore(storage): support DISABLE_BUCKET_MD_IN_OTEL environment variable flag#17248chandra-siri wants to merge 1 commit into
Conversation
There was a problem hiding this comment.
Code Review
This pull request introduces the DISABLE_BUCKET_MD_IN_OTEL environment variable to allow users to explicitly disable GCS destination annotations in OpenTelemetry traces. While the implementation successfully achieves this, the current approach evaluates the environment variable on every single API request and trace span helper call, which introduces unnecessary overhead. To optimize performance, these checks should be inlined into the conditional blocks to leverage Python's short-circuit evaluation. Additionally, the system test should be refactored to use pytest's monkeypatch fixture instead of manual environment variable manipulation to ensure robust cleanup.
| # Check if GCS destination annotations are explicitly disabled via environment | ||
| disable_bucket_md = os.environ.get("DISABLE_BUCKET_MD_IN_OTEL", "").lower() in ( | ||
| "1", | ||
| "true", | ||
| "yes", | ||
| "on", | ||
| ) | ||
| client = self._client | ||
|
|
||
| if ( | ||
| HAS_OPENTELEMETRY | ||
| not disable_bucket_md | ||
| and HAS_OPENTELEMETRY | ||
| and enable_otel_traces | ||
| and hasattr(client, "_bucket_metadata_cache") | ||
| and client._bucket_metadata_cache |
There was a problem hiding this comment.
Evaluating the environment variable DISABLE_BUCKET_MD_IN_OTEL on every single api_request call introduces unnecessary overhead for all users, even when OpenTelemetry is not installed or enabled. Moving this check inside the if condition allows Python to short-circuit and avoid the os.environ lookup entirely when HAS_OPENTELEMETRY or enable_otel_traces is False.
client = self._client
if (
HAS_OPENTELEMETRY
and enable_otel_traces
and os.environ.get("DISABLE_BUCKET_MD_IN_OTEL", "").lower() not in ("1", "true", "yes", "on")
and hasattr(client, "_bucket_metadata_cache")
and client._bucket_metadata_cache| # Check if GCS destination annotations are explicitly disabled via environment | ||
| disable_bucket_md = os.environ.get("DISABLE_BUCKET_MD_IN_OTEL", "").lower() in ( | ||
| "1", | ||
| "true", | ||
| "yes", | ||
| "on", | ||
| ) | ||
|
|
||
| if ( | ||
| bucket_name | ||
| not disable_bucket_md | ||
| and bucket_name | ||
| and isinstance(bucket_name, str) | ||
| and client | ||
| and hasattr(client, "_bucket_metadata_cache") |
There was a problem hiding this comment.
Evaluating the environment variable on every call to create_trace_span_helper can be avoided if bucket_name is not valid or not a string. Inlining the check allows Python to short-circuit and avoid the os.environ lookup when the span creation is skipped.
if (
bucket_name
and isinstance(bucket_name, str)
and os.environ.get("DISABLE_BUCKET_MD_IN_OTEL", "").lower() not in ("1", "true", "yes", "on")
and client
and hasattr(client, "_bucket_metadata_cache")| def test_disable_bucket_md_env_flag(storage_client, exporter, buckets_to_delete): | ||
| """Verifies that setting DISABLE_BUCKET_MD_IN_OTEL=true disables GCS | ||
| destination annotations, even on cache hits.""" | ||
| # Clear cache and OTel exporter logs | ||
| storage_client._bucket_metadata_cache.clear() | ||
| exporter.clear() | ||
|
|
||
| bucket_name = _helpers.unique_name("aco-disable") | ||
| bucket = storage_client.bucket(bucket_name) | ||
| storage_client.create_bucket(bucket) | ||
| buckets_to_delete.append(bucket) | ||
|
|
||
| blob_name = "test_blob.txt" | ||
| blob = bucket.blob(blob_name) | ||
| blob.upload_from_string("hello") | ||
|
|
||
| # Warm cache directly via GCS creation warming (client.create_bucket already primes the cache) | ||
| assert storage_client._bucket_metadata_cache.get(bucket_name) is not None | ||
|
|
||
| # Enable the DISABLE_BUCKET_MD_IN_OTEL environment variable | ||
| os.environ["DISABLE_BUCKET_MD_IN_OTEL"] = "true" | ||
|
|
||
| try: | ||
| # Download (normally would be a cache hit with GCS annotations) | ||
| blob.download_as_bytes() | ||
|
|
||
| # Verify that ACO attributes are NOT present in the OTel span! | ||
| spans = exporter.get_finished_spans() | ||
| dl_spans = [s for s in spans if s.name == "Storage.Blob.downloadAsBytes"] | ||
| assert len(dl_spans) == 1 | ||
| attrs = dl_spans[0].attributes | ||
| assert "gcp.resource.destination.id" not in attrs | ||
| assert "gcp.resource.destination.location" not in attrs | ||
| finally: | ||
| # Restore env var | ||
| os.environ.pop("DISABLE_BUCKET_MD_IN_OTEL", None) |
There was a problem hiding this comment.
Using manual try...finally blocks to set and restore environment variables in tests is error-prone and can pollute the environment of other tests if not handled perfectly. It is more idiomatic and robust to use pytest's built-in monkeypatch fixture, which automatically handles setup and teardown of environment variables.
def test_disable_bucket_md_env_flag(storage_client, exporter, buckets_to_delete, monkeypatch):
"""Verifies that setting DISABLE_BUCKET_MD_IN_OTEL=true disables GCS
destination annotations, even on cache hits."""
# Clear cache and OTel exporter logs
storage_client._bucket_metadata_cache.clear()
exporter.clear()
bucket_name = _helpers.unique_name("aco-disable")
bucket = storage_client.bucket(bucket_name)
storage_client.create_bucket(bucket)
buckets_to_delete.append(bucket)
blob_name = "test_blob.txt"
blob = bucket.blob(blob_name)
blob.upload_from_string("hello")
# Warm cache directly via GCS creation warming (client.create_bucket already primes the cache)
assert storage_client._bucket_metadata_cache.get(bucket_name) is not None
# Enable the DISABLE_BUCKET_MD_IN_OTEL environment variable
monkeypatch.setenv("DISABLE_BUCKET_MD_IN_OTEL", "true")
# Download (normally would be a cache hit with GCS annotations)
blob.download_as_bytes()
# Verify that ACO attributes are NOT present in the OTel span!
spans = exporter.get_finished_spans()
dl_spans = [s for s in spans if s.name == "Storage.Blob.downloadAsBytes"]
assert len(dl_spans) == 1
attrs = dl_spans[0].attributes
assert "gcp.resource.destination.id" not in attrs
assert "gcp.resource.destination.location" not in attrs
This PR introduces the
DISABLE_BUCKET_MD_IN_OTELenvironment flag to dynamically disable injecting Cloud Storage bucket metadata destination attributes (gcp.resource.destination.idandgcp.resource.destination.location) inside OTel spans.