Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
31 changes: 0 additions & 31 deletions .github/workflows/push.yml
Original file line number Diff line number Diff line change
Expand Up @@ -306,37 +306,6 @@ jobs:
exit 1
fi

# Github repo: https://github.com/ajv-validator/ajv-cli
- name: Install ajv-cli
run: npm install -g ajv-cli@5.0.0

# Assert that the generated bundle schema is a valid JSON schema by using
# ajv-cli to validate it against bundle configuration files.
# By default, ajv-cli runs in strict mode, which fails if the schema
# itself is not valid. Strict mode is stricter than the JSON schema
# specification. For details, see: https://ajv.js.org/options.html#strict-mode-options
# ajv-cli is configured to accept the markdownDescription keyword, which is not part of the JSON schema specification
# but is used by editors like VSCode to render markdown in the description field.
- name: Validate bundle schema
run: |
go run main.go bundle schema > schema.json

# Register the non-standard keywords used by the bundle schema (including markdownDescription) with ajv
echo "module.exports = function(a) {
a.addKeyword('deprecationMessage');
a.addKeyword('doNotSuggest');
a.addKeyword('markdownDescription');
a.addKeyword('x-databricks-preview');
}" >> keywords.js

for file in ./bundle/internal/schema/testdata/pass/*.yml; do
ajv test -s schema.json -d $file --valid -c=./keywords.js
done

for file in ./bundle/internal/schema/testdata/fail/*.yml; do
ajv test -s schema.json -d $file --invalid -c=./keywords.js
done

validate-python-codegen:
needs: cleanups
runs-on: ubuntu-latest
Expand Down
4 changes: 4 additions & 0 deletions NOTICE
Original file line number Diff line number Diff line change
Expand Up @@ -74,6 +74,10 @@ License - https://github.com/manifoldco/promptui/blob/master/LICENSE.md

This Software contains code from the following open source projects, licensed under the MIT license:

google/jsonschema-go - https://github.com/google/jsonschema-go
Copyright 2025 Google LLC
License - https://github.com/google/jsonschema-go/blob/main/LICENSE

charmbracelet/bubbles - https://github.com/charmbracelet/bubbles
Copyright (c) 2020-2025 Charmbracelet, Inc
License - https://github.com/charmbracelet/bubbles/blob/master/LICENSE
Expand Down
212 changes: 212 additions & 0 deletions bundle/schema/validate_test.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,212 @@
package schema_test

import (
"encoding/json"
"os"
"path/filepath"
"strings"
"testing"

"github.com/databricks/cli/bundle/schema"
googleschema "github.com/google/jsonschema-go/jsonschema"
"github.com/stretchr/testify/assert"
"github.com/stretchr/testify/require"
"go.yaml.in/yaml/v3"
)

// isSchemaNode reports whether obj is an actual JSON Schema definition as
// opposed to a pure namespace node inside the nested $defs tree.
//
// A namespace node holds nothing but further objects (map[string]any values),
// whereas a schema definition carries at least one value that is not an object,
// for example a "type" string or a "oneOf" array. The empty object {} is
// treated as a schema too, since it is a valid schema that accepts any value.
func isSchemaNode(obj map[string]any) bool {
	for _, child := range obj {
		switch child.(type) {
		case map[string]any:
			// Another level of nesting; this entry alone proves nothing.
			continue
		default:
			// Any non-object value marks obj as a schema definition.
			return true
		}
	}
	// Either obj is empty (a schema), or every entry was a nested object
	// (a namespace node).
	return len(obj) == 0
}

// flattenDefs collapses the nested $defs object tree into a map with a single
// level of keys. The path segments leading to each schema definition are
// joined with "/" to build the flat key, so
// $defs["github.com"]["databricks"]["resources.Job"] ends up stored under
// $defs["github.com/databricks/resources.Job"].
//
// The input map is only read; the flattened entries are written to a newly
// allocated map which is returned.
func flattenDefs(defs map[string]any) map[string]any {
	flat := make(map[string]any)
	flattenDefsHelper("", defs, flat)
	return flat
}

// flattenDefsHelper walks node recursively and records every leaf it finds in
// result. prefix is the "/"-joined path of the keys visited so far and is
// empty at the root. A value is considered a leaf when it is not an object or
// when isSchemaNode reports it as a schema definition; every other object is
// treated as a namespace level and descended into.
func flattenDefsHelper(prefix string, node, result map[string]any) {
	for name, child := range node {
		// Top-level keys are used verbatim; deeper keys are appended to
		// the path accumulated so far.
		path := name
		if prefix != "" {
			path = prefix + "/" + name
		}

		nested, isObject := child.(map[string]any)
		if isObject && !isSchemaNode(nested) {
			flattenDefsHelper(path, nested, result)
			continue
		}
		result[path] = child
	}
}

// rewriteRefs returns a deep copy of the JSON value v in which every string
// stored under a "$ref" key has been passed through rewriteRef.
//
// Once $defs has been flattened its keys contain literal "/" characters. JSON
// Pointer (RFC 6901) uses "/" as the path separator, so inside a $ref those
// characters have to be written as "~1" for the pointer to address one single
// key. Objects and arrays are rebuilt recursively; any other value is returned
// unchanged. A "$ref" whose value is not a string is copied as-is and not
// descended into.
func rewriteRefs(v any) any {
	if object, ok := v.(map[string]any); ok {
		rewritten := make(map[string]any, len(object))
		for key, child := range object {
			target, isString := child.(string)
			switch {
			case key == "$ref" && isString:
				rewritten[key] = rewriteRef(target)
			case key == "$ref":
				rewritten[key] = child
			default:
				rewritten[key] = rewriteRefs(child)
			}
		}
		return rewritten
	}

	if list, ok := v.([]any); ok {
		rewritten := make([]any, len(list))
		for i := range list {
			rewritten[i] = rewriteRefs(list[i])
		}
		return rewritten
	}

	return v
}

// rewriteRef converts a $ref that addresses the nested $defs tree into one
// that addresses the flattened $defs map. Every "/" after the "#/$defs/"
// prefix is replaced by its JSON Pointer escape "~1", e.g.
//
//	"#/$defs/github.com/databricks/resources.Job"
//
// becomes
//
//	"#/$defs/github.com~1databricks~1resources.Job"
//
// References that do not start with "#/$defs/" are returned untouched.
func rewriteRef(ref string) string {
	const defsPrefix = "#/$defs/"
	if !strings.HasPrefix(ref, defsPrefix) {
		return ref
	}
	// Splitting on "/" and re-joining with "~1" escapes every separator in
	// the remainder of the pointer.
	segments := strings.Split(ref[len(defsPrefix):], "/")
	return defsPrefix + strings.Join(segments, "~1")
}

// transformSchema prepares the bundle schema for the Google jsonschema-go
// library, which expects a flat $defs map: the nested $defs tree is flattened
// and all $ref values are rewritten to point at the flattened keys.
//
// Note that raw["$defs"] is replaced in place when it is an object, so the
// caller's map is modified. The returned map is the rewritten copy produced
// by rewriteRefs.
func transformSchema(raw map[string]any) map[string]any {
	nested, hasDefs := raw["$defs"].(map[string]any)
	if hasDefs {
		raw["$defs"] = flattenDefs(nested)
	}
	rewritten := rewriteRefs(raw)
	return rewritten.(map[string]any)
}

// compileSchema builds a resolved validator from the embedded bundle schema
// (schema.Bytes). The schema is decoded into a generic map, run through
// transformSchema, re-encoded to JSON, decoded into a googleschema.Schema and
// finally resolved. Any failure along the way aborts the calling test.
func compileSchema(t *testing.T) *googleschema.Resolved {
	t.Helper()

	var nested map[string]any
	require.NoError(t, json.Unmarshal(schema.Bytes, &nested))

	// Round-trip through JSON so the library parses the transformed
	// document with its own decoder.
	flatJSON, err := json.Marshal(transformSchema(nested))
	require.NoError(t, err)

	var parsed googleschema.Schema
	require.NoError(t, json.Unmarshal(flatJSON, &parsed))

	resolved, err := parsed.Resolve(nil)
	require.NoError(t, err)
	return resolved
}

// loadYAMLAsJSON reads the YAML file at path and returns its content as a
// JSON-compatible value. The decoded YAML is marshalled to JSON and
// unmarshalled again so that the result only uses the canonical JSON types
// (float64, string, bool, nil, map[string]any, []any) that the JSON schema
// validator expects. Any read or conversion error aborts the calling test.
func loadYAMLAsJSON(t *testing.T, path string) any {
	t.Helper()

	content, err := os.ReadFile(path)
	require.NoError(t, err)

	var decoded any
	require.NoError(t, yaml.Unmarshal(content, &decoded))

	encoded, err := json.Marshal(decoded)
	require.NoError(t, err)

	var instance any
	require.NoError(t, json.Unmarshal(encoded, &instance))
	return instance
}

// TestSchemaValidatePassCases checks that every YAML file in the "pass"
// testdata directory validates against the bundle schema without error. Each
// file runs as its own subtest named after the file.
func TestSchemaValidatePassCases(t *testing.T) {
	resolved := compileSchema(t)

	paths, err := filepath.Glob("../internal/schema/testdata/pass/*.yml")
	require.NoError(t, err)
	// Guard against the glob silently matching nothing.
	require.NotEmpty(t, paths)

	for i := range paths {
		path := paths[i]
		t.Run(filepath.Base(path), func(t *testing.T) {
			assert.NoError(t, resolved.Validate(loadYAMLAsJSON(t, path)))
		})
	}
}

// TestSchemaValidateFailCases checks that every YAML file in the "fail"
// testdata directory is rejected by the bundle schema, and that the validation
// error mentions the expected schema path. Each file runs as its own subtest
// named after the file; a file without an entry in the table fails the test.
func TestSchemaValidateFailCases(t *testing.T) {
	resolved := compileSchema(t)

	// File name -> schema path expected to appear in the validation error.
	// Every type in the bundle schema is wrapped in oneOf to allow
	// interpolation patterns, and the Google library drops the per-branch
	// errors when a oneOf fails. The schema path is therefore the only thing
	// that can be asserted on; the specific failure reason is not available.
	wantPathByFile := map[string]string{
		"basic.yml":                           "config.Bundle",
		"deprecated_job_field_format.yml":     "config.Resources",
		"hidden_job_field_deployment.yml":     "config.Resources",
		"hidden_job_field_edit_mode.yml":      "config.Target",
		"incorrect_volume_type.yml":           "config.Resources",
		"invalid_enum_value_in_job.yml":       "config.Resources",
		"invalid_enum_value_in_model.yml":     "config.Resources",
		"invalid_reference_in_job.yml":        "config.Resources",
		"invalid_reference_in_model.yml":      "config.Resources",
		"readonly_job_field_git_snapshot.yml": "config.Resources",
		"readonly_job_field_job_source.yml":   "config.Resources",
		"required_field_missing_in_job.yml":   "config.Resources",
		"unknown_field_in_job.yml":            "config.Resources",
		"unknown_field_in_model.yml":          "config.Resources",
	}

	paths, err := filepath.Glob("../internal/schema/testdata/fail/*.yml")
	require.NoError(t, err)
	// Guard against the glob silently matching nothing.
	require.NotEmpty(t, paths)

	for i := range paths {
		path := paths[i]
		base := filepath.Base(path)

		wantPath, known := wantPathByFile[base]
		require.True(t, known, "no expected error for %s, please add an entry to the test table", base)

		t.Run(base, func(t *testing.T) {
			assert.ErrorContains(t, resolved.Validate(loadYAMLAsJSON(t, path)), wantPath)
		})
	}
}
1 change: 1 addition & 0 deletions go.mod
Original file line number Diff line number Diff line change
Expand Up @@ -14,6 +14,7 @@ require (
github.com/charmbracelet/lipgloss v1.1.0 // MIT
github.com/databricks/databricks-sdk-go v0.126.0 // Apache 2.0
github.com/fatih/color v1.19.0 // MIT
github.com/google/jsonschema-go v0.4.2 // MIT
github.com/google/uuid v1.6.0 // BSD-3-Clause
github.com/gorilla/mux v1.8.1 // BSD 3-Clause
github.com/gorilla/websocket v1.5.3 // BSD 2-Clause
Expand Down
2 changes: 2 additions & 0 deletions go.sum
Original file line number Diff line number Diff line change
Expand Up @@ -111,6 +111,8 @@ github.com/google/go-cmp v0.7.0 h1:wk8382ETsv4JYUZwIsn6YpYiWiBsYLSJiTsyBybVuN8=
github.com/google/go-cmp v0.7.0/go.mod h1:pXiqmnSA92OHEEa9HXL2W4E7lf9JzCmGVUdgjX3N/iU=
github.com/google/go-querystring v1.2.0 h1:yhqkPbu2/OH+V9BfpCVPZkNmUXhb2gBxJArfhIxNtP0=
github.com/google/go-querystring v1.2.0/go.mod h1:8IFJqpSRITyJ8QhQ13bmbeMBDfmeEJZD5A0egEOmkqU=
github.com/google/jsonschema-go v0.4.2 h1:tmrUohrwoLZZS/P3x7ex0WAVknEkBZM46iALbcqoRA8=
github.com/google/jsonschema-go v0.4.2/go.mod h1:r5quNTdLOYEz95Ru18zA0ydNbBuYoo9tgaYcxEYhJVE=
github.com/google/s2a-go v0.1.9 h1:LGD7gtMgezd8a/Xak7mEWL0PjoTQFvpRudN895yqKW0=
github.com/google/s2a-go v0.1.9/go.mod h1:YA0Ei2ZQL3acow2O62kdp9UlnvMmU7kA6Eutn0dXayM=
github.com/google/uuid v1.6.0 h1:NIvaJDMOsjHA8n1jAhLSgzrAzy1Hgr+hNrb57e+94F0=
Expand Down
Loading