diff --git a/Cargo.lock b/Cargo.lock index 8d660a38..8342e338 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -4706,6 +4706,7 @@ dependencies = [ "weaver_common", "weaver_diff", "weaver_forge", + "weaver_otel_schema", "weaver_resolved_schema", "weaver_resolver", "weaver_semconv", @@ -4821,6 +4822,18 @@ dependencies = [ "weaver_semconv", ] +[[package]] +name = "weaver_otel_schema" +version = "0.10.0" +dependencies = [ + "miette", + "serde", + "serde_yaml", + "thiserror 2.0.11", + "weaver_common", + "weaver_version", +] + [[package]] name = "weaver_resolved_schema" version = "0.12.0" @@ -4860,12 +4873,14 @@ dependencies = [ "glob", "miette", "ordered-float", + "regex", "schemars", "serde", "serde_json", "serde_yaml", "thiserror 2.0.11", "ureq", + "weaver_cache", "weaver_common", ] diff --git a/Cargo.toml b/Cargo.toml index e7834d5e..5ff9ea83 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -79,6 +79,7 @@ weaver_semconv_gen = { path = "crates/weaver_semconv_gen" } weaver_cache = { path = "crates/weaver_cache" } weaver_forge = { path = "crates/weaver_forge" } weaver_checker = { path = "crates/weaver_checker" } +weaver_otel_schema = { path = "crates/weaver_otel_schema" } clap = { version = "4.5.24", features = ["derive"] } rayon = "1.10.0" diff --git a/README.md b/README.md index 42d8a938..112d5849 100644 --- a/README.md +++ b/README.md @@ -147,6 +147,7 @@ Semantic Convention Registries. 
The following commands are available: |---------------------------------------------------------------------------|---------------------------------------------| | [weaver registry check](docs/usage.md#registry-check) | Check the validity of a semconv registry | | [weaver registry resolve](docs/usage.md#registry-resolve) | Resolve a semconv registry | +| [weaver registry diff](docs/usage.md#registry-diff) | Generate a diff report between two versions | | [weaver registry generate](docs/usage.md#registry-generate) | Generate artifacts from a semconv registry | | [weaver registry update-markdown](docs/usage.md#registry-update-markdown) | Update semconv snippet-based markdown files | | [weaver registry stats](docs/usage.md#registry-stats) | Generate statistics on a semconv registry | @@ -162,6 +163,8 @@ Telemetry Schemas. documentation and code based on semantic conventions. - [Weaver Checker](crates/weaver_checker/README.md): An integrated policy engine for enforcing policies on semantic conventions. +- [Schema Changes](docs/schema-changes.md): A document describing the data model + used to represent the differences between two versions of a semantic convention registry. - [Application Telemetry Schema OTEP](https://github.com/open-telemetry/oteps/blob/main/text/0243-app-telemetry-schema-vision-roadmap.md): A vision and roadmap for the concept of Application Telemetry Schema. - Presentation slides from the Semantic Convention SIG meeting on October 23, diff --git a/crates/weaver_cache/src/lib.rs b/crates/weaver_cache/src/lib.rs index 56752e56..add17781 100644 --- a/crates/weaver_cache/src/lib.rs +++ b/crates/weaver_cache/src/lib.rs @@ -29,6 +29,8 @@ pub mod registry_path; const TAR_GZ_EXT: &str = ".tar.gz"; /// The extension for a zip archive. const ZIP_EXT: &str = ".zip"; +/// The name of the registry manifest file. +const REGISTRY_MANIFEST: &str = "registry_manifest.yaml"; /// An error that can occur while creating or using a cache. 
#[derive(thiserror::Error, Debug, Clone, Serialize, Diagnostic)] @@ -99,7 +101,7 @@ impl From for DiagnosticMessages { /// - A simple wrapper around a local directory /// - Initialized from a Git repository /// - Initialized from a Git archive -#[derive(Default)] +#[derive(Default, Debug)] pub struct RegistryRepo { // A unique identifier for the registry (e.g. main, baseline, etc.) id: String, @@ -509,6 +511,17 @@ impl RegistryRepo { &self.registry_path } + /// Returns the path to the `registry_manifest.yaml` file (if any). + #[must_use] + pub fn manifest_path(&self) -> Option { + let manifest_path = self.path.join(REGISTRY_MANIFEST); + if manifest_path.exists() { + Some(manifest_path) + } else { + None + } + } + /// Creates a temporary directory for the registry repository and returns the path. /// The temporary directory is created in the `.weaver/semconv_registry_cache`. fn create_tmp_repo() -> Result { diff --git a/crates/weaver_codegen_test/build.rs b/crates/weaver_codegen_test/build.rs index 95c79a9c..c6b84e92 100644 --- a/crates/weaver_codegen_test/build.rs +++ b/crates/weaver_codegen_test/build.rs @@ -24,7 +24,6 @@ use weaver_semconv::registry::SemConvRegistry; const SEMCONV_REGISTRY_PATH: &str = "./semconv_registry/"; const TEMPLATES_PATH: &str = "./templates/registry/"; -const REGISTRY_ID: &str = "test"; const TARGET: &str = "rust"; const FOLLOW_SYMLINKS: bool = false; @@ -50,8 +49,10 @@ fn main() { .ignore(|e| matches!(e.severity(), Some(miette::Severity::Warning))) .into_result_failing_non_fatal() .unwrap_or_else(|e| process_error(&logger, e)); - let mut registry = SemConvRegistry::from_semconv_specs(REGISTRY_ID, semconv_specs); + let mut registry = SemConvRegistry::from_semconv_specs(®istry_repo, semconv_specs) + .unwrap_or_else(|e| process_error(&logger, e)); let schema = SchemaResolver::resolve_semantic_convention_registry(&mut registry) + .into_result_failing_non_fatal() .unwrap_or_else(|e| process_error(&logger, e)); let loader = 
FileSystemFileLoader::try_new(TEMPLATES_PATH.into(), TARGET) @@ -59,13 +60,9 @@ fn main() { let config = WeaverConfig::try_from_path("./templates/registry/rust") .unwrap_or_else(|e| process_error(&logger, e)); let engine = TemplateEngine::new(config, loader, Params::default()); - let template_registry = ResolvedRegistry::try_from_resolved_registry( - schema - .registry(REGISTRY_ID) - .expect("Failed to get the registry from the resolved schema"), - schema.catalog(), - ) - .unwrap_or_else(|e| process_error(&logger, e)); + let template_registry = + ResolvedRegistry::try_from_resolved_registry(&schema.registry, schema.catalog()) + .unwrap_or_else(|e| process_error(&logger, e)); let target_dir: PathBuf = target_dir.into(); engine .generate( diff --git a/crates/weaver_common/src/diagnostic.rs b/crates/weaver_common/src/diagnostic.rs index 9f0fcab7..826db5eb 100644 --- a/crates/weaver_common/src/diagnostic.rs +++ b/crates/weaver_common/src/diagnostic.rs @@ -75,6 +75,12 @@ pub struct DiagnosticMessage { #[serde(transparent)] pub struct DiagnosticMessages(Vec); +impl From for DiagnosticMessages { + fn from(value: DiagnosticMessage) -> Self { + Self(vec![value]) + } +} + impl DiagnosticMessage { /// Creates a new diagnostic message from an error pub fn new(error: M) -> Self { @@ -180,8 +186,7 @@ impl DiagnosticMessages { Self(vec![DiagnosticMessage::new(error)]) } - /// Returns true if all the diagnostic messages are explicitly marked as - /// warnings or advices. + /// Returns true if at least one diagnostic message has an error severity. 
#[must_use] pub fn has_error(&self) -> bool { let non_error_count = self @@ -242,7 +247,7 @@ where #[cfg(test)] mod tests { use super::*; - use miette::{diagnostic, Diagnostic}; + use miette::Diagnostic; #[derive(thiserror::Error, Debug, Clone, Diagnostic, Serialize)] #[error("This is a test error")] diff --git a/crates/weaver_common/src/result.rs b/crates/weaver_common/src/result.rs index 9dde0ba2..a6ef2018 100644 --- a/crates/weaver_common/src/result.rs +++ b/crates/weaver_common/src/result.rs @@ -27,10 +27,50 @@ pub enum WResult { FatalErr(E), } +impl WResult { + /// Converts a [`WResult`] into a standard [`Result`], optionally capturing non-fatal errors. + pub fn capture_non_fatal_errors( + self, + diag_msgs: &mut DiagnosticMessages, + ) -> Result { + match self { + WResult::Ok(result) => Ok(result), + WResult::OkWithNFEs(result, nfes) => { + diag_msgs.extend_from_vec(nfes); + Ok(result) + } + WResult::FatalErr(fatal_err) => Err(fatal_err), + } + } +} + impl WResult where E: WeaverError + Error + Diagnostic + Serialize + Send + Sync + 'static, { + /// Returns `true` if the result is a fatal error. + pub fn is_fatal(&self) -> bool { + matches!(self, WResult::FatalErr(_)) + } + + /// Returns `true` if the result is not Ok. + pub fn has_errors(&self) -> bool { + match self { + WResult::Ok(_) => false, + WResult::OkWithNFEs(_, errors) => !errors.is_empty(), + WResult::FatalErr(_) => true, + } + } + + /// Returns the number of non-fatal errors, or 1 if the result is a fatal error, 0 otherwise. + pub fn error_count(&self) -> usize { + match self { + WResult::Ok(_) => 0, + WResult::OkWithNFEs(_, errors) => errors.len(), + WResult::FatalErr(_) => 1, + } + } + /// Creates a new [`WResult`] with a successful result. 
pub fn with_non_fatal_errors(result: T, non_fatal_errors: Vec) -> Self { if non_fatal_errors.is_empty() { diff --git a/crates/weaver_forge/expected_output/test/resource/library.md b/crates/weaver_forge/expected_output/test/resource/library.md index 4d7dbc05..3f7e85c1 100644 --- a/crates/weaver_forge/expected_output/test/resource/library.md +++ b/crates/weaver_forge/expected_output/test/resource/library.md @@ -22,7 +22,7 @@ Brief: Span attributes used by non-OTLP exporters to represent OpenTelemetry Sco - Examples: [ "io.opentelemetry.contrib.mongodb", ] -- Deprecated: use the `otel.scope.name` attribute. +- Deprecated: #### Attribute `otel.library.version` @@ -36,7 +36,7 @@ Brief: Span attributes used by non-OTLP exporters to represent OpenTelemetry Sco - Examples: [ "1.0.0", ] -- Deprecated: use the `otel.scope.version` attribute. +- Deprecated: \ No newline at end of file diff --git a/crates/weaver_forge/expected_output/test/resources.md b/crates/weaver_forge/expected_output/test/resources.md index bb21a7f9..dfdb763e 100644 --- a/crates/weaver_forge/expected_output/test/resources.md +++ b/crates/weaver_forge/expected_output/test/resources.md @@ -25,7 +25,7 @@ Brief: Span attributes used by non-OTLP exporters to represent OpenTelemetry Sco - Examples: [ "io.opentelemetry.contrib.mongodb", ] -- Deprecated: use the `otel.scope.name` attribute. +- Deprecated: #### Attribute `otel.library.version` @@ -39,7 +39,7 @@ Brief: Span attributes used by non-OTLP exporters to represent OpenTelemetry Sco - Examples: [ "1.0.0", ] -- Deprecated: use the `otel.scope.version` attribute. 
+- Deprecated: diff --git a/crates/weaver_forge/src/lib.rs b/crates/weaver_forge/src/lib.rs index 919432a5..7675c408 100644 --- a/crates/weaver_forge/src/lib.rs +++ b/crates/weaver_forge/src/lib.rs @@ -721,13 +721,12 @@ mod tests { let registry = SemConvRegistry::try_from_path_pattern(registry_id, "data/*.yaml") .into_result_failing_non_fatal() .expect("Failed to load registry"); - prepare_test_with_registry(target, cli_params, registry_id, registry) + prepare_test_with_registry(target, cli_params, registry) } fn prepare_test_with_registry( target: &str, cli_params: Params, - registry_id: &str, mut registry: SemConvRegistry, ) -> ( TestLogger, @@ -741,18 +740,17 @@ mod tests { let config = WeaverConfig::try_from_path(format!("templates/{}", target)).unwrap(); let engine = TemplateEngine::new(config, loader, cli_params); let schema = SchemaResolver::resolve_semantic_convention_registry(&mut registry) + .into_result_failing_non_fatal() .expect("Failed to resolve registry"); - let template_registry = ResolvedRegistry::try_from_resolved_registry( - schema.registry(registry_id).expect("registry not found"), - schema.catalog(), - ) - .unwrap_or_else(|e| { - panic!( - "Failed to create the context for the template evaluation: {:?}", - e - ) - }); + let template_registry = + ResolvedRegistry::try_from_resolved_registry(&schema.registry, schema.catalog()) + .unwrap_or_else(|e| { + panic!( + "Failed to create the context for the template evaluation: {:?}", + e + ) + }); // Delete all the files in the observed_output/target directory // before generating the new files. 
@@ -920,18 +918,17 @@ mod tests { .into_result_failing_non_fatal() .expect("Failed to load registry"); let schema = SchemaResolver::resolve_semantic_convention_registry(&mut registry) + .into_result_failing_non_fatal() .expect("Failed to resolve registry"); - let template_registry = ResolvedRegistry::try_from_resolved_registry( - schema.registry(registry_id).expect("registry not found"), - schema.catalog(), - ) - .unwrap_or_else(|e| { - panic!( - "Failed to create the context for the template evaluation: {:?}", - e - ) - }); + let template_registry = + ResolvedRegistry::try_from_resolved_registry(&schema.registry, schema.catalog()) + .unwrap_or_else(|e| { + panic!( + "Failed to create the context for the template evaluation: {:?}", + e + ) + }); engine .generate( @@ -1056,7 +1053,7 @@ mod tests { .into_result_failing_non_fatal() .expect("Failed to load registry"); let (logger, engine, template_registry, observed_output, expected_output) = - prepare_test_with_registry("comment_format", Params::default(), registry_id, registry); + prepare_test_with_registry("comment_format", Params::default(), registry); engine .generate( diff --git a/crates/weaver_forge/src/registry.rs b/crates/weaver_forge/src/registry.rs index 5d46ecd7..1a99af45 100644 --- a/crates/weaver_forge/src/registry.rs +++ b/crates/weaver_forge/src/registry.rs @@ -12,6 +12,7 @@ use weaver_resolved_schema::catalog::Catalog; use weaver_resolved_schema::lineage::GroupLineage; use weaver_resolved_schema::registry::{Constraint, Group, Registry}; use weaver_semconv::any_value::AnyValueSpec; +use weaver_semconv::deprecated::Deprecated; use weaver_semconv::group::{GroupType, InstrumentSpec, SpanKindSpec}; use weaver_semconv::stability::Stability; @@ -63,7 +64,7 @@ pub struct ResolvedGroup { /// provided as `description` MUST specify why it's deprecated and/or what /// to use instead. See also stability. 
#[serde(skip_serializing_if = "Option::is_none")] - pub deprecated: Option, + pub deprecated: Option, /// Additional constraints. /// Allow to define additional requirements on the semantic convention. /// It defaults to an empty list. diff --git a/crates/weaver_forge/templates/test/resource.md b/crates/weaver_forge/templates/test/resource.md index de165458..965d7392 100644 --- a/crates/weaver_forge/templates/test/resource.md +++ b/crates/weaver_forge/templates/test/resource.md @@ -39,7 +39,7 @@ Brief: {{ resource.brief }} - Sampling relevant: {{ attribute.sampling_relevant }} {%- endif %} {%- if attribute.deprecated %} -- Deprecated: {{ attribute.deprecated }} +- Deprecated: {{ attribute.deprecated.note }} {%- endif %} {% if attribute.stability %} - Stability: {{ attribute.stability | capitalize }} diff --git a/crates/weaver_forge/templates/test/resources.md b/crates/weaver_forge/templates/test/resources.md index 483f5b4f..c3cbff94 100644 --- a/crates/weaver_forge/templates/test/resources.md +++ b/crates/weaver_forge/templates/test/resources.md @@ -39,7 +39,7 @@ Brief: {{ resource.brief }} - Sampling relevant: {{ attribute.sampling_relevant }} {%- endif %} {%- if attribute.deprecated %} -- Deprecated: {{ attribute.deprecated }} +- Deprecated: {{ attribute.note }} {%- endif %} {% if attribute.stability %} - Stability: {{ attribute.stability | capitalize }} diff --git a/crates/weaver_otel_schema/Cargo.toml b/crates/weaver_otel_schema/Cargo.toml new file mode 100644 index 00000000..b420ec93 --- /dev/null +++ b/crates/weaver_otel_schema/Cargo.toml @@ -0,0 +1,21 @@ +[package] +name = "weaver_otel_schema" +version = "0.10.0" +authors.workspace = true +repository.workspace = true +license.workspace = true +publish.workspace = true +edition.workspace = true +rust-version.workspace = true + +[lints] +workspace = true + +[dependencies] +weaver_version = { path = "../weaver_version" } +weaver_common = { path = "../weaver_common" } + +thiserror.workspace = true 
+serde.workspace = true +serde_yaml.workspace = true +miette.workspace = true diff --git a/crates/weaver_otel_schema/README.md b/crates/weaver_otel_schema/README.md new file mode 100644 index 00000000..382352ba --- /dev/null +++ b/crates/weaver_otel_schema/README.md @@ -0,0 +1,3 @@ +# OpenTelemetry Schema Data Model + +This crate describes the data model for the OpenTelemetry telemetry schema. diff --git a/crates/weaver_otel_schema/allowed-external-types.toml b/crates/weaver_otel_schema/allowed-external-types.toml new file mode 100644 index 00000000..1f22cce0 --- /dev/null +++ b/crates/weaver_otel_schema/allowed-external-types.toml @@ -0,0 +1,11 @@ +# Copyright The OpenTelemetry Authors +# SPDX-License-Identifier: Apache-2.0 +# This is used with cargo-check-external-types to reduce the surface area of downstream crates from +# the public API. Ideally this can have as few exceptions as possible. +allowed_external_types = [ + "serde::ser::Serialize", + "serde::de::Deserialize", + "miette::protocol::Diagnostic", + + "weaver_version::*", +] \ No newline at end of file diff --git a/crates/weaver_otel_schema/src/lib.rs b/crates/weaver_otel_schema/src/lib.rs new file mode 100644 index 00000000..e467036b --- /dev/null +++ b/crates/weaver_otel_schema/src/lib.rs @@ -0,0 +1,93 @@ +// SPDX-License-Identifier: Apache-2.0 + +//! OpenTelemetry Schema Definitions +//! Please refer to the [OpenTelemetry Schema OTEP](https://github.com/open-telemetry/oteps/blob/main/text/0152-telemetry-schemas.md) +//! for more information. + +use crate::Error::{InvalidTelemetrySchema, TelemetrySchemaNotFound}; +use miette::Diagnostic; +use serde::{Deserialize, Serialize}; +use weaver_common::diagnostic::{DiagnosticMessage, DiagnosticMessages}; +use weaver_version::Versions; + +/// Errors emitted by this crate. +#[derive(thiserror::Error, Debug, Clone, Deserialize, Serialize, Diagnostic)] +pub enum Error { + /// OTel Telemetry schema not found. 
+ #[error("OTel telemetry schema not found (path_or_url: {path_or_url:?}).")] + TelemetrySchemaNotFound { + /// The path or the url to the telemetry schema file. + path_or_url: String, + }, + + /// Invalid OTel Telemetry schema. + #[error("Invalid OTel telemetry schema (path_or_url: {path_or_url:?}). {error}")] + InvalidTelemetrySchema { + /// The path or the url to the telemetry schema file. + path_or_url: String, + /// The error that occurred. + error: String, + }, +} + +impl From for DiagnosticMessages { + fn from(error: Error) -> Self { + DiagnosticMessages::new(vec![DiagnosticMessage::new(error)]) + } +} + +/// An OpenTelemetry Telemetry Schema. +#[derive(Serialize, Deserialize, Debug)] +#[serde(deny_unknown_fields)] +pub struct TelemetrySchema { + /// Version of the file structure. + pub file_format: String, + /// Schema URL that this file is published at. + pub schema_url: String, + /// Definitions for each schema version in this family. + /// Note: the ordering of versions is defined according to semver + /// version number ordering rules. + /// This section is described in more details in the OTEP 0152 and in a dedicated + /// section below. + /// + #[serde(skip_serializing_if = "Option::is_none")] + pub versions: Option, +} + +impl TelemetrySchema { + /// Attempts to load a telemetry schema from a file. 
+ pub fn try_from_file>(path: P) -> Result { + let schema_path_buf = path.as_ref().to_path_buf(); + + if !schema_path_buf.exists() { + return Err(TelemetrySchemaNotFound { + path_or_url: schema_path_buf.as_path().to_string_lossy().to_string(), + }); + } + + let file = std::fs::File::open(path).map_err(|e| InvalidTelemetrySchema { + path_or_url: schema_path_buf.as_path().to_string_lossy().to_string(), + error: e.to_string(), + })?; + let reader = std::io::BufReader::new(file); + let schema: TelemetrySchema = + serde_yaml::from_reader(reader).map_err(|e| InvalidTelemetrySchema { + path_or_url: schema_path_buf.as_path().to_string_lossy().to_string(), + error: e.to_string(), + })?; + + Ok(schema) + } +} + +#[cfg(test)] +mod tests { + use crate::TelemetrySchema; + + #[test] + fn test_try_from_file() { + let schema = TelemetrySchema::try_from_file("tests/test_data/1.27.0.yaml").unwrap(); + assert_eq!(schema.file_format, "1.1.0"); + assert_eq!(schema.schema_url, "https://opentelemetry.io/schemas/1.27.0"); + } +} diff --git a/crates/weaver_otel_schema/tests/test_data/1.27.0.yaml b/crates/weaver_otel_schema/tests/test_data/1.27.0.yaml new file mode 100644 index 00000000..0c12466f --- /dev/null +++ b/crates/weaver_otel_schema/tests/test_data/1.27.0.yaml @@ -0,0 +1,505 @@ +file_format: 1.1.0 +schema_url: https://opentelemetry.io/schemas/1.27.0 +versions: + 1.27.0: + all: + changes: + # https://github.com/open-telemetry/semantic-conventions/pull/1216 + - rename_attributes: + attribute_map: + tls.client.server_name: server.address + # https://github.com/open-telemetry/semantic-conventions/pull/1075 + - rename_attributes: + attribute_map: + deployment.environment: deployment.environment.name + # https://github.com/open-telemetry/semantic-conventions/pull/1245 + - rename_attributes: + attribute_map: + messaging.kafka.message.offset: messaging.kafka.offset + # https://github.com/open-telemetry/semantic-conventions/pull/815 + - rename_attributes: + attribute_map: + 
messaging.kafka.consumer.group: messaging.consumer.group.name + messaging.rocketmq.client_group: messaging.consumer.group.name + messaging.evenhubs.consumer.group: messaging.consumer.group.name + message.servicebus.destination.subscription_name: messaging.destination.subscription.name + # https://github.com/open-telemetry/semantic-conventions/pull/1200 + - rename_attributes: + attribute_map: + gen_ai.usage.completion_tokens: gen_ai.usage.output_tokens + gen_ai.usage.prompt_tokens: gen_ai.usage.input_tokens + spans: + changes: + # https://github.com/open-telemetry/semantic-conventions/pull/1002 + - rename_attributes: + attribute_map: + db.elasticsearch.cluster.name: db.namespace + metrics: + changes: + # https://github.com/open-telemetry/semantic-conventions/pull/1125 + - rename_attributes: + attribute_map: + db.client.connections.state: db.client.connection.state + apply_to_metrics: + - db.client.connection.count + - rename_attributes: + attribute_map: + db.client.connections.pool.name: db.client.connection.pool.name + apply_to_metrics: + - db.client.connection.count + - db.client.connection.idle.max + - db.client.connection.idle.min + - db.client.connection.max + - db.client.connection.pending_requests + - db.client.connection.timeouts + - db.client.connection.create_time + - db.client.connection.wait_time + - db.client.connection.use_time + # https://github.com/open-telemetry/semantic-conventions/pull/1006 + - rename_metrics: + messaging.publish.messages: messaging.client.published.messages + # https://github.com/open-telemetry/semantic-conventions/pull/1026 + - rename_attributes: + attribute_map: + system.cpu.state: cpu.mode + process.cpu.state: cpu.mode + container.cpu.state: cpu.mode + apply_to_metrics: + - system.cpu.time + - system.cpu.utilization + - process.cpu.time + - process.cpu.utilization + - container.cpu.time + # https://github.com/open-telemetry/semantic-conventions/pull/1265 + - rename_metrics: + jvm.buffer.memory.usage: jvm.buffer.memory.used + 
1.26.0: + metrics: + changes: + # https://github.com/open-telemetry/semantic-conventions/pull/966 + - rename_metrics: + db.client.connections.usage: db.client.connection.count + db.client.connections.idle.max: db.client.connection.idle.max + db.client.connections.idle.min: db.client.connection.idle.min + db.client.connections.max: db.client.connection.max + db.client.connections.pending_requests: db.client.connection.pending_requests + db.client.connections.timeouts: db.client.connection.timeouts + # https://github.com/open-telemetry/semantic-conventions/pull/948 + - rename_attributes: + attribute_map: + messaging.client_id: messaging.client.id + # https://github.com/open-telemetry/semantic-conventions/pull/909 + - rename_attributes: + attribute_map: + state: db.client.connections.state + apply_to_metrics: + - db.client.connections.usage + - rename_attributes: + attribute_map: + pool.name: db.client.connections.pool.name + apply_to_metrics: + - db.client.connections.usage + - db.client.connections.idle.max + - db.client.connections.idle.min + - db.client.connections.max + - db.client.connections.pending_requests + - db.client.connections.timeouts + - db.client.connections.create_time + - db.client.connections.wait_time + - db.client.connections.use_time + all: + changes: + # https://github.com/open-telemetry/semantic-conventions/pull/731/ + - rename_attributes: + attribute_map: + enduser.id: user.id + + 1.25.0: + spans: + changes: + # https://github.com/open-telemetry/semantic-conventions/pull/911 + - rename_attributes: + attribute_map: + db.name: db.namespace + # https://github.com/open-telemetry/semantic-conventions/pull/870 + - rename_attributes: + attribute_map: + db.sql.table: db.collection.name + db.mongodb.collection: db.collection.name + db.cosmosdb.container: db.collection.name + db.cassandra.table: db.collection.name + # https://github.com/open-telemetry/semantic-conventions/pull/798 + - rename_attributes: + attribute_map: + 
messaging.kafka.destination.partition: messaging.destination.partition.id + # https://github.com/open-telemetry/semantic-conventions/pull/875 + - rename_attributes: + attribute_map: + db.operation: db.operation.name + # https://github.com/open-telemetry/semantic-conventions/pull/913 + - rename_attributes: + attribute_map: + messaging.operation: messaging.operation.type + # https://github.com/open-telemetry/semantic-conventions/pull/866 + - rename_attributes: + attribute_map: + db.statement: db.query.text + metrics: + changes: + # https://github.com/open-telemetry/semantic-conventions/pull/484 + - rename_attributes: + attribute_map: + system.processes.status: system.process.status + apply_to_metrics: + - system.processes.count + - rename_metrics: + system.processes.count: system.process.count + system.processes.created: system.process.created + # https://github.com/open-telemetry/semantic-conventions/pull/625 + - rename_attributes: + attribute_map: + container.labels: container.label + k8s.pod.labels: k8s.pod.label + # https://github.com/open-telemetry/semantic-conventions/pull/330 + - rename_metrics: + process.threads: process.thread.count + process.open_file_descriptors: process.open_file_descriptor.count + - rename_attributes: + attribute_map: + state: process.cpu.state + apply_to_metrics: + - process.cpu.time + - process.cpu.utilization + - rename_attributes: + attribute_map: + direction: disk.io.direction + apply_to_metrics: + - process.disk.io + - rename_attributes: + attribute_map: + type: process.context_switch_type + apply_to_metrics: + - process.context_switches + - rename_attributes: + attribute_map: + direction: network.io.direction + apply_to_metrics: + - process.network.io + - rename_attributes: + attribute_map: + type: process.paging.fault_type + apply_to_metrics: + - process.paging.faults + all: + changes: + # https://github.com/open-telemetry/semantic-conventions/pull/854 + - rename_attributes: + attribute_map: + message.type: rpc.message.type + 
message.id: rpc.message.id + message.compressed_size: rpc.message.compressed_size + message.uncompressed_size: rpc.message.uncompressed_size + + 1.24.0: + metrics: + changes: + # https://github.com/open-telemetry/semantic-conventions/pull/536 + - rename_metrics: + jvm.memory.usage: jvm.memory.used + jvm.memory.usage_after_last_gc: jvm.memory.used_after_last_gc + # https://github.com/open-telemetry/semantic-conventions/pull/530 + - rename_attributes: + attribute_map: + system.network.io.direction: network.io.direction + system.disk.io.direction: disk.io.direction + 1.23.1: + 1.23.0: + metrics: + changes: + # https://github.com/open-telemetry/semantic-conventions/pull/20 + - rename_attributes: + attribute_map: + thread.daemon: jvm.thread.daemon + apply_to_metrics: + - jvm.thread.count + 1.22.0: + spans: + changes: + # https://github.com/open-telemetry/semantic-conventions/pull/229 + - rename_attributes: + attribute_map: + messaging.message.payload_size_bytes: messaging.message.body.size + # https://github.com/open-telemetry/opentelemetry-specification/pull/374 + - rename_attributes: + attribute_map: + http.resend_count: http.request.resend_count + metrics: + changes: + # https://github.com/open-telemetry/semantic-conventions/pull/224 + - rename_metrics: + http.client.duration: http.client.request.duration + http.server.duration: http.server.request.duration + # https://github.com/open-telemetry/semantic-conventions/pull/241 + - rename_metrics: + process.runtime.jvm.memory.usage: jvm.memory.usage + process.runtime.jvm.memory.committed: jvm.memory.committed + process.runtime.jvm.memory.limit: jvm.memory.limit + process.runtime.jvm.memory.usage_after_last_gc: jvm.memory.usage_after_last_gc + process.runtime.jvm.gc.duration: jvm.gc.duration + # also https://github.com/open-telemetry/semantic-conventions/pull/252 + process.runtime.jvm.threads.count: jvm.thread.count + # also https://github.com/open-telemetry/semantic-conventions/pull/252 + 
process.runtime.jvm.classes.loaded: jvm.class.loaded + # also https://github.com/open-telemetry/semantic-conventions/pull/252 + process.runtime.jvm.classes.unloaded: jvm.class.unloaded + # also https://github.com/open-telemetry/semantic-conventions/pull/252 + # and https://github.com/open-telemetry/semantic-conventions/pull/60 + process.runtime.jvm.classes.current_loaded: jvm.class.count + process.runtime.jvm.cpu.time: jvm.cpu.time + process.runtime.jvm.cpu.recent_utilization: jvm.cpu.recent_utilization + process.runtime.jvm.memory.init: jvm.memory.init + process.runtime.jvm.system.cpu.utilization: jvm.system.cpu.utilization + process.runtime.jvm.system.cpu.load_1m: jvm.system.cpu.load_1m + # https://github.com/open-telemetry/semantic-conventions/pull/253 + process.runtime.jvm.buffer.usage: jvm.buffer.memory.usage + # https://github.com/open-telemetry/semantic-conventions/pull/253 + process.runtime.jvm.buffer.limit: jvm.buffer.memory.limit + process.runtime.jvm.buffer.count: jvm.buffer.count + # https://github.com/open-telemetry/semantic-conventions/pull/20 + - rename_attributes: + attribute_map: + type: jvm.memory.type + pool: jvm.memory.pool.name + apply_to_metrics: + - jvm.memory.usage + - jvm.memory.committed + - jvm.memory.limit + - jvm.memory.usage_after_last_gc + - jvm.memory.init + - rename_attributes: + attribute_map: + name: jvm.gc.name + action: jvm.gc.action + apply_to_metrics: + - jvm.gc.duration + - rename_attributes: + attribute_map: + daemon: thread.daemon + apply_to_metrics: + - jvm.threads.count + - rename_attributes: + attribute_map: + pool: jvm.buffer.pool.name + apply_to_metrics: + - jvm.buffer.memory.usage + - jvm.buffer.memory.limit + - jvm.buffer.count + # https://github.com/open-telemetry/semantic-conventions/pull/89 + - rename_attributes: + attribute_map: + state: system.cpu.state + cpu: system.cpu.logical_number + apply_to_metrics: + - system.cpu.time + - system.cpu.utilization + - rename_attributes: + attribute_map: + state: 
system.memory.state + apply_to_metrics: + - system.memory.usage + - system.memory.utilization + - rename_attributes: + attribute_map: + state: system.paging.state + apply_to_metrics: + - system.paging.usage + - system.paging.utilization + - rename_attributes: + attribute_map: + type: system.paging.type + direction: system.paging.direction + apply_to_metrics: + - system.paging.faults + - system.paging.operations + - rename_attributes: + attribute_map: + device: system.device + direction: system.disk.direction + apply_to_metrics: + - system.disk.io + - system.disk.operations + - system.disk.io_time + - system.disk.operation_time + - system.disk.merged + - rename_attributes: + attribute_map: + device: system.device + state: system.filesystem.state + type: system.filesystem.type + mode: system.filesystem.mode + mountpoint: system.filesystem.mountpoint + apply_to_metrics: + - system.filesystem.usage + - system.filesystem.utilization + - rename_attributes: + attribute_map: + device: system.device + direction: system.network.direction + protocol: network.protocol + state: system.network.state + apply_to_metrics: + - system.network.dropped + - system.network.packets + - system.network.errors + - system.network.io + - system.network.connections + - rename_attributes: + attribute_map: + status: system.processes.status + apply_to_metrics: + - system.processes.count + # https://github.com/open-telemetry/semantic-conventions/pull/247 + - rename_metrics: + http.server.request.size: http.server.request.body.size + http.server.response.size: http.server.response.body.size + resources: + changes: + # https://github.com/open-telemetry/semantic-conventions/pull/178 + - rename_attributes: + attribute_map: + telemetry.auto.version: telemetry.distro.version + 1.21.0: + spans: + changes: + # https://github.com/open-telemetry/opentelemetry-specification/pull/3336 + - rename_attributes: + attribute_map: + messaging.kafka.client_id: messaging.client_id + messaging.rocketmq.client_id: 
messaging.client_id + # https://github.com/open-telemetry/opentelemetry-specification/pull/3402 + - rename_attributes: + attribute_map: + # net.peer.(name|port) attributes were usually populated on client side + # so they should be usually translated to server.(address|port) + # net.host.* attributes were only populated on server side + net.host.name: server.address + net.host.port: server.port + # was only populated on client side + net.sock.peer.name: server.socket.domain + # net.sock.peer.(addr|port) mapping is not possible + # since they applied to both client and server side + # were only populated on server side + net.sock.host.addr: server.socket.address + net.sock.host.port: server.socket.port + http.client_ip: client.address + # https://github.com/open-telemetry/opentelemetry-specification/pull/3426 + - rename_attributes: + attribute_map: + net.protocol.name: network.protocol.name + net.protocol.version: network.protocol.version + net.host.connection.type: network.connection.type + net.host.connection.subtype: network.connection.subtype + net.host.carrier.name: network.carrier.name + net.host.carrier.mcc: network.carrier.mcc + net.host.carrier.mnc: network.carrier.mnc + net.host.carrier.icc: network.carrier.icc + # https://github.com/open-telemetry/opentelemetry-specification/pull/3355 + - rename_attributes: + attribute_map: + http.method: http.request.method + http.status_code: http.response.status_code + http.scheme: url.scheme + http.url: url.full + http.request_content_length: http.request.body.size + http.response_content_length: http.response.body.size + metrics: + changes: + # https://github.com/open-telemetry/semantic-conventions/pull/53 + - rename_metrics: + process.runtime.jvm.cpu.utilization: process.runtime.jvm.cpu.recent_utilization + 1.20.0: + spans: + changes: + # https://github.com/open-telemetry/opentelemetry-specification/pull/3272 + - rename_attributes: + attribute_map: + net.app.protocol.name: net.protocol.name + 
net.app.protocol.version: net.protocol.version + 1.19.0: + spans: + changes: + # https://github.com/open-telemetry/opentelemetry-specification/pull/3209 + - rename_attributes: + attribute_map: + faas.execution: faas.invocation_id + # https://github.com/open-telemetry/opentelemetry-specification/pull/3188 + - rename_attributes: + attribute_map: + faas.id: cloud.resource_id + # https://github.com/open-telemetry/opentelemetry-specification/pull/3190 + - rename_attributes: + attribute_map: + http.user_agent: user_agent.original + resources: + changes: + # https://github.com/open-telemetry/opentelemetry-specification/pull/3190 + - rename_attributes: + attribute_map: + browser.user_agent: user_agent.original + 1.18.0: + 1.17.0: + spans: + changes: + # https://github.com/open-telemetry/opentelemetry-specification/pull/2957 + - rename_attributes: + attribute_map: + messaging.consumer_id: messaging.consumer.id + messaging.protocol: net.app.protocol.name + messaging.protocol_version: net.app.protocol.version + messaging.destination: messaging.destination.name + messaging.temp_destination: messaging.destination.temporary + messaging.destination_kind: messaging.destination.kind + messaging.message_id: messaging.message.id + messaging.conversation_id: messaging.message.conversation_id + messaging.message_payload_size_bytes: messaging.message.payload_size_bytes + messaging.message_payload_compressed_size_bytes: messaging.message.payload_compressed_size_bytes + messaging.rabbitmq.routing_key: messaging.rabbitmq.destination.routing_key + messaging.kafka.message_key: messaging.kafka.message.key + messaging.kafka.partition: messaging.kafka.destination.partition + messaging.kafka.tombstone: messaging.kafka.message.tombstone + messaging.rocketmq.message_type: messaging.rocketmq.message.type + messaging.rocketmq.message_tag: messaging.rocketmq.message.tag + messaging.rocketmq.message_keys: messaging.rocketmq.message.keys + messaging.kafka.consumer_group: messaging.kafka.consumer.group 
+ 1.16.0: + 1.15.0: + spans: + changes: + # https://github.com/open-telemetry/opentelemetry-specification/pull/2743 + - rename_attributes: + attribute_map: + http.retry_count: http.resend_count + 1.14.0: + 1.13.0: + spans: + changes: + # https://github.com/open-telemetry/opentelemetry-specification/pull/2614 + - rename_attributes: + attribute_map: + net.peer.ip: net.sock.peer.addr + net.host.ip: net.sock.host.addr + 1.12.0: + 1.11.0: + 1.10.0: + 1.9.0: + 1.8.0: + spans: + changes: + - rename_attributes: + attribute_map: + db.cassandra.keyspace: db.name + db.hbase.namespace: db.name + 1.7.0: + 1.6.1: + 1.5.0: + 1.4.0: \ No newline at end of file diff --git a/crates/weaver_resolved_schema/src/attribute.rs b/crates/weaver_resolved_schema/src/attribute.rs index 8326a9c1..3c8aec83 100644 --- a/crates/weaver_resolved_schema/src/attribute.rs +++ b/crates/weaver_resolved_schema/src/attribute.rs @@ -10,7 +10,10 @@ use schemars::JsonSchema; use serde::{Deserialize, Serialize}; use std::fmt::Display; use std::ops::Not; +#[cfg(test)] +use weaver_semconv::attribute::PrimitiveOrArrayTypeSpec; use weaver_semconv::attribute::{AttributeSpec, AttributeType, Examples, RequirementLevel}; +use weaver_semconv::deprecated::Deprecated; use weaver_semconv::stability::Stability; /// An attribute definition. @@ -60,11 +63,9 @@ pub struct Attribute { /// error. #[serde(skip_serializing_if = "Option::is_none")] pub stability: Option, - /// Specifies if the attribute is deprecated. The string - /// provided as MUST specify why it's deprecated and/or what - /// to use instead. See also stability. + /// Specifies if the attribute is deprecated. #[serde(skip_serializing_if = "Option::is_none")] - pub deprecated: Option, + pub deprecated: Option, /// Specifies the prefix of the attribute. /// If this parameter is set, the resolved id of the referenced attribute will /// have group prefix added to it. 
@@ -100,3 +101,105 @@ impl Display for AttributeRef { write!(f, "AttributeRef({})", self.0) } } + +impl Attribute { + /// Creates a new string attribute. + /// Note: This constructor is used for testing purposes. + #[cfg(test)] + pub(crate) fn string>(name: S, brief: S, note: S) -> Self { + Self { + name: name.as_ref().to_owned(), + r#type: AttributeType::PrimitiveOrArray(PrimitiveOrArrayTypeSpec::String), + brief: brief.as_ref().to_owned(), + examples: None, + tag: None, + requirement_level: Default::default(), + sampling_relevant: None, + note: note.as_ref().to_owned(), + stability: None, + deprecated: None, + prefix: false, + tags: None, + value: None, + } + } + + /// Creates a new integer attribute. + /// Note: This constructor is used for testing purposes. + #[cfg(test)] + pub(crate) fn int>(name: S, brief: S, note: S) -> Self { + Self { + name: name.as_ref().to_owned(), + r#type: AttributeType::PrimitiveOrArray(PrimitiveOrArrayTypeSpec::Int), + brief: brief.as_ref().to_owned(), + examples: None, + tag: None, + requirement_level: Default::default(), + sampling_relevant: None, + note: note.as_ref().to_owned(), + stability: None, + deprecated: None, + prefix: false, + tags: None, + value: None, + } + } + + /// Creates a new double attribute. + /// Note: This constructor is used for testing purposes. + #[cfg(test)] + pub(crate) fn double>(name: S, brief: S, note: S) -> Self { + Self { + name: name.as_ref().to_owned(), + r#type: AttributeType::PrimitiveOrArray(PrimitiveOrArrayTypeSpec::Double), + brief: brief.as_ref().to_owned(), + examples: None, + tag: None, + requirement_level: Default::default(), + sampling_relevant: None, + note: note.as_ref().to_owned(), + stability: None, + deprecated: None, + prefix: false, + tags: None, + value: None, + } + } + + /// Creates a new boolean attribute. + /// Note: This constructor is used for testing purposes. 
+ #[cfg(test)] + pub(crate) fn boolean>(name: S, brief: S, note: S) -> Self { + Self { + name: name.as_ref().to_owned(), + r#type: AttributeType::PrimitiveOrArray(PrimitiveOrArrayTypeSpec::Boolean), + brief: brief.as_ref().to_owned(), + examples: None, + tag: None, + requirement_level: Default::default(), + sampling_relevant: None, + note: note.as_ref().to_owned(), + stability: None, + deprecated: None, + prefix: false, + tags: None, + value: None, + } + } + + /// Sets the deprecated field of the attribute. + /// Note: This method is used for testing purposes. + #[cfg(test)] + pub(crate) fn deprecated(mut self, deprecated: Deprecated) -> Self { + self.deprecated = Some(deprecated); + self + } + + /// Sets the note field of the attribute. + /// Note: This method is used for testing purposes. + #[cfg(test)] + pub(crate) fn note>(mut self, note: S) -> Self { + self.note = note.as_ref().to_owned(); + self + } +} diff --git a/crates/weaver_resolved_schema/src/catalog.rs b/crates/weaver_resolved_schema/src/catalog.rs index 191d03fd..09777534 100644 --- a/crates/weaver_resolved_schema/src/catalog.rs +++ b/crates/weaver_resolved_schema/src/catalog.rs @@ -15,13 +15,13 @@ use weaver_semconv::stability::Stability; /// Attribute references are used to refer to attributes in the catalog. /// /// Note : In the future, this catalog could be extended with other entities. -#[derive(Serialize, Deserialize, Debug, Clone, JsonSchema)] +#[derive(Serialize, Deserialize, Debug, Clone, JsonSchema, Default)] #[serde(deny_unknown_fields)] #[must_use] pub struct Catalog { /// Catalog of attributes used in the schema. #[serde(skip_serializing_if = "Vec::is_empty")] - pub attributes: Vec, + attributes: Vec, } /// Statistics on a catalog. @@ -41,6 +41,24 @@ pub struct Stats { } impl Catalog { + /// Creates a catalog from a list of attributes. 
+ pub fn from_attributes(attributes: Vec) -> Self { + Self { attributes } + } + + /// Adds attributes to the catalog and returns a list of attribute references. + #[must_use] + pub fn add_attributes( + &mut self, + attributes: [Attribute; N], + ) -> Vec { + let start_index = self.attributes.len(); + self.attributes.extend(attributes.iter().cloned()); + (start_index..self.attributes.len()) + .map(|i| AttributeRef(i as u32)) + .collect::>() + } + /// Returns the attribute name from an attribute ref if it exists /// in the catalog or None if it does not exist. #[must_use] @@ -50,6 +68,17 @@ impl Catalog { .map(|attr| attr.name.as_ref()) } + /// Counts the number of attributes in the catalog. + #[must_use] + pub fn count_attributes(&self) -> usize { + self.attributes.len() + } + + /// Return an iterator over the attributes in the catalog. + pub fn iter(&self) -> impl Iterator { + self.attributes.iter() + } + /// Returns the attribute from an attribute ref if it exists. #[must_use] pub fn attribute(&self, attribute_ref: &AttributeRef) -> Option<&Attribute> { diff --git a/crates/weaver_resolved_schema/src/lib.rs b/crates/weaver_resolved_schema/src/lib.rs index f7e1793c..f41022e9 100644 --- a/crates/weaver_resolved_schema/src/lib.rs +++ b/crates/weaver_resolved_schema/src/lib.rs @@ -1,16 +1,22 @@ // SPDX-License-Identifier: Apache-2.0 //! Define the concept of Resolved Telemetry Schema. +//! //! A Resolved Telemetry Schema is self-contained and doesn't contain any //! external references to other schemas or semantic conventions. 
+use crate::attribute::Attribute; use crate::catalog::Catalog; use crate::instrumentation_library::InstrumentationLibrary; -use crate::registry::Registry; +use crate::registry::{Group, Registry}; use crate::resource::Resource; use schemars::JsonSchema; use serde::{Deserialize, Serialize}; -use std::collections::HashMap; +use std::collections::{HashMap, HashSet}; +use weaver_semconv::deprecated::Deprecated; +use weaver_semconv::group::GroupType; +use weaver_semconv::manifest::RegistryManifest; +use weaver_version::schema_changes::{SchemaChanges, SchemaItemChange, SchemaItemType}; use weaver_version::Versions; pub mod attribute; @@ -39,10 +45,10 @@ pub struct ResolvedTelemetrySchema { pub file_format: String, /// Schema URL that this file is published at. pub schema_url: String, - /// A map of named semantic convention registries that can be used in this schema - /// and its descendants. - #[serde(skip_serializing_if = "HashMap::is_empty")] - pub registries: HashMap, + /// The ID of the registry that this schema belongs to. + pub registry_id: String, + /// The registry that this schema belongs to. + pub registry: Registry, /// Catalog of unique items that are shared across multiple registries /// and signals. pub catalog: Catalog, @@ -64,14 +70,14 @@ pub struct ResolvedTelemetrySchema { /// #[serde(skip_serializing_if = "Option::is_none")] pub versions: Option, + /// The manifest of the registry. + pub registry_manifest: Option, } /// Statistics on a resolved telemetry schema. #[derive(Debug, Serialize)] #[must_use] pub struct Stats { - /// Total number of registries. - pub registry_count: usize, /// Statistics on each registry. pub registry_stats: Vec, /// Statistics on the catalog. @@ -79,10 +85,58 @@ pub struct Stats { } impl ResolvedTelemetrySchema { - /// Get a registry by its ID. - #[must_use] - pub fn registry(&self, registry_id: &str) -> Option<&Registry> { - self.registries.get(registry_id) + /// Create a new resolved telemetry schema. 
+ pub fn new>( + file_format: S, + schema_url: S, + registry_id: S, + registry_url: S, + ) -> Self { + Self { + file_format: file_format.as_ref().to_owned(), + schema_url: schema_url.as_ref().to_owned(), + registry_id: registry_id.as_ref().to_owned(), + registry: Registry::new(registry_url), + catalog: Catalog::default(), + resource: None, + instrumentation_library: None, + dependencies: vec![], + versions: None, + registry_manifest: None, + } + } + + /// Adds a new attribute group to the schema. + /// + /// Note: This method is intended to be used for testing purposes only. + #[cfg(test)] + pub(crate) fn add_attribute_group( + &mut self, + group_id: &str, + attrs: [Attribute; N], + ) { + let attr_refs = self.catalog.add_attributes(attrs); + self.registry.groups.push(Group { + id: group_id.to_owned(), + r#type: GroupType::AttributeGroup, + brief: "".to_owned(), + note: "".to_owned(), + prefix: "".to_owned(), + extends: None, + stability: None, + deprecated: None, + name: Some(group_id.to_owned()), + lineage: None, + display_name: None, + attributes: attr_refs, + span_kind: None, + events: vec![], + metric_name: None, + instrument: None, + constraints: vec![], + unit: None, + body: None, + }); } /// Get the catalog of the resolved telemetry schema. @@ -92,23 +146,391 @@ impl ResolvedTelemetrySchema { /// Compute statistics on the resolved telemetry schema. pub fn stats(&self) -> Stats { - let mut registry_stats = Vec::new(); - for registry in self.registries.values() { - registry_stats.push(registry.stats()); - } + let registry_stats = vec![self.registry.stats()]; Stats { - registry_count: self.registries.len(), registry_stats, catalog_stats: self.catalog.stats(), } } + + /// Get the attributes of the resolved telemetry schema. 
+ #[must_use] + pub fn attribute_map(&self) -> HashMap<&str, &Attribute> { + self.registry + .groups + .iter() + .filter(|group| group.r#type == GroupType::AttributeGroup) + .flat_map(|group| { + group.attributes.iter().map(|attr_ref| { + // An attribute ref is a reference to an attribute in the catalog. + // Not finding the attribute in the catalog is a bug somewhere in + // the resolution process. So it's fine to panic here. + let attr = self + .catalog + .attribute(attr_ref) + .expect("Attribute ref not found in catalog. This is a bug."); + (attr.name.as_str(), attr) + }) + }) + .collect() + } + + /// Get the "registry" attributes of the resolved telemetry schema. + /// + /// Note: At the moment (2024-12-30), I don't know a better way to identify + /// the "registry" attributes other than by checking if the group ID starts + /// with "registry.". + #[must_use] + pub fn registry_attribute_map(&self) -> HashMap<&str, &Attribute> { + self.registry + .groups + .iter() + .filter(|group| group.r#type == GroupType::AttributeGroup) + .filter(|group| group.id.starts_with("registry.")) + .flat_map(|group| { + group.attributes.iter().map(|attr_ref| { + // An attribute ref is a reference to an attribute in the catalog. + // Not finding the attribute in the catalog is a bug somewhere in + // the resolution process. So it's fine to panic here. + let attr = self + .catalog + .attribute(attr_ref) + .expect("Attribute ref not found in catalog. This is a bug."); + (attr.name.as_str(), attr) + }) + }) + .collect() + } + + /// Get the groups of a specific type from the resolved telemetry schema. + #[must_use] + pub fn groups(&self, group_type: GroupType) -> HashMap { + self.registry + .groups + .iter() + .filter(|group| group.r#type == group_type) + .map(|group| (group.id.clone(), group)) + .collect() + } + + /// Generate a diff between the current schema (must be the most recent one) + /// and a baseline schema. 
+ #[must_use] + pub fn diff(&self, baseline_schema: &ResolvedTelemetrySchema) -> SchemaChanges { + let mut changes = SchemaChanges::new(); + + if let Some(ref manifest) = self.registry_manifest { + changes.set_head_manifest(weaver_version::schema_changes::RegistryManifest { + semconv_version: manifest.semconv_version.clone(), + }); + } + + if let Some(ref manifest) = baseline_schema.registry_manifest { + changes.set_baseline_manifest(weaver_version::schema_changes::RegistryManifest { + semconv_version: manifest.semconv_version.clone(), + }); + } + + // Attributes in the registry + self.diff_attributes(baseline_schema, &mut changes); + + // Signals + let latest_signals = self.groups(GroupType::Metric); + let baseline_signals = baseline_schema.groups(GroupType::Metric); + self.diff_signals( + SchemaItemType::Metrics, + &latest_signals, + &baseline_signals, + &mut changes, + ); + let latest_signals = self.groups(GroupType::Event); + let baseline_signals = baseline_schema.groups(GroupType::Event); + self.diff_signals( + SchemaItemType::Events, + &latest_signals, + &baseline_signals, + &mut changes, + ); + let latest_signals = self.groups(GroupType::Span); + let baseline_signals = baseline_schema.groups(GroupType::Span); + self.diff_signals( + SchemaItemType::Spans, + &latest_signals, + &baseline_signals, + &mut changes, + ); + let latest_signals = self.groups(GroupType::Resource); + let baseline_signals = baseline_schema.groups(GroupType::Resource); + self.diff_signals( + SchemaItemType::Resources, + &latest_signals, + &baseline_signals, + &mut changes, + ); + + changes + } + + fn diff_attributes( + &self, + baseline_schema: &ResolvedTelemetrySchema, + changes: &mut SchemaChanges, + ) { + let latest_attributes = self.registry_attribute_map(); + let baseline_attributes = baseline_schema.registry_attribute_map(); + + // A map of attributes that have been renamed to a new attribute. + // The key is the new name of the attribute, and the value is a set of old names. 
+ // The key may refer to an existing attribute in the baseline schema or + // a new attribute in the latest schema. + let mut renamed_attributes = HashMap::new(); + + // ToDo for future PR, process differences at the field level (not required for the schema update) + + // Collect all the information related to the attributes that have been + // deprecated in the latest schema. + for (attr_name, attr) in latest_attributes.iter() { + if let Some(deprecated) = attr.deprecated.as_ref() { + // is this a change from the baseline? + if let Some(baseline_attr) = baseline_attributes.get(attr_name) { + if let Some(baseline_deprecated) = baseline_attr.deprecated.as_ref() { + if deprecated == baseline_deprecated { + continue; + } + } + } + match deprecated { + Deprecated::Renamed { + renamed_to: rename_to, + .. + } => { + // Insert the old name into the set of old names + // for the new name (rename_to). + _ = renamed_attributes + .entry(rename_to.as_str()) + .or_insert_with(HashSet::new) + .insert(*attr_name); + } + Deprecated::Deprecated { .. } => { + changes.add_change( + SchemaItemType::Attributes, + SchemaItemChange::Deprecated { + name: attr.name.clone(), + note: deprecated.to_string(), + }, + ); + } + } + } + } + + // Based on the analysis of deprecated fields conducted earlier, we can + // now distinguish between: + // - an attribute created to give a new name to an existing attribute or + // to unify several attributes into a single one, + // - an attribute created to represent something new. + for (attr_name, attr) in latest_attributes.iter() { + if !baseline_attributes.contains_key(attr_name) { + // The attribute in the latest schema does not exist in the baseline schema. + // This attribute may be referenced in the deprecated field of another + // attribute, indicating that it is a replacement attribute intended to rename + // one or more existing attributes. 
+ // If it is not referenced in the deprecated field of another attribute, then + // it is an entirely new attribute that did not previously exist. + if let Some(old_names) = renamed_attributes.remove(attr_name) { + // The new attribute is identified as a replacement attribute based + // on the deprecated metadata. + changes.add_change( + SchemaItemType::Attributes, + SchemaItemChange::RenamedToNew { + old_names: old_names.iter().map(|n| (*n).to_owned()).collect(), + new_name: attr.name.clone(), + }, + ); + } else { + // The new attribute is identified as a new attribute not related to + // any previous attributes in the baseline schema. + changes.add_change( + SchemaItemType::Attributes, + SchemaItemChange::Added { + name: attr.name.clone(), + }, + ); + } + } + } + + // Any attribute in the baseline schema that is not present in the latest schema + // is considered removed. + // Note: This should never occur if the registry evolution process is followed. + // However, detecting this case is useful for identifying a violation of the process. + for (attr_name, attr) in baseline_attributes.iter() { + if !latest_attributes.contains_key(attr_name) { + changes.add_change( + SchemaItemType::Attributes, + SchemaItemChange::Removed { + name: attr.name.clone(), + }, + ); + } + } + + // The attribute names that remain in the list `renamed_attributes` are those + // present in both versions of the same schema. They represent cases where + // attributes have been renamed to an already existing attribute. 
+ for (new_name, old_names) in renamed_attributes.iter() { + changes.add_change( + SchemaItemType::Attributes, + SchemaItemChange::RenamedToExisting { + old_names: old_names.iter().map(|n| (*n).to_owned()).collect(), + current_name: (*new_name).to_owned(), + }, + ); + } + } + + fn diff_signals( + &self, + schema_item_type: SchemaItemType, + latest_signals: &HashMap, + baseline_signals: &HashMap, + changes: &mut SchemaChanges, + ) { + /// Get the name of the provided group based on the given schema item type. + fn group_name(schema_item_type: SchemaItemType, group: &Group) -> String { + match schema_item_type { + SchemaItemType::Attributes + | SchemaItemType::Events + | SchemaItemType::Spans + | SchemaItemType::Resources => group.name.clone().unwrap_or_default(), + SchemaItemType::Metrics => group.metric_name.clone().unwrap_or_default(), + } + } + + // A map of signal groups that have been renamed to a new signal of same type. + // The key is the new name of the signal, and the value is a set of old names. + // The key may refer to an existing signal in the baseline schema or + // a new signal in the latest schema. + let mut renamed_signals: HashMap> = HashMap::new(); + + // Collect all the information related to the signals that have been + // deprecated in the latest schema. + for (signal_name, group) in latest_signals.iter() { + if let Some(deprecated) = group.deprecated.as_ref() { + // is this a change from the baseline? + if let Some(baseline_group) = baseline_signals.get(signal_name) { + if let Some(baseline_deprecated) = baseline_group.deprecated.as_ref() { + if deprecated == baseline_deprecated { + continue; + } + } + } + match deprecated { + Deprecated::Renamed { + renamed_to: rename_to, + .. + } => { + // Insert the deprecated signal into the set of renamed/deprecated signals + // for the new name (rename_to). + renamed_signals + .entry(rename_to.clone()) + .or_default() + .push(group); + } + Deprecated::Deprecated { .. 
} => { + changes.add_change( + schema_item_type, + SchemaItemChange::Deprecated { + name: signal_name.clone(), + note: deprecated.to_string(), + }, + ); + } + } + } + } + + // Based on the analysis of deprecated fields conducted earlier, we can + // now distinguish between: + // - a signal created to give a new name to an existing signal or + // to unify several signals into a single one, + // - a signal created to represent something new. + for (signal_name, _) in latest_signals.iter() { + if !baseline_signals.contains_key(signal_name) { + // The signal in the latest schema does not exist in the baseline schema. + // This signal may be referenced in the deprecated field of another + // signal, indicating that it is a replacement signal intended to rename + // one or more existing signals. + // If it is not referenced in the deprecated field of another signal, then + // it is an entirely new signal that did not previously exist. + if let Some(old_groups) = renamed_signals.remove(signal_name) { + // The new signal is identified as a replacement signal based + // on the deprecated metadata. + changes.add_change( + schema_item_type, + SchemaItemChange::RenamedToNew { + old_names: old_groups + .iter() + .map(|n| group_name(schema_item_type, n)) + .collect(), + new_name: signal_name.clone(), + }, + ); + } else { + // The new signal is identified as a new signal not related to + // any previous signals in the baseline schema. + changes.add_change( + schema_item_type, + SchemaItemChange::Added { + name: signal_name.clone(), + }, + ); + } + } + } + + // Any signal in the baseline schema that is not present in the latest schema + // is considered removed. + // Note: This should never occur if the registry evolution process is followed. + // However, detecting this case is useful for identifying a violation of the process. 
+ for (signal_name, _) in baseline_signals.iter() { + if !latest_signals.contains_key(signal_name) { + changes.add_change( + schema_item_type, + SchemaItemChange::Removed { + name: signal_name.clone(), + }, + ); + } + } + + // The signal names that remain in the list `renamed_signals` are those + // present in both versions of the same schema. They represent cases where + // signals have been renamed to an already existing signal. + for (new_name, old_groups) in renamed_signals.iter() { + changes.add_change( + schema_item_type, + SchemaItemChange::RenamedToExisting { + old_names: old_groups + .iter() + .map(|n| group_name(schema_item_type, n)) + .collect(), + current_name: (*new_name).to_owned(), + }, + ); + } + } } #[cfg(test)] mod tests { + use crate::attribute::Attribute; use crate::ResolvedTelemetrySchema; use schemars::schema_for; use serde_json::to_string_pretty; + use std::collections::HashSet; + use weaver_semconv::deprecated::Deprecated; + use weaver_version::schema_changes::SchemaItemChange; #[test] fn test_json_schema_gen() { @@ -118,4 +540,265 @@ mod tests { // Ensure the schema can be serialized to a string assert!(to_string_pretty(&schema).is_ok()); } + + #[test] + fn no_diff() { + let mut prior_schema = ResolvedTelemetrySchema::new("1.0", "", "", ""); + prior_schema.add_attribute_group( + "group1", + [ + Attribute::boolean("attr1", "brief1", "note1"), + Attribute::string("attr2", "brief2", "note2"), + Attribute::int("attr3", "brief3", "note3"), + Attribute::double("attr4", "brief4", "note4"), + ], + ); + + let changes = prior_schema.diff(&prior_schema); + assert!(changes.is_empty()); + } + + #[test] + fn detect_2_added_attributes() { + let mut prior_schema = ResolvedTelemetrySchema::new("1.0", "", "", ""); + prior_schema.add_attribute_group( + "registry.group1", + [ + Attribute::boolean("attr1", "brief1", "note1"), + Attribute::string("attr2", "brief2", "note2"), + ], + ); + + let mut latest_schema = ResolvedTelemetrySchema::new("1.0", "", "", ""); + 
latest_schema.add_attribute_group( + "registry.group1", + [ + Attribute::boolean("attr1", "brief1", "note1"), + Attribute::string("attr2", "brief2", "note2"), + Attribute::int("attr3", "brief3", "note3"), + Attribute::double("attr4", "brief4", "note4"), + ], + ); + + let changes = latest_schema.diff(&prior_schema); + assert_eq!(changes.count_changes(), 2); + assert_eq!(changes.count_attribute_changes(), 2); + assert_eq!(changes.count_added_attributes(), 2); + } + + #[test] + fn detect_2_deprecated_attributes() { + let mut prior_schema = ResolvedTelemetrySchema::new("1.0", "", "", ""); + prior_schema.add_attribute_group( + "registry.group1", + [ + Attribute::boolean("attr1", "brief1", "note1"), + Attribute::string("attr2", "brief2", "note2"), + Attribute::int("attr3", "brief3", "note3"), + Attribute::double("attr4", "brief4", "note4"), + Attribute::double("attr5", "brief5", "note5").deprecated(Deprecated::Deprecated), + ], + ); + + let mut latest_schema = ResolvedTelemetrySchema::new("1.0", "", "", ""); + latest_schema.add_attribute_group( + "registry.group1", + [ + Attribute::boolean("attr1", "brief1", "note1"), + Attribute::string("attr2", "brief2", "note2") + .deprecated(Deprecated::Deprecated) + .note("This attribute is deprecated."), + Attribute::int("attr3", "brief3", "note3") + .deprecated(Deprecated::Deprecated) + .note("This attribute is deprecated."), + Attribute::double("attr4", "brief4", "note4"), + Attribute::double("attr5", "brief5", "note5").deprecated(Deprecated::Deprecated), + ], + ); + + let changes = latest_schema.diff(&prior_schema); + assert_eq!(changes.count_changes(), 2); + assert_eq!(changes.count_attribute_changes(), 2); + assert_eq!(changes.count_deprecated_attributes(), 2); + } + + #[test] + fn detect_2_renamed_to_new_attributes() { + let mut prior_schema = ResolvedTelemetrySchema::new("1.0", "", "", ""); + prior_schema.add_attribute_group( + "registry.group1", + [ + Attribute::boolean("attr1", "brief1", "note1"), + 
Attribute::string("attr2", "brief2", "note2"), + Attribute::int("attr3", "brief3", "note3"), + Attribute::double("attr4", "brief4", "note4"), + ], + ); + + // 2 new attributes are added: attr2_bis and attr3_bis + // attr2 is renamed attr2_bis + // attr3 is renamed attr3_bis + let mut latest_schema = ResolvedTelemetrySchema::new("1.0", "", "", ""); + latest_schema.add_attribute_group( + "registry.group1", + [ + Attribute::boolean("attr1", "brief1", "note1"), + Attribute::string("attr2", "brief2", "note2").deprecated(Deprecated::Renamed { + renamed_to: "attr2_bis".to_owned(), + }), + Attribute::int("attr3", "brief3", "note3").deprecated(Deprecated::Renamed { + renamed_to: "attr3_bis".to_owned(), + }), + Attribute::double("attr4", "brief4", "note4"), + ], + ); + latest_schema.add_attribute_group( + "registry.group2", + [ + Attribute::boolean("attr2_bis", "brief1", "note1"), + Attribute::boolean("attr3_bis", "brief1", "note1"), + ], + ); + + let changes = latest_schema.diff(&prior_schema); + assert_eq!(changes.count_changes(), 2); + assert_eq!(changes.count_attribute_changes(), 2); + assert_eq!(changes.count_renamed_to_new_attributes(), 2); + } + + #[test] + fn detect_merge_of_2_attributes_renamed_to_the_same_existing_attribute() { + let mut prior_schema = ResolvedTelemetrySchema::new("1.0", "", "", ""); + prior_schema.add_attribute_group( + "registry.group1", + [ + Attribute::boolean("attr1", "brief1", "note1"), + Attribute::string("attr2", "brief2", "note2"), + Attribute::string("attr3", "brief3", "note3"), + Attribute::double("attr4", "brief4", "note4"), + ], + ); + prior_schema.add_attribute_group("group2", [Attribute::string("attr5", "brief", "note")]); + + // No new attribute is added: attr5 already exists in the prior schema + // attr2 is renamed attr5 + // attr3 is renamed attr5 + let mut latest_schema = ResolvedTelemetrySchema::new("1.0", "", "", ""); + latest_schema.add_attribute_group( + "registry.group1", + [ + Attribute::boolean("attr1", "brief1", "note1"), +
Attribute::string("attr2", "brief2", "note2").deprecated(Deprecated::Renamed { + renamed_to: "attr5".to_owned(), + }), + Attribute::int("attr3", "brief3", "note3").deprecated(Deprecated::Renamed { + renamed_to: "attr5".to_owned(), + }), + Attribute::double("attr4", "brief4", "note4"), + ], + ); + latest_schema.add_attribute_group("group2", [Attribute::string("attr5", "brief", "note")]); + + let changes = latest_schema.diff(&prior_schema); + assert_eq!(changes.count_changes(), 1); + assert_eq!(changes.count_attribute_changes(), 1); + // 2 attributes are renamed to the same existing attribute + assert_eq!(changes.count_renamed_to_existing_attributes(), 1); + let changes = changes.renamed_to_existing_attributes(); + if let SchemaItemChange::RenamedToExisting { + old_names, + current_name, + } = &changes[0] + { + let expected_old_names: HashSet<_> = ["attr2".to_owned(), "attr3".to_owned()] + .into_iter() + .collect(); + assert_eq!(old_names, &expected_old_names); + assert_eq!(current_name, "attr5"); + } + } + + #[test] + fn detect_merge_of_2_attributes_renamed_to_the_same_new_attribute() { + let mut prior_schema = ResolvedTelemetrySchema::new("1.0", "", "", ""); + prior_schema.add_attribute_group( + "registry.group1", + [ + Attribute::boolean("attr1", "brief1", "note1"), + Attribute::string("attr2", "brief2", "note2"), + Attribute::string("attr3", "brief3", "note3"), + Attribute::double("attr4", "brief4", "note4"), + ], + ); + + // 2 new attributes are added: attr2_bis and attr3_bis + // attr2 is renamed attr2_bis + // attr3 is renamed attr3_bis + let mut latest_schema = ResolvedTelemetrySchema::new("1.0", "", "", ""); + latest_schema.add_attribute_group( + "registry.group1", + [ + Attribute::boolean("attr1", "brief1", "note1"), + Attribute::string("attr2", "brief2", "note2").deprecated(Deprecated::Renamed { + renamed_to: "attr5".to_owned(), + }), + Attribute::int("attr3", "brief3", "note3").deprecated(Deprecated::Renamed { + renamed_to: "attr5".to_owned(), + }), + 
/// Checks that attributes present in the prior schema but absent from the latest one
/// are counted as removed attributes.
///
/// A removal is not expected under the normal registry evolution process; reporting it
/// is what makes a violation of that process visible.
#[test]
fn detect_2_removed_attributes() {
    // Baseline: four attributes.
    let baseline_attrs = [
        Attribute::boolean("attr1", "brief1", "note1"),
        Attribute::string("attr2", "brief2", "note2"),
        Attribute::int("attr3", "brief3", "note3"),
        Attribute::double("attr4", "brief4", "note4"),
    ];
    let mut baseline = ResolvedTelemetrySchema::new("1.0", "", "", "");
    baseline.add_attribute_group("registry.group1", baseline_attrs);

    // Head: `attr3` and `attr4` are gone.
    let head_attrs = [
        Attribute::boolean("attr1", "brief1", "note1"),
        Attribute::string("attr2", "brief2", "note2"),
    ];
    let mut head = ResolvedTelemetrySchema::new("1.0", "", "", "");
    head.add_attribute_group("registry.group1", head_attrs);

    let report = head.diff(&baseline);
    assert_eq!(report.count_changes(), 2);
    assert_eq!(report.count_attribute_changes(), 2);
    assert_eq!(report.count_removed_attributes(), 2);
}
b/crates/weaver_resolved_schema/src/lineage.rs @@ -8,6 +8,7 @@ use std::collections::{BTreeMap, BTreeSet}; use serde::{Deserialize, Serialize}; use weaver_semconv::attribute::{AttributeSpec, Examples, RequirementLevel}; +use weaver_semconv::deprecated::Deprecated; use weaver_semconv::stability::Stability; /// Attribute lineage (at the field level). @@ -332,9 +333,9 @@ impl AttributeLineage { /// from the parent. pub fn deprecated( &mut self, - local_value: &Option, - parent_value: &Option, - ) -> Option { + local_value: &Option, + parent_value: &Option, + ) -> Option { if local_value.is_some() { _ = self .locally_overridden_fields diff --git a/crates/weaver_resolved_schema/src/registry.rs b/crates/weaver_resolved_schema/src/registry.rs index e0d07ef3..83dffaf9 100644 --- a/crates/weaver_resolved_schema/src/registry.rs +++ b/crates/weaver_resolved_schema/src/registry.rs @@ -9,7 +9,7 @@ use std::collections::{BTreeMap, HashMap, HashSet}; use weaver_semconv::any_value::AnyValueSpec; use serde::{Deserialize, Serialize}; - +use weaver_semconv::deprecated::Deprecated; use weaver_semconv::group::{GroupType, InstrumentSpec, SpanKindSpec}; use weaver_semconv::stability::Stability; @@ -80,7 +80,7 @@ pub struct Group { /// provided as MUST specify why it's deprecated and/or what /// to use instead. See also stability. #[serde(skip_serializing_if = "Option::is_none")] - pub deprecated: Option, + pub deprecated: Option, /// Additional constraints. /// Allow to define additional requirements on the semantic convention. /// It defaults to an empty list. @@ -237,6 +237,15 @@ impl CommonGroupStats { } impl Registry { + /// Creates a new registry. + #[must_use] + pub fn new>(registry_url: S) -> Self { + Self { + registry_url: registry_url.as_ref().to_owned(), + groups: Vec::new(), + } + } + /// Returns the groups of the specified type. 
/// /// # Arguments diff --git a/crates/weaver_resolver/src/lib.rs b/crates/weaver_resolver/src/lib.rs index c761104f..d6c979ca 100644 --- a/crates/weaver_resolver/src/lib.rs +++ b/crates/weaver_resolver/src/lib.rs @@ -3,7 +3,6 @@ #![doc = include_str!("../README.md")] use miette::Diagnostic; -use std::collections::HashMap; use std::path::{PathBuf, MAIN_SEPARATOR}; use rayon::iter::ParallelIterator; @@ -235,29 +234,26 @@ impl SchemaResolver { /// corresponding resolved telemetry schema. pub fn resolve_semantic_convention_registry( registry: &mut SemConvRegistry, - ) -> Result { + ) -> WResult { let mut attr_catalog = AttributeCatalog::default(); - let resolved_registry = resolve_semconv_registry(&mut attr_catalog, "", registry)?; - - let catalog = Catalog { - attributes: attr_catalog.drain_attributes(), - }; - - let mut registries = HashMap::new(); - _ = registries.insert(registry.id().into(), resolved_registry); - - let resolved_schema = ResolvedTelemetrySchema { - file_format: "1.0.0".to_owned(), - schema_url: "".to_owned(), - registries, - catalog, - resource: None, - instrumentation_library: None, - dependencies: vec![], - versions: None, // ToDo LQ: Implement this! - }; - - Ok(resolved_schema) + resolve_semconv_registry(&mut attr_catalog, "", registry).map(move |resolved_registry| { + let catalog = Catalog::from_attributes(attr_catalog.drain_attributes()); + + let resolved_schema = ResolvedTelemetrySchema { + file_format: "1.0.0".to_owned(), + schema_url: "".to_owned(), + registry_id: registry.id().into(), + registry: resolved_registry, + catalog, + resource: None, + instrumentation_library: None, + dependencies: vec![], + versions: None, // ToDo LQ: Implement this! + registry_manifest: registry.manifest().cloned(), + }; + + resolved_schema + }) } /// Loads the semantic convention specifications from the given registry path. 
diff --git a/crates/weaver_resolver/src/registry.rs b/crates/weaver_resolver/src/registry.rs index b9098d5a..b9e71baa 100644 --- a/crates/weaver_resolver/src/registry.rs +++ b/crates/weaver_resolver/src/registry.rs @@ -8,6 +8,7 @@ use std::collections::{BTreeMap, HashMap, HashSet}; use std::fmt::Display; use std::hash::Hash; use weaver_common::error::handle_errors; +use weaver_common::result::WResult; use weaver_resolved_schema::attribute::UnresolvedAttribute; use weaver_resolved_schema::lineage::{AttributeLineage, GroupLineage}; use weaver_resolved_schema::registry::{Constraint, Group, Registry}; @@ -73,16 +74,24 @@ pub fn resolve_semconv_registry( attr_catalog: &mut AttributeCatalog, registry_url: &str, registry: &SemConvRegistry, -) -> Result { +) -> WResult { let mut ureg = unresolved_registry_from_specs(registry_url, registry); - resolve_prefix_on_attributes(&mut ureg)?; + if let Err(e) = resolve_prefix_on_attributes(&mut ureg) { + return WResult::FatalErr(e); + } - resolve_extends_references(&mut ureg)?; + if let Err(e) = resolve_extends_references(&mut ureg) { + return WResult::FatalErr(e); + } - resolve_attribute_references(&mut ureg, attr_catalog)?; + if let Err(e) = resolve_attribute_references(&mut ureg, attr_catalog) { + return WResult::FatalErr(e); + } - resolve_include_constraints(&mut ureg)?; + if let Err(e) = resolve_include_constraints(&mut ureg) { + return WResult::FatalErr(e); + } // Sort the attribute internal references in each group. // This is needed to ensure that the resolved registry is easy to compare @@ -96,9 +105,13 @@ pub fn resolve_semconv_registry( }) .collect(); + let mut errors = vec![]; + // Check the `any_of` constraints. let attr_name_index = attr_catalog.attribute_name_index(); - check_any_of_constraints(&ureg.registry, &attr_name_index)?; + if let Err(e) = check_any_of_constraints(&ureg.registry, &attr_name_index) { + errors.push(e); + } // All constraints are satisfied. // Remove the constraints from the resolved registry. 
@@ -107,7 +120,6 @@ pub fn resolve_semconv_registry( } // Other complementary checks. - let mut errors = vec![]; // Check for duplicate group IDs. check_uniqueness( &ureg.registry, @@ -140,9 +152,7 @@ pub fn resolve_semconv_registry( ); check_root_attribute_id_duplicates(&ureg.registry, &attr_name_index, &mut errors); - handle_errors(errors)?; - - Ok(ureg.registry) + WResult::OkWithNFEs(ureg.registry, errors) } /// Checks the `any_of` constraints in the given registry. @@ -360,10 +370,7 @@ fn unresolved_registry_from_specs( .collect(); UnresolvedRegistry { - registry: Registry { - registry_url: registry_url.to_owned(), - groups: vec![], - }, + registry: Registry::new(registry_url), groups, } } @@ -834,6 +841,7 @@ mod tests { use glob::glob; use serde::Serialize; + use weaver_common::result::WResult; use weaver_diff::canonicalize_json_string; use weaver_resolved_schema::attribute; use weaver_resolved_schema::registry::{Constraint, Registry}; @@ -900,7 +908,8 @@ mod tests { let mut attr_catalog = AttributeCatalog::default(); let observed_registry = - resolve_semconv_registry(&mut attr_catalog, "https://127.0.0.1", &sc_specs); + resolve_semconv_registry(&mut attr_catalog, "https://127.0.0.1", &sc_specs) + .into_result_failing_non_fatal(); // Check that the resolved attribute catalog matches the expected attribute catalog. 
let observed_attr_catalog = attr_catalog.drain_attributes(); @@ -968,7 +977,7 @@ mod tests { } } - fn create_registry_from_string(registry_spec: &str) -> Result { + fn create_registry_from_string(registry_spec: &str) -> WResult { let mut sc_specs = SemConvRegistry::new("default"); sc_specs .add_semconv_spec_from_string("", registry_spec) @@ -993,7 +1002,8 @@ groups: type: attribute_group brief: \"Group two\" extends: group.non.existent.two", - ); + ) + .into_result_failing_non_fatal(); assert!(result.is_err()); @@ -1019,7 +1029,8 @@ groups: requirement_level: opt_in - ref: non.existent.two requirement_level: opt_in", - ); + ) + .into_result_failing_non_fatal(); assert!(result.is_err()); @@ -1044,7 +1055,8 @@ groups: - include: 'non.existent.one' - include: 'non.existent.two' - include: 'non.existent.three'", - ); + ) + .into_result_failing_non_fatal(); assert!(result.is_err()); @@ -1125,10 +1137,11 @@ groups: // Resolve the semantic convention registry. let resolved_schema = - SchemaResolver::resolve_semantic_convention_registry(&mut semconv_registry)?; + SchemaResolver::resolve_semantic_convention_registry(&mut semconv_registry) + .into_result_failing_non_fatal()?; // Get the resolved registry by its ID. - let resolved_registry = resolved_schema.registry(registry_id).unwrap(); + let resolved_registry = &resolved_schema.registry; // Get the catalog of the resolved telemetry schema. 
let catalog = resolved_schema.catalog(); diff --git a/crates/weaver_semconv/Cargo.toml b/crates/weaver_semconv/Cargo.toml index 14c2e4b2..84b71e59 100644 --- a/crates/weaver_semconv/Cargo.toml +++ b/crates/weaver_semconv/Cargo.toml @@ -16,6 +16,7 @@ serde_json.workspace = true [dependencies] weaver_common = { path = "../weaver_common" } +weaver_cache = { path = "../weaver_cache" } serde.workspace = true serde_yaml.workspace = true @@ -24,5 +25,6 @@ ureq.workspace = true ordered-float.workspace = true miette.workspace = true schemars.workspace = true +regex.workspace = true glob = "0.3.2" \ No newline at end of file diff --git a/crates/weaver_semconv/allowed-external-types.toml b/crates/weaver_semconv/allowed-external-types.toml index a87cab8c..543eb188 100644 --- a/crates/weaver_semconv/allowed-external-types.toml +++ b/crates/weaver_semconv/allowed-external-types.toml @@ -5,7 +5,9 @@ allowed_external_types = [ "serde::ser::Serialize", "serde::de::Deserialize", + "serde::de::Deserializer", "weaver_common::*", + "weaver_cache::RegistryRepo", "ordered_float::OrderedFloat", # ToDo: Remove this dependency before version 1.0 "miette::protocol::Diagnostic", "schemars::JsonSchema", diff --git a/crates/weaver_semconv/src/attribute.rs b/crates/weaver_semconv/src/attribute.rs index 9d511cf7..a8d46cb2 100644 --- a/crates/weaver_semconv/src/attribute.rs +++ b/crates/weaver_semconv/src/attribute.rs @@ -5,6 +5,7 @@ //! Attribute specification. use crate::any_value::AnyValueSpec; +use crate::deprecated::Deprecated; use crate::stability::Stability; use crate::Error; use ordered_float::OrderedFloat; @@ -66,11 +67,13 @@ pub enum AttributeSpec { /// error. #[serde(skip_serializing_if = "Option::is_none")] stability: Option, - /// Specifies if the attribute is deprecated. The string - /// provided as MUST specify why it's deprecated and/or what - /// to use instead. See also stability. + /// Specifies if the attribute is deprecated. 
#[serde(skip_serializing_if = "Option::is_none")] - deprecated: Option, + #[serde( + deserialize_with = "crate::deprecated::deserialize_option_deprecated", + default + )] + deprecated: Option, /// Specifies the prefix of the attribute. /// If this parameter is set, the resolved id of the referenced attribute will /// have group prefix added to it. @@ -123,11 +126,13 @@ pub enum AttributeSpec { /// error. #[serde(skip_serializing_if = "Option::is_none")] stability: Option, - /// Specifies if the attribute is deprecated. The string - /// provided as MUST specify why it's deprecated and/or what - /// to use instead. See also stability. + /// Specifies if the attribute is deprecated. #[serde(skip_serializing_if = "Option::is_none")] - deprecated: Option, + #[serde( + deserialize_with = "crate::deprecated::deserialize_option_deprecated", + default + )] + deprecated: Option, }, } @@ -887,7 +892,7 @@ mod tests { sampling_relevant: Some(true), note: "note".to_owned(), stability: Some(Stability::Stable), - deprecated: Some("deprecated".to_owned()), + deprecated: Some(Deprecated::Deprecated), }; assert_eq!(attr.id(), "id"); assert_eq!(attr.brief(), "brief"); @@ -904,7 +909,7 @@ mod tests { sampling_relevant: Some(true), note: Some("note".to_owned()), stability: Some(Stability::Stable), - deprecated: Some("deprecated".to_owned()), + deprecated: Some(Deprecated::Deprecated), prefix: false, }; assert_eq!(attr.id(), "ref"); diff --git a/crates/weaver_semconv/src/deprecated.rs b/crates/weaver_semconv/src/deprecated.rs new file mode 100644 index 00000000..7257371a --- /dev/null +++ b/crates/weaver_semconv/src/deprecated.rs @@ -0,0 +1,196 @@ +// SPDX-License-Identifier: Apache-2.0 + +//! An enum to represent the different ways to deprecate an attribute, a metric, ... +//! +//! Two formats are supported: +//! - A string with the deprecation message (old format) +//! - A map with the action (renamed or removed) and optionally a note. When the +//! 
action is renamed, the map must also contain the field renamed_to. + +use schemars::JsonSchema; +use serde::de::{MapAccess, Visitor}; +use serde::{de, Deserialize, Deserializer, Serialize}; +use std::fmt; +use std::fmt::{Display, Formatter}; + +/// The different ways to deprecate an attribute, a metric, ... +#[derive(Serialize, Deserialize, Clone, Debug, Eq, PartialEq, Hash, JsonSchema)] +#[serde(rename_all = "snake_case")] +#[serde(tag = "action")] +pub enum Deprecated { + /// The object containing the deprecated field has been renamed to an + /// existing object or to a new object. + Renamed { + /// The new name of the field. + renamed_to: String, + }, + /// The object containing the deprecated field has been deprecated + /// either because it no longer exists, has been split into multiple fields, + /// has been renamed in various ways across different contexts, or for any other reason. + /// + /// The `note` field should contain the reason why the field has been deprecated. + Deprecated, +} + +/// Custom deserialization function to handle both old and new formats. +/// The old format is a string with the deprecation message. +/// The new format is a map with the action (renamed or removed) and optionally a note. When the +/// action is renamed, the map must also contain the field `rename_to`. +pub fn deserialize_deprecated<'de, D>(deserializer: D) -> Result +where + D: Deserializer<'de>, +{ + // Define the visitor to handle both the old and new formats + struct DeprecatedVisitor; + + impl<'de> Visitor<'de> for DeprecatedVisitor { + type Value = Deprecated; + + fn expecting(&self, formatter: &mut Formatter<'_>) -> fmt::Result { + formatter.write_str("a string or a map for deprecated field") + } + + /// Handle the old format (just a string) + /// + /// Note: The old format of the deprecated field is a string with the deprecation message. 
+ /// The new format is a map with at least the `action` field and the deprecation message is + /// expected to be in the standard `note` field. + fn visit_str(self, _value: &str) -> Result + where + E: de::Error, + { + Ok(Deprecated::Deprecated) + } + + // Handle the new format (a map with action and optionally `rename_to` or `note`) + fn visit_map(self, mut map: V) -> Result + where + V: MapAccess<'de>, + { + let mut action = None; + let mut new_name = None; + + while let Some(key) = map.next_key::()? { + match key.as_str() { + "action" => action = Some(map.next_value::()?), + "new_name" => new_name = Some(map.next_value()?), + _ => { + return Err(de::Error::unknown_field( + &key, + &["action", "new_name", "note"], + )) + } + } + } + + match action.as_deref() { + Some("renamed") => { + let rename_to = + new_name.ok_or_else(|| de::Error::missing_field("rename_to"))?; + Ok(Deprecated::Renamed { + renamed_to: rename_to, + }) + } + Some("deprecated") => Ok(Deprecated::Deprecated), + _ => Err(de::Error::missing_field("action")), + } + } + } + + deserializer.deserialize_any(DeprecatedVisitor) +} + +/// Custom deserialization function to handle both old and new formats for an optional field. 
+pub fn deserialize_option_deprecated<'de, D>( + deserializer: D, +) -> Result, D::Error> +where + D: Deserializer<'de>, +{ + struct OptionDeprecatedVisitor; + + impl<'de> Visitor<'de> for OptionDeprecatedVisitor { + type Value = Option; + + fn expecting(&self, formatter: &mut Formatter<'_>) -> fmt::Result { + formatter.write_str("a string, a map, or nothing for a deprecated field") + } + + fn visit_unit(self) -> Result + where + E: de::Error, + { + // If we encounter an empty value (unit), we return None + Ok(None) + } + + fn visit_none(self) -> Result + where + E: de::Error, + { + // Explicitly handle the None case (e.g., empty field) + Ok(None) + } + + fn visit_some(self, deserializer: D) -> Result + where + D: Deserializer<'de>, + { + // Use the existing deserializer for Deprecated values and wrap the result in Some + let deprecated = deserialize_deprecated(deserializer)?; + Ok(Some(deprecated)) + } + } + + deserializer.deserialize_option(OptionDeprecatedVisitor) +} + +/// Implements a human-readable display for the `Deprecated` enum. +impl Display for Deprecated { + fn fmt(&self, f: &mut Formatter<'_>) -> fmt::Result { + match self { + Deprecated::Renamed { + renamed_to: rename_to, + } => { + write!(f, "Replaced by `{}`.", rename_to) + } + Deprecated::Deprecated => { + write!(f, "Deprecated") + } + } + } +} + +#[cfg(test)] +mod tests { + use super::*; + + #[derive(Debug, Deserialize)] + struct Item { + #[serde(deserialize_with = "deserialize_option_deprecated", default)] + deprecated: Option, + } + + #[test] + fn test_deser_and_to_string() { + let yaml_data = r#" +- deprecated: 'Replaced by `jvm.buffer.memory.used`.' 
+- deprecated: + action: deprecated +- deprecated: + action: renamed + new_name: foo.unique_id +"#; + + let items: Vec = serde_yaml::from_str(yaml_data).unwrap(); + assert_eq!(items.len(), 3); + assert_eq!(items[0].deprecated, Some(Deprecated::Deprecated)); + assert_eq!(items[1].deprecated, Some(Deprecated::Deprecated {})); + assert_eq!( + items[2].deprecated, + Some(Deprecated::Renamed { + renamed_to: "foo.unique_id".to_owned(), + }) + ); + } +} diff --git a/crates/weaver_semconv/src/group.rs b/crates/weaver_semconv/src/group.rs index f9fc630a..d64717d3 100644 --- a/crates/weaver_semconv/src/group.rs +++ b/crates/weaver_semconv/src/group.rs @@ -11,6 +11,7 @@ use serde::{Deserialize, Serialize}; use crate::any_value::AnyValueSpec; use crate::attribute::{AttributeSpec, AttributeType, PrimitiveOrArrayTypeSpec}; +use crate::deprecated::Deprecated; use crate::group::InstrumentSpec::{Counter, Gauge, Histogram, UpDownCounter}; use crate::stability::Stability; use crate::Error; @@ -51,7 +52,11 @@ pub struct GroupSpec { /// provided as MUST specify why it's deprecated and/or what /// to use instead. See also stability. #[serde(skip_serializing_if = "Option::is_none")] - pub deprecated: Option, + #[serde( + deserialize_with = "crate::deprecated::deserialize_option_deprecated", + default + )] + pub deprecated: Option, /// List of attributes that belong to the semantic convention. 
#[serde(default)] pub attributes: Vec, @@ -456,6 +461,7 @@ impl Display for InstrumentSpec { mod tests { use crate::any_value::AnyValueCommonSpec; use crate::attribute::{BasicRequirementLevelSpec, Examples, RequirementLevel}; + use crate::deprecated::Deprecated; use crate::Error::{ CompoundError, InvalidAttributeAllowCustomValues, InvalidExampleWarning, InvalidGroup, InvalidGroupMissingExtendsOrAttributes, InvalidGroupStability, InvalidGroupUsesPrefix, @@ -474,13 +480,13 @@ mod tests { prefix: "".to_owned(), extends: None, stability: Some(Stability::Deprecated), - deprecated: Some("true".to_owned()), + deprecated: Some(Deprecated::Deprecated), attributes: vec![AttributeSpec::Id { id: "test".to_owned(), r#type: AttributeType::PrimitiveOrArray(PrimitiveOrArrayTypeSpec::String), brief: None, stability: Some(Stability::Deprecated), - deprecated: Some("true".to_owned()), + deprecated: Some(Deprecated::Deprecated), examples: Some(Examples::String("test".to_owned())), tag: None, requirement_level: Default::default(), @@ -603,13 +609,13 @@ mod tests { prefix: "".to_owned(), extends: None, stability: Some(Stability::Deprecated), - deprecated: Some("true".to_owned()), + deprecated: Some(Deprecated::Deprecated), attributes: vec![AttributeSpec::Id { id: "test".to_owned(), r#type: AttributeType::PrimitiveOrArray(PrimitiveOrArrayTypeSpec::String), brief: None, stability: Some(Stability::Deprecated), - deprecated: Some("true".to_owned()), + deprecated: Some(Deprecated::Deprecated), examples: Some(Examples::String("test".to_owned())), tag: None, requirement_level: Default::default(), @@ -637,7 +643,7 @@ mod tests { r#type: AttributeType::PrimitiveOrArray(PrimitiveOrArrayTypeSpec::String), brief: None, stability: Some(Stability::Deprecated), - deprecated: Some("true".to_owned()), + deprecated: Some(Deprecated::Deprecated), examples: None, tag: None, requirement_level: Default::default(), @@ -662,7 +668,7 @@ mod tests { r#type: 
AttributeType::PrimitiveOrArray(PrimitiveOrArrayTypeSpec::Strings), brief: None, stability: Some(Stability::Deprecated), - deprecated: Some("true".to_owned()), + deprecated: Some(Deprecated::Deprecated), examples: None, tag: None, requirement_level: Default::default(), @@ -692,7 +698,7 @@ mod tests { prefix: "".to_owned(), extends: None, stability: Some(Stability::Deprecated), - deprecated: Some("true".to_owned()), + deprecated: Some(Deprecated::Deprecated), attributes: vec![AttributeSpec::Id { id: "test".to_owned(), r#type: AttributeType::Enum { @@ -701,7 +707,7 @@ mod tests { }, brief: None, stability: Some(Stability::Deprecated), - deprecated: Some("true".to_owned()), + deprecated: Some(Deprecated::Deprecated), examples: Some(Examples::String("test".to_owned())), tag: None, requirement_level: Default::default(), @@ -739,7 +745,7 @@ mod tests { }, brief: None, stability: Some(Stability::Deprecated), - deprecated: Some("true".to_owned()), + deprecated: Some(Deprecated::Deprecated), examples: Some(Examples::String("test".to_owned())), tag: None, requirement_level: Default::default(), @@ -761,7 +767,7 @@ mod tests { prefix: "".to_owned(), extends: None, stability: Some(Stability::Deprecated), - deprecated: Some("true".to_owned()), + deprecated: Some(Deprecated::Deprecated), constraints: vec![], span_kind: None, events: vec![], @@ -970,7 +976,7 @@ mod tests { r#type: AttributeType::PrimitiveOrArray(PrimitiveOrArrayTypeSpec::String), brief: None, stability: Some(Stability::Deprecated), - deprecated: Some("true".to_owned()), + deprecated: Some(Deprecated::Deprecated), examples: Some(Examples::String("test".to_owned())), tag: None, requirement_level: Default::default(), @@ -1112,7 +1118,7 @@ mod tests { r#type: AttributeType::PrimitiveOrArray(PrimitiveOrArrayTypeSpec::String), brief: None, stability: Some(Stability::Deprecated), - deprecated: Some("true".to_owned()), + deprecated: Some(Deprecated::Deprecated), examples: Some(Examples::String("test".to_owned())), tag: 
None, requirement_level: Default::default(), diff --git a/crates/weaver_semconv/src/lib.rs b/crates/weaver_semconv/src/lib.rs index b4be2c35..da141e36 100644 --- a/crates/weaver_semconv/src/lib.rs +++ b/crates/weaver_semconv/src/lib.rs @@ -5,12 +5,15 @@ use crate::Error::CompoundError; use miette::Diagnostic; use serde::Serialize; +use std::path::PathBuf; use weaver_common::diagnostic::{DiagnosticMessage, DiagnosticMessages}; use weaver_common::error::{format_errors, WeaverError}; pub mod any_value; pub mod attribute; +pub mod deprecated; pub mod group; +pub mod manifest; pub mod metric; pub mod registry; pub mod semconv; @@ -204,6 +207,24 @@ pub enum Error { error: String, }, + /// This error is raised when a registry manifest is not found. + #[error("The registry manifest at {path:?} is not found.")] + #[diagnostic(severity(Error))] + RegistryManifestNotFound { + /// The path to the registry manifest file. + path: PathBuf, + }, + + /// This error is raised when a registry manifest is invalid. + #[error("The registry manifest at {path:?} is invalid. {error}")] + #[diagnostic(severity(Error))] + InvalidRegistryManifest { + /// The path to the registry manifest file. + path: PathBuf, + /// The error that occurred. + error: String, + }, + /// A container for multiple errors. #[error("{:?}", format_errors(.0))] CompoundError(#[related] Vec), diff --git a/crates/weaver_semconv/src/manifest.rs b/crates/weaver_semconv/src/manifest.rs new file mode 100644 index 00000000..7f96d16f --- /dev/null +++ b/crates/weaver_semconv/src/manifest.rs @@ -0,0 +1,161 @@ +// SPDX-License-Identifier: Apache-2.0 + +//! Contains the definitions for the semantic conventions registry manifest. +//! +//! This struct is used to specify the registry, including its name, version, +//! description, and few other details. +//! +//! In the future, this struct may be extended to include additional information +//! such as the registry's owner, maintainers, and dependencies. 
+ +use crate::Error; +use crate::Error::{InvalidRegistryManifest, RegistryManifestNotFound}; +use schemars::JsonSchema; +use serde::{Deserialize, Serialize}; +use std::path::PathBuf; +use weaver_common::error::handle_errors; + +/// Represents the information of a semantic convention registry manifest. +/// +/// This information defines the registry's name, version, description, and schema +/// base url. +#[derive(Serialize, Deserialize, Debug, Clone, JsonSchema)] +pub struct RegistryManifest { + /// The name of the registry. This name is used to define the package name. + pub name: String, + + /// An optional description of the registry. + /// + /// This field can be used to provide additional context or information about the registry's + /// purpose and contents. + /// The format of the description is markdown. + pub description: Option, + + /// The version of the registry which will be used to define the semconv package version. + pub semconv_version: String, + + /// The base URL where the registry's schema files are hosted. + pub schema_base_url: String, +} + +impl RegistryManifest { + /// Attempts to load a registry manifest from a file. + /// + /// The expected file format is YAML. 
+ pub fn try_from_file>(path: P) -> Result { + let manifest_path_buf = path.as_ref().to_path_buf(); + + if !manifest_path_buf.exists() { + return Err(RegistryManifestNotFound { + path: manifest_path_buf.clone(), + }); + } + + let file = std::fs::File::open(path).map_err(|e| InvalidRegistryManifest { + path: manifest_path_buf.clone(), + error: e.to_string(), + })?; + let reader = std::io::BufReader::new(file); + let manifest: RegistryManifest = + serde_yaml::from_reader(reader).map_err(|e| InvalidRegistryManifest { + path: manifest_path_buf.clone(), + error: e.to_string(), + })?; + + manifest.validate(manifest_path_buf.clone())?; + + Ok(manifest) + } + + fn validate(&self, path: PathBuf) -> Result<(), Error> { + let mut errors = vec![]; + + if self.name.is_empty() { + errors.push(InvalidRegistryManifest { + path: path.clone(), + error: "The registry name is required.".to_owned(), + }); + } + + if self.semconv_version.is_empty() { + errors.push(InvalidRegistryManifest { + path: path.clone(), + error: "The registry version is required.".to_owned(), + }); + } + + if self.schema_base_url.is_empty() { + errors.push(InvalidRegistryManifest { + path: path.clone(), + error: "The registry schema base URL is required.".to_owned(), + }); + } + + handle_errors(errors)?; + + Ok(()) + } +} + +#[cfg(test)] +mod tests { + use super::*; + use crate::Error::CompoundError; + + #[test] + fn test_not_found_registry_info() { + let result = RegistryManifest::try_from_file("tests/test_data/missing_registry.yaml"); + assert!( + matches!(result, Err(RegistryManifestNotFound { path, .. }) if path.ends_with("missing_registry.yaml")) + ); + } + + #[test] + fn test_incomplete_registry_info() { + let result = RegistryManifest::try_from_file( + "tests/test_data/incomplete_semconv_registry_manifest.yaml", + ); + assert!( + matches!(result, Err(InvalidRegistryManifest { path, .. 
/// Loading a manifest whose `name`, `semconv_version` and `schema_base_url` are all
/// empty must fail with a compound error holding one entry per empty field, in that
/// order.
#[test]
fn test_invalid_registry_info() {
    let manifest_file = "tests/test_data/invalid_semconv_registry_manifest.yaml";
    // Builds the error expected for one empty mandatory field.
    let invalid = |error: &str| InvalidRegistryManifest {
        path: PathBuf::from(manifest_file),
        error: error.to_owned(),
    };

    let expected_errs = CompoundError(vec![
        invalid("The registry name is required."),
        invalid("The registry version is required."),
        invalid("The registry schema base URL is required."),
    ]);

    match RegistryManifest::try_from_file(manifest_file) {
        Err(observed_errs) => assert_eq!(observed_errs, expected_errs),
        Ok(_) => panic!("Expected an error, but got a result."),
    }
}
-36,6 +40,9 @@ pub struct SemConvRegistry { /// /// This collection contains all the metrics defined in the semantic convention registry. metrics: HashMap, + + /// The manifest of the semantic convention registry. + manifest: Option, } impl SemConvRegistry { @@ -103,20 +110,45 @@ impl SemConvRegistry { /// /// # Arguments /// - /// * `registry_id` - The id of the semantic convention registry. + /// * `registry_repo` - The semantic convention registry. /// * `semconv_specs` - The list of semantic convention specs to load. pub fn from_semconv_specs( - registry_id: &str, + registry_repo: &RegistryRepo, semconv_specs: Vec<(String, SemConvSpec)>, - ) -> SemConvRegistry { + ) -> Result { + static VERSION_REGEX: LazyLock = + LazyLock::new(|| Regex::new(r".*(v\d+\.\d+\.\d+).*").expect("Invalid regex")); + // Load all the semantic convention registry. - let mut registry = SemConvRegistry::new(registry_id); + let mut registry = SemConvRegistry::new(registry_repo.id()); for (provenance, spec) in semconv_specs { registry.add_semconv_spec(SemConvSpecWithProvenance { spec, provenance }); } - registry + if let Some(manifest_path) = registry_repo.manifest_path() { + registry.set_manifest(RegistryManifest::try_from_file(manifest_path)?); + } else { + let mut semconv_version = "unversioned".to_owned(); + + // No registry manifest found. + // Try to infer the manifest from the registry path by detecting the + // presence of the following pattern in the registry path: v\d+\.\d+\.\d+. + if let Some(captures) = VERSION_REGEX.captures(registry_repo.registry_path_repr()) { + if let Some(captured_text) = captures.get(1) { + semconv_version = captured_text.as_str().to_owned(); + } + } + + registry.set_manifest(RegistryManifest { + name: registry_repo.id().to_owned(), + description: None, + semconv_version, + schema_base_url: "".to_owned(), + }); + } + + Ok(registry) } /// Returns the id of the semantic convention registry. 
@@ -125,6 +157,17 @@ impl SemConvRegistry { &self.id } + /// Set the manifest of the semantic convention registry. + fn set_manifest(&mut self, manifest: RegistryManifest) { + self.manifest = Some(manifest); + } + + /// Returns the manifest of the semantic convention registry. + #[must_use] + pub fn manifest(&self) -> Option<&RegistryManifest> { + self.manifest.as_ref() + } + /// Add a semantic convention spec to the semantic convention registry. /// /// # Arguments @@ -161,7 +204,7 @@ impl SemConvRegistry { SemConvSpec::from_file(semconv_path).map(|spec| (provenance, spec)) } - /// Downloads and returns the semantic convention spec from an URL. + /// Downloads and returns the semantic convention spec from a URL. pub fn semconv_spec_from_url(sem_conv_url: &str) -> WResult<(String, SemConvSpec), Error> { SemConvSpec::from_url(sem_conv_url).map(|spec| (sem_conv_url.to_owned(), spec)) } @@ -215,6 +258,8 @@ mod tests { use crate::group::{GroupSpec, GroupType}; use crate::registry::SemConvRegistry; use crate::Error; + use weaver_cache::registry_path::RegistryPath; + use weaver_cache::RegistryRepo; use weaver_common::test::ServeStaticFiles; #[test] @@ -312,7 +357,11 @@ mod tests { }, ), ]; - let registry = SemConvRegistry::from_semconv_specs("test", semconv_specs); + let registry_path = RegistryPath::LocalFolder { + path: "data".to_owned(), + }; + let registry_repo = RegistryRepo::try_new("test", &registry_path).unwrap(); + let registry = SemConvRegistry::from_semconv_specs(&registry_repo, semconv_specs).unwrap(); assert_eq!(registry.id(), "test"); assert_eq!(registry.semconv_spec_count(), 2); } diff --git a/crates/weaver_semconv/tests/test_data/incomplete_semconv_registry_manifest.yaml b/crates/weaver_semconv/tests/test_data/incomplete_semconv_registry_manifest.yaml new file mode 100644 index 00000000..07a968d8 --- /dev/null +++ b/crates/weaver_semconv/tests/test_data/incomplete_semconv_registry_manifest.yaml @@ -0,0 +1,2 @@ +# This is an invalid semconv registry manifest file
because the version and schema base url fields are missing. +name: vendor_acme \ No newline at end of file diff --git a/crates/weaver_semconv/tests/test_data/invalid_semconv_registry_manifest.yaml b/crates/weaver_semconv/tests/test_data/invalid_semconv_registry_manifest.yaml new file mode 100644 index 00000000..06233de2 --- /dev/null +++ b/crates/weaver_semconv/tests/test_data/invalid_semconv_registry_manifest.yaml @@ -0,0 +1,4 @@ +name: "" +description: This registry contains the semantic conventions for the Acme vendor. +semconv_version: "" +schema_base_url: "" \ No newline at end of file diff --git a/crates/weaver_semconv/tests/test_data/valid_semconv_registry_manifest.yaml b/crates/weaver_semconv/tests/test_data/valid_semconv_registry_manifest.yaml new file mode 100644 index 00000000..26324916 --- /dev/null +++ b/crates/weaver_semconv/tests/test_data/valid_semconv_registry_manifest.yaml @@ -0,0 +1,4 @@ +name: vendor_acme +description: This registry contains the semantic conventions for the Acme vendor. +semconv_version: 0.1.0 +schema_base_url: https://acme.com/schemas/ \ No newline at end of file diff --git a/crates/weaver_semconv_gen/src/lib.rs b/crates/weaver_semconv_gen/src/lib.rs index 26a7b815..0302faa0 100644 --- a/crates/weaver_semconv_gen/src/lib.rs +++ b/crates/weaver_semconv_gen/src/lib.rs @@ -5,17 +5,17 @@ //! poorly porting the code into RUST. We expect to optimise and improve things over time. 
use miette::Diagnostic; -use std::{fmt, fs}; - use serde::Serialize; +use std::{fmt, fs}; use weaver_cache::RegistryRepo; use weaver_common::diagnostic::{DiagnosticMessage, DiagnosticMessages}; use weaver_common::error::{format_errors, WeaverError}; +use weaver_common::result::WResult; use weaver_diff::diff_output; use weaver_forge::registry::ResolvedGroup; use weaver_forge::TemplateEngine; use weaver_resolved_schema::catalog::Catalog; -use weaver_resolved_schema::registry::{Group, Registry}; +use weaver_resolved_schema::registry::Group; use weaver_resolved_schema::ResolvedTelemetrySchema; use weaver_resolver::SchemaResolver; use weaver_semconv::registry::SemConvRegistry; @@ -324,7 +324,6 @@ impl SnippetGenerator { /// The resolved Semantic Convention repository that is used to drive snipper generation. struct ResolvedSemconvRegistry { schema: ResolvedTelemetrySchema, - registry_id: String, } impl ResolvedSemconvRegistry { @@ -334,20 +333,29 @@ impl ResolvedSemconvRegistry { diag_msgs: &mut DiagnosticMessages, follow_symlinks: bool, ) -> Result { - let registry_id = "semantic_conventions"; - let semconv_specs = SchemaResolver::load_semconv_specs(registry_repo, follow_symlinks) - .capture_non_fatal_errors(diag_msgs)?; - let mut registry = SemConvRegistry::from_semconv_specs(registry_id, semconv_specs); - let schema = SchemaResolver::resolve_semantic_convention_registry(&mut registry)?; - let lookup = ResolvedSemconvRegistry { - schema, - registry_id: registry_id.into(), + let semconv_specs = match SchemaResolver::load_semconv_specs(registry_repo, follow_symlinks) + { + WResult::Ok(semconv_specs) => semconv_specs, + WResult::OkWithNFEs(semconv_specs, errs) => { + diag_msgs.extend_from_vec(errs.into_iter().map(DiagnosticMessage::new).collect()); + semconv_specs + } + WResult::FatalErr(err) => return Err(err.into()), }; - Ok(lookup) - } - fn my_registry(&self) -> Option<&Registry> { - self.schema.registry(self.registry_id.as_str()) + let mut registry = match 
SemConvRegistry::from_semconv_specs(registry_repo, semconv_specs) { + Ok(registry) => registry, + Err(e) => return Err(e.into()), + }; + let schema = match SchemaResolver::resolve_semantic_convention_registry(&mut registry) { + WResult::Ok(schema) => schema, + WResult::OkWithNFEs(schema, errs) => { + diag_msgs.extend_from_vec(errs.into_iter().map(DiagnosticMessage::new).collect()); + schema + } + WResult::FatalErr(err) => return Err(err.into()), + }; + Ok(ResolvedSemconvRegistry { schema }) } fn catalog(&self) -> &Catalog { @@ -355,8 +363,7 @@ impl ResolvedSemconvRegistry { } fn find_group(&self, id: &str) -> Option<&Group> { - self.my_registry() - .and_then(|r| r.groups.iter().find(|g| g.id == id)) + self.schema.registry.groups.iter().find(|g| g.id == id) } } diff --git a/crates/weaver_version/README.md b/crates/weaver_version/README.md index 50f254a4..e7d30d91 100644 --- a/crates/weaver_version/README.md +++ b/crates/weaver_version/README.md @@ -1,4 +1,9 @@ -# Telemetry Schema Versioning +# Telemetry Schema Versioning and Schema Diff Data Model -This crate describes the data model for the OpenTelemetry schema versioning -(section `versions` in the Telemetry Schema v1.1.0). \ No newline at end of file +This document describes the data model for OpenTelemetry schema versioning +(section `versions` in Telemetry Schema v1.1.0) and the schema changes data +model used in the weaver tool to represent differences between two versions +of a semantic convention registry. + +For more information about the schema diffing process, see this +[document](/docs/schema-changes.md). \ No newline at end of file diff --git a/crates/weaver_version/src/all_changes.rs b/crates/weaver_version/src/all_changes.rs new file mode 100644 index 00000000..53673c66 --- /dev/null +++ b/crates/weaver_version/src/all_changes.rs @@ -0,0 +1,32 @@ +// SPDX-License-Identifier: Apache-2.0 + +//! Section "all" changes in the OpenTelemetry Schema file. 
+ +use schemars::JsonSchema; +use serde::{Deserialize, Serialize}; +use std::collections::HashMap; + +/// Changes to apply to the attributes of resource attributes, span attributes, +/// event attributes, log attributes, and metric attributes. +#[derive(Serialize, Deserialize, Debug, Default, Clone, JsonSchema)] +#[serde(deny_unknown_fields)] +pub struct AllChanges { + /// Changes to apply to the attributes. + pub changes: Vec, +} + +/// Changes to apply to the attributes for a specific version. +#[derive(Serialize, Deserialize, Debug, Default, Clone, JsonSchema)] +#[serde(deny_unknown_fields)] +pub struct AllChange { + /// Changes to apply to the resource attributes for a specific version. + pub rename_attributes: RenameAttributes, +} + +/// Changes to apply to the attributes for a specific version. +#[derive(Serialize, Deserialize, Debug, Default, Clone, JsonSchema)] +#[serde(deny_unknown_fields)] +pub struct RenameAttributes { + /// A collection of rename operations to apply to the resource attributes. 
+ pub attribute_map: HashMap, +} diff --git a/crates/weaver_version/src/lib.rs b/crates/weaver_version/src/lib.rs index 60ec599a..ed5d5551 100644 --- a/crates/weaver_version/src/lib.rs +++ b/crates/weaver_version/src/lib.rs @@ -8,25 +8,23 @@ use std::fs::File; use std::io::BufReader; use std::path::Path; +use crate::all_changes::AllChanges; +use crate::logs_changes::LogsChanges; +use crate::metrics_changes::MetricsChanges; +use crate::resource_changes::ResourceChanges; +use crate::spans_changes::SpansChanges; +use logs_changes::LogsChange; +use metrics_changes::MetricsChange; +use resource_changes::ResourceChange; use serde::{Deserialize, Serialize}; +use spans_changes::SpansChange; -use crate::logs_change::LogsChange; -use crate::logs_version::LogsVersion; -use crate::metrics_change::MetricsChange; -use crate::metrics_version::MetricsVersion; -use crate::resource_change::ResourceChange; -use crate::resource_version::ResourceVersion; -use crate::spans_change::SpansChange; -use crate::spans_version::SpansVersion; - -pub mod logs_change; -pub mod logs_version; -pub mod metrics_change; -pub mod metrics_version; -pub mod resource_change; -pub mod resource_version; -pub mod spans_change; -pub mod spans_version; +mod all_changes; +pub mod logs_changes; +pub mod metrics_changes; +pub mod resource_changes; +pub mod schema_changes; +pub mod spans_changes; /// An error that can occur while loading or resolving version changes. #[derive(thiserror::Error, Debug)] @@ -70,14 +68,17 @@ pub struct Versions { #[derive(Serialize, Deserialize, Debug, Clone, JsonSchema)] #[serde(deny_unknown_fields)] pub struct VersionSpec { + /// The changes to apply to the following telemetry data: resource attributes, + /// span attributes, span event attributes, log attributes, metric attributes. + pub all: Option, /// The changes to apply to the metrics specification for a specific version. 
- pub metrics: Option, + pub metrics: Option, /// The changes to apply to the logs specification for a specific version. - pub logs: Option, + pub logs: Option, /// The changes to apply to the spans specification for a specific version. - pub spans: Option, + pub spans: Option, /// The changes to apply to the resource specification for a specific version. - pub resources: Option, + pub resources: Option, } /// The changes to apply to rename attributes and metrics for @@ -312,7 +313,7 @@ impl VersionSpec { 'next_parent_renaming: for (old, new) in change.rename_attributes.attribute_map { for local_change in self .resources - .get_or_insert_with(ResourceVersion::default) + .get_or_insert_with(ResourceChanges::default) .changes .iter() { @@ -335,17 +336,17 @@ impl VersionSpec { if !resource_change.rename_attributes.attribute_map.is_empty() { if self .resources - .get_or_insert_with(ResourceVersion::default) + .get_or_insert_with(ResourceChanges::default) .changes .is_empty() { self.resources - .get_or_insert_with(ResourceVersion::default) + .get_or_insert_with(ResourceChanges::default) .changes .push(resource_change); } else { self.resources - .get_or_insert_with(ResourceVersion::default) + .get_or_insert_with(ResourceChanges::default) .changes[0] .rename_attributes .attribute_map @@ -361,7 +362,7 @@ impl VersionSpec { 'next_parent_renaming: for (old, new) in change.rename_metrics { for local_change in self .metrics - .get_or_insert_with(MetricsVersion::default) + .get_or_insert_with(MetricsChanges::default) .changes .iter() { @@ -377,17 +378,17 @@ impl VersionSpec { if !metrics_change.rename_metrics.is_empty() { if self .metrics - .get_or_insert_with(MetricsVersion::default) + .get_or_insert_with(MetricsChanges::default) .changes .is_empty() { self.metrics - .get_or_insert_with(MetricsVersion::default) + .get_or_insert_with(MetricsChanges::default) .changes .push(metrics_change); } else { self.metrics - .get_or_insert_with(MetricsVersion::default) + 
.get_or_insert_with(MetricsChanges::default) .changes[0] .rename_metrics .extend(metrics_change.rename_metrics); @@ -402,7 +403,7 @@ impl VersionSpec { 'next_parent_renaming: for (old, new) in change.rename_attributes.attribute_map { for local_change in self .logs - .get_or_insert_with(LogsVersion::default) + .get_or_insert_with(LogsChanges::default) .changes .iter() { @@ -422,16 +423,16 @@ impl VersionSpec { if !logs_change.rename_attributes.attribute_map.is_empty() { if self .logs - .get_or_insert_with(LogsVersion::default) + .get_or_insert_with(LogsChanges::default) .changes .is_empty() { self.logs - .get_or_insert_with(LogsVersion::default) + .get_or_insert_with(LogsChanges::default) .changes .push(logs_change); } else { - self.logs.get_or_insert_with(LogsVersion::default).changes[0] + self.logs.get_or_insert_with(LogsChanges::default).changes[0] .rename_attributes .attribute_map .extend(logs_change.rename_attributes.attribute_map); @@ -446,7 +447,7 @@ impl VersionSpec { 'next_parent_renaming: for (old, new) in change.rename_attributes.attribute_map { for local_change in self .spans - .get_or_insert_with(SpansVersion::default) + .get_or_insert_with(SpansChanges::default) .changes .iter() { @@ -469,16 +470,16 @@ impl VersionSpec { if !spans_change.rename_attributes.attribute_map.is_empty() { if self .spans - .get_or_insert_with(SpansVersion::default) + .get_or_insert_with(SpansChanges::default) .changes .is_empty() { self.spans - .get_or_insert_with(SpansVersion::default) + .get_or_insert_with(SpansChanges::default) .changes .push(spans_change); } else { - self.spans.get_or_insert_with(SpansVersion::default).changes[0] + self.spans.get_or_insert_with(SpansChanges::default).changes[0] .rename_attributes .attribute_map .extend(spans_change.rename_attributes.attribute_map); diff --git a/crates/weaver_version/src/logs_change.rs b/crates/weaver_version/src/logs_changes.rs similarity index 71% rename from crates/weaver_version/src/logs_change.rs rename to 
crates/weaver_version/src/logs_changes.rs index 0230baeb..049eb3e3 100644 --- a/crates/weaver_version/src/logs_change.rs +++ b/crates/weaver_version/src/logs_changes.rs @@ -1,11 +1,19 @@ // SPDX-License-Identifier: Apache-2.0 -//! Changes to apply to the logs for a specific version. +//! Logs change definitions. use schemars::JsonSchema; use serde::{Deserialize, Serialize}; use std::collections::HashMap; +/// Changes to apply to the logs for a specific version. +#[derive(Serialize, Deserialize, Debug, Default, Clone, JsonSchema)] +#[serde(deny_unknown_fields)] +pub struct LogsChanges { + /// Changes to apply to the logs for a specific version. + pub changes: Vec, +} + /// Changes to apply to the logs for a specific version. #[derive(Serialize, Deserialize, Debug, Default, Clone, JsonSchema)] #[serde(deny_unknown_fields)] diff --git a/crates/weaver_version/src/logs_version.rs b/crates/weaver_version/src/logs_version.rs deleted file mode 100644 index a6963a53..00000000 --- a/crates/weaver_version/src/logs_version.rs +++ /dev/null @@ -1,15 +0,0 @@ -// SPDX-License-Identifier: Apache-2.0 - -//! Logs version. - -use crate::logs_change::LogsChange; -use schemars::JsonSchema; -use serde::{Deserialize, Serialize}; - -/// Changes to apply to the logs for a specific version. -#[derive(Serialize, Deserialize, Debug, Default, Clone, JsonSchema)] -#[serde(deny_unknown_fields)] -pub struct LogsVersion { - /// Changes to apply to the logs for a specific version. 
- pub changes: Vec, -} diff --git a/crates/weaver_version/src/metrics_change.rs b/crates/weaver_version/src/metrics_changes.rs similarity index 79% rename from crates/weaver_version/src/metrics_change.rs rename to crates/weaver_version/src/metrics_changes.rs index db8d65f8..288831f1 100644 --- a/crates/weaver_version/src/metrics_change.rs +++ b/crates/weaver_version/src/metrics_changes.rs @@ -6,6 +6,14 @@ use schemars::JsonSchema; use serde::{Deserialize, Serialize}; use std::collections::HashMap; +/// Changes to apply to the metrics for a specific version. +#[derive(Serialize, Deserialize, Debug, Default, Clone, JsonSchema)] +#[serde(deny_unknown_fields)] +pub struct MetricsChanges { + /// Changes to apply to the metrics for a specific version. + pub changes: Vec, +} + /// Changes to apply to the metrics for a specific version. #[derive(Serialize, Deserialize, Debug, Default, Clone, JsonSchema)] #[serde(deny_unknown_fields)] diff --git a/crates/weaver_version/src/metrics_version.rs b/crates/weaver_version/src/metrics_version.rs deleted file mode 100644 index 8a2c8583..00000000 --- a/crates/weaver_version/src/metrics_version.rs +++ /dev/null @@ -1,15 +0,0 @@ -// SPDX-License-Identifier: Apache-2.0 - -//! Metrics version. - -use crate::metrics_change::MetricsChange; -use schemars::JsonSchema; -use serde::{Deserialize, Serialize}; - -/// Changes to apply to the metrics for a specific version. -#[derive(Serialize, Deserialize, Debug, Default, Clone, JsonSchema)] -#[serde(deny_unknown_fields)] -pub struct MetricsVersion { - /// Changes to apply to the metrics for a specific version. 
- pub changes: Vec, -} diff --git a/crates/weaver_version/src/resource_change.rs b/crates/weaver_version/src/resource_changes.rs similarity index 70% rename from crates/weaver_version/src/resource_change.rs rename to crates/weaver_version/src/resource_changes.rs index 7e3fbd2e..2ec3ea71 100644 --- a/crates/weaver_version/src/resource_change.rs +++ b/crates/weaver_version/src/resource_changes.rs @@ -1,11 +1,19 @@ // SPDX-License-Identifier: Apache-2.0 -//! Changes to apply to the resources for a specific version. +//! Resource change definitions. use schemars::JsonSchema; use serde::{Deserialize, Serialize}; use std::collections::HashMap; +/// Changes to apply to the resource for a specific version. +#[derive(Serialize, Deserialize, Debug, Default, Clone, JsonSchema)] +#[serde(deny_unknown_fields)] +pub struct ResourceChanges { + /// Changes to apply to the resource for a specific version. + pub changes: Vec, +} + /// Changes to apply to the resources for a specific version. #[derive(Serialize, Deserialize, Debug, Default, Clone, JsonSchema)] #[serde(deny_unknown_fields)] diff --git a/crates/weaver_version/src/resource_version.rs b/crates/weaver_version/src/resource_version.rs deleted file mode 100644 index a559c8cb..00000000 --- a/crates/weaver_version/src/resource_version.rs +++ /dev/null @@ -1,15 +0,0 @@ -// SPDX-License-Identifier: Apache-2.0 - -//! Resource version. - -use crate::resource_change::ResourceChange; -use schemars::JsonSchema; -use serde::{Deserialize, Serialize}; - -/// Changes to apply to the resource for a specific version. -#[derive(Serialize, Deserialize, Debug, Default, Clone, JsonSchema)] -#[serde(deny_unknown_fields)] -pub struct ResourceVersion { - /// Changes to apply to the resource for a specific version. 
- pub changes: Vec<ResourceChange>, -} diff --git a/crates/weaver_version/src/schema_changes.rs b/crates/weaver_version/src/schema_changes.rs new file mode 100644 index 00000000..7d64c744 --- /dev/null +++ b/crates/weaver_version/src/schema_changes.rs @@ -0,0 +1,328 @@ +// SPDX-License-Identifier: Apache-2.0 + +//! Data structures and utilities for tracking schema changes between versions. + +use serde::Serialize; +use std::collections::{HashMap, HashSet}; + +/// The type of schema item. +#[derive(Debug, Serialize, Hash, Eq, PartialEq, Copy, Clone)] +#[serde(rename_all = "snake_case")] +pub enum SchemaItemType { + /// Attributes + Attributes, + /// Metrics + Metrics, + /// Events + Events, + /// Spans + Spans, + /// Resources + Resources, +} + +/// A summary of schema changes between two versions of a schema. +#[derive(Debug, Default, Serialize)] +#[serde(rename_all = "snake_case")] +pub struct SchemaChanges { + /// Information on the registry manifest for the most recent version of the schema. + head: RegistryManifest, + + /// Information on the registry manifest for the baseline version of the schema. + baseline: RegistryManifest, + + /// A map where the key is the type of schema item (e.g., "attributes", "metrics", + /// "events", "spans", "resources"), and the value is a list of changes associated + /// with that item type. + changes: HashMap<SchemaItemType, Vec<SchemaItemChange>>, +} + +/// Represents the information of a semantic convention registry manifest. +#[derive(Debug, Default, Serialize)] +#[serde(rename_all = "snake_case")] +pub struct RegistryManifest { + /// The version of the registry which will be used to define the semconv package version. + pub semconv_version: String, +} + +/// Represents the different types of changes that can occur between +/// two versions of a schema. This covers changes such as adding, removing, +/// renaming, and deprecating schema items (attributes, metrics, etc.).
+#[derive(Debug, Serialize)] +#[serde(rename_all = "snake_case")] +#[serde(tag = "type")] +pub enum SchemaItemChange { + /// An item (e.g. attribute, metric, ...) has been added + /// into the most recent version of the schema. + Added { + /// The name of the added item. + name: String, + }, + /// One or more items have been renamed into a new item. + RenamedToNew { + /// The old names of the items that have been renamed. + old_names: HashSet, + /// The new name of the items that have been renamed. + new_name: String, + }, + /// One or more items have been renamed into an existing item. + RenamedToExisting { + /// The old names of the items that have been renamed. + old_names: HashSet, + /// The current name of the items that have been renamed. + current_name: String, + }, + /// An item has been deprecated. + Deprecated { + /// The name of the deprecated item. + name: String, + /// A deprecation note providing further context. + note: String, + }, + /// An item has been removed. + Removed { + /// The name of the removed item. + name: String, + }, +} + +impl SchemaChanges { + /// Create a new instance of `SchemaChanges`. + #[must_use] + pub fn new() -> Self { + let mut schema_changes = SchemaChanges { + head: RegistryManifest::default(), + baseline: RegistryManifest::default(), + changes: HashMap::new(), + }; + let _ = schema_changes + .changes + .insert(SchemaItemType::Attributes, Vec::new()); + let _ = schema_changes + .changes + .insert(SchemaItemType::Metrics, Vec::new()); + let _ = schema_changes + .changes + .insert(SchemaItemType::Events, Vec::new()); + let _ = schema_changes + .changes + .insert(SchemaItemType::Spans, Vec::new()); + let _ = schema_changes + .changes + .insert(SchemaItemType::Resources, Vec::new()); + + schema_changes + } + + /// Returns true if there are no changes in the schema. + /// Otherwise, it returns false. 
+ #[must_use] + pub fn is_empty(&self) -> bool { + self.changes.values().all(|v| v.is_empty()) + } + + /// Counts the number of changes in the schema. + #[must_use] + pub fn count_changes(&self) -> usize { + self.changes.values().map(|v| v.len()).sum() + } + + /// Counts the number of attribute changes in the schema. + #[must_use] + pub fn count_attribute_changes(&self) -> usize { + self.changes + .get(&SchemaItemType::Attributes) + .map(|v| v.len()) + .unwrap_or(0) + } + + /// Counts the number of added attributes in the schema. + #[must_use] + pub fn count_added_attributes(&self) -> usize { + self.changes + .get(&SchemaItemType::Attributes) + .map(|v| { + v.iter() + .filter(|c| matches!(c, SchemaItemChange::Added { .. })) + .count() + }) + .unwrap_or(0) + } + + /// Counts the number of deprecated attributes in the schema. + #[must_use] + pub fn count_deprecated_attributes(&self) -> usize { + self.changes + .get(&SchemaItemType::Attributes) + .map(|v| { + v.iter() + .filter(|c| matches!(c, SchemaItemChange::Deprecated { .. })) + .count() + }) + .unwrap_or(0) + } + + /// Counts the number of renamed to new attributes in the schema. + #[must_use] + pub fn count_renamed_to_new_attributes(&self) -> usize { + self.changes + .get(&SchemaItemType::Attributes) + .map(|v| { + v.iter() + .filter(|c| matches!(c, SchemaItemChange::RenamedToNew { .. })) + .count() + }) + .unwrap_or(0) + } + + /// Counts the number of renamed to existing attributes in the schema. + #[must_use] + pub fn count_renamed_to_existing_attributes(&self) -> usize { + self.changes + .get(&SchemaItemType::Attributes) + .map(|v| { + v.iter() + .filter(|c| matches!(c, SchemaItemChange::RenamedToExisting { .. })) + .count() + }) + .unwrap_or(0) + } + + /// Counts the number of removed attributes in the schema. + #[must_use] + pub fn count_removed_attributes(&self) -> usize { + self.changes + .get(&SchemaItemType::Attributes) + .map(|v| { + v.iter() + .filter(|c| matches!(c, SchemaItemChange::Removed { .. 
})) + .count() + }) + .unwrap_or(0) + } + + /// Returns all the renamed to existing attributes changes. + #[must_use] + pub fn renamed_to_existing_attributes(&self) -> Vec<&SchemaItemChange> { + self.changes + .get(&SchemaItemType::Attributes) + .map(|v| { + v.iter() + .filter(|c| matches!(c, SchemaItemChange::RenamedToExisting { .. })) + .collect() + }) + .unwrap_or_default() + } + + /// Returns all the renamed to new attributes changes. + #[must_use] + pub fn renamed_to_new_attributes(&self) -> Vec<&SchemaItemChange> { + self.changes + .get(&SchemaItemType::Attributes) + .map(|v| { + v.iter() + .filter(|c| matches!(c, SchemaItemChange::RenamedToNew { .. })) + .collect() + }) + .unwrap_or_default() + } + + /// Add a `SchemaItemChange` to the list of changes for the specified schema item type. + pub fn add_change(&mut self, item_type: SchemaItemType, change: SchemaItemChange) { + self.changes + .get_mut(&item_type) + .expect("All the possible schema item types should be initialized.") + .push(change); + } + + /// Set the head manifest for the schema changes. + pub fn set_head_manifest(&mut self, head: RegistryManifest) { + self.head = head; + } + + /// Set the baseline manifest for the schema changes. + pub fn set_baseline_manifest(&mut self, baseline: RegistryManifest) { + self.baseline = baseline; + } + + /// Return a string representation of the statistics on the schema changes. + #[must_use] + pub fn dump_stats(&self) -> String { + fn print_changes( + changes: Option<&Vec<SchemaItemChange>>, + item_type: &str, + result: &mut String, + ) { + if let Some(changes) = changes { + result.push_str(&format!("{}:\n", item_type)); + result.push_str(&format!( + " Added: {}\n", + changes + .iter() + .filter(|c| matches!(c, SchemaItemChange::Added { .. })) + .count() + )); + result.push_str(&format!( + " Renamed to new: {}\n", + changes + .iter() + .filter(|c| matches!(c, SchemaItemChange::RenamedToNew { .. 
})) + .count() + )); + result.push_str(&format!( + " Renamed to existing: {}\n", + changes + .iter() + .filter(|c| matches!(c, SchemaItemChange::RenamedToExisting { .. })) + .count() + )); + result.push_str(&format!( + " Deprecated: {}\n", + changes + .iter() + .filter(|c| matches!(c, SchemaItemChange::Deprecated { .. })) + .count() + )); + result.push_str(&format!( + " Removed: {}\n", + changes + .iter() + .filter(|c| matches!(c, SchemaItemChange::Removed { .. })) + .count() + )); + } + } + + let mut result = String::new(); + + result.push_str("Schema Changes:\n"); + + print_changes( + self.changes.get(&SchemaItemType::Attributes), + "Attributes", + &mut result, + ); + print_changes( + self.changes.get(&SchemaItemType::Metrics), + "Metrics", + &mut result, + ); + print_changes( + self.changes.get(&SchemaItemType::Events), + "Events", + &mut result, + ); + print_changes( + self.changes.get(&SchemaItemType::Spans), + "Spans", + &mut result, + ); + print_changes( + self.changes.get(&SchemaItemType::Resources), + "Resources", + &mut result, + ); + + result + } +} diff --git a/crates/weaver_version/src/spans_change.rs b/crates/weaver_version/src/spans_changes.rs similarity index 66% rename from crates/weaver_version/src/spans_change.rs rename to crates/weaver_version/src/spans_changes.rs index 57050cbd..5e0826dd 100644 --- a/crates/weaver_version/src/spans_change.rs +++ b/crates/weaver_version/src/spans_changes.rs @@ -1,11 +1,19 @@ // SPDX-License-Identifier: Apache-2.0 -//! Spans change specification. +//! Changes to apply to the spans specification for a specific version. use schemars::JsonSchema; use serde::{Deserialize, Serialize}; use std::collections::HashMap; +/// Changes to apply to the spans specification for a specific version. +#[derive(Serialize, Deserialize, Debug, Default, Clone, JsonSchema)] +#[serde(deny_unknown_fields)] +pub struct SpansChanges { + /// Changes to apply to the spans specification for a specific version. 
+ pub changes: Vec, +} + /// Changes to apply to the spans specification for a specific version. #[derive(Serialize, Deserialize, Debug, Default, Clone, JsonSchema)] #[serde(deny_unknown_fields)] diff --git a/crates/weaver_version/src/spans_version.rs b/crates/weaver_version/src/spans_version.rs deleted file mode 100644 index 8fcdc6ef..00000000 --- a/crates/weaver_version/src/spans_version.rs +++ /dev/null @@ -1,15 +0,0 @@ -// SPDX-License-Identifier: Apache-2.0 - -//! Changes to apply to the spans specification for a specific version. - -use crate::spans_change::SpansChange; -use schemars::JsonSchema; -use serde::{Deserialize, Serialize}; - -/// Changes to apply to the spans specification for a specific version. -#[derive(Serialize, Deserialize, Debug, Default, Clone, JsonSchema)] -#[serde(deny_unknown_fields)] -pub struct SpansVersion { - /// Changes to apply to the spans specification for a specific version. - pub changes: Vec, -} diff --git a/defaults/diff_templates/ansi-stats/diff.txt.j2 b/defaults/diff_templates/ansi-stats/diff.txt.j2 new file mode 100644 index 00000000..745c9bc1 --- /dev/null +++ b/defaults/diff_templates/ansi-stats/diff.txt.j2 @@ -0,0 +1,18 @@ + +{{ ("Summary of Schema Changes between " ~ ctx.head.semconv_version ~ " and " ~ ctx.baseline.semconv_version) | ansi_red | ansi_bold }} + +{% set item_types = ["attributes", "metrics", "events", "spans", "resources"] %} +{% for item_type in item_types %} +{% set added_count = ctx.changes[item_type] | selectattr('type', 'equalto', 'added') | count %} +{% set renamed_to_new_count = ctx.changes[item_type] | selectattr('type', 'equalto', 'renamed_to_new') | count %} +{% set renamed_to_existing_count = ctx.changes[item_type] | selectattr('type', 'equalto', 'renamed_to_existing') | count %} +{% set deprecated_count = ctx.changes[item_type] | selectattr('type', 'equalto', 'deprecated') | count %} +{% set removed_count = ctx.changes[item_type] | selectattr('type', 'equalto', 'removed') | count %} +{{ 
((item_type | title_case) ~ " Changes") | ansi_green }} + Added : {{ added_count }} + Renamed to new : {{ renamed_to_new_count }} + Renamed to existing: {{ renamed_to_existing_count }} + Deprecated : {{ deprecated_count }} + Removed : {{ removed_count }} + +{% endfor %} diff --git a/defaults/diff_templates/ansi-stats/weaver.yaml b/defaults/diff_templates/ansi-stats/weaver.yaml new file mode 100644 index 00000000..b09bafbc --- /dev/null +++ b/defaults/diff_templates/ansi-stats/weaver.yaml @@ -0,0 +1,9 @@ +# Whitespace control settings to simplify the definition of templates +whitespace_control: + trim_blocks: true + lstrip_blocks: true + +templates: + - template: diff.txt.j2 + filter: . + application_mode: single \ No newline at end of file diff --git a/defaults/diff_templates/ansi/diff.txt.j2 b/defaults/diff_templates/ansi/diff.txt.j2 new file mode 100644 index 00000000..858312e2 --- /dev/null +++ b/defaults/diff_templates/ansi/diff.txt.j2 @@ -0,0 +1,41 @@ +{%- set item_types = ["attributes", "metrics", "events", "spans", "resources"] %} + +{{ ("Schema Changes between `" ~ ctx.head.semconv_version ~ "` and `" ~ ctx.baseline.semconv_version ~ "`") | ansi_red | ansi_bold }} + +{% for item_type in item_types %} +{% if ctx.changes[item_type] %} +{{ ("List of Changes to " ~ (item_type | title_case)) | ansi_cyan | ansi_bold }} +{% set attributes = ctx.changes[item_type] | selectattr('type', 'equalto', 'added') | sort(attribute='name') -%} +{% if attributes -%} +{{ ("New " ~ item_type ~ ":") | ansi_bold | ansi_green }} + {% for added in attributes %} + - Add {{ added.name | ansi_bright_white }} + {% endfor %} + +{% endif %} +{% set attributes = ctx.changes[item_type] | selectattr('type', 'equalto', 'renamed_to_new') | sort(attribute='name') -%} +{% if attributes -%} +{{ ("Rename to new " ~ item_type ~ ":") | ansi_bold | ansi_green }} + {% for renamed in attributes %} + - Rename {{ renamed.old_names | map('ansi_bright_white') | join(', ') }} to {{ renamed.new_name | 
ansi_bright_white }} + {% endfor %} + +{% endif %} +{% set attributes = ctx.changes[item_type] | selectattr('type', 'equalto', 'renamed_to_existing') | sort(attribute='name') -%} +{% if attributes -%} +{{ ("Rename to existing " ~ item_type ~ ":") | ansi_bold | ansi_green }} + {% for renamed in attributes %} + - Rename {{ renamed.old_names | map('ansi_bright_white') | join(', ') }} to {{ renamed.current_name | ansi_bright_white }} + {% endfor %} + +{% endif %} +{% set attributes = ctx.changes[item_type] | selectattr('type', 'equalto', 'deprecated') | sort(attribute='name') -%} +{% if attributes -%} +{{ ("Deprecated " ~ item_type ~ ":") | ansi_bold | ansi_green }} + {% for deprecated in attributes %} + - {{ deprecated.name | ansi_bright_white }} (Note: {{ deprecated.note }}) + {% endfor %} + +{% endif %} +{% endif %} +{% endfor %} diff --git a/defaults/diff_templates/ansi/weaver.yaml b/defaults/diff_templates/ansi/weaver.yaml new file mode 100644 index 00000000..b09bafbc --- /dev/null +++ b/defaults/diff_templates/ansi/weaver.yaml @@ -0,0 +1,9 @@ +# Whitespace control settings to simplify the definition of templates +whitespace_control: + trim_blocks: true + lstrip_blocks: true + +templates: + - template: diff.txt.j2 + filter: . + application_mode: single \ No newline at end of file diff --git a/defaults/diff_templates/json/diff.json.j2 b/defaults/diff_templates/json/diff.json.j2 new file mode 100644 index 00000000..86ce8997 --- /dev/null +++ b/defaults/diff_templates/json/diff.json.j2 @@ -0,0 +1 @@ +{{ ctx | tojson(true) }} \ No newline at end of file diff --git a/defaults/diff_templates/json/weaver.yaml b/defaults/diff_templates/json/weaver.yaml new file mode 100644 index 00000000..596f9ba5 --- /dev/null +++ b/defaults/diff_templates/json/weaver.yaml @@ -0,0 +1,9 @@ +# Whitespace control settings to simplify the definition of templates +whitespace_control: + trim_blocks: true + lstrip_blocks: true + +templates: + - template: diff.json.j2 + filter: . 
+ application_mode: single \ No newline at end of file diff --git a/defaults/diff_templates/markdown/diff.md.j2 b/defaults/diff_templates/markdown/diff.md.j2 new file mode 100644 index 00000000..9fc2f171 --- /dev/null +++ b/defaults/diff_templates/markdown/diff.md.j2 @@ -0,0 +1,39 @@ +{%- set item_types = ["attributes", "metrics", "events", "spans", "resources"] -%} +# Schema Changes between {{ ctx.baseline.semconv_version }} and {{ ctx.head.semconv_version }} +{% for item_type in item_types %} +{% if ctx.changes[item_type] %} +## {{ item_type | title_case }} +{% set attributes = ctx.changes[item_type] | selectattr('type', 'equalto', 'added') | sort(attribute='name') -%} +{% if attributes -%} +### New {{ item_type }}: + {% for added in attributes %} +- Add {{ added.name }} + {% endfor %} + +{% endif %} +{% set attributes = ctx.changes[item_type] | selectattr('type', 'equalto', 'renamed_to_new') | sort(attribute='name') -%} +{% if attributes -%} +### Rename to new {{ item_type }}: + {% for renamed in attributes %} +- Rename {{ renamed.old_names | join(', ') }} to {{ renamed.new_name }} + {% endfor %} + +{% endif %} +{% set attributes = ctx.changes[item_type] | selectattr('type', 'equalto', 'renamed_to_existing') | sort(attribute='name') -%} +{% if attributes -%} +### Rename to existing {{ item_type }}: + {% for renamed in attributes %} +- Rename {{ renamed.old_names | join(', ') }} to {{ renamed.current_name }} + {% endfor %} + +{% endif %} +{% set attributes = ctx.changes[item_type] | selectattr('type', 'equalto', 'deprecated') | sort(attribute='name') -%} +{% if attributes -%} +### Deprecated {{ item_type }}: + {% for deprecated in attributes %} +- {{ deprecated.name }} (Note: {{ deprecated.note }}) + {% endfor %} + +{% endif %} +{% endif %} +{% endfor %} diff --git a/defaults/diff_templates/markdown/weaver.yaml b/defaults/diff_templates/markdown/weaver.yaml new file mode 100644 index 00000000..29aa2363 --- /dev/null +++ 
b/defaults/diff_templates/markdown/weaver.yaml @@ -0,0 +1,9 @@ +# Whitespace control settings to simplify the definition of templates +whitespace_control: + trim_blocks: true + lstrip_blocks: true + +templates: + - template: diff.md.j2 + filter: . + application_mode: single \ No newline at end of file diff --git a/defaults/diff_templates/yaml/diff.yaml.j2 b/defaults/diff_templates/yaml/diff.yaml.j2 new file mode 100644 index 00000000..d5fd20cf --- /dev/null +++ b/defaults/diff_templates/yaml/diff.yaml.j2 @@ -0,0 +1 @@ +{{ ctx | toyaml }} \ No newline at end of file diff --git a/defaults/diff_templates/yaml/weaver.yaml b/defaults/diff_templates/yaml/weaver.yaml new file mode 100644 index 00000000..0f9a2c2c --- /dev/null +++ b/defaults/diff_templates/yaml/weaver.yaml @@ -0,0 +1,9 @@ +# Whitespace control settings to simplify the definition of templates +whitespace_control: + trim_blocks: true + lstrip_blocks: true + +templates: + - template: diff.yaml.j2 + filter: . + application_mode: single \ No newline at end of file diff --git a/docs/schema-changes.md b/docs/schema-changes.md new file mode 100644 index 00000000..2a879697 --- /dev/null +++ b/docs/schema-changes.md @@ -0,0 +1,119 @@ +# Schema Changes Data Model + +## Introduction + +Weaver can be used to compare two versions of a semantic convention registry +and generate a diff report. This document describes the data model used to +represent the differences between two versions of a semantic convention +registry, as well as the diffing process. This diff report can be used to: + +- Understand the changes between two versions of a semantic convention registry. +- Update the OpenTelemetry Schema file, section versions, with the new version. +- Generate a migration guide for users of the semantic convention registry. +- Generate a SQL DDL script to update a database schema. +- And more. 
+ +> Note: The current implementation of the diffing process focuses on the top-level +items (attributes, metrics, events, spans, resources) and does not compare the fields +of those top-level items. + +## Data Model + +The schema changes data model is composed of the following components: + +- `head`: The registry manifest of the most recent version of the semantic + convention registry used in the diffing process. +- `baseline`: The registry manifest of the oldest version of the semantic + convention registry used in the diffing process. +- `changes`: A dictionary of changes between the head and baseline registries. + The dictionary is composed of the following keys (when applicable): + - `attributes`: A list of changes to attributes. + - `metrics`: A list of changes to metrics. + - `events`: A list of changes to events. + - `spans`: A list of changes to spans. + - `resources`: A list of changes to resources. + +Each key in the changes dictionary maps to a list of schema changes, each of +which is represented by one of the following types: + +- `added`: A new schema item (e.g., attribute, metric, etc.) was added in the + head registry. The name of the new item is stored in the name attribute. +- `renamed_to_new`: One or more schema items in the baseline registry were + renamed to the same new name in the head registry. The old names of the + items are stored in the old_names attribute, and the new name is stored in + the new_name attribute. +- `renamed_to_existing`: One or more schema items in the baseline registry were + renamed to an existing item in the head registry. The old names of the items + are stored in the old_names attribute, and the existing item name is stored + in the current_name attribute. +- `deprecated`: An item in the baseline registry was deprecated in the head + registry. The name of the deprecated item is stored in the name attribute, + and the deprecation note is stored in the note attribute.
+- `removed`: An item in the baseline registry was removed in the head + registry. The name of the removed item is stored in the name attribute. + +> Note: Although the removed schema change type is a valid output of the diffing +process, it should never be present in the diff report between two versions of +well-formed semantic convention registries. The policy for semantic convention +registries is to deprecate items instead of removing them. + +Example Schema Diff in YAML + +```yaml +head: + semconv_version: v1.27.0 +baseline: + semconv_version: v1.26.0 +changes: + attributes: + - type: deprecated + name: http.server_name + note: deprecated + - type: added + name: user.email + - ... + events: + - type: added + name: exception + - ... + metrics: + - type: added + name: go.goroutine.count + - type: deprecated + name: db.client.connections.max + note: Deprecated + - ... +``` + +## Diffing Process + +The following rules are applied during the diffing process to generate the schema +changes for attributes: + +1. Deprecations: + - If an attribute in the latest schema is now marked as deprecated, it is + classified into the following cases: + - Renamed to new: Attributes in the deprecated metadata pointing to a new + attribute are marked as renamed_to_new. + - Renamed to existing: Attributes in the deprecated metadata pointing to an + already existing attribute are marked as renamed_to_existing. + - Deprecated without replacement: The attribute is marked as deprecated with + no replacement. +1. Additions: + - If an attribute exists in the latest schema but not in the baseline, it is + classified as a new attribute. + - However, if this new attribute is referenced in the deprecated metadata of an + old attribute, it is considered a renamed attribute. +1. Removals: + - Attributes present in the baseline but missing in the latest schema are marked + as removed. This should not happen if registry evolution processes are followed. 
+ +The diffing process for the signals (metrics, events, spans, resources) is similar +to the attributes comparison. + +## Future Evolutions + +The current implementation of the diffing process focuses on the top-level schema +items (attributes, metrics, events, spans, resources). Future evolutions of the +diffing process could generate a more detailed diff report by comparing the fields +of those top-level schema items. \ No newline at end of file diff --git a/docs/usage.md b/docs/usage.md index e3549cdf..b535c1f3 100644 --- a/docs/usage.md +++ b/docs/usage.md @@ -164,6 +164,69 @@ Options: Print help (see a summary with '-h') ``` +## registry diff + +``` +Generate a diff between two versions of a semantic convention registry. + +This diff can then be rendered in multiple formats: +- a console-friendly format (default: ansi), +- a structured document in JSON or YAML format, +- ... + +Usage: weaver registry diff [OPTIONS] --baseline-registry <BASELINE_REGISTRY> [OUTPUT] + +Arguments: + [OUTPUT] + Path to the directory where the generated artifacts will be saved. Default is the `output` directory + + [default: output] + +Options: + --debug... + Turn debugging information on + + -r, --registry <REGISTRY> + Local folder, Git repo URL, or Git archive URL of the semantic convention registry. For Git URLs, a sub-folder can be specified using the `[sub-folder]` syntax after the URL + + [default: https://github.com/open-telemetry/semantic-conventions.git[model]] + + --baseline-registry <BASELINE_REGISTRY> + Parameters to specify the baseline semantic convention registry + + --quiet + Turn the quiet mode on (i.e., minimal output) + + --diff-format <DIFF_FORMAT> + Format used to render the schema changes. Predefined formats are: ansi, json, yaml, and markdown + + [default: ansi] + + --future + Enable the most recent validation rules for the semconv registry. It is recommended to enable this flag when checking a new registry.
Note: `semantic_conventions` main branch should always enable this flag + + --diff-template + Path to the directory where the schema changes templates are located + + [default: diff_templates] + + --diagnostic-format + Format used to render the diagnostic messages. Predefined formats are: ansi, json, gh_workflow_command + + [default: ansi] + + --diagnostic-template + Path to the directory where the diagnostic templates are located + + [default: diagnostic_templates] + + -s, --follow-symlinks + Boolean flag to specify whether to follow symlinks when loading the registry. Default is false + + -h, --help + Print help (see a summary with '-h') +``` + ## registry update-markdown ``` diff --git a/src/registry/check.rs b/src/registry/check.rs index 57ee4bdf..6e38cdd7 100644 --- a/src/registry/check.rs +++ b/src/registry/check.rs @@ -77,16 +77,14 @@ pub(crate) fn command( (baseline_registry_repo, baseline_semconv_specs) { let mut baseline_registry = SemConvRegistry::from_semconv_specs( - baseline_registry_repo.id(), + &baseline_registry_repo, baseline_semconv_specs, - ); + )?; let baseline_resolved_schema = resolve_semconv_specs(&mut baseline_registry, logger.clone()) - .combine_diag_msgs_with(&diag_msgs)?; + .capture_non_fatal_errors(&mut diag_msgs)?; let baseline_resolved_registry = ResolvedRegistry::try_from_resolved_registry( - baseline_resolved_schema - .registry(baseline_registry_repo.id()) - .expect("Failed to get the registry from the baseline resolved schema"), + &baseline_resolved_schema.registry, baseline_resolved_schema.catalog(), ) .combine_diag_msgs_with(&diag_msgs)?; diff --git a/src/registry/diff.rs b/src/registry/diff.rs new file mode 100644 index 00000000..574de539 --- /dev/null +++ b/src/registry/diff.rs @@ -0,0 +1,157 @@ +// SPDX-License-Identifier: Apache-2.0 + +//! Generate a diff between two versions of a semantic convention registry. 
+ +use crate::registry::Error::DiffRender; +use crate::registry::RegistryArgs; +use crate::util::{load_semconv_specs, resolve_telemetry_schema}; +use crate::{DiagnosticArgs, ExitDirectives}; +use clap::Args; +use include_dir::{include_dir, Dir}; +use miette::Diagnostic; +use serde::Serialize; +use std::path::PathBuf; +use weaver_cache::registry_path::RegistryPath; +use weaver_cache::RegistryRepo; +use weaver_common::diagnostic::{DiagnosticMessage, DiagnosticMessages}; +use weaver_common::Logger; +use weaver_forge::config::{Params, WeaverConfig}; +use weaver_forge::file_loader::EmbeddedFileLoader; +use weaver_forge::{OutputDirective, TemplateEngine}; + +/// Embedded default schema changes templates +pub(crate) static DEFAULT_DIFF_TEMPLATES: Dir<'_> = include_dir!("defaults/diff_templates"); + +/// Parameters for the `registry diff` sub-command +#[derive(Debug, Args)] +pub struct RegistryDiffArgs { + /// Parameters to specify the semantic convention registry + #[command(flatten)] + registry: RegistryArgs, + + /// Parameters to specify the baseline semantic convention registry + #[arg(long)] + baseline_registry: RegistryPath, + + /// Format used to render the schema changes. Predefined formats are: ansi, json, + /// yaml, and markdown. + #[arg(long, default_value = "ansi")] + diff_format: String, + + /// Path to the directory where the schema changes templates are located. + #[arg(long, default_value = "diff_templates")] + diff_template: PathBuf, + + /// Path to the directory where the generated artifacts will be saved. + /// Default is the `output` directory. + #[arg(default_value = "output")] + output: PathBuf, + + /// Parameters to specify the diagnostic format. + #[command(flatten)] + pub(crate) diagnostic: DiagnosticArgs, +} + +/// An error that can occur while generating the diff between two versions of the same +/// semantic convention registry. 
+#[derive(thiserror::Error, Debug, Clone, PartialEq, Serialize, Diagnostic)] +#[non_exhaustive] +pub enum Error { + /// Writing to the file failed. + #[error("Writing to the file ‘{file}’ failed for the following reason: {error}")] + WriteError { + /// The path to the output file. + file: PathBuf, + /// The error that occurred. + error: String, + }, +} + +impl From<Error> for DiagnosticMessages { + fn from(error: Error) -> Self { + DiagnosticMessages::new(vec![DiagnosticMessage::new(error)]) + } +} + +/// Generate a diff between two versions of a semantic convention registry. +pub(crate) fn command( + logger: impl Logger + Sync + Clone, + args: &RegistryDiffArgs, +) -> Result<ExitDirectives, DiagnosticMessages> { + let mut diag_msgs = DiagnosticMessages::empty(); + logger.log("Weaver Registry Diff"); + logger.loading(&format!("Checking registry `{}`", args.registry.registry)); + + let registry_path = args.registry.registry.clone(); + let main_registry_repo = RegistryRepo::try_new("main", &registry_path)?; + let baseline_registry_repo = RegistryRepo::try_new("baseline", &args.baseline_registry)?; + let main_semconv_specs = load_semconv_specs( + &main_registry_repo, + logger.clone(), + args.registry.follow_symlinks, + ) + .capture_non_fatal_errors(&mut diag_msgs)?; + let baseline_semconv_specs = load_semconv_specs( + &baseline_registry_repo, + logger.clone(), + args.registry.follow_symlinks, + ) + .capture_non_fatal_errors(&mut diag_msgs)?; + + let main_resolved_schema = + resolve_telemetry_schema(&main_registry_repo, main_semconv_specs, logger.clone()) + .capture_non_fatal_errors(&mut diag_msgs)?; + let baseline_resolved_schema = resolve_telemetry_schema( + &baseline_registry_repo, + baseline_semconv_specs, + logger.clone(), + ) + .capture_non_fatal_errors(&mut diag_msgs)?; + + // Generate the diff between the two versions of the registries.
+ let changes = main_resolved_schema.diff(&baseline_resolved_schema); + + if diag_msgs.has_error() { + return Err(diag_msgs); + } + + let loader = EmbeddedFileLoader::try_new( + &DEFAULT_DIFF_TEMPLATES, + args.diff_template.clone(), + &args.diff_format, + ) + .expect("Failed to create the embedded file loader for the diff templates"); + let config = WeaverConfig::try_from_loader(&loader) + .expect("Failed to load `defaults/diff_templates/weaver.yaml`"); + let engine = TemplateEngine::new(config, loader, Params::default()); + let output_directive = if args.diff_format == "ansi" || args.diff_format == "ansi-stats" { + OutputDirective::Stdout + } else { + OutputDirective::File + }; + + match engine.generate( + logger.clone(), + &changes, + args.output.as_path(), + &output_directive, + ) { + Ok(_) => {} + Err(e) => { + return Err(DiagnosticMessages::from(DiffRender { + error: e.to_string(), + })); + } + } + + Ok(ExitDirectives { + exit_code: 0, + quiet_mode: false, + }) +} + +#[cfg(test)] +mod tests { + #[test] + fn test_registry_diff() {} +} diff --git a/src/registry/mod.rs b/src/registry/mod.rs index 50ad8257..19d4f0ec 100644 --- a/src/registry/mod.rs +++ b/src/registry/mod.rs @@ -8,6 +8,7 @@ use clap::{Args, Subcommand}; use miette::Diagnostic; use serde::Serialize; +use crate::registry::diff::RegistryDiffArgs; use crate::registry::generate::RegistryGenerateArgs; use crate::registry::json_schema::RegistryJsonSchemaArgs; use crate::registry::resolve::RegistryResolveArgs; @@ -21,6 +22,7 @@ use weaver_common::diagnostic::{DiagnosticMessage, DiagnosticMessages}; use weaver_common::Logger; mod check; +mod diff; mod generate; mod json_schema; mod resolve; @@ -39,6 +41,10 @@ pub enum Error { /// Invalid params file passed to the command line #[error("The params file `{params_file}` is invalid. 
{error}")] InvalidParams { params_file: PathBuf, error: String }, + + /// Failed to render the registry diff + #[error("Failed to render the registry diff: {error}")] + DiffRender { error: String }, } impl From for DiagnosticMessages { @@ -101,6 +107,14 @@ pub enum RegistrySubCommand { /// The produced JSON Schema can be used to generate documentation of the resolved registry format or to generate code in your language of choice if you need to interact with the resolved registry format for any reason. #[clap(verbatim_doc_comment)] JsonSchema(RegistryJsonSchemaArgs), + /// Generate a diff between two versions of a semantic convention registry. + /// + /// This diff can then be rendered in multiple formats: + /// - a console-friendly format (default: ansi), + /// - a structured document in JSON or YAML format, + /// - ... + #[clap(verbatim_doc_comment)] + Diff(RegistryDiffArgs), } /// Set of parameters used to specify a semantic convention registry. @@ -171,5 +185,9 @@ pub fn semconv_registry(log: impl Logger + Sync + Clone, command: &RegistryComma json_schema::command(log.clone(), args), Some(args.diagnostic.clone()), ), + RegistrySubCommand::Diff(args) => CmdResult::new( + diff::command(log.clone(), args), + Some(args.diagnostic.clone()), + ), } } diff --git a/src/registry/resolve.rs b/src/registry/resolve.rs index 5a758e85..348bbe75 100644 --- a/src/registry/resolve.rs +++ b/src/registry/resolve.rs @@ -60,7 +60,6 @@ pub(crate) fn command( logger.loading(&format!("Resolving registry `{}`", args.registry.registry)); let mut diag_msgs = DiagnosticMessages::empty(); - let (registry, _) = prepare_main_registry(&args.registry, &args.policy, logger.clone(), &mut diag_msgs)?; diff --git a/src/registry/search.rs b/src/registry/search.rs index 3270e9ea..c59bd27c 100644 --- a/src/registry/search.rs +++ b/src/registry/search.rs @@ -106,17 +106,12 @@ impl<'a> SearchApp<'a> { .title_alignment(ratatui::layout::Alignment::Center) .title_style(Style::default().fg(Color::Green)) 
.title("Weaver Search"); - let group_count: usize = self - .schema - .registries - .values() - .map(|r| r.stats().group_count) - .sum(); + let group_count: usize = self.schema.registry.stats().group_count; let title_contents = Line::from(vec![Span::styled( format!( "Loaded {0:?} groups w/ {1} attributes", group_count, - self.schema.catalog.attributes.len() + self.schema.catalog.count_attributes() ), Style::default().fg(Color::Gray), )]); @@ -132,7 +127,6 @@ impl<'a> SearchApp<'a> { fn result_set(&'a self) -> impl Iterator { self.schema .catalog - .attributes .iter() .filter(|a| a.name.contains(self.search_string().as_str())) } @@ -362,7 +356,6 @@ fn run_ui(schema: &ResolvedTelemetrySchema) -> Result<(), Error> { fn run_command_line_search(schema: &ResolvedTelemetrySchema, pattern: &str) { let results = schema .catalog() - .attributes .iter() .filter(|a| a.name.contains(pattern)) .map(|a| a.name.to_owned()) @@ -376,9 +369,8 @@ pub(crate) fn command( ) -> Result { logger.loading(&format!("Resolving registry `{}`", args.registry.registry)); - let registry_id = "default"; + let mut diag_msgs = DiagnosticMessages::empty(); let registry_path = &args.registry.registry; - let registry_repo = RegistryRepo::try_new("main", registry_path)?; // Load the semantic convention registry into a local cache. @@ -389,8 +381,9 @@ pub(crate) fn command( ) .ignore(|e| matches!(e.severity(), Some(miette::Severity::Warning))) .into_result_failing_non_fatal()?; - let mut registry = SemConvRegistry::from_semconv_specs(registry_id, semconv_specs); - let schema = resolve_semconv_specs(&mut registry, logger.clone())?; + let mut registry = SemConvRegistry::from_semconv_specs(&registry_repo, semconv_specs)?; + let schema = resolve_semconv_specs(&mut registry, logger.clone()) + .capture_non_fatal_errors(&mut diag_msgs)?; // We should have two modes: // 1. a single input we take in and directly output some rendered result.
@@ -407,6 +400,11 @@ pub(crate) fn command( quiet_mode: false, }); } + + if !diag_msgs.is_empty() { + return Err(diag_msgs); + } + Ok(ExitDirectives { exit_code: 0, quiet_mode: false, diff --git a/src/registry/stats.rs b/src/registry/stats.rs index 65cd4433..6297d7b2 100644 --- a/src/registry/stats.rs +++ b/src/registry/stats.rs @@ -37,9 +37,8 @@ pub(crate) fn command( args.registry.registry )); - let registry_id = "default"; + let mut diag_msgs = DiagnosticMessages::empty(); let registry_path = &args.registry.registry; - let registry_repo = RegistryRepo::try_new("main", registry_path)?; // Load the semantic convention registry into a local cache. @@ -50,12 +49,17 @@ pub(crate) fn command( ) .ignore(|e| matches!(e.severity(), Some(miette::Severity::Warning))) .into_result_failing_non_fatal()?; - let mut registry = SemConvRegistry::from_semconv_specs(registry_id, semconv_specs); + let mut registry = SemConvRegistry::from_semconv_specs(&registry_repo, semconv_specs)?; display_semconv_registry_stats(&registry); // Resolve the semantic convention registry.
- let resolved_schema = resolve_semconv_specs(&mut registry, logger)?; + let resolved_schema = + resolve_semconv_specs(&mut registry, logger).capture_non_fatal_errors(&mut diag_msgs)?; + + if !diag_msgs.is_empty() { + return Err(diag_msgs); + } display_schema_stats(&resolved_schema); Ok(ExitDirectives { diff --git a/src/util.rs b/src/util.rs index c0b48817..7b6a5010 100644 --- a/src/util.rs +++ b/src/util.rs @@ -9,7 +9,7 @@ use std::path::PathBuf; use weaver_cache::RegistryRepo; use weaver_checker::Error::{InvalidPolicyFile, PolicyViolation}; use weaver_checker::{Engine, Error, PolicyStage, SEMCONV_REGO}; -use weaver_common::diagnostic::{DiagnosticMessages, ResultExt}; +use weaver_common::diagnostic::{DiagnosticMessage, DiagnosticMessages, ResultExt}; use weaver_common::result::WResult; use weaver_common::Logger; use weaver_forge::registry::ResolvedRegistry; @@ -197,12 +197,37 @@ pub(crate) fn check_policy( pub(crate) fn resolve_semconv_specs( registry: &mut SemConvRegistry, logger: impl Logger + Sync + Clone, -) -> Result { +) -> WResult { let registry_id = registry.id().to_owned(); - let resolved_schema = SchemaResolver::resolve_semantic_convention_registry(registry)?; + match SchemaResolver::resolve_semantic_convention_registry(registry) { + WResult::Ok(resolved_schema) => { + logger.success(&format!("`{}` semconv registry resolved", registry_id)); + WResult::Ok(resolved_schema) + } + WResult::OkWithNFEs(resolved_schema, errs) => { + logger.success(&format!("`{}` semconv registry resolved", registry_id)); + let nfes = errs.into_iter().map(DiagnosticMessage::new).collect(); + WResult::OkWithNFEs(resolved_schema, nfes) + } + WResult::FatalErr(err) => WResult::FatalErr(DiagnosticMessage::new(err)), + } +} - logger.success(&format!("`{}` semconv registry resolved", registry_id)); - Ok(resolved_schema) +/// Resolves the telemetry schema from the given semantic convention specifications. 
+pub(crate) fn resolve_telemetry_schema( + registry_repo: &RegistryRepo, + semconv_specs: Vec<(String, SemConvSpec)>, + logger: impl Logger + Sync + Clone, +) -> WResult { + let mut registry = match SemConvRegistry::from_semconv_specs(registry_repo, semconv_specs) { + Ok(registry) => registry, + Err(e) => return WResult::FatalErr(DiagnosticMessage::new(e)), + }; + // Resolve the semantic convention specifications. + // If there are any resolution errors, they should be captured into the ongoing list of + // diagnostic messages and returned immediately because there is no point in continuing + // as the resolution is a prerequisite for the next stages. + resolve_semconv_specs(&mut registry, logger.clone()) } /// Resolves the main registry and optionally checks policies. @@ -267,14 +292,16 @@ pub(crate) fn prepare_main_registry( // Resolve the main registry let mut main_registry = - SemConvRegistry::from_semconv_specs(main_registry_repo.id(), main_semconv_specs); + SemConvRegistry::from_semconv_specs(&main_registry_repo, main_semconv_specs)?; + // Resolve the semantic convention specifications. + // If there are any resolution errors, they should be captured into the ongoing list of + // diagnostic messages and returned immediately because there is no point in continuing + // as the resolution is a prerequisite for the next stages. 
let main_resolved_schema = resolve_semconv_specs(&mut main_registry, logger.clone()) - .combine_diag_msgs_with(diag_msgs)?; + .capture_non_fatal_errors(diag_msgs)?; let main_resolved_registry = ResolvedRegistry::try_from_resolved_registry( - main_resolved_schema - .registry(main_registry_repo.id()) - .expect("Failed to get the registry from the resolved schema"), + &main_resolved_schema.registry, main_resolved_schema.catalog(), ) .combine_diag_msgs_with(diag_msgs)?; diff --git a/tests/resolution_process.rs b/tests/resolution_process.rs index b7217258..18f2f4f0 100644 --- a/tests/resolution_process.rs +++ b/tests/resolution_process.rs @@ -29,7 +29,6 @@ const SEMCONV_REGISTRY_MODEL: &str = "model"; #[test] fn test_cli_interface() { let log = TestLogger::new(); - let registry_id = "default"; // Load the official semantic convention registry into a local cache. // No parsing errors should be observed. @@ -47,7 +46,7 @@ fn test_cli_interface() { .unwrap_or_else(|e| { panic!("Failed to load the semantic convention specs, error: {e}"); }); - let semconv_specs = SemConvRegistry::from_semconv_specs(registry_id, semconv_specs); + let semconv_specs = SemConvRegistry::from_semconv_specs(&registry_repo, semconv_specs).unwrap(); // Check if the logger has reported any warnings or errors. assert_eq!(log.warn_count(), 0); @@ -57,6 +56,7 @@ fn test_cli_interface() { let mut attr_catalog = AttributeCatalog::default(); let resolved_registry = resolve_semconv_registry(&mut attr_catalog, SEMCONV_REGISTRY_URL, &semconv_specs) + .into_result_failing_non_fatal() .unwrap_or_else(|e| { panic!("Failed to resolve the official semantic convention registry, error: {e}"); });