sketching/
otel.rs

1use opentelemetry::{global, trace::TracerProvider as _, KeyValue};
2use opentelemetry_otlp::{Protocol, WithExportConfig};
3use opentelemetry_sdk::{
4    trace::{Sampler, SdkTracerProvider},
5    Resource,
6};
7use std::str::FromStr;
8use std::time::Duration;
9use tracing::Subscriber;
10use tracing_core::Level;
11use tracing_subscriber::{filter::Directive, prelude::*, EnvFilter, Registry};
12
/// Upper bound on the number of events recorded on a single span (64 Ki = 65 536).
/// Passed to the tracer provider builder in `start_logging_pipeline`.
pub const MAX_EVENTS_PER_SPAN: u32 = 1 << 16;
/// Upper bound on the number of attributes recorded on a single span.
/// Passed to the tracer provider builder in `start_logging_pipeline`.
pub const MAX_ATTRIBUTES_PER_SPAN: u32 = 1 << 7;
15
16use opentelemetry_semantic_conventions::{
17    attribute::{DEPLOYMENT_ENVIRONMENT_NAME, SERVICE_NAME, SERVICE_VERSION},
18    SCHEMA_URL,
19};
20
21// TODO: this is coming back later
22// #[allow(dead_code)]
23// pub fn init_metrics() -> metrics::Result<MeterProvider> {
24//     let export_config = opentelemetry_otlp::ExportConfig {
25//         endpoint: "http://localhost:4318/v1/metrics".to_string(),
26//         ..opentelemetry_otlp::ExportConfig::default()
27//     };
28//     opentelemetry_otlp::new_pipeline()
29//         .metrics(opentelemetry_sdk::runtime::Tokio)
30//         .with_exporter(
31//             opentelemetry_otlp::new_exporter()
32//                 .http()
33//                 .with_export_config(export_config),
34//         )
35//         .build()
36// }
37
38/// This does all the startup things for the logging pipeline
39pub fn start_logging_pipeline(
40    otlp_endpoint: &Option<String>,
41    log_filter: crate::LogLevel,
42    service_name: &'static str,
43) -> Result<(Option<SdkTracerProvider>, Box<dyn Subscriber + Send + Sync>), String> {
44    let forest_filter: EnvFilter = EnvFilter::builder()
45        .with_default_directive(log_filter.into())
46        .from_env_lossy()
47        // Always force the event span to be generated at the correct level, regardless
48        // of what the user set.
49        .add_directive(
50            Directive::from_str("kanidmd_core::https::trace=info")
51                .expect("Invalid directive during log setup"),
52        );
53
54    // TODO: work out how to do metrics things
55    match otlp_endpoint {
56        Some(endpoint) => {
57            // adding these filters because when you close out the process the OTLP comms layer is NOISY
58            // let forest_filter = forest_filter
59            //     .add_directive(
60            //         Directive::from_str("tonic=info").expect("Failed to set tonic logging to info"),
61            //     )
62            //     .add_directive(
63            //         Directive::from_str("h2=info").expect("Failed to set h2 logging to info"),
64            //     )
65            //     .add_directive(
66            //         Directive::from_str("hyper=info").expect("Failed to set hyper logging to info"),
67            //     );
68            // let forest_layer = tracing_forest::ForestLayer::default().with_filter(forest_filter);
69            let t_filter: EnvFilter = EnvFilter::builder()
70                .with_default_directive(log_filter.into())
71                .from_env_lossy();
72
73            let otlp_exporter = opentelemetry_otlp::SpanExporter::builder()
74                .with_tonic()
75                .with_endpoint(endpoint)
76                .with_protocol(Protocol::HttpBinary)
77                .with_timeout(Duration::from_secs(5))
78                .build()
79                .map_err(|err| err.to_string())?;
80
81            // this env var gets set at build time, if we can pull it, add it to the metadata
82            let git_rev = match option_env!("KANIDM_PKG_COMMIT_REV") {
83                Some(rev) => format!("-{rev}"),
84                None => "".to_string(),
85            };
86
87            let version = format!("{}{}", env!("CARGO_PKG_VERSION"), git_rev);
88            let hostname = gethostname::gethostname();
89            let hostname = hostname.to_string_lossy();
90            let hostname = hostname.to_lowercase();
91
92            let resource = Resource::builder()
93                .with_schema_url(
94                    [
95                        // TODO: it'd be really nice to be able to set the instance ID here, from the server UUID so we know *which* instance on this host is logging
96                        KeyValue::new(SERVICE_NAME, service_name),
97                        KeyValue::new(SERVICE_VERSION, version),
98                        KeyValue::new(DEPLOYMENT_ENVIRONMENT_NAME, hostname),
99                    ],
100                    SCHEMA_URL,
101                )
102                .build();
103
104            let provider = opentelemetry_sdk::trace::TracerProviderBuilder::default()
105                .with_batch_exporter(otlp_exporter)
106                // we want *everything!*
107                .with_sampler(Sampler::AlwaysOn)
108                .with_max_events_per_span(MAX_EVENTS_PER_SPAN)
109                .with_max_attributes_per_span(MAX_ATTRIBUTES_PER_SPAN)
110                .with_resource(resource)
111                .build();
112
113            let provider_handle = provider.clone();
114
115            global::set_tracer_provider(provider.clone());
116            provider.tracer("tracing-otel-subscriber");
117            use tracing_opentelemetry::OpenTelemetryLayer;
118
119            let registry = tracing_subscriber::registry()
120                .with(
121                    tracing_subscriber::filter::LevelFilter::from_level(Level::INFO)
122                        .with_filter(t_filter),
123                )
124                // .with(MetricsLayer::new(meter_provider.clone()))
125                // .with(forest_layer)
126                .with(OpenTelemetryLayer::new(
127                    provider.tracer("tracing-otel-subscriber"),
128                ));
129
130            Ok((Some(provider_handle), Box::new(registry)))
131        }
132        None => {
133            let forest_layer = tracing_forest::ForestLayer::default().with_filter(forest_filter);
134            Ok((None, Box::new(Registry::default().with(forest_layer))))
135        }
136    }
137}
138
139/// This helps with cleanly shutting down the tracing/logging providers when done,
140/// so we don't lose traces.
141pub struct TracingPipelineGuard(pub Option<SdkTracerProvider>);
142
143impl Drop for TracingPipelineGuard {
144    fn drop(&mut self) {
145        if let Some(provider) = self.0.take() {
146            if let Err(err) = provider.shutdown() {
147                eprintln!("Error shutting down logging pipeline: {}", err);
148            } else {
149                eprintln!("Logging pipeline completed shutdown");
150            }
151        }
152    }
153}