// kanidm_unixd/kanidm_unixd.rs

1#![deny(warnings)]
2#![warn(unused_extern_crates)]
3#![deny(clippy::todo)]
4#![deny(clippy::unimplemented)]
5#![deny(clippy::unwrap_used)]
6#![deny(clippy::expect_used)]
7#![deny(clippy::panic)]
8#![deny(clippy::unreachable)]
9#![deny(clippy::await_holding_lock)]
10#![deny(clippy::needless_pass_by_value)]
11#![deny(clippy::trivially_copy_pass_by_ref)]
12
13use clap::{Arg, ArgAction, Command};
14use futures::{SinkExt, StreamExt};
15use kanidm_client::KanidmClientBuilder;
16use kanidm_hsm_crypto::{
17    provider::{BoxedDynTpm, SoftTpm, Tpm},
18    AuthValue,
19};
20use kanidm_proto::constants::DEFAULT_CLIENT_CONFIG_PATH;
21use kanidm_proto::internal::OperationError;
22use kanidm_unix_common::constants::DEFAULT_CONFIG_PATH;
23use kanidm_unix_common::json_codec::JsonCodec;
24use kanidm_unix_common::unix_config::{HsmType, UnixdConfig};
25use kanidm_unix_common::unix_passwd::EtcDb;
26use kanidm_unix_common::unix_proto::{
27    ClientRequest, ClientResponse, TaskRequest, TaskRequestFrame, TaskResponse,
28};
29use kanidm_unix_resolver::db::{Cache, Db};
30use kanidm_unix_resolver::idprovider::interface::IdProvider;
31use kanidm_unix_resolver::idprovider::kanidm::KanidmProvider;
32use kanidm_unix_resolver::idprovider::system::SystemProvider;
33use kanidm_unix_resolver::resolver::{AuthSession, Resolver};
34use kanidm_utils_users::{get_current_gid, get_current_uid, get_effective_gid, get_effective_uid};
35use libc::umask;
36use lru::LruCache;
37use sketching::tracing::span;
38use sketching::tracing_forest::util::*;
39use sketching::tracing_forest::{self, traits::*};
40use std::collections::BTreeMap;
41use std::error::Error;
42use std::fs::metadata;
43use std::io::Error as IoError;
44use std::num::NonZeroUsize;
45use std::os::unix::fs::MetadataExt;
46use std::path::PathBuf;
47use std::process::ExitCode;
48use std::str::FromStr;
49use std::sync::Arc;
50use std::time::{Duration, SystemTime};
51use time::OffsetDateTime;
52use tokio::fs::File;
53use tokio::io::AsyncReadExt; // for read_to_end()
54use tokio::net::{UnixListener, UnixStream};
55use tokio::sync::broadcast;
56use tokio::sync::mpsc::{channel, Receiver, Sender};
57use tokio::sync::oneshot;
58use tokio_util::codec::Framed;
59
// When heap profiling is enabled, route every allocation through dhat so
// allocation sites can be recorded and analysed.
#[cfg(feature = "dhat-heap")]
#[global_allocator]
static ALLOC: dhat::Alloc = dhat::Alloc;
63
// Capacity of the per-connection LRU that tracks in-flight PAM auth
// sessions (see `handle_client`). The `expect` is evaluated in a const
// context at compile time, so it can never panic at runtime.
const DEFAULT_CONCURRENT_AUTH_SESSIONS: NonZeroUsize = NonZeroUsize::new(64)
    .expect("Invalid DEFAULT_CONCURRENT_AUTH_SESSIONS constant at compile time");
// Size and window for refresh debouncing. NOTE(review): the consumers of
// these constants are outside this chunk — presumably the resolver's
// refresh path; confirm against the rest of the file.
const REFRESH_DEBOUNCE_SIZE: NonZeroUsize =
    NonZeroUsize::new(16).expect("Invalid REFRESH_DEBOUNCE_SIZE constant at compile time");
const REFRESH_DEBOUNCE_WINDOW: Duration = Duration::from_secs(5);
69
/// A request destined for the privileged task daemon, paired with a oneshot
/// sender that is fired once the daemon acknowledges completion of the task.
struct AsyncTaskRequest {
    // The task to forward over the task socket.
    task_req: TaskRequest,
    // Completion notification back to whoever queued the task. Dropped
    // (never fired) if the task fails or times out.
    task_chan: oneshot::Sender<()>,
}
74
75fn rm_if_exist(p: &str) {
76    debug!("Attempting to remove file {}", p);
77    let _ = std::fs::remove_file(p).map_err(|e| match e.kind() {
78        std::io::ErrorKind::NotFound => {
79            debug!("{} not present, no need to remove.", p);
80        }
81        _ => {
82            error!(
83                "Failure while attempting to attempting to remove {} -> {}",
84                p,
85                e.to_string()
86            );
87        }
88    });
89}
90
/// Service a single connection from the privileged tasks daemon.
///
/// Requests arriving on `task_channel_rx` are stamped with a monotonically
/// increasing id, framed, and written to the socket. Responses may arrive
/// out of order and are matched back to their waiting oneshot handles by
/// that id. A message on `broadcast_rx` signals orderly shutdown.
///
/// Returns `Ok(())` on shutdown or when the task channel closes, and `Err`
/// on any socket/protocol failure so the caller can drop the connection.
async fn handle_task_client(
    stream: UnixStream,
    notify_shadow_change_tx: &Sender<EtcDb>,
    task_channel_rx: &mut Receiver<AsyncTaskRequest>,
    broadcast_rx: &mut broadcast::Receiver<bool>,
) -> Result<(), Box<dyn Error>> {
    // setup the codec, this is to the unix socket which the task daemon
    // connected to us with.
    let mut last_task_id: u64 = 0;
    // Outstanding tasks awaiting acknowledgement, keyed by task id.
    let mut task_handles: BTreeMap<u64, oneshot::Sender<()>> = BTreeMap::new();

    let codec: JsonCodec<TaskResponse, TaskRequestFrame> = JsonCodec::default();

    let mut framed_stream = Framed::new(stream, codec);

    loop {
        tokio::select! {
            biased; // tell tokio to poll these in order
            // We have been commanded to stop operation.
            _ = broadcast_rx.recv() => {
                return Ok(())
            }
            // We bias to *reading* messages in the resolver.
            response = framed_stream.next() => {
                // Process incoming messages. They may be out of order.
                match response {
                    Some(Ok(TaskResponse::Success(task_id))) => {
                        debug!("Task was acknowledged and completed.");

                        if let Some(handle) = task_handles.remove(&task_id) {
                            // Send a result back via the one-shot
                            // Ignore if it fails.
                            let _ = handle.send(());
                        }
                        // If the ID was unregistered, ignore.
                    }
                    Some(Ok(TaskResponse::NotifyShadowChange(etc_db))) => {
                        // The task daemon observed a shadow file change;
                        // forward the parsed db to the resolver. Send errors
                        // (receiver gone) are intentionally ignored.
                        let _ = notify_shadow_change_tx.send(etc_db).await;
                    }
                    // Other things ....
                    // Some(Ok(TaskResponse::ReloadSystemIds))

                    other => {
                        // Decode error or stream closed - tear down this
                        // connection.
                        error!("Error -> {:?}", other);
                        return Err(Box::new(IoError::other("oh no!")));
                    }
                }
            }
            task_request = task_channel_rx.recv() => {
                let Some(AsyncTaskRequest {
                    task_req,
                    task_chan
                }) = task_request else {
                    // Task channel has died, cease operation.
                    return Ok(())
                };

                debug!("Sending Task -> {:?}", task_req);

                // Allocate the next task id. Wrapping is not a practical
                // concern for a u64 counter over a connection's lifetime.
                last_task_id += 1;
                let task_id = last_task_id;

                // Setup the task handle so we know who to get back to.
                task_handles.insert(task_id, task_chan);

                let task_frame = TaskRequestFrame {
                    id: task_id,
                    req: task_req,
                };

                if let Err(err) = framed_stream.send(task_frame).await {
                    warn!("Unable to queue task for completion");
                    return Err(Box::new(err));
                }
                // Task sent
            }
        }
    }
}
170
/// Service a single client (NSS/PAM tool) connection on the resolver socket.
///
/// Each framed `ClientRequest` is dispatched against the resolver cache and
/// answered with a `ClientResponse`. PAM authentication is a multi-step
/// conversation: sessions are held in a bounded LRU keyed by a
/// per-connection counter, and a broadcast channel lets in-flight sessions
/// be cancelled if the client disconnects. Errors answering a request end
/// the connection.
async fn handle_client(
    sock: UnixStream,
    cachelayer: Arc<Resolver>,
    task_channel_tx: &Sender<AsyncTaskRequest>,
) {
    // Correlates all tracing spans for this connection.
    let conn_id = uuid::Uuid::new_v4();

    let Ok(ucred) = sock.peer_cred() else {
        error!("Unable to verify peer credentials, terminating connection.");
        return;
    };

    let codec: JsonCodec<ClientRequest, ClientResponse> = JsonCodec::default();

    let mut reqs = Framed::new(sock, codec);
    let mut session_id_counter: u64 = 1;
    // In-flight PAM auth sessions. Bounded so a misbehaving client cannot
    // accumulate unlimited session state.
    let mut pam_auth_session_state: LruCache<u64, AuthSession> =
        LruCache::new(DEFAULT_CONCURRENT_AUTH_SESSIONS);

    // Setup a broadcast channel so that if we have an unexpected disconnection, we can
    // tell consumers to stop work.
    let (shutdown_tx, _shutdown_rx) = broadcast::channel(1);

    while let Some(Ok(req)) = reqs.next().await {
        let maybe_err: Result<(), Box<dyn Error>> = async {
            debug!(uid = ?ucred.uid(), gid = ?ucred.gid(), pid = ?ucred.pid());

            let resp = match req {
                // Lookup failures for NSS-style queries degrade to empty
                // results rather than errors, matching NSS semantics.
                ClientRequest::SshKey(account_id) => cachelayer
                    .get_sshkeys(account_id.as_str())
                    .await
                    .map(ClientResponse::SshKeys)
                    .unwrap_or_else(|_| {
                        error!("unable to load keys, returning empty set.");
                        ClientResponse::SshKeys(vec![])
                    }),
                ClientRequest::NssAccounts => cachelayer
                    .get_nssaccounts()
                    .await
                    .map(ClientResponse::NssAccounts)
                    .unwrap_or_else(|_| {
                        error!("unable to enum accounts");
                        ClientResponse::NssAccounts(Vec::new())
                    }),
                // NOTE(review): this variant is named ByUid but resolves via
                // get_nssaccount_gid with a `gid` binding — confirm naming
                // against the protocol definition.
                ClientRequest::NssAccountByUid(gid) => cachelayer
                    .get_nssaccount_gid(gid)
                    .await
                    .map(ClientResponse::NssAccount)
                    .unwrap_or_else(|_| {
                        error!("unable to load account, returning empty.");
                        ClientResponse::NssAccount(None)
                    }),
                ClientRequest::NssAccountByName(account_id) => cachelayer
                    .get_nssaccount_name(account_id.as_str())
                    .await
                    .map(ClientResponse::NssAccount)
                    .unwrap_or_else(|_| {
                        error!("unable to load account, returning empty.");
                        ClientResponse::NssAccount(None)
                    }),
                ClientRequest::NssGroups => cachelayer
                    .get_nssgroups()
                    .await
                    .map(ClientResponse::NssGroups)
                    .unwrap_or_else(|_| {
                        error!("unable to enum groups");
                        ClientResponse::NssGroups(Vec::new())
                    }),
                ClientRequest::NssGroupByGid(gid) => cachelayer
                    .get_nssgroup_gid(gid)
                    .await
                    .map(ClientResponse::NssGroup)
                    .unwrap_or_else(|_| {
                        error!("unable to load group, returning empty.");
                        ClientResponse::NssGroup(None)
                    }),
                ClientRequest::NssGroupByName(grp_id) => cachelayer
                    .get_nssgroup_name(grp_id.as_str())
                    .await
                    .map(ClientResponse::NssGroup)
                    .unwrap_or_else(|_| {
                        error!("unable to load group, returning empty.");
                        ClientResponse::NssGroup(None)
                    }),
                // Start a new multi-step PAM authentication session.
                ClientRequest::PamAuthenticateInit { account_id, info } => {
                    #[allow(clippy::disallowed_methods)]
                    // Allowed as this is the time check for the remainder of the function and check.
                    let current_time = OffsetDateTime::now_utc();

                    match cachelayer
                        .pam_account_authenticate_init(
                            account_id.as_str(),
                            &info,
                            current_time,
                            shutdown_tx.subscribe(),
                        )
                        .await
                    {
                        Ok((auth_session, pam_auth_response)) => {
                            let session_id = session_id_counter;
                            session_id_counter += 1;

                            // NOTE(review): LruCache::push also returns Some
                            // when a least-recently-used entry is evicted at
                            // capacity, not only on key reuse — since
                            // session_id only increments, an eviction seems
                            // the likelier trigger here. Confirm intent.
                            if pam_auth_session_state.push(session_id, auth_session).is_some() {
                                // Something really bad is up, stop everything.
                                pam_auth_session_state.clear();
                                error!("session_id was reused, unable to proceed. cancelling all inflight authentication sessions.");
                                ClientResponse::Error(OperationError::KU001InitWhileSessionActive)
                            } else {
                                ClientResponse::PamAuthenticateStepResponse {
                                    response: pam_auth_response,
                                    session_id,
                                }
                            }
                        }
                        Err(_) => ClientResponse::Error(OperationError::KU004PamInitFailed),
                    }
                }
                // Continue an existing PAM authentication session.
                ClientRequest::PamAuthenticateStep {
                    request,
                    session_id,
                } => {
                    match pam_auth_session_state.get_mut(&session_id) {
                        Some(auth_session) => {
                            let response = cachelayer
                                .pam_account_authenticate_step(auth_session, request)
                                .await;

                            let is_complete = auth_session.is_complete();

                            // Release the reference so that we can manipulate the
                            // btreemap if needed
                            let _ = auth_session;

                            if let Ok(pam_auth_response) = response {
                                // What was the response? Is it a terminating case?
                                if is_complete {
                                    // Session finished - drop its state.
                                    pam_auth_session_state.pop(&session_id);
                                }

                                ClientResponse::PamAuthenticateStepResponse {
                                    response: pam_auth_response,
                                    session_id,
                                }
                            } else {
                                ClientResponse::Error(OperationError::KU003PamAuthFailed)
                            }
                        }
                        None => {
                            error!("Attempt to continue auth session, but session id was not present. There may be too many concurrent authentication sessions in progress.");
                            ClientResponse::Error(OperationError::KU002ContinueWhileSessionInActive)
                        }
                    }
                }
                ClientRequest::PamAccountAllowed(account_id) => cachelayer
                    .pam_account_allowed(account_id.as_str())
                    .await
                    .map(ClientResponse::PamStatus)
                    .unwrap_or(ClientResponse::Error(
                        OperationError::KU005ErrorCheckingAccount,
                    )),
                // Session open: may require the task daemon to provision a
                // home directory before PAM can proceed.
                ClientRequest::PamAccountBeginSession(account_id) => {
                    match cachelayer
                        .pam_account_beginsession(account_id.as_str())
                        .await
                    {
                        Ok(Some(info)) => {
                            let (tx, rx) = oneshot::channel();

                            // Queue the home-dir task, but never block pam
                            // for long: 100ms to submit, 1s for completion.
                            match task_channel_tx
                                .send_timeout(
                                    AsyncTaskRequest {
                                        task_req: TaskRequest::HomeDirectory(info),
                                        task_chan: tx,
                                    },
                                    Duration::from_millis(100),
                                )
                                .await
                            {
                                Ok(()) => {
                                    // Now wait for the other end OR timeout.
                                    match tokio::time::timeout_at(
                                        tokio::time::Instant::now() + Duration::from_millis(1000),
                                        rx,
                                    )
                                    .await
                                    {
                                        Ok(Ok(_)) => {
                                            debug!("Task completed, returning to pam ...");
                                            ClientResponse::Ok
                                        }
                                        _ => {
                                            // Timeout or other error.
                                            ClientResponse::Error(OperationError::KG001TaskTimeout)
                                        }
                                    }
                                }
                                Err(_) => {
                                    // We could not submit the req. Move on!
                                    ClientResponse::Error(OperationError::KG002TaskCommFailure)
                                }
                            }
                        }
                        Ok(None) => {
                            // The session can begin, but we do not need to create the home dir.
                            ClientResponse::Ok
                        }
                        Err(_) => ClientResponse::Error(OperationError::KU005ErrorCheckingAccount),
                    }
                }
                ClientRequest::InvalidateCache => cachelayer
                    .invalidate()
                    .await
                    .map(|_| ClientResponse::Ok)
                    .unwrap_or(ClientResponse::Error(OperationError::KG003CacheClearFailed)),
                // Destructive clear is restricted to root, verified via the
                // socket peer credential.
                ClientRequest::ClearCache => {
                    if ucred.uid() == 0 {
                        cachelayer
                            .clear_cache()
                            .await
                            .map(|_| ClientResponse::Ok)
                            .unwrap_or(ClientResponse::Error(OperationError::KG003CacheClearFailed))
                    } else {
                        error!("{}", OperationError::KU006OnlyRootAllowed);
                        ClientResponse::Error(OperationError::KU006OnlyRootAllowed)
                    }
                }
                ClientRequest::Status => {
                    let status = cachelayer.provider_status().await;
                    ClientResponse::ProviderStatus(status)
                }
            };

            trace!(?resp);

            reqs.send(resp).await
                .inspect_err(|err| {
                    error!(?err, "unable to send response");
                })?;
            reqs.flush().await
                .inspect_err(|err| {
                    error!(?err, "unable to flush response");
                })?;

            trace!("flushed response!");

            Ok(())
        }
            .instrument(
                span!(Level::INFO, "client request", uuid = %conn_id, defer = true)
            )
            .await;

        if maybe_err.is_err() {
            break;
        }
    }

    // Disconnect them
    // Signal any tasks that they need to stop.
    if let Err(shutdown_err) = shutdown_tx.send(()) {
        warn!(
            ?shutdown_err,
            "Unable to signal tasks to stop, they will naturally timeout instead."
        )
    }
}
441
442async fn read_hsm_pin(hsm_pin_path: &str) -> Result<Vec<u8>, Box<dyn Error>> {
443    if !PathBuf::from_str(hsm_pin_path)?.exists() {
444        return Err(std::io::Error::new(
445            std::io::ErrorKind::NotFound,
446            format!("HSM PIN file '{hsm_pin_path}' not found"),
447        )
448        .into());
449    }
450
451    let mut file = File::open(hsm_pin_path).await?;
452    let mut contents = vec![];
453    file.read_to_end(&mut contents).await?;
454    Ok(contents)
455}
456
457async fn write_hsm_pin(hsm_pin_path: &str) -> Result<(), Box<dyn Error>> {
458    if !PathBuf::from_str(hsm_pin_path)?.exists() {
459        let new_pin = AuthValue::generate().map_err(|hsm_err| {
460            error!(?hsm_err, "Unable to generate new pin");
461            std::io::Error::other("Unable to generate new pin")
462        })?;
463
464        std::fs::write(hsm_pin_path, new_pin)?;
465
466        info!("Generated new HSM pin");
467    }
468
469    Ok(())
470}
471
/// Open the hardware TPM identified by `tcti_name`.
///
/// Returns `None` (after logging the failure) when the device cannot be
/// opened; the caller decides whether that is fatal.
#[cfg(feature = "tpm")]
fn open_tpm(tcti_name: &str) -> Option<BoxedDynTpm> {
    use kanidm_hsm_crypto::provider::TssTpm;

    // Combinator form: a successful open is boxed into the dynamic TPM
    // wrapper, a failure is logged and collapsed to None.
    TssTpm::new(tcti_name)
        .map(|tpm| {
            debug!("opened hw tpm");
            BoxedDynTpm::new(tpm)
        })
        .map_err(|tpm_err| {
            error!(?tpm_err, "Unable to open requested tpm device");
        })
        .ok()
}
486
487#[cfg(not(feature = "tpm"))]
488fn open_tpm(_tcti_name: &str) -> Option<BoxedDynTpm> {
489    error!("Hardware TPM supported was not enabled in this build. Unable to proceed");
490    None
491}
492
/// Open the hardware TPM identified by `tcti_name`, degrading gracefully.
///
/// On any open failure a warning is logged and the software TPM is used
/// instead, so this function always yields a usable TPM handle.
#[cfg(feature = "tpm")]
fn open_tpm_if_possible(tcti_name: &str) -> BoxedDynTpm {
    use kanidm_hsm_crypto::provider::TssTpm;

    // Prefer the hardware device; fall back to the soft implementation
    // rather than aborting start-up.
    TssTpm::new(tcti_name)
        .map(|tpm| {
            debug!("opened hw tpm");
            BoxedDynTpm::new(tpm)
        })
        .unwrap_or_else(|tpm_err| {
            warn!(
                ?tpm_err,
                "Unable to open requested tpm device, falling back to soft tpm"
            );
            BoxedDynTpm::new(SoftTpm::new())
        })
}
510
511#[cfg(not(feature = "tpm"))]
512fn open_tpm_if_possible(_tcti_name: &str) -> BoxedDynTpm {
513    debug!("opened soft tpm");
514    BoxedDynTpm::new(SoftTpm::default())
515}
516
517async fn main_inner(clap_args: clap::ArgMatches) -> ExitCode {
518    let cuid = get_current_uid();
519    let ceuid = get_effective_uid();
520    let cgid = get_current_gid();
521    let cegid = get_effective_gid();
522
523    if clap_args.get_flag("skip-root-check") {
524        warn!("Skipping root user check, if you're running this for testing, ensure you clean up temporary files.")
525        // TODO: this wording is not great m'kay.
526    } else if cuid == 0 || ceuid == 0 || cgid == 0 || cegid == 0 {
527        error!("Refusing to run - this process must not operate as root.");
528        return ExitCode::FAILURE;
529    };
530
531    debug!("Profile -> {}", env!("KANIDM_PROFILE_NAME"));
532    debug!("CPU Flags -> {}", env!("KANIDM_CPU_FLAGS"));
533
534    let Some(cfg_path_str) = clap_args.get_one::<String>("client-config") else {
535        error!("Failed to pull the client config path");
536        return ExitCode::FAILURE;
537    };
538    let cfg_path: PathBuf = PathBuf::from(cfg_path_str);
539
540    if !cfg_path.exists() {
541        // there's no point trying to start up if we can't read a usable config!
542        error!(
543            "Client config missing from {} - cannot start up. Quitting.",
544            cfg_path_str
545        );
546        let diag = kanidm_lib_file_permissions::diagnose_path(cfg_path.as_ref());
547        info!(%diag);
548        return ExitCode::FAILURE;
549    } else {
550        let cfg_meta = match metadata(&cfg_path) {
551            Ok(v) => v,
552            Err(e) => {
553                error!("Unable to read metadata for {} - {:?}", cfg_path_str, e);
554                let diag = kanidm_lib_file_permissions::diagnose_path(cfg_path.as_ref());
555                info!(%diag);
556                return ExitCode::FAILURE;
557            }
558        };
559        if !kanidm_lib_file_permissions::readonly(&cfg_meta) {
560            warn!("permissions on {} may not be secure. Should be readonly to running uid. This could be a security risk ...",
561                        cfg_path_str
562                        );
563        }
564
565        if cfg_meta.uid() == cuid || cfg_meta.uid() == ceuid {
566            warn!("WARNING: {} owned by the current uid, which may allow file permission changes. This could be a security risk ...",
567                        cfg_path_str
568                    );
569        }
570    }
571
572    let Some(unixd_path_str) = clap_args.get_one::<String>("unixd-config") else {
573        error!("Failed to pull the unixd config path");
574        return ExitCode::FAILURE;
575    };
576    let unixd_path = PathBuf::from(unixd_path_str);
577
578    if !unixd_path.exists() {
579        // there's no point trying to start up if we can't read a usable config!
580        error!(
581            "unixd config missing from {} - cannot start up. Quitting.",
582            unixd_path_str
583        );
584        let diag = kanidm_lib_file_permissions::diagnose_path(unixd_path.as_ref());
585        info!(%diag);
586        return ExitCode::FAILURE;
587    } else {
588        let unixd_meta = match metadata(&unixd_path) {
589            Ok(v) => v,
590            Err(e) => {
591                error!("Unable to read metadata for {} - {:?}", unixd_path_str, e);
592                let diag = kanidm_lib_file_permissions::diagnose_path(unixd_path.as_ref());
593                info!(%diag);
594                return ExitCode::FAILURE;
595            }
596        };
597        if !kanidm_lib_file_permissions::readonly(&unixd_meta) {
598            warn!("permissions on {} may not be secure. Should be readonly to running uid. This could be a security risk ...",
599                        unixd_path_str);
600        }
601
602        if unixd_meta.uid() == cuid || unixd_meta.uid() == ceuid {
603            warn!("WARNING: {} owned by the current uid, which may allow file permission changes. This could be a security risk ...",
604                        unixd_path_str
605                    );
606        }
607    }
608
609    let cfg = match UnixdConfig::new().read_options_from_optional_config(&unixd_path) {
610        Ok(v) => v,
611        Err(_) => {
612            error!("Failed to parse {}", unixd_path_str);
613            return ExitCode::FAILURE;
614        }
615    };
616
617    let client_builder = if let Some(kconfig) = &cfg.kanidm_config {
618        if kconfig.pam_allowed_login_groups.is_empty() {
619            error!("Kanidm is enabled but no pam_allowed_login_groups are set - KANIDM USERS CANNOT AUTHENTICATE !!!");
620        }
621
622        // setup
623        let cb = match KanidmClientBuilder::new().read_options_from_optional_config(&cfg_path) {
624            Ok(v) => v,
625            Err(_) => {
626                error!("Failed to parse {}", cfg_path_str);
627                return ExitCode::FAILURE;
628            }
629        };
630
631        Some((cb, kconfig))
632    } else {
633        None
634    };
635
636    if clap_args.get_flag("configtest") {
637        eprintln!("###################################");
638        eprintln!("Dumping configs:\n###################################");
639        eprintln!("kanidm_unixd config (from {:#?})", &unixd_path);
640        eprintln!("{cfg}");
641        eprintln!("###################################");
642        if let Some((cb, _)) = client_builder.as_ref() {
643            eprintln!("kanidm client config (from {:#?})", &cfg_path);
644            eprintln!("{cb}");
645        } else {
646            eprintln!("kanidm client: disabled");
647        }
648        return ExitCode::SUCCESS;
649    }
650
651    debug!("🧹 Cleaning up sockets from previous invocations");
652    rm_if_exist(&cfg.sock_path);
653    rm_if_exist(&cfg.task_sock_path);
654
655    // Check the db path will be okay.
656    if !cfg.cache_db_path.is_empty() {
657        let cache_db_path = PathBuf::from(cfg.cache_db_path.as_str());
658        // We only need to check the parent folder path permissions as the db itself may not exist yet.
659        if let Some(db_parent_path) = cache_db_path.parent() {
660            if !db_parent_path.exists() {
661                error!(
662                    "Refusing to run, DB folder {} does not exist",
663                    db_parent_path
664                        .to_str()
665                        .unwrap_or("<db_parent_path invalid>")
666                );
667                let diag = kanidm_lib_file_permissions::diagnose_path(cache_db_path.as_ref());
668                info!(%diag);
669                return ExitCode::FAILURE;
670            }
671
672            let db_par_path_buf = db_parent_path.to_path_buf();
673
674            let i_meta = match metadata(&db_par_path_buf) {
675                Ok(v) => v,
676                Err(e) => {
677                    error!(
678                        "Unable to read metadata for {} - {:?}",
679                        db_par_path_buf
680                            .to_str()
681                            .unwrap_or("<db_par_path_buf invalid>"),
682                        e
683                    );
684                    return ExitCode::FAILURE;
685                }
686            };
687
688            if !i_meta.is_dir() {
689                error!(
690                    "Refusing to run - DB folder {} may not be a directory",
691                    db_par_path_buf
692                        .to_str()
693                        .unwrap_or("<db_par_path_buf invalid>")
694                );
695                return ExitCode::FAILURE;
696            }
697            if kanidm_lib_file_permissions::readonly(&i_meta) {
698                warn!("WARNING: DB folder permissions on {} indicate it may not be RW. This could cause the server start up to fail!", db_par_path_buf.to_str()
699                        .unwrap_or("<db_par_path_buf invalid>")
700                        );
701            }
702
703            if i_meta.mode() & 0o007 != 0 {
704                warn!("WARNING: DB folder {} has 'everyone' permission bits in the mode. This could be a security risk ...", db_par_path_buf.to_str()
705                        .unwrap_or("<db_par_path_buf invalid>")
706                        );
707            }
708        }
709
710        // check to see if the db's already there
711        if cache_db_path.exists() {
712            if !cache_db_path.is_file() {
713                error!(
714                    "Refusing to run - DB path {} already exists and is not a file.",
715                    cache_db_path.to_str().unwrap_or("<cache_db_path invalid>")
716                );
717                let diag = kanidm_lib_file_permissions::diagnose_path(cache_db_path.as_ref());
718                info!(%diag);
719                return ExitCode::FAILURE;
720            };
721
722            match metadata(&cache_db_path) {
723                Ok(v) => v,
724                Err(e) => {
725                    error!(
726                        "Unable to read metadata for {} - {:?}",
727                        cache_db_path.to_str().unwrap_or("<cache_db_path invalid>"),
728                        e
729                    );
730                    let diag = kanidm_lib_file_permissions::diagnose_path(cache_db_path.as_ref());
731                    info!(%diag);
732                    return ExitCode::FAILURE;
733                }
734            };
735            // TODO: permissions dance to enumerate the user's ability to write to the file? ref #456 - r2d2 will happily keep trying to do things without bailing.
736        };
737    }
738
739    let db = match Db::new(cfg.cache_db_path.as_str()) {
740        Ok(db) => db,
741        Err(_e) => {
742            error!("Failed to create database");
743            return ExitCode::FAILURE;
744        }
745    };
746
747    // perform any db migrations.
748    let mut dbtxn = db.write().await;
749    if dbtxn.migrate().and_then(|_| dbtxn.commit()).is_err() {
750        error!("Failed to migrate database");
751        return ExitCode::FAILURE;
752    }
753
754    // Check for and create the hsm pin if required.
755    if let Err(err) = write_hsm_pin(cfg.hsm_pin_path.as_str()).await {
756        let diag = kanidm_lib_file_permissions::diagnose_path(cfg.hsm_pin_path.as_ref());
757        info!(%diag);
758        error!(
759            ?err,
760            "Failed to create HSM PIN into {}",
761            cfg.hsm_pin_path.as_str()
762        );
763        return ExitCode::FAILURE;
764    };
765
766    // read the hsm pin
767    let hsm_pin = match read_hsm_pin(cfg.hsm_pin_path.as_str()).await {
768        Ok(hp) => hp,
769        Err(err) => {
770            let diag = kanidm_lib_file_permissions::diagnose_path(cfg.hsm_pin_path.as_ref());
771            info!(%diag);
772            error!(
773                ?err,
774                "Failed to read HSM PIN from {}",
775                cfg.hsm_pin_path.as_str()
776            );
777            return ExitCode::FAILURE;
778        }
779    };
780
781    let auth_value = match AuthValue::try_from(hsm_pin.as_slice()) {
782        Ok(av) => av,
783        Err(err) => {
784            error!(?err, "invalid hsm pin");
785            return ExitCode::FAILURE;
786        }
787    };
788
789    let mut hsm: BoxedDynTpm = match cfg.hsm_type {
790        HsmType::Soft => BoxedDynTpm::new(SoftTpm::default()),
791        HsmType::TpmIfPossible => open_tpm_if_possible(&cfg.tpm_tcti_name),
792        HsmType::Tpm => match open_tpm(&cfg.tpm_tcti_name) {
793            Some(hsm) => hsm,
794            None => return ExitCode::FAILURE,
795        },
796    };
797
798    // With the assistance of the DB, setup the HSM and its machine key.
799    let mut db_txn = db.write().await;
800
801    let loadable_machine_key = match db_txn.get_hsm_root_storage_key() {
802        Ok(Some(lmk)) => lmk,
803        Ok(None) => {
804            // No machine key found - create one, and store it.
805            let loadable_machine_key = match hsm.root_storage_key_create(&auth_value) {
806                Ok(lmk) => lmk,
807                Err(err) => {
808                    error!(?err, "Unable to create hsm loadable machine key");
809                    return ExitCode::FAILURE;
810                }
811            };
812
813            if let Err(err) = db_txn.insert_hsm_root_storage_key(&loadable_machine_key) {
814                error!(?err, "Unable to persist hsm loadable machine key");
815                return ExitCode::FAILURE;
816            }
817
818            loadable_machine_key
819        }
820        Err(err) => {
821            error!(?err, "Unable to access hsm loadable machine key");
822            return ExitCode::FAILURE;
823        }
824    };
825
826    let machine_key = match hsm.root_storage_key_load(&auth_value, &loadable_machine_key) {
827        Ok(mk) => mk,
828        Err(err) => {
829            error!(
830                ?err,
831                "Unable to load machine root key - This can occur if you have changed your HSM pin"
832            );
833            error!(
834                "To proceed you must remove the content of the cache db ({}) to reset all keys",
835                cfg.cache_db_path.as_str()
836            );
837            return ExitCode::FAILURE;
838        }
839    };
840
841    let Ok(system_provider) = SystemProvider::new() else {
842        error!("Failed to configure System Provider");
843        return ExitCode::FAILURE;
844    };
845
846    info!("Started system provider");
847
848    let mut clients: Vec<Arc<dyn IdProvider + Send + Sync>> = Vec::with_capacity(1);
849
850    // Setup Kanidm provider if the configuration requests it.
851    if let Some((cb, kconfig)) = client_builder {
852        let cb = cb.connect_timeout(kconfig.conn_timeout);
853        let cb = cb.request_timeout(kconfig.request_timeout);
854
855        let rsclient = match cb.build() {
856            Ok(rsc) => rsc,
857            Err(_e) => {
858                error!("Failed to build async client");
859                return ExitCode::FAILURE;
860            }
861        };
862
863        let Ok(idprovider) = KanidmProvider::new(
864            rsclient,
865            kconfig,
866            SystemTime::now(),
867            &mut (&mut db_txn).into(),
868            &mut hsm,
869            &machine_key,
870        )
871        .await
872        else {
873            error!("Failed to configure Kanidm Provider");
874            return ExitCode::FAILURE;
875        };
876
877        // Now stacked for the resolver.
878        clients.push(Arc::new(idprovider));
879        info!("Started kanidm provider");
880    }
881
882    drop(machine_key);
883
884    if let Err(err) = db_txn.commit() {
885        error!(
886            ?err,
887            "Failed to commit database transaction, unable to proceed"
888        );
889        return ExitCode::FAILURE;
890    }
891
892    if !cfg.default_shell.is_empty() {
893        let shell_path = PathBuf::from_str(&cfg.default_shell)
894            .expect("Failed to build a representation of your default_shell path!");
895        if !shell_path.exists() {
896            error!(
897                "Cannot find configured default shell at {}, this could cause login issues!",
898                shell_path.display()
899            )
900        }
901    }
902
903    // Okay, the hsm is now loaded and ready to go.
904    let (cl_inner, mut async_refresh_rx) = match Resolver::new(
905        db,
906        Arc::new(system_provider),
907        clients,
908        hsm,
909        cfg.cache_timeout,
910        cfg.default_shell.clone(),
911        cfg.home_prefix.clone(),
912        cfg.home_attr,
913        cfg.home_alias,
914        cfg.uid_attr_map,
915        cfg.gid_attr_map,
916    )
917    .await
918    {
919        Ok(c) => c,
920        Err(_e) => {
921            error!("Failed to build cache layer.");
922            return ExitCode::FAILURE;
923        }
924    };
925
926    let cachelayer = Arc::new(cl_inner);
927
928    // Setup the root-only tasks socket. Take away all other access bits.
929    let before = unsafe { umask(0o0077) };
930    let task_listener = match UnixListener::bind(cfg.task_sock_path.as_str()) {
931        Ok(l) => l,
932        Err(_e) => {
933            let diag = kanidm_lib_file_permissions::diagnose_path(cfg.task_sock_path.as_ref());
934            info!(%diag);
935            error!("Failed to bind UNIX socket {}", cfg.task_sock_path.as_str());
936            return ExitCode::FAILURE;
937        }
938    };
939    // Undo umask changes.
940    let _ = unsafe { umask(before) };
941
942    // The tasks ... well task. Tasks-task. Anyway, the tasks-task is bidirectional
943    // in its communication to the tasks-daemon. We submit tasks to the tasks-daemon
944    // via this channel here -\
945    //                        |
946    //                        v
947    let (task_channel_tx, mut task_channel_rx) = channel(16);
948    let task_channel_tx = Arc::new(task_channel_tx);
949    let task_channel_tx_cln = task_channel_tx.clone();
950    // Each submitted task contains a oneshot channel allowing the tasks-task to
951    // notify the submitter of the task that the task is completed.
952
953    // This channel is for the second case - the tasks-daemon can send us
    // unsolicited dm's about system state, and when these occur we need to
    // respond to these notifications. Because each potential dm that the
956    // daemon can send us has a specific intent, we need a channel for each
957    // type of notification that we could get. This channel is for when
958    // the tasks daemon has a reloaded shadow database for us to process
959    // and cache:
960    let (notify_shadow_channel_tx, mut notify_shadow_channel_rx) = channel(16);
961    let notify_shadow_channel_tx = Arc::new(notify_shadow_channel_tx);
962
963    // Broadcast receivers so that the tasks-task can be shut down when we get
964    // signals etc.
965    let (broadcast_tx, mut broadcast_rx) = broadcast::channel(4);
966    let mut c_broadcast_rx = broadcast_tx.subscribe();
967    let mut d_broadcast_rx = broadcast_tx.subscribe();
968
969    let task_b = tokio::spawn(async move {
970        loop {
971            tokio::select! {
972                // Wait on the broadcast to see if we need to close down.
973                _ = c_broadcast_rx.recv() => {
974                    break;
975                }
976                accept_res = task_listener.accept() => {
977                    match accept_res {
978                        Ok((socket, _addr)) => {
979                            // Did it come from root? If not, they don't have the needed
980                            // permissions to actually be a task handler, and we wouldn't
981                            // want to leak to anyone else anyway.
982                            if let Ok(ucred) = socket.peer_cred() {
983                                if ucred.uid() != 0 {
984                                    // move along.
985                                    warn!("Task handler not running as root, ignoring ...");
986                                    continue;
987                                }
988                            } else {
989                                // move along.
990                                warn!("Unable to determine socked peer cred, ignoring ...");
991                                continue;
992                            };
993                            debug!("A task handler has connected.");
994                            // It did? Great, now we can wait and spin on that one
995                            // client.
996
                            // We have to check for signals here else this task waits forever.
998                            if let Err(err) = handle_task_client(socket, &notify_shadow_channel_tx, &mut task_channel_rx, &mut d_broadcast_rx).await {
999                                error!(?err, "Task client error occurred");
1000                            }
1001                            // If they disconnect we go back to accept.
1002                        }
1003                        Err(err) => {
1004                            error!("Task Accept error -> {:?}", err);
1005                        }
1006                    }
1007                }
1008            }
1009            // done
1010        }
1011        info!("Stopped task connector");
1012    });
1013
1014    // ====== Listen for shadow change notification from tasks ======
1015
1016    let shadow_notify_cachelayer = cachelayer.clone();
1017    let mut c_broadcast_rx = broadcast_tx.subscribe();
1018
1019    let task_c = tokio::spawn(async move {
1020        debug!("Spawned shadow reload task handler");
1021        loop {
1022            tokio::select! {
1023                _ = c_broadcast_rx.recv() => {
1024                    break;
1025                }
1026                Some(EtcDb {
1027                    users, shadow, groups
1028                }) = notify_shadow_channel_rx.recv() => {
1029                    shadow_notify_cachelayer
1030                        .reload_system_identities(users, shadow, groups)
1031                        .await;
1032                }
1033            }
1034        }
1035        info!("Stopped shadow reload task handler");
1036    });
1037
1038    // Setup the task that handles async pre-fetching here.
1039    let prefetch_cachelayer = cachelayer.clone();
1040    let _task_prefetch = tokio::spawn(async move {
1041        let mut refresh_cache = LruCache::new(REFRESH_DEBOUNCE_SIZE);
1042
1043        while let Some(refresh_account_id) = async_refresh_rx.recv().await {
1044            let current_time = SystemTime::now();
1045
1046            // Have we already checked this item in the last few seconds?
1047            match refresh_cache.get(&refresh_account_id).copied() {
1048                Some(not_before) if current_time < not_before => {
1049                    debug!(?refresh_account_id, "debounce triggered");
1050                    continue;
1051                }
1052                _ => {}
1053            };
1054
1055            // Mark that we are about to refresh this, and that we shouldn't attempt again for a few seconds.
1056            refresh_cache.put(
1057                refresh_account_id.clone(),
1058                current_time + REFRESH_DEBOUNCE_WINDOW,
1059            );
1060
1061            // we don't mind if there was an error, it's already logged, and on success
1062            // we don't need the info anyway.
1063            if prefetch_cachelayer
1064                .refresh_usertoken(&refresh_account_id, current_time)
1065                .await
1066                .is_ok()
1067            {
1068                debug!(?refresh_account_id, "successful refresh of account");
1069            } else {
1070                warn!(?refresh_account_id, "failed to refresh account");
1071            }
1072        }
1073    });
1074
1075    // Set the umask while we open the path for most clients.
1076    let before = unsafe { umask(0) };
1077    let listener = match UnixListener::bind(cfg.sock_path.as_str()) {
1078        Ok(l) => l,
1079        Err(_e) => {
1080            error!("Failed to bind UNIX socket at {}", cfg.sock_path.as_str());
1081            return ExitCode::FAILURE;
1082        }
1083    };
1084    // Undo umask changes.
1085    let _ = unsafe { umask(before) };
1086
1087    let task_a = tokio::spawn(async move {
1088        loop {
1089            let tc_tx = task_channel_tx_cln.clone();
1090
1091            tokio::select! {
1092                _ = broadcast_rx.recv() => {
1093                    break;
1094                }
1095                accept_res = listener.accept() => {
1096                    match accept_res {
1097                        Ok((socket, _addr)) => {
1098                            let cachelayer_ref = cachelayer.clone();
1099                            tokio::spawn(async move {
1100                                handle_client(socket, cachelayer_ref.clone(), &tc_tx).await;
1101                            });
1102                        }
1103                        Err(err) => {
1104                            error!(?err, "Error while accepting connection");
1105                        }
1106                    }
1107                }
1108            }
1109        }
1110        info!("Stopped resolver");
1111    });
1112
1113    info!("Server started ...");
1114
1115    // On linux, notify systemd.
1116    #[cfg(target_os = "linux")]
1117    let _ = sd_notify::notify(true, &[sd_notify::NotifyState::Ready]);
1118
1119    loop {
1120        tokio::select! {
1121            Ok(()) = tokio::signal::ctrl_c() => {
1122                break
1123            }
1124            Some(()) = async move {
1125                let sigterm = tokio::signal::unix::SignalKind::terminate();
1126                #[allow(clippy::unwrap_used)]
1127                tokio::signal::unix::signal(sigterm).unwrap().recv().await
1128            } => {
1129                break
1130            }
1131            Some(()) = async move {
1132                let sigterm = tokio::signal::unix::SignalKind::alarm();
1133                #[allow(clippy::unwrap_used)]
1134                tokio::signal::unix::signal(sigterm).unwrap().recv().await
1135            } => {
1136                // Ignore
1137            }
1138            Some(()) = async move {
1139                let sigterm = tokio::signal::unix::SignalKind::hangup();
1140                #[allow(clippy::unwrap_used)]
1141                tokio::signal::unix::signal(sigterm).unwrap().recv().await
1142            } => {
1143                // Ignore
1144            }
1145            Some(()) = async move {
1146                let sigterm = tokio::signal::unix::SignalKind::user_defined1();
1147                #[allow(clippy::unwrap_used)]
1148                tokio::signal::unix::signal(sigterm).unwrap().recv().await
1149            } => {
1150                // Ignore
1151            }
1152            Some(()) = async move {
1153                let sigterm = tokio::signal::unix::SignalKind::user_defined2();
1154                #[allow(clippy::unwrap_used)]
1155                tokio::signal::unix::signal(sigterm).unwrap().recv().await
1156            } => {
1157                // Ignore
1158            }
1159        }
1160    }
1161    info!("Signal received, sending down signal to tasks");
1162    // Send a broadcast that we are done.
1163    if let Err(e) = broadcast_tx.send(true) {
1164        error!("Unable to shutdown workers {:?}", e);
1165    }
1166
1167    let _ = task_a.await;
1168    let _ = task_b.await;
1169    let _ = task_c.await;
1170
1171    ExitCode::SUCCESS
1172}
1173
1174// #[tokio::main(flavor = "current_thread")]
1175#[tokio::main(flavor = "multi_thread", worker_threads = 2)]
1176async fn main() -> ExitCode {
1177    // On linux when debug assertions are disabled, prevent ptrace
1178    // from attaching to us.
1179    #[cfg(all(target_os = "linux", not(debug_assertions)))]
1180    if let Err(code) = prctl::set_dumpable(false) {
1181        error!(?code, "CRITICAL: Unable to set prctl flags");
1182        return ExitCode::FAILURE;
1183    }
1184
1185    // We need enough backtrace depth to find leak sources if they exist.
1186    #[cfg(feature = "dhat-heap")]
1187    let _profiler = dhat::Profiler::builder()
1188        .file_name(format!(
1189            "/var/cache/kanidm-unixd/heap-{}.json",
1190            std::process::id()
1191        ))
1192        .trim_backtraces(Some(40))
1193        .build();
1194
1195    let clap_args = Command::new("kanidm_unixd")
1196        .version(env!("CARGO_PKG_VERSION"))
1197        .about("Kanidm Unix daemon")
1198        .arg(
1199            Arg::new("skip-root-check")
1200                .help("Allow running as root. Don't use this in production as it is risky!")
1201                .short('r')
1202                .long("skip-root-check")
1203                .env("KANIDM_SKIP_ROOT_CHECK")
1204                .action(ArgAction::SetTrue),
1205        )
1206        .arg(
1207            Arg::new("debug")
1208                .help("Show extra debug information")
1209                .short('d')
1210                .long("debug")
1211                .env("KANIDM_DEBUG")
1212                .action(ArgAction::SetTrue),
1213        )
1214        .arg(
1215            Arg::new("configtest")
1216                .help("Display the configuration and exit")
1217                .short('t')
1218                .long("configtest")
1219                .action(ArgAction::SetTrue),
1220        )
1221        .arg(
1222            Arg::new("unixd-config")
1223                .help("Set the unixd config file path")
1224                .short('u')
1225                .long("unixd-config")
1226                .default_value(DEFAULT_CONFIG_PATH)
1227                .env("KANIDM_UNIX_CONFIG")
1228                .action(ArgAction::Set),
1229        )
1230        .arg(
1231            Arg::new("client-config")
1232                .help("Set the client config file path")
1233                .short('c')
1234                .long("client-config")
1235                .default_value(DEFAULT_CLIENT_CONFIG_PATH)
1236                .env("KANIDM_CLIENT_CONFIG")
1237                .action(ArgAction::Set),
1238        )
1239        .get_matches();
1240
1241    if clap_args.get_flag("debug") {
1242        std::env::set_var("RUST_LOG", "debug");
1243    }
1244
1245    #[allow(clippy::expect_used)]
1246    tracing_forest::worker_task()
1247        .set_global(true)
1248        // Fall back to stderr
1249        .map_sender(|sender| sender.or_stderr())
1250        .build_on(|subscriber| {
1251            subscriber.with(
1252                EnvFilter::try_from_default_env()
1253                    .or_else(|_| EnvFilter::try_new("info"))
1254                    .expect("Failed to init envfilter"),
1255            )
1256        })
1257        .on(main_inner(clap_args))
1258        .await
1259}