// kanidm_unixd/kanidm_unixd.rs

1#![deny(warnings)]
2#![warn(unused_extern_crates)]
3#![deny(clippy::todo)]
4#![deny(clippy::unimplemented)]
5#![deny(clippy::unwrap_used)]
6#![deny(clippy::expect_used)]
7#![deny(clippy::panic)]
8#![deny(clippy::unreachable)]
9#![deny(clippy::await_holding_lock)]
10#![deny(clippy::needless_pass_by_value)]
11#![deny(clippy::trivially_copy_pass_by_ref)]
12
13use clap::{Arg, ArgAction, Command};
14use futures::{SinkExt, StreamExt};
15use kanidm_client::KanidmClientBuilder;
16use kanidm_hsm_crypto::{
17    provider::{BoxedDynTpm, SoftTpm, Tpm},
18    AuthValue,
19};
20use kanidm_lib_file_permissions::diagnose_path;
21use kanidm_proto::constants::DEFAULT_CLIENT_CONFIG_PATH;
22use kanidm_proto::internal::OperationError;
23use kanidm_unix_common::constants::DEFAULT_CONFIG_PATH;
24use kanidm_unix_common::json_codec::JsonCodec;
25use kanidm_unix_common::unix_config::{HsmType, UnixdConfig};
26use kanidm_unix_common::unix_passwd::EtcDb;
27use kanidm_unix_common::unix_proto::{
28    ClientRequest, ClientResponse, TaskRequest, TaskRequestFrame, TaskResponse,
29};
30use kanidm_unix_resolver::idprovider::interface::IdProvider;
31use kanidm_unix_resolver::idprovider::kanidm::KanidmProvider;
32use kanidm_unix_resolver::idprovider::system::SystemProvider;
33use kanidm_unix_resolver::resolver::{AuthSession, Resolver};
34use kanidm_unix_resolver::{
35    check_nsswitch_has_kanidm,
36    db::{Cache, Db},
37};
38use kanidm_utils_users::{get_current_gid, get_current_uid, get_effective_gid, get_effective_uid};
39use libc::umask;
40use lru::LruCache;
41use sketching::tracing::span;
42use sketching::tracing_forest::util::*;
43use sketching::tracing_forest::{self, traits::*};
44use std::collections::BTreeMap;
45use std::error::Error;
46use std::fs::metadata;
47use std::io::Error as IoError;
48use std::num::NonZeroUsize;
49use std::os::unix::fs::MetadataExt;
50use std::path::PathBuf;
51use std::process::ExitCode;
52use std::str::FromStr;
53use std::sync::Arc;
54use std::time::{Duration, SystemTime};
55use time::OffsetDateTime;
56use tokio::fs::File;
57use tokio::io::AsyncReadExt; // for read_to_end()
58use tokio::net::{UnixListener, UnixStream};
59use tokio::sync::broadcast;
60use tokio::sync::mpsc::{channel, Receiver, Sender};
61use tokio::sync::oneshot;
62use tokio_util::codec::Framed;
63
// When built with the `dhat-heap` feature, route all heap allocations through
// dhat so heap usage can be profiled. No-op in normal builds.
#[cfg(feature = "dhat-heap")]
#[global_allocator]
static ALLOC: dhat::Alloc = dhat::Alloc;
67
// Capacity of the per-connection LRU that tracks in-flight PAM auth sessions.
// `expect` in const context is evaluated at compile time, so a bad value is a
// build failure, not a runtime panic.
const DEFAULT_CONCURRENT_AUTH_SESSIONS: NonZeroUsize = NonZeroUsize::new(64)
    .expect("Invalid DEFAULT_CONCURRENT_AUTH_SESSIONS constant at compile time");
// Size and window of the refresh debounce cache (declared here; consumer is
// outside this view — presumably the resolver refresh path, verify at use site).
const REFRESH_DEBOUNCE_SIZE: NonZeroUsize =
    NonZeroUsize::new(16).expect("Invalid REFRESH_DEBOUNCE_SIZE constant at compile time");
const REFRESH_DEBOUNCE_WINDOW: Duration = Duration::from_secs(5);
73
/// A request destined for the kanidm-unixd-tasks daemon, paired with a
/// oneshot sender that is fired once the tasks daemon acknowledges completion
/// (see `handle_task_client`, which owns the id -> sender bookkeeping).
struct AsyncTaskRequest {
    // The operation the tasks daemon should perform.
    task_req: TaskRequest,
    // Completion signal back to the submitter; dropped senders are tolerated.
    task_chan: oneshot::Sender<()>,
}
78
79fn rm_if_exist(p: &str) {
80    debug!("Attempting to remove file {}", p);
81    let _ = std::fs::remove_file(p).map_err(|e| match e.kind() {
82        std::io::ErrorKind::NotFound => {
83            debug!("{} not present, no need to remove.", p);
84        }
85        _ => {
86            error!(
87                "Failure while attempting to attempting to remove {} -> {}",
88                p,
89                e.to_string()
90            );
91        }
92    });
93}
94
/// Drive one connection from the kanidm-unixd-tasks daemon.
///
/// Sends `TaskRequestFrame`s (tagged with a locally-incremented id) down the
/// unix socket and routes acknowledgements back to the submitter's oneshot
/// channel. Runs until shutdown is broadcast, the tasks daemon disconnects,
/// the internal task channel closes, or a protocol/IO error occurs.
///
/// * `stream` - the accepted unix socket from the tasks daemon.
/// * `notify_shadow_change_tx` - forwards shadow-file change notifications.
/// * `task_channel_rx` - queue of tasks submitted by client handlers.
/// * `broadcast_rx` - daemon-wide shutdown signal.
async fn handle_task_client(
    stream: UnixStream,
    notify_shadow_change_tx: &Sender<EtcDb>,
    task_channel_rx: &mut Receiver<AsyncTaskRequest>,
    broadcast_rx: &mut broadcast::Receiver<bool>,
) -> Result<(), Box<dyn Error>> {
    // setup the codec, this is to the unix socket which the task daemon
    // connected to us with.
    let mut last_task_id: u64 = 0;
    // Outstanding tasks: frame id -> completion channel of the submitter.
    let mut task_handles: BTreeMap<u64, oneshot::Sender<()>> = BTreeMap::new();

    let codec: JsonCodec<TaskResponse, TaskRequestFrame> = JsonCodec::default();

    let mut framed_stream = Framed::new(stream, codec);

    loop {
        tokio::select! {
            biased; // tell tokio to poll these in order
            // We have been commanded to stop operation.
            _ = broadcast_rx.recv() => {
                return Ok(())
            }
            // We bias to *reading* messages in the resolver.
            response = framed_stream.next() => {
                // Process incoming messages. They may be out of order.
                match response {
                    Some(Ok(TaskResponse::Success(task_id))) => {
                        debug!("Task was acknowledged and completed.");

                        if let Some(handle) = task_handles.remove(&task_id) {
                            // Send a result back via the one-shot
                            // Ignore if it fails.
                            let _ = handle.send(());
                        }
                        // If the ID was unregistered, ignore.
                    }
                    Some(Ok(TaskResponse::NotifyShadowChange(etc_db))) => {
                        // Fan the updated /etc db out to the resolver; a closed
                        // receiver is not fatal to this connection.
                        let _ = notify_shadow_change_tx.send(etc_db).await;
                    }
                    // Other things ....
                    // Some(Ok(TaskResponse::ReloadSystemIds))

                    None => {
                        // The stream has ended, we should stop operation.
                        info!("Task client disconnected, stopping task handler.");
                        return Ok(())
                    }

                    other => {
                        // Either a decode error or an unknown response variant:
                        // treat the connection as unusable.
                        let errmsg = format!("Received unexpected message from kanidm-unixd-tasks: {:?}", other);
                        error!("{}", errmsg);
                        return Err(Box::new(IoError::other(errmsg)));
                    }
                }
            }
            task_request = task_channel_rx.recv() => {
                let Some(AsyncTaskRequest {
                    task_req,
                    task_chan
                }) = task_request else {
                    // Task channel has died, cease operation.
                    return Ok(())
                };

                debug!("Sending Task -> {:?}", task_req);

                // Allocate the next frame id so the ack can be correlated.
                last_task_id += 1;
                let task_id = last_task_id;

                // Setup the task handle so we know who to get back to.
                task_handles.insert(task_id, task_chan);

                let task_frame = TaskRequestFrame {
                    id: task_id,
                    req: task_req,
                };

                if let Err(err) = framed_stream.send(task_frame).await {
                    warn!("Unable to queue task for completion");
                    return Err(Box::new(err));
                }
                // Task sent
            }
        }
    }
}
181
/// Serve one client connection on the resolver unix socket.
///
/// Reads `ClientRequest` frames in a loop, dispatches each against the
/// resolver cache layer, and writes back a `ClientResponse`. PAM
/// authentication is stateful: init creates an `AuthSession` keyed by a
/// per-connection session id in an LRU, and step requests continue it.
/// Home-directory creation is delegated to the tasks daemon via
/// `task_channel_tx`. On disconnect (or send failure) a shutdown broadcast
/// tells any in-flight auth sessions to stop.
async fn handle_client(
    sock: UnixStream,
    cachelayer: Arc<Resolver>,
    task_channel_tx: &Sender<AsyncTaskRequest>,
) {
    // Correlates all log spans for this connection.
    let conn_id = uuid::Uuid::new_v4();

    // Peer credentials gate privileged ops (e.g. ClearCache requires uid 0).
    let Ok(ucred) = sock.peer_cred() else {
        error!("Unable to verify peer credentials, terminating connection.");
        return;
    };

    let codec: JsonCodec<ClientRequest, ClientResponse> = JsonCodec::default();

    let mut reqs = Framed::new(sock, codec);
    let mut session_id_counter: u64 = 1;
    // Bounded LRU: oldest auth session is silently evicted if a client opens
    // more than DEFAULT_CONCURRENT_AUTH_SESSIONS at once.
    let mut pam_auth_session_state: LruCache<u64, AuthSession> =
        LruCache::new(DEFAULT_CONCURRENT_AUTH_SESSIONS);

    // Setup a broadcast channel so that if we have an unexpected disconnection, we can
    // tell consumers to stop work.
    let (shutdown_tx, _shutdown_rx) = broadcast::channel(1);

    while let Some(Ok(req)) = reqs.next().await {
        let maybe_err: Result<(), Box<dyn Error>> = async {
            debug!(uid = ?ucred.uid(), gid = ?ucred.gid(), pid = ?ucred.pid());

            let resp = match req {
                // Lookup-style requests degrade to empty results on error so
                // NSS/SSH consumers see "not found" rather than a hard failure.
                ClientRequest::SshKey(account_id) => cachelayer
                    .get_sshkeys(account_id.as_str())
                    .await
                    .map(ClientResponse::SshKeys)
                    .unwrap_or_else(|_| {
                        error!("unable to load keys, returning empty set.");
                        ClientResponse::SshKeys(vec![])
                    }),
                ClientRequest::NssAccounts => cachelayer
                    .get_nssaccounts()
                    .await
                    .map(ClientResponse::NssAccounts)
                    .unwrap_or_else(|_| {
                        error!("unable to enum accounts");
                        ClientResponse::NssAccounts(Vec::new())
                    }),
                // NOTE(review): this variant is named ByUid but resolves via
                // get_nssaccount_gid — confirm the intended id space upstream.
                ClientRequest::NssAccountByUid(gid) => cachelayer
                    .get_nssaccount_gid(gid)
                    .await
                    .map(ClientResponse::NssAccount)
                    .unwrap_or_else(|_| {
                        error!("unable to load account, returning empty.");
                        ClientResponse::NssAccount(None)
                    }),
                ClientRequest::NssAccountByName(account_id) => cachelayer
                    .get_nssaccount_name(account_id.as_str())
                    .await
                    .map(ClientResponse::NssAccount)
                    .unwrap_or_else(|_| {
                        error!("unable to load account, returning empty.");
                        ClientResponse::NssAccount(None)
                    }),
                ClientRequest::NssGroups => cachelayer
                    .get_nssgroups()
                    .await
                    .map(ClientResponse::NssGroups)
                    .unwrap_or_else(|_| {
                        error!("unable to enum groups");
                        ClientResponse::NssGroups(Vec::new())
                    }),
                ClientRequest::NssGroupByGid(gid) => cachelayer
                    .get_nssgroup_gid(gid)
                    .await
                    .map(ClientResponse::NssGroup)
                    .unwrap_or_else(|_| {
                        error!("unable to load group, returning empty.");
                        ClientResponse::NssGroup(None)
                    }),
                ClientRequest::NssGroupByName(grp_id) => cachelayer
                    .get_nssgroup_name(grp_id.as_str())
                    .await
                    .map(ClientResponse::NssGroup)
                    .unwrap_or_else(|_| {
                        error!("unable to load group, returning empty.");
                        ClientResponse::NssGroup(None)
                    }),
                ClientRequest::PamAuthenticateInit { account_id, info } => {
                   #[allow(clippy::disallowed_methods)]
                    // Allowed as this is the time check for the remainder of the function and check.


                    let current_time = OffsetDateTime::now_utc();

                    match cachelayer
                        .pam_account_authenticate_init(
                            account_id.as_str(),
                            &info,
                            current_time,
                            shutdown_tx.subscribe(),
                        )
                        .await
                    {
                        Ok((auth_session, pam_auth_response)) => {

                            let session_id = session_id_counter;
                            session_id_counter += 1;

                            // push returns Some only on key collision; since ids
                            // are monotonic, a collision means corrupted state.
                            if pam_auth_session_state.push(session_id, auth_session).is_some() {
                                // Something really bad is up, stop everything.
                                pam_auth_session_state.clear();
                                error!("session_id was reused, unable to proceed. cancelling all inflight authentication sessions.");
                                ClientResponse::Error(OperationError::KU001InitWhileSessionActive)
                            } else {
                                ClientResponse::PamAuthenticateStepResponse {
                                    response: pam_auth_response,
                                    session_id,
                                }
                            }
                        }
                        Err(_) => ClientResponse::Error(OperationError::KU004PamInitFailed),
                    }
                }
                ClientRequest::PamAuthenticateStep {
                    request,
                    session_id,
                } => {
                    match pam_auth_session_state.get_mut(&session_id) {
                        Some(auth_session) => {
                            let response = cachelayer
                                .pam_account_authenticate_step(auth_session, request)
                                .await;

                            let is_complete = auth_session.is_complete();

                            // Release the reference so that we can manipulate the
                            // btreemap if needed
                            let _ = auth_session;

                            if let Ok(pam_auth_response) = response {
                                // What was the response? Is it a terminating case?
                                if is_complete {
                                    // Completed sessions are removed regardless of
                                    // success/failure of the auth itself.
                                    pam_auth_session_state.pop(&session_id);
                                }

                                ClientResponse::PamAuthenticateStepResponse {
                                    response: pam_auth_response,
                                    session_id,
                                }
                            } else {
                                ClientResponse::Error(OperationError::KU003PamAuthFailed)
                            }
                        }
                        None => {
                            error!("Attempt to continue auth session, but session id was not present. There may be too many concurrent authentication sessions in progress.");
                            ClientResponse::Error(OperationError::KU002ContinueWhileSessionInActive)
                        }
                    }

                }
                ClientRequest::PamAccountAllowed(account_id) => cachelayer
                    .pam_account_allowed(account_id.as_str())
                    .await
                    .map(ClientResponse::PamStatus)
                    .unwrap_or(ClientResponse::Error(
                        OperationError::KU005ErrorCheckingAccount,
                    )),
                ClientRequest::PamAccountBeginSession(account_id) => {
                    match cachelayer
                        .pam_account_beginsession(account_id.as_str())
                        .await
                    {
                        Ok(Some(info)) => {
                            // Ask the tasks daemon to provision the home dir and
                            // wait (bounded) for its acknowledgement.
                            let (tx, rx) = oneshot::channel();

                            match task_channel_tx
                                .send_timeout(
                                    AsyncTaskRequest {
                                        task_req: TaskRequest::HomeDirectory(info),
                                        task_chan: tx,
                                    },
                                    Duration::from_millis(100),
                                )
                                .await
                            {
                                Ok(()) => {
                                    // Now wait for the other end OR timeout.
                                    match tokio::time::timeout_at(
                                        tokio::time::Instant::now() + Duration::from_millis(1000),
                                        rx,
                                    )
                                    .await
                                    {
                                        Ok(Ok(_)) => {
                                            debug!("Task completed, returning to pam ...");
                                            ClientResponse::Ok
                                        }
                                        _ => {
                                            // Timeout or other error.
                                            ClientResponse::Error(OperationError::KG001TaskTimeout)
                                        }
                                    }
                                }
                                Err(_) => {
                                    // We could not submit the req. Move on!
                                    ClientResponse::Error(OperationError::KG002TaskCommFailure)
                                }
                            }
                        }
                        Ok(None) => {
                            // The session can begin, but we do not need to create the home dir.
                            ClientResponse::Ok
                        }
                        Err(_) => ClientResponse::Error(OperationError::KU005ErrorCheckingAccount),
                    }
                }
                ClientRequest::InvalidateCache => cachelayer
                    .invalidate()
                    .await
                    .map(|_| ClientResponse::Ok)
                    .unwrap_or(ClientResponse::Error(OperationError::KG003CacheClearFailed)),
                ClientRequest::ClearCache => {
                    // Destructive: only root may clear the cache.
                    if ucred.uid() == 0 {
                        cachelayer
                            .clear_cache()
                            .await
                            .map(|_| ClientResponse::Ok)
                            .unwrap_or(ClientResponse::Error(OperationError::KG003CacheClearFailed))
                    } else {
                        error!("{}", OperationError::KU006OnlyRootAllowed);
                        ClientResponse::Error(OperationError::KU006OnlyRootAllowed)
                    }
                }
                ClientRequest::Status => {
                    let status = cachelayer.provider_status().await;
                    ClientResponse::ProviderStatus(status)
                }
            };

            trace!(?resp);

            reqs.send(resp).await
                .inspect_err(|err| {
                    error!(?err, "unable to send response");
                })?;
            reqs.flush().await
                .inspect_err(|err| {
                    error!(?err, "unable to flush response");
                })?;

            trace!("flushed response!");

            Ok(())
        }
            .instrument(
                span!(Level::INFO, "client request", uuid = %conn_id, defer = true)
            )
            .await;

        // A send/flush failure means the peer is gone; drop the connection.
        if maybe_err.is_err() {
            break;
        }
    }

    // Disconnect them
    // Signal any tasks that they need to stop.
    if let Err(shutdown_err) = shutdown_tx.send(()) {
        warn!(
            ?shutdown_err,
            "Unable to signal tasks to stop, they will naturally timeout instead."
        )
    }
}
452
453async fn read_hsm_pin(hsm_pin_path: &str) -> Result<Vec<u8>, Box<dyn Error>> {
454    if !PathBuf::from_str(hsm_pin_path)?.exists() {
455        return Err(std::io::Error::new(
456            std::io::ErrorKind::NotFound,
457            format!("HSM PIN file '{hsm_pin_path}' not found"),
458        )
459        .into());
460    }
461
462    let mut file = File::open(hsm_pin_path).await?;
463    let mut contents = vec![];
464    file.read_to_end(&mut contents).await?;
465    Ok(contents)
466}
467
/// Create the HSM PIN file at `hsm_pin_path` with a freshly generated
/// `AuthValue` if no file exists there yet; an existing file is left alone.
///
/// NOTE(review): the pin is written with `std::fs::write`, so the file mode
/// is whatever the process umask yields — confirm elsewhere restricts
/// permissions on this secret, and that the parent dir is private.
/// NOTE(review): `std::fs::write` is a blocking call inside an async fn;
/// acceptable at startup, but verify this is only called before the runtime
/// is under load.
///
/// # Errors
/// Returns an error if pin generation or the file write fails.
async fn write_hsm_pin(hsm_pin_path: &str) -> Result<(), Box<dyn Error>> {
    if !PathBuf::from_str(hsm_pin_path)?.exists() {
        let new_pin = AuthValue::generate().map_err(|hsm_err| {
            error!(?hsm_err, "Unable to generate new pin");
            std::io::Error::other("Unable to generate new pin")
        })?;

        std::fs::write(hsm_pin_path, new_pin)?;

        info!("Generated new HSM pin");
    }

    Ok(())
}
482
/// Attempt to open the hardware TPM named by `tcti_name`.
///
/// Returns `Some` with the boxed TPM provider on success, or `None` (after
/// logging the failure) when the device cannot be opened. No soft-TPM
/// fallback here — callers that want one use `open_tpm_if_possible`.
#[cfg(feature = "tpm")]
fn open_tpm(tcti_name: &str) -> Option<BoxedDynTpm> {
    use kanidm_hsm_crypto::provider::TssTpm;
    TssTpm::new(tcti_name)
        .map(|tpm| {
            debug!("opened hw tpm");
            BoxedDynTpm::new(tpm)
        })
        .map_err(|tpm_err| {
            error!(?tpm_err, "Unable to open requested tpm device");
        })
        .ok()
}
497
498#[cfg(not(feature = "tpm"))]
499fn open_tpm(_tcti_name: &str) -> Option<BoxedDynTpm> {
500    error!("Hardware TPM supported was not enabled in this build. Unable to proceed");
501    None
502}
503
/// Open the hardware TPM named by `tcti_name`, falling back to the software
/// TPM (with a warning) if the device cannot be opened. Unlike `open_tpm`,
/// this never fails — it always yields a usable provider.
#[cfg(feature = "tpm")]
fn open_tpm_if_possible(tcti_name: &str) -> BoxedDynTpm {
    use kanidm_hsm_crypto::provider::TssTpm;
    match TssTpm::new(tcti_name) {
        Ok(tpm) => {
            debug!("opened hw tpm");
            BoxedDynTpm::new(tpm)
        }
        Err(tpm_err) => {
            warn!(
                ?tpm_err,
                "Unable to open requested tpm device, falling back to soft tpm"
            );
            // Consistency: construct the soft TPM via Default like the
            // non-tpm variant and the HsmType::Soft arm in main_inner.
            BoxedDynTpm::new(SoftTpm::default())
        }
    }
}
521
/// Build without the `tpm` feature: there is no hardware path, so always
/// hand back the software TPM provider.
#[cfg(not(feature = "tpm"))]
fn open_tpm_if_possible(_tcti_name: &str) -> BoxedDynTpm {
    debug!("opened soft tpm");
    BoxedDynTpm::new(SoftTpm::default())
}
527
528async fn main_inner(clap_args: clap::ArgMatches) -> ExitCode {
529    let cuid = get_current_uid();
530    let ceuid = get_effective_uid();
531    let cgid = get_current_gid();
532    let cegid = get_effective_gid();
533
534    if clap_args.get_flag("skip-root-check") {
535        warn!("Skipping root user check, if you're running this for testing, ensure you clean up temporary files.")
536        // TODO: this wording is not great m'kay.
537    } else if cuid == 0 || ceuid == 0 || cgid == 0 || cegid == 0 {
538        error!("Refusing to run - this process must not operate as root.");
539        return ExitCode::FAILURE;
540    };
541
542    debug!("Profile -> {}", env!("KANIDM_PROFILE_NAME"));
543    debug!("CPU Flags -> {}", env!("KANIDM_CPU_FLAGS"));
544
545    let Some(cfg_path_str) = clap_args.get_one::<String>("client-config") else {
546        error!("Failed to pull the client config path");
547        return ExitCode::FAILURE;
548    };
549    let cfg_path: PathBuf = PathBuf::from(cfg_path_str);
550
551    if !cfg_path.exists() {
552        // there's no point trying to start up if we can't read a usable config!
553        error!(
554            "Client config missing from {} - cannot start up. Quitting.",
555            cfg_path_str
556        );
557        let diag = diagnose_path(cfg_path.as_ref());
558        info!(%diag);
559        return ExitCode::FAILURE;
560    } else {
561        let cfg_meta = match metadata(&cfg_path) {
562            Ok(v) => v,
563            Err(e) => {
564                error!("Unable to read metadata for {} - {:?}", cfg_path_str, e);
565                let diag = diagnose_path(cfg_path.as_ref());
566                info!(%diag);
567                return ExitCode::FAILURE;
568            }
569        };
570        if !kanidm_lib_file_permissions::readonly(&cfg_meta) {
571            warn!("permissions on {} may not be secure. Should be readonly to running uid. This could be a security risk ...",
572                        cfg_path_str
573                        );
574        }
575
576        if cfg_meta.uid() == cuid || cfg_meta.uid() == ceuid {
577            warn!("WARNING: {} owned by the current uid, which may allow file permission changes. This could be a security risk ...",
578                        cfg_path_str
579                    );
580        }
581    }
582
583    let Some(unixd_path_str) = clap_args.get_one::<String>("unixd-config") else {
584        error!("Failed to pull the unixd config path");
585        return ExitCode::FAILURE;
586    };
587    let unixd_path = PathBuf::from(unixd_path_str);
588
589    if !unixd_path.exists() {
590        // there's no point trying to start up if we can't read a usable config!
591        error!(
592            "unixd config missing from {} - cannot start up. Quitting.",
593            unixd_path_str
594        );
595        let diag = diagnose_path(unixd_path.as_ref());
596        info!(%diag);
597        return ExitCode::FAILURE;
598    } else {
599        let unixd_meta = match metadata(&unixd_path) {
600            Ok(v) => v,
601            Err(e) => {
602                error!("Unable to read metadata for {} - {:?}", unixd_path_str, e);
603                let diag = diagnose_path(unixd_path.as_ref());
604                info!(%diag);
605                return ExitCode::FAILURE;
606            }
607        };
608        if !kanidm_lib_file_permissions::readonly(&unixd_meta) {
609            warn!("permissions on {} may not be secure. Should be readonly to running uid. This could be a security risk ...",
610                        unixd_path_str);
611        }
612
613        if unixd_meta.uid() == cuid || unixd_meta.uid() == ceuid {
614            warn!("WARNING: {} owned by the current uid, which may allow file permission changes. This could be a security risk ...",
615                        unixd_path_str
616                    );
617        }
618    }
619
620    let cfg = match UnixdConfig::new().read_options_from_optional_config(&unixd_path) {
621        Ok(v) => v,
622        Err(_) => {
623            error!("Failed to parse {}", unixd_path_str);
624            return ExitCode::FAILURE;
625        }
626    };
627
628    let client_builder = if let Some(kconfig) = &cfg.kanidm_config {
629        if kconfig.pam_allowed_login_groups.is_empty() {
630            error!("Kanidm is enabled but no pam_allowed_login_groups are set - KANIDM USERS CANNOT AUTHENTICATE !!!");
631        }
632
633        // setup
634        let cb = match KanidmClientBuilder::new().read_options_from_optional_config(&cfg_path) {
635            Ok(v) => v,
636            Err(_) => {
637                error!("Failed to parse {}", cfg_path_str);
638                return ExitCode::FAILURE;
639            }
640        };
641
642        Some((cb, kconfig))
643    } else {
644        None
645    };
646
647    check_nsswitch_has_kanidm(None);
648
649    if clap_args.get_flag("configtest") {
650        eprintln!("###################################");
651        eprintln!("Dumping configs:\n###################################");
652        eprintln!("kanidm_unixd config (from {:#?})", &unixd_path);
653        eprintln!("{cfg}");
654        eprintln!("###################################");
655        if let Some((cb, _)) = client_builder.as_ref() {
656            eprintln!("kanidm client config (from {:#?})", &cfg_path);
657            eprintln!("{cb}");
658        } else {
659            eprintln!("kanidm client: disabled");
660        }
661        return ExitCode::SUCCESS;
662    }
663
664    debug!("🧹 Cleaning up sockets from previous invocations");
665    rm_if_exist(&cfg.sock_path);
666    rm_if_exist(&cfg.task_sock_path);
667
668    // Check the db path will be okay.
669    if !cfg.cache_db_path.is_empty() {
670        let cache_db_path = PathBuf::from(cfg.cache_db_path.as_str());
671        // We only need to check the parent folder path permissions as the db itself may not exist yet.
672        if let Some(db_parent_path) = cache_db_path.parent() {
673            if !db_parent_path.exists() {
674                error!(
675                    "Refusing to run, DB folder {} does not exist",
676                    db_parent_path
677                        .to_str()
678                        .unwrap_or("<db_parent_path invalid>")
679                );
680                let diag = diagnose_path(cache_db_path.as_ref());
681                info!(%diag);
682                return ExitCode::FAILURE;
683            }
684
685            let db_par_path_buf = db_parent_path.to_path_buf();
686
687            let i_meta = match metadata(&db_par_path_buf) {
688                Ok(v) => v,
689                Err(e) => {
690                    error!(
691                        "Unable to read metadata for {} - {:?}",
692                        db_par_path_buf
693                            .to_str()
694                            .unwrap_or("<db_par_path_buf invalid>"),
695                        e
696                    );
697                    return ExitCode::FAILURE;
698                }
699            };
700
701            if !i_meta.is_dir() {
702                error!(
703                    "Refusing to run - DB folder {} may not be a directory",
704                    db_par_path_buf
705                        .to_str()
706                        .unwrap_or("<db_par_path_buf invalid>")
707                );
708                return ExitCode::FAILURE;
709            }
710            if kanidm_lib_file_permissions::readonly(&i_meta) {
711                warn!("WARNING: DB folder permissions on {} indicate it may not be RW. This could cause the server start up to fail!", db_par_path_buf.to_str()
712                        .unwrap_or("<db_par_path_buf invalid>")
713                        );
714            }
715
716            if i_meta.mode() & 0o007 != 0 {
717                warn!("WARNING: DB folder {} has 'everyone' permission bits in the mode. This could be a security risk ...", db_par_path_buf.to_str()
718                        .unwrap_or("<db_par_path_buf invalid>")
719                        );
720            }
721        }
722
723        // check to see if the db's already there
724        if cache_db_path.exists() {
725            if !cache_db_path.is_file() {
726                error!(
727                    "Refusing to run - DB path {} already exists and is not a file.",
728                    cache_db_path.to_str().unwrap_or("<cache_db_path invalid>")
729                );
730                let diag = diagnose_path(cache_db_path.as_ref());
731                info!(%diag);
732                return ExitCode::FAILURE;
733            };
734
735            match metadata(&cache_db_path) {
736                Ok(v) => v,
737                Err(e) => {
738                    error!(
739                        "Unable to read metadata for {} - {:?}",
740                        cache_db_path.to_str().unwrap_or("<cache_db_path invalid>"),
741                        e
742                    );
743                    let diag = diagnose_path(cache_db_path.as_ref());
744                    info!(%diag);
745                    return ExitCode::FAILURE;
746                }
747            };
748            // TODO: permissions dance to enumerate the user's ability to write to the file? ref #456 - r2d2 will happily keep trying to do things without bailing.
749        };
750    }
751
752    let db = match Db::new(cfg.cache_db_path.as_str()) {
753        Ok(db) => db,
754        Err(_e) => {
755            error!("Failed to create database");
756            return ExitCode::FAILURE;
757        }
758    };
759
760    // perform any db migrations.
761    let mut dbtxn = db.write().await;
762    if dbtxn.migrate().and_then(|_| dbtxn.commit()).is_err() {
763        error!("Failed to migrate database");
764        return ExitCode::FAILURE;
765    }
766
767    // Check for and create the hsm pin if required.
768    if let Err(err) = write_hsm_pin(cfg.hsm_pin_path.as_str()).await {
769        let diag = diagnose_path(cfg.hsm_pin_path.as_ref());
770        info!(%diag);
771        error!(
772            ?err,
773            "Failed to create HSM PIN into {}",
774            cfg.hsm_pin_path.as_str()
775        );
776        return ExitCode::FAILURE;
777    };
778
779    // read the hsm pin
780    let hsm_pin = match read_hsm_pin(cfg.hsm_pin_path.as_str()).await {
781        Ok(hp) => hp,
782        Err(err) => {
783            let diag = diagnose_path(cfg.hsm_pin_path.as_ref());
784            info!(%diag);
785            error!(
786                ?err,
787                "Failed to read HSM PIN from {}",
788                cfg.hsm_pin_path.as_str()
789            );
790            return ExitCode::FAILURE;
791        }
792    };
793
794    let auth_value = match AuthValue::try_from(hsm_pin.as_slice()) {
795        Ok(av) => av,
796        Err(err) => {
797            error!(?err, "invalid hsm pin");
798            return ExitCode::FAILURE;
799        }
800    };
801
802    let mut hsm: BoxedDynTpm = match cfg.hsm_type {
803        HsmType::Soft => BoxedDynTpm::new(SoftTpm::default()),
804        HsmType::TpmIfPossible => open_tpm_if_possible(&cfg.tpm_tcti_name),
805        HsmType::Tpm => match open_tpm(&cfg.tpm_tcti_name) {
806            Some(hsm) => hsm,
807            None => return ExitCode::FAILURE,
808        },
809    };
810
811    // With the assistance of the DB, setup the HSM and its machine key.
812    let mut db_txn = db.write().await;
813
814    let loadable_machine_key = match db_txn.get_hsm_root_storage_key() {
815        Ok(Some(lmk)) => lmk,
816        Ok(None) => {
817            // No machine key found - create one, and store it.
818            let loadable_machine_key = match hsm.root_storage_key_create(&auth_value) {
819                Ok(lmk) => lmk,
820                Err(err) => {
821                    error!(?err, "Unable to create hsm loadable machine key");
822                    return ExitCode::FAILURE;
823                }
824            };
825
826            if let Err(err) = db_txn.insert_hsm_root_storage_key(&loadable_machine_key) {
827                error!(?err, "Unable to persist hsm loadable machine key");
828                return ExitCode::FAILURE;
829            }
830
831            loadable_machine_key
832        }
833        Err(err) => {
834            error!(?err, "Unable to access hsm loadable machine key");
835            return ExitCode::FAILURE;
836        }
837    };
838
839    let machine_key = match hsm.root_storage_key_load(&auth_value, &loadable_machine_key) {
840        Ok(mk) => mk,
841        Err(err) => {
842            error!(
843                ?err,
844                "Unable to load machine root key - This can occur if you have changed your HSM pin"
845            );
846            error!(
847                "To proceed you must remove the content of the cache db ({}) to reset all keys",
848                cfg.cache_db_path.as_str()
849            );
850            return ExitCode::FAILURE;
851        }
852    };
853
854    let Ok(system_provider) = SystemProvider::new() else {
855        error!("Failed to configure System Provider");
856        return ExitCode::FAILURE;
857    };
858
859    info!("Started system provider");
860
861    let mut clients: Vec<Arc<dyn IdProvider + Send + Sync>> = Vec::with_capacity(1);
862
863    // Setup Kanidm provider if the configuration requests it.
864    if let Some((cb, kconfig)) = client_builder {
865        let cb = cb.connect_timeout(kconfig.conn_timeout);
866        let cb = cb.request_timeout(kconfig.request_timeout);
867
868        let rsclient = match cb.build() {
869            Ok(rsc) => rsc,
870            Err(_e) => {
871                error!("Failed to build async client");
872                return ExitCode::FAILURE;
873            }
874        };
875
876        let Ok(idprovider) = KanidmProvider::new(
877            rsclient,
878            kconfig,
879            SystemTime::now(),
880            &mut (&mut db_txn).into(),
881            &mut hsm,
882            &machine_key,
883        )
884        .await
885        else {
886            error!("Failed to configure Kanidm Provider");
887            return ExitCode::FAILURE;
888        };
889
890        // Now stacked for the resolver.
891        clients.push(Arc::new(idprovider));
892        info!("Started kanidm provider");
893    }
894
895    drop(machine_key);
896
897    if let Err(err) = db_txn.commit() {
898        error!(
899            ?err,
900            "Failed to commit database transaction, unable to proceed"
901        );
902        return ExitCode::FAILURE;
903    }
904
905    if !cfg.default_shell.is_empty() {
906        let shell_path = PathBuf::from_str(&cfg.default_shell)
907            .expect("Failed to build a representation of your default_shell path!");
908        if !shell_path.exists() {
909            error!(
910                "Cannot find configured default shell at {}, this could cause login issues!",
911                shell_path.display()
912            )
913        }
914    }
915
916    // Okay, the hsm is now loaded and ready to go.
917    let (cl_inner, mut async_refresh_rx) = match Resolver::new(
918        db,
919        Arc::new(system_provider),
920        clients,
921        hsm,
922        cfg.cache_timeout,
923        cfg.default_shell.clone(),
924        cfg.home_prefix.clone(),
925        cfg.home_attr,
926        cfg.home_alias,
927        cfg.uid_attr_map,
928        cfg.gid_attr_map,
929    )
930    .await
931    {
932        Ok(c) => c,
933        Err(_e) => {
934            error!("Failed to build cache layer.");
935            return ExitCode::FAILURE;
936        }
937    };
938
939    let cachelayer = Arc::new(cl_inner);
940
941    // Setup the root-only tasks socket. Take away all other access bits.
942    let before = unsafe { umask(0o0077) };
943    let task_listener = match UnixListener::bind(cfg.task_sock_path.as_str()) {
944        Ok(l) => l,
945        Err(_e) => {
946            let diag = diagnose_path(cfg.task_sock_path.as_ref());
947            info!(%diag);
948            error!("Failed to bind UNIX socket {}", cfg.task_sock_path.as_str());
949            return ExitCode::FAILURE;
950        }
951    };
952    // Undo umask changes.
953    let _ = unsafe { umask(before) };
954
955    // The tasks ... well task. Tasks-task. Anyway, the tasks-task is bidirectional
956    // in its communication to the tasks-daemon. We submit tasks to the tasks-daemon
957    // via this channel here -\
958    //                        |
959    //                        v
960    let (task_channel_tx, mut task_channel_rx) = channel(16);
961    let task_channel_tx = Arc::new(task_channel_tx);
962    let task_channel_tx_cln = task_channel_tx.clone();
963    // Each submitted task contains a oneshot channel allowing the tasks-task to
964    // notify the submitter of the task that the task is completed.
965
966    // This channel is for the second case - the tasks-daemon can send us
967    // unsolicited dm's about system state, and when these occure we need to
968    // response to these notifications. Because each potential dm that the
969    // daemon can send us has a specific intent, we need a channel for each
970    // type of notification that we could get. This channel is for when
971    // the tasks daemon has a reloaded shadow database for us to process
972    // and cache:
973    let (notify_shadow_channel_tx, mut notify_shadow_channel_rx) = channel(16);
974    let notify_shadow_channel_tx = Arc::new(notify_shadow_channel_tx);
975
976    // Broadcast receivers so that the tasks-task can be shut down when we get
977    // signals etc.
978    let (broadcast_tx, mut broadcast_rx) = broadcast::channel(4);
979    let mut c_broadcast_rx = broadcast_tx.subscribe();
980    let mut d_broadcast_rx = broadcast_tx.subscribe();
981
982    let task_b = tokio::spawn(async move {
983        loop {
984            tokio::select! {
985                // Wait on the broadcast to see if we need to close down.
986                _ = c_broadcast_rx.recv() => {
987                    break;
988                }
989                accept_res = task_listener.accept() => {
990                    match accept_res {
991                        Ok((socket, _addr)) => {
992                            // Did it come from root? If not, they don't have the needed
993                            // permissions to actually be a task handler, and we wouldn't
994                            // want to leak to anyone else anyway.
995                            if let Ok(ucred) = socket.peer_cred() {
996                                if ucred.uid() != 0 {
997                                    // move along.
998                                    warn!("Task handler not running as root, ignoring ...");
999                                    continue;
1000                                }
1001                            } else {
1002                                // move along.
1003                                warn!("Unable to determine socked peer cred, ignoring ...");
1004                                continue;
1005                            };
1006                            debug!("A task handler has connected.");
1007                            // It did? Great, now we can wait and spin on that one
1008                            // client.
1009
1010                            // We have to check for signals here else this tasks waits forever.
1011                            if let Err(err) = handle_task_client(socket, &notify_shadow_channel_tx, &mut task_channel_rx, &mut d_broadcast_rx).await {
1012                                error!(?err, "Task client error occurred");
1013                            }
1014                            // If they disconnect we go back to accept.
1015                        }
1016                        Err(err) => {
1017                            error!("Task Accept error -> {:?}", err);
1018                        }
1019                    }
1020                }
1021            }
1022            // done
1023        }
1024        info!("Stopped task connector");
1025    });
1026
1027    // ====== Listen for shadow change notification from tasks ======
1028
1029    let shadow_notify_cachelayer = cachelayer.clone();
1030    let mut c_broadcast_rx = broadcast_tx.subscribe();
1031
1032    let task_c = tokio::spawn(async move {
1033        debug!("Spawned shadow reload task handler");
1034        loop {
1035            tokio::select! {
1036                _ = c_broadcast_rx.recv() => {
1037                    break;
1038                }
1039                Some(EtcDb {
1040                    users, shadow, groups
1041                }) = notify_shadow_channel_rx.recv() => {
1042                    shadow_notify_cachelayer
1043                        .reload_system_identities(users, shadow, groups)
1044                        .await;
1045                }
1046            }
1047        }
1048        info!("Stopped shadow reload task handler");
1049    });
1050
1051    // Setup the task that handles async pre-fetching here.
1052    let prefetch_cachelayer = cachelayer.clone();
1053    let _task_prefetch = tokio::spawn(async move {
1054        let mut refresh_cache = LruCache::new(REFRESH_DEBOUNCE_SIZE);
1055
1056        while let Some(refresh_account_id) = async_refresh_rx.recv().await {
1057            let current_time = SystemTime::now();
1058
1059            // Have we already checked this item in the last few seconds?
1060            match refresh_cache.get(&refresh_account_id).copied() {
1061                Some(not_before) if current_time < not_before => {
1062                    debug!(?refresh_account_id, "debounce triggered");
1063                    continue;
1064                }
1065                _ => {}
1066            };
1067
1068            // Mark that we are about to refresh this, and that we shouldn't attempt again for a few seconds.
1069            refresh_cache.put(
1070                refresh_account_id.clone(),
1071                current_time + REFRESH_DEBOUNCE_WINDOW,
1072            );
1073
1074            // we don't mind if there was an error, it's already logged, and on success
1075            // we don't need the info anyway.
1076            if prefetch_cachelayer
1077                .refresh_usertoken(&refresh_account_id, current_time)
1078                .await
1079                .is_ok()
1080            {
1081                debug!(?refresh_account_id, "successful refresh of account");
1082            } else {
1083                warn!(?refresh_account_id, "failed to refresh account");
1084            }
1085        }
1086    });
1087
1088    // Set the umask while we open the path for most clients.
1089    let before = unsafe { umask(0) };
1090    let listener = match UnixListener::bind(cfg.sock_path.as_str()) {
1091        Ok(l) => l,
1092        Err(_e) => {
1093            error!("Failed to bind UNIX socket at {}", cfg.sock_path.as_str());
1094            return ExitCode::FAILURE;
1095        }
1096    };
1097    // Undo umask changes.
1098    let _ = unsafe { umask(before) };
1099
1100    let task_a = tokio::spawn(async move {
1101        loop {
1102            let tc_tx = task_channel_tx_cln.clone();
1103
1104            tokio::select! {
1105                _ = broadcast_rx.recv() => {
1106                    break;
1107                }
1108                accept_res = listener.accept() => {
1109                    match accept_res {
1110                        Ok((socket, _addr)) => {
1111                            let cachelayer_ref = cachelayer.clone();
1112                            tokio::spawn(async move {
1113                                handle_client(socket, cachelayer_ref.clone(), &tc_tx).await;
1114                            });
1115                        }
1116                        Err(err) => {
1117                            error!(?err, "Error while accepting connection");
1118                        }
1119                    }
1120                }
1121            }
1122        }
1123        info!("Stopped resolver");
1124    });
1125
1126    info!("Server started ...");
1127
1128    // On linux, notify systemd.
1129    #[cfg(target_os = "linux")]
1130    unsafe {
1131        let _ = sd_notify::notify_and_unset_env(&[sd_notify::NotifyState::Ready]);
1132    }
1133
1134    loop {
1135        tokio::select! {
1136            Ok(()) = tokio::signal::ctrl_c() => {
1137                break
1138            }
1139            Some(()) = async move {
1140                let sigterm = tokio::signal::unix::SignalKind::terminate();
1141                #[allow(clippy::unwrap_used)]
1142                tokio::signal::unix::signal(sigterm).unwrap().recv().await
1143            } => {
1144                break
1145            }
1146            Some(()) = async move {
1147                let sigterm = tokio::signal::unix::SignalKind::alarm();
1148                #[allow(clippy::unwrap_used)]
1149                tokio::signal::unix::signal(sigterm).unwrap().recv().await
1150            } => {
1151                // Ignore
1152            }
1153            Some(()) = async move {
1154                let sigterm = tokio::signal::unix::SignalKind::hangup();
1155                #[allow(clippy::unwrap_used)]
1156                tokio::signal::unix::signal(sigterm).unwrap().recv().await
1157            } => {
1158                // Ignore
1159            }
1160            Some(()) = async move {
1161                let sigterm = tokio::signal::unix::SignalKind::user_defined1();
1162                #[allow(clippy::unwrap_used)]
1163                tokio::signal::unix::signal(sigterm).unwrap().recv().await
1164            } => {
1165                // Ignore
1166            }
1167            Some(()) = async move {
1168                let sigterm = tokio::signal::unix::SignalKind::user_defined2();
1169                #[allow(clippy::unwrap_used)]
1170                tokio::signal::unix::signal(sigterm).unwrap().recv().await
1171            } => {
1172                // Ignore
1173            }
1174        }
1175    }
1176    info!("Signal received, sending down signal to tasks");
1177    // Send a broadcast that we are done.
1178    if let Err(e) = broadcast_tx.send(true) {
1179        error!("Unable to shutdown workers {:?}", e);
1180    }
1181
1182    let _ = task_a.await;
1183    let _ = task_b.await;
1184    let _ = task_c.await;
1185
1186    ExitCode::SUCCESS
1187}
1188
1189// #[tokio::main(flavor = "current_thread")]
1190#[tokio::main(flavor = "multi_thread", worker_threads = 2)]
1191async fn main() -> ExitCode {
1192    // On linux when debug assertions are disabled, prevent ptrace
1193    // from attaching to us.
1194    #[cfg(all(target_os = "linux", not(debug_assertions)))]
1195    if let Err(code) = prctl::set_dumpable(false) {
1196        error!(?code, "CRITICAL: Unable to set prctl flags");
1197        return ExitCode::FAILURE;
1198    }
1199
1200    // We need enough backtrace depth to find leak sources if they exist.
1201    #[cfg(feature = "dhat-heap")]
1202    let _profiler = dhat::Profiler::builder()
1203        .file_name(format!(
1204            "/var/cache/kanidm-unixd/heap-{}.json",
1205            std::process::id()
1206        ))
1207        .trim_backtraces(Some(40))
1208        .build();
1209
1210    let clap_args = Command::new("kanidm_unixd")
1211        .version(env!("CARGO_PKG_VERSION"))
1212        .about("Kanidm Unix daemon")
1213        .arg(
1214            Arg::new("skip-root-check")
1215                .help("Allow running as root. Don't use this in production as it is risky!")
1216                .short('r')
1217                .long("skip-root-check")
1218                .env("KANIDM_SKIP_ROOT_CHECK")
1219                .action(ArgAction::SetTrue),
1220        )
1221        .arg(
1222            Arg::new("debug")
1223                .help("Show extra debug information")
1224                .short('d')
1225                .long("debug")
1226                .env("KANIDM_DEBUG")
1227                .action(ArgAction::SetTrue),
1228        )
1229        .arg(
1230            Arg::new("configtest")
1231                .help("Display the configuration and exit")
1232                .short('t')
1233                .long("configtest")
1234                .action(ArgAction::SetTrue),
1235        )
1236        .arg(
1237            Arg::new("unixd-config")
1238                .help("Set the unixd config file path")
1239                .short('u')
1240                .long("unixd-config")
1241                .default_value(DEFAULT_CONFIG_PATH)
1242                .env("KANIDM_UNIX_CONFIG")
1243                .action(ArgAction::Set),
1244        )
1245        .arg(
1246            Arg::new("client-config")
1247                .help("Set the client config file path")
1248                .short('c')
1249                .long("client-config")
1250                .default_value(DEFAULT_CLIENT_CONFIG_PATH)
1251                .env("KANIDM_CLIENT_CONFIG")
1252                .action(ArgAction::Set),
1253        )
1254        .get_matches();
1255
1256    if clap_args.get_flag("debug") {
1257        std::env::set_var("RUST_LOG", "debug");
1258    }
1259
1260    #[allow(clippy::expect_used)]
1261    tracing_forest::worker_task()
1262        .set_global(true)
1263        // Fall back to stderr
1264        .map_sender(|sender| sender.or_stderr())
1265        .build_on(|subscriber| {
1266            subscriber.with(
1267                EnvFilter::try_from_default_env()
1268                    .or_else(|_| EnvFilter::try_new("info"))
1269                    .expect("Failed to init envfilter"),
1270            )
1271        })
1272        .on(main_inner(clap_args))
1273        .await
1274}