kanidmd_lib/valueset/
utf8.rs

1use crate::prelude::*;
2use crate::schema::SchemaAttribute;
3use crate::utils::trigraph_iter;
4use crate::valueset::{
5    DbValueSetV2, ScimResolveStatus, ValueSet, ValueSetResolveStatus, ValueSetScimPut,
6};
7use kanidm_proto::scim_v1::client::ScimStrings;
8use kanidm_proto::scim_v1::JsonValue;
9use std::collections::BTreeSet;
10
/// A value set of UTF-8 strings.
///
/// Backed by a `BTreeSet`, so each string is stored at most once and
/// iteration yields the strings in sorted order.
#[derive(Clone, Debug)]
pub struct ValueSetUtf8 {
    /// The strings held by this value set.
    set: BTreeSet<String>,
}
15
16impl ValueSetUtf8 {
17    pub fn new(s: String) -> Box<Self> {
18        let mut set = BTreeSet::new();
19        set.insert(s);
20        Box::new(ValueSetUtf8 { set })
21    }
22
23    pub fn push(&mut self, s: String) -> bool {
24        self.set.insert(s)
25    }
26
27    pub fn from_dbvs2(data: Vec<String>) -> Result<ValueSet, OperationError> {
28        // #4200 - It was accidentally possible to have an empty string in the UTF8 field. When
29        // we rehydrate these from the DB we need to ensure they will pass the !empty check in
30        // validate. This converts empty -> "Not Present" which technically is a valid string.
31        let set = data
32            .into_iter()
33            .map(|s| {
34                if s.is_empty() {
35                    "Not Present".to_string()
36                } else {
37                    s
38                }
39            })
40            .collect();
41        Ok(Box::new(ValueSetUtf8 { set }))
42    }
43}
44
45impl ValueSetScimPut for ValueSetUtf8 {
46    fn from_scim_json_put(value: JsonValue) -> Result<ValueSetResolveStatus, OperationError> {
47        let ScimStrings(values) = serde_json::from_value(value).map_err(|err| {
48            error!(?err, "SCIM Utf8 Syntax Invalid");
49            OperationError::SC0026Utf8SyntaxInvalid
50        })?;
51
52        let set = values.into_iter().collect();
53
54        Ok(ValueSetResolveStatus::Resolved(Box::new(ValueSetUtf8 {
55            set,
56        })))
57    }
58}
59
60impl ValueSetT for ValueSetUtf8 {
61    fn insert_checked(&mut self, value: Value) -> Result<bool, OperationError> {
62        match value {
63            Value::Utf8(s) => Ok(self.set.insert(s)),
64            _ => Err(OperationError::InvalidValueState),
65        }
66    }
67
68    fn clear(&mut self) {
69        self.set.clear();
70    }
71
72    fn remove(&mut self, pv: &PartialValue, _cid: &Cid) -> bool {
73        match pv {
74            PartialValue::Utf8(s) => self.set.remove(s),
75            _ => {
76                debug_assert!(false);
77                true
78            }
79        }
80    }
81
82    fn contains(&self, pv: &PartialValue) -> bool {
83        match pv {
84            PartialValue::Utf8(s) => self.set.contains(s.as_str()),
85            _ => false,
86        }
87    }
88
89    fn substring(&self, pv: &PartialValue) -> bool {
90        match pv {
91            PartialValue::Utf8(s2) => {
92                // We lowercase as LDAP and similar expect case insensitive searches here.
93                let s2_lower = s2.to_lowercase();
94                self.set
95                    .iter()
96                    .any(|s1| s1.to_lowercase().contains(&s2_lower))
97            }
98            _ => {
99                debug_assert!(false);
100                false
101            }
102        }
103    }
104
105    fn startswith(&self, pv: &PartialValue) -> bool {
106        match pv {
107            PartialValue::Utf8(s2) => {
108                // We lowercase as LDAP and similar expect case insensitive searches here.
109                let s2_lower = s2.to_lowercase();
110                self.set
111                    .iter()
112                    .any(|s1| s1.to_lowercase().starts_with(&s2_lower))
113            }
114            _ => {
115                debug_assert!(false);
116                false
117            }
118        }
119    }
120
121    fn endswith(&self, pv: &PartialValue) -> bool {
122        match pv {
123            PartialValue::Utf8(s2) => {
124                // We lowercase as LDAP and similar expect case insensitive searches here.
125                let s2_lower = s2.to_lowercase();
126                self.set
127                    .iter()
128                    .any(|s1| s1.to_lowercase().ends_with(&s2_lower))
129            }
130            _ => {
131                debug_assert!(false);
132                false
133            }
134        }
135    }
136
137    fn lessthan(&self, _pv: &PartialValue) -> bool {
138        false
139    }
140
141    fn len(&self) -> usize {
142        self.set.len()
143    }
144
145    fn generate_idx_eq_keys(&self) -> Vec<String> {
146        self.set.iter().cloned().collect()
147    }
148
149    fn generate_idx_sub_keys(&self) -> Vec<String> {
150        let lower: Vec<_> = self.set.iter().map(|s| s.to_lowercase()).collect();
151        let mut trigraphs: Vec<_> = lower.iter().flat_map(|v| trigraph_iter(v)).collect();
152
153        trigraphs.sort_unstable();
154        trigraphs.dedup();
155
156        trigraphs.into_iter().map(String::from).collect()
157    }
158
159    fn syntax(&self) -> SyntaxType {
160        SyntaxType::Utf8String
161    }
162
163    fn validate(&self, _schema_attr: &SchemaAttribute) -> bool {
164        self.set.iter().all(|s| {
165            !s.is_empty() && Value::validate_str_escapes(s) && Value::validate_singleline(s)
166        })
167    }
168
169    fn to_proto_string_clone_iter(&self) -> Box<dyn Iterator<Item = String> + '_> {
170        Box::new(self.set.iter().cloned())
171    }
172
173    fn to_scim_value(&self) -> Option<ScimResolveStatus> {
174        let mut iter = self.set.iter().cloned();
175        if self.len() == 1 {
176            let v = iter.next().unwrap_or_default();
177            Some(v.into())
178        } else {
179            let arr = iter.collect::<Vec<_>>();
180            Some(arr.into())
181        }
182    }
183
184    fn to_db_valueset_v2(&self) -> DbValueSetV2 {
185        DbValueSetV2::Utf8(self.set.iter().cloned().collect())
186    }
187
188    fn to_partialvalue_iter(&self) -> Box<dyn Iterator<Item = PartialValue> + '_> {
189        Box::new(self.set.iter().map(|i| PartialValue::new_utf8s(i.as_str())))
190    }
191
192    fn to_value_iter(&self) -> Box<dyn Iterator<Item = Value> + '_> {
193        Box::new(self.set.iter().map(|i| Value::new_utf8s(i.as_str())))
194    }
195
196    fn equal(&self, other: &ValueSet) -> bool {
197        if let Some(other) = other.as_utf8_set() {
198            &self.set == other
199        } else {
200            debug_assert!(false);
201            false
202        }
203    }
204
205    fn merge(&mut self, other: &ValueSet) -> Result<(), OperationError> {
206        if let Some(b) = other.as_utf8_set() {
207            mergesets!(self.set, b)
208        } else {
209            debug_assert!(false);
210            Err(OperationError::InvalidValueState)
211        }
212    }
213
214    fn to_utf8_single(&self) -> Option<&str> {
215        if self.set.len() == 1 {
216            self.set.iter().take(1).next().map(|s| s.as_str())
217        } else {
218            None
219        }
220    }
221
222    fn as_utf8_set(&self) -> Option<&BTreeSet<String>> {
223        Some(&self.set)
224    }
225
226    fn as_utf8_iter(&self) -> Option<Box<dyn Iterator<Item = &str> + '_>> {
227        Some(Box::new(self.set.iter().map(|s| s.as_str())))
228    }
229}
230
#[cfg(test)]
mod tests {
    use super::ValueSetUtf8;
    use crate::migration_data::latest::schema::SCHEMA_ATTR_DISPLAYNAME_DL7;
    use crate::prelude::{PartialValue, ValueSet, ValueSetT};

    /// Substring, prefix and suffix matching must all ignore case.
    #[test]
    fn test_utf8_substring_insensitive() {
        let vs = ValueSetUtf8::new("Test User".to_string());

        // (needle, substring?, startswith?, endswith?)
        let cases = [
            ("xx", false, false, false),
            ("test", true, true, false),
            ("usEr", true, false, true),
        ];

        for &(needle, is_sub, is_prefix, is_suffix) in cases.iter() {
            let pv = PartialValue::Utf8(needle.to_string());

            assert_eq!(vs.substring(&pv), is_sub);
            assert_eq!(vs.startswith(&pv), is_prefix);
            assert_eq!(vs.endswith(&pv), is_suffix);
        }
    }

    /// Empty strings are rejected by validation; ordinary names pass.
    #[test]
    fn test_utf8_validation() {
        // # 4200 - prevent empty strings from being set.
        let empty = ValueSetUtf8::new(String::new());
        assert!(!empty.validate(&SCHEMA_ATTR_DISPLAYNAME_DL7));

        let named = ValueSetUtf8::new(String::from("Tobias Oxford"));
        assert!(named.validate(&SCHEMA_ATTR_DISPLAYNAME_DL7));
    }

    /// Round-trips a single-valued set through the SCIM JSON helpers.
    #[test]
    fn test_scim_utf8() {
        let single: ValueSet = ValueSetUtf8::new("Test".to_string());

        // The emitted json must match the known string.
        crate::valueset::scim_json_reflexive(&single, r#""Test""#);

        // The json values must parse back into a valueset.
        crate::valueset::scim_json_put_reflexive::<ValueSetUtf8>(&single, &[]);
    }
}