standard_lib/fs/path/
path.rs

1use std::borrow::{Borrow, Cow};
2use std::cmp::Ordering;
3use std::ffi::{CString, OsStr, OsString};
4use std::fmt::{self, Debug, Formatter};
5use std::marker::PhantomData;
6use std::num::NonZero;
7use std::ops::Deref;
8use std::os::unix::ffi::OsStrExt;
9
10use super::{DisplayPath, IntoComponents, Rel};
11use crate::fs::path::{Ancestors, Components, validity};
12use crate::util::error::CapacityOverflow;
13use crate::util::result::ResultExtension;
14use crate::util::{self, sealed::Sealed};
15
/// Marker trait for the type-level state parameter carried by [`Path`] and [`OwnedPath`].
///
/// It declares no methods; it only requires the `Sealed` and [`Debug`] supertraits.
// NOTE(review): presumably `Sealed` keeps implementations inside this crate, making `Rel` and its
// sibling states the only implementors — confirm against `crate::util::sealed`.
pub trait PathState: Sealed + Debug {}
17
/// An owned path buffer tagged with a [`PathState`]; the owned counterpart of [`Path`], to which
/// it dereferences.
pub struct OwnedPath<State: PathState> {
    /// Zero-sized marker that records `State` without storing a value of that type.
    pub(crate) _state: PhantomData<fn() -> State>,
    /// Raw bytes of the path. The methods in this file assume the buffer is non-empty and
    /// starts with `b'/'`.
    pub(crate) bytes: Vec<u8>,
}
22
/// A borrowed, unsized path slice tagged with a [`PathState`]; the borrowed counterpart of
/// [`OwnedPath`].
///
/// `repr(transparent)` gives this type the layout of its only non-zero-sized field, `[u8]`,
/// which is what the pointer casts in `from_unchecked_bytes` rely on.
#[repr(transparent)]
pub struct Path<State: PathState> {
    /// Zero-sized marker that records `State` without storing a value of that type.
    pub(crate) _state: PhantomData<fn() -> State>,
    /// Raw bytes of the path. The methods in this file assume the slice is non-empty and
    /// starts with `b'/'`.
    pub(crate) bytes: [u8],
}
28
29impl<T: AsRef<OsStr>, S: PathState> From<T> for OwnedPath<S> {
30    fn from(value: T) -> Self {
31        Self {
32            _state: PhantomData,
33            bytes: validity::sanitize(value.as_ref()),
34        }
35    }
36}
37
38impl<S: PathState> OwnedPath<S> {
39    pub unsafe fn from_unchecked<O: Into<OsString>>(inner: O) -> Self {
40        Self {
41            _state: PhantomData,
42            bytes: inner.into().into_encoded_bytes(),
43        }
44    }
45
46    pub unsafe fn from_unchecked_bytes(inner: Vec<u8>) -> Self {
47        Self {
48            _state: PhantomData,
49            bytes: inner,
50        }
51    }
52
53    pub fn as_path(&self) -> &Path<S> {
54        self
55    }
56
57    pub fn into_bytes(self) -> Vec<u8> {
58        self.bytes
59    }
60
61    pub fn push<P: Into<OwnedPath<Rel>>>(&mut self, other: P) {
62        let other_path = other.into();
63
64        match self.len().get().checked_add(other_path.len().get()) {
65            // TODO: Standardize crate panic method.
66            Some(l) if l > isize::MAX as usize => Err(CapacityOverflow).throw(),
67            None                               => Err(CapacityOverflow).throw(),
68            Some(_)                            => (),
69        }
70
71        // We've already determined that this won't surpass size::MAX.
72        self.bytes.reserve(other_path.len().get());
73
74        // Path is designed in such a way that two valid Paths can't be concatenated to create an
75        // invalid Path.
76        self.bytes.extend(other_path.bytes);
77    }
78
79    pub fn pop(&mut self) -> Option<OwnedPath<Rel>> {
80        if self.bytes.len() == 1 {
81            // If a Path has length 1, it only contains b'/', and nothing can be popped from it.
82            return None;
83        }
84
85        // A Path has at least one character, so subtracting 1 from the length can't be less than 0.
86        let mut index = self.bytes.len() - 1;
87
88        while let Some(ch) = self.bytes.get(index) && *ch != b'/' {
89            // A Path has to start with a b'/', so entering this loop already confirms that there is
90            // another character preceding this one.
91            index -= 1;
92        }
93
94        // The index is guaranteed to be less than the length of bytes, so this can't panic.
95        let split = self.bytes.split_off(index);
96
97        if self.bytes.is_empty() {
98            // We've literally just checked that bytes is empty, to a single push can't panic.
99            self.bytes.push(b'/');
100        }
101
102        Some(OwnedPath::<Rel> {
103            _state: PhantomData,
104            bytes: split,
105        })
106    }
107
108
109
110    /// Creates an owned [`Iterator`] over the components of an `OwnedPath`. This iterator produces
111    /// `OwnedPath<Rel>` representing each `/`-separated string in the Path, from left to right.
112    pub fn into_components(self) -> IntoComponents<S> {
113        IntoComponents {
114            _state: PhantomData,
115            path: self.into_bytes(),
116        }
117    }
118}
119
120impl<S: PathState> Path<S> {
121    pub fn new<'a, O: AsRef<OsStr> + ?Sized>(value: &'a O) -> Cow<'a, Path<S>> {
122        match validity::validate(value.as_ref()) {
123            Some(_) => Cow::Borrowed(unsafe { Path::from_unchecked(value) }),
124            None    => Cow::Owned(OwnedPath::from(value)),
125        }
126    }
127
128    pub fn from_checked<O: AsRef<OsStr> + ?Sized>(value: &O) -> Option<&Path<S>> {
129        validity::validate(value.as_ref())?;
130        Some(unsafe { Path::from_unchecked(value) })
131    }
132
133    pub unsafe fn from_unchecked<O: AsRef<OsStr> + ?Sized>(value: &O) -> &Self {
134        // SAFETY: Path<S> is `repr(transparent)`, so to it has the same layout as OsStr.
135        unsafe { &*(value.as_ref() as *const OsStr as *const Self) }
136    }
137
138    pub unsafe fn from_unchecked_mut<O: AsMut<OsStr> + ?Sized>(value: &mut O) -> &mut Self {
139        // SAFETY: Path<S> is `repr(transparent)`, so to it has the same layout as OsStr.
140        unsafe { &mut *(value.as_mut() as *mut OsStr as *mut Self) }
141    }
142
143    pub const unsafe fn from_unchecked_bytes(value: &[u8]) -> &Self {
144        // SAFETY: Path<S> is `repr(transparent)`, so to it has the same layout as &[u8].
145        unsafe { &*(value as *const [u8] as *const Self) }
146    }
147
148    pub const fn display<'a>(&'a self) -> DisplayPath<'a, S> {
149        DisplayPath::<S> {
150            _phantom: PhantomData,
151            inner: self,
152        }
153    }
154
155    pub const fn len(&self) -> NonZero<usize> {
156        unsafe { NonZero::new(self.bytes.len()).unwrap_unchecked() }
157    }
158
159    pub fn as_os_str(&self) -> &OsStr {
160        OsStr::from_bytes(&self.bytes)
161    }
162
163    pub fn as_os_str_no_lead(&self) -> &OsStr {
164        OsStr::from_bytes(&self.as_bytes()[1..])
165    }
166
167    pub const fn as_bytes(&self) -> &[u8] {
168        &self.bytes
169    }
170
171    // TODO: no_lead methods
172
173    /// Returns the basename of this path (the OsStr following the last `/` in the path). This OsStr
174    /// won't contain any instances of `/`.
175    /// 
176    /// See [`parent()`](Path::parent) for more info.
177    pub fn basename(&self) -> &OsStr {
178        let bytes = self.as_bytes();
179
180        let mut index = bytes.len() - 1;
181
182        while let Some(ch) = bytes.get(index) && *ch != b'/' {
183            index -= 1;
184        }
185
186        OsStr::from_bytes(&bytes[(index + 1)..])
187    }
188
189    /// Returns the parent directory of this path (lexically speaking). The result is a Path with
190    /// basename and the preceding slash removed, such that the following holds for any `path`.
191    /// 
192    /// ```
193    /// # use standard_lib::fs::path::Path;
194    /// let owned = OwnedPath::<Abs>::from("/my/path");
195    /// let path: &Path<Abs> = &owned;
196    /// let new_path = path.parent().join(Path::new(path.basename()));
197    /// assert_eq!(path, new_path);
198    /// ```
199    /// 
200    /// Because this method is the counterpart of [`basename`](Path::basename) and `basename` won't
201    /// contain any `/`, the behavior when calling these methods on `"/"` is as follows:
202    /// 
203    /// ```
204    /// # use standard_lib::fs::path::Path;
205    /// assert_eq!(Path::root().basename(), "");
206    /// assert_eq!(Path::root().parent(), Path::root());
207    /// ```
208    ///
209    /// This behavior is also consistent with Unix defaults: the `..` entry in the root directory
210    /// refers to the root itself.
211    pub fn parent(&self) -> &Self {
212        let bytes = self.as_bytes();
213
214        let mut index = bytes.len() - 1;
215
216        while let Some(ch) = bytes.get(index) && *ch != b'/' {
217            index -= 1;
218        }
219        
220        // If we would return an empty string, instead include the first slash representing the
221        // absolute or relative root.
222        if index == 0 {
223            index = 1;
224        }
225
226        unsafe { Path::from_unchecked_bytes(&bytes[..index]) }
227    }
228
229    pub fn join<P: AsRef<Path<Rel>>>(&self, other: P) -> OwnedPath<S> {
230        let mut bytes = Vec::with_capacity(self.bytes.len() + other.as_ref().bytes.len());
231        bytes.extend(&self.bytes);
232        bytes.extend(&other.as_ref().bytes);
233        unsafe {
234            OwnedPath::<S>::from_unchecked_bytes(bytes)
235        }
236    }
237
238    pub fn relative_to(&self, other: &Self) -> Option<&Path<Rel>> {
239        // As a general note for path interpretation: paths on Linux have no encoding, with the only
240        // constant being that they are delimited by b'/'. Because of this, we don't have to
241        // consider encoding, and splitting by b"/" is always entirely valid because thats what
242        // Linux does, even if b'/' is a later part of a variable-length character.
243        match self.bytes.strip_prefix(&other.bytes) {
244            None => None,
245            // If there is no leading slash, strip_prefix matched only part of a component so
246            // treat it as a fail.
247            Some(replaced) if !replaced.starts_with(b"/") => None,
248            // SAFETY: If the relative path starts with a b"/", then it is still a valid Path.
249            Some(replaced) => unsafe {
250                Some(Path::<Rel>::from_unchecked_bytes(replaced))
251            },
252        }
253    }
254
255    /// Creates an [`Iterator`] over the components of a `Path`. This iterator produces `Path<Rel>`s
256    /// representing each `/`-separated string in the Path, from left to right.
257    pub fn components<'a>(&'a self) -> Components<'a, S> {
258        Components {
259            _state: PhantomData,
260            path: self.as_bytes(),
261            head: 0,
262        }
263    }
264
265    /// Creates an [`Iterator`] over the ancestors of a `Path`. This iterator produces `Path<S>`s
266    /// representing each directory in the Path ordered with descending depth and ending with the
267    /// Path itself.
268    pub fn ancestors<'a>(&'a self) -> Ancestors<'a, S> {
269        Ancestors {
270            _state: PhantomData,
271            path: self.as_bytes(),
272            index: 0,
273        }
274    }
275}
276
277impl<S: PathState> From<OwnedPath<S>> for CString {
278    fn from(value: OwnedPath<S>) -> Self {
279        // SAFETY: OsString already guarantees that the internal string contains no '\0'.
280        unsafe { CString::from_vec_unchecked(value.bytes) }
281    }
282}
283
284impl<S: PathState> Deref for OwnedPath<S> {
285    type Target = Path<S>;
286
287    fn deref(&self) -> &Self::Target {
288        // SAFETY: OwnedPath upholds the same invariants as Path.
289        unsafe { Path::<S>::from_unchecked_bytes(&self.bytes) }
290    }
291}
292
293impl<S: PathState> From<&Path<S>> for OwnedPath<S> {
294    fn from(value: &Path<S>) -> Self {
295        value.to_owned()
296    }
297}
298
299impl<S: PathState> AsRef<Path<S>> for OwnedPath<S> {
300    fn as_ref(&self) -> &Path<S> {
301        self.deref()
302    }
303}
304
// The standard library provides no reflexive blanket `impl AsRef<T> for T`, so the identity
// conversion is written out here. `Path::join` accepts `AsRef<Path<Rel>>`, and this impl is what
// lets a plain `&Path<Rel>` be passed to it.
impl<S: PathState> AsRef<Path<S>> for Path<S> {
    fn as_ref(&self) -> &Path<S> {
        self
    }
}
311
312impl<S: PathState> Borrow<Path<S>> for OwnedPath<S> {
313    fn borrow(&self) -> &Path<S> {
314        self.as_ref()
315    }
316}
317
318// AsRef<OsStr> causes conflicting implementations and makes it slightly too easy to interpret a
319// Path as an OsStr. The same functionality has been moved to Path::as_os_str(), which requires
320// explicit usage. Otherwise, users can accidentally convert between Path<Abs> and Path<Rel>.
321// impl<S: PathState> AsRef<OsStr> for Path<S> {
322//     fn as_ref(&self) -> &OsStr {
323//         &self.inner
324//     }
325// }
326
327impl<S: PathState> ToOwned for Path<S> {
328    type Owned = OwnedPath<S>;
329
330    fn to_owned(&self) -> Self::Owned {
331        OwnedPath::<S> {
332            _state: PhantomData,
333            bytes: self.bytes.to_vec(),
334        }
335    }
336}
337
338impl<S: PathState> Clone for OwnedPath<S> {
339    fn clone(&self) -> Self {
340        Self {
341            _state: PhantomData,
342            bytes: self.bytes.clone()
343        }
344    }
345}
346
347impl<S: PathState> PartialEq for OwnedPath<S> {
348    fn eq(&self, other: &Self) -> bool {
349        self.as_ref().bytes == other.as_ref().bytes
350    }
351}
352
353impl<S: PathState> PartialEq for Path<S> {
354    fn eq(&self, other: &Self) -> bool {
355        self.bytes == other.bytes
356    }
357}
358
359impl<S: PathState> PartialEq<Path<S>> for OwnedPath<S> {
360    fn eq(&self, other: &Path<S>) -> bool {
361        self.as_ref().bytes == other.bytes
362    }
363}
364
365impl<S: PathState> PartialEq<OwnedPath<S>> for Path<S> {
366    fn eq(&self, other: &OwnedPath<S>) -> bool {
367        self.bytes == other.as_ref().bytes
368    }
369}
370
// Equality is plain byte equality (see the `PartialEq` impl), which is a total equivalence.
impl<S: PathState> Eq for OwnedPath<S> {}
372
// Equality is plain byte equality (see the `PartialEq` impl), which is a total equivalence.
impl<S: PathState> Eq for Path<S> {}
374
375impl<S: PathState> PartialOrd for OwnedPath<S> {
376    fn partial_cmp(&self, other: &Self) -> Option<Ordering> {
377        Some(self.cmp(other))
378    }
379}
380
381impl<S: PathState> Ord for OwnedPath<S> {
382    fn cmp(&self, other: &Self) -> Ordering {
383        self.as_ref().bytes.cmp(&other.as_ref().bytes)
384    }
385}
386
387impl<S: PathState> PartialOrd for Path<S> {
388    fn partial_cmp(&self, other: &Self) -> Option<Ordering> {
389        Some(self.cmp(other))
390    }
391}
392
393impl<S: PathState> Ord for Path<S> {
394    fn cmp(&self, other: &Self) -> Ordering {
395        self.bytes.cmp(&other.bytes)
396    }
397}
398
399impl<S: PathState> PartialOrd<Path<S>> for OwnedPath<S> {
400    fn partial_cmp(&self, other: &Path<S>) -> Option<Ordering> {
401        Some(self.as_ref().cmp(other))
402    }
403}
404
405impl<S: PathState> PartialOrd<OwnedPath<S>> for Path<S> {
406    fn partial_cmp(&self, other: &OwnedPath<S>) -> Option<Ordering> {
407        Some(self.cmp(other.as_ref()))
408    }
409}
410
411impl<S: PathState> Debug for OwnedPath<S> {
412    fn fmt(&self, f: &mut Formatter<'_>) -> fmt::Result {
413        f.debug_struct("OwnedPath")
414            .field("<state>", &util::fmt::raw_type_name::<S>())
415            .field("value", &OsStr::from_bytes(&self.bytes))
416            .finish()
417    }
418}
419
420impl<S: PathState> Debug for Path<S> {
421    fn fmt(&self, f: &mut Formatter<'_>) -> fmt::Result {
422        f.debug_struct("Path")
423            .field("<state>", &util::fmt::raw_type_name::<S>())
424            .field("value", &OsStr::from_bytes(&self.bytes))
425            .finish()
426    }
427}