From dfa328533aae0666852cad97c309b2be16b7a826 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Tin=20=C5=A0vagelj?= Date: Wed, 20 Nov 2024 14:57:50 +0100 Subject: [PATCH] Improve documentation MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit - Move comparison table to a separate section. - Use CSS icons to make table more readable. - Refer to the table from backend documentations. - Explain how backends store and manipulate interned data. Signed-off-by: Tin Å vagelj --- src/backend/bucket/mod.rs | 60 +++---- src/backend/buffer.rs | 38 ++-- src/backend/string.rs | 47 ++--- src/docs.rs | 356 ++++++++++++++++++++++++++++++++++++++ src/interner.rs | 14 +- src/lib.rs | 85 ++++----- 6 files changed, 467 insertions(+), 133 deletions(-) create mode 100644 src/docs.rs diff --git a/src/backend/bucket/mod.rs b/src/backend/bucket/mod.rs index 57bde81..4ecf2b4 100644 --- a/src/backend/bucket/mod.rs +++ b/src/backend/bucket/mod.rs @@ -9,39 +9,33 @@ use crate::{symbol::expect_valid_symbol, DefaultSymbol, Symbol}; use alloc::{string::String, vec::Vec}; use core::{iter::Enumerate, marker::PhantomData, slice}; -/// An interner backend that reduces memory allocations by using string buckets. -/// -/// # Note -/// -/// Implementation inspired by matklad's blog post that can be found here: -/// -/// -/// # Usage Hint -/// -/// Use when deallocations or copy overhead is costly or when -/// interning of static strings is especially common. -/// -/// # Usage -/// -/// - **Fill:** Efficiency of filling an empty string interner. -/// - **Resolve:** Efficiency of interned string look-up given a symbol. -/// - **Allocations:** The number of allocations performed by the backend. -/// - **Footprint:** The total heap memory consumed by the backend. -/// - **Contiguous:** True if the returned symbols have contiguous values. -/// - **Iteration:** Efficiency of iterating over the interned strings. 
-/// -/// Rating varies between **bad**, **ok**, **good** and **best**. -/// -/// | Scenario | Rating | -/// |:------------|:--------:| -/// | Fill | **good** | -/// | Resolve | **best** | -/// | Allocations | **good** | -/// | Footprint | **ok** | -/// | Supports `get_or_intern_static` | **yes** | -/// | `Send` + `Sync` | **yes** | -/// | Contiguous | **yes** | -/// | Iteration | **best** | +/// An interner backend that reduces memory allocations by using buckets. +/// +/// # Overview +/// This interner uses fixed-size buckets to store interned strings. Each bucket is +/// allocated once and holds a set number of strings. When a bucket becomes full, a new +/// bucket is allocated to hold more strings. Buckets are never deallocated, which reduces +/// the overhead of frequent memory allocations and copying. +/// +/// ## Trade-offs +/// - **Advantages:** +/// - Strings in already used buckets remain valid and accessible even as new strings +/// are added. +/// - **Disadvantages:** +/// - Slightly slower access times due to double indirection (looking up the string +/// involves an extra level of lookup through the bucket). +/// - Memory may be used inefficiently if many buckets are allocated but only partially +/// filled because of large strings. +/// +/// ## Use Cases +/// This backend is ideal when interned strings must remain valid even after new ones are +/// added. +/// +/// Refer to the [comparison table][crate::_docs::comparison_table] for comparison with +/// other backends. 
+/// +/// [matklad's blog post]: +/// https://matklad.github.io/2020/03/22/fast-simple-rust-interner.html #[derive(Debug)] pub struct BucketBackend<'i, S: Symbol = DefaultSymbol> { spans: Vec, diff --git a/src/backend/buffer.rs b/src/backend/buffer.rs index 40912e9..df59121 100644 --- a/src/backend/buffer.rs +++ b/src/backend/buffer.rs @@ -5,34 +5,22 @@ use crate::{symbol::expect_valid_symbol, DefaultSymbol, Symbol}; use alloc::vec::Vec; use core::{mem, str}; -/// An interner backend that appends all interned string information in a single buffer. +/// An interner backend that concatenates all interned string contents into one large +/// buffer [`Vec`]. Unlike [`StringBackend`][crate::backend::StringBackend], string +/// lengths are stored in the same buffer as strings preceding the respective string +/// data. /// -/// # Usage Hint +/// ## Trade-offs +/// - **Advantages:** +/// - Accessing interned strings is fast, as it requires a single lookup. +/// - **Disadvantages:** +/// - Iteration is slow because it requires consecutive reading of lengths to advance. /// -/// Use this backend if memory consumption is what matters most to you. -/// Note though that unlike all other backends symbol values are not contigous! +/// ## Use Cases +/// This backend is ideal for storing many small (<255 characters) strings. /// -/// # Usage -/// -/// - **Fill:** Efficiency of filling an empty string interner. -/// - **Resolve:** Efficiency of interned string look-up given a symbol. -/// - **Allocations:** The number of allocations performed by the backend. -/// - **Footprint:** The total heap memory consumed by the backend. -/// - **Contiguous:** True if the returned symbols have contiguous values. -/// - **Iteration:** Efficiency of iterating over the interned strings. -/// -/// Rating varies between **bad**, **ok**, **good** and **best**. 
-/// -/// | Scenario | Rating | -/// |:------------|:--------:| -/// | Fill | **best** | -/// | Resolve | **bad** | -/// | Allocations | **best** | -/// | Footprint | **best** | -/// | Supports `get_or_intern_static` | **no** | -/// | `Send` + `Sync` | **yes** | -/// | Contiguous | **no** | -/// | Iteration | **bad** | +/// Refer to the [comparison table][crate::_docs::comparison_table] for comparison with +/// other backends. #[derive(Debug)] pub struct BufferBackend<'i, S: Symbol = DefaultSymbol> { len_strings: usize, diff --git a/src/backend/string.rs b/src/backend/string.rs index 8ed5c86..03d0276 100644 --- a/src/backend/string.rs +++ b/src/backend/string.rs @@ -5,38 +5,27 @@ use crate::{symbol::expect_valid_symbol, DefaultSymbol, Symbol}; use alloc::{string::String, vec::Vec}; use core::{iter::Enumerate, slice}; -/// An interner backend that accumulates all interned string contents into one string. +/// An interner backend that concatenates all interned string contents into one large +/// buffer and keeps track of string bounds in a separate [`Vec`]. +/// +/// Implementation is inspired by [CAD97's](https://github.com/CAD97) +/// [`strena`](https://github.com/CAD97/strena) crate. /// -/// # Note +/// ## Trade-offs +/// - **Advantages:** +/// - Separated length tracking allows fast iteration. +/// - **Disadvantages:** +/// - Many insertions separated by external allocations can cause the buffer to drift +/// far away (in memory) from `Vec` storing string ends, which impedes performance of +/// all interning operations. +/// - Resolving a symbol requires two heap lookups because data and length are stored in +/// separate containers. /// -/// Implementation inspired by [CAD97's](https://github.com/CAD97) research -/// project [`strena`](https://github.com/CAD97/strena). +/// ## Use Cases +/// This backend is good for storing fewer large strings and for general use. 
/// -/// # Usage Hint -/// -/// Use this backend if runtime performance is what matters most to you. -/// -/// # Usage -/// -/// - **Fill:** Efficiency of filling an empty string interner. -/// - **Resolve:** Efficiency of interned string look-up given a symbol. -/// - **Allocations:** The number of allocations performed by the backend. -/// - **Footprint:** The total heap memory consumed by the backend. -/// - **Contiguous:** True if the returned symbols have contiguous values. -/// - **Iteration:** Efficiency of iterating over the interned strings. -/// -/// Rating varies between **bad**, **ok**, **good** and **best**. -/// -/// | Scenario | Rating | -/// |:------------|:--------:| -/// | Fill | **good** | -/// | Resolve | **ok** | -/// | Allocations | **good** | -/// | Footprint | **good** | -/// | Supports `get_or_intern_static` | **no** | -/// | `Send` + `Sync` | **yes** | -/// | Contiguous | **yes** | -/// | Iteration | **good** | +/// Refer to the [comparison table][crate::_docs::comparison_table] for comparison with +/// other backends. #[derive(Debug)] pub struct StringBackend<'i, S: Symbol = DefaultSymbol> { ends: Vec, diff --git a/src/docs.rs b/src/docs.rs new file mode 100644 index 0000000..75cabdb --- /dev/null +++ b/src/docs.rs @@ -0,0 +1,356 @@ +//! Crate documentation supplements +//! +//! + +/// Stylesheet that adds simple clip-path based icons. +/// +/// They're used like so: +/// ```html +/// +/// ``` +/// +/// `icon-name`: is a meaningful description of icon meaning. +/// +/// This satisfies ARIA requirements and looks as expected. +macro_rules! icons { + () => { r#""# + } +} + +macro_rules! doc_item { + ($title: literal) => { concat![ + "# ", $title, "\n\n", + r#""#]} +} + +pub mod _01_comparison_table { + #![doc = doc_item!("Comparison Table")] + //!
+ //!
+ //! + //! | **Property** | [`BucketBackend`] | [`StringBackend`] | [`BufferBackend`] | + //! |:-----------------------------------------------------|:--:|:--:|:--:| + //! | [**Insertion**](#table-prop-insert) | | | | + //! | [**Resolution**](#table-prop-resolve) | | | | + //! | [**Allocations**](#table-prop-alloc) | | | | + //! | [**Memory footprint**](#table-prop-size) | | | | + //! | [**Iteration**](#table-prop-iteration) | | | | + //! | [**Contiguous**](#table-prop-contiguous) | | | | + //! | [**Stable addresses**](#table-prop-stable-addr) | | | | + //! | [**Intern `'static`**](#table-prop-static) | | | | + //! | [**Concurrent symbols**](#table-prop-concurrent-sym) | | | | + //! + //!
+ //! + //! #### Legend + //! + //!
    + //!
  • Best
  • + //!
  • Good
  • + //!
  • Ok
  • + //!
  • Bad
  • + //!
+ //!
    + //!
  • Yes
  • + //!
  • No
  • + //!
+ //!
+ //!
+ //! + //! + //! + //! #### Properties + //! + //! - **Insertion:** Efficiency of interning new + //! strings. + //! - **Resolution:** Efficiency of resolving a + //! symbol of an interned string. + //! - **Allocations:** The number of + //! (re-)allocations performed by the backend. + //! - **Memory footprint:** Heap memory consumption + //! characteristics for the backend. + //! - **Iteration:** Efficiency of iterating + //! over the interned strings. + //! + //! - **Contiguous:** True if the interned + //! symbols are contiguously stored in memory. + //! - **Stable addresses:** True if resolved + //! strings won't be moved until the interner is dropped. + //! - **Intern `'static`:** True if interner can + //! resolve symbols to statically allocated strings that have been inserted using + //! [`StringInterner::get_or_intern_static`]. + //! - **Concurrent symbols:** True if + //! returned symbols are [`Send`] + [`Sync`]. + //! + #![doc = icons!()] + + use crate::interner::*; + use crate::backend::*; + use crate::symbol::*; + use std::marker::*; +} +pub use _01_comparison_table as comparison_table; diff --git a/src/interner.rs b/src/interner.rs index a95cede..905838e 100644 --- a/src/interner.rs +++ b/src/interner.rs @@ -242,18 +242,22 @@ where } /// Interns the given `'static` string. - /// + /// /// Returns a symbol for resolution into the original string. + /// + /// If the backend supports [`'static` interning][crate::_docs::comparison_table], + /// later calls to this or [`get_or_intern`][StringInterner::get_or_intern] function + /// will return a symbol that resolves to the original `&'static str` reference. /// /// # Note /// - /// This is more efficient than [`StringInterner::get_or_intern`] since it might - /// avoid some memory allocations if the backends supports this. + /// This is more efficient than [`StringInterner::get_or_intern`] since it might avoid + /// some memory allocations if the backend supports this. 
/// /// # Panics /// - /// If the interner already interns the maximum number of strings possible - /// by the chosen symbol type. + /// If the interner already interns the maximum number of strings possible by the + /// chosen symbol type. #[inline] pub fn get_or_intern_static(&mut self, string: &'static str) -> >::Symbol { self.get_or_intern_using(string, B::intern_static) diff --git a/src/lib.rs b/src/lib.rs index c4423ea..61f7c84 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -2,10 +2,19 @@ #![doc(html_root_url = "https://docs.rs/crate/string-interner/0.18.0")] #![warn(unsafe_op_in_unsafe_fn, clippy::redundant_closure_for_method_calls)] -//! Caches strings efficiently, with minimal memory footprint and associates them with unique symbols. -//! These symbols allow constant time comparisons and look-ups to the underlying interned strings. -//! -//! ### Example: Interning & Symbols +//! Caches strings efficiently, with minimal memory footprint and associates them with +//! unique symbols. These symbols allow constant time equality comparison and look-ups to +//! the underlying interned strings. +//! +//! For more information on purpose of string interning, refer to the corresponding +//! [wikipedia article]. +//! +//! See the [**comparison table**](crate::_docs::comparison_table) for a detailed +//! comparison summary of different backends. +//! +//! ## Examples +//! +//! #### Interning & Symbols //! //! ``` //! use string_interner::StringInterner; @@ -21,7 +30,7 @@ //! assert_eq!(sym1, sym3); // same! //! ``` //! -//! ### Example: Creation by `FromIterator` +//! #### Creation by `FromIterator` //! //! ``` //! # use string_interner::DefaultStringInterner; @@ -30,7 +39,7 @@ //! .collect::(); //! ``` //! -//! ### Example: Look-up +//! #### Look-up //! //! ``` //! # use string_interner::StringInterner; @@ -39,7 +48,7 @@ //! assert_eq!(interner.resolve(sym), Some("Banana")); //! ``` //! -//! ### Example: Iteration +//! #### Iteration //! //! ``` //! 
# use string_interner::{DefaultStringInterner, Symbol}; @@ -49,7 +58,7 @@ //! } //! ``` //! -//! ### Example: Use Different Backend +//! #### Use Different Backend //! //! ``` //! # use string_interner::StringInterner; @@ -63,7 +72,7 @@ //! assert_eq!(sym1, sym3); // same! //! ``` //! -//! ### Example: Use Different Backend & Symbol +//! #### Use Different Backend & Symbol //! //! ``` //! # use string_interner::StringInterner; @@ -79,44 +88,38 @@ //! //! ## Backends //! -//! The `string_interner` crate provides different backends with different strengths. -//! The table below compactly shows when to use which backend according to the following -//! performance characteristics. -//! -//! - **Fill:** Efficiency of filling an empty string interner. -//! - **Resolve:** Efficiency of resolving a symbol of an interned string. -//! - **Allocations:** The number of allocations performed by the backend. -//! - **Footprint:** The total heap memory consumed by the backend. -//! - **Contiguous:** True if the returned symbols have contiguous values. -//! - **Iteration:** Efficiency of iterating over the interned strings. -//! -//! | **Property** | **BucketBackend** | **StringBackend** | **BufferBackend** | -//! |:-------------|:-----------------:|:-----------------:|:-----------------:| -//! | **Fill** | ok | good | best | -//! | **Resolve** | best | good | bad | -//! | Allocations | ok | good | best | -//! | Footprint | ok | good | best | -//! | Contiguous | yes | yes | no | -//! | Iteration | best | good | bad | +//! The `string_interner` crate provides different backends with different strengths.
+//! +//! #### [Bucket Backend](backend/struct.BucketBackend.html) +//! +//! Stores strings in buckets which stay allocated for the lifespan of [`StringInterner`]. +//! This allows resolved symbols to be used even after new strings have been interned. //! -//! ## When to use which backend? +//! **Ideal for:** storing strings in a persistent location in memory //! -//! ### Bucket Backend +//! #### [String Backend](backend/struct.StringBackend.html) +//! +//! Concatenates all interned string contents into one large buffer +//! [`String`][alloc::string::String], keeping interned string lengths in a separate +//! [`Vec`][alloc::vec::Vec]. //! -//! Given the table above the `BucketBackend` might seem inferior to the other backends. -//! However, it allows to efficiently intern `&'static str` and avoids deallocations. +//! **Ideal for:** general use //! -//! ### String Backend +//! #### [Buffer Backend](backend/struct.BufferBackend.html) //! -//! Overall the `StringBackend` performs really well and therefore is the backend -//! that the `StringInterner` uses by default. +//! Concatenates all interned string contents into one large buffer +//! [`Vec`][alloc::vec::Vec], and keeps interned string lengths as prefixes. //! -//! ### Buffer Backend -//! -//! The `BufferBackend` is in some sense similar to the `StringBackend` on steroids. -//! Some operations are even slightly more efficient and it consumes less memory. -//! However, all this is at the costs of a less efficient resolution of symbols. -//! Note that the symbols generated by the `BufferBackend` are not contiguous. +//! **Ideal for:** storing many small (<255 characters) strings +//! +//! [Comparison table][crate::_docs::comparison_table] shows a high-level overview of +//! different backend characteristics. +//! +//! [wikipedia article]: https://en.wikipedia.org/wiki/String_interning + +#[cfg(doc)] +#[path ="docs.rs"] +pub mod _docs; extern crate alloc; #[cfg(feature = "std")]