Skip to content

Commit

Permalink
merge
Browse files Browse the repository at this point in the history
  • Loading branch information
farooqkz committed Jun 9, 2024
2 parents baeeb79 + bcb0895 commit 2d64dba
Showing 1 changed file with 120 additions and 0 deletions.
120 changes: 120 additions & 0 deletions src/parser/utils.rs.orig
Original file line number Diff line number Diff line change
@@ -0,0 +1,120 @@
use std::ops::RangeInclusive;
use super::unicode_ranges::UNICODE_PUNCTUATION_RANGES;

#[derive(Debug, PartialEq, Eq)]
enum FindRangeResult<'a> {
WasOnRangeStart,
Range(&'a RangeInclusive<u32>),
}

/// Find a range which `code` might be in it.
///
/// # Description
/// This function gets a sorted slice of inclusive u32 ranges, performs
/// binary search on them and returns a FindRangeResult enum telling
/// which range the `code` might be in. It returns `FindRangeResult::WasOnRangeStart`
/// if the code was exactly on start of a range. Or a `FindRangeResult::Range(range)`
/// which indicates `code` is in `range` or in no ranges.
///
/// # Arguments
///
/// - `code` the u32 to look for a range for.
///
/// - `ranges` a refernce to a slice of `RangeInclusive<u32>`
fn find_range_for_char(code: u32, ranges: &'_ [RangeInclusive<u32>]) -> FindRangeResult<'_> {
let index = ranges.binary_search_by_key(&code, |range| *range.start());
match index {
Ok(_) => FindRangeResult::WasOnRangeStart,
Err(index) => match index {
#[allow(clippy::arithmetic_side_effects, clippy::indexing_slicing)]
0 => FindRangeResult::Range(&ranges[0]),
// Since `index` can never be 0, `index - 1` will never overflow. Furthermore, the
// maximum value which the binary search function returns is `NUMBER_OF_RANGES`.
// Therefore, `index - 1` will never panic if we index the array with it.
#[allow(clippy::arithmetic_side_effects, clippy::indexing_slicing)]
index => FindRangeResult::Range(&ranges[index - 1]),
},
}
}

/// Returns true of `c` is one of the `ranges`, false otherwise.
///
/// # Arguments
///
/// - `c` A number(u32)
///
/// - `ranges` A sorted slice of ranges to see if `c` is in anyone of them
pub fn is_in_one_of_ranges(c: u32, ranges: &[RangeInclusive<u32>]) -> bool {
match find_range_for_char(c, ranges) {
FindRangeResult::WasOnRangeStart => true,
FindRangeResult::Range(range) => range.contains(&c),
}
}

#[inline(always)]
pub(crate) fn is_alpha(c: char) -> bool {
c.is_alphabetic()
}

#[inline(always)]
pub(crate) fn is_hex_digit(c: char) -> bool {
c.is_ascii_hexdigit()
}

#[inline(always)]
pub(crate) fn is_digit(c: char) -> bool {
c.is_ascii_digit()
}

pub(crate) fn is_sub_delim(c: char) -> bool {
matches!(
c,
'!' | '$' | '&' | '\'' | '(' | ')' | '*' | '+' | ',' | ';' | '='
)
}

pub(crate) fn is_unreserved(c: char) -> bool {
is_alpha(c) || is_digit(c) || matches!(c, '_' | '.' | '-' | '~')
}

pub(crate) fn is_white_space(c: char) -> bool {
matches!(c, '\n' | '\r' | '\t' | ' ')
}

<<<<<<< HEAD
pub(crate) fn is_unicode_white_space(c: char) -> bool {
is_white_space(c) ||
matches!(c as u32,
=======
pub(crate) fn is_unicode_whitespace(c: char) -> bool {
is_white_space(c) ||
matches!(c,
>>>>>>> bcb08956c51315a52bfb01b5c6472cd77c645748
0x20 |
0xa0 |
0x1680..=0x1680 |
0x2000..=0x200a |
0x202f..=0x202f |
0x205f..=0x205f |
0x3000..=0x3000)
// These ranges are extracted from unicode DB using
// the script /scripts/extract_unicode_whitespace_ranges.py
// -- Farooq fkz riseup.net
// farooqkz testrun.org
}

<<<<<<< HEAD
pub(crate) fn is_unicode_punctuation(c: char) -> bool {
=======
pub(crate) fn is_unicode_punctutation(c: char) -> bool {
>>>>>>> bcb08956c51315a52bfb01b5c6472cd77c645748
is_in_one_of_ranges(c as u32, UNICODE_PUNCTUATION_RANGES[..])
}

pub(crate) fn is_not_white_space(c: char) -> bool {
!is_white_space(c)
}

pub(crate) fn is_white_space_but_not_linebreak(c: char) -> bool {
matches!(c, '\t' | ' ')
}

0 comments on commit 2d64dba

Please sign in to comment.