diff --git a/message_parser_wasm/example.js b/message_parser_wasm/example.js
index e248493..a17aeb3 100644
--- a/message_parser_wasm/example.js
+++ b/message_parser_wasm/example.js
@@ -96,6 +96,12 @@ function renderElement(elm) {
       );
       return bcs;
 
+    case "TelephoneNumber":
+      let tn = document.createElement("a");
+      tn.innerText = elm.c.number;
+      tn.href = elm.c.tel_link;
+      return tn;
+
     case "Linebreak":
       return document.createElement("br");
 
diff --git a/message_parser_wasm/src/lib.rs b/message_parser_wasm/src/lib.rs
index cb6d75f..5bf2696 100644
--- a/message_parser_wasm/src/lib.rs
+++ b/message_parser_wasm/src/lib.rs
@@ -57,5 +57,6 @@ export type ParsedElement =
   | {
       t: "LabeledLink";
       c: { label: ParsedElement[]; destination: LinkDestination };
-    };
+    }
+  | {t: "TelephoneNumber", c: {number: string, tel_link: string}};
 "#;
diff --git a/message_parser_wasm/src/manual_typings.ts b/message_parser_wasm/src/manual_typings.ts
index 2a7b7a0..3ff2e60 100644
--- a/message_parser_wasm/src/manual_typings.ts
+++ b/message_parser_wasm/src/manual_typings.ts
@@ -25,4 +25,5 @@ export type ParsedElement =
   | {
       t: "LabeledLink";
       c: { label: ParsedElement[]; destination: LinkDestination };
-    };
+    }
+  | {t: "TelephoneNumber", c: {number: string, tel_link: string}};
diff --git a/src/parser/mod.rs b/src/parser/mod.rs
index dc71d0f..ed1243b 100644
--- a/src/parser/mod.rs
+++ b/src/parser/mod.rs
@@ -21,6 +21,12 @@ pub enum Element<'a> {
     Link {
         destination: LinkDestination<'a>,
     },
+    TelephoneNumber {
+        /// number exactly how it was found in the input text
+        number: &'a str,
+        /// the tel: link (without special chars, but keeps the + in the beginning if it is present)
+        tel_link: String,
+    },
     EmailAddress(&'a str),
     // Later:
     // Mention {
diff --git a/src/parser/parse_from_text/base_parsers.rs b/src/parser/parse_from_text/base_parsers.rs
index 9881d36..de249e6 100644
--- a/src/parser/parse_from_text/base_parsers.rs
+++ b/src/parser/parse_from_text/base_parsers.rs
@@ -19,6 +19,7 @@ pub enum CustomError {
     UnexpectedContent,
     PrecedingWhitespaceMissing,
     OptionIsUnexpectedNone,
+    PhoneNumberNotEnoughDigits,
     UnxepectedError(String),
 }
 
diff --git a/src/parser/parse_from_text/mod.rs b/src/parser/parse_from_text/mod.rs
index a3180f4..95cd7d9 100644
--- a/src/parser/parse_from_text/mod.rs
+++ b/src/parser/parse_from_text/mod.rs
@@ -4,6 +4,7 @@ pub(crate) mod base_parsers;
 mod desktop_subset;
 pub mod hashtag_content_char_ranges;
 mod markdown_elements;
+mod phone_numbers;
 mod text_elements;
 
 /// parses text elements such as links and email addresses, excluding markdown
diff --git a/src/parser/parse_from_text/phone_numbers.rs b/src/parser/parse_from_text/phone_numbers.rs
new file mode 100644
index 0000000..ef0db1a
--- /dev/null
+++ b/src/parser/parse_from_text/phone_numbers.rs
@@ -0,0 +1,165 @@
+use super::base_parsers::*;
+use super::Element;
+
+use nom::bytes::complete::{tag, take_while, take_while_m_n};
+use nom::character::complete::satisfy;
+use nom::combinator::{consumed, opt, recognize};
+use nom::sequence::{delimited, tuple};
+use nom::IResult;
+
+/// Maximum number of digits accepted for the optional country code.
+const MAX_COUNTRY_LEN: usize = 3;
+/// Maximum number of digits accepted inside the parenthesised area code.
+const MAX_AREA_LEN: usize = 10; // TODO find real number?
+/// Maximum number of characters (digits and separators) inspected after the
+/// first digit of the local part.
+const MAX_LOCAL_LEN: usize = 15; // TODO find real number?
+/// Minimum number of digits that must be found in the characters inspected
+/// after the first digit of the local part.
+const PHONE_NUMBER_MINIMUM_DIGITS: usize = 5;
+
+/// Returns `true` for the separators allowed inside a number: space, dot or dash.
+fn is_sdd(input: char) -> bool {
+    matches!(input, ' ' | '.' | '-')
+}
+
+/// Returns `true` for ASCII digits.
+fn is_digit(input: char) -> bool {
+    input.is_ascii_digit()
+}
+
+/// Returns `true` for ASCII digits and for the separators accepted by [`is_sdd`].
+fn is_digit_or_sdd(input: char) -> bool {
+    is_digit(input) || is_sdd(input)
+}
+
+/// Consumes a run of digits and separators, stopping right before its last digit.
+///
+/// At most `MAX_LOCAL_LEN` characters are inspected. The last digit of the run
+/// (and any separator after it) stays in the remaining input so that the caller
+/// can match the closing digit itself.
+///
+/// # Errors
+///
+/// Fails with [`CustomError::PhoneNumberNotEnoughDigits`] when the inspected run
+/// holds fewer than `PHONE_NUMBER_MINIMUM_DIGITS` digits. The error of
+/// `take_while_m_n` is propagated when `input` does not start with a digit or
+/// a separator.
+fn eat_while_digit_or_sdd_but_spare_last_digit(
+    input: &str,
+) -> IResult<&str, &str, CustomError<&str>> {
+    let (_, run) = take_while_m_n(1, MAX_LOCAL_LEN, is_digit_or_sdd)(input)?;
+
+    if run.chars().filter(|c| is_digit(*c)).count() < PHONE_NUMBER_MINIMUM_DIGITS {
+        return Err(nom::Err::Error(CustomError::PhoneNumberNotEnoughDigits));
+    }
+
+    // `run` holds ASCII characters only, so the byte index of its last digit
+    // is also a valid position to split `input` at.
+    let last_digit_index = run
+        .rfind(is_digit)
+        .ok_or(nom::Err::Error(CustomError::UnexpectedContent))?;
+    let (digits, remainder) = input.split_at(last_digit_index);
+    Ok((remainder, digits))
+}
+
+/// Parses a telephone number and builds the matching `tel:` URI.
+///
+/// Reimplements the android regex rules: from PHONE in android/util/Patterns.java.
+/// A number is an optional country code, an optional area code in parentheses
+/// and a local part that starts and ends with a digit.
+///
+/// On success the returned `String` is `tel:` followed by the optional leading
+/// `+` of the country code and all digits; separators and parentheses are dropped.
+fn internal_telephone_number(input: &str) -> IResult<&str, String, CustomError<&str>> {
+    let (input, (country, area, local)) = tuple((
+        // optional country code: optional `+`, 1 to `MAX_COUNTRY_LEN` digits, separators
+        opt(tuple((
+            opt(tag("+")),
+            take_while_m_n(1, MAX_COUNTRY_LEN, is_digit),
+            take_while(is_sdd),
+        ))),
+        // optional area code: 1 to `MAX_AREA_LEN` digits in parentheses, separators
+        opt(tuple((
+            delimited(
+                tag("("),
+                take_while_m_n(1, MAX_AREA_LEN, is_digit),
+                tag(")"),
+            ),
+            take_while(is_sdd),
+        ))),
+        // local part: a digit, a run of digits and separators, a closing digit
+        recognize(delimited(
+            satisfy(is_digit),
+            eat_while_digit_or_sdd_but_spare_last_digit,
+            satisfy(is_digit),
+        )),
+    ))(input)?;
+
+    // build the uri from the digits only, keeping the `+` of the country code
+    let country = country
+        .map(|(plus, digits, _)| format!("{}{digits}", plus.unwrap_or("")))
+        .unwrap_or_default();
+    let area = area.map(|(digits, _)| digits).unwrap_or("");
+    let local = local.replace(is_sdd, "");
+    Ok((input, format!("tel:{country}{area}{local}")))
+}
+
+/// Parses a telephone number at the start of `input`.
+///
+/// Returns [`Element::TelephoneNumber`] with the number exactly as it was found
+/// in `input` and the `tel:` link built from it.
+pub(crate) fn telephone_number(input: &str) -> IResult<&str, Element, CustomError<&str>> {
+    // `consumed` yields the matched slice and the parser output in a single pass
+    let (input, (number, tel_link)) = consumed(internal_telephone_number)(input)?;
+    Ok((input, Element::TelephoneNumber { number, tel_link }))
+}
+
+#[cfg(test)]
+mod test {
+    #![allow(clippy::unwrap_used)]
+
+    use crate::parser::{parse_from_text::phone_numbers::telephone_number, Element};
+
+    #[test]
+    fn test_phone_numbers() {
+        // from https://stackoverflow.com/a/29767609/7655232
+        let test_cases = vec![
+            ("(123) 456-7890", "1234567890"),
+            ("(123)456-7890", "1234567890"),
+            ("123-456-7890", "1234567890"),
+            ("123.456.7890", "1234567890"),
+            // ("1234567890", "1234567890"),
+            //("+31636363634", "+31636363634"),
+            ("+31 636363634", "+31636363634"),
+            ("075-63546725", "07563546725"),
+            // from wikipedia https://de.wikipedia.org/w/index.php?title=Rufnummer&oldid=236385081#Nationales
+            ("089 1234567", "0891234567"),
+            // https://www.bundesnetzagentur.de/SharedDocs/Downloads/DE/Sachgebiete/Telekommunikation/Unternehmen_Institutionen/Nummerierung/Rufnummern/Mittlg148_2021.pdf?__blob=publicationFile&v=1
+            ("(0)152 28817386", "015228817386"),
+            ("69 90009000", "6990009000"),
+            // ("90009000", "90009000"),
+            // https://en.wikipedia.org/w/index.php?title=E.123&oldid=1181303803
+            ("(0607) 123 4567", "06071234567"),
+            ("+22 607 123 4567", "+226071234567"),
+        ];
+
+        for (number, expected_uri) in test_cases {
+            println!("testing {number}");
+            assert_eq!(
+                telephone_number(number).unwrap().1,
+                Element::TelephoneNumber {
+                    number,
+                    tel_link: format!("tel:{expected_uri}")
+                }
+            )
+        }
+    }
+
+    #[test]
+    fn test_not_enough_digits() {
+        telephone_number("(0)152 28").expect_err("fails because number is to short");
+        telephone_number("152 28").expect_err("fails because too short");
+        telephone_number("(152) 28").expect_err("fails because too short");
+    }
+}
diff --git a/src/parser/parse_from_text/text_elements.rs b/src/parser/parse_from_text/text_elements.rs
index 161d8c6..a88dbfa 100644
--- a/src/parser/parse_from_text/text_elements.rs
+++ b/src/parser/parse_from_text/text_elements.rs
@@ -3,6 +3,7 @@ use crate::parser::link_url::LinkDestination;
 
 use super::base_parsers::*;
 use super::hashtag_content_char_ranges::hashtag_content_char;
+use super::phone_numbers::telephone_number;
 use super::Element;
 use crate::nom::{Offset, Slice};
 use nom::bytes::complete::take_while;
@@ -275,6 +276,8 @@ pub(crate) fn parse_text_element(
         }
     } {
         Ok((i, elm))
+    } else if let Ok((i, elm)) = telephone_number(input) {
+        Ok((i, elm))
     } else if let Ok((i, _)) = linebreak(input) {
         Ok((i, Element::Linebreak))
     } else {