From aa854b30709a68d56597f390767c7b5c437c470d Mon Sep 17 00:00:00 2001 From: Farooq Karimi Zadeh Date: Thu, 20 Jun 2024 17:42:37 +0330 Subject: [PATCH] update --- .../parse_from_text/markdown_elements.rs | 80 +++++++++++++------ 1 file changed, 56 insertions(+), 24 deletions(-) diff --git a/src/parser/parse_from_text/markdown_elements.rs b/src/parser/parse_from_text/markdown_elements.rs index eafdea1..12df528 100644 --- a/src/parser/parse_from_text/markdown_elements.rs +++ b/src/parser/parse_from_text/markdown_elements.rs @@ -15,10 +15,8 @@ use crate::parser::{ Element, }, utils::{ - is_white_space, + is_unicode_punctuation, is_unicode_white_space, is_white_space, is_white_space_but_not_linebreak, - is_unicode_white_space, - is_unicode_punctuation, }, }; @@ -47,15 +45,15 @@ pub(crate) fn code_block(input: &str) -> IResult<&str, Element, CustomError<&str (content, Some(lang)) }; - // expect whitespace or new line after language or beginning (if no language is defined) + // expect white_space or new line after language or beginning (if no language is defined) let char_in_question = content .chars() .next() .ok_or(nom::Err::Error(CustomError::NoContent))?; - // remove starting whitespace and first newline (if there is any). 
let content = if is_white_space_but_not_linebreak(char_in_question) { - // remove whitespaces until newline or non whitespaces + // remove white_spaces until newline or non white_spaces let (content, _) = take_while(is_white_space_but_not_linebreak)(content)?; // remove new line if there is one let (content, _) = opt(tag("\n"))(content)?; @@ -123,31 +121,65 @@ pub(crate) fn labeled_link(input: &str) -> IResult<&str, Element, CustomError<&s Ok((input, Element::LabeledLink { label, destination })) } - /* * For description on how these functions(parse_italics and parse_bold) work * refer to this link: https://spec.commonmark.org/0.31.2/#emphasis-and-strong-emphasis */ -fn parse_italics(input: &str, prev_char: Option) -> IResult<&str, &str, CustomError<&str>> { - let (input_, (content, tag_str)) = direct_delimited(input, &["_", "*"][..])?; - let is_start_left_flanking: bool = - b.starts_with(is_unicode_white_space) && - (!b.starts_with(is_unicode_punctuation) || - (b.starts_with(is_unicode_punctuation) && - (prev_char.is_none() || - is_unicode_punctuation(prev_char.unwrap()) || - is_unicode_white_space(prev_char.unwrap())))); - // it is of great note here that order is very important. Iff prev_char.is_none() evals to - // false, this means it is a Some, and prev_char.unwrap() won't panic. - // On the other hand, iff it evals to true, the rest won't be run and again - // no panic happens. +macro_rules! left_flanking { + ($next_char: expr, $prev_char: expr) => { + is_unicode_white_space($next_char) && ( // followed by whitespace and ... + !is_unicode_punctuation($next_char) || ( // not followed by punct or ... + is_unicode_punctuation($next_char) && ( // followed by punct or ... + $prev_char.is_none() || + is_unicode_punctuation($prev_char.unwrap()) || + is_unicode_white_space($prev_char.unwrap()) + // preceded by whitespace or punct + /* + * Note that order here is important. Iff prev_char is Some(=not None), + * unwrap will be executed and won't panic. 
+ * On the other hand, iff it's None, the rest won't be run and again + * no panic happens. The same goes for right flanking. --Farooq + */ + ) + ) + ) + } } -fn parse_bold(input: &str, prev_char: Option) -> IResult<&str, &str, CustomError<&str>> { - +macro_rules! right_flanking { + ($prev_char: expr, $next_char: expr) => { + is_unicode_white_space($next_char) && ( + !is_unicode_punctuation($next_char) || ( + is_unicode_punctuation($next_char) && ( + is_unicode_white_space($prev_char) || + is_unicode_punctuation($prev_char) + ) + ) + ) + } } +fn parse_italics(input: &str, prev_char: Option) -> IResult<&str, &str, CustomError<&str>> { + let (input_, (content, tag_str)) = direct_delimited(input, &["_", "*"][..])?; + let is_wspace = is_unicode_white_space; + let is_punct = is_unicode_punctuation; + let is_start_left_flanking: bool = left_flanking!(content.chars().last().unwrap_or('\0'), prev_char); + let is_start_right_flanking: bool = right_flanking!( + prev_char.unwrap_or('\0'), + content.chars().next().unwrap_or('\0') + ); + let is_end_left_flanking: bool = left_flanking!(input_.chars().last().unwrap_or('\0'), content.chars().next()); + let is_end_right_flanking: bool = right_flanking!(content.chars().last().unwrap_or('\0'), input_.chars().next().unwrap_or('\0')); + if tag_str == "*" && is_start_left_flanking && is_end_right_flanking { + return Ok((input_, content)); + } else if is_start_left_flanking && (!is_start_right_flanking || (is_start_right_flanking && (prev_char.is_some() && is_unicode_punctuation(prev_char.unwrap())))) && is_end_right_flanking && (!is_end_left_flanking || (is_end_left_flanking && prev_char.is_some() && input_.ends_with(is_unicode_punctuation))) { + return Ok((input_, content)); + } + Err(nom::Err::Error(CustomError::UnexpectedContent)) +} + +fn parse_bold(input: &str, prev_char: Option) -> IResult<&str, &str, CustomError<&str>> {} pub(crate) fn parse_element( input: &str, @@ -190,7 +222,7 @@ fn eat_markdown_text(input: &str) -> 
IResult<&str, (), CustomError<&str>> { if peek(|input| parse_element(input, taken.chars().next()))(remaining).is_ok() { break; } - // take until whitespace + // take until white_space //remaining = take_while(|c| not_blank_space(c))(remaining)?.0; } Ok((remaining, ())) @@ -199,7 +231,7 @@ fn eat_markdown_text(input: &str) -> IResult<&str, (), CustomError<&str>> { /// Consumes text until another parser of [parse_element] works again /// /// used as last parser, if the others do not consume the input it consumes the input until another parser works again -/// (uses whitespace seperation to make the parsing faster) +/// (uses white_space separation to make the parsing faster) pub(crate) fn markdown_text(input: &str) -> IResult<&str, Element, CustomError<&str>> { let (rest, content) = recognize(eat_markdown_text)(input)?; Ok((rest, Element::Text(content)))