fix: restrict elements that can appear inside of a label for a labeled…

… link (#74) * fix: restrict elements that can appear inside of a label for a labeled link * cargo fmt * add test for the codeblock bug * Apply suggestions from code review Co-authored-by: Farooq Karimi Zadeh <[email protected]> * clarify the confusing comment * fix broken documentation reference --------- Co-authored-by: Farooq Karimi Zadeh <[email protected]>
deltachat · May 30, 2024 · 967dca4 · 967dca4
1 parent 19b18d1
commit 967dca4
Show file tree

Hide file tree

Showing 7 changed files with 278 additions and 16 deletions.
diff --git a/spec.md b/spec.md
@@ -182,6 +182,14 @@ Optionally, a client can implement a system to trust a domain (a "don't ask agai
 
 URL parsing allows all valid URLs, no restrictions on schemes, no whitelist is needed, because the format already specifies that it is a link.
 
+The label can contain basic markdown elements (bold, italics), but no "complex" linkified elements such as hashtags, links and email addresses.
+
+- parsers that run for a label:
+  - (desktop set): none
+  - (markdown set): bold, italics, strikethrough, code-inline
+- parsers that do not run for a label (just returned as part of Text element):
+  - hashtag, email, link, labeled link, delimited email & link, codeblock, mentions (basically everything clickable)
+
 ## Ideas For The Future:
 
 ### `:emoji:`

diff --git a/src/parser/parse_from_text/desktop_subset.rs b/src/parser/parse_from_text/desktop_subset.rs
@@ -1,19 +1,38 @@
 //! desktop subset of markdown, because this way we can already use the punycode detection of this crate
 //! and also we can keep delimited and labeled links in desktop
-
-use super::base_parsers::CustomError;
-use super::markdown_elements::{delimited_email_address, delimited_link, labeled_link};
-use super::text_elements::parse_text_element;
-use super::Element;
 use nom::{
-    bytes::complete::take,
+    bytes::complete::{is_not, tag, take},
     combinator::{peek, recognize},
+    sequence::{delimited, tuple},
     IResult,
 };
 
-/// consumes all text until [parse_element] works again, internal use text instead
+use crate::parser::LinkDestination;
+
+use super::base_parsers::CustomError;
+use super::markdown_elements::{delimited_email_address, delimited_link};
+use super::text_elements::parse_text_element;
+use super::Element;
+
+/// Parses a labeled link such as `[labeled](https://link)` for the desktop set.
+///
+/// The text between the square brackets is not parsed any further, it is returned as a single
+/// [Element::Text]. The destination between the parentheses is parsed by
+/// [LinkDestination::parse_labelled]; an error from any of the sub-parsers is propagated.
+pub(crate) fn labeled_link(input: &str) -> IResult<&str, Element, CustomError<&str>> {
+    let (input, raw_label) = delimited(tag("["), is_not("]"), tag("]"))(input)?;
+    if raw_label.is_empty() {
+        return Err(nom::Err::Error(CustomError::NoContent));
+    }
+
+    // in the desktop set there is no element that can appear inside of a label
+    let label = vec![Element::Text(raw_label)];
+
+    let (input, (_, destination, _)) =
+        tuple((tag("("), LinkDestination::parse_labelled, tag(")")))(input)?;
+
+    Ok((input, Element::LabeledLink { label, destination }))
+}
+
+/// consumes all text until [parse_element] works again, this method is only for internal use by [desktopset_text]
 ///
-/// its output is useable on its own, always combinate this with [nom::combinator::recognize]
+/// its output is not useable on its own, always combinate this with [nom::combinator::recognize]
 fn eat_desktopset_text(input: &str) -> IResult<&str, (), CustomError<&str>> {
     let mut remaining = input;
     while !remaining.is_empty() {

diff --git a/src/parser/parse_from_text/markdown_elements.rs b/src/parser/parse_from_text/markdown_elements.rs
@@ -17,11 +17,14 @@ use crate::parser::{
     utils::{is_white_space, is_white_space_but_not_linebreak},
 };
 
-fn inline_code(input: &str) -> IResult<&str, &str, CustomError<&str>> {
+mod label_elements;
+use label_elements::parse_label_elements;
+
+pub(crate) fn inline_code(input: &str) -> IResult<&str, &str, CustomError<&str>> {
     delimited(tag("`"), is_not("`"), tag("`"))(input)
 }
 
-fn code_block(input: &str) -> IResult<&str, Element, CustomError<&str>> {
+pub(crate) fn code_block(input: &str) -> IResult<&str, Element, CustomError<&str>> {
     let (input, content): (&str, &str) = delimited(tag("```"), is_not("```"), tag("```"))(input)?;
 
     // parse language
@@ -105,7 +108,9 @@ pub(crate) fn labeled_link(input: &str) -> IResult<&str, Element, CustomError<&s
     if raw_label.is_empty() {
         return Err(nom::Err::Error(CustomError::NoContent));
     }
-    let label = parse_all(raw_label);
+    // the list of elements that can appear inside of a label is restricted
+    // clickable elements make no sense there.
+    let label = parse_label_elements(raw_label);
 
     let (input, (_, destination, _)) =
         tuple((tag("("), LinkDestination::parse_labelled, tag(")")))(input)?;
@@ -145,9 +150,9 @@ pub(crate) fn parse_element(
     }
 }
 
-/// consumes all text until [parse_element] works again, internal use text instead
+/// consumes all text until [parse_element] works again, this method is only for internal use by [markdown_text]
 ///
-/// its output is useable on its own, always combinate this with [nom::combinator::recognize]
+/// its output is not useable on its own, always combinate this with [nom::combinator::recognize]
 fn eat_markdown_text(input: &str) -> IResult<&str, (), CustomError<&str>> {
     let mut remaining = input;
     while !remaining.is_empty() {

diff --git a/src/parser/parse_from_text/markdown_elements/label_elements.rs b/src/parser/parse_from_text/markdown_elements/label_elements.rs
@@ -0,0 +1,88 @@
+use nom::{
+    bytes::complete::take,
+    combinator::{peek, recognize},
+    IResult,
+};
+
+use crate::parser::{
+    parse_from_text::{
+        base_parsers::{direct_delimited, CustomError},
+        markdown_elements::inline_code,
+    },
+    Element,
+};
+
+/// Parses the label of a labeled link in the markdown set (meant to be reused for labeled hashtags later).
+///
+/// Only the elements of [parse_markdown_label_element] are recognized, everything else is returned as [Element::Text].
+pub(crate) fn parse_label_elements(input: &str) -> Vec<Element> {
+    let mut result = Vec::new();
+    let mut remaining = input;
+    while !remaining.is_empty() {
+        if let Ok((rest, element)) = parse_markdown_label_element(remaining) {
+            // a formatting element starts at the current position
+            remaining = rest;
+            result.push(element);
+        } else if let Ok((rest, element)) = markdown_label_text(remaining) {
+            // no element starts here: take plain text up to the next element
+            result.push(element);
+            remaining = rest;
+        } else {
+            // neither parser succeeded: keep the rest as text and stop
+            result.push(Element::Text(remaining));
+            break;
+        }
+    }
+    result
+}
+
+/// Tries to parse one formatting element at the start of `input`.
+///
+/// Recognized are bold (`**`, `__`), italics (`_`, `*`), strikethrough (`~~`) and inline code.
+/// The content of bold, italics and strikethrough is parsed again with [parse_label_elements].
+/// Returns [CustomError::NoElement] if none of them matches.
+pub(crate) fn parse_markdown_label_element(
+    input: &str,
+) -> IResult<&str, Element, CustomError<&str>> {
+    // the order is important
+    // generally more specific parsers that fail/return fast should be in the front
+    // But keep in mind that the order can also change how and if the parser works as intended
+    if let Ok((i, b)) = direct_delimited(input, "**") {
+        Ok((i, Element::Bold(parse_label_elements(b))))
+    } else if let Ok((i, b)) = direct_delimited(input, "__") {
+        Ok((i, Element::Bold(parse_label_elements(b))))
+    } else if let Ok((i, b)) = direct_delimited(input, "_") {
+        Ok((i, Element::Italics(parse_label_elements(b))))
+    } else if let Ok((i, b)) = direct_delimited(input, "*") {
+        Ok((i, Element::Italics(parse_label_elements(b))))
+    } else if let Ok((i, b)) = direct_delimited(input, "~~") {
+        Ok((i, Element::StrikeThrough(parse_label_elements(b))))
+    } else if let Ok((i, b)) = inline_code(input) {
+        Ok((i, Element::InlineCode { content: b }))
+    } else {
+        Err(nom::Err::Error(CustomError::NoElement))
+    }
+}
+/// consumes all text until [parse_markdown_label_element] works again, this method is only for internal use by [markdown_label_text]
+///
+/// its output is not usable on its own, always combine this with [nom::combinator::recognize]
+fn eat_markdown_label_text(input: &str) -> IResult<&str, (), CustomError<&str>> {
+    let mut remaining = input;
+    while !remaining.is_empty() {
+        // take 1, because other parsers didn't work (text is always the last used parser)
+        let (remainder, _taken) = take(1usize)(remaining)?;
+        remaining = remainder;
+        // peek if there is an element, if so the text ends here
+        if peek(|input| parse_markdown_label_element(input))(remaining).is_ok() {
+            break;
+        }
+        // (disabled) take until whitespace
+        //remaining = take_while(|c| not_blank_space(c))(remaining)?.0;
+    }
+    Ok((remaining, ()))
+}
+
+/// Consumes text until another parser of [parse_markdown_label_element] works again
+///
+/// used as last parser, if the others do not consume the input it consumes the input until another parser works again
+/// (checks for an element after every `take(1)` step, the whitespace shortcut in [eat_markdown_label_text] is commented out)
+fn markdown_label_text(input: &str) -> IResult<&str, Element, CustomError<&str>> {
+    let (rest, content) = recognize(eat_markdown_label_text)(input)?;
+    Ok((rest, Element::Text(content)))
+}
diff --git a/src/parser/parse_from_text/text_elements.rs b/src/parser/parse_from_text/text_elements.rs
@@ -287,9 +287,9 @@ pub(crate) fn parse_text_element(
     }
 }
 
-/// consumes all text until [parse_text_element] works again, internal use text instead
+/// consumes all text until [parse_text_element] works again, this method is only for internal use by [text]
 ///
-/// its output is useable on its own, always combinate this with [nom::combinator::recognize]
+/// its output is not useable on its own, always combinate this with [nom::combinator::recognize]
 fn eat_text(input: &str) -> IResult<&str, (), CustomError<&str>> {
     let mut remaining = input;
     while !remaining.is_empty() {

diff --git a/tests/text_to_ast/desktop_set.rs b/tests/text_to_ast/desktop_set.rs
@@ -355,7 +355,7 @@ fn labeled_link_should_not_work() {
             "[rich content **bold**](https://delta.chat/en/help?hi=5&e=4#section2.0)"
         ),
         vec![LabeledLink {
-            label: vec![Text("rich content "), Bold(vec![Text("bold")])],
+            label: vec![Text("rich content **bold**")],
             destination: https_link_no_puny(
                 "https://delta.chat/en/help?hi=5&e=4#section2.0",
                 "delta.chat",
@@ -406,3 +406,47 @@ fn inline_link_do_not_eat_last_char_if_it_is_special() {
         }]
     );
 }
+
+// a plain label is returned as a single `Text` element inside of the `LabeledLink`
+#[test]
+fn labeled_link() {
+    assert_eq!(
+        parse_desktop_set("[a link](https://delta.chat/en/help?hi=5&e=4#section2.0)"),
+        vec![LabeledLink {
+            label: vec![Text("a link")],
+            destination: https_link_no_puny(
+                "https://delta.chat/en/help?hi=5&e=4#section2.0",
+                "delta.chat"
+            ),
+        }]
+    );
+}
+
+// the desktop set must not interpret markdown inside of the label,
+// so the result has to differ from the one that contains a parsed `Bold` element
+#[test]
+fn labeled_link_no_markdown_in_desktop_set() {
+    assert_ne!(
+        parse_desktop_set(
+            "[rich content **bold**](https://delta.chat/en/help?hi=5&e=4#section2.0)"
+        ),
+        vec![LabeledLink {
+            label: vec![Text("rich content "), Bold(vec![Text("bold")])],
+            destination: https_link_no_puny(
+                "https://delta.chat/en/help?hi=5&e=4#section2.0",
+                "delta.chat"
+            ),
+        }]
+    );
+}
+
+// in the desktop set the label is never parsed, so a code block inside of it
+// has to be kept verbatim as one `Text` element.
+// The expectation uses the same destination as the input and no trailing element,
+// otherwise the comparison would hold no matter how the label is parsed.
+#[test]
+fn labeled_link_should_not_allow_codeblock() {
+    assert_eq!(
+        parse_desktop_set("[```\nhello world\n```](https://delta.chat)"),
+        vec![LabeledLink {
+            label: vec![Text("```\nhello world\n```")],
+            destination: https_link_no_puny("https://delta.chat", "delta.chat"),
+        }]
+    );
+}
diff --git a/tests/text_to_ast/markdown.rs b/tests/text_to_ast/markdown.rs
@@ -762,3 +762,101 @@ fn labeled_link_can_have_comma_or_dot_at_end() {
         ]
     );
 }
+
+// a URL inside of the label stays plain text, it is not turned into a link element
+#[test]
+fn labeled_link_should_not_allow_link_element() {
+    assert_eq!(
+        parse_markdown_text(
+            "you can find the details [here https://delta.chat](https://delta.chat/en/help)."
+        ),
+        vec![
+            Text("you can find the details "),
+            LabeledLink {
+                label: vec![Text("here https://delta.chat")],
+                destination: https_link_no_puny("https://delta.chat/en/help", "delta.chat"),
+            },
+            Text(".")
+        ]
+    );
+}
+
+// a hashtag inside of the label stays plain text, it is not turned into a hashtag element
+#[test]
+fn labeled_link_should_not_allow_hashtag_element() {
+    assert_eq!(
+        parse_markdown_text("you can find the details [here #42](https://delta.chat/en/help)."),
+        vec![
+            Text("you can find the details "),
+            LabeledLink {
+                label: vec![Text("here #42")],
+                destination: https_link_no_puny("https://delta.chat/en/help", "delta.chat"),
+            },
+            Text(".")
+        ]
+    );
+}
+
+// an email address inside of the label stays plain text, it is not turned into an email element
+// NOTE(review): "[email protected]" looks like an obfuscation placeholder for a real address - confirm against the repository
+#[test]
+fn labeled_link_should_not_allow_email() {
+    assert_eq!(
+        parse_markdown_text(
+            "you can find the details [here [email protected]](https://delta.chat/en/help)."
+        ),
+        vec![
+            Text("you can find the details "),
+            LabeledLink {
+                label: vec![Text("here [email protected]")],
+                destination: https_link_no_puny("https://delta.chat/en/help", "delta.chat"),
+            },
+            Text(".")
+        ]
+    );
+}
+
+// basic formatting is still allowed inside of the label: `**bold**` becomes a `Bold` element
+#[test]
+fn labeled_link_should_allow_bold() {
+    assert_eq!(
+        parse_markdown_text(
+            "you can find the details [here **bold**](https://delta.chat/en/help)."
+        ),
+        vec![
+            Text("you can find the details "),
+            LabeledLink {
+                label: vec![Text("here "), Bold(vec![Text("bold")])],
+                destination: https_link_no_puny("https://delta.chat/en/help", "delta.chat"),
+            },
+            Text(".")
+        ]
+    );
+}
+
+// an email address nested in bold inside of the label should not become an email element
+// NOTE(review): the value compared against has a top-level `Bold` before the link and a first `Text`
+// without trailing space, so this `assert_ne!` presumably holds for any parser output - consider an exact `assert_eq!`
+// NOTE(review): "[email protected]" looks like an obfuscation placeholder for a real address - confirm against the repository
+#[test]
+fn labeled_link_should_not_allow_email_in_bold() {
+    assert_ne!(
+        parse_markdown_text(
+            "you can find the details [here **[email protected]**](https://delta.chat/en/help)."
+        ),
+        vec![
+            Text("you can find the details"),
+            Bold(vec![Text("[email protected]")]),
+            LabeledLink {
+                label: vec![Text("here [email protected]")],
+                destination: https_link_no_puny("https://delta.chat/en/help", "delta.chat"),
+            },
+            Text(".")
+        ]
+    );
+}
+
+// a code block inside of the label should not be parsed as a code block element
+// NOTE(review): the value compared against uses the destination ".../en/help" and a trailing `Text(".")`,
+// neither of which is part of the input, so this `assert_ne!` appears to hold no matter how the label
+// is parsed - replace it with an `assert_eq!` on the real expected output
+#[test]
+fn labeled_link_should_not_allow_codeblock() {
+    assert_ne!(
+        parse_markdown_text("[```\nhello world\n```](https://delta.chat)"),
+        vec![
+            LabeledLink {
+                label: vec![Text("```\nhello world\n```")],
+                destination: https_link_no_puny("https://delta.chat/en/help", "delta.chat"),
+            },
+            Text(".")
+        ]
+    );
+}