Skip to content

Commit

Permalink
fix: restrict elements that can appear inside of a label for a labled…
Browse files Browse the repository at this point in the history
… link (#74)

* fix: restrict elements that can appear inside of a label for a labeled link

* cargo fmt

* add test for the codeblock bug

* Apply suggestions from code review

Co-authored-by: Farooq Karimi Zadeh <[email protected]>

* clarify the confusing comment

* fix broken documentation reference

---------

Co-authored-by: Farooq Karimi Zadeh <[email protected]>
  • Loading branch information
Simon-Laux and farooqkz authored May 30, 2024
1 parent 19b18d1 commit 967dca4
Show file tree
Hide file tree
Showing 7 changed files with 278 additions and 16 deletions.
8 changes: 8 additions & 0 deletions spec.md
Original file line number Diff line number Diff line change
Expand Up @@ -182,6 +182,14 @@ Optionally, a client can implement a system to trust a domain (a "don't ask agai

URL parsing allows all valid URLs, no restrictions on schemes, no whitelist is needed, because the format already specifies that it is a link.

The label can contain basic markdown elements (bold, italics), but no "complex" linkified elements such as hashtags, links and email addresses.

- parsers that run for a label:
  - (desktop set): none
  - (markdown set): bold, italics, underline, code-inline
- parsers that do not run for a label (their text is just returned as part of a Text element):
  - hashtag, email, link, labeled link, delimited email & link, codeblock, mentions (basically everything clickable)

## Ideas For The Future:

### `:emoji:`
Expand Down
35 changes: 27 additions & 8 deletions src/parser/parse_from_text/desktop_subset.rs
Original file line number Diff line number Diff line change
@@ -1,19 +1,38 @@
//! desktop subset of markdown, because this way we can already use the punycode detection of this crate
//! and also we can keep delimited and labeled links in desktop
use super::base_parsers::CustomError;
use super::markdown_elements::{delimited_email_address, delimited_link, labeled_link};
use super::text_elements::parse_text_element;
use super::Element;
use nom::{
bytes::complete::take,
bytes::complete::{is_not, tag, take},
combinator::{peek, recognize},
sequence::{delimited, tuple},
IResult,
};

/// consumes all text until [parse_element] works again, internal use text instead
use crate::parser::LinkDestination;

use super::base_parsers::CustomError;
use super::markdown_elements::{delimited_email_address, delimited_link};
use super::text_elements::parse_text_element;
use super::Element;

/// Parses a labeled link of the form `[label](destination)` for the desktop set.
///
/// The label is never parsed further: in the desktop set there is no element that
/// can appear inside of a label, so it is returned as one single [Element::Text].
/// The destination between the parentheses is parsed by `LinkDestination::parse_labelled`.
///
/// Returns [CustomError::NoContent] if the raw label is empty; any other parser
/// failure is propagated unchanged.
pub(crate) fn labeled_link(input: &str) -> IResult<&str, Element, CustomError<&str>> {
    let (after_label, raw_label) = delimited(tag("["), is_not("]"), tag("]"))(input)?;

    // NOTE(review): `is_not` presumably already rejects an empty match, which would make
    // this guard unreachable - confirm against the nom version in use before removing it.
    if raw_label.is_empty() {
        return Err(nom::Err::Error(CustomError::NoContent));
    }

    let (rest, destination) =
        delimited(tag("("), LinkDestination::parse_labelled, tag(")"))(after_label)?;

    Ok((
        rest,
        Element::LabeledLink {
            label: vec![Element::Text(raw_label)],
            destination,
        },
    ))
}

/// consumes all text until [parse_element] works again, this method is only for internal use by [desktopset_text]
///
/// its output is useable on its own, always combinate this with [nom::combinator::recognize]
/// its output is not useable on its own, always combinate this with [nom::combinator::recognize]
fn eat_desktopset_text(input: &str) -> IResult<&str, (), CustomError<&str>> {
let mut remaining = input;
while !remaining.is_empty() {
Expand Down
15 changes: 10 additions & 5 deletions src/parser/parse_from_text/markdown_elements.rs
Original file line number Diff line number Diff line change
Expand Up @@ -17,11 +17,14 @@ use crate::parser::{
utils::{is_white_space, is_white_space_but_not_linebreak},
};

fn inline_code(input: &str) -> IResult<&str, &str, CustomError<&str>> {
mod label_elements;
use label_elements::parse_label_elements;

/// Parses inline code: a non-empty run of non-backtick characters enclosed in single backticks.
///
/// On success the returned value is the text between the two backticks.
pub(crate) fn inline_code(input: &str) -> IResult<&str, &str, CustomError<&str>> {
    let mut backtick_delimited = delimited(tag("`"), is_not("`"), tag("`"));
    backtick_delimited(input)
}

fn code_block(input: &str) -> IResult<&str, Element, CustomError<&str>> {
pub(crate) fn code_block(input: &str) -> IResult<&str, Element, CustomError<&str>> {
let (input, content): (&str, &str) = delimited(tag("```"), is_not("```"), tag("```"))(input)?;

// parse language
Expand Down Expand Up @@ -105,7 +108,9 @@ pub(crate) fn labeled_link(input: &str) -> IResult<&str, Element, CustomError<&s
if raw_label.is_empty() {
return Err(nom::Err::Error(CustomError::NoContent));
}
let label = parse_all(raw_label);
// the list of elements that can appear inside of a label is restricted
// clickable elements make no sense there.
let label = parse_label_elements(raw_label);

let (input, (_, destination, _)) =
tuple((tag("("), LinkDestination::parse_labelled, tag(")")))(input)?;
Expand Down Expand Up @@ -145,9 +150,9 @@ pub(crate) fn parse_element(
}
}

/// consumes all text until [parse_element] works again, internal use text instead
/// consumes all text until [parse_element] works again, this method is only for internal use by [markdown_text]
///
/// its output is useable on its own, always combinate this with [nom::combinator::recognize]
/// its output is not useable on its own, always combinate this with [nom::combinator::recognize]
fn eat_markdown_text(input: &str) -> IResult<&str, (), CustomError<&str>> {
let mut remaining = input;
while !remaining.is_empty() {
Expand Down
88 changes: 88 additions & 0 deletions src/parser/parse_from_text/markdown_elements/label_elements.rs
Original file line number Diff line number Diff line change
@@ -0,0 +1,88 @@
use nom::{
bytes::complete::take,
combinator::{peek, recognize},
IResult,
};

use crate::parser::{
parse_from_text::{
base_parsers::{direct_delimited, CustomError},
markdown_elements::inline_code,
},
Element,
};

/// Parses the content of a label (markdown set) into a list of elements.
///
/// At every position [parse_markdown_label_element] is tried first; if it does not match,
/// [markdown_label_text] consumes plain text instead. Should both fail, whatever is left
/// is emitted as one final [Element::Text] and parsing stops.
pub(crate) fn parse_label_elements(input: &str) -> Vec<Element> {
    let mut elements = Vec::new();
    let mut unparsed = input;

    while !unparsed.is_empty() {
        let step =
            parse_markdown_label_element(unparsed).or_else(|_| markdown_label_text(unparsed));

        match step {
            Ok((rest, element)) => {
                elements.push(element);
                unparsed = rest;
            }
            Err(_) => {
                // neither parser could make progress: keep the remainder as plain text
                elements.push(Element::Text(unparsed));
                break;
            }
        }
    }

    elements
}

pub(crate) fn parse_markdown_label_element(
input: &str,
) -> IResult<&str, Element, CustomError<&str>> {
// the order is important
// generaly more specific parsers that fail/return fast should be in the front
// But keep in mind that the order can also change how and if the parser works as intended
if let Ok((i, b)) = direct_delimited(input, "**") {
Ok((i, Element::Bold(parse_label_elements(b))))
} else if let Ok((i, b)) = direct_delimited(input, "__") {
Ok((i, Element::Bold(parse_label_elements(b))))
} else if let Ok((i, b)) = direct_delimited(input, "_") {
Ok((i, Element::Italics(parse_label_elements(b))))
} else if let Ok((i, b)) = direct_delimited(input, "*") {
Ok((i, Element::Italics(parse_label_elements(b))))
} else if let Ok((i, b)) = direct_delimited(input, "~~") {
Ok((i, Element::StrikeThrough(parse_label_elements(b))))
} else if let Ok((i, b)) = inline_code(input) {
Ok((i, Element::InlineCode { content: b }))
} else {
Err(nom::Err::Error(CustomError::NoElement))
}
}
/// Consumes text until [parse_markdown_label_element] works again or the input ends;
/// this function is only for internal use by [markdown_label_text].
///
/// Its output is not usable on its own, always combine it with [nom::combinator::recognize].
fn eat_markdown_label_text(input: &str) -> IResult<&str, (), CustomError<&str>> {
    let mut cursor = input;

    loop {
        if cursor.is_empty() {
            break;
        }

        // Always take one item first: text is the last parser that is tried,
        // so no element parser matched at the current position.
        let (after_item, _) = take(1usize)(cursor)?;
        cursor = after_item;

        // Stop in front of the next position where an element can be parsed.
        let element_starts_here =
            peek(|candidate| parse_markdown_label_element(candidate))(cursor).is_ok();
        if element_starts_here {
            break;
        }
    }

    Ok((cursor, ()))
}

/// Consumes text until another parser of [parse_markdown_label_element] works again.
///
/// Used as the last parser: if the others do not consume the input, this one consumes it
/// up to the next position where an element parser matches and returns the consumed
/// slice as [Element::Text].
fn markdown_label_text(input: &str) -> IResult<&str, Element, CustomError<&str>> {
    recognize(eat_markdown_label_text)(input)
        .map(|(rest, consumed)| (rest, Element::Text(consumed)))
}
4 changes: 2 additions & 2 deletions src/parser/parse_from_text/text_elements.rs
Original file line number Diff line number Diff line change
Expand Up @@ -287,9 +287,9 @@ pub(crate) fn parse_text_element(
}
}

/// consumes all text until [parse_text_element] works again, internal use text instead
/// consumes all text until [parse_text_element] works again, this method is only for internal use by [text]
///
/// its output is useable on its own, always combinate this with [nom::combinator::recognize]
/// its output is not useable on its own, always combinate this with [nom::combinator::recognize]
fn eat_text(input: &str) -> IResult<&str, (), CustomError<&str>> {
let mut remaining = input;
while !remaining.is_empty() {
Expand Down
46 changes: 45 additions & 1 deletion tests/text_to_ast/desktop_set.rs
Original file line number Diff line number Diff line change
Expand Up @@ -355,7 +355,7 @@ fn labeled_link_should_not_work() {
"[rich content **bold**](https://delta.chat/en/help?hi=5&e=4#section2.0)"
),
vec![LabeledLink {
label: vec![Text("rich content "), Bold(vec![Text("bold")])],
label: vec![Text("rich content **bold**")],
destination: https_link_no_puny(
"https://delta.chat/en/help?hi=5&e=4#section2.0",
"delta.chat",
Expand Down Expand Up @@ -406,3 +406,47 @@ fn inline_link_do_not_eat_last_char_if_it_is_special() {
}]
);
}

#[test]
fn labeled_link() {
    // A plain label is returned as one single Text element in the desktop set.
    let parsed = parse_desktop_set("[a link](https://delta.chat/en/help?hi=5&e=4#section2.0)");

    let expected = vec![LabeledLink {
        label: vec![Text("a link")],
        destination: https_link_no_puny(
            "https://delta.chat/en/help?hi=5&e=4#section2.0",
            "delta.chat",
        ),
    }];

    assert_eq!(parsed, expected);
}

#[test]
fn labeled_link_no_markdown_in_desktop_set() {
    let parsed = parse_desktop_set(
        "[rich content **bold**](https://delta.chat/en/help?hi=5&e=4#section2.0)",
    );

    // This is what the result would look like if bold were parsed inside of the label.
    let with_bold_label = vec![LabeledLink {
        label: vec![Text("rich content "), Bold(vec![Text("bold")])],
        destination: https_link_no_puny(
            "https://delta.chat/en/help?hi=5&e=4#section2.0",
            "delta.chat",
        ),
    }];

    assert_ne!(parsed, with_bold_label);
}

#[test]
fn labeled_link_should_not_allow_codeblock() {
    let parsed = parse_desktop_set("[```\nhello world\n```](https://delta.chat)");

    // NOTE(review): the value below uses the destination "https://delta.chat/en/help" and a
    // trailing Text(".") although the input has neither, so this assert_ne looks like it
    // cannot fail - confirm what the intended expectation is.
    let unwanted = vec![
        LabeledLink {
            label: vec![Text("```\nhello world\n```")],
            destination: https_link_no_puny("https://delta.chat/en/help", "delta.chat"),
        },
        Text("."),
    ];

    assert_ne!(parsed, unwanted);
}
98 changes: 98 additions & 0 deletions tests/text_to_ast/markdown.rs
Original file line number Diff line number Diff line change
Expand Up @@ -762,3 +762,101 @@ fn labeled_link_can_have_comma_or_dot_at_end() {
]
);
}

#[test]
fn labeled_link_should_not_allow_link_element() {
    let parsed = parse_markdown_text(
        "you can find the details [here https://delta.chat](https://delta.chat/en/help).",
    );

    // The URL inside of the label stays plain text.
    let expected = vec![
        Text("you can find the details "),
        LabeledLink {
            label: vec![Text("here https://delta.chat")],
            destination: https_link_no_puny("https://delta.chat/en/help", "delta.chat"),
        },
        Text("."),
    ];

    assert_eq!(parsed, expected);
}

#[test]
fn labeled_link_should_not_allow_hashtag_element() {
    let parsed =
        parse_markdown_text("you can find the details [here #42](https://delta.chat/en/help).");

    // The hashtag inside of the label stays plain text.
    let expected = vec![
        Text("you can find the details "),
        LabeledLink {
            label: vec![Text("here #42")],
            destination: https_link_no_puny("https://delta.chat/en/help", "delta.chat"),
        },
        Text("."),
    ];

    assert_eq!(parsed, expected);
}

#[test]
fn labeled_link_should_not_allow_email() {
    let parsed = parse_markdown_text(
        "you can find the details [here [email protected]](https://delta.chat/en/help).",
    );

    // The address inside of the label stays plain text.
    let expected = vec![
        Text("you can find the details "),
        LabeledLink {
            label: vec![Text("here [email protected]")],
            destination: https_link_no_puny("https://delta.chat/en/help", "delta.chat"),
        },
        Text("."),
    ];

    assert_eq!(parsed, expected);
}

#[test]
fn labeled_link_should_allow_bold() {
    let parsed = parse_markdown_text(
        "you can find the details [here **bold**](https://delta.chat/en/help).",
    );

    // Bold is one of the elements that is still parsed inside of a label.
    let expected = vec![
        Text("you can find the details "),
        LabeledLink {
            label: vec![Text("here "), Bold(vec![Text("bold")])],
            destination: https_link_no_puny("https://delta.chat/en/help", "delta.chat"),
        },
        Text("."),
    ];

    assert_eq!(parsed, expected);
}

#[test]
fn labeled_link_should_not_allow_email_in_bold() {
    let parsed = parse_markdown_text(
        "you can find the details [here **[email protected]**](https://delta.chat/en/help).",
    );

    // NOTE(review): the value below starts with a Text without the trailing space and puts
    // a Bold element in front of the link, so this assert_ne looks like it can hardly
    // fail - confirm what the intended expectation is.
    let unwanted = vec![
        Text("you can find the details"),
        Bold(vec![Text("[email protected]")]),
        LabeledLink {
            label: vec![Text("here [email protected]")],
            destination: https_link_no_puny("https://delta.chat/en/help", "delta.chat"),
        },
        Text("."),
    ];

    assert_ne!(parsed, unwanted);
}

#[test]
fn labeled_link_should_not_allow_codeblock() {
    let parsed = parse_markdown_text("[```\nhello world\n```](https://delta.chat)");

    // NOTE(review): the value below uses the destination "https://delta.chat/en/help" and a
    // trailing Text(".") although the input has neither, so this assert_ne looks like it
    // cannot fail - confirm what the intended expectation is.
    let unwanted = vec![
        LabeledLink {
            label: vec![Text("```\nhello world\n```")],
            destination: https_link_no_puny("https://delta.chat/en/help", "delta.chat"),
        },
        Text("."),
    ];

    assert_ne!(parsed, unwanted);
}

0 comments on commit 967dca4

Please sign in to comment.