From 9076664614c9545740a9ee8eba18b87b5fe281ad Mon Sep 17 00:00:00 2001 From: Farooq Karimi Zadeh Date: Thu, 9 May 2024 15:31:56 +0330 Subject: [PATCH 1/7] draft --- src/parser/mod.rs | 5 ++ src/parser/parse_from_text/text_elements.rs | 18 +++++- tests/text_to_ast/mod.rs.orig | 63 --------------------- tests/text_to_ast/text_only.rs | 28 +++++++++ 4 files changed, 50 insertions(+), 64 deletions(-) delete mode 100644 tests/text_to_ast/mod.rs.orig diff --git a/src/parser/mod.rs b/src/parser/mod.rs index 02e7daa..980aefd 100644 --- a/src/parser/mod.rs +++ b/src/parser/mod.rs @@ -17,6 +17,11 @@ pub enum Element<'a> { Text(&'a str), /// #hashtag Tag(&'a str), + /// [label](#tag) + LabelledTag { + label: Box>, + tag: &'a str + }, /// Represents a linebreak - \n Linebreak, Link { diff --git a/src/parser/parse_from_text/text_elements.rs b/src/parser/parse_from_text/text_elements.rs index 5853f82..6279fdc 100644 --- a/src/parser/parse_from_text/text_elements.rs +++ b/src/parser/parse_from_text/text_elements.rs @@ -9,7 +9,7 @@ use nom::{ streaming::take_till1, }, character::complete::char, - combinator::{peek, recognize, verify}, + combinator::{peek, recognize, verify, consumed}, sequence::tuple, AsChar, IResult, Offset, Slice, }; @@ -253,6 +253,20 @@ fn bot_command_suggestion(input: &str) -> IResult<&str, Element, CustomError<&st } } +fn labelled_tag(input: &str) -> IResult<&str, Element, CustomError<&str>> { + let (input, label) = delimited(char('['), is_not("["), char(']'))(input)?; + let (_, label) = parse_text_element(label, None)?; + let (input, tag) = delimited(char('('), is_not("("), char(')'))(input)?; + let (_, tag) = consumed(hashtag)(tag)?; + Ok(( + input, + Element::LabelledTag { + label: Box::new(label), + tag + } + )) +} + pub(crate) fn parse_text_element( input: &str, prev_char: Option, @@ -266,6 +280,8 @@ pub(crate) fn parse_text_element( if let Ok((i, elm)) = hashtag(input) { Ok((i, elm)) + } else if let Ok((i, elm)) = labelled_tag(input) { + Ok((i, elm)) } 
else if let Ok((i, elm)) = { if prev_char == Some(' ') || prev_char.is_none() { bot_command_suggestion(input) diff --git a/tests/text_to_ast/mod.rs.orig b/tests/text_to_ast/mod.rs.orig deleted file mode 100644 index a1a809d..0000000 --- a/tests/text_to_ast/mod.rs.orig +++ /dev/null @@ -1,63 +0,0 @@ -use deltachat_message_parser::parser::Element::*; -use deltachat_message_parser::parser::LinkDestination; - -fn gopher_link_no_puny<'a>(target: &'a str, hostname: &'a str) -> LinkDestination<'a> { - LinkDestination { - target, - hostname: Some(hostname), - scheme: "gopher", - punycode: None, - } -} - -<<<<<<< HEAD -======= -fn internal_link(target: &str) -> LinkDestination<'_> { - LinkDestination { - target, - hostname: None, - scheme: "", - punycode: None, - } -} - ->>>>>>> a0203f4363e504cbe5d32a846a9c8770d6442cf7 -fn http_link_no_puny<'a>(target: &'a str, hostname: &'a str) -> LinkDestination<'a> { - LinkDestination { - target, - hostname: Some(hostname), - scheme: "http", - punycode: None, - } -} - -fn ftp_link_no_puny<'a>(target: &'a str, hostname: &'a str) -> LinkDestination<'a> { - LinkDestination { - target, - hostname: Some(hostname), - scheme: "ftp", - punycode: None, - } -} - -fn https_link_no_puny<'a>(target: &'a str, hostname: &'a str) -> LinkDestination<'a> { - LinkDestination { - target, - hostname: Some(hostname), - scheme: "https", - punycode: None, - } -} - -fn mailto_link_no_puny(target: &str) -> LinkDestination<'_> { - LinkDestination { - target, - hostname: None, - scheme: "mailto", - punycode: None, - } -} - -mod desktop_set; -mod markdown; -mod text_only; diff --git a/tests/text_to_ast/text_only.rs b/tests/text_to_ast/text_only.rs index 09b6c40..1866328 100644 --- a/tests/text_to_ast/text_only.rs +++ b/tests/text_to_ast/text_only.rs @@ -173,6 +173,34 @@ fn persian_hashtag_with_underline() { ); } +#[test] +fn labelled_hashtag() { + let input = "[Hello](#hello) #world"; + + assert_eq!( + parse_only_text(input), + vec![ + LabelledTag { + label: 
vec![Text("Hello")], + tag: "#hello", + }, + Tag("#world"), + ] + ); + + let input_bold = "[**Hello**](#hi) "; + assert_eq!( + parse_only_text(input_bold), + vec![ + LaballedTag { + label: vec![Bold(vec![Text(Hello)])], + tag: "#hi", + }, + Text(" ") + ] + ); +} + #[test] fn email_address_standalone() { let test_cases = vec![ From 25b0248b6b47f3cb52ce2138cb7cc045dec2a1cb Mon Sep 17 00:00:00 2001 From: Farooq Karimi Zadeh Date: Tue, 28 May 2024 17:26:51 +0330 Subject: [PATCH 2/7] update --- src/main.rs | 4 +- src/parser/mod.rs | 2 +- src/parser/parse_from_text/text_elements.rs | 52 ++++++++++++++------- 3 files changed, 39 insertions(+), 19 deletions(-) diff --git a/src/main.rs b/src/main.rs index 1fb2963..98fc113 100644 --- a/src/main.rs +++ b/src/main.rs @@ -1,6 +1,6 @@ use std::io::{self, Read, Write}; -use parser::parse_markdown_text; +use parser::parse_only_text; #[allow(dead_code)] mod parser; extern crate nom; @@ -13,7 +13,7 @@ fn main() -> io::Result<()> { //println!("input: {:?}", buffer); - let output = parse_markdown_text(&buffer); + let output = parse_only_text(&buffer); io::stdout().write_all(format!("output: {:?}", output).as_bytes())?; diff --git a/src/parser/mod.rs b/src/parser/mod.rs index 980aefd..28ab816 100644 --- a/src/parser/mod.rs +++ b/src/parser/mod.rs @@ -19,7 +19,7 @@ pub enum Element<'a> { Tag(&'a str), /// [label](#tag) LabelledTag { - label: Box>, + label: Vec>, tag: &'a str }, /// Represents a linebreak - \n diff --git a/src/parser/parse_from_text/text_elements.rs b/src/parser/parse_from_text/text_elements.rs index 6279fdc..8b18e30 100644 --- a/src/parser/parse_from_text/text_elements.rs +++ b/src/parser/parse_from_text/text_elements.rs @@ -5,12 +5,12 @@ use super::hashtag_content_char_ranges::hashtag_content_char; use super::Element; use nom::{ bytes::{ - complete::{tag, take, take_while, take_while1}, + complete::{tag, take, take_while, take_while1, is_not}, streaming::take_till1, }, character::complete::char, combinator::{peek, 
recognize, verify, consumed}, - sequence::tuple, + sequence::{tuple, delimited}, AsChar, IResult, Offset, Slice, }; @@ -254,17 +254,35 @@ fn bot_command_suggestion(input: &str) -> IResult<&str, Element, CustomError<&st } fn labelled_tag(input: &str) -> IResult<&str, Element, CustomError<&str>> { - let (input, label) = delimited(char('['), is_not("["), char(']'))(input)?; - let (_, label) = parse_text_element(label, None)?; - let (input, tag) = delimited(char('('), is_not("("), char(')'))(input)?; - let (_, tag) = consumed(hashtag)(tag)?; - Ok(( - input, - Element::LabelledTag { - label: Box::new(label), - tag - } - )) + let (input, label) = delimited( + char('['), + take_while1(|c| !matches!(c, '[' | ']')), + char(']') + )(input)?; + println!("Label: {label}"); + let mut remaining = label; + let mut elements: Vec = vec![]; + while let Ok((remaining, elm)) = parse_text_element(label, None) { + elements.push(elm); + } + println!("Elements: {:?}", elements); + let (input, tag) = delimited( + char('('), + take_while1(|c| !matches!(c, '(' | ')')), + char(')') + )(input)?; + let (_, (consumed, _output)) = consumed(hashtag)(tag)?; + if consumed == tag { + Ok(( + input, + Element::LabelledTag { + label: elements, + tag: consumed, + } + )) + } else { + Err(nom::Err::Error(CustomError::UnexpectedContent)) + } } pub(crate) fn parse_text_element( @@ -277,10 +295,12 @@ pub(crate) fn parse_text_element( // // Also as this is the text element parser, // text elements parsers MUST NOT call the parser for markdown elements internally - - if let Ok((i, elm)) = hashtag(input) { + { + println!("{:?}", labelled_tag(input)); + } + if let Ok((i, elm)) = labelled_tag(input) { Ok((i, elm)) - } else if let Ok((i, elm)) = labelled_tag(input) { + } else if let Ok((i, elm)) = hashtag(input) { Ok((i, elm)) } else if let Ok((i, elm)) = { if prev_char == Some(' ') || prev_char.is_none() { From 0c6d53ff988d6e33804a7bc277ac5080b7a573cb Mon Sep 17 00:00:00 2001 From: Farooq Karimi Zadeh Date: Tue, 
28 May 2024 17:27:02 +0330 Subject: [PATCH 3/7] add more test case --- tests/text_to_ast/text_only.rs | 20 ++++++++++++++++++-- 1 file changed, 18 insertions(+), 2 deletions(-) diff --git a/tests/text_to_ast/text_only.rs b/tests/text_to_ast/text_only.rs index 1866328..d249c59 100644 --- a/tests/text_to_ast/text_only.rs +++ b/tests/text_to_ast/text_only.rs @@ -192,13 +192,29 @@ fn labelled_hashtag() { assert_eq!( parse_only_text(input_bold), vec![ - LaballedTag { - label: vec![Bold(vec![Text(Hello)])], + LabelledTag { + label: vec![Bold(vec![Text("Hello")])], tag: "#hi", }, Text(" ") ] ); + + let input_bold_and_italic = "Hello this is a [_labeled_ **hashtag**](#tag)"; + assert_eq!( + parse_only_text(input_bold_and_italic), + vec![ + Text("Hello this is a "), + LabelledTag { + label: vec![ + Italics(vec![Text("labeled")]), + Text(" "), + Bold(vec![Text("hashtag")]), + ], + tag: "#tag", + } + ] + ); } #[test] From f631e4149bea14f0742720171f834ea0f728cba9 Mon Sep 17 00:00:00 2001 From: Farooq Karimi Zadeh Date: Wed, 29 May 2024 11:28:18 +0330 Subject: [PATCH 4/7] remove unused code --- src/parser/parse_from_text/text_elements.rs | 137 -------------------- 1 file changed, 137 deletions(-) diff --git a/src/parser/parse_from_text/text_elements.rs b/src/parser/parse_from_text/text_elements.rs index 8b18e30..48dcfcf 100644 --- a/src/parser/parse_from_text/text_elements.rs +++ b/src/parser/parse_from_text/text_elements.rs @@ -89,143 +89,6 @@ pub(crate) fn email_address(input: &str) -> IResult<&str, Element, CustomError<& } } -/* -fn not_link_part_char(c: char) -> bool { - !matches!(c, ':' | '\n' | '\r' | '\t' | ' ') -} - -fn link(input: &str) -> IResult<&str, (), CustomError<&str>> { - let (input, _) = take_while1(link_scheme)(input)?; -} - -/// rough recognition of an link, results gets checked by a real link parser -fn link_intern(input: &str) -> IResult<&str, (), CustomError<&str>> { - let (input, _) = take_while1(not_link_part_char)(input)?; - let (input, _) = 
tag(":")(input)?; - let i = <&str>::clone(&input); - let (remaining, consumed) = take_while1(is_not_white_space)(i)?; - - let mut parentheses_count = 0usize; // () - let mut curly_brackets_count = 0usize; // {} - let mut brackets_count = 0usize; // [] - let mut angle_brackets = 0usize; // <> - - let mut alternative_offset = None; - for (i, char) in consumed.chars().enumerate() { - match char { - '(' => { - parentheses_count = parentheses_count.saturating_add(1); - // if there is no closing bracket in the link, then don't take the bracket as a part of the link - if (<&str>::clone(&consumed)).slice(i..).find(')').is_none() { - alternative_offset = Some(i); - break; - } - } - '{' => { - curly_brackets_count = curly_brackets_count.saturating_add(1); - // if there is no closing bracket in the link, then don't take the bracket as a part of the link - if (<&str>::clone(&consumed)).slice(i..).find('}').is_none() { - alternative_offset = Some(i); - break; - } - } - '[' => { - brackets_count = brackets_count.saturating_add(1); - // if there is no closing bracket in the link, then don't take the bracket as a part of the link - if (<&str>::clone(&consumed)).slice(i..).find(']').is_none() { - alternative_offset = Some(i); - break; - } - } - '<' => { - angle_brackets = angle_brackets.saturating_add(1); - // if there is no closing bracket in the link, then don't take the bracket as a part of the link - if (<&str>::clone(&consumed)).slice(i..).find('>').is_none() { - alternative_offset = Some(i); - break; - } - } - ')' => { - if parentheses_count == 0 { - alternative_offset = Some(i); - break; - } else { - parentheses_count = parentheses_count.saturating_sub(1); - } - } - '}' => { - if curly_brackets_count == 0 { - alternative_offset = Some(i); - break; - } else { - curly_brackets_count = curly_brackets_count.saturating_sub(1); - } - } - ']' => { - if brackets_count == 0 { - alternative_offset = Some(i); - break; - } else { - brackets_count = brackets_count.saturating_sub(1); - } 
- } - '>' => { - if angle_brackets == 0 { - alternative_offset = Some(i); - break; - } else { - angle_brackets = angle_brackets.saturating_sub(1); - } - } - _ => continue, - } - } - - if let Some(offset) = alternative_offset { - let remaining = input.slice(offset..); - Ok((remaining, ())) - } else { - Ok((remaining, ())) - } -} - -pub(crate) fn link(input: &str) -> IResult<&str, Element, CustomError<&str>> { - // basically - //let (input, content) = recognize(link_intern)(input)?; - // but don't eat the last char if it is one of these: `.,;:` - let i = <&str>::clone(&input); - let i2 = <&str>::clone(&input); - let i3 = <&str>::clone(&input); - let (input, content) = match link_intern(i) { - Ok((remaining, _)) => { - let index = i2.offset(remaining); - let consumed = i2.slice(..index); - match consumed.chars().last() { - Some(c) => match c { - '.' | ',' | ':' | ';' => { - let index = input.offset(remaining).saturating_sub(1); - let consumed = i3.slice(..index); - let remaining = input.slice(index..); - Ok((remaining, consumed)) - } - _ => Ok((remaining, consumed)), - }, - _ => Ok((remaining, consumed)), - } - } - Err(e) => Err(e), - }?; - - // check if result is valid link - let (remainder, destination) = LinkDestination::parse_standalone_with_whitelist(content)?; - - if remainder.is_empty() { - Ok((input, Element::Link { destination })) - } else { - Err(nom::Err::Error(CustomError::InvalidLink)) - } -} -*/ fn is_allowed_bot_cmd_suggestion_char(char: char) -> bool { match char { '@' | '\\' | '_' | '.' 
| '-' | '/' => true, From ebc50da327b0b65b7c4991294b0c7bba34a063b5 Mon Sep 17 00:00:00 2001 From: Farooq Karimi Zadeh Date: Wed, 29 May 2024 11:50:50 +0330 Subject: [PATCH 5/7] add labelled hashtags --- src/parser/parse_from_text/text_elements.rs | 17 +++++++---------- tests/text_to_ast/text_only.rs | 19 ++----------------- 2 files changed, 9 insertions(+), 27 deletions(-) diff --git a/src/parser/parse_from_text/text_elements.rs b/src/parser/parse_from_text/text_elements.rs index 48dcfcf..40e031b 100644 --- a/src/parser/parse_from_text/text_elements.rs +++ b/src/parser/parse_from_text/text_elements.rs @@ -5,7 +5,7 @@ use super::hashtag_content_char_ranges::hashtag_content_char; use super::Element; use nom::{ bytes::{ - complete::{tag, take, take_while, take_while1, is_not}, + complete::{tag, take, take_while, take_while1}, streaming::take_till1, }, character::complete::char, @@ -14,7 +14,10 @@ use nom::{ AsChar, IResult, Offset, Slice, }; -use super::base_parsers::CustomError; +use super::{ + parse_only_text, + base_parsers::CustomError, +}; fn linebreak(input: &str) -> IResult<&str, char, CustomError<&str>> { char('\n')(input) @@ -122,13 +125,7 @@ fn labelled_tag(input: &str) -> IResult<&str, Element, CustomError<&str>> { take_while1(|c| !matches!(c, '[' | ']')), char(']') )(input)?; - println!("Label: {label}"); - let mut remaining = label; - let mut elements: Vec = vec![]; - while let Ok((remaining, elm)) = parse_text_element(label, None) { - elements.push(elm); - } - println!("Elements: {:?}", elements); + let elements: Vec = parse_only_text(label); let (input, tag) = delimited( char('('), take_while1(|c| !matches!(c, '(' | ')')), @@ -159,7 +156,7 @@ pub(crate) fn parse_text_element( // Also as this is the text element parser, // text elements parsers MUST NOT call the parser for markdown elements internally { - println!("{:?}", labelled_tag(input)); + //println!("{:?}", labelled_tag(input)); } if let Ok((i, elm)) = labelled_tag(input) { Ok((i, elm)) diff --git 
a/tests/text_to_ast/text_only.rs b/tests/text_to_ast/text_only.rs index d249c59..9794576 100644 --- a/tests/text_to_ast/text_only.rs +++ b/tests/text_to_ast/text_only.rs @@ -184,6 +184,7 @@ fn labelled_hashtag() { label: vec![Text("Hello")], tag: "#hello", }, + Text(" "), Tag("#world"), ] ); @@ -193,28 +194,12 @@ fn labelled_hashtag() { parse_only_text(input_bold), vec![ LabelledTag { - label: vec![Bold(vec![Text("Hello")])], + label: vec![Text("**Hello**")], tag: "#hi", }, Text(" ") ] ); - - let input_bold_and_italic = "Hello this is a [_labeled_ **hashtag**](#tag)"; - assert_eq!( - parse_only_text(input_bold_and_italic), - vec![ - Text("Hello this is a "), - LabelledTag { - label: vec![ - Italics(vec![Text("labeled")]), - Text(" "), - Bold(vec![Text("hashtag")]), - ], - tag: "#tag", - } - ] - ); } #[test] From 073d1a3da23a18cb6ba16d1b44f68c64c17b57a4 Mon Sep 17 00:00:00 2001 From: Farooq Karimi Zadeh Date: Wed, 29 May 2024 11:59:23 +0330 Subject: [PATCH 6/7] correct formatting --- src/parser/mod.rs | 2 +- src/parser/parse_from_text/text_elements.rs | 23 +++++++++------------ tests/text_to_ast/text_only.rs | 2 +- 3 files changed, 12 insertions(+), 15 deletions(-) diff --git a/src/parser/mod.rs b/src/parser/mod.rs index 28ab816..bc90985 100644 --- a/src/parser/mod.rs +++ b/src/parser/mod.rs @@ -20,7 +20,7 @@ pub enum Element<'a> { /// [label](#tag) LabelledTag { label: Vec>, - tag: &'a str + tag: &'a str, }, /// Represents a linebreak - \n Linebreak, diff --git a/src/parser/parse_from_text/text_elements.rs b/src/parser/parse_from_text/text_elements.rs index 40e031b..a73d633 100644 --- a/src/parser/parse_from_text/text_elements.rs +++ b/src/parser/parse_from_text/text_elements.rs @@ -9,15 +9,12 @@ use nom::{ streaming::take_till1, }, character::complete::char, - combinator::{peek, recognize, verify, consumed}, - sequence::{tuple, delimited}, + combinator::{consumed, peek, recognize, verify}, + sequence::{delimited, tuple}, AsChar, IResult, Offset, Slice, }; -use 
super::{ - parse_only_text, - base_parsers::CustomError, -}; +use super::{base_parsers::CustomError, parse_only_text}; fn linebreak(input: &str) -> IResult<&str, char, CustomError<&str>> { char('\n')(input) @@ -123,22 +120,22 @@ fn labelled_tag(input: &str) -> IResult<&str, Element, CustomError<&str>> { let (input, label) = delimited( char('['), take_while1(|c| !matches!(c, '[' | ']')), - char(']') + char(']'), )(input)?; let elements: Vec = parse_only_text(label); let (input, tag) = delimited( char('('), take_while1(|c| !matches!(c, '(' | ')')), - char(')') + char(')'), )(input)?; let (_, (consumed, _output)) = consumed(hashtag)(tag)?; if consumed == tag { Ok(( - input, - Element::LabelledTag { - label: elements, - tag: consumed, - } + input, + Element::LabelledTag { + label: elements, + tag: consumed, + }, )) } else { Err(nom::Err::Error(CustomError::UnexpectedContent)) diff --git a/tests/text_to_ast/text_only.rs b/tests/text_to_ast/text_only.rs index 9794576..3fd81b4 100644 --- a/tests/text_to_ast/text_only.rs +++ b/tests/text_to_ast/text_only.rs @@ -188,7 +188,7 @@ fn labelled_hashtag() { Tag("#world"), ] ); - + let input_bold = "[**Hello**](#hi) "; assert_eq!( parse_only_text(input_bold), From bc2d9594a8a00adfb4ccb7f515dfa31940503d3f Mon Sep 17 00:00:00 2001 From: Farooq Karimi Zadeh Date: Wed, 29 May 2024 12:04:40 +0330 Subject: [PATCH 7/7] add some sort of spec for labelled tags --- spec.md | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/spec.md b/spec.md index 3d07967..3f078b2 100644 --- a/spec.md +++ b/spec.md @@ -23,6 +23,7 @@ - [Delimited Email addresses: ``](#delimited-email-addresses) - [Delimited Links: ``](#delimited-links) - [Labeled Links: `[Name](url)`](#labled-links) + - [Labeled hashtags: `[Tag](#tag)`](#labeled-tags) ## Text Enhancements @@ -182,6 +183,13 @@ Optionally, a client can implement a system to trust a domain (a "don't ask agai URL parsing allows all valid URLs, no restrictions on schemes, no whitelist is needed, because the 
format already specifies that it is a link. + + + +### Labeled hashtags + +The idea is to have hashtags that are labeled with an alternative text, written as `[label](#tag)`. This feature is rarely seen in other instant messengers. + ## Ideas For The Future: ### `:emoji:`