Skip to content

Commit

Permalink
some smaller code cleanup (#86)
Browse files Browse the repository at this point in the history
* remove comment with dead code

* move parenthesis counter into dedicated file and change to more descriptive name
also add some tests
  • Loading branch information
Simon-Laux authored Jan 9, 2025
1 parent 6c259a4 commit d7844c6
Show file tree
Hide file tree
Showing 4 changed files with 85 additions and 199 deletions.
1 change: 1 addition & 0 deletions src/parser/link_url/mod.rs
Original file line number Diff line number Diff line change
@@ -1,4 +1,5 @@
mod ip;
mod parenthesis_counter;
mod parse_link;

use nom::{
Expand Down
81 changes: 81 additions & 0 deletions src/parser/link_url/parenthesis_counter.rs
Original file line number Diff line number Diff line change
@@ -0,0 +1,81 @@
use nom::Slice;

macro_rules! adjust_balance {
($a: expr, $b: expr, $c: expr, $d: expr) => {
// for opening ones
{
$a = $a.saturating_add(1);
if $d.slice($c..).find($b).is_none() {
return Some($c);
}
}
};
($a: expr, $b: expr) => {
// for closing ones
{
if $a == 0 {
return Some($b);
} else {
$a = $a.saturating_sub(1);
}
}
};
}

/// finds unbalanced closing parenthesesis and returns distance to it.
/// unbalanced means it was closed but not opened before in the given string
pub(super) fn count_chars_in_complete_parenthesis(input: &str) -> Option<usize> {
let mut parenthes = 0usize; // ()
let mut curly_bracket = 0usize; // {}
let mut bracket = 0usize; // []
let mut angle = 0usize; // <>

for (i, ch) in input.chars().enumerate() {
match ch {
'(' => {
adjust_balance!(parenthes, ')', i, input);
}
'{' => {
adjust_balance!(curly_bracket, '}', i, input);
}
'[' => {
adjust_balance!(bracket, ']', i, input);
}
'<' => {
adjust_balance!(angle, '>', i, input);
}
')' => {
adjust_balance!(parenthes, i);
}
']' => {
adjust_balance!(bracket, i);
}
'}' => {
adjust_balance!(curly_bracket, i);
}
'>' => {
adjust_balance!(angle, i);
}
_ => continue,
}
}
None
}

/// Balanced input yields `None`; a surplus closing parenthesis is reported at
/// its offset.
#[test]
fn test_count_parenthesis() {
    let cases = [
        ("{}", None),
        ("{} test", None),
        ("(test) test", None),
        ("(test)) test", Some(6)),
    ];
    for (text, expected) in cases {
        assert_eq!(count_chars_in_complete_parenthesis(text), expected);
    }
}

/// Bracket kinds are counted separately, so interleaved (but per-kind
/// balanced) brackets are not reported.
#[test]
fn test_count_different_types_invalid() {
    let result = count_chars_in_complete_parenthesis("(({(})))");
    assert_eq!(result, None);
}

/// A closing bracket at the very start has no opener and is reported at
/// offset 0.
#[test]
fn test_count_different_types_invalid2() {
    let result = count_chars_in_complete_parenthesis("}(({(})))");
    assert_eq!(result, Some(0));
}
65 changes: 3 additions & 62 deletions src/parser/link_url/parse_link.rs
Original file line number Diff line number Diff line change
Expand Up @@ -22,6 +22,8 @@ use crate::parser::{
},
};

use super::parenthesis_counter::count_chars_in_complete_parenthesis;

/// determines which generic schemes (without '://') get linkified
fn is_allowed_generic_scheme(scheme: &str) -> bool {
matches!(
Expand Down Expand Up @@ -272,67 +274,6 @@ fn ifragment(input: &str) -> IResult<&str, &str, CustomError<&str>> {
recognize(tuple((char('#'), take_while_ifragment)))(input)
}

// TODO: better name for this function
/// Returns the byte offset of the first bracket that makes `link` unbalanced,
/// or `None` when there is no such bracket.
///
/// The bracket kinds `()`, `{}`, `[]` and `<>` are counted independently. A
/// closing bracket is reported when no opening bracket of the same kind is
/// still unmatched; an opening bracket is reported when no closing bracket of
/// the same kind follows anywhere later in `link`.
///
/// The offset is measured in bytes and lies on a `char` boundary, so it can be
/// used to slice `link` even when it contains multi-byte characters.
fn get_correct_link(link: &str) -> Option<usize> {
    // Number of currently unmatched opening brackets, per kind.
    let mut parenthes = 0usize; // ()
    let mut curly_bracket = 0usize; // {}
    let mut bracket = 0usize; // []
    let mut angle = 0usize; // <>

    // Byte offsets are required here: a character index from
    // `chars().enumerate()` is not a valid slice position for non-ASCII text.
    let mut chars = link.char_indices();
    while let Some((offset, ch)) = chars.next() {
        // Openers carry the closing character that must still follow.
        let (depth, expected_closer) = match ch {
            '(' => (&mut parenthes, Some(')')),
            '{' => (&mut curly_bracket, Some('}')),
            '[' => (&mut bracket, Some(']')),
            '<' => (&mut angle, Some('>')),
            ')' => (&mut parenthes, None),
            ']' => (&mut bracket, None),
            '}' => (&mut curly_bracket, None),
            '>' => (&mut angle, None),
            _ => continue,
        };

        if let Some(closer) = expected_closer {
            // `chars.as_str()` is the remainder after the current character.
            if !chars.as_str().contains(closer) {
                return Some(offset);
            }
            *depth = depth.saturating_add(1);
        } else if *depth == 0 {
            // Closing bracket that was never opened.
            return Some(offset);
        } else {
            *depth = depth.saturating_sub(1);
        }
    }
    None
}

/// Recognizes zero or more path segments, each a `/` optionally followed by
/// whatever `take_while_ipchar1` accepts, and returns the matched slice.
///
/// NOTE(review): the name suggests the `ipath-abempty` rule of the IRI
/// grammar (presumably RFC 3987) — confirm.
fn parse_ipath_abempty(input: &str) -> IResult<&str, &str, CustomError<&str>> {
    // one segment: a slash plus optional segment characters
    let segment = tuple((char('/'), opt(take_while_ipchar1)));
    recognize(many0(segment))(input)
}
Expand Down Expand Up @@ -406,7 +347,7 @@ fn parse_iri(input: &str) -> IResult<&str, LinkDestination, CustomError<&str>> {
host = input_.slice(scheme.len().saturating_add(3)..input_.len().saturating_sub(1));
}
}
len = get_correct_link(link).unwrap_or(len);
len = count_chars_in_complete_parenthesis(link).unwrap_or(len);
let link = input_.slice(0..len);
let input = input_.slice(len..);

Expand Down
137 changes: 0 additions & 137 deletions src/parser/parse_from_text/text_elements.rs
Original file line number Diff line number Diff line change
Expand Up @@ -95,143 +95,6 @@ pub(crate) fn fediverse_address_as_text(input: &str) -> IResult<&str, Element, C
Ok((input, Element::Text(consumed)))
}

/*
fn not_link_part_char(c: char) -> bool {
!matches!(c, ':' | '\n' | '\r' | '\t' | ' ')
}
fn link(input: &str) -> IResult<&str, (), CustomError<&str>> {
let (input, _) = take_while1(link_scheme)(input)?;
}
/// rough recognition of an link, results gets checked by a real link parser
fn link_intern(input: &str) -> IResult<&str, (), CustomError<&str>> {
let (input, _) = take_while1(not_link_part_char)(input)?;
let (input, _) = tag(":")(input)?;
let i = <&str>::clone(&input);
let (remaining, consumed) = take_while1(is_not_white_space)(i)?;
let mut parentheses_count = 0usize; // ()
let mut curly_brackets_count = 0usize; // {}
let mut brackets_count = 0usize; // []
let mut angle_brackets = 0usize; // <>
let mut alternative_offset = None;
for (i, char) in consumed.chars().enumerate() {
match char {
'(' => {
parentheses_count = parentheses_count.saturating_add(1);
// if there is no closing bracket in the link, then don't take the bracket as a part of the link
if (<&str>::clone(&consumed)).slice(i..).find(')').is_none() {
alternative_offset = Some(i);
break;
}
}
'{' => {
curly_brackets_count = curly_brackets_count.saturating_add(1);
// if there is no closing bracket in the link, then don't take the bracket as a part of the link
if (<&str>::clone(&consumed)).slice(i..).find('}').is_none() {
alternative_offset = Some(i);
break;
}
}
'[' => {
brackets_count = brackets_count.saturating_add(1);
// if there is no closing bracket in the link, then don't take the bracket as a part of the link
if (<&str>::clone(&consumed)).slice(i..).find(']').is_none() {
alternative_offset = Some(i);
break;
}
}
'<' => {
angle_brackets = angle_brackets.saturating_add(1);
// if there is no closing bracket in the link, then don't take the bracket as a part of the link
if (<&str>::clone(&consumed)).slice(i..).find('>').is_none() {
alternative_offset = Some(i);
break;
}
}
')' => {
if parentheses_count == 0 {
alternative_offset = Some(i);
break;
} else {
parentheses_count = parentheses_count.saturating_sub(1);
}
}
'}' => {
if curly_brackets_count == 0 {
alternative_offset = Some(i);
break;
} else {
curly_brackets_count = curly_brackets_count.saturating_sub(1);
}
}
']' => {
if brackets_count == 0 {
alternative_offset = Some(i);
break;
} else {
brackets_count = brackets_count.saturating_sub(1);
}
}
'>' => {
if angle_brackets == 0 {
alternative_offset = Some(i);
break;
} else {
angle_brackets = angle_brackets.saturating_sub(1);
}
}
_ => continue,
}
}
if let Some(offset) = alternative_offset {
let remaining = input.slice(offset..);
Ok((remaining, ()))
} else {
Ok((remaining, ()))
}
}
pub(crate) fn link(input: &str) -> IResult<&str, Element, CustomError<&str>> {
// basically
//let (input, content) = recognize(link_intern)(input)?;
// but don't eat the last char if it is one of these: `.,;:`
let i = <&str>::clone(&input);
let i2 = <&str>::clone(&input);
let i3 = <&str>::clone(&input);
let (input, content) = match link_intern(i) {
Ok((remaining, _)) => {
let index = i2.offset(remaining);
let consumed = i2.slice(..index);
match consumed.chars().last() {
Some(c) => match c {
'.' | ',' | ':' | ';' => {
let index = input.offset(remaining).saturating_sub(1);
let consumed = i3.slice(..index);
let remaining = input.slice(index..);
Ok((remaining, consumed))
}
_ => Ok((remaining, consumed)),
},
_ => Ok((remaining, consumed)),
}
}
Err(e) => Err(e),
}?;
// check if result is valid link
let (remainder, destination) = LinkDestination::parse_standalone_with_whitelist(content)?;
if remainder.is_empty() {
Ok((input, Element::Link { destination }))
} else {
Err(nom::Err::Error(CustomError::InvalidLink))
}
}
*/
fn is_allowed_bot_cmd_suggestion_char(char: char) -> bool {
match char {
'@' | '\\' | '_' | '.' | '-' | '/' => true,
Expand Down

0 comments on commit d7844c6

Please sign in to comment.