From e7259c1dceaf6923801b23ad8ef17a8925fd8801 Mon Sep 17 00:00:00 2001 From: Daniel Stenberg Date: Fri, 13 Sep 2024 13:20:43 +0200 Subject: [PATCH] trurl: introduce --qtrim for trimming queries (only) --trim is now deprecated. When we added that we thought we would trim other components over time but that has not materialized. Switching to --qtrim makes for easier command lines with no functionality loss. --trim is no longer displayed in the help output but is still tested in several test cases. Closes #364 --- README.md | 4 ++-- tests.json | 68 ++++++++++++++++++++++++++++++++++++++---------------- trurl.c | 21 +++++++++-------- trurl.md | 23 ++++++++++++++---- 4 files changed, 79 insertions(+), 37 deletions(-) diff --git a/README.md b/README.md index ab8acac6..80026f31 100644 --- a/README.md +++ b/README.md @@ -93,7 +93,7 @@ $ trurl "https://fake.host/hello#frag" --set user=::moo:: --json **Remove tracking tuples from query:** ```text -$ trurl "https://curl.se?search=hey&utm_source=tracker" --trim query="utm_*" +$ trurl "https://curl.se?search=hey&utm_source=tracker" --qtrim "utm_*" https://curl.se/?search=hey ``` @@ -114,7 +114,7 @@ https://example.com?a=c&b=a&c=b **Work with a query that uses a semicolon separator:** ```text -$ trurl "https://curl.se?search=fool;page=5" --trim query="search" --query-separator ";" +$ trurl "https://curl.se?search=fool;page=5" --qtrim "search" --query-separator ";" https://curl.se?page=5 ``` diff --git a/tests.json b/tests.json index 49ad7169..909337a9 100644 --- a/tests.json +++ b/tests.json @@ -827,6 +827,20 @@ "returncode": 0 } }, + { + "input": { + "arguments": [ + "https://example.com?search=hello&utm_source=tracker", + "--qtrim", + "utm_*" + ] + }, + "expected": { + "stdout": "https://example.com/?search=hello\n", + "stderr": "", + "returncode": 0 + } + }, { "input": { "arguments": [ @@ -841,12 +855,26 @@ "returncode": 0 } }, + { + "input": { + "arguments": [ + 
"https://example.com?search=hello&utm_source=tracker&more=data", + "--qtrim", + "utm_*" + ] + }, + "expected": { + "stdout": "https://example.com/?search=hello&more=data\n", + "stderr": "", + "returncode": 0 + } + }, { "input": { "arguments": [ "https://example.com?search=hello&more=data", - "--trim", - "query=utm_*" + "--qtrim", + "utm_*" ] }, "expected": { @@ -873,8 +901,8 @@ "input": { "arguments": [ "https://example.com?search=hello&utm_source=tracker&more=data", - "--trim", - "query=utm_source" + "--qtrim", + "utm_source" ] }, "expected": { @@ -887,12 +915,12 @@ "input": { "arguments": [ "https://example.com?search=hello&utm_source=tracker&more=data", - "--trim", - "query=utm_source", - "--trim", - "query=more", - "--trim", - "query=search" + "--qtrim", + "utm_source", + "--qtrim", + "more", + "--qtrim", + "search" ] }, "expected": { @@ -951,8 +979,8 @@ "input": { "arguments": [ "https://example.com?moo&search=hello", - "--trim", - "query=search" + "--qtrim", + "search" ] }, "expected": { @@ -965,8 +993,8 @@ "input": { "arguments": [ "https://example.com?search=hello&moo", - "--trim", - "query=search" + "--qtrim", + "search" ] }, "expected": { @@ -979,8 +1007,8 @@ "input": { "arguments": [ "https://example.com?search=hello", - "--trim", - "query=search", + "--qtrim", + "search", "--append", "query=moo" ] @@ -2157,8 +2185,8 @@ { "input": { "arguments": [ - "--trim", - "query=a", + "--qtrim", + "a", "-a", "query=a=ciao", "-a", @@ -2267,8 +2295,8 @@ "arguments": [ "--url", "https://curl.se/we/are.html?*=moo&user=many#more", - "--trim", - "query=\\*" + "--qtrim", + "\\*" ] }, "expected": { diff --git a/trurl.c b/trurl.c index 31544c40..b66a9f60 100644 --- a/trurl.c +++ b/trurl.c @@ -144,7 +144,7 @@ static const struct var variables[] = { #define ERROR_SET 5 /* a --set problem */ #define ERROR_MEM 6 /* out of memory */ #define ERROR_URL 7 /* could not get a URL out of the set components */ -#define ERROR_TRIM 8 /* a --trim problem */ +#define ERROR_TRIM 8 /* a 
--qtrim problem */ #define ERROR_BADURL 9 /* if --verify is set and the URL cannot parse */ #define ERROR_GET 10 /* bad --get syntax */ #define ERROR_ITER 11 /* bad --iterate syntax */ @@ -244,6 +244,7 @@ static void help(void) " --keep-port - keep known default ports\n" " --no-guess-scheme - require scheme in URLs\n" " --punycode - encode hostnames in punycode\n" + " --qtrim [what] - trim the query\n" " --query-separator [letter] - if something else than '&'\n" " --quiet - Suppress (some) notes and comments\n" " --redirect [URL] - redirect to this\n" @@ -251,7 +252,6 @@ static void help(void) " --replace-append [data] - appends a new query if not found\n" " -s, --set [component]=[data] - set component content\n" " --sort-query - alpha-sort the query pairs\n" - " --trim [component]=[what] - trim component\n" " --url [URL] - URL to work with\n" " --urlencode - URL encode components by default\n" " -v, --version - show version\n" @@ -652,6 +652,13 @@ static int getarg(struct option *o, *usedarg = gap; } else if(checkoptarg(o, "--trim", flag, arg)) { + if(strncmp(arg, "query=", 6)) + errorf(o, ERROR_TRIM, "Unsupported trim component: %s", arg); + + trimadd(o, &arg[6]); + *usedarg = gap; + } + else if(checkoptarg(o, "--qtrim", flag, arg)) { trimadd(o, arg); *usedarg = gap; } @@ -1234,13 +1241,8 @@ static bool trim(struct option *o) bool query_is_modified = false; struct curl_slist *node; for(node = o->trim_list; node; node = node->next) { - char *ptr; - char *instr = node->data; - if(strncmp(instr, "query", 5)) - /* for now we can only trim query components */ - errorf(o, ERROR_TRIM, "Unsupported trim component: %s", instr); - ptr = strchr(instr, '='); - if(ptr && (ptr > instr)) { + char *ptr = node->data; + if(ptr) { /* 'ptr' should be a fixed string or a pattern ending with an asterisk */ size_t inslen; @@ -1248,7 +1250,6 @@ static bool trim(struct option *o) int i; char *temp = NULL; - ptr++; /* pass the = */ inslen = strlen(ptr); if(inslen) { pattern = ptr[inslen - 
1] == '*'; diff --git a/trurl.md b/trurl.md index 238808eb..dce38107 100644 --- a/trurl.md +++ b/trurl.md @@ -231,6 +231,17 @@ Uses the punycode version of the hostname, which is how International Domain Names are converted into plain ASCII. If the hostname is not using IDN, the regular ASCII name is used. +## --qtrim [what] + +Trims data off a query. + +*what* is specified as a full name of a name/value pair, or as a word prefix +(using a single trailing asterisk (`*`)) which makes trurl remove the tuples +from the query string that match the instruction. + +To match a literal trailing asterisk instead of using a wildcard, escape it +with a backslash in front of it. Like `\\*`. + ## --query-separator [what] Specify the single letter used for separating query pairs. The default is `&` @@ -292,6 +303,8 @@ otherwise only had their query pairs in different orders. ## --trim [component]=[what] +Deprecated: use **--qtrim**. + Trims data off a component. Currently this can only trim a query component. 
*what* is specified as a full word or as a word prefix (using a single @@ -546,9 +559,9 @@ them first at least increases the chances of it working: http://alpha/?one=real&three=alsoreal&two=fake Remove name/value pairs from the URL by specifying exact name or wildcard -pattern with **--trim**: +pattern with **--qtrim**: - $ trurl 'https://example.com?a12=hej&a23=moo&b12=foo' --trim 'query=a*' + $ trurl 'https://example.com?a12=hej&a23=moo&b12=foo' --qtrim 'a*' https://example.com/?b12=foo ## fragment @@ -746,7 +759,7 @@ $ trurl "https://fake.host/search?q=answers&user=me#frag" --json ## Remove tracking tuples from query ~~~ -$ trurl "https://curl.se?search=hey&utm_source=tracker" --trim query="utm_*" +$ trurl "https://curl.se?search=hey&utm_source=tracker" --qtrim "utm_*" https://curl.se/?search=hey ~~~ @@ -767,7 +780,7 @@ https://example.com?a=c&b=a&c=b ## Work with a query that uses a semicolon separator ~~~ -$ trurl "https://curl.se?search=fool;page=5" --trim query="search" --query-separator ";" +$ trurl "https://curl.se?search=fool;page=5" --qtrim "search" --query-separator ";" https://curl.se?page=5 ~~~ @@ -821,7 +834,7 @@ Could not output a valid URL ## 8 -A problem with --trim +A problem with --qtrim ## 9