Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

trurl: canonicalize the path #331

Merged
merged 1 commit into from
Aug 27, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
26 changes: 26 additions & 0 deletions tests.json
Original file line number Diff line number Diff line change
Expand Up @@ -2650,5 +2650,31 @@
"stderr": "trurl note: URL decode error, most likely because of rubbish in the input (path)\n",
"returncode": 0
}
},
{
"input": {
"arguments": [
"https://example.com/one/t%61o/%2F%42/"
]
},
"expected": {
"stdout": "https://example.com/one/tao/%2fB/\n",
"stderr": "",
"returncode": 0
}
},
{
"input": {
"arguments": [
"https://example.com/one/t%61o/%2F%42/",
"--append",
"path=%61"
]
},
"expected": {
"stdout": "https://example.com/one/tao/%2fB/%2561\n",
"stderr": "",
"returncode": 0
}
}
]
66 changes: 66 additions & 0 deletions trurl.c
Original file line number Diff line number Diff line change
Expand Up @@ -1573,6 +1573,59 @@ static CURLUcode seturl(struct option *o, CURLU *uh, const char *url)
CURLU_URLENCODE);
}

/*
 * canonical_path() builds a canonical version of 'path'.
 *
 * The path is split on every '/'. Each non-empty segment is URL decoded and
 * then URL encoded again, and the pieces are joined back together with the
 * slashes kept in place. Example: "t%61o" becomes "tao". Empty segments
 * (leading, trailing or doubled slashes) are preserved as-is.
 *
 * Returns a newly allocated string that the caller must release with
 * curl_free(), or NULL if decoding, encoding or allocation failed. An empty
 * input yields an allocated empty string, so that NULL always means failure.
 */
static char *canonical_path(const char *path)
{
  size_t len = strlen(path);
  char *sl;
  char *dupe = NULL; /* the output built so far */

  do {
    char *ndupe;
    size_t partlen;

    /* find the end of the current segment */
    sl = memchr(path, '/', len);
    partlen = sl ? (size_t)(sl - path) : len;

    if(partlen) {
      char *opath;
      char *npath;
      int olen;

      /* First URL decode the part */
      opath = curl_easy_unescape(NULL, path, (int)partlen, &olen);
      if(!opath)
        goto fail;

      /* Then URL encode it again. The decoded copy is no longer needed
         whether or not the encoding worked. */
      npath = curl_easy_escape(NULL, opath, olen);
      curl_free(opath);
      if(!npath)
        goto fail;

      ndupe = curl_maprintf("%s%s%s", dupe ? dupe : "", npath, sl ? "/": "");
      curl_free(npath);
    }
    else if(sl) {
      /* zero length part but a slash */
      ndupe = curl_maprintf("%s/", dupe ? dupe : "");
    }
    else {
      /* no part, no slash */
      break;
    }

    /* the previous output has been copied into ndupe (or the copy failed);
       either way it is not needed anymore */
    curl_free(dupe);
    if(!ndupe)
      return NULL;

    dupe = ndupe;
    if(sl) {
      /* continue right after the slash */
      path = sl + 1;
      len -= partlen + 1;
    }

  } while(sl);

  if(!dupe)
    /* only reachable for an empty input: return an empty string instead of
       NULL so the caller does not mistake it for an allocation failure */
    dupe = curl_maprintf("%s", "");

  return dupe;

fail:
  /* do not leak the output assembled in earlier iterations */
  curl_free(dupe);
  return NULL;
}

static void singleurl(struct option *o,
const char *url, /* might be NULL */
struct iterinfo *iinfo,
Expand Down Expand Up @@ -1687,6 +1740,7 @@ static void singleurl(struct option *o,
if(first_lap) {
/* extract the current path */
char *opath;
char *cpath;
bool path_is_modified = false;
if(curl_url_get(uh, CURLUPART_PATH, &opath, 0))
errorf(o, ERROR_ITER, "out of memory");
Expand All @@ -1709,6 +1763,18 @@ static void singleurl(struct option *o,
opath = npath;
path_is_modified = true;
}
cpath = canonical_path(opath);
if(!cpath)
errorf(o, ERROR_MEM, "out of memory");

if(strcmp(cpath, opath)) {
/* updated */
path_is_modified = true;
curl_free(opath);
opath = cpath;
}
else
curl_free(cpath);
if(path_is_modified) {
/* set the new path */
if(curl_url_set(uh, CURLUPART_PATH, opath, 0))
Expand Down
Loading