Merge branch 'main' into the --output feature branch

curl · Dec 7, 2024 · 1fbfed7 · 1fbfed7
2 parents c0d31c7 + e01d578
commit 1fbfed7
Show file tree

Hide file tree

Showing 7 changed files with 217 additions and 50 deletions.
diff --git a/AUTHORS b/AUTHORS
@@ -0,0 +1,12 @@
+<!--
+Copyright (C) Samuel Henrique <[email protected]>, Sergio Durigan Junior <[email protected]> and many contributors, see the AUTHORS file.
+
+SPDX-License-Identifier: curl
+-->
+Ben Zanin
+Daniel Stenberg <[email protected]>
+Guilherme Puida <[email protected]>
+Ryan Carsten Schmidt <[email protected]>
+Samuel Henrique <[email protected]>
+Sergio Durigan Junior <[email protected]>
+Viktor Szakats
diff --git a/LICENSE b/LICENSE
@@ -1,9 +1,8 @@
 COPYRIGHT AND PERMISSION NOTICE
 
-Copyright (C) Samuel Henrique, <[email protected]>.
-Copyright (C) Sergio Durigan Junior, <[email protected]>
-Copyright (C) Ryan Carsten Schmidt <[email protected]>
-Copyright (C) Ben Zanin
+Copyright (C) Samuel Henrique <[email protected]>, Sergio Durigan
+Junior <[email protected]> and many contributors, see the AUTHORS
+file.
 
 All rights reserved.
 

diff --git a/LICENSES/curl.txt b/LICENSES/curl.txt
@@ -1,7 +1,8 @@
 COPYRIGHT AND PERMISSION NOTICE
 
-Copyright (C) Daniel Stenberg, <[email protected]>, and many
-contributors, see the THANKS file.
+Copyright (C) Samuel Henrique <[email protected]>, Sergio Durigan
+Junior <[email protected]> and many contributors, see the AUTHORS
+file.
 
 All rights reserved.
 

diff --git a/README.md b/README.md
@@ -1,5 +1,7 @@
 <!--
-Copyright (C) Samuel Henrique <[email protected]>
+Copyright (C) Samuel Henrique <[email protected]>, Sergio Durigan
+Junior <[email protected]> and many contributors, see the AUTHORS
+file.
 
 SPDX-License-Identifier: curl
 -->
@@ -14,8 +16,9 @@ SPDX-License-Identifier: curl
 
 # Synopsis
 
-    wcurl [--curl-options <CURL_OPTIONS>]... [-o|-O|--output <PATH>] [--dry-run] [--] <URL>...
-    wcurl [--curl-options=<CURL_OPTIONS>]... [--output=<PATH>] [--dry-run] [--] <URL>...
+    wcurl <URL>...
+    wcurl [--curl-options <CURL_OPTIONS>]... [--no-decode-filename] [-o|-O|--output <PATH>] [--dry-run] [--] <URL>...
+    wcurl [--curl-options=<CURL_OPTIONS>]... [--no-decode-filename] [--output=<PATH>] [--dry-run] [--] <URL>...
     wcurl -V|--version
     wcurl -h|--help
 
@@ -33,14 +36,16 @@ should be using curl directly if your use case is not covered.
 
 
 * By default, **wcurl** will:
-  * Encode whitespaces in URLs;
+  * Percent-encode whitespaces in URLs;
   * Download multiple URLs in parallel if the installed curl's version is >= 7.66.0;
   * Follow redirects;
   * Automatically choose a filename as output;
   * Avoid overwriting files if the installed curl's version is >= 7.83.0 (`--no-clobber`);
   * Perform retries;
   * Set the downloaded file timestamp to the value provided by the server, if available;
   * Disable **curl**'s URL globbing parser so `{}` and `[]` characters in URLs are not treated specially.
+  * Percent-decode the resulting filename;
+  * Use "index.html" as default filename if there's none in the URL.
 
 # Options
 
@@ -51,10 +56,14 @@ should be using curl directly if your use case is not covered.
 
 * `-o, -O, --output=<PATH>`
 
-  Use explicit output path instead of curl's --remote-name logic. If multiple
+  Use the provided output path instead of getting it from the URL. If multiple
   URLs are provided, all files will have the same name with a number appended to
   the end (curl >= 7.83.0).
 
+* `--no-decode-filename`
+  Don't percent-decode the output filename, even if the percent-encoding in the
+  URL was done by wcurl, e.g. because the URL contained whitespace.
+
 * `--dry-run`
 
   Don't actually execute curl, just print what would be invoked.
@@ -70,7 +79,7 @@ should be using curl directly if your use case is not covered.
 # Url
 
 Anything which is not a parameter will be considered an URL.
-**wcurl** will encode whitespaces and pass that to curl, which will perform the
+**wcurl** will percent-encode whitespaces and pass that to curl, which will perform the
 parsing of the URL.
 
 # Examples
@@ -108,8 +117,7 @@ script:
 
 Samuel Henrique &lt;[[email protected]](mailto:[email protected])&gt;  
 Sergio Durigan Junior &lt;[[email protected]](mailto:[email protected])&gt;  
-Ryan Carsten Schmidt &lt;[[email protected]](mailto:[email protected])&gt;  
-Ben Zanin  
+and many contributors, see the AUTHORS file.
 
 # Reporting Bugs
 

diff --git a/tests/tests.sh b/tests/tests.sh
@@ -4,8 +4,9 @@
 #
 # This is wcurl's testsuite.
 #
-# Copyright (C) Sergio Durigan Junior, <[email protected]>
-# Copyright (C) Guilherme Puida Moreira, <[email protected]>
+# Copyright (C) Samuel Henrique <[email protected]>, Sergio Durigan
+# Junior <[email protected]> and many contributors, see the AUTHORS
+# file.
 #
 # Permission to use, copy, modify, and distribute this software for any purpose
 # with or without fee is hereby granted, provided that the above copyright
@@ -119,6 +120,83 @@ testOutputFileName()
     assertContains "Verify whether 'wcurl' correctly sets a custom output filename" "${ret}" 'test filename'
 }
 
+testUrlDefaultName()  # Expects 'index.html' in wcurl's output for a URL that has no path.
+{
+    url='example%20with%20spaces.com'
+    ret=$(${WCURL_CMD} "${url}" 2>&1)  # URL is quoted so it is never split or globbed.
+    assertContains "Verify whether 'wcurl' chooses the correct default filename when there's no path in the URL" "${ret}" 'index.html'
+}
+
+testUrlDefaultNameTrailingSlash()  # Expects 'index.html' for a path-less URL ending in '/'.
+{
+    url='example%20with%20spaces.com/'
+    ret=$(${WCURL_CMD} "${url}" 2>&1)  # URL is quoted so it is never split or globbed.
+    assertContains "Verify whether 'wcurl' chooses the correct default filename when there's no path in the URL and the URL ends with a slash" "${ret}" 'index.html'
+}
+
+testUrlDecodingWhitespaces()  # Expects '%20' in the URL's last segment to show up as spaces.
+{
+    url='example.com/filename%20with%20spaces'
+    ret=$(${WCURL_CMD} "${url}" 2>&1)  # URL is quoted so it is never split or globbed.
+    assertContains "Verify whether 'wcurl' successfully decodes percent-encoded whitespaces in URLs" "${ret}" 'filename with spaces'
+}
+
+testUrlDecodingWhitespacesTwoFiles()  # Same decoding check, with two URLs in one invocation.
+{
+    url='example.com/filename%20with%20spaces'
+    url_2='example.com/filename2%20with%20spaces'
+    ret=$(${WCURL_CMD} "${url}" "${url_2}" 2>&1)  # Each URL is quoted: one argument per URL.
+    assertContains "Verify whether 'wcurl' successfully decodes percent-encoded whitespaces in URLs (first URL)" "${ret}" 'filename with spaces'
+    assertContains "Verify whether 'wcurl' successfully decodes percent-encoded whitespaces in URLs (second URL)" "${ret}" 'filename2 with spaces'
+}
+
+testUrlDecodingDisabled()  # With --no-decode-filename the '%20' sequences must stay as-is.
+{
+    url='example.com/filename%20with%20spaces'
+    ret=$(${WCURL_CMD} --no-decode-filename "${url}" 2>&1)  # URL is quoted so it is never split or globbed.
+    assertContains "Verify whether 'wcurl' keeps percent-encoded whitespaces in the filename when '--no-decode-filename' is used" "${ret}" 'filename%20with%20spaces'
+}
+
+testUrlDecodingWhitespacesQueryString()  # Decoding check for a URL that carries a query string.
+{
+    url='example.com/filename%20with%20spaces?query=string'
+    ret=$(${WCURL_CMD} "${url}" 2>&1)  # Must be quoted: '?' is a glob character when unquoted.
+    assertContains "Verify whether 'wcurl' successfully decodes percent-encoded whitespaces in URLs with query strings" "${ret}" 'filename with spaces'
+}
+
+testUrlDecodingWhitespacesTrailingSlash()  # A URL ending in '/' must yield 'index.html'.
+{
+    url='example.com/filename%20with%20spaces/'
+    ret=$(${WCURL_CMD} "${url}" 2>&1)  # URL is quoted so it is never split or globbed.
+    assertContains "Verify whether 'wcurl' successfully uses the default filename when the URL ends with a slash" "${ret}" 'index.html'
+}
+
+# Test decoding of several languages that don't use the Latin alphabet.
+# Each language could be split into its own test, but for now it doesn't
+# make a difference. Every URL is quoted so it is never split or globbed.
+testUrlDecodingNonLatinLanguages()
+{
+    # Arabic
+    url='example.com/%D8%AA%D8%B1%D9%85%D9%8A%D8%B2_%D8%A7%D9%84%D9%86%D8%B3%D8%A8%D8%A9_%D8%A7%D9%84%D9%85%D8%A6%D9%88%D9%8A%D8%A9'
+    ret=$(${WCURL_CMD} "${url}" 2>&1)
+    assertContains "Verify whether 'wcurl' successfully decodes percent-encoded Arabic in URLs" "${ret}" 'ترميز_النسبة_المئوية'
+
+    # Persian
+    url='example.com/%DA%A9%D8%AF%D8%A8%D9%86%D8%AF%DB%8C_%D8%AF%D8%B1%D8%B5%D8%AF%DB%8C'
+    ret=$(${WCURL_CMD} "${url}" 2>&1)
+    assertContains "Verify whether 'wcurl' successfully decodes percent-encoded Persian in URLs" "${ret}" 'کدبندی_درصدی'
+
+    # Japanese
+    url='example.com/%E3%83%91%E3%83%BC%E3%82%BB%E3%83%B3%E3%83%88%E3%82%A8%E3%83%B3%E3%82%B3%E3%83%BC%E3%83%87%E3%82%A3%E3%83%B3%E3%82%B0'
+    ret=$(${WCURL_CMD} "${url}" 2>&1)
+    assertContains "Verify whether 'wcurl' successfully decodes percent-encoded Japanese in URLs" "${ret}" 'パーセントエンコーディング'
+
+    # Korean
+    url='example.com/%ED%8D%BC%EC%84%BC%ED%8A%B8_%EC%9D%B8%EC%BD%94%EB%94%A9'
+    ret=$(${WCURL_CMD} "${url}" 2>&1)
+    assertContains "Verify whether 'wcurl' successfully decodes percent-encoded Korean in URLs" "${ret}" '퍼센트_인코딩'
+}
+
 ## Ideas for tests:
 ##
 ## - URL with whitespace