Skip to content

Commit

Permalink
Download check for cache directory
Browse files Browse the repository at this point in the history
Add a marker .download file to validate the contents of cache directories.
Previously only the existence of the directory was checked, so if a
download was aborted the cache directory had to be deleted manually
(and the failure likely surfaced as a cryptic error message).
If the .download marker file does not exist, the directory will be deleted
and downloaded again.

It is also possible to check the contents with a checksum.
If not matching, the directory will be deleted and downloaded again.

For Git repos, the cached repo can be deleted if its status is not clean;
a checksum is not needed in that case (but one is used in the tests).
  • Loading branch information
gerhardol committed Sep 10, 2024
1 parent 0bc73f4 commit bc31cc5
Show file tree
Hide file tree
Showing 4 changed files with 309 additions and 6 deletions.
14 changes: 14 additions & 0 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -196,6 +196,12 @@ You can use `CPM_SOURCE_CACHE` on GitHub Actions workflows [cache](https://githu
The directory where the version for a project is stored is by default the hash of the arguments to `CPMAddPackage()`.
If for instance the patch command uses external files, the directory name can be set with the argument `CUSTOM_CACHE_KEY`.

It is possible to check the integrity of the downloaded content with a checksum by passing a [checksum command](test/unit/checksum_directory.sh) to `CPMAddPackage()` with the `CUSTOM_CACHE_CHECKSUM_COMMAND` argument.
Checksum validation can be done in two ways:

- Setting the option `CPM_CHECK_CACHE_CHECKSUM` to validate against the checksum calculated when the project was downloaded.
- Providing the expected checksum in the call to `CPMAddPackage()` with the `CUSTOM_CACHE_CHECKSUM_VALUE` argument.

### CPM_DOWNLOAD_ALL

If set, CPM will forward all calls to `CPMFindPackage` as `CPMAddPackage`.
Expand All @@ -219,6 +225,14 @@ Note that this does not apply to dependencies that have been defined with a trut
If set, CPM uses an additional directory level in the cache to improve the readability of package names in IDEs like CLion. This changes the cache structure, so all dependencies are downloaded again. Mixing both structures in one cache directory is not a problem, but there may then be two copies of some dependencies.
This can also be set as an environment variable.

### CPM_CHECK_CACHE_CHECKSUM

Enable validation of the checksum for a cache directory if a command to checksum the directory is provided. The validation is performed against the supplied checksum if one is provided, otherwise against the checksum recorded when the dependency was downloaded.

If `GIT_TAG` is set, the working tree of the cached repository is checked with `git status`; a checksum command is not required for this check.

If the check fails, the existing directory will be deleted and the dependency downloaded again.

## Local package override

Library developers are often in the situation where they work on a locally checked out dependency at the same time as on a consumer project.
Expand Down
97 changes: 91 additions & 6 deletions cmake/CPM.cmake
Original file line number Diff line number Diff line change
Expand Up @@ -129,6 +129,11 @@ option(CPM_USE_NAMED_CACHE_DIRECTORIES
"Use additional directory of package name in cache on the most nested level."
$ENV{CPM_USE_NAMED_CACHE_DIRECTORIES}
)
# Opt-in validation of an existing cache directory against a checksum. The
# default value is read from the CPM_CHECK_CACHE_CHECKSUM environment variable.
option(CPM_CHECK_CACHE_CHECKSUM
       "If a package is stored in cache and there is a command to provide checksum, check the checksum when the cache dir exists."
       $ENV{CPM_CHECK_CACHE_CHECKSUM}
)

set(CPM_VERSION
${CURRENT_CPM_VERSION}
Expand Down Expand Up @@ -601,9 +606,10 @@ function(CPMAddPackage)
EXCLUDE_FROM_ALL
SOURCE_SUBDIR
CUSTOM_CACHE_KEY
CUSTOM_CACHE_CHECKSUM_VALUE
)

set(multiValueArgs URL OPTIONS DOWNLOAD_COMMAND PATCHES)
set(multiValueArgs URL OPTIONS DOWNLOAD_COMMAND PATCHES CUSTOM_CACHE_CHECKSUM_COMMAND)

cmake_parse_arguments(CPM_ARGS "" "${oneValueArgs}" "${multiValueArgs}" "${ARGN}")

Expand Down Expand Up @@ -789,15 +795,69 @@ function(CPMAddPackage)
get_filename_component(download_directory ${download_directory} ABSOLUTE)
list(APPEND CPM_ARGS_UNPARSED_ARGUMENTS SOURCE_DIR ${download_directory})

if(CPM_SOURCE_CACHE)
file(LOCK ${download_directory}/../cmake.lock)
file(LOCK ${download_directory}/../cmake.lock)

if(EXISTS ${download_directory} AND NOT EXISTS ${download_directory}.download)
message(
WARNING
"Cache for ${CPM_ARGS_NAME} is missing .download, downloading. (${download_directory}.download)"
)
file(REMOVE_RECURSE ${download_directory})
endif()

if(EXISTS ${download_directory})
if(CPM_SOURCE_CACHE)
file(LOCK ${download_directory}/../cmake.lock RELEASE)
if(EXISTS ${download_directory}
AND CPM_ARGS_CUSTOM_CACHE_CHECKSUM_COMMAND
AND (CPM_CHECK_CACHE_CHECKSUM OR DEFINED CPM_ARGS_CUSTOM_CACHE_CHECKSUM_VALUE)
)
if(CPM_ARGS_CUSTOM_CACHE_CHECKSUM_VALUE)
# Explicit checksum provided, ignore value in .downloaded
set(expected_checksum ${CPM_ARGS_CUSTOM_CACHE_CHECKSUM_VALUE})
else()
file(READ ${download_directory}.download expected_checksum)
string(STRIP "${expected_checksum}" expected_checksum)
endif()

if(expected_checksum)
set(executeProcessExtraArgs "")
if(${CMAKE_VERSION} VERSION_GREATER_EQUAL "3.19.0")
list(APPEND executeProcessExtraArgs COMMAND_ERROR_IS_FATAL ANY)
endif()
execute_process(
COMMAND ${CPM_ARGS_CUSTOM_CACHE_CHECKSUM_COMMAND}
WORKING_DIRECTORY ${download_directory}
OUTPUT_VARIABLE checksum
OUTPUT_STRIP_TRAILING_WHITESPACE ${executeProcessExtraArgs}
)
if(NOT expected_checksum STREQUAL checksum)
message(
WARNING
"Checksum mismatch for ${CPM_ARGS_NAME}, removing (${expected_checksum} != ${checksum})"
)
file(REMOVE_RECURSE ${download_directory})
endif()
else()
message(
WARNING
"Checksum cannot be verified for ${CPM_ARGS_NAME}, no existing value (${expected_checksum})"
)
endif()
endif()
if(EXISTS ${download_directory}
AND DEFINED CPM_ARGS_GIT_TAG
AND NOT (PATCH_COMMAND IN_LIST CPM_ARGS_UNPARSED_ARGUMENTS)
)
# warn if cache has been changed since checkout
cpm_check_git_working_dir_is_clean(${download_directory} ${CPM_ARGS_GIT_TAG} IS_CLEAN)
if(NOT ${IS_CLEAN})
message(WARNING "${CPM_INDENT} Cache for ${CPM_ARGS_NAME} (${download_directory}) is dirty")
if(CPM_CHECK_CACHE_CHECKSUM OR DEFINED CPM_ARGS_CUSTOM_CACHE_CHECKSUM_VALUE)
file(REMOVE_RECURSE ${download_directory})
endif()
endif()
endif()
if(EXISTS ${download_directory})
# Directory content is considered OK
file(LOCK ${download_directory}/../cmake.lock RELEASE)
cpm_store_fetch_properties(
${CPM_ARGS_NAME} "${download_directory}"
"${CPM_FETCHCONTENT_BASE_DIR}/${lower_case_name}-build"
Expand Down Expand Up @@ -894,6 +954,31 @@ function(CPMAddPackage)

cpm_fetch_package("${CPM_ARGS_NAME}" ${DOWNLOAD_ONLY} populated ${CPM_ARGS_UNPARSED_ARGUMENTS})
if(CPM_SOURCE_CACHE AND download_directory)
if(${populated})
if(CPM_ARGS_CUSTOM_CACHE_CHECKSUM_COMMAND)
set(executeProcessExtraArgs "")
if(${CMAKE_VERSION} VERSION_GREATER_EQUAL "3.19.0")
list(APPEND executeProcessExtraArgs COMMAND_ERROR_IS_FATAL ANY)
endif()
execute_process(
COMMAND ${CPM_ARGS_CUSTOM_CACHE_CHECKSUM_COMMAND}
WORKING_DIRECTORY ${download_directory}
OUTPUT_VARIABLE checksum
OUTPUT_STRIP_TRAILING_WHITESPACE ${executeProcessExtraArgs}
)
if(CPM_ARGS_CUSTOM_CACHE_CHECKSUM_VALUE AND NOT CPM_ARGS_CUSTOM_CACHE_CHECKSUM_VALUE
STREQUAL checksum
)
message(
FATAL_ERROR
"Checksum mismatch for ${CPM_ARGS_NAME} (${CPM_ARGS_CUSTOM_CACHE_CHECKSUM_VALUE} != ${checksum})"
)
endif()
else()
set(checksum "")
endif()
file(WRITE ${download_directory}.download ${checksum})
endif()
file(LOCK ${download_directory}/../cmake.lock RELEASE)
endif()
if(${populated} AND ${CMAKE_VERSION} VERSION_LESS "3.28.0")
Expand Down
141 changes: 141 additions & 0 deletions test/unit/cache.cmake
Original file line number Diff line number Diff line change
Expand Up @@ -153,3 +153,144 @@ execute_process(

assert_equal(${ret} "0")
assert_exists("${CPM_SOURCE_CACHE_DIR}/fibonacci/my_custom_unique_dir")

# Cache checksum
#
# Shared setup for the checksum test cases, followed by the first (clean) download.

reset_test()
set(FIBONACCI_VERSION 1.1)
set(FIBONACCI_GIT_TAG "GIT_TAG e9ebf168ca0fffaa4ef8c6fefc6346aaa22f6ed5")
set(TEST_CHECKSUM_DIR "${CPM_SOURCE_CACHE_DIR}/fibonacci/my_checksummed_dir")
# Expected checksum of the cached sources. It is computed with CHECKSUM_COMMAND after the first
# download (see below) instead of being pinned, so the test does not rely on a hard-coded digest
# that has to be regenerated whenever the checksum script changes.
set(TEST_CHECKSUM_VALUE)

set(CHECKSUM_COMMAND "${CMAKE_CURRENT_LIST_DIR}/checksum_directory.sh")
set(INCORRECT_CHECKSUM_RESULT "1")
set(IGNORE_CHECKSUM_TEST)
if(WIN32)
  # checksum example is not adapted to Windows
  set(CHECKSUM_COMMAND "")
  set(TEST_CHECKSUM_VALUE)
  set(IGNORE_CHECKSUM_TEST True)
endif()

# OK download

set(FIBONACCI_PACKAGE_ARGS
    "${FIBONACCI_GIT_TAG} CUSTOM_CACHE_KEY my_checksummed_dir CUSTOM_CACHE_CHECKSUM_COMMAND \"${CHECKSUM_COMMAND}\""
)
update_cmake_lists()

execute_process(
  COMMAND ${CMAKE_COMMAND} -E env "CPM_SOURCE_CACHE=${CPM_SOURCE_CACHE_DIR}" ${CMAKE_COMMAND}
          "-S${CMAKE_CURRENT_LIST_DIR}/remote_dependency" "-B${TEST_BUILD_DIR}" RESULT_VARIABLE ret
)

assert_equal(${ret} "0")
assert_exists("${TEST_CHECKSUM_DIR}.download")
file(READ "${TEST_CHECKSUM_DIR}.download" chksum)

if(NOT IGNORE_CHECKSUM_TEST)
  # Run the checksum command the same way CPM does (inside the cache directory) to obtain the
  # value that must have been recorded in the .download marker.
  execute_process(
    COMMAND "${CHECKSUM_COMMAND}"
    WORKING_DIRECTORY "${TEST_CHECKSUM_DIR}"
    RESULT_VARIABLE checksum_ret
    OUTPUT_VARIABLE TEST_CHECKSUM_VALUE
    OUTPUT_STRIP_TRAILING_WHITESPACE
  )
  assert_equal(${checksum_ret} "0")
endif()
# Without a checksum command (Windows) both sides are empty.
assert_equal("${chksum}" "${TEST_CHECKSUM_VALUE}")

# A cache directory whose .download marker is gone must be fetched again: remove the marker and
# one source file, then verify that both are back after configuring.

file(REMOVE "${TEST_CHECKSUM_DIR}.download" "${TEST_CHECKSUM_DIR}/include/fibonacci.h")

string(
  CONCAT FIBONACCI_PACKAGE_ARGS
         "${FIBONACCI_GIT_TAG}"
         " CUSTOM_CACHE_KEY my_checksummed_dir"
         " CUSTOM_CACHE_CHECKSUM_COMMAND \"${CHECKSUM_COMMAND}\""
)
update_cmake_lists()

execute_process(
  COMMAND
    ${CMAKE_COMMAND} -E env "CPM_SOURCE_CACHE=${CPM_SOURCE_CACHE_DIR}"
    ${CMAKE_COMMAND} "-S${CMAKE_CURRENT_LIST_DIR}/remote_dependency" "-B${TEST_BUILD_DIR}"
  RESULT_VARIABLE ret
)

assert_equal(${ret} "0")
assert_exists("${TEST_CHECKSUM_DIR}.download")
assert_exists("${TEST_CHECKSUM_DIR}/include/fibonacci.h")

# Configure once more against the now intact cache, passing only the checksum command (no
# explicit checksum value); this must succeed.

string(
  CONCAT FIBONACCI_PACKAGE_ARGS
         "${FIBONACCI_GIT_TAG}"
         " CUSTOM_CACHE_KEY my_checksummed_dir"
         " CUSTOM_CACHE_CHECKSUM_COMMAND \"${CHECKSUM_COMMAND}\""
)
update_cmake_lists()

execute_process(
  COMMAND
    ${CMAKE_COMMAND} -E env "CPM_SOURCE_CACHE=${CPM_SOURCE_CACHE_DIR}"
    ${CMAKE_COMMAND} "-S${CMAKE_CURRENT_LIST_DIR}/remote_dependency" "-B${TEST_BUILD_DIR}"
  RESULT_VARIABLE ret
)

assert_equal(${ret} "0")

# Same as the previous case, but the expected checksum is passed explicitly via
# CUSTOM_CACHE_CHECKSUM_VALUE; configuring must succeed.

string(
  CONCAT FIBONACCI_PACKAGE_ARGS
         "${FIBONACCI_GIT_TAG}"
         " CUSTOM_CACHE_KEY my_checksummed_dir"
         " CUSTOM_CACHE_CHECKSUM_COMMAND \"${CHECKSUM_COMMAND}\""
         " CUSTOM_CACHE_CHECKSUM_VALUE ${TEST_CHECKSUM_VALUE}"
)
update_cmake_lists()

execute_process(
  COMMAND
    ${CMAKE_COMMAND} -E env "CPM_SOURCE_CACHE=${CPM_SOURCE_CACHE_DIR}"
    ${CMAKE_COMMAND} "-S${CMAKE_CURRENT_LIST_DIR}/remote_dependency" "-B${TEST_BUILD_DIR}"
  RESULT_VARIABLE ret
)

assert_equal(${ret} "0")

# An explicitly provided checksum that cannot match: configuring is expected to fail. Note that
# this prints a fatal_error (red) message to the console.

string(
  CONCAT FIBONACCI_PACKAGE_ARGS
         "${FIBONACCI_GIT_TAG}"
         " CUSTOM_CACHE_KEY my_checksummed_dir"
         " CUSTOM_CACHE_CHECKSUM_COMMAND \"${CHECKSUM_COMMAND}\""
         " CUSTOM_CACHE_CHECKSUM_VALUE invalid_checksum_value"
)
update_cmake_lists()

execute_process(
  COMMAND
    ${CMAKE_COMMAND} -E env "CPM_SOURCE_CACHE=${CPM_SOURCE_CACHE_DIR}"
    ${CMAKE_COMMAND} "-S${CMAKE_CURRENT_LIST_DIR}/remote_dependency" "-B${TEST_BUILD_DIR}"
  RESULT_VARIABLE ret
)

# The result is only checked where the checksum command is available.
if(NOT IGNORE_CHECKSUM_TEST)
  assert_equal(${ret} "1")
endif()

# Redownload when the cached content no longer matches the provided checksum: an extra file is
# written into the cache directory and must be gone after configuring.

# dummy change, to trigger checksum mismatch
file(WRITE "${TEST_CHECKSUM_DIR}/fail_checksum.txt" "dummy")

string(
  CONCAT FIBONACCI_PACKAGE_ARGS
         "${FIBONACCI_GIT_TAG}"
         " CUSTOM_CACHE_KEY my_checksummed_dir"
         " CUSTOM_CACHE_CHECKSUM_COMMAND \"${CHECKSUM_COMMAND}\""
         " CUSTOM_CACHE_CHECKSUM_VALUE ${TEST_CHECKSUM_VALUE}"
)
update_cmake_lists()

execute_process(
  COMMAND
    ${CMAKE_COMMAND} -E env "CPM_SOURCE_CACHE=${CPM_SOURCE_CACHE_DIR}"
    ${CMAKE_COMMAND} "-S${CMAKE_CURRENT_LIST_DIR}/remote_dependency" "-B${TEST_BUILD_DIR}"
  RESULT_VARIABLE ret
)

assert_equal(${ret} "0")
if(NOT IGNORE_CHECKSUM_TEST)
  assert_not_exists("${TEST_CHECKSUM_DIR}/fail_checksum.txt")
endif()

# Redownload when the cache directory was modified and only a checksum value is passed (no
# checksum command): the extra file written below must be gone after configuring.

# dummy change, to trigger checksum mismatch
file(WRITE "${TEST_CHECKSUM_DIR}/fail_checksum.txt" "dummy")

string(
  CONCAT FIBONACCI_PACKAGE_ARGS
         "${FIBONACCI_GIT_TAG}"
         " CUSTOM_CACHE_KEY my_checksummed_dir"
         " CUSTOM_CACHE_CHECKSUM_VALUE ${TEST_CHECKSUM_VALUE}"
)
update_cmake_lists()

execute_process(
  COMMAND
    ${CMAKE_COMMAND} -E env "CPM_SOURCE_CACHE=${CPM_SOURCE_CACHE_DIR}"
    ${CMAKE_COMMAND} "-S${CMAKE_CURRENT_LIST_DIR}/remote_dependency" "-B${TEST_BUILD_DIR}"
  RESULT_VARIABLE ret
)

assert_equal(${ret} "0")
if(NOT IGNORE_CHECKSUM_TEST)
  assert_not_exists("${TEST_CHECKSUM_DIR}/fail_checksum.txt")
endif()
63 changes: 63 additions & 0 deletions test/unit/checksum_directory.sh
Original file line number Diff line number Diff line change
@@ -0,0 +1,63 @@
#!/usr/bin/env bash

# Script to checksum contents recursively in a directory
#
# Prints one SHA-512 digest on stdout.
#   Default method: every regular file below the directory is hashed (in
#   parallel through xargs), the per-file result lines are sorted by path and
#   the sorted listing is hashed again. Directories named .git are pruned.
#   Alternative method (-t): the digest of a zstd-compressed tar stream of the
#   directory.

set -o errexit
set -o nounset
# Fail if any stage of a pipeline fails. Without this only the exit status of
# the trailing 'cut' counts, so a failing find/xargs/tar stage would go
# unnoticed and the digest of empty input would be printed instead.
set -o pipefail

# Print the command line help.
function usage {
    echo
    echo "Checksum the contents of a directory"
    echo "Usage: $0 [-d <directory>]"
    echo ""
    echo "  -d directory  Default '.'"
    echo "  -h            Help, this message"
    echo "  -t            Use alternative tar method (requires zstd binary)"
    echo "  -v            Verbose output"
}

dir=.
use_tar=
# sha512 is faster than sha256 for large files, sha1 is even faster
# Kept as an array so that a command with arguments expands to separate words.
SHA_ALGORITHM=(sha512sum)
if [[ "$OSTYPE" == "darwin"* ]]; then
    # Some overrides required for macos
    # Note also that 'xargs --max-procs' must be written as 'xargs -P'
    SHA_ALGORITHM=(shasum -a 512)
    # A function instead of an alias: bash does not expand aliases in
    # non-interactive shells (unless 'shopt -s expand_aliases' is set).
    function nproc {
        sysctl -n hw.logicalcpu
    }
fi

while getopts "d:htv" o; do
    case "${o}" in
        d)
            dir=${OPTARG}
            ;;
        h)
            usage
            exit 0
            ;;
        t)
            use_tar=1
            ;;
        v)
            set -x
            ;;
        *)
            echo "Incorrect argument switch"
            usage
            exit 1
            ;;
    esac
done
shift "$((OPTIND-1))"

# Quoted so that directory names containing whitespace work.
cd "$dir"
if [[ -n "$use_tar" ]]; then
    # This is faster for single threads but requires more memory and requires the separate zstd binary
    # For a 3 GB data this is 3s vs 'find' below: 5s (one thread) below, 2.5s with 28 threads, 0.7s with 100 files on each line
    # Without --fast, just ZSTD_CLEVEL=1 ZSTD_NBTHREADS=0 is about 6s
    # NOTE(review): the tar stream presumably also covers file metadata and .git - confirm
    # that the digest is stable enough for the intended use.
    tar -I "zstd --fast -1 -T0" -cf - . | "${SHA_ALGORITHM[@]}" | cut -f1 -d ' '
else
    # In general, there is no point in checksumming Git repos, filter .git here as this is used in tests
    # 'xargs -P' takes its value as a separate word; '-P=<n>' is rejected as an invalid number.
    # 'sort' runs in the C locale so the order of the listing (and therefore the
    # final digest) does not depend on the locale of the caller.
    find . \( -name .git -prune \) -o -type f -print0 \
        | xargs -0 -n 100 -P "$(nproc)" "${SHA_ALGORITHM[@]}" \
        | LC_ALL=C sort -k 2 \
        | "${SHA_ALGORITHM[@]}" \
        | cut -f1 -d ' '
fi

0 comments on commit bc31cc5

Please sign in to comment.