From b48bf522d21903cdc9ec699a878e81bc0b4a4dc2 Mon Sep 17 00:00:00 2001 From: Philip Hyunsu Cho Date: Mon, 24 Jun 2024 15:42:27 -0700 Subject: [PATCH 01/18] [CI] Set up pipeline to build manylinux2014 wheels (#10478) (#10482) * [CI] Set up pipeline to build manylinux2014 wheels (#10478) * [CI] Fix S3 upload for manylinux2014 wheels (#10483) --- doc/install.rst | 20 ++++++++++ tests/buildkite/build-cuda-with-rmm.sh | 4 +- tests/buildkite/build-cuda.sh | 4 +- .../buildkite/build-manylinux2014-aarch64.sh | 33 +++++++++++++++++ tests/buildkite/build-manylinux2014-x86_64.sh | 33 +++++++++++++++++ tests/buildkite/manylinux2014_warning.patch | 37 +++++++++++++++++++ tests/buildkite/pipeline.yml | 15 ++++++-- .../ci_build/Dockerfile.manylinux2014_aarch64 | 15 ++++++++ .../ci_build/Dockerfile.manylinux2014_x86_64 | 15 ++++++++ ...86_64 => Dockerfile.manylinux_2_28_x86_64} | 0 10 files changed, 169 insertions(+), 7 deletions(-) create mode 100644 tests/buildkite/build-manylinux2014-aarch64.sh create mode 100644 tests/buildkite/build-manylinux2014-x86_64.sh create mode 100644 tests/buildkite/manylinux2014_warning.patch create mode 100644 tests/ci_build/Dockerfile.manylinux2014_aarch64 create mode 100644 tests/ci_build/Dockerfile.manylinux2014_x86_64 rename tests/ci_build/{Dockerfile.auditwheel_x86_64 => Dockerfile.manylinux_2_28_x86_64} (100%) diff --git a/doc/install.rst b/doc/install.rst index bf90a913bd31..e5229702e16c 100644 --- a/doc/install.rst +++ b/doc/install.rst @@ -27,6 +27,26 @@ Pre-built binary wheels are uploaded to PyPI (Python Package Index) for each rel You might need to run the command with ``--user`` flag or use ``virtualenv`` if you run into permission errors. +.. note:: Parts of the Python package now require glibc 2.28+ + + Starting from 2.1.0, the XGBoost Python package will be distributed in two variants: + + * ``manylinux_2_28``: for recent Linux distros with glibc 2.28 or newer. This variant comes with all features enabled. + * ``manylinux2014``: for old Linux distros with glibc older than 2.28. This variant does not support GPU algorithms or federated learning. + + The ``pip`` package manager will automatically choose the correct variant depending on your system. + + Starting from **May 31, 2025**, we will stop distributing the ``manylinux2014`` variant and exclusively + distribute the ``manylinux_2_28`` variant. We made this decision so that our CI/CD pipeline won't have to + depend on software components that have reached end-of-life (such as CentOS 7). We strongly encourage + everyone to migrate to recent Linux distros in order to use future versions of XGBoost. + + Note: If you want to use GPU algorithms or federated learning on an older Linux distro, you have + two alternatives: + + 1. Upgrade to a recent Linux distro with glibc 2.28+; or + 2. Build XGBoost from source. +
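A quick way to confirm which variant applies to a given machine is sketched below. This is illustrative only and not part of the patch: it assumes a glibc-based Linux distro, and ``/tmp/xgb-wheel`` is just an arbitrary scratch directory.

.. code-block:: console

   $ # Print the glibc version; 2.28 or newer selects the manylinux_2_28 wheel.
   $ ldd --version | head -n1
   $ # Download the wheel without installing it; the platform tag in the file
   $ # name shows the variant, e.g. manylinux_2_28_x86_64 vs. manylinux2014_x86_64.
   $ pip download xgboost --no-deps -d /tmp/xgb-wheel && ls /tmp/xgb-wheel

.. 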
note:: Windows users need to install Visual C++ Redistributable XGBoost requires DLLs from `Visual C++ Redistributable diff --git a/tests/buildkite/build-cuda-with-rmm.sh b/tests/buildkite/build-cuda-with-rmm.sh index 42a1edc7947e..126bc6b3f28b 100755 --- a/tests/buildkite/build-cuda-with-rmm.sh +++ b/tests/buildkite/build-cuda-with-rmm.sh @@ -42,7 +42,7 @@ $command_wrapper python tests/ci_build/rename_whl.py \ --platform-tag ${WHEEL_TAG} echo "--- Audit binary wheel to ensure it's compliant with ${WHEEL_TAG} standard" -tests/ci_build/ci_build.sh auditwheel_x86_64 auditwheel repair \ +tests/ci_build/ci_build.sh manylinux_2_28_x86_64 auditwheel repair \ --plat ${WHEEL_TAG} python-package/dist/*.whl $command_wrapper python tests/ci_build/rename_whl.py \ --wheel-path wheelhouse/*.whl \ @@ -50,7 +50,7 @@ $command_wrapper python tests/ci_build/rename_whl.py \ --platform-tag ${WHEEL_TAG} mv -v wheelhouse/*.whl python-package/dist/ # Make sure that libgomp.so is vendored in the wheel -tests/ci_build/ci_build.sh auditwheel_x86_64 bash -c \ +tests/ci_build/ci_build.sh manylinux_2_28_x86_64 bash -c \ "unzip -l python-package/dist/*.whl | grep libgomp || exit -1" echo "--- Upload Python wheel" diff --git a/tests/buildkite/build-cuda.sh b/tests/buildkite/build-cuda.sh index 2ddf9fefe07b..03d2cc8a6a24 100755 --- a/tests/buildkite/build-cuda.sh +++ b/tests/buildkite/build-cuda.sh @@ -41,7 +41,7 @@ $command_wrapper python tests/ci_build/rename_whl.py \ --platform-tag ${WHEEL_TAG} echo "--- Audit binary wheel to ensure it's compliant with ${WHEEL_TAG} standard" -tests/ci_build/ci_build.sh auditwheel_x86_64 auditwheel repair \ +tests/ci_build/ci_build.sh manylinux_2_28_x86_64 auditwheel repair \ --plat ${WHEEL_TAG} python-package/dist/*.whl $command_wrapper python tests/ci_build/rename_whl.py \ --wheel-path wheelhouse/*.whl \ @@ -49,7 +49,7 @@ $command_wrapper python tests/ci_build/rename_whl.py \ --platform-tag ${WHEEL_TAG} mv -v wheelhouse/*.whl python-package/dist/ # Make sure that libgomp.so is vendored in the wheel -tests/ci_build/ci_build.sh auditwheel_x86_64 bash -c \ +tests/ci_build/ci_build.sh manylinux_2_28_x86_64 bash -c \ "unzip -l python-package/dist/*.whl | grep libgomp || exit -1" echo "--- Upload Python wheel" diff --git a/tests/buildkite/build-manylinux2014-aarch64.sh b/tests/buildkite/build-manylinux2014-aarch64.sh new file mode 100644 index 000000000000..802db3f66aaf --- /dev/null +++ b/tests/buildkite/build-manylinux2014-aarch64.sh @@ -0,0 +1,33 @@ +#!/bin/bash + +set -euo pipefail + +source tests/buildkite/conftest.sh + +WHEEL_TAG=manylinux2014_aarch64 +command_wrapper="tests/ci_build/ci_build.sh manylinux2014_aarch64" +python_bin="/opt/python/cp310-cp310/bin/python" + +echo "--- Build binary wheel for ${WHEEL_TAG}" +# Patch to add warning about manylinux2014 variant +patch -p0 < tests/buildkite/manylinux2014_warning.patch +$command_wrapper bash -c \ + "cd python-package && ${python_bin} -m pip wheel --no-deps -vvv . 
--wheel-dir dist/" +git checkout python-package/xgboost/core.py # discard the patch + +$command_wrapper auditwheel repair --plat ${WHEEL_TAG} python-package/dist/*.whl +$command_wrapper ${python_bin} tests/ci_build/rename_whl.py \ + --wheel-path wheelhouse/*.whl \ + --commit-hash ${BUILDKITE_COMMIT} \ + --platform-tag ${WHEEL_TAG} +rm -rf python-package/dist/ +mkdir python-package/dist/ +mv -v wheelhouse/*.whl python-package/dist/ + +echo "--- Upload Python wheel" +buildkite-agent artifact upload python-package/dist/*.whl +if [[ ($is_pull_request == 0) && ($is_release_branch == 1) ]] +then + aws s3 cp python-package/dist/*.whl s3://xgboost-nightly-builds/${BRANCH_NAME}/ \ + --acl public-read --no-progress +fi diff --git a/tests/buildkite/build-manylinux2014-x86_64.sh b/tests/buildkite/build-manylinux2014-x86_64.sh new file mode 100644 index 000000000000..b00616315b8d --- /dev/null +++ b/tests/buildkite/build-manylinux2014-x86_64.sh @@ -0,0 +1,33 @@ +#!/bin/bash + +set -euo pipefail + +source tests/buildkite/conftest.sh + +WHEEL_TAG=manylinux2014_x86_64 +command_wrapper="tests/ci_build/ci_build.sh manylinux2014_x86_64" +python_bin="/opt/python/cp310-cp310/bin/python" + +echo "--- Build binary wheel for ${WHEEL_TAG}" +# Patch to add warning about manylinux2014 variant +patch -p0 < tests/buildkite/manylinux2014_warning.patch +$command_wrapper bash -c \ + "cd python-package && ${python_bin} -m pip wheel --no-deps -vvv . --wheel-dir dist/" +git checkout python-package/xgboost/core.py # discard the patch + +$command_wrapper auditwheel repair --plat ${WHEEL_TAG} python-package/dist/*.whl +$command_wrapper ${python_bin} tests/ci_build/rename_whl.py \ + --wheel-path wheelhouse/*.whl \ + --commit-hash ${BUILDKITE_COMMIT} \ + --platform-tag ${WHEEL_TAG} +rm -rf python-package/dist/ +mkdir python-package/dist/ +mv -v wheelhouse/*.whl python-package/dist/ + +echo "--- Upload Python wheel" +buildkite-agent artifact upload python-package/dist/*.whl +if [[ ($is_pull_request == 0) && ($is_release_branch == 1) ]] +then + aws s3 cp python-package/dist/*.whl s3://xgboost-nightly-builds/${BRANCH_NAME}/ \ + --acl public-read --no-progress +fi diff --git a/tests/buildkite/manylinux2014_warning.patch b/tests/buildkite/manylinux2014_warning.patch new file mode 100644 index 000000000000..692a92672d2f --- /dev/null +++ b/tests/buildkite/manylinux2014_warning.patch @@ -0,0 +1,37 @@ +diff --git python-package/pyproject.toml python-package/pyproject.toml +index a273d8c13..dee49686a 100644 +--- python-package/pyproject.toml ++++ python-package/pyproject.toml +@@ -30,8 +30,7 @@ classifiers = [ + ] + dependencies = [ + "numpy", +- "scipy", +- "nvidia-nccl-cu12 ; platform_system == 'Linux' and platform_machine != 'aarch64'" ++ "scipy" + ] + + [project.urls] +diff --git python-package/xgboost/core.py python-package/xgboost/core.py +index e8bc735e6..030972ef2 100644 +--- python-package/xgboost/core.py ++++ python-package/xgboost/core.py +@@ -262,6 +262,18 @@ Likely cause: + ) + raise ValueError(msg) + ++ warnings.warn( ++ "Your system has an old version of glibc (< 2.28). We will stop supporting " ++ "Linux distros with glibc older than 2.28 after **May 31, 2025**. " ++ "Please upgrade to a recent Linux distro (with glibc 2.28+) to use " ++ "future versions of XGBoost.\n" ++ "Note: You have installed the 'manylinux2014' variant of XGBoost. Certain " ++ "features such as GPU algorithms or federated learning are not available. 
" ++ "To use these features, please upgrade to a recent Linux distro with glibc " ++ "2.28+, and install the 'manylinux_2_28' variant.", ++ FutureWarning ++ ) ++ + return lib + + diff --git a/tests/buildkite/pipeline.yml b/tests/buildkite/pipeline.yml index 4bcfe03f96ea..73a60517a672 100644 --- a/tests/buildkite/pipeline.yml +++ b/tests/buildkite/pipeline.yml @@ -26,18 +26,17 @@ steps: key: run-clang-tidy agents: queue: linux-amd64-cpu - - wait - label: ":console: Build CPU" command: "tests/buildkite/build-cpu.sh" key: build-cpu agents: queue: linux-amd64-cpu - - label: ":console: Build CPU ARM64" + - label: ":console: Build CPU ARM64 + manylinux_2_28_aarch64 wheel" command: "tests/buildkite/build-cpu-arm64.sh" key: build-cpu-arm64 agents: queue: linux-arm64-cpu - - label: ":console: Build CUDA" + - label: ":console: Build CUDA + manylinux_2_28_x86_64 wheel" command: "tests/buildkite/build-cuda.sh" key: build-cuda agents: @@ -62,6 +61,16 @@ steps: key: build-jvm-doc agents: queue: linux-amd64-cpu + - label: ":console: Build manylinux2014_x86_64 wheel" + command: "tests/buildkite/build-manylinux2014-x86_64.sh" + key: build-manylinux2014-x86_64 + agents: + queue: linux-amd64-cpu + - label: ":console: Build manylinux2014_aarch64 wheel" + command: "tests/buildkite/build-manylinux2014-aarch64.sh" + key: build-manylinux2014-aarch64 + agents: + queue: linux-arm64-cpu - wait #### -------- TEST -------- - label: ":console: Test Python package, CPU" diff --git a/tests/ci_build/Dockerfile.manylinux2014_aarch64 b/tests/ci_build/Dockerfile.manylinux2014_aarch64 new file mode 100644 index 000000000000..05a1590eca76 --- /dev/null +++ b/tests/ci_build/Dockerfile.manylinux2014_aarch64 @@ -0,0 +1,15 @@ +FROM quay.io/pypa/manylinux2014_aarch64 + +# Install lightweight sudo (not bound to TTY) +ENV GOSU_VERSION 1.10 +RUN set -ex; \ + curl -o /usr/local/bin/gosu -L "https://github.com/tianon/gosu/releases/download/$GOSU_VERSION/gosu-arm64" && \ + chmod +x /usr/local/bin/gosu && \ + gosu nobody true + +# Default entry-point to use if running locally +# It will preserve attributes of created files +COPY entrypoint.sh /scripts/ + +WORKDIR /workspace +ENTRYPOINT ["/scripts/entrypoint.sh"] diff --git a/tests/ci_build/Dockerfile.manylinux2014_x86_64 b/tests/ci_build/Dockerfile.manylinux2014_x86_64 new file mode 100644 index 000000000000..6c2150149fe5 --- /dev/null +++ b/tests/ci_build/Dockerfile.manylinux2014_x86_64 @@ -0,0 +1,15 @@ +FROM quay.io/pypa/manylinux2014_x86_64 + +# Install lightweight sudo (not bound to TTY) +ENV GOSU_VERSION 1.10 +RUN set -ex; \ + curl -o /usr/local/bin/gosu -L "https://github.com/tianon/gosu/releases/download/$GOSU_VERSION/gosu-amd64" && \ + chmod +x /usr/local/bin/gosu && \ + gosu nobody true + +# Default entry-point to use if running locally +# It will preserve attributes of created files +COPY entrypoint.sh /scripts/ + +WORKDIR /workspace +ENTRYPOINT ["/scripts/entrypoint.sh"] diff --git a/tests/ci_build/Dockerfile.auditwheel_x86_64 b/tests/ci_build/Dockerfile.manylinux_2_28_x86_64 similarity index 100% rename from tests/ci_build/Dockerfile.auditwheel_x86_64 rename to tests/ci_build/Dockerfile.manylinux_2_28_x86_64 From 7e85651dcc0da227f64db99fd5402941cb9a1f56 Mon Sep 17 00:00:00 2001 From: Philip Hyunsu Cho Date: Wed, 26 Jun 2024 05:32:57 -0700 Subject: [PATCH 02/18] [CI] Add CI pipeline to build libxgboost4j.so targeting Linux ARM64 (#10487) (#10489) --- dev/prepare_jvm_release.py | 25 ++++------- .../build-jvm-linux-arm64-manylinux2014.sh | 29 +++++++++++++ 
.../build-jvm-linux-x86_64-manylinux2014.sh | 29 +++++++++++++ tests/buildkite/build-jvm-macos-m1.sh | 41 +++++++++++++++++++ tests/buildkite/pipeline-mac-m1.yml | 5 +++ tests/buildkite/pipeline.yml | 10 +++++ tests/buildkite/test-macos-m1-clang11.sh | 28 +------------ .../Dockerfile.jvm_manylinux2014_aarch64 | 17 ++++++++ .../Dockerfile.jvm_manylinux2014_x86_64 | 17 ++++++++ 9 files changed, 158 insertions(+), 43 deletions(-) create mode 100644 tests/buildkite/build-jvm-linux-arm64-manylinux2014.sh create mode 100644 tests/buildkite/build-jvm-linux-x86_64-manylinux2014.sh create mode 100644 tests/buildkite/build-jvm-macos-m1.sh create mode 100644 tests/ci_build/Dockerfile.jvm_manylinux2014_aarch64 create mode 100644 tests/ci_build/Dockerfile.jvm_manylinux2014_x86_64 diff --git a/dev/prepare_jvm_release.py b/dev/prepare_jvm_release.py index bcd119a892cb..af338c93cb55 100644 --- a/dev/prepare_jvm_release.py +++ b/dev/prepare_jvm_release.py @@ -134,6 +134,7 @@ def main(): print("====Creating directories to hold native binaries====") for os_ident, arch in [ ("linux", "x86_64"), + ("linux", "aarch64"), ("windows", "x86_64"), ("macos", "x86_64"), ("macos", "aarch64"), @@ -156,6 +157,14 @@ def main(): url=f"{nightly_bucket_prefix}/{git_branch}/libxgboost4j/xgboost4j_{commit_hash}.dll", filename="xgboost4j/src/main/resources/lib/windows/x86_64/xgboost4j.dll", ) + retrieve( + url=f"{nightly_bucket_prefix}/{git_branch}/libxgboost4j/libxgboost4j_linux_x86_64_{commit_hash}.so", + filename="xgboost4j/src/main/resources/lib/linux/x86_64/libxgboost4j.so", + ) + retrieve( + url=f"{nightly_bucket_prefix}/{git_branch}/libxgboost4j/libxgboost4j_linux_arm64_{commit_hash}.so", + filename="xgboost4j/src/main/resources/lib/linux/aarch64/libxgboost4j.so", + ) retrieve( url=f"{nightly_bucket_prefix}/{git_branch}/libxgboost4j/libxgboost4j_{commit_hash}.dylib", filename="xgboost4j/src/main/resources/lib/macos/x86_64/libxgboost4j.dylib", @@ -166,22 +175,6 @@ def main(): ) with tempfile.TemporaryDirectory() as tempdir: - # libxgboost4j.so for Linux x86_64, CPU only - zip_path = os.path.join(tempdir, "xgboost4j_2.12.jar") - extract_dir = os.path.join(tempdir, "xgboost4j") - retrieve( - url=f"{maven_repo_prefix}/xgboost4j_2.12/{version}/" - f"xgboost4j_2.12-{version}.jar", - filename=zip_path, - ) - os.mkdir(extract_dir) - with zipfile.ZipFile(zip_path, "r") as t: - t.extractall(extract_dir) - cp( - os.path.join(extract_dir, "lib", "linux", "x86_64", "libxgboost4j.so"), - "xgboost4j/src/main/resources/lib/linux/x86_64/libxgboost4j.so", - ) - # libxgboost4j.so for Linux x86_64, GPU support zip_path = os.path.join(tempdir, "xgboost4j-gpu_2.12.jar") extract_dir = os.path.join(tempdir, "xgboost4j-gpu") diff --git a/tests/buildkite/build-jvm-linux-arm64-manylinux2014.sh b/tests/buildkite/build-jvm-linux-arm64-manylinux2014.sh new file mode 100644 index 000000000000..f72183676e42 --- /dev/null +++ b/tests/buildkite/build-jvm-linux-arm64-manylinux2014.sh @@ -0,0 +1,29 @@ +#!/bin/bash + +set -euo pipefail + +source tests/buildkite/conftest.sh + +command_wrapper="tests/ci_build/ci_build.sh jvm_manylinux2014_aarch64" + +# Build XGBoost4J binary +echo "--- Build libxgboost4j.so (targeting glibc 2.17)" +set -x +mkdir build +$command_wrapper bash -c \ + "cd build && cmake .. 
-DJVM_BINDINGS=ON -DUSE_OPENMP=ON && make -j$(nproc)" +ldd lib/libxgboost4j.so +objdump -T lib/libxgboost4j.so | grep GLIBC_ | sed 's/.*GLIBC_\([.0-9]*\).*/\1/g' | sort -Vu + +echo "--- Upload libxgboost4j.so" +pushd lib +libname=libxgboost4j_linux_arm64_${BUILDKITE_COMMIT}.so +mv -v libxgboost4j.so ${libname} +buildkite-agent artifact upload ${libname} +#if [[ ($is_pull_request == 0) && ($is_release_branch == 1) ]] +#then + aws s3 cp ${libname} \ + s3://xgboost-nightly-builds/${BRANCH_NAME}/libxgboost4j/ \ + --acl public-read --no-progress +#fi +popd diff --git a/tests/buildkite/build-jvm-linux-x86_64-manylinux2014.sh b/tests/buildkite/build-jvm-linux-x86_64-manylinux2014.sh new file mode 100644 index 000000000000..3c299bb36a01 --- /dev/null +++ b/tests/buildkite/build-jvm-linux-x86_64-manylinux2014.sh @@ -0,0 +1,29 @@ +#!/bin/bash + +set -euo pipefail + +source tests/buildkite/conftest.sh + +command_wrapper="tests/ci_build/ci_build.sh jvm_manylinux2014_x86_64" + +# Build XGBoost4J binary +echo "--- Build libxgboost4j.so (targeting glibc 2.17)" +set -x +mkdir build +$command_wrapper bash -c \ + "cd build && cmake .. -GNinja -DJVM_BINDINGS=ON -DUSE_OPENMP=ON && ninja -v" +ldd lib/libxgboost4j.so +objdump -T lib/libxgboost4j.so | grep GLIBC_ | sed 's/.*GLIBC_\([.0-9]*\).*/\1/g' | sort -Vu + +echo "--- Upload libxgboost4j.so" +pushd lib +libname=libxgboost4j_linux_x86_64_${BUILDKITE_COMMIT}.so +mv -v libxgboost4j.so ${libname} +buildkite-agent artifact upload ${libname} +#if [[ ($is_pull_request == 0) && ($is_release_branch == 1) ]] +#then + aws s3 cp ${libname} \ + s3://xgboost-nightly-builds/${BRANCH_NAME}/libxgboost4j/ \ + --acl public-read --no-progress +#fi +popd diff --git a/tests/buildkite/build-jvm-macos-m1.sh b/tests/buildkite/build-jvm-macos-m1.sh new file mode 100644 index 000000000000..469b0786ee37 --- /dev/null +++ b/tests/buildkite/build-jvm-macos-m1.sh @@ -0,0 +1,41 @@ +#!/bin/bash + +set -euo pipefail + +source tests/buildkite/conftest.sh + +# Display system info +echo "--- Display system information" +set -x +system_profiler SPSoftwareDataType +sysctl -n machdep.cpu.brand_string +uname -m +set +x + +# Build XGBoost4J binary +echo "--- Build libxgboost4j.dylib" +set -x +mkdir build +pushd build +export JAVA_HOME=$(/usr/libexec/java_home) +cmake .. 
-GNinja -DJVM_BINDINGS=ON -DUSE_OPENMP=ON -DCMAKE_OSX_DEPLOYMENT_TARGET=10.15 +ninja -v +popd +rm -rf build +otool -L lib/libxgboost.dylib +set +x + +echo "--- Upload libxgboost4j.dylib" +set -x +pushd lib +libname=libxgboost4j_m1_${BUILDKITE_COMMIT}.dylib +mv -v libxgboost4j.dylib ${libname} +buildkite-agent artifact upload ${libname} +#if [[ ($is_pull_request == 0) && ($is_release_branch == 1) ]] +#then + aws s3 cp ${libname} \ + s3://xgboost-nightly-builds/${BRANCH_NAME}/libxgboost4j/ \ + --acl public-read --no-progress +#fi +popd +set +x diff --git a/tests/buildkite/pipeline-mac-m1.yml b/tests/buildkite/pipeline-mac-m1.yml index 7e4a664acf6b..57b1b1d12010 100644 --- a/tests/buildkite/pipeline-mac-m1.yml +++ b/tests/buildkite/pipeline-mac-m1.yml @@ -1,6 +1,11 @@ steps: - block: ":rocket: Run this test job" if: build.pull_request.id != null || build.branch =~ /^dependabot\// + - label: ":macos: Build libxgboost4j.dylib for MacOS M1" + command: "tests/buildkite/build-jvm-macos-m1.sh" + key: mac-m1-jvm + agents: + queue: mac-mini-m1 - label: ":macos: Build and Test XGBoost for MacOS M1 with Clang 11" command: "tests/buildkite/test-macos-m1-clang11.sh" key: mac-m1-appleclang11 diff --git a/tests/buildkite/pipeline.yml b/tests/buildkite/pipeline.yml index 73a60517a672..acdb71dba529 100644 --- a/tests/buildkite/pipeline.yml +++ b/tests/buildkite/pipeline.yml @@ -56,6 +56,16 @@ steps: key: build-jvm-packages agents: queue: linux-amd64-cpu + - label: ":console: Build libxgboost4j.so for Linux ARM64 (targeting glibc 2.17)" + command: "tests/buildkite/build-jvm-linux-arm64-manylinux2014.sh" + key: build-jvm-linux-arm64-manylinux2014 + agents: + queue: linux-arm64-cpu + - label: ":console: Build libxgboost4j.so for Linux x86_64 (targeting glibc 2.17)" + command: "tests/buildkite/build-jvm-linux-x86_64-manylinux2014.sh" + key: build-jvm-linux-x86_64-manylinux2014 + agents: + queue: linux-amd64-cpu - label: ":console: Build JVM package doc" command: "tests/buildkite/build-jvm-doc.sh" key: build-jvm-doc diff --git a/tests/buildkite/test-macos-m1-clang11.sh b/tests/buildkite/test-macos-m1-clang11.sh index a3f1eab6c589..6824cb7b14b4 100755 --- a/tests/buildkite/test-macos-m1-clang11.sh +++ b/tests/buildkite/test-macos-m1-clang11.sh @@ -12,33 +12,6 @@ sysctl -n machdep.cpu.brand_string uname -m set +x -# Build XGBoost4J binary -echo "--- Build libxgboost4j.dylib" -set -x -mkdir build -pushd build -export JAVA_HOME=$(/usr/libexec/java_home) -cmake .. -GNinja -DJVM_BINDINGS=ON -DUSE_OPENMP=ON -DCMAKE_OSX_DEPLOYMENT_TARGET=10.15 -ninja -v -popd -rm -rf build -otool -L lib/libxgboost.dylib -set +x - -echo "--- Upload Python wheel" -set -x -pushd lib -mv -v libxgboost4j.dylib libxgboost4j_m1_${BUILDKITE_COMMIT}.dylib -buildkite-agent artifact upload libxgboost4j_m1_${BUILDKITE_COMMIT}.dylib -if [[ ($is_pull_request == 0) && ($is_release_branch == 1) ]] -then - aws s3 cp libxgboost4j_m1_${BUILDKITE_COMMIT}.dylib \ - s3://xgboost-nightly-builds/${BRANCH_NAME}/libxgboost4j/ \ - --acl public-read --no-progress -fi -popd -set +x - # Ensure that XGBoost can be built with Clang 11 echo "--- Build and Test XGBoost with MacOS M1, Clang 11" set -x @@ -49,3 +22,4 @@ cmake .. 
-GNinja -DCMAKE_C_COMPILER=${LLVM11_PATH}/bin/clang \ -DCMAKE_CXX_COMPILER=${LLVM11_PATH}/bin/clang++ -DGOOGLE_TEST=ON \ -DUSE_DMLC_GTEST=ON ninja -v +./testxgboost diff --git a/tests/ci_build/Dockerfile.jvm_manylinux2014_aarch64 b/tests/ci_build/Dockerfile.jvm_manylinux2014_aarch64 new file mode 100644 index 000000000000..1442a7644110 --- /dev/null +++ b/tests/ci_build/Dockerfile.jvm_manylinux2014_aarch64 @@ -0,0 +1,17 @@ +FROM quay.io/pypa/manylinux2014_aarch64 + +RUN yum update -y && yum install -y java-1.8.0-openjdk-devel + +# Install lightweight sudo (not bound to TTY) +ENV GOSU_VERSION 1.10 +RUN set -ex; \ + curl -o /usr/local/bin/gosu -L "https://github.com/tianon/gosu/releases/download/$GOSU_VERSION/gosu-arm64" && \ + chmod +x /usr/local/bin/gosu && \ + gosu nobody true + +# Default entry-point to use if running locally +# It will preserve attributes of created files +COPY entrypoint.sh /scripts/ + +WORKDIR /workspace +ENTRYPOINT ["/scripts/entrypoint.sh"] diff --git a/tests/ci_build/Dockerfile.jvm_manylinux2014_x86_64 b/tests/ci_build/Dockerfile.jvm_manylinux2014_x86_64 new file mode 100644 index 000000000000..7626e252db0a --- /dev/null +++ b/tests/ci_build/Dockerfile.jvm_manylinux2014_x86_64 @@ -0,0 +1,17 @@ +FROM quay.io/pypa/manylinux2014_x86_64 + +RUN yum update -y && yum install -y java-1.8.0-openjdk-devel ninja-build + +# Install lightweight sudo (not bound to TTY) +ENV GOSU_VERSION 1.10 +RUN set -ex; \ + curl -o /usr/local/bin/gosu -L "https://github.com/tianon/gosu/releases/download/$GOSU_VERSION/gosu-amd64" && \ + chmod +x /usr/local/bin/gosu && \ + gosu nobody true + +# Default entry-point to use if running locally +# It will preserve attributes of created files +COPY entrypoint.sh /scripts/ + +WORKDIR /workspace +ENTRYPOINT ["/scripts/entrypoint.sh"] From d482ba1a4c75bbbd73974e1a4add09e62c2a79e5 Mon Sep 17 00:00:00 2001 From: Hyunsu Cho Date: Wed, 26 Jun 2024 06:21:46 -0700 Subject: [PATCH 03/18] [CI] [Hotfix] Make S3 upload conditional --- tests/buildkite/build-jvm-linux-arm64-manylinux2014.sh | 6 +++--- tests/buildkite/build-jvm-linux-x86_64-manylinux2014.sh | 6 +++--- tests/buildkite/build-jvm-macos-m1.sh | 6 +++--- 3 files changed, 9 insertions(+), 9 deletions(-) diff --git a/tests/buildkite/build-jvm-linux-arm64-manylinux2014.sh b/tests/buildkite/build-jvm-linux-arm64-manylinux2014.sh index f72183676e42..e7fec780b956 100644 --- a/tests/buildkite/build-jvm-linux-arm64-manylinux2014.sh +++ b/tests/buildkite/build-jvm-linux-arm64-manylinux2014.sh @@ -20,10 +20,10 @@ pushd lib libname=libxgboost4j_linux_arm64_${BUILDKITE_COMMIT}.so mv -v libxgboost4j.so ${libname} buildkite-agent artifact upload ${libname} -#if [[ ($is_pull_request == 0) && ($is_release_branch == 1) ]] -#then +if [[ ($is_pull_request == 0) && ($is_release_branch == 1) ]] +then aws s3 cp ${libname} \ s3://xgboost-nightly-builds/${BRANCH_NAME}/libxgboost4j/ \ --acl public-read --no-progress -#fi +fi popd diff --git a/tests/buildkite/build-jvm-linux-x86_64-manylinux2014.sh b/tests/buildkite/build-jvm-linux-x86_64-manylinux2014.sh index 3c299bb36a01..46a819a016d3 100644 --- a/tests/buildkite/build-jvm-linux-x86_64-manylinux2014.sh +++ b/tests/buildkite/build-jvm-linux-x86_64-manylinux2014.sh @@ -20,10 +20,10 @@ pushd lib libname=libxgboost4j_linux_x86_64_${BUILDKITE_COMMIT}.so mv -v libxgboost4j.so ${libname} buildkite-agent artifact upload ${libname} -#if [[ ($is_pull_request == 0) && ($is_release_branch == 1) ]] -#then +if [[ ($is_pull_request == 0) && ($is_release_branch == 1) ]] +then aws s3 cp 
${libname} \ s3://xgboost-nightly-builds/${BRANCH_NAME}/libxgboost4j/ \ --acl public-read --no-progress -#fi +fi popd diff --git a/tests/buildkite/build-jvm-macos-m1.sh b/tests/buildkite/build-jvm-macos-m1.sh index 469b0786ee37..1d2e5e8703bc 100644 --- a/tests/buildkite/build-jvm-macos-m1.sh +++ b/tests/buildkite/build-jvm-macos-m1.sh @@ -31,11 +31,11 @@ pushd lib libname=libxgboost4j_m1_${BUILDKITE_COMMIT}.dylib mv -v libxgboost4j.dylib ${libname} buildkite-agent artifact upload ${libname} -#if [[ ($is_pull_request == 0) && ($is_release_branch == 1) ]] -#then +if [[ ($is_pull_request == 0) && ($is_release_branch == 1) ]] +then aws s3 cp ${libname} \ s3://xgboost-nightly-builds/${BRANCH_NAME}/libxgboost4j/ \ --acl public-read --no-progress -#fi +fi popd set +x From 617a463255249d8517b7fb5e2771f8c8d9f08641 Mon Sep 17 00:00:00 2001 From: Philip Hyunsu Cho Date: Sat, 29 Jun 2024 21:16:51 -0700 Subject: [PATCH 04/18] Fix build and C++ tests for FreeBSD (#10480) (#10501) --- .github/workflows/freebsd.yml | 33 +++++++++++++++++++++++++++ rabit/include/rabit/internal/socket.h | 4 +++- src/c_api/coll_c_api.cc | 21 ++++++++++++++--- src/collective/socket.cc | 4 +++- tests/cpp/collective/test_worker.h | 2 +- tests/cpp/common/test_random.cc | 10 ++++++-- tests/cpp/test_cache.cc | 6 ++++- tests/cpp/test_learner.cc | 10 ++++++-- 8 files changed, 79 insertions(+), 11 deletions(-) create mode 100644 .github/workflows/freebsd.yml diff --git a/.github/workflows/freebsd.yml b/.github/workflows/freebsd.yml new file mode 100644 index 000000000000..c56395964699 --- /dev/null +++ b/.github/workflows/freebsd.yml @@ -0,0 +1,33 @@ +name: FreeBSD + +on: [push, pull_request] + +permissions: + contents: read # to fetch code (actions/checkout) + +concurrency: + group: ${{ github.workflow }}-${{ github.event.pull_request.number || github.ref }} + cancel-in-progress: true + +jobs: + test: + runs-on: ubuntu-latest + name: A job to run test in FreeBSD + steps: + - uses: actions/checkout@v4 + with: + submodules: 'true' + - name: Test in FreeBSD + id: test + uses: vmactions/freebsd-vm@v1 + with: + usesh: true + prepare: | + pkg install -y cmake git ninja googletest + + run: | + mkdir build + cd build + cmake .. -GNinja -DGOOGLE_TEST=ON + ninja -v + ./testxgboost diff --git a/rabit/include/rabit/internal/socket.h b/rabit/include/rabit/internal/socket.h index 3701146d4577..97fb3d4be497 100644 --- a/rabit/include/rabit/internal/socket.h +++ b/rabit/include/rabit/internal/socket.h @@ -78,6 +78,8 @@ namespace utils { template int PollImpl(PollFD* pfd, int nfds, std::chrono::seconds timeout) noexcept(true) { + // For Windows and Linux, negative timeout means infinite timeout. For freebsd, + // INFTIM(-1) should be used instead. #if defined(_WIN32) #if IS_MINGW() @@ -88,7 +90,7 @@ int PollImpl(PollFD* pfd, int nfds, std::chrono::seconds timeout) noexcept(true) #endif // IS_MINGW() #else - return poll(pfd, nfds, std::chrono::milliseconds(timeout).count()); + return poll(pfd, nfds, timeout.count() < 0 ? -1 : std::chrono::milliseconds(timeout).count()); #endif // IS_MINGW() } diff --git a/src/c_api/coll_c_api.cc b/src/c_api/coll_c_api.cc index 1da22610367b..2cace7930e19 100644 --- a/src/c_api/coll_c_api.cc +++ b/src/c_api/coll_c_api.cc @@ -75,8 +75,11 @@ using CollAPIThreadLocalStore = dmlc::ThreadLocalStore; void WaitImpl(TrackerHandleT *ptr, std::chrono::seconds timeout) { constexpr std::int64_t kDft{collective::DefaultTimeoutSec()}; - std::chrono::seconds wait_for{collective::HasTimeout(timeout) ? 
std::min(kDft, timeout.count()) - : kDft}; + std::int64_t timeout_clipped = kDft; + if (collective::HasTimeout(timeout)) { + timeout_clipped = std::min(kDft, static_cast(timeout.count())); + } + std::chrono::seconds wait_for{timeout_clipped}; common::Timer timer; timer.Start(); @@ -171,7 +174,19 @@ XGB_DLL int XGTrackerFree(TrackerHandle handle) { common::Timer timer; timer.Start(); // Make sure no one else is waiting on the tracker. - while (!ptr->first.unique()) { + + // Quote from https://en.cppreference.com/w/cpp/memory/shared_ptr/use_count#Notes: + // + // In multithreaded environment, `use_count() == 1` does not imply that the object is + // safe to modify because accesses to the managed object by former shared owners may not + // have completed, and because new shared owners may be introduced concurrently. + // + // - We don't have the first case since we never access the raw pointer. + // + // - We don't have the second case for most of the scenarios since the tracker is a unique + // object; if the free function is called before other calls on it have returned, it's likely + // to be a bug in the user code. The use_count should only decrease in this function. + while (ptr->first.use_count() != 1) { auto ela = timer.Duration().count(); if (collective::HasTimeout(ptr->first->Timeout()) && ela > ptr->first->Timeout().count()) { LOG(WARNING) << "Time out " << ptr->first->Timeout().count() diff --git a/src/collective/socket.cc b/src/collective/socket.cc index 99b02f665f10..5145c13a1366 100644 --- a/src/collective/socket.cc +++ b/src/collective/socket.cc @@ -22,10 +22,12 @@ namespace xgboost::collective { SockAddress MakeSockAddress(StringView host, in_port_t port) { struct addrinfo hints; std::memset(&hints, 0, sizeof(hints)); - hints.ai_protocol = SOCK_STREAM; + hints.ai_socktype = SOCK_STREAM; struct addrinfo *res = nullptr; int sig = getaddrinfo(host.c_str(), nullptr, &hints, &res); if (sig != 0) { + LOG(FATAL) << "Failed to get addr info for: " << host + << ", error: " << gai_strerror(sig); return {}; } if (res->ai_family == static_cast(SockDomain::kV4)) { diff --git a/tests/cpp/collective/test_worker.h b/tests/cpp/collective/test_worker.h index f1889200b4d6..4fd982b06f35 100644 --- a/tests/cpp/collective/test_worker.h +++ b/tests/cpp/collective/test_worker.h @@ -105,7 +105,7 @@ inline Json MakeTrackerConfig(std::string host, std::int32_t n_workers, config["port"] = Integer{0}; config["n_workers"] = Integer{n_workers}; config["sortby"] = Integer{static_cast(Tracker::SortBy::kHost)}; - config["timeout"] = timeout.count(); + config["timeout"] = static_cast(timeout.count()); return config; } diff --git a/tests/cpp/common/test_random.cc b/tests/cpp/common/test_random.cc index a517764754c5..45c20e4030f7 100644 --- a/tests/cpp/common/test_random.cc +++ b/tests/cpp/common/test_random.cc @@ -68,14 +68,20 @@ TEST(ColumnSampler, GPUTest) { // Test if different threads using the same seed produce the same result TEST(ColumnSampler, ThreadSynchronisation) { Context ctx; - const int64_t num_threads = 100; + // NOLINTBEGIN(clang-analyzer-deadcode.DeadStores) +#if defined(__linux__) + std::int64_t const n_threads = std::thread::hardware_concurrency() * 128; +#else + std::int64_t const n_threads = std::thread::hardware_concurrency(); +#endif + // NOLINTEND(clang-analyzer-deadcode.DeadStores) int n = 128; size_t iterations = 10; size_t levels = 5; std::vector reference_result; std::vector feature_weights; bool success = true; // Cannot use google test asserts in multithreaded region -#pragma omp parallel 
num_threads(num_threads) +#pragma omp parallel num_threads(n_threads) { for (auto j = 0ull; j < iterations; j++) { ColumnSampler cs(j); diff --git a/tests/cpp/test_cache.cc b/tests/cpp/test_cache.cc index 351730181f9a..cd4b28b0536b 100644 --- a/tests/cpp/test_cache.cc +++ b/tests/cpp/test_cache.cc @@ -59,7 +59,11 @@ TEST(DMatrixCache, MultiThread) { std::size_t constexpr kRows = 2, kCols = 1, kCacheSize = 3; auto p_fmat = RandomDataGenerator(kRows, kCols, 0).GenerateDMatrix(); - auto n = std::thread::hardware_concurrency() * 128u; +#if defined(__linux__) + auto const n = std::thread::hardware_concurrency() * 128; +#else + auto const n = std::thread::hardware_concurrency(); +#endif CHECK_NE(n, 0); std::vector> results(n); diff --git a/tests/cpp/test_learner.cc b/tests/cpp/test_learner.cc index 976ae2147a06..c25f684a42d1 100644 --- a/tests/cpp/test_learner.cc +++ b/tests/cpp/test_learner.cc @@ -267,8 +267,14 @@ TEST(Learner, MultiThreadedPredict) { learner->Configure(); std::vector threads; - for (uint32_t thread_id = 0; - thread_id < 2 * std::thread::hardware_concurrency(); ++thread_id) { + +#if defined(__linux__) + auto n_threads = std::thread::hardware_concurrency() * 4u; +#else + auto n_threads = std::thread::hardware_concurrency(); +#endif + + for (decltype(n_threads) thread_id = 0; thread_id < n_threads; ++thread_id) { threads.emplace_back([learner, p_data] { size_t constexpr kIters = 10; auto &entry = learner->GetThreadLocal().prediction_entry; From 0618c20d60afbe9ffa136d7a9cf435ec6b8305c5 Mon Sep 17 00:00:00 2001 From: Philip Hyunsu Cho Date: Tue, 9 Jul 2024 15:13:47 -0700 Subject: [PATCH 05/18] Require Pandas 1.2+ (#10476) (#10477) --- python-package/pyproject.toml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/python-package/pyproject.toml b/python-package/pyproject.toml index f7b2dd48170a..1a46d4a52185 100644 --- a/python-package/pyproject.toml +++ b/python-package/pyproject.toml @@ -38,7 +38,7 @@ documentation = "https://xgboost.readthedocs.io/en/stable/" repository = "https://github.com/dmlc/xgboost" [project.optional-dependencies] -pandas = ["pandas"] +pandas = ["pandas>=1.2"] scikit-learn = ["scikit-learn"] dask = ["dask", "pandas", "distributed"] datatable = ["datatable"] From 3f47fcbed4ef112a22b6a2a5346bb36ea1c20ee5 Mon Sep 17 00:00:00 2001 From: Jiaming Yuan Date: Fri, 12 Jul 2024 17:14:09 +0800 Subject: [PATCH 06/18] [backport][doc] Fix learning to rank tutorial. [skip ci] (#10539) (#10578) --- doc/tutorials/learning_to_rank.rst | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/doc/tutorials/learning_to_rank.rst b/doc/tutorials/learning_to_rank.rst index 15a611bd0c32..c5d6d0b33104 100644 --- a/doc/tutorials/learning_to_rank.rst +++ b/doc/tutorials/learning_to_rank.rst @@ -71,8 +71,12 @@ Please note that, as of writing, there's no learning-to-rank interface in scikit .. code-block:: python + import pandas as pd + + # `X`, `qid`, and `y` are from the previous snippet, they are all sorted by the `sorted_idx`. df = pd.DataFrame(X, columns=[str(i) for i in range(X.shape[1])]) - df["qid"] = qid[sorted_idx] + df["qid"] = qid + ranker.fit(df, y) # No need to pass qid as a separate argument from sklearn.model_selection import StratifiedGroupKFold, cross_val_score From 644e00165b94a91730ab89705d9a61c60e6d8d4c Mon Sep 17 00:00:00 2001 From: Jiaming Yuan Date: Fri, 12 Jul 2024 18:29:28 +0800 Subject: [PATCH 07/18] [backport][fed] Fixes for the encrypted GRPC backend. 
(#10503) (#10577) --- plugin/federated/federated_comm.cc | 9 +- python-package/xgboost/federated.py | 14 +- python-package/xgboost/testing/federated.py | 153 ++++++++++++++++++ src/context.cc | 5 +- tests/ci_build/lint_python.py | 4 + tests/ci_build/test_python.sh | 2 + .../test_federated/runtests-federated.sh | 17 -- .../test_federated/test_federated.py | 88 +--------- .../test_gpu_federated/test_gpu_federated.py | 9 ++ 9 files changed, 192 insertions(+), 109 deletions(-) create mode 100644 python-package/xgboost/testing/federated.py delete mode 100755 tests/test_distributed/test_federated/runtests-federated.sh create mode 100644 tests/test_distributed/test_gpu_federated/test_gpu_federated.py diff --git a/plugin/federated/federated_comm.cc b/plugin/federated/federated_comm.cc index ec128741353b..b4aa02f9fe42 100644 --- a/plugin/federated/federated_comm.cc +++ b/plugin/federated/federated_comm.cc @@ -1,5 +1,5 @@ /** - * Copyright 2023, XGBoost contributors + * Copyright 2023-2024, XGBoost contributors */ #include "federated_comm.h" @@ -11,6 +11,7 @@ #include // for string, stoi #include "../../src/common/common.h" // for Split +#include "../../src/common/io.h" // for ReadAll #include "../../src/common/json_utils.h" // for OptionalArg #include "xgboost/json.h" // for Json #include "xgboost/logging.h" @@ -46,9 +47,9 @@ void FederatedComm::Init(std::string const& host, std::int32_t port, std::int32_ } else { stub_ = [&] { grpc::SslCredentialsOptions options; - options.pem_root_certs = server_cert; - options.pem_private_key = client_key; - options.pem_cert_chain = client_cert; + options.pem_root_certs = common::ReadAll(server_cert); + options.pem_private_key = common::ReadAll(client_key); + options.pem_cert_chain = common::ReadAll(client_cert); grpc::ChannelArguments args; args.SetMaxReceiveMessageSize(std::numeric_limits::max()); auto channel = grpc::CreateCustomChannel(host + ":" + std::to_string(port), diff --git a/python-package/xgboost/federated.py b/python-package/xgboost/federated.py index 2e42c03ac967..71db5a1c0345 100644 --- a/python-package/xgboost/federated.py +++ b/python-package/xgboost/federated.py @@ -39,9 +39,9 @@ def __init__( # pylint: disable=R0913, W0231 n_workers: int, port: int, secure: bool, - server_key_path: str = "", - server_cert_path: str = "", - client_cert_path: str = "", + server_key_path: Optional[str] = None, + server_cert_path: Optional[str] = None, + client_cert_path: Optional[str] = None, timeout: int = 300, ) -> None: handle = ctypes.c_void_p() @@ -84,7 +84,13 @@ def run_federated_server( # pylint: disable=too-many-arguments for path in [server_key_path, server_cert_path, client_cert_path] ) tracker = FederatedTracker( - n_workers=n_workers, port=port, secure=secure, timeout=timeout + n_workers=n_workers, + port=port, + secure=secure, + timeout=timeout, + server_key_path=server_key_path, + server_cert_path=server_cert_path, + client_cert_path=client_cert_path, ) tracker.start() diff --git a/python-package/xgboost/testing/federated.py b/python-package/xgboost/testing/federated.py new file mode 100644 index 000000000000..13755af9064d --- /dev/null +++ b/python-package/xgboost/testing/federated.py @@ -0,0 +1,153 @@ +# pylint: disable=unbalanced-tuple-unpacking, too-many-locals +"""Tests for federated learning.""" + +import multiprocessing +import os +import subprocess +import tempfile +import time +from typing import List, cast + +from sklearn.datasets import dump_svmlight_file, load_svmlight_file +from sklearn.model_selection import train_test_split + +import 
xgboost as xgb +import xgboost.federated +from xgboost import testing as tm +from xgboost.training import TrainingCallback + +SERVER_KEY = "server-key.pem" +SERVER_CERT = "server-cert.pem" +CLIENT_KEY = "client-key.pem" +CLIENT_CERT = "client-cert.pem" + + +def run_server(port: int, world_size: int, with_ssl: bool) -> None: + """Run federated server for test.""" + if with_ssl: + xgboost.federated.run_federated_server( + world_size, + port, + server_key_path=SERVER_KEY, + server_cert_path=SERVER_CERT, + client_cert_path=CLIENT_CERT, + ) + else: + xgboost.federated.run_federated_server(world_size, port) + + +def run_worker( + port: int, world_size: int, rank: int, with_ssl: bool, device: str +) -> None: + """Run federated client worker for test.""" + communicator_env = { + "dmlc_communicator": "federated", + "federated_server_address": f"localhost:{port}", + "federated_world_size": world_size, + "federated_rank": rank, + } + if with_ssl: + communicator_env["federated_server_cert_path"] = SERVER_CERT + communicator_env["federated_client_key_path"] = CLIENT_KEY + communicator_env["federated_client_cert_path"] = CLIENT_CERT + + cpu_count = os.cpu_count() + assert cpu_count is not None + n_threads = cpu_count // world_size + + # Always call this before using distributed module + with xgb.collective.CommunicatorContext(**communicator_env): + # Load file, file will not be sharded in federated mode. + X, y = load_svmlight_file(f"agaricus.txt-{rank}.train") + dtrain = xgb.DMatrix(X, y) + X, y = load_svmlight_file(f"agaricus.txt-{rank}.test") + dtest = xgb.DMatrix(X, y) + + # Specify parameters via map, definition are same as c++ version + param = { + "max_depth": 2, + "eta": 1, + "objective": "binary:logistic", + "nthread": n_threads, + "tree_method": "hist", + "device": device, + } + + # Specify validations set to watch performance + watchlist = [(dtest, "eval"), (dtrain, "train")] + num_round = 20 + + # Run training, all the features in training API is available. + results: TrainingCallback.EvalsLog = {} + bst = xgb.train( + param, + dtrain, + num_round, + evals=watchlist, + early_stopping_rounds=2, + evals_result=results, + ) + assert tm.non_increasing(cast(List[float], results["train"]["logloss"])) + assert tm.non_increasing(cast(List[float], results["eval"]["logloss"])) + + # save the model, only ask process 0 to save the model. 
+ if xgb.collective.get_rank() == 0: + with tempfile.TemporaryDirectory() as tmpdir: + bst.save_model(os.path.join(tmpdir, "model.json")) + xgb.collective.communicator_print("Finished training\n") + + +def run_federated(world_size: int, with_ssl: bool, use_gpu: bool) -> None: + """Launcher for clients and the server.""" + port = 9091 + + server = multiprocessing.Process( + target=run_server, args=(port, world_size, with_ssl) + ) + server.start() + time.sleep(1) + if not server.is_alive(): + raise ValueError("Error starting Federated Learning server") + + workers = [] + for rank in range(world_size): + device = f"cuda:{rank}" if use_gpu else "cpu" + worker = multiprocessing.Process( + target=run_worker, args=(port, world_size, rank, with_ssl, device) + ) + workers.append(worker) + worker.start() + for worker in workers: + worker.join() + server.terminate() + + +def run_federated_learning(with_ssl: bool, use_gpu: bool, test_path: str) -> None: + """Run federated learning tests.""" + n_workers = 2 + + if with_ssl: + command = "openssl req -x509 -newkey rsa:2048 -days 7 -nodes -keyout {part}-key.pem -out {part}-cert.pem -subj /C=US/CN=localhost" # pylint: disable=line-too-long + server_key = command.format(part="server").split() + subprocess.check_call(server_key) + client_key = command.format(part="client").split() + subprocess.check_call(client_key) + + train_path = os.path.join(tm.data_dir(test_path), "agaricus.txt.train") + test_path = os.path.join(tm.data_dir(test_path), "agaricus.txt.test") + + X_train, y_train = load_svmlight_file(train_path) + X_test, y_test = load_svmlight_file(test_path) + + X0, X1, y0, y1 = train_test_split(X_train, y_train, test_size=0.5) + X0_valid, X1_valid, y0_valid, y1_valid = train_test_split( + X_test, y_test, test_size=0.5 + ) + + dump_svmlight_file(X0, y0, "agaricus.txt-0.train") + dump_svmlight_file(X0_valid, y0_valid, "agaricus.txt-0.test") + + dump_svmlight_file(X1, y1, "agaricus.txt-1.train") + dump_svmlight_file(X1_valid, y1_valid, "agaricus.txt-1.test") + + run_federated(world_size=n_workers, with_ssl=with_ssl, use_gpu=use_gpu) diff --git a/src/context.cc b/src/context.cc index cc18bcc8709e..ef7110e7ce19 100644 --- a/src/context.cc +++ b/src/context.cc @@ -191,8 +191,11 @@ DeviceOrd CUDAOrdinal(DeviceOrd device, bool) { } if (device.IsCUDA()) { device = CUDAOrdinal(device, fail_on_invalid_gpu_id); + if (!device.IsCUDA()) { + // We allow loading a GPU-based pickle on a CPU-only machine. 
+ LOG(WARNING) << "XGBoost is not compiled with CUDA support."; + } } - return device; } } // namespace diff --git a/tests/ci_build/lint_python.py b/tests/ci_build/lint_python.py index d56191dc4566..079996de66fb 100644 --- a/tests/ci_build/lint_python.py +++ b/tests/ci_build/lint_python.py @@ -34,6 +34,8 @@ class LintersPaths: "tests/python/test_with_pandas.py", "tests/python-gpu/", "tests/python-sycl/", + "tests/test_distributed/test_federated/", + "tests/test_distributed/test_gpu_federated/", "tests/test_distributed/test_with_dask/", "tests/test_distributed/test_gpu_with_dask/", "tests/test_distributed/test_with_spark/", @@ -94,6 +96,8 @@ class LintersPaths: "tests/python-gpu/load_pickle.py", "tests/python-gpu/test_gpu_training_continuation.py", "tests/python/test_model_io.py", + "tests/test_distributed/test_federated/", + "tests/test_distributed/test_gpu_federated/", "tests/test_distributed/test_with_spark/test_data.py", "tests/test_distributed/test_gpu_with_spark/test_data.py", "tests/test_distributed/test_gpu_with_dask/test_gpu_with_dask.py", diff --git a/tests/ci_build/test_python.sh b/tests/ci_build/test_python.sh index a70b2796130f..dd43f43a9ce6 100755 --- a/tests/ci_build/test_python.sh +++ b/tests/ci_build/test_python.sh @@ -70,6 +70,7 @@ case "$suite" in pytest -v -s -rxXs --fulltrace --durations=0 -m "mgpu" ${args} tests/python-gpu pytest -v -s -rxXs --fulltrace --durations=0 -m "mgpu" ${args} tests/test_distributed/test_gpu_with_dask pytest -v -s -rxXs --fulltrace --durations=0 -m "mgpu" ${args} tests/test_distributed/test_gpu_with_spark + pytest -v -s -rxXs --fulltrace --durations=0 -m "mgpu" ${args} tests/test_distributed/test_gpu_federated unset_pyspark_envs uninstall_xgboost set +x @@ -84,6 +85,7 @@ case "$suite" in pytest -v -s -rxXs --fulltrace --durations=0 ${args} tests/python pytest -v -s -rxXs --fulltrace --durations=0 ${args} tests/test_distributed/test_with_dask pytest -v -s -rxXs --fulltrace --durations=0 ${args} tests/test_distributed/test_with_spark + pytest -v -s -rxXs --fulltrace --durations=0 ${args} tests/test_distributed/test_federated unset_pyspark_envs uninstall_xgboost set +x diff --git a/tests/test_distributed/test_federated/runtests-federated.sh b/tests/test_distributed/test_federated/runtests-federated.sh deleted file mode 100755 index 8bdb2bc5b973..000000000000 --- a/tests/test_distributed/test_federated/runtests-federated.sh +++ /dev/null @@ -1,17 +0,0 @@ -#!/bin/bash - -set -e - -rm -f ./*.model* ./agaricus* ./*.pem - -world_size=$(nvidia-smi -L | wc -l) - -# Generate server and client certificates. -openssl req -x509 -newkey rsa:2048 -days 7 -nodes -keyout server-key.pem -out server-cert.pem -subj "/C=US/CN=localhost" -openssl req -x509 -newkey rsa:2048 -days 7 -nodes -keyout client-key.pem -out client-cert.pem -subj "/C=US/CN=localhost" - -# Split train and test files manually to simulate a federated environment. 
-split -n l/"${world_size}" -d ../../../demo/data/agaricus.txt.train agaricus.txt.train- -split -n l/"${world_size}" -d ../../../demo/data/agaricus.txt.test agaricus.txt.test- - -python test_federated.py "${world_size}" diff --git a/tests/test_distributed/test_federated/test_federated.py b/tests/test_distributed/test_federated/test_federated.py index dba797078114..460b1b2206c9 100644 --- a/tests/test_distributed/test_federated/test_federated.py +++ b/tests/test_distributed/test_federated/test_federated.py @@ -1,86 +1,8 @@ -#!/usr/bin/python -import multiprocessing -import sys -import time +import pytest -import xgboost as xgb -import xgboost.federated +from xgboost.testing.federated import run_federated_learning -SERVER_KEY = 'server-key.pem' -SERVER_CERT = 'server-cert.pem' -CLIENT_KEY = 'client-key.pem' -CLIENT_CERT = 'client-cert.pem' - -def run_server(port: int, world_size: int, with_ssl: bool) -> None: - if with_ssl: - xgboost.federated.run_federated_server(port, world_size, SERVER_KEY, SERVER_CERT, - CLIENT_CERT) - else: - xgboost.federated.run_federated_server(port, world_size) - - -def run_worker(port: int, world_size: int, rank: int, with_ssl: bool, with_gpu: bool) -> None: - communicator_env = { - 'xgboost_communicator': 'federated', - 'federated_server_address': f'localhost:{port}', - 'federated_world_size': world_size, - 'federated_rank': rank - } - if with_ssl: - communicator_env['federated_server_cert'] = SERVER_CERT - communicator_env['federated_client_key'] = CLIENT_KEY - communicator_env['federated_client_cert'] = CLIENT_CERT - - # Always call this before using distributed module - with xgb.collective.CommunicatorContext(**communicator_env): - # Load file, file will not be sharded in federated mode. - dtrain = xgb.DMatrix('agaricus.txt.train-%02d?format=libsvm' % rank) - dtest = xgb.DMatrix('agaricus.txt.test-%02d?format=libsvm' % rank) - - # Specify parameters via map, definition are same as c++ version - param = {'max_depth': 2, 'eta': 1, 'objective': 'binary:logistic'} - if with_gpu: - param['tree_method'] = 'hist' - param['device'] = f"cuda:{rank}" - - # Specify validations set to watch performance - watchlist = [(dtest, 'eval'), (dtrain, 'train')] - num_round = 20 - - # Run training, all the features in training API is available. - bst = xgb.train(param, dtrain, num_round, evals=watchlist, - early_stopping_rounds=2) - - # Save the model, only ask process 0 to save the model. 
- if xgb.collective.get_rank() == 0: - bst.save_model("test.model.json") - xgb.collective.communicator_print("Finished training\n") - - -def run_federated(with_ssl: bool = True, with_gpu: bool = False) -> None: - port = 9091 - world_size = int(sys.argv[1]) - - server = multiprocessing.Process(target=run_server, args=(port, world_size, with_ssl)) - server.start() - time.sleep(1) - if not server.is_alive(): - raise Exception("Error starting Federated Learning server") - - workers = [] - for rank in range(world_size): - worker = multiprocessing.Process(target=run_worker, - args=(port, world_size, rank, with_ssl, with_gpu)) - workers.append(worker) - worker.start() - for worker in workers: - worker.join() - server.terminate() - - -if __name__ == '__main__': - run_federated(with_ssl=True, with_gpu=False) - run_federated(with_ssl=False, with_gpu=False) - run_federated(with_ssl=True, with_gpu=True) - run_federated(with_ssl=False, with_gpu=True) +@pytest.mark.parametrize("with_ssl", [True, False]) +def test_federated_learning(with_ssl: bool) -> None: + run_federated_learning(with_ssl, False, __file__) diff --git a/tests/test_distributed/test_gpu_federated/test_gpu_federated.py b/tests/test_distributed/test_gpu_federated/test_gpu_federated.py new file mode 100644 index 000000000000..c366a743f45f --- /dev/null +++ b/tests/test_distributed/test_gpu_federated/test_gpu_federated.py @@ -0,0 +1,9 @@ +import pytest + +from xgboost.testing.federated import run_federated_learning + + +@pytest.mark.parametrize("with_ssl", [True, False]) +@pytest.mark.mgpu +def test_federated_learning(with_ssl: bool) -> None: + run_federated_learning(with_ssl, True, __file__) From 086ca69cce07f2634111eb37fa0dced2558aac39 Mon Sep 17 00:00:00 2001 From: Jiaming Yuan Date: Fri, 12 Jul 2024 18:30:15 +0800 Subject: [PATCH 08/18] [backport]Fix empty partition. (#10559) (#10579) --- src/common/hist_util.cc | 25 ++++++++++++++++--------- 1 file changed, 16 insertions(+), 9 deletions(-) diff --git a/src/common/hist_util.cc b/src/common/hist_util.cc index 9b703a3fa13a..7107cb2dee7f 100644 --- a/src/common/hist_util.cc +++ b/src/common/hist_util.cc @@ -201,7 +201,7 @@ void RowsWiseBuildHistKernel(Span gpair, auto const &row_ptr = gmat.row_ptr.data(); auto base_rowid = gmat.base_rowid; - uint32_t const *offsets = gmat.index.Offset(); + std::uint32_t const *offsets = gmat.index.Offset(); // There's no feature-based compression if missing value is present. if (kAnyMissing) { CHECK(!offsets); @@ -212,8 +212,11 @@ void RowsWiseBuildHistKernel(Span gpair, auto get_row_ptr = [&](bst_idx_t ridx) { return kFirstPage ? row_ptr[ridx] : row_ptr[ridx - base_rowid]; }; - auto get_rid = [&](bst_idx_t ridx) { return kFirstPage ? ridx : (ridx - base_rowid); }; + auto get_rid = [&](bst_idx_t ridx) { + return kFirstPage ? 
ridx : (ridx - base_rowid); + }; + CHECK_NE(row_indices.Size(), 0); const size_t n_features = get_row_ptr(row_indices.begin[0] + 1) - get_row_ptr(row_indices.begin[0]); auto hist_data = reinterpret_cast(hist.data()); @@ -325,16 +328,20 @@ void BuildHistDispatch(Span gpair, const RowSetCollection::E if (contiguousBlock) { // contiguous memory access, built-in HW prefetching is enough + if (row_indices.Size() == 0) { + return; + } RowsWiseBuildHistKernel(gpair, row_indices, gmat, hist); } else { - const RowSetCollection::Elem span1(row_indices.begin, - row_indices.end - no_prefetch_size); - const RowSetCollection::Elem span2(row_indices.end - no_prefetch_size, - row_indices.end); - - RowsWiseBuildHistKernel(gpair, span1, gmat, hist); + const RowSetCollection::Elem span1(row_indices.begin, row_indices.end - no_prefetch_size); + if (span1.Size() != 0) { + RowsWiseBuildHistKernel(gpair, span1, gmat, hist); + } // no prefetching to avoid loading extra memory - RowsWiseBuildHistKernel(gpair, span2, gmat, hist); + const RowSetCollection::Elem span2(row_indices.end - no_prefetch_size, row_indices.end); + if (span2.Size() != 0) { + RowsWiseBuildHistKernel(gpair, span2, gmat, hist); + } } } } From ea7bd919d4c77586d954633cb9e40de6e9541632 Mon Sep 17 00:00:00 2001 From: Jiaming Yuan Date: Fri, 12 Jul 2024 21:04:57 +0800 Subject: [PATCH 09/18] [backport] Fix column split race condition. (#10572) (#10580) --- src/tree/common_row_partitioner.h | 48 ++++++++++++++--- tests/cpp/tree/test_approx.cc | 23 ++++++++ tests/cpp/tree/test_column_split.h | 79 ++++++++++++++++++++++++++++ tests/cpp/tree/test_histmaker.cc | 79 ++-------------------------- tests/cpp/tree/test_quantile_hist.cc | 71 +++++++------------------ 5 files changed, 167 insertions(+), 133 deletions(-) create mode 100644 tests/cpp/tree/test_column_split.h diff --git a/src/tree/common_row_partitioner.h b/src/tree/common_row_partitioner.h index c3065ad5f135..73fef8203e59 100644 --- a/src/tree/common_row_partitioner.h +++ b/src/tree/common_row_partitioner.h @@ -36,10 +36,11 @@ class ColumnSplitHelper { common::PartitionBuilder* partition_builder, common::RowSetCollection* row_set_collection) : partition_builder_{partition_builder}, row_set_collection_{row_set_collection} { - decision_storage_.resize(num_row); - decision_bits_ = BitVector(common::Span(decision_storage_)); - missing_storage_.resize(num_row); - missing_bits_ = BitVector(common::Span(missing_storage_)); + auto n_bytes = BitVector::ComputeStorageSize(num_row); + decision_storage_.resize(n_bytes); + decision_bits_ = BitVector{common::Span{decision_storage_}}; + missing_storage_.resize(n_bytes); + missing_bits_ = BitVector{common::Span{missing_storage_}}; } template @@ -51,14 +52,43 @@ class ColumnSplitHelper { // we first collect all the decisions and whether the feature is missing into bit vectors. std::fill(decision_storage_.begin(), decision_storage_.end(), 0); std::fill(missing_storage_.begin(), missing_storage_.end(), 0); - common::ParallelFor2d(space, n_threads, [&](size_t node_in_set, common::Range1d r) { - const int32_t nid = nodes[node_in_set].nid; + + this->tloc_decision_.resize(decision_storage_.size() * n_threads); + this->tloc_missing_.resize(decision_storage_.size() * n_threads); + std::fill_n(this->tloc_decision_.data(), this->tloc_decision_.size(), 0); + std::fill_n(this->tloc_missing_.data(), this->tloc_missing_.size(), 0); + + // Make thread-local storage. 
+ using T = decltype(decision_storage_)::value_type; + auto make_tloc = [&](std::vector& storage, std::int32_t tidx) { + auto span = common::Span{storage}; + auto n = decision_storage_.size(); + auto bitvec = BitVector{span.subspan(n * tidx, n)}; + return bitvec; + }; + + common::ParallelFor2d(space, n_threads, [&](std::size_t node_in_set, common::Range1d r) { + bst_node_t const nid = nodes[node_in_set].nid; + auto tidx = omp_get_thread_num(); + auto decision = make_tloc(this->tloc_decision_, tidx); + auto missing = make_tloc(this->tloc_missing_, tidx); bst_bin_t split_cond = column_matrix.IsInitialized() ? split_conditions[node_in_set] : 0; partition_builder_->MaskRows( node_in_set, nodes, r, split_cond, gmat, column_matrix, *p_tree, - (*row_set_collection_)[nid].begin, &decision_bits_, &missing_bits_); + (*row_set_collection_)[nid].begin, &decision, &missing); }); + // Reduce thread local + auto decision = make_tloc(this->tloc_decision_, 0); + auto missing = make_tloc(this->tloc_missing_, 0); + for (std::int32_t tidx = 1; tidx < n_threads; ++tidx) { + decision |= make_tloc(this->tloc_decision_, tidx); + missing |= make_tloc(this->tloc_missing_, tidx); + } + CHECK_EQ(decision_storage_.size(), decision.NumValues()); + std::copy_n(decision.Data(), decision_storage_.size(), decision_storage_.data()); + std::copy_n(missing.Data(), missing_storage_.size(), missing_storage_.data()); + // Then aggregate the bit vectors across all the workers. auto rc = collective::Success() << [&] { return collective::Allreduce(ctx, &decision_storage_, collective::Op::kBitwiseOR); @@ -85,6 +115,10 @@ class ColumnSplitHelper { BitVector decision_bits_{}; std::vector missing_storage_{}; BitVector missing_bits_{}; + + std::vector tloc_decision_; + std::vector tloc_missing_; + common::PartitionBuilder* partition_builder_; common::RowSetCollection* row_set_collection_; }; diff --git a/tests/cpp/tree/test_approx.cc b/tests/cpp/tree/test_approx.cc index b2949e5952a2..0e52c9e23b2b 100644 --- a/tests/cpp/tree/test_approx.cc +++ b/tests/cpp/tree/test_approx.cc @@ -7,6 +7,7 @@ #include "../../../src/tree/common_row_partitioner.h" #include "../collective/test_worker.h" // for TestDistributedGlobal #include "../helpers.h" +#include "test_column_split.h" // for TestColumnSplit #include "test_partitioner.h" namespace xgboost::tree { @@ -150,4 +151,26 @@ TEST(Approx, PartitionerColSplit) { mid_partitioner); }); } + +namespace { +class TestApproxColSplit : public ::testing::TestWithParam> { + public: + void Run() { + auto [categorical, sparsity] = GetParam(); + TestColumnSplit(1u, categorical, "grow_histmaker", sparsity); + } +}; +} // namespace + +TEST_P(TestApproxColSplit, Basic) { this->Run(); } + +INSTANTIATE_TEST_SUITE_P(ColumnSplit, TestApproxColSplit, ::testing::ValuesIn([]() { + std::vector> params; + for (auto categorical : {true, false}) { + for (auto sparsity : {0.0f, 0.6f}) { + params.emplace_back(categorical, sparsity); + } + } + return params; + }())); } // namespace xgboost::tree diff --git a/tests/cpp/tree/test_column_split.h b/tests/cpp/tree/test_column_split.h new file mode 100644 index 000000000000..b03597f38681 --- /dev/null +++ b/tests/cpp/tree/test_column_split.h @@ -0,0 +1,79 @@ +/** + * Copyright 2023-2024, XGBoost Contributors + */ +#pragma once + +#include // for FeatureType, DMatrix +#include // for RegTree +#include // for TreeUpdater + +#include // for size_t +#include // for shared_ptr +#include // for vector + +#include "../../../src/tree/param.h" // for TrainParam +#include 
"../collective/test_worker.h" // for TestDistributedGlobal +#include "../helpers.h" // for RandomDataGenerator + +namespace xgboost::tree { +inline std::shared_ptr GenerateCatDMatrix(std::size_t rows, std::size_t cols, + float sparsity, bool categorical) { + if (categorical) { + std::vector ft(cols); + for (size_t i = 0; i < ft.size(); ++i) { + ft[i] = (i % 3 == 0) ? FeatureType::kNumerical : FeatureType::kCategorical; + } + return RandomDataGenerator(rows, cols, 0.6f).Seed(3).Type(ft).MaxCategory(17).GenerateDMatrix(); + } else { + return RandomDataGenerator{rows, cols, 0.6f}.Seed(3).GenerateDMatrix(); + } +} + +inline void TestColumnSplit(bst_target_t n_targets, bool categorical, std::string name, + float sparsity) { + auto constexpr kRows = 32; + auto constexpr kCols = 16; + + RegTree expected_tree{n_targets, static_cast(kCols)}; + ObjInfo task{ObjInfo::kRegression}; + Context ctx; + { + auto p_dmat = GenerateCatDMatrix(kRows, kCols, sparsity, categorical); + auto gpair = GenerateRandomGradients(&ctx, kRows, n_targets); + std::unique_ptr updater{TreeUpdater::Create(name, &ctx, &task)}; + std::vector> position(1); + TrainParam param; + param.Init(Args{}); + updater->Configure(Args{}); + updater->Update(¶m, &gpair, p_dmat.get(), position, {&expected_tree}); + } + + auto verify = [&] { + Context ctx; + auto p_dmat = GenerateCatDMatrix(kRows, kCols, sparsity, categorical); + auto gpair = GenerateRandomGradients(&ctx, kRows, n_targets); + + ObjInfo task{ObjInfo::kRegression}; + std::unique_ptr updater{TreeUpdater::Create(name, &ctx, &task)}; + std::vector> position(1); + + std::unique_ptr sliced{ + p_dmat->SliceCol(collective::GetWorldSize(), collective::GetRank())}; + + RegTree tree{n_targets, static_cast(kCols)}; + TrainParam param; + param.Init(Args{}); + updater->Configure(Args{}); + updater->Update(¶m, &gpair, sliced.get(), position, {&tree}); + + Json json{Object{}}; + tree.SaveModel(&json); + Json expected_json{Object{}}; + expected_tree.SaveModel(&expected_json); + ASSERT_EQ(json, expected_json); + }; + + auto constexpr kWorldSize = 2; + collective::TestDistributedGlobal(kWorldSize, [&] { verify(); }); +} +} // namespace xgboost::tree diff --git a/tests/cpp/tree/test_histmaker.cc b/tests/cpp/tree/test_histmaker.cc index b8b9e46cac18..888790aa7c3c 100644 --- a/tests/cpp/tree/test_histmaker.cc +++ b/tests/cpp/tree/test_histmaker.cc @@ -1,32 +1,19 @@ /** - * Copyright 2019-2023 by XGBoost Contributors + * Copyright 2019-2024, XGBoost Contributors */ #include #include #include -#include "../../../src/tree/param.h" // for TrainParam -#include "../collective/test_worker.h" // for TestDistributedGlobal +#include "../../../src/tree/param.h" // for TrainParam #include "../helpers.h" +#include "test_column_split.h" // for GenerateCatDMatrix namespace xgboost::tree { -std::shared_ptr GenerateDMatrix(std::size_t rows, std::size_t cols, - bool categorical = false) { - if (categorical) { - std::vector ft(cols); - for (size_t i = 0; i < ft.size(); ++i) { - ft[i] = (i % 3 == 0) ? 
FeatureType::kNumerical : FeatureType::kCategorical; - } - return RandomDataGenerator(rows, cols, 0.6f).Seed(3).Type(ft).MaxCategory(17).GenerateDMatrix(); - } else { - return RandomDataGenerator{rows, cols, 0.6f}.Seed(3).GenerateDMatrix(); - } -} - TEST(GrowHistMaker, InteractionConstraint) { auto constexpr kRows = 32; auto constexpr kCols = 16; - auto p_dmat = GenerateDMatrix(kRows, kCols); + auto p_dmat = GenerateCatDMatrix(kRows, kCols, 0.0, false); Context ctx; linalg::Matrix gpair({kRows}, ctx.Device()); @@ -69,62 +56,4 @@ TEST(GrowHistMaker, InteractionConstraint) { ASSERT_NE(tree[tree[0].RightChild()].SplitIndex(), 0); } } - -namespace { -void VerifyColumnSplit(int32_t rows, bst_feature_t cols, bool categorical, - RegTree const& expected_tree) { - Context ctx; - auto p_dmat = GenerateDMatrix(rows, cols, categorical); - linalg::Matrix gpair({rows}, ctx.Device()); - gpair.Data()->Copy(GenerateRandomGradients(rows)); - - - ObjInfo task{ObjInfo::kRegression}; - std::unique_ptr updater{TreeUpdater::Create("grow_histmaker", &ctx, &task)}; - std::vector> position(1); - - std::unique_ptr sliced{ - p_dmat->SliceCol(collective::GetWorldSize(), collective::GetRank())}; - - RegTree tree{1u, cols}; - TrainParam param; - param.Init(Args{}); - updater->Configure(Args{}); - updater->Update(¶m, &gpair, sliced.get(), position, {&tree}); - - Json json{Object{}}; - tree.SaveModel(&json); - Json expected_json{Object{}}; - expected_tree.SaveModel(&expected_json); - ASSERT_EQ(json, expected_json); -} - -void TestColumnSplit(bool categorical) { - auto constexpr kRows = 32; - auto constexpr kCols = 16; - - RegTree expected_tree{1u, kCols}; - ObjInfo task{ObjInfo::kRegression}; - { - Context ctx; - auto p_dmat = GenerateDMatrix(kRows, kCols, categorical); - linalg::Matrix gpair({kRows}, ctx.Device()); - gpair.Data()->Copy(GenerateRandomGradients(kRows)); - std::unique_ptr updater{TreeUpdater::Create("grow_histmaker", &ctx, &task)}; - std::vector> position(1); - TrainParam param; - param.Init(Args{}); - updater->Configure(Args{}); - updater->Update(¶m, &gpair, p_dmat.get(), position, {&expected_tree}); - } - - auto constexpr kWorldSize = 2; - collective::TestDistributedGlobal( - kWorldSize, [&] { VerifyColumnSplit(kRows, kCols, categorical, expected_tree); }); -} -} // anonymous namespace - -TEST(GrowHistMaker, ColumnSplitNumerical) { TestColumnSplit(false); } - -TEST(GrowHistMaker, ColumnSplitCategorical) { TestColumnSplit(true); } } // namespace xgboost::tree diff --git a/tests/cpp/tree/test_quantile_hist.cc b/tests/cpp/tree/test_quantile_hist.cc index ce637caa4d46..8e524dc7e979 100644 --- a/tests/cpp/tree/test_quantile_hist.cc +++ b/tests/cpp/tree/test_quantile_hist.cc @@ -12,9 +12,9 @@ #include "../../../src/tree/common_row_partitioner.h" #include "../../../src/tree/hist/expand_entry.h" // for MultiExpandEntry, CPUExpandEntry -#include "../../../src/tree/param.h" #include "../collective/test_worker.h" // for TestDistributedGlobal #include "../helpers.h" +#include "test_column_split.h" // for TestColumnSplit #include "test_partitioner.h" #include "xgboost/data.h" @@ -203,57 +203,26 @@ TEST(QuantileHist, PartitionerColSplit) { TestColumnSplitPartitioner(3); } namespace { -void VerifyColumnSplit(Context const* ctx, bst_idx_t rows, bst_feature_t cols, bst_target_t n_targets, - RegTree const& expected_tree) { - auto Xy = RandomDataGenerator{rows, cols, 0}.GenerateDMatrix(true); - linalg::Matrix gpair = GenerateRandomGradients(ctx, rows, n_targets); - - ObjInfo task{ObjInfo::kRegression}; - std::unique_ptr 
updater{TreeUpdater::Create("grow_quantile_histmaker", ctx, &task)}; - std::vector> position(1); - - std::unique_ptr sliced{Xy->SliceCol(collective::GetWorldSize(), collective::GetRank())}; - - RegTree tree{n_targets, cols}; - TrainParam param; - param.Init(Args{}); - updater->Configure(Args{}); - updater->Update(¶m, &gpair, sliced.get(), position, {&tree}); - - Json json{Object{}}; - tree.SaveModel(&json); - Json expected_json{Object{}}; - expected_tree.SaveModel(&expected_json); - ASSERT_EQ(json, expected_json); -} - -void TestColumnSplit(bst_target_t n_targets) { - auto constexpr kRows = 32; - auto constexpr kCols = 16; - - RegTree expected_tree{n_targets, kCols}; - ObjInfo task{ObjInfo::kRegression}; - Context ctx; - { - auto Xy = RandomDataGenerator{kRows, kCols, 0}.GenerateDMatrix(true); - auto gpair = GenerateRandomGradients(&ctx, kRows, n_targets); - std::unique_ptr updater{ - TreeUpdater::Create("grow_quantile_histmaker", &ctx, &task)}; - std::vector> position(1); - TrainParam param; - param.Init(Args{}); - updater->Configure(Args{}); - updater->Update(¶m, &gpair, Xy.get(), position, {&expected_tree}); +class TestHistColSplit : public ::testing::TestWithParam> { + public: + void Run() { + auto [n_targets, categorical, sparsity] = GetParam(); + TestColumnSplit(n_targets, categorical, "grow_quantile_histmaker", sparsity); } - - auto constexpr kWorldSize = 2; - collective::TestDistributedGlobal(kWorldSize, [&] { - VerifyColumnSplit(&ctx, kRows, kCols, n_targets, std::cref(expected_tree)); - }); -} +}; } // anonymous namespace -TEST(QuantileHist, ColumnSplit) { TestColumnSplit(1); } - -TEST(QuantileHist, ColumnSplitMultiTarget) { TestColumnSplit(3); } +TEST_P(TestHistColSplit, Basic) { this->Run(); } + +INSTANTIATE_TEST_SUITE_P(ColumnSplit, TestHistColSplit, ::testing::ValuesIn([]() { + std::vector> params; + for (auto categorical : {true, false}) { + for (auto sparsity : {0.0f, 0.6f}) { + for (bst_target_t n_targets : {1u, 3u}) { + params.emplace_back(n_targets, categorical, sparsity); + } + } + } + return params; + }())); } // namespace xgboost::tree From 9fe50c4b8dc8f3e80a3008813dc4ae4c1c3d06b8 Mon Sep 17 00:00:00 2001 From: Philip Hyunsu Cho Date: Sun, 21 Jul 2024 00:15:45 -0700 Subject: [PATCH 10/18] [backport] [CI] Build a CPU-only wheel under name `xgboost-cpu` (#10603) (#10614) * [CI] Build a CPU-only wheel under name `xgboost-cpu` (#10603) * [CI] Fix test environment. (#10609) * [CI] Fix test environment. * Remove shell. * Remove. * Update Dockerfile.i386 * replace channel for sycl dependencies (#10576) Co-authored-by: Dmitry Razdoburdin <> * Optionally skip cupy on windows. 
(#10611)

---------

Co-authored-by: Jiaming Yuan 
Co-authored-by: Dmitry Razdoburdin 
---
 .github/workflows/i386.yml | 2 +-
 dev/release-artifacts.py | 14 +++++
 doc/install.rst | 13 ++++
 python-package/pyproject.toml | 6 +-
 python-package/xgboost/testing/__init__.py | 40 ++++--------
 python-package/xgboost/testing/data.py | 31 +++++++++
 .../buildkite/build-manylinux2014-aarch64.sh | 33 ----------
 tests/buildkite/build-manylinux2014-x86_64.sh | 33 ----------
 tests/buildkite/build-manylinux2014.sh | 63 +++++++++++++++++++
 tests/buildkite/cpu_only_pypkg.patch | 55 ++++++++++++++++
 tests/buildkite/manylinux2014_warning.patch | 14 -----
 tests/buildkite/pipeline.yml | 4 +-
 tests/buildkite/remove_nccl_dep.patch | 14 +++++
 tests/ci_build/Dockerfile.i386 | 2 +-
 tests/ci_build/conda_env/linux_sycl_test.yml | 2 +-
 tests/ci_build/conda_env/macos_cpu_test.yml | 1 -
 16 files changed, 209 insertions(+), 118 deletions(-)
 delete mode 100644 tests/buildkite/build-manylinux2014-aarch64.sh
 delete mode 100644 tests/buildkite/build-manylinux2014-x86_64.sh
 create mode 100755 tests/buildkite/build-manylinux2014.sh
 create mode 100644 tests/buildkite/cpu_only_pypkg.patch
 create mode 100644 tests/buildkite/remove_nccl_dep.patch

diff --git a/.github/workflows/i386.yml b/.github/workflows/i386.yml
index 1c4e98010310..915b320e75ff 100644
--- a/.github/workflows/i386.yml
+++ b/.github/workflows/i386.yml
@@ -23,7 +23,7 @@ jobs:
       with:
         submodules: 'true'
     - name: Set up Docker Buildx
-      uses: docker/setup-buildx-action@v3
+      uses: docker/setup-buildx-action@v3.4.0
       with:
        driver-opts: network=host
    - name: Build and push container
diff --git a/dev/release-artifacts.py b/dev/release-artifacts.py
index d5f29b2d6cad..4c4340984b6e 100644
--- a/dev/release-artifacts.py
+++ b/dev/release-artifacts.py
@@ -2,6 +2,7 @@

 tqdm, sh are required to run this script.
 """
+
 import argparse
 import os
 import shutil
@@ -106,6 +107,15 @@ def make_pysrc_wheel(
     if not os.path.exists(dist):
         os.mkdir(dist)

+    # Apply patch to remove NCCL dependency
+    # Save the original content of pyproject.toml so that we can restore it later
+    with DirectoryExcursion(ROOT):
+        with open("python-package/pyproject.toml", "r") as f:
+            orig_pyproj_lines = f.read()
+        with open("tests/buildkite/remove_nccl_dep.patch", "r") as f:
+            patch_lines = f.read()
+        subprocess.run(["patch", "-p0"], input=patch_lines, text=True)
+
     with DirectoryExcursion(os.path.join(ROOT, "python-package")):
         subprocess.check_call(["python", "-m", "build", "--sdist"])
         if rc is not None:
@@ -117,6 +127,10 @@ def make_pysrc_wheel(
         target = os.path.join(dist, name)
         shutil.move(src, target)

+    with DirectoryExcursion(ROOT):
+        with open("python-package/pyproject.toml", "w") as f:
+            print(orig_pyproj_lines, file=f, end="")
+

 def download_py_packages(
     branch: str, major: int, minor: int, commit_hash: str, outdir: str
diff --git a/doc/install.rst b/doc/install.rst
index e5229702e16c..79082a7ed581 100644
--- a/doc/install.rst
+++ b/doc/install.rst
@@ -76,6 +76,19 @@ Capabilities of binary wheels for each platform:
 | Windows | |tick| | |cross| |
 +---------------------+---------+----------------------+

+Minimal installation (CPU-only)
+*******************************
+The default installation with ``pip`` will install the full XGBoost package, including support for the GPU algorithms and federated learning.
+
+You may choose to reduce the size of the installed package and save disk space by opting to install ``xgboost-cpu`` instead:
+
+.. code-block:: bash
+
+  pip install xgboost-cpu
+
+The ``xgboost-cpu`` variant will have a drastically smaller disk footprint, but does not provide some features, such as the GPU algorithms and
+federated learning.
+
 Conda
 *****

diff --git a/python-package/pyproject.toml b/python-package/pyproject.toml
index 1a46d4a52185..ed92a95aa405 100644
--- a/python-package/pyproject.toml
+++ b/python-package/pyproject.toml
@@ -7,13 +7,13 @@ build-backend = "packager.pep517"

 [project]
 name = "xgboost"
-version = "2.1.0"
+description = "XGBoost Python Package"
+readme = { file = "README.rst", content-type = "text/x-rst" }
 authors = [
     { name = "Hyunsu Cho", email = "chohyu01@cs.washington.edu" },
     { name = "Jiaming Yuan", email = "jm.yuan@outlook.com" }
 ]
-description = "XGBoost Python Package"
-readme = { file = "README.rst", content-type = "text/x-rst" }
+version = "2.1.0"
 requires-python = ">=3.8"
 license = { text = "Apache-2.0" }
 classifiers = [
diff --git a/python-package/xgboost/testing/__init__.py b/python-package/xgboost/testing/__init__.py
index 64e2a9170416..5c45345aefae 100644
--- a/python-package/xgboost/testing/__init__.py
+++ b/python-package/xgboost/testing/__init__.py
@@ -45,6 +45,7 @@
     get_cancer,
     get_digits,
     get_sparse,
+    make_batches,
     memory,
 )

@@ -161,7 +162,16 @@ def no_cudf() -> PytestSkip:


 def no_cupy() -> PytestSkip:
-    return no_mod("cupy")
+    skip_cupy = no_mod("cupy")
+    if not skip_cupy["condition"] and system() == "Windows":
+        import cupy as cp
+
+        # Cupy might run into issues on Windows due to a missing compiler
+        try:
+            cp.array([1, 2, 3]).sum()
+        except Exception:  # pylint: disable=broad-except
+            skip_cupy["condition"] = True
+    return skip_cupy


 def no_dask_cudf() -> PytestSkip:
@@ -247,34 +257,6 @@ def as_arrays(
     return X, y, w


-def make_batches(
-    n_samples_per_batch: int,
-    n_features: int,
-    n_batches: int,
-    use_cupy: bool = False,
-    *,
-    vary_size: bool = False,
-) -> Tuple[List[np.ndarray], List[np.ndarray], List[np.ndarray]]:
-    X = []
-    y = []
-    w = []
-    if use_cupy:
-        import cupy
-
-        rng = cupy.random.RandomState(1994)
-    else:
-        rng = np.random.RandomState(1994)
-    for i in range(n_batches):
-        n_samples = n_samples_per_batch + i * 10 if vary_size else n_samples_per_batch
-        _X = rng.randn(n_samples, n_features)
-        _y = rng.randn(n_samples)
-        _w = rng.uniform(low=0, high=1, size=n_samples)
-        X.append(_X)
-        y.append(_y)
-        w.append(_w)
-    return X, y, w
-
-
 def make_regression(
     n_samples: int, n_features: int, use_cupy: bool
 ) -> Tuple[ArrayLike, ArrayLike, ArrayLike]:
diff --git a/python-package/xgboost/testing/data.py b/python-package/xgboost/testing/data.py
index 0c4f290086d1..0f2d65cc0afa 100644
--- a/python-package/xgboost/testing/data.py
+++ b/python-package/xgboost/testing/data.py
@@ -9,6 +9,7 @@
     Callable,
     Dict,
     Generator,
+    List,
     NamedTuple,
     Optional,
     Tuple,
@@ -501,6 +502,36 @@ def get_mq2008(
     )


+def make_batches(  # pylint: disable=too-many-arguments,too-many-locals
+    n_samples_per_batch: int,
+    n_features: int,
+    n_batches: int,
+    use_cupy: bool = False,
+    *,
+    vary_size: bool = False,
+    random_state: int = 1994,
+) -> Tuple[List[np.ndarray], List[np.ndarray], List[np.ndarray]]:
+    """Make batches of dense data."""
+    X = []
+    y = []
+    w = []
+    if use_cupy:
+        import cupy  # pylint: disable=import-error
+
+        rng = cupy.random.RandomState(random_state)
+    else:
+        rng = np.random.RandomState(random_state)
+    for i in range(n_batches):
+        n_samples = n_samples_per_batch + i * 10 if vary_size else n_samples_per_batch
+        _X = rng.randn(n_samples, n_features)
+        _y = rng.randn(n_samples)
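+        # Per-sample weights drawn uniformly from [0, 1).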
+ _w = rng.uniform(low=0, high=1, size=n_samples) + X.append(_X) + y.append(_y) + w.append(_w) + return X, y, w + + RelData = Tuple[sparse.csr_matrix, npt.NDArray[np.int32], npt.NDArray[np.int32]] diff --git a/tests/buildkite/build-manylinux2014-aarch64.sh b/tests/buildkite/build-manylinux2014-aarch64.sh deleted file mode 100644 index 802db3f66aaf..000000000000 --- a/tests/buildkite/build-manylinux2014-aarch64.sh +++ /dev/null @@ -1,33 +0,0 @@ -#!/bin/bash - -set -euo pipefail - -source tests/buildkite/conftest.sh - -WHEEL_TAG=manylinux2014_aarch64 -command_wrapper="tests/ci_build/ci_build.sh manylinux2014_aarch64" -python_bin="/opt/python/cp310-cp310/bin/python" - -echo "--- Build binary wheel for ${WHEEL_TAG}" -# Patch to add warning about manylinux2014 variant -patch -p0 < tests/buildkite/manylinux2014_warning.patch -$command_wrapper bash -c \ - "cd python-package && ${python_bin} -m pip wheel --no-deps -vvv . --wheel-dir dist/" -git checkout python-package/xgboost/core.py # discard the patch - -$command_wrapper auditwheel repair --plat ${WHEEL_TAG} python-package/dist/*.whl -$command_wrapper ${python_bin} tests/ci_build/rename_whl.py \ - --wheel-path wheelhouse/*.whl \ - --commit-hash ${BUILDKITE_COMMIT} \ - --platform-tag ${WHEEL_TAG} -rm -rf python-package/dist/ -mkdir python-package/dist/ -mv -v wheelhouse/*.whl python-package/dist/ - -echo "--- Upload Python wheel" -buildkite-agent artifact upload python-package/dist/*.whl -if [[ ($is_pull_request == 0) && ($is_release_branch == 1) ]] -then - aws s3 cp python-package/dist/*.whl s3://xgboost-nightly-builds/${BRANCH_NAME}/ \ - --acl public-read --no-progress -fi diff --git a/tests/buildkite/build-manylinux2014-x86_64.sh b/tests/buildkite/build-manylinux2014-x86_64.sh deleted file mode 100644 index b00616315b8d..000000000000 --- a/tests/buildkite/build-manylinux2014-x86_64.sh +++ /dev/null @@ -1,33 +0,0 @@ -#!/bin/bash - -set -euo pipefail - -source tests/buildkite/conftest.sh - -WHEEL_TAG=manylinux2014_x86_64 -command_wrapper="tests/ci_build/ci_build.sh manylinux2014_x86_64" -python_bin="/opt/python/cp310-cp310/bin/python" - -echo "--- Build binary wheel for ${WHEEL_TAG}" -# Patch to add warning about manylinux2014 variant -patch -p0 < tests/buildkite/manylinux2014_warning.patch -$command_wrapper bash -c \ - "cd python-package && ${python_bin} -m pip wheel --no-deps -vvv . 
--wheel-dir dist/"
-git checkout python-package/xgboost/core.py  # discard the patch
-
-$command_wrapper auditwheel repair --plat ${WHEEL_TAG} python-package/dist/*.whl
-$command_wrapper ${python_bin} tests/ci_build/rename_whl.py \
-  --wheel-path wheelhouse/*.whl \
-  --commit-hash ${BUILDKITE_COMMIT} \
-  --platform-tag ${WHEEL_TAG}
-rm -rf python-package/dist/
-mkdir python-package/dist/
-mv -v wheelhouse/*.whl python-package/dist/
-
-echo "--- Upload Python wheel"
-buildkite-agent artifact upload python-package/dist/*.whl
-if [[ ($is_pull_request == 0) && ($is_release_branch == 1) ]]
-then
-  aws s3 cp python-package/dist/*.whl s3://xgboost-nightly-builds/${BRANCH_NAME}/ \
-    --acl public-read --no-progress
-fi
diff --git a/tests/buildkite/build-manylinux2014.sh b/tests/buildkite/build-manylinux2014.sh
new file mode 100755
index 000000000000..426d32b5c361
--- /dev/null
+++ b/tests/buildkite/build-manylinux2014.sh
@@ -0,0 +1,63 @@
+#!/bin/bash
+
+set -euo pipefail
+
+if [ $# -ne 1 ]; then
+  echo "Usage: $0 {x86_64,aarch64}"
+  exit 1
+fi
+
+arch=$1
+
+source tests/buildkite/conftest.sh
+
+WHEEL_TAG="manylinux2014_${arch}"
+command_wrapper="tests/ci_build/ci_build.sh ${WHEEL_TAG}"
+python_bin="/opt/python/cp310-cp310/bin/python"
+
+echo "--- Build binary wheel for ${WHEEL_TAG}"
+# Patches: remove the NCCL dependency; add a warning about the manylinux2014 variant
+patch -p0 < tests/buildkite/remove_nccl_dep.patch
+patch -p0 < tests/buildkite/manylinux2014_warning.patch
+$command_wrapper bash -c \
+  "cd python-package && ${python_bin} -m pip wheel --no-deps -v . --wheel-dir dist/"
+git checkout python-package/pyproject.toml python-package/xgboost/core.py  # discard the patch
+
+$command_wrapper auditwheel repair --plat ${WHEEL_TAG} python-package/dist/*.whl
+$command_wrapper ${python_bin} tests/ci_build/rename_whl.py \
+  --wheel-path wheelhouse/*.whl \
+  --commit-hash ${BUILDKITE_COMMIT} \
+  --platform-tag ${WHEEL_TAG}
+rm -rf python-package/dist/
+mkdir python-package/dist/
+mv -v wheelhouse/*.whl python-package/dist/
+
+echo "--- Build binary wheel for ${WHEEL_TAG} (CPU only)"
+# Patches: remove the NCCL dependency; rename the package to xgboost-cpu
+patch -p0 < tests/buildkite/remove_nccl_dep.patch
+patch -p0 < tests/buildkite/cpu_only_pypkg.patch
+$command_wrapper bash -c \
+  "cd python-package && ${python_bin} -m pip wheel --no-deps -v . --wheel-dir dist/"
+git checkout python-package/pyproject.toml  # discard the patch
+
+$command_wrapper auditwheel repair --plat ${WHEEL_TAG} python-package/dist/xgboost_cpu-*.whl
+$command_wrapper ${python_bin} tests/ci_build/rename_whl.py \
+  --wheel-path wheelhouse/xgboost_cpu-*.whl \
+  --commit-hash ${BUILDKITE_COMMIT} \
+  --platform-tag ${WHEEL_TAG}
+rm -v python-package/dist/xgboost_cpu-*.whl
+mv -v wheelhouse/xgboost_cpu-*.whl python-package/dist/
+
+echo "--- Upload Python wheel"
+for wheel in python-package/dist/*.whl
+do
+  buildkite-agent artifact upload "${wheel}"
+done
+if [[ ($is_pull_request == 0) && ($is_release_branch == 1) ]]
+then
+  for wheel in python-package/dist/*.whl
+  do
+    aws s3 cp "${wheel}" s3://xgboost-nightly-builds/${BRANCH_NAME}/ \
+      --acl public-read --no-progress
+  done
+fi
diff --git a/tests/buildkite/cpu_only_pypkg.patch b/tests/buildkite/cpu_only_pypkg.patch
new file mode 100644
index 000000000000..765ac5c098d0
--- /dev/null
+++ b/tests/buildkite/cpu_only_pypkg.patch
@@ -0,0 +1,55 @@
+diff --git python-package/README.rst python-package/README.rst
+index 1fc0bb5a0..f1c68470b 100644
+--- python-package/README.rst
++++ python-package/README.rst
+@@ -1,20 +1,15 @@
+-======================
+-XGBoost Python Package
+-======================
++=================================
++XGBoost Python Package (CPU only)
++=================================
+
+ |PyPI version|
+
+-Installation
+-============
++The ``xgboost-cpu`` package provides a minimal installation, with no support for the GPU algorithms
++or federated learning. It is provided to allow XGBoost to be installed in space-constrained
++environments.
+
+-From `PyPI `_
+----------------------------------------------------
++Note. The ``xgboost-cpu`` package is only provided for x86_64 (amd64) Linux and Windows platforms.
++For other platforms, please install ``xgboost`` from https://pypi.org/project/xgboost/.
+
+-For a stable version, install using ``pip``::
+-
+-  pip install xgboost
+-
+-.. |PyPI version| image:: https://badge.fury.io/py/xgboost.svg
+-   :target: http://badge.fury.io/py/xgboost
+-
+-For building from source, see `build `_.
++Note. ``xgboost-cpu`` does not provide an sdist (source distribution). You may install the sdist
++from https://pypi.org/project/xgboost/.
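++
++To install the CPU-only variant from PyPI::
++
++  pip install xgboost-cpu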
+diff --git python-package/pyproject.toml python-package/pyproject.toml +index 46c1451c2..c5dc908d9 100644 +--- python-package/pyproject.toml ++++ python-package/pyproject.toml +@@ -6,7 +6,7 @@ backend-path = ["."] + build-backend = "packager.pep517" + + [project] +-name = "xgboost" ++name = "xgboost-cpu" + description = "XGBoost Python Package" + readme = { file = "README.rst", content-type = "text/x-rst" } + authors = [ +@@ -82,3 +82,6 @@ class-attribute-naming-style = "snake_case" + + # Allow single-letter variables + variable-rgx = "[a-zA-Z_][a-z0-9_]{0,30}$" ++ ++[tool.hatch.build.targets.wheel] ++packages = ["xgboost/"] diff --git a/tests/buildkite/manylinux2014_warning.patch b/tests/buildkite/manylinux2014_warning.patch index 692a92672d2f..679205988b7a 100644 --- a/tests/buildkite/manylinux2014_warning.patch +++ b/tests/buildkite/manylinux2014_warning.patch @@ -1,17 +1,3 @@ -diff --git python-package/pyproject.toml python-package/pyproject.toml -index a273d8c13..dee49686a 100644 ---- python-package/pyproject.toml -+++ python-package/pyproject.toml -@@ -30,8 +30,7 @@ classifiers = [ - ] - dependencies = [ - "numpy", -- "scipy", -- "nvidia-nccl-cu12 ; platform_system == 'Linux' and platform_machine != 'aarch64'" -+ "scipy" - ] - - [project.urls] diff --git python-package/xgboost/core.py python-package/xgboost/core.py index e8bc735e6..030972ef2 100644 --- python-package/xgboost/core.py diff --git a/tests/buildkite/pipeline.yml b/tests/buildkite/pipeline.yml index acdb71dba529..ee9637b8bd25 100644 --- a/tests/buildkite/pipeline.yml +++ b/tests/buildkite/pipeline.yml @@ -72,12 +72,12 @@ steps: agents: queue: linux-amd64-cpu - label: ":console: Build manylinux2014_x86_64 wheel" - command: "tests/buildkite/build-manylinux2014-x86_64.sh" + command: "tests/buildkite/build-manylinux2014.sh x86_64" key: build-manylinux2014-x86_64 agents: queue: linux-amd64-cpu - label: ":console: Build manylinux2014_aarch64 wheel" - command: "tests/buildkite/build-manylinux2014-aarch64.sh" + command: "tests/buildkite/build-manylinux2014.sh aarch64" key: build-manylinux2014-aarch64 agents: queue: linux-arm64-cpu diff --git a/tests/buildkite/remove_nccl_dep.patch b/tests/buildkite/remove_nccl_dep.patch new file mode 100644 index 000000000000..a2a4a5c88289 --- /dev/null +++ b/tests/buildkite/remove_nccl_dep.patch @@ -0,0 +1,14 @@ +diff --git python-package/pyproject.toml python-package/pyproject.toml +index 8835def25..46c1451c2 100644 +--- python-package/pyproject.toml ++++ python-package/pyproject.toml +@@ -30,8 +30,7 @@ classifiers = [ + ] + dependencies = [ + "numpy", +- "scipy", +- "nvidia-nccl-cu12 ; platform_system == 'Linux' and platform_machine != 'aarch64'" ++ "scipy" + ] + + [project.urls] diff --git a/tests/ci_build/Dockerfile.i386 b/tests/ci_build/Dockerfile.i386 index d7c133e2aee4..a582a54020bb 100644 --- a/tests/ci_build/Dockerfile.i386 +++ b/tests/ci_build/Dockerfile.i386 @@ -1,7 +1,7 @@ FROM i386/debian:sid ENV DEBIAN_FRONTEND noninteractive -SHELL ["/bin/bash", "-c"] # Use Bash as shell +SHELL ["/bin/bash", "-c"] RUN \ apt-get update && \ diff --git a/tests/ci_build/conda_env/linux_sycl_test.yml b/tests/ci_build/conda_env/linux_sycl_test.yml index 7335b7f20fd5..e82a6bed62f5 100644 --- a/tests/ci_build/conda_env/linux_sycl_test.yml +++ b/tests/ci_build/conda_env/linux_sycl_test.yml @@ -1,7 +1,7 @@ name: linux_sycl_test channels: - conda-forge -- intel +- https://software.repos.intel.com/python/conda/ dependencies: - python=3.8 - cmake diff --git a/tests/ci_build/conda_env/macos_cpu_test.yml 
b/tests/ci_build/conda_env/macos_cpu_test.yml
index ce9ca4b1ba7c..e2e377e2145d 100644
--- a/tests/ci_build/conda_env/macos_cpu_test.yml
+++ b/tests/ci_build/conda_env/macos_cpu_test.yml
@@ -38,4 +38,3 @@ dependencies:
 - cloudpickle
 - pip:
   - sphinx_rtd_theme
-  - py-ubjson

From 7643306c60c04ec03d6a144fc0f780417885bad3 Mon Sep 17 00:00:00 2001
From: Jiaming Yuan 
Date: Tue, 23 Jul 2024 05:43:34 +0800
Subject: [PATCH 11/18] [backport] Catch exceptions during file read. (#10623)
 (#10625)

---
 src/common/threading_utils.cc | 27 ++++++++++++++++++---------
 1 file changed, 18 insertions(+), 9 deletions(-)

diff --git a/src/common/threading_utils.cc b/src/common/threading_utils.cc
index 5e730e96d34e..1f4d5be2f361 100644
--- a/src/common/threading_utils.cc
+++ b/src/common/threading_utils.cc
@@ -74,17 +74,26 @@ std::int32_t GetCGroupV2Count(std::filesystem::path const& bandwidth_path) noexc

 std::int32_t GetCfsCPUCount() noexcept {
   namespace fs = std::filesystem;
-  fs::path const bandwidth_path{"/sys/fs/cgroup/cpu.max"};
-  auto has_v2 = fs::exists(bandwidth_path);
-  if (has_v2) {
-    return GetCGroupV2Count(bandwidth_path);
+
+  try {
+    fs::path const bandwidth_path{"/sys/fs/cgroup/cpu.max"};
+    auto has_v2 = fs::exists(bandwidth_path);
+    if (has_v2) {
+      return GetCGroupV2Count(bandwidth_path);
+    }
+  } catch (std::exception const&) {
+    return -1;
   }

-  fs::path const quota_path{"/sys/fs/cgroup/cpu/cpu.cfs_quota_us"};
-  fs::path const peroid_path{"/sys/fs/cgroup/cpu/cpu.cfs_period_us"};
-  auto has_v1 = fs::exists(quota_path) && fs::exists(peroid_path);
-  if (has_v1) {
-    return GetCGroupV1Count(quota_path, peroid_path);
+  try {
+    fs::path const quota_path{"/sys/fs/cgroup/cpu/cpu.cfs_quota_us"};
+    fs::path const peroid_path{"/sys/fs/cgroup/cpu/cpu.cfs_period_us"};
+    auto has_v1 = fs::exists(quota_path) && fs::exists(peroid_path);
+    if (has_v1) {
+      return GetCGroupV1Count(quota_path, peroid_path);
+    }
+  } catch (std::exception const&) {
+    return -1;
  }

   return -1;

From 3fd845191f12bab50ae6d29a5a75e8b3cd3c4dff Mon Sep 17 00:00:00 2001
From: Jiaming Yuan 
Date: Thu, 25 Jul 2024 15:23:03 +0800
Subject: [PATCH 12/18] [backport][dask] Disable `broadcast` in the `scatter`
 call. (#10632) (#10634)

---
 python-package/xgboost/dask/__init__.py | 10 ++++++----
 1 file changed, 6 insertions(+), 4 deletions(-)

diff --git a/python-package/xgboost/dask/__init__.py b/python-package/xgboost/dask/__init__.py
index c5140a1fe552..44eae0c51837 100644
--- a/python-package/xgboost/dask/__init__.py
+++ b/python-package/xgboost/dask/__init__.py
@@ -1237,12 +1237,14 @@ def _infer_predict_output(
 async def _get_model_future(
     client: "distributed.Client", model: Union[Booster, Dict, "distributed.Future"]
 ) -> "distributed.Future":
-    # See https://github.com/dask/dask/issues/11179#issuecomment-2168094529 for
-    # the use of hash.
+    # See https://github.com/dask/dask/issues/11179#issuecomment-2168094529 for the use
+    # of hash.
+    # https://github.com/dask/distributed/pull/8796: don't use broadcast in the `scatter`
+    # call; otherwise, the predict function might hang.
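+    # Without broadcast, the booster is placed on a single worker and is
+    # fetched by the remaining workers on demand.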
if isinstance(model, Booster): - booster = await client.scatter(model, broadcast=True, hash=False) + booster = await client.scatter(model, hash=False) elif isinstance(model, dict): - booster = await client.scatter(model["booster"], broadcast=True, hash=False) + booster = await client.scatter(model["booster"], hash=False) elif isinstance(model, distributed.Future): booster = model t = booster.type From 61e03713760fb74ba6474422d52265ca727304cb Mon Sep 17 00:00:00 2001 From: Jiaming Yuan Date: Fri, 26 Jul 2024 01:12:14 +0800 Subject: [PATCH 13/18] [backport] Partial fix for CTK 12.5 (#10574) (#10638) --- src/tree/updater_gpu_common.cuh | 21 ++++++++------------- src/tree/updater_gpu_hist.cu | 1 + 2 files changed, 9 insertions(+), 13 deletions(-) diff --git a/src/tree/updater_gpu_common.cuh b/src/tree/updater_gpu_common.cuh index 1c3e6a55277d..5d999d6d6e01 100644 --- a/src/tree/updater_gpu_common.cuh +++ b/src/tree/updater_gpu_common.cuh @@ -1,18 +1,13 @@ -/*! - * Copyright 2017-2019 XGBoost contributors +/** + * Copyright 2017-2024, XGBoost contributors */ #pragma once -#include -#include -#include -#include -#include -#include -#include "../common/categorical.h" -#include "../common/device_helpers.cuh" -#include "../common/random.h" +#include // for numeric_limits +#include // for ostream + #include "gpu_hist/histogram.cuh" #include "param.h" +#include "xgboost/base.h" namespace xgboost::tree { struct GPUTrainingParam { @@ -54,8 +49,8 @@ enum DefaultDirection { }; struct DeviceSplitCandidate { - float loss_chg {-FLT_MAX}; - DefaultDirection dir {kLeftDir}; + float loss_chg{-std::numeric_limits::max()}; + DefaultDirection dir{kLeftDir}; int findex {-1}; float fvalue {0}; // categorical split, either it's the split category for OHE or the threshold for partition-based diff --git a/src/tree/updater_gpu_hist.cu b/src/tree/updater_gpu_hist.cu index 958fa0331569..e126aeb313df 100644 --- a/src/tree/updater_gpu_hist.cu +++ b/src/tree/updater_gpu_hist.cu @@ -19,6 +19,7 @@ #include "../common/cuda_context.cuh" // CUDAContext #include "../common/device_helpers.cuh" #include "../common/hist_util.h" +#include "../common/random.h" // for ColumnSampler, GlobalRandom #include "../common/timer.h" #include "../data/ellpack_page.cuh" #include "../data/ellpack_page.h" From 80d60df83f15dc8ced4e18b36b95a2bbd777a230 Mon Sep 17 00:00:00 2001 From: Philip Hyunsu Cho Date: Thu, 25 Jul 2024 22:36:56 -0700 Subject: [PATCH 14/18] [backport] Allow building with CCCL that's newer than CTK (#10624, #10633) (#10640) --- CMakeLists.txt | 18 ++++++++++++++++++ cmake/Utils.cmake | 19 +++++++++++++++---- tests/buildkite/build-cuda-with-rmm.sh | 2 +- 3 files changed, 34 insertions(+), 5 deletions(-) diff --git a/CMakeLists.txt b/CMakeLists.txt index c4ca82937db7..32ec64305f08 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -224,6 +224,24 @@ if(USE_CUDA) add_subdirectory(${PROJECT_SOURCE_DIR}/gputreeshap) find_package(CUDAToolkit REQUIRED) + find_package(CCCL CONFIG) + if(NOT CCCL_FOUND) + message(STATUS "Standalone CCCL not found. Attempting to use CCCL from CUDA Toolkit...") + find_package(CCCL CONFIG + HINTS ${CUDAToolkit_LIBRARY_DIR}/cmake) + if(NOT CCCL_FOUND) + message(STATUS "Could not locate CCCL from CUDA Toolkit. 
Using Thrust and CUB from CUDA Toolkit...") + find_package(libcudacxx CONFIG REQUIRED + HINTS ${CUDAToolkit_LIBRARY_DIR}/cmake) + find_package(CUB CONFIG REQUIRED + HINTS ${CUDAToolkit_LIBRARY_DIR}/cmake) + find_package(Thrust CONFIG REQUIRED + HINTS ${CUDAToolkit_LIBRARY_DIR}/cmake) + thrust_create_target(Thrust HOST CPP DEVICE CUDA) + add_library(CCCL::CCCL INTERFACE IMPORTED GLOBAL) + target_link_libraries(CCCL::CCCL INTERFACE libcudacxx::libcudacxx CUB::CUB Thrust) + endif() + endif() endif() if(FORCE_COLORED_OUTPUT AND (CMAKE_GENERATOR STREQUAL "Ninja") AND diff --git a/cmake/Utils.cmake b/cmake/Utils.cmake index d555f5edff45..b12302a166c0 100644 --- a/cmake/Utils.cmake +++ b/cmake/Utils.cmake @@ -108,12 +108,24 @@ function(xgboost_set_cuda_flags target) target_compile_definitions(${target} PRIVATE -DXGBOOST_USE_NVTX=1) endif() + # Use CCCL we find before CUDA Toolkit to make sure we get newer headers as intended + # The CUDA Toolkit includes its own copy of CCCL that often lags the latest releases + # (and would be picked up otherwise) + if(BUILD_STATIC_LIB) + # If the downstream user is statically linking with libxgboost, it needs to + # explicitly link with CCCL and CUDA runtime. + target_link_libraries(${target} + PUBLIC CCCL::CCCL CUDA::cudart_static) + else() + # If the downstream user is dynamically linking with libxgboost, it does not + # need to link with CCCL and CUDA runtime. + target_link_libraries(${target} + PRIVATE CCCL::CCCL CUDA::cudart_static) + endif() target_compile_definitions(${target} PRIVATE -DXGBOOST_USE_CUDA=1) target_include_directories( ${target} PRIVATE - ${xgboost_SOURCE_DIR}/gputreeshap - ${xgboost_SOURCE_DIR}/rabit/include - ${CUDAToolkit_INCLUDE_DIRS}) + ${xgboost_SOURCE_DIR}/gputreeshap) if(MSVC) target_compile_options(${target} PRIVATE @@ -240,7 +252,6 @@ macro(xgboost_target_link_libraries target) if(USE_CUDA) xgboost_set_cuda_flags(${target}) - target_link_libraries(${target} PUBLIC CUDA::cudart_static) endif() if(PLUGIN_RMM) diff --git a/tests/buildkite/build-cuda-with-rmm.sh b/tests/buildkite/build-cuda-with-rmm.sh index 126bc6b3f28b..f1d3f1b1c91a 100755 --- a/tests/buildkite/build-cuda-with-rmm.sh +++ b/tests/buildkite/build-cuda-with-rmm.sh @@ -22,7 +22,7 @@ command_wrapper="tests/ci_build/ci_build.sh gpu_build_rockylinux8 --build-arg "` echo "--- Build libxgboost from the source" $command_wrapper tests/ci_build/build_via_cmake.sh \ - -DCMAKE_PREFIX_PATH="/opt/grpc;/opt/rmm" \ + -DCMAKE_PREFIX_PATH="/opt/grpc;/opt/rmm;/opt/rmm/lib64/rapids/cmake" \ -DUSE_CUDA=ON \ -DUSE_OPENMP=ON \ -DHIDE_CXX_SYMBOLS=ON \ From 5fdb39d5baac9bd8ccbed36afc4f8123fe63c4ce Mon Sep 17 00:00:00 2001 From: Philip Hyunsu Cho Date: Mon, 29 Jul 2024 17:41:19 -0700 Subject: [PATCH 15/18] Add `Library\mingw-w64` to Windows search path (#10643) (#10644) Co-authored-by: jakirkham --- python-package/packager/nativelib.py | 3 +++ python-package/xgboost/libpath.py | 3 +++ 2 files changed, 6 insertions(+) diff --git a/python-package/packager/nativelib.py b/python-package/packager/nativelib.py index 1852e0d2d909..1a3df2ea8989 100644 --- a/python-package/packager/nativelib.py +++ b/python-package/packager/nativelib.py @@ -138,6 +138,9 @@ def locate_or_build_libxgboost( sys_prefix / "Library", sys_prefix / "Library" / "bin", sys_prefix / "Library" / "lib", + sys_prefix / "Library" / "mingw-w64", + sys_prefix / "Library" / "mingw-w64" / "bin", + sys_prefix / "Library" / "mingw-w64" / "lib", ] sys_prefix_candidates = [ p.expanduser().resolve() for p in sys_prefix_candidates diff --git 
a/python-package/xgboost/libpath.py b/python-package/xgboost/libpath.py index 58c78df090ae..92d46a0bb77d 100644 --- a/python-package/xgboost/libpath.py +++ b/python-package/xgboost/libpath.py @@ -38,6 +38,9 @@ def find_lib_path() -> List[str]: os.path.join(sys.base_prefix, "Library"), os.path.join(sys.base_prefix, "Library", "bin"), os.path.join(sys.base_prefix, "Library", "lib"), + os.path.join(sys.base_prefix, "Library", "mingw-w64"), + os.path.join(sys.base_prefix, "Library", "mingw-w64", "bin"), + os.path.join(sys.base_prefix, "Library", "mingw-w64", "lib"), ] ) dll_path = [os.path.join(p, "xgboost.dll") for p in dll_path] From d8b6121ae3336408c5ed79b36722775647aca784 Mon Sep 17 00:00:00 2001 From: Philip Hyunsu Cho Date: Tue, 30 Jul 2024 08:13:38 -0700 Subject: [PATCH 16/18] [backport] Allow external configuration of endianness in R package build (#10642) (#10645) * Allow external configuration of endianness in R package build (#10642) * Allow users to set endianness in R build * Run `autoreconf -vi` * Don't use :BOOL suffix * Use AC_CONFIG_HEADERS --------- Co-authored-by: Hyunsu Cho Co-authored-by: Philip Hyunsu Cho * Re-run roxygen2 --------- Co-authored-by: jakirkham --- R-package/DESCRIPTION | 2 +- R-package/config.h.in | 66 +++++ R-package/configure | 558 +++++++++++++++++++++++++++++++++++++---- R-package/configure.ac | 22 +- 4 files changed, 588 insertions(+), 60 deletions(-) create mode 100644 R-package/config.h.in diff --git a/R-package/DESCRIPTION b/R-package/DESCRIPTION index fdaf0cefba6d..6edad9d8c7bf 100644 --- a/R-package/DESCRIPTION +++ b/R-package/DESCRIPTION @@ -66,6 +66,6 @@ Imports: data.table (>= 1.9.6), jsonlite (>= 1.0) Roxygen: list(markdown = TRUE) -RoxygenNote: 7.3.1 +RoxygenNote: 7.3.2 Encoding: UTF-8 SystemRequirements: GNU make, C++17 diff --git a/R-package/config.h.in b/R-package/config.h.in new file mode 100644 index 000000000000..c503fc9044a4 --- /dev/null +++ b/R-package/config.h.in @@ -0,0 +1,66 @@ +/* config.h.in. Generated from configure.ac by autoheader. */ + +/* Define if building universal (internal helper macro) */ +#undef AC_APPLE_UNIVERSAL_BUILD + +/* Define to 1 if you have the header file. */ +#undef HAVE_INTTYPES_H + +/* Define to 1 if you have the header file. */ +#undef HAVE_STDINT_H + +/* Define to 1 if you have the header file. */ +#undef HAVE_STDIO_H + +/* Define to 1 if you have the header file. */ +#undef HAVE_STDLIB_H + +/* Define to 1 if you have the header file. */ +#undef HAVE_STRINGS_H + +/* Define to 1 if you have the header file. */ +#undef HAVE_STRING_H + +/* Define to 1 if you have the header file. */ +#undef HAVE_SYS_STAT_H + +/* Define to 1 if you have the header file. */ +#undef HAVE_SYS_TYPES_H + +/* Define to 1 if you have the header file. */ +#undef HAVE_UNISTD_H + +/* Define to the address where bug reports for this package should be sent. */ +#undef PACKAGE_BUGREPORT + +/* Define to the full name of this package. */ +#undef PACKAGE_NAME + +/* Define to the full name and version of this package. */ +#undef PACKAGE_STRING + +/* Define to the one symbol short name of this package. */ +#undef PACKAGE_TARNAME + +/* Define to the home page for this package. */ +#undef PACKAGE_URL + +/* Define to the version of this package. */ +#undef PACKAGE_VERSION + +/* Define to 1 if all of the C90 standard headers exist (not just the ones + required in a freestanding environment). This macro is provided for + backward compatibility; new code need not use it. 
*/ +#undef STDC_HEADERS + +/* Define WORDS_BIGENDIAN to 1 if your processor stores words with the most + significant byte first (like Motorola and SPARC, unlike Intel). */ +#if defined AC_APPLE_UNIVERSAL_BUILD +# if defined __BIG_ENDIAN__ +# define WORDS_BIGENDIAN 1 +# endif +#else +# ifndef WORDS_BIGENDIAN +# undef WORDS_BIGENDIAN +# endif +#endif diff --git a/R-package/configure b/R-package/configure index 3bbfa71503fb..617dc003f657 100755 --- a/R-package/configure +++ b/R-package/configure @@ -612,12 +612,45 @@ PACKAGE_STRING='xgboost 2.1.0' PACKAGE_BUGREPORT='' PACKAGE_URL='' +# Factoring default headers for most tests. +ac_includes_default="\ +#include +#ifdef HAVE_STDIO_H +# include +#endif +#ifdef HAVE_STDLIB_H +# include +#endif +#ifdef HAVE_STRING_H +# include +#endif +#ifdef HAVE_INTTYPES_H +# include +#endif +#ifdef HAVE_STDINT_H +# include +#endif +#ifdef HAVE_STRINGS_H +# include +#endif +#ifdef HAVE_SYS_TYPES_H +# include +#endif +#ifdef HAVE_SYS_STAT_H +# include +#endif +#ifdef HAVE_UNISTD_H +# include +#endif" + +ac_header_cxx_list= ac_subst_vars='LTLIBOBJS LIBOBJS BACKTRACE_LIB ENDIAN_FLAG OPENMP_LIB OPENMP_CXXFLAGS +USE_LITTLE_ENDIAN OBJEXT EXEEXT ac_ct_CXX @@ -676,7 +709,8 @@ CXXFLAGS LDFLAGS LIBS CPPFLAGS -CCC' +CCC +USE_LITTLE_ENDIAN' # Initialize some variables set by options. @@ -1299,6 +1333,9 @@ Some influential environment variables: LIBS libraries to pass to the linker, e.g. -l CPPFLAGS (Objective) C/C++ preprocessor flags, e.g. -I if you have headers in a nonstandard directory + USE_LITTLE_ENDIAN + "Whether to build with little endian (checks at compile time if + unset)" Use these variables to override the choices made by `configure' or to help it to find libraries and programs with nonstandard names/locations. @@ -1509,6 +1546,39 @@ fi as_fn_set_status $ac_retval } # ac_fn_cxx_try_run + +# ac_fn_cxx_check_header_compile LINENO HEADER VAR INCLUDES +# --------------------------------------------------------- +# Tests whether HEADER exists and can be compiled using the include files in +# INCLUDES, setting the cache variable VAR accordingly. +ac_fn_cxx_check_header_compile () +{ + as_lineno=${as_lineno-"$1"} as_lineno_stack=as_lineno_stack=$as_lineno_stack + { printf "%s\n" "$as_me:${as_lineno-$LINENO}: checking for $2" >&5 +printf %s "checking for $2... " >&6; } +if eval test \${$3+y} +then : + printf %s "(cached) " >&6 +else $as_nop + cat confdefs.h - <<_ACEOF >conftest.$ac_ext +/* end confdefs.h. 
*/ +$4 +#include <$2> +_ACEOF +if ac_fn_cxx_try_compile "$LINENO" +then : + eval "$3=yes" +else $as_nop + eval "$3=no" +fi +rm -f core conftest.err conftest.$ac_objext conftest.beam conftest.$ac_ext +fi +eval ac_res=\$$3 + { printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: $ac_res" >&5 +printf "%s\n" "$ac_res" >&6; } + eval $as_lineno_stack; ${as_lineno_stack:+:} unset as_lineno + +} # ac_fn_cxx_check_header_compile ac_configure_args_raw= for ac_arg do @@ -2020,6 +2090,15 @@ main (int argc, char **argv) } " +as_fn_append ac_header_cxx_list " stdio.h stdio_h HAVE_STDIO_H" +as_fn_append ac_header_cxx_list " stdlib.h stdlib_h HAVE_STDLIB_H" +as_fn_append ac_header_cxx_list " string.h string_h HAVE_STRING_H" +as_fn_append ac_header_cxx_list " inttypes.h inttypes_h HAVE_INTTYPES_H" +as_fn_append ac_header_cxx_list " stdint.h stdint_h HAVE_STDINT_H" +as_fn_append ac_header_cxx_list " strings.h strings_h HAVE_STRINGS_H" +as_fn_append ac_header_cxx_list " sys/stat.h sys_stat_h HAVE_SYS_STAT_H" +as_fn_append ac_header_cxx_list " sys/types.h sys_types_h HAVE_SYS_TYPES_H" +as_fn_append ac_header_cxx_list " unistd.h unistd_h HAVE_UNISTD_H" # Check that the precious variables saved in the cache have kept the same # value. ac_cache_corrupted=false @@ -2792,38 +2871,289 @@ fi ### Endian detection -{ printf "%s\n" "$as_me:${as_lineno-$LINENO}: checking endian" >&5 -printf %s "checking endian... " >&6; } -{ printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: " >&5 -printf "%s\n" "" >&6; } -if test "$cross_compiling" = yes + +ac_header= ac_cache= +for ac_item in $ac_header_cxx_list +do + if test $ac_cache; then + ac_fn_cxx_check_header_compile "$LINENO" $ac_header ac_cv_header_$ac_cache "$ac_includes_default" + if eval test \"x\$ac_cv_header_$ac_cache\" = xyes; then + printf "%s\n" "#define $ac_item 1" >> confdefs.h + fi + ac_header= ac_cache= + elif test $ac_header; then + ac_cache=$ac_item + else + ac_header=$ac_item + fi +done + + + + + + + + +if test $ac_cv_header_stdlib_h = yes && test $ac_cv_header_string_h = yes then : - { { printf "%s\n" "$as_me:${as_lineno-$LINENO}: error: in \`$ac_pwd':" >&5 -printf "%s\n" "$as_me: error: in \`$ac_pwd':" >&2;} -as_fn_error $? "cannot run test program while cross compiling -See \`config.log' for more details" "$LINENO" 5; } + +printf "%s\n" "#define STDC_HEADERS 1" >>confdefs.h + +fi +if test -z "${USE_LITTLE_ENDIAN+x}" +then : + + { printf "%s\n" "$as_me:${as_lineno-$LINENO}: Checking system endianness as USE_LITTLE_ENDIAN is unset" >&5 +printf "%s\n" "$as_me: Checking system endianness as USE_LITTLE_ENDIAN is unset" >&6;} + { printf "%s\n" "$as_me:${as_lineno-$LINENO}: checking system endianness" >&5 +printf %s "checking system endianness... " >&6; } + { printf "%s\n" "$as_me:${as_lineno-$LINENO}: checking whether byte ordering is bigendian" >&5 +printf %s "checking whether byte ordering is bigendian... " >&6; } +if test ${ac_cv_c_bigendian+y} +then : + printf %s "(cached) " >&6 +else $as_nop + ac_cv_c_bigendian=unknown + # See if we're dealing with a universal compiler. + cat confdefs.h - <<_ACEOF >conftest.$ac_ext +/* end confdefs.h. */ +#ifndef __APPLE_CC__ + not a universal capable compiler + #endif + typedef int dummy; + +_ACEOF +if ac_fn_cxx_try_compile "$LINENO" +then : + + # Check for potential -arch flags. It is not universal unless + # there are at least two -arch flags with different values. 
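+  # (A universal build compiles for several architectures at once, so the
+  # endianness decision is deferred to compile time via __BIG_ENDIAN__; see
+  # AC_APPLE_UNIVERSAL_BUILD in config.h.in.)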
+ ac_arch= + ac_prev= + for ac_word in $CC $CFLAGS $CPPFLAGS $LDFLAGS; do + if test -n "$ac_prev"; then + case $ac_word in + i?86 | x86_64 | ppc | ppc64) + if test -z "$ac_arch" || test "$ac_arch" = "$ac_word"; then + ac_arch=$ac_word + else + ac_cv_c_bigendian=universal + break + fi + ;; + esac + ac_prev= + elif test "x$ac_word" = "x-arch"; then + ac_prev=arch + fi + done +fi +rm -f core conftest.err conftest.$ac_objext conftest.beam conftest.$ac_ext + if test $ac_cv_c_bigendian = unknown; then + # See if sys/param.h defines the BYTE_ORDER macro. + cat confdefs.h - <<_ACEOF >conftest.$ac_ext +/* end confdefs.h. */ +#include + #include + +int +main (void) +{ +#if ! (defined BYTE_ORDER && defined BIG_ENDIAN \ + && defined LITTLE_ENDIAN && BYTE_ORDER && BIG_ENDIAN \ + && LITTLE_ENDIAN) + bogus endian macros + #endif + + ; + return 0; +} +_ACEOF +if ac_fn_cxx_try_compile "$LINENO" +then : + # It does; now see whether it defined to BIG_ENDIAN or not. + cat confdefs.h - <<_ACEOF >conftest.$ac_ext +/* end confdefs.h. */ +#include + #include + +int +main (void) +{ +#if BYTE_ORDER != BIG_ENDIAN + not big endian + #endif + + ; + return 0; +} +_ACEOF +if ac_fn_cxx_try_compile "$LINENO" +then : + ac_cv_c_bigendian=yes +else $as_nop + ac_cv_c_bigendian=no +fi +rm -f core conftest.err conftest.$ac_objext conftest.beam conftest.$ac_ext +fi +rm -f core conftest.err conftest.$ac_objext conftest.beam conftest.$ac_ext + fi + if test $ac_cv_c_bigendian = unknown; then + # See if defines _LITTLE_ENDIAN or _BIG_ENDIAN (e.g., Solaris). + cat confdefs.h - <<_ACEOF >conftest.$ac_ext +/* end confdefs.h. */ +#include + +int +main (void) +{ +#if ! (defined _LITTLE_ENDIAN || defined _BIG_ENDIAN) + bogus endian macros + #endif + + ; + return 0; +} +_ACEOF +if ac_fn_cxx_try_compile "$LINENO" +then : + # It does; now see whether it defined to _BIG_ENDIAN or not. + cat confdefs.h - <<_ACEOF >conftest.$ac_ext +/* end confdefs.h. */ +#include + +int +main (void) +{ +#ifndef _BIG_ENDIAN + not big endian + #endif + + ; + return 0; +} +_ACEOF +if ac_fn_cxx_try_compile "$LINENO" +then : + ac_cv_c_bigendian=yes +else $as_nop + ac_cv_c_bigendian=no +fi +rm -f core conftest.err conftest.$ac_objext conftest.beam conftest.$ac_ext +fi +rm -f core conftest.err conftest.$ac_objext conftest.beam conftest.$ac_ext + fi + if test $ac_cv_c_bigendian = unknown; then + # Compile a test program. + if test "$cross_compiling" = yes +then : + # Try to guess by grepping values from an object file. + cat confdefs.h - <<_ACEOF >conftest.$ac_ext +/* end confdefs.h. */ +unsigned short int ascii_mm[] = + { 0x4249, 0x4765, 0x6E44, 0x6961, 0x6E53, 0x7953, 0 }; + unsigned short int ascii_ii[] = + { 0x694C, 0x5454, 0x656C, 0x6E45, 0x6944, 0x6E61, 0 }; + int use_ascii (int i) { + return ascii_mm[i] + ascii_ii[i]; + } + unsigned short int ebcdic_ii[] = + { 0x89D3, 0xE3E3, 0x8593, 0x95C5, 0x89C4, 0x9581, 0 }; + unsigned short int ebcdic_mm[] = + { 0xC2C9, 0xC785, 0x95C4, 0x8981, 0x95E2, 0xA8E2, 0 }; + int use_ebcdic (int i) { + return ebcdic_mm[i] + ebcdic_ii[i]; + } + extern int foo; + +int +main (void) +{ +return use_ascii (foo) == use_ebcdic (foo); + ; + return 0; +} +_ACEOF +if ac_fn_cxx_try_compile "$LINENO" +then : + if grep BIGenDianSyS conftest.$ac_objext >/dev/null; then + ac_cv_c_bigendian=yes + fi + if grep LiTTleEnDian conftest.$ac_objext >/dev/null ; then + if test "$ac_cv_c_bigendian" = unknown; then + ac_cv_c_bigendian=no + else + # finding both strings is unlikely to happen, but who knows? 
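+		# If both are present, report the result as unknown.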
+ ac_cv_c_bigendian=unknown + fi + fi +fi +rm -f core conftest.err conftest.$ac_objext conftest.beam conftest.$ac_ext else $as_nop cat confdefs.h - <<_ACEOF >conftest.$ac_ext /* end confdefs.h. */ -#include +$ac_includes_default int main (void) { -const uint16_t endianness = 256; return !!(*(const uint8_t *)&endianness); + + /* Are we little or big endian? From Harbison&Steele. */ + union + { + long int l; + char c[sizeof (long int)]; + } u; + u.l = 1; + return u.c[sizeof (long int) - 1] == 1; + ; return 0; } _ACEOF if ac_fn_cxx_try_run "$LINENO" then : - ENDIAN_FLAG="-DDMLC_CMAKE_LITTLE_ENDIAN=1" + ac_cv_c_bigendian=no else $as_nop - ENDIAN_FLAG="-DDMLC_CMAKE_LITTLE_ENDIAN=0" + ac_cv_c_bigendian=yes fi rm -f core *.core core.conftest.* gmon.out bb.out conftest$ac_exeext \ conftest.$ac_objext conftest.beam conftest.$ac_ext fi + fi +fi +{ printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: $ac_cv_c_bigendian" >&5 +printf "%s\n" "$ac_cv_c_bigendian" >&6; } + case $ac_cv_c_bigendian in #( + yes) + { printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: big endian" >&5 +printf "%s\n" "big endian" >&6; } + ENDIAN_FLAG="-DDMLC_CMAKE_LITTLE_ENDIAN=0";; #( + no) + { printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: little endian" >&5 +printf "%s\n" "little endian" >&6; } + ENDIAN_FLAG="-DDMLC_CMAKE_LITTLE_ENDIAN=1" ;; #( + universal) + +printf "%s\n" "#define AC_APPLE_UNIVERSAL_BUILD 1" >>confdefs.h + + ;; #( + *) + { printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: unknown" >&5 +printf "%s\n" "unknown" >&6; } + as_fn_error $? "Could not determine endianness. Please set USE_LITTLE_ENDIAN" "$LINENO" 5 + ;; + esac + + +else $as_nop + + { printf "%s\n" "$as_me:${as_lineno-$LINENO}: Forcing endianness to: ${USE_LITTLE_ENDIAN}" >&5 +printf "%s\n" "$as_me: Forcing endianness to: ${USE_LITTLE_ENDIAN}" >&6;} + ENDIAN_FLAG="-DDMLC_CMAKE_LITTLE_ENDIAN=${USE_LITTLE_ENDIAN}" + +fi OPENMP_CXXFLAGS="" @@ -2877,6 +3207,8 @@ fi ac_config_files="$ac_config_files src/Makevars" +ac_config_headers="$ac_config_headers config.h" + cat >confcache <<\_ACEOF # This file is a shell script that caches the results of configure # tests run on this system so they can be shared between configure @@ -2967,43 +3299,7 @@ test "x$prefix" = xNONE && prefix=$ac_default_prefix # Let make expand exec_prefix. test "x$exec_prefix" = xNONE && exec_prefix='${prefix}' -# Transform confdefs.h into DEFS. -# Protect against shell expansion while executing Makefile rules. -# Protect against Makefile macro expansion. -# -# If the first sed substitution is executed (which looks for macros that -# take arguments), then branch to the quote section. Otherwise, -# look for a macro that doesn't take arguments. 
-ac_script=' -:mline -/\\$/{ - N - s,\\\n,, - b mline -} -t clear -:clear -s/^[ ]*#[ ]*define[ ][ ]*\([^ (][^ (]*([^)]*)\)[ ]*\(.*\)/-D\1=\2/g -t quote -s/^[ ]*#[ ]*define[ ][ ]*\([^ ][^ ]*\)[ ]*\(.*\)/-D\1=\2/g -t quote -b any -:quote -s/[ `~#$^&*(){}\\|;'\''"<>?]/\\&/g -s/\[/\\&/g -s/\]/\\&/g -s/\$/$$/g -H -:any -${ - g - s/^\n// - s/\n/ /g - p -} -' -DEFS=`sed -n "$ac_script" confdefs.h` - +DEFS=-DHAVE_CONFIG_H ac_libobjs= ac_ltlibobjs= @@ -3023,6 +3319,7 @@ LTLIBOBJS=$ac_ltlibobjs + : "${CONFIG_STATUS=./config.status}" ac_write_fail=0 ac_clean_files_save=$ac_clean_files @@ -3430,11 +3727,15 @@ case $ac_config_files in *" "*) set x $ac_config_files; shift; ac_config_files=$*;; esac +case $ac_config_headers in *" +"*) set x $ac_config_headers; shift; ac_config_headers=$*;; +esac cat >>$CONFIG_STATUS <<_ACEOF || ac_write_fail=1 # Files that config.status was made for. config_files="$ac_config_files" +config_headers="$ac_config_headers" _ACEOF @@ -3455,10 +3756,15 @@ Usage: $0 [OPTION]... [TAG]... --recheck update $as_me by reconfiguring in the same conditions --file=FILE[:TEMPLATE] instantiate the configuration file FILE + --header=FILE[:TEMPLATE] + instantiate the configuration header FILE Configuration files: $config_files +Configuration headers: +$config_headers + Report bugs to the package provider." _ACEOF @@ -3521,7 +3827,18 @@ do esac as_fn_append CONFIG_FILES " '$ac_optarg'" ac_need_defaults=false;; - --he | --h | --help | --hel | -h ) + --header | --heade | --head | --hea ) + $ac_shift + case $ac_optarg in + *\'*) ac_optarg=`printf "%s\n" "$ac_optarg" | sed "s/'/'\\\\\\\\''/g"` ;; + esac + as_fn_append CONFIG_HEADERS " '$ac_optarg'" + ac_need_defaults=false;; + --he | --h) + # Conflict between --help and --header + as_fn_error $? "ambiguous option: \`$1' +Try \`$0 --help' for more information.";; + --help | --hel | -h ) printf "%s\n" "$ac_cs_usage"; exit ;; -q | -quiet | --quiet | --quie | --qui | --qu | --q \ | -silent | --silent | --silen | --sile | --sil | --si | --s) @@ -3578,6 +3895,7 @@ for ac_config_target in $ac_config_targets do case $ac_config_target in "src/Makevars") CONFIG_FILES="$CONFIG_FILES src/Makevars" ;; + "config.h") CONFIG_HEADERS="$CONFIG_HEADERS config.h" ;; *) as_fn_error $? "invalid argument: \`$ac_config_target'" "$LINENO" 5;; esac @@ -3590,6 +3908,7 @@ done # bizarre bug on SunOS 4.1.3. if $ac_need_defaults; then test ${CONFIG_FILES+y} || CONFIG_FILES=$config_files + test ${CONFIG_HEADERS+y} || CONFIG_HEADERS=$config_headers fi # Have a temporary directory for convenience. Make it in the build tree @@ -3777,8 +4096,116 @@ fi cat >>$CONFIG_STATUS <<\_ACEOF || ac_write_fail=1 fi # test -n "$CONFIG_FILES" +# Set up the scripts for CONFIG_HEADERS section. +# No need to generate them if there are no CONFIG_HEADERS. +# This happens for instance with `./config.status Makefile'. +if test -n "$CONFIG_HEADERS"; then +cat >"$ac_tmp/defines.awk" <<\_ACAWK || +BEGIN { +_ACEOF + +# Transform confdefs.h into an awk script `defines.awk', embedded as +# here-document in config.status, that substitutes the proper values into +# config.h.in to produce config.h. + +# Create a delimiter string that does not exist in confdefs.h, to ease +# handling of long lines. +ac_delim='%!_!# ' +for ac_last_try in false false :; do + ac_tt=`sed -n "/$ac_delim/p" confdefs.h` + if test -z "$ac_tt"; then + break + elif $ac_last_try; then + as_fn_error $? "could not make $CONFIG_HEADERS" "$LINENO" 5 + else + ac_delim="$ac_delim!$ac_delim _$ac_delim!! 
" + fi +done + +# For the awk script, D is an array of macro values keyed by name, +# likewise P contains macro parameters if any. Preserve backslash +# newline sequences. -eval set X " :F $CONFIG_FILES " +ac_word_re=[_$as_cr_Letters][_$as_cr_alnum]* +sed -n ' +s/.\{148\}/&'"$ac_delim"'/g +t rset +:rset +s/^[ ]*#[ ]*define[ ][ ]*/ / +t def +d +:def +s/\\$// +t bsnl +s/["\\]/\\&/g +s/^ \('"$ac_word_re"'\)\(([^()]*)\)[ ]*\(.*\)/P["\1"]="\2"\ +D["\1"]=" \3"/p +s/^ \('"$ac_word_re"'\)[ ]*\(.*\)/D["\1"]=" \2"/p +d +:bsnl +s/["\\]/\\&/g +s/^ \('"$ac_word_re"'\)\(([^()]*)\)[ ]*\(.*\)/P["\1"]="\2"\ +D["\1"]=" \3\\\\\\n"\\/p +t cont +s/^ \('"$ac_word_re"'\)[ ]*\(.*\)/D["\1"]=" \2\\\\\\n"\\/p +t cont +d +:cont +n +s/.\{148\}/&'"$ac_delim"'/g +t clear +:clear +s/\\$// +t bsnlc +s/["\\]/\\&/g; s/^/"/; s/$/"/p +d +:bsnlc +s/["\\]/\\&/g; s/^/"/; s/$/\\\\\\n"\\/p +b cont +' >$CONFIG_STATUS || ac_write_fail=1 + +cat >>$CONFIG_STATUS <<_ACEOF || ac_write_fail=1 + for (key in D) D_is_set[key] = 1 + FS = "" +} +/^[\t ]*#[\t ]*(define|undef)[\t ]+$ac_word_re([\t (]|\$)/ { + line = \$ 0 + split(line, arg, " ") + if (arg[1] == "#") { + defundef = arg[2] + mac1 = arg[3] + } else { + defundef = substr(arg[1], 2) + mac1 = arg[2] + } + split(mac1, mac2, "(") #) + macro = mac2[1] + prefix = substr(line, 1, index(line, defundef) - 1) + if (D_is_set[macro]) { + # Preserve the white space surrounding the "#". + print prefix "define", macro P[macro] D[macro] + next + } else { + # Replace #undef with comments. This is necessary, for example, + # in the case of _POSIX_SOURCE, which is predefined and required + # on some systems where configure will not decide to define it. + if (defundef == "undef") { + print "/*", prefix defundef, macro, "*/" + next + } + } +} +{ print } +_ACAWK +_ACEOF +cat >>$CONFIG_STATUS <<\_ACEOF || ac_write_fail=1 + as_fn_error $? "could not setup config headers machinery" "$LINENO" 5 +fi # test -n "$CONFIG_HEADERS" + + +eval set X " :F $CONFIG_FILES :H $CONFIG_HEADERS " shift for ac_tag do @@ -3986,7 +4413,30 @@ which seems to be undefined. Please make sure it is defined" >&2;} esac \ || as_fn_error $? "could not create $ac_file" "$LINENO" 5 ;; - + :H) + # + # CONFIG_HEADER + # + if test x"$ac_file" != x-; then + { + printf "%s\n" "/* $configure_input */" >&1 \ + && eval '$AWK -f "$ac_tmp/defines.awk"' "$ac_file_inputs" + } >"$ac_tmp/config.h" \ + || as_fn_error $? "could not create $ac_file" "$LINENO" 5 + if diff "$ac_file" "$ac_tmp/config.h" >/dev/null 2>&1; then + { printf "%s\n" "$as_me:${as_lineno-$LINENO}: $ac_file is unchanged" >&5 +printf "%s\n" "$as_me: $ac_file is unchanged" >&6;} + else + rm -f "$ac_file" + mv "$ac_tmp/config.h" "$ac_file" \ + || as_fn_error $? "could not create $ac_file" "$LINENO" 5 + fi + else + printf "%s\n" "/* $configure_input */" >&1 \ + && eval '$AWK -f "$ac_tmp/defines.awk"' "$ac_file_inputs" \ + || as_fn_error $? 
"could not create -" "$LINENO" 5 + fi + ;; esac diff --git a/R-package/configure.ac b/R-package/configure.ac index 89f8635fe315..916187fc3f34 100644 --- a/R-package/configure.ac +++ b/R-package/configure.ac @@ -28,11 +28,22 @@ AC_MSG_RESULT([]) AC_CHECK_LIB([execinfo], [backtrace], [BACKTRACE_LIB=-lexecinfo], [BACKTRACE_LIB='']) ### Endian detection -AC_MSG_CHECKING([endian]) -AC_MSG_RESULT([]) -AC_RUN_IFELSE([AC_LANG_PROGRAM([[#include ]], [[const uint16_t endianness = 256; return !!(*(const uint8_t *)&endianness);]])], - [ENDIAN_FLAG="-DDMLC_CMAKE_LITTLE_ENDIAN=1"], - [ENDIAN_FLAG="-DDMLC_CMAKE_LITTLE_ENDIAN=0"]) +AC_ARG_VAR(USE_LITTLE_ENDIAN, "Whether to build with little endian (checks at compile time if unset)") +AS_IF([test -z "${USE_LITTLE_ENDIAN+x}"], [ + AC_MSG_NOTICE([Checking system endianness as USE_LITTLE_ENDIAN is unset]) + AC_MSG_CHECKING([system endianness]) + AC_C_BIGENDIAN( + [AC_MSG_RESULT([big endian]) + ENDIAN_FLAG="-DDMLC_CMAKE_LITTLE_ENDIAN=0"], + [AC_MSG_RESULT([little endian]) + ENDIAN_FLAG="-DDMLC_CMAKE_LITTLE_ENDIAN=1"], + [AC_MSG_RESULT([unknown]) + AC_MSG_ERROR([Could not determine endianness. Please set USE_LITTLE_ENDIAN])] + ) +], [ + AC_MSG_NOTICE([Forcing endianness to: ${USE_LITTLE_ENDIAN}]) + ENDIAN_FLAG="-DDMLC_CMAKE_LITTLE_ENDIAN=${USE_LITTLE_ENDIAN}" +]) OPENMP_CXXFLAGS="" @@ -73,4 +84,5 @@ AC_SUBST(OPENMP_LIB) AC_SUBST(ENDIAN_FLAG) AC_SUBST(BACKTRACE_LIB) AC_CONFIG_FILES([src/Makevars]) +AC_CONFIG_HEADERS([config.h]) AC_OUTPUT From 9c9db1259240bffe9040ed7ca6e3fb2c1bda80e4 Mon Sep 17 00:00:00 2001 From: Philip Hyunsu Cho Date: Tue, 30 Jul 2024 14:50:00 -0700 Subject: [PATCH 17/18] Bump version to 2.1.1 (#10651) --- CMakeLists.txt | 2 +- R-package/DESCRIPTION | 4 ++-- R-package/configure | 18 +++++++++--------- R-package/configure.ac | 2 +- include/xgboost/version_config.h | 2 +- jvm-packages/pom.xml | 2 +- jvm-packages/xgboost4j-example/pom.xml | 4 ++-- jvm-packages/xgboost4j-flink/pom.xml | 4 ++-- jvm-packages/xgboost4j-gpu/pom.xml | 4 ++-- jvm-packages/xgboost4j-spark-gpu/pom.xml | 2 +- jvm-packages/xgboost4j-spark/pom.xml | 2 +- jvm-packages/xgboost4j/pom.xml | 4 ++-- python-package/pyproject.toml | 2 +- python-package/xgboost/VERSION | 2 +- 14 files changed, 27 insertions(+), 27 deletions(-) diff --git a/CMakeLists.txt b/CMakeLists.txt index 32ec64305f08..adceee5b3be4 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -6,7 +6,7 @@ if(PLUGIN_SYCL) string(REPLACE " -isystem ${CONDA_PREFIX}/include" "" CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS}") endif() -project(xgboost LANGUAGES CXX C VERSION 2.1.0) +project(xgboost LANGUAGES CXX C VERSION 2.1.1) include(cmake/Utils.cmake) list(APPEND CMAKE_MODULE_PATH "${xgboost_SOURCE_DIR}/cmake/modules") diff --git a/R-package/DESCRIPTION b/R-package/DESCRIPTION index 6edad9d8c7bf..db5290334b49 100644 --- a/R-package/DESCRIPTION +++ b/R-package/DESCRIPTION @@ -1,8 +1,8 @@ Package: xgboost Type: Package Title: Extreme Gradient Boosting -Version: 2.1.0.1 -Date: 2024-06-19 +Version: 2.1.1.1 +Date: 2024-07-30 Authors@R: c( person("Tianqi", "Chen", role = c("aut"), email = "tianqi.tchen@gmail.com"), diff --git a/R-package/configure b/R-package/configure index 617dc003f657..f6deab049b2d 100755 --- a/R-package/configure +++ b/R-package/configure @@ -1,6 +1,6 @@ #! /bin/sh # Guess values for system-dependent variables and create Makefiles. -# Generated by GNU Autoconf 2.71 for xgboost 2.1.0. +# Generated by GNU Autoconf 2.71 for xgboost 2.1.1. 
# # # Copyright (C) 1992-1996, 1998-2017, 2020-2021 Free Software Foundation, @@ -607,8 +607,8 @@ MAKEFLAGS= # Identity of this package. PACKAGE_NAME='xgboost' PACKAGE_TARNAME='xgboost' -PACKAGE_VERSION='2.1.0' -PACKAGE_STRING='xgboost 2.1.0' +PACKAGE_VERSION='2.1.1' +PACKAGE_STRING='xgboost 2.1.1' PACKAGE_BUGREPORT='' PACKAGE_URL='' @@ -1259,7 +1259,7 @@ if test "$ac_init_help" = "long"; then # Omit some internal or obsolete options to make the list less imposing. # This message is too long to be a string in the A/UX 3.1 sh. cat <<_ACEOF -\`configure' configures xgboost 2.1.0 to adapt to many kinds of systems. +\`configure' configures xgboost 2.1.1 to adapt to many kinds of systems. Usage: $0 [OPTION]... [VAR=VALUE]... @@ -1321,7 +1321,7 @@ fi if test -n "$ac_init_help"; then case $ac_init_help in - short | recursive ) echo "Configuration of xgboost 2.1.0:";; + short | recursive ) echo "Configuration of xgboost 2.1.1:";; esac cat <<\_ACEOF @@ -1404,7 +1404,7 @@ fi test -n "$ac_init_help" && exit $ac_status if $ac_init_version; then cat <<\_ACEOF -xgboost configure 2.1.0 +xgboost configure 2.1.1 generated by GNU Autoconf 2.71 Copyright (C) 2021 Free Software Foundation, Inc. @@ -1603,7 +1603,7 @@ cat >config.log <<_ACEOF This file contains any messages produced by compilers while running configure, to aid debugging if configure makes a mistake. -It was created by xgboost $as_me 2.1.0, which was +It was created by xgboost $as_me 2.1.1, which was generated by GNU Autoconf 2.71. Invocation command line was $ $0$ac_configure_args_raw @@ -3709,7 +3709,7 @@ cat >>$CONFIG_STATUS <<\_ACEOF || ac_write_fail=1 # report actual input values of CONFIG_FILES etc. instead of their # values after options handling. ac_log=" -This file was extended by xgboost $as_me 2.1.0, which was +This file was extended by xgboost $as_me 2.1.1, which was generated by GNU Autoconf 2.71. 
Invocation command line was

  CONFIG_FILES    = $CONFIG_FILES
@@ -3773,7 +3773,7 @@ ac_cs_config_escaped=`printf "%s\n" "$ac_cs_config" | sed "s/^ //; s/'/'\\\\\\\\
 cat >>$CONFIG_STATUS <<_ACEOF || ac_write_fail=1
 ac_cs_config='$ac_cs_config_escaped'
 ac_cs_version="\\
-xgboost config.status 2.1.0
+xgboost config.status 2.1.1
 configured by $0, generated by GNU Autoconf 2.71,
   with options \\"\$ac_cs_config\\"

diff --git a/R-package/configure.ac b/R-package/configure.ac
index 916187fc3f34..46b251708fba 100644
--- a/R-package/configure.ac
+++ b/R-package/configure.ac
@@ -2,7 +2,7 @@

 AC_PREREQ(2.69)

-AC_INIT([xgboost],[2.1.0],[],[xgboost],[])
+AC_INIT([xgboost],[2.1.1],[],[xgboost],[])

 : ${R_HOME=`R RHOME`}
 if test -z "${R_HOME}"; then
diff --git a/include/xgboost/version_config.h b/include/xgboost/version_config.h
index 70e5417af779..ee4c824d7f61 100644
--- a/include/xgboost/version_config.h
+++ b/include/xgboost/version_config.h
@@ -6,6 +6,6 @@
 #define XGBOOST_VER_MAJOR 2 /* NOLINT */
 #define XGBOOST_VER_MINOR 1 /* NOLINT */
-#define XGBOOST_VER_PATCH 0 /* NOLINT */
+#define XGBOOST_VER_PATCH 1 /* NOLINT */

 #endif  // XGBOOST_VERSION_CONFIG_H_
diff --git a/jvm-packages/pom.xml b/jvm-packages/pom.xml
index af9046ca5440..53b7333e8fe3 100644
--- a/jvm-packages/pom.xml
+++ b/jvm-packages/pom.xml
@@ -6,7 +6,7 @@
     <groupId>ml.dmlc</groupId>
     <artifactId>xgboost-jvm_2.12</artifactId>
-    <version>2.1.0</version>
+    <version>2.1.1</version>
     <packaging>pom</packaging>
     <name>XGBoost JVM Package</name>
    <description>JVM Package for XGBoost</description>
diff --git a/jvm-packages/xgboost4j-example/pom.xml b/jvm-packages/xgboost4j-example/pom.xml
index a1e78775e3df..554a7bf8f4be 100644
--- a/jvm-packages/xgboost4j-example/pom.xml
+++ b/jvm-packages/xgboost4j-example/pom.xml
@@ -6,11 +6,11 @@
     <parent>
         <groupId>ml.dmlc</groupId>
         <artifactId>xgboost-jvm_2.12</artifactId>
-        <version>2.1.0</version>
+        <version>2.1.1</version>
     </parent>
     <name>xgboost4j-example</name>
     <artifactId>xgboost4j-example_2.12</artifactId>
-    <version>2.1.0</version>
+    <version>2.1.1</version>
     <packaging>jar</packaging>
diff --git a/jvm-packages/xgboost4j-flink/pom.xml b/jvm-packages/xgboost4j-flink/pom.xml
index 3f8adf40a544..f24e0426c7ef 100644
--- a/jvm-packages/xgboost4j-flink/pom.xml
+++ b/jvm-packages/xgboost4j-flink/pom.xml
@@ -6,12 +6,12 @@
     <parent>
         <groupId>ml.dmlc</groupId>
         <artifactId>xgboost-jvm_2.12</artifactId>
-        <version>2.1.0</version>
+        <version>2.1.1</version>
     </parent>
     <name>xgboost4j-flink</name>
     <artifactId>xgboost4j-flink_2.12</artifactId>
-    <version>2.1.0</version>
+    <version>2.1.1</version>
     2.2.0
diff --git a/jvm-packages/xgboost4j-gpu/pom.xml b/jvm-packages/xgboost4j-gpu/pom.xml
index 7e54092a9408..cbc147b8aeff 100644
--- a/jvm-packages/xgboost4j-gpu/pom.xml
+++ b/jvm-packages/xgboost4j-gpu/pom.xml
@@ -6,11 +6,11 @@
     <parent>
         <groupId>ml.dmlc</groupId>
        <artifactId>xgboost-jvm_2.12</artifactId>
-        <version>2.1.0</version>
+        <version>2.1.1</version>
     </parent>
     <artifactId>xgboost4j-gpu_2.12</artifactId>
     <name>xgboost4j-gpu</name>
-    <version>2.1.0</version>
+    <version>2.1.1</version>
     <packaging>jar</packaging>
diff --git a/jvm-packages/xgboost4j-spark-gpu/pom.xml b/jvm-packages/xgboost4j-spark-gpu/pom.xml
index 04a130b9dcd7..dd5e3179c023 100644
--- a/jvm-packages/xgboost4j-spark-gpu/pom.xml
+++ b/jvm-packages/xgboost4j-spark-gpu/pom.xml
@@ -6,7 +6,7 @@
     <parent>
         <groupId>ml.dmlc</groupId>
         <artifactId>xgboost-jvm_2.12</artifactId>
-        <version>2.1.0</version>
+        <version>2.1.1</version>
     </parent>
     <name>xgboost4j-spark-gpu</name>
     <artifactId>xgboost4j-spark-gpu_2.12</artifactId>
diff --git a/jvm-packages/xgboost4j-spark/pom.xml b/jvm-packages/xgboost4j-spark/pom.xml
index d169599c1412..1858defd5b02 100644
--- a/jvm-packages/xgboost4j-spark/pom.xml
+++ b/jvm-packages/xgboost4j-spark/pom.xml
@@ -6,7 +6,7 @@
     <parent>
         <groupId>ml.dmlc</groupId>
         <artifactId>xgboost-jvm_2.12</artifactId>
-        <version>2.1.0</version>
+        <version>2.1.1</version>
     </parent>
     <name>xgboost4j-spark</name>
     <artifactId>xgboost4j-spark_2.12</artifactId>
diff --git a/jvm-packages/xgboost4j/pom.xml b/jvm-packages/xgboost4j/pom.xml
index a4c425a734b9..a4b8985c3493 100644
--- a/jvm-packages/xgboost4j/pom.xml
+++ b/jvm-packages/xgboost4j/pom.xml
@@ -6,11 +6,11 @@
     <parent>
         <groupId>ml.dmlc</groupId>
         <artifactId>xgboost-jvm_2.12</artifactId>
-        <version>2.1.0</version>
+        <version>2.1.1</version>
     </parent>
     <name>xgboost4j</name>
     <artifactId>xgboost4j_2.12</artifactId>
-    <version>2.1.0</version>
+    <version>2.1.1</version>
     <packaging>jar</packaging>
diff --git a/python-package/pyproject.toml b/python-package/pyproject.toml
index ed92a95aa405..05bbe998c44a 100644
--- a/python-package/pyproject.toml
+++ b/python-package/pyproject.toml
@@ -13,7 +13,7 @@ authors
= [
     { name = "Hyunsu Cho", email = "chohyu01@cs.washington.edu" },
     { name = "Jiaming Yuan", email = "jm.yuan@outlook.com" }
 ]
-version = "2.1.0"
+version = "2.1.1"
 requires-python = ">=3.8"
 license = { text = "Apache-2.0" }
 classifiers = [
diff --git a/python-package/xgboost/VERSION b/python-package/xgboost/VERSION
index 7ec1d6db4087..3e3c2f1e5edb 100644
--- a/python-package/xgboost/VERSION
+++ b/python-package/xgboost/VERSION
@@ -1 +1 @@
-2.1.0
+2.1.1

From e36d361674cb1b8fd599da891e1e91a427bb4159 Mon Sep 17 00:00:00 2001
From: Philip Hyunsu Cho <chohyu01@cs.washington.edu>
Date: Tue, 30 Jul 2024 21:35:54 -0700
Subject: [PATCH 18/18] [CI] Create xgboost-cpu on Windows (#10653)

---
 doc/install.rst                      |  2 ++
 tests/buildkite/build-win64-cpu.ps1  | 51 ++++++++++++++++++++++++++++
 tests/buildkite/build-win64-gpu.ps1  |  2 ++
 tests/buildkite/cpu_only_pypkg.patch | 29 ++++++++--------
 tests/buildkite/pipeline-win64.yml   |  5 +++
 5 files changed, 75 insertions(+), 14 deletions(-)
 create mode 100644 tests/buildkite/build-win64-cpu.ps1

diff --git a/doc/install.rst b/doc/install.rst
index 79082a7ed581..7871763e6f25 100644
--- a/doc/install.rst
+++ b/doc/install.rst
@@ -89,6 +89,8 @@ You may choose to reduce the size of the installed package and save the disk spa
 The ``xgboost-cpu`` variant will have drastically smaller disk footprint, but does not provide some features, such as
 the GPU algorithms and federated learning.

+Currently, the ``xgboost-cpu`` package is provided for x86_64 (amd64) Linux and Windows platforms.
+
 Conda
 *****

diff --git a/tests/buildkite/build-win64-cpu.ps1 b/tests/buildkite/build-win64-cpu.ps1
new file mode 100644
index 000000000000..f237ba163011
--- /dev/null
+++ b/tests/buildkite/build-win64-cpu.ps1
@@ -0,0 +1,51 @@
+## Build Python package xgboost-cpu (minimal install)
+
+$ErrorActionPreference = "Stop"
+
+. tests/buildkite/conftest.ps1
+
+Write-Host "--- Build libxgboost on Windows (minimal)"
+
+mkdir build
+cd build
+cmake .. -G"Visual Studio 17 2022" -A x64
+if ($LASTEXITCODE -ne 0) { throw "Last command failed" }
+cmake --build . --config Release -- /m /nodeReuse:false `
+  "/consoleloggerparameters:ShowCommandLine;Verbosity=minimal"
+if ($LASTEXITCODE -ne 0) { throw "Last command failed" }
+
+Write-Host "--- Build binary wheel"
+cd ..
+# Patch to rename pkg to xgboost-cpu
+Get-Content tests/buildkite/remove_nccl_dep.patch | patch -p0
+Get-Content tests/buildkite/cpu_only_pypkg.patch | patch -p0
+
+cd python-package
+conda activate
+& pip install --user -v "pip>=23"
+& pip --version
+& pip wheel --no-deps -v . --wheel-dir dist/
+Get-ChildItem . -Filter dist/*.whl |
+Foreach-Object {
+  & python ../tests/ci_build/rename_whl.py `
+    --wheel-path $_.FullName `
+    --commit-hash $Env:BUILDKITE_COMMIT `
+    --platform-tag win_amd64
+  if ($LASTEXITCODE -ne 0) { throw "Last command failed" }
+}
+
+Write-Host "--- Upload Python wheel"
+cd ..
+Get-ChildItem . -Filter python-package/dist/*.whl |
+Foreach-Object {
+  & buildkite-agent artifact upload python-package/dist/$_
+  if ($LASTEXITCODE -ne 0) { throw "Last command failed" }
+}
+if ( $is_release_branch -eq 1 ) {
+  Get-ChildItem .
-Filter python-package/dist/*.whl |
+  Foreach-Object {
+    & aws s3 cp python-package/dist/$_ s3://xgboost-nightly-builds/$Env:BUILDKITE_BRANCH/ `
+      --acl public-read --no-progress
+    if ($LASTEXITCODE -ne 0) { throw "Last command failed" }
+  }
+}
diff --git a/tests/buildkite/build-win64-gpu.ps1 b/tests/buildkite/build-win64-gpu.ps1
index 9114d3237751..75ffd3a15c56 100644
--- a/tests/buildkite/build-win64-gpu.ps1
+++ b/tests/buildkite/build-win64-gpu.ps1
@@ -1,3 +1,5 @@
+## Build Python package and gtest with GPU enabled
+
 $ErrorActionPreference = "Stop"

 . tests/buildkite/conftest.ps1
diff --git a/tests/buildkite/cpu_only_pypkg.patch b/tests/buildkite/cpu_only_pypkg.patch
index 765ac5c098d0..62789a25aefe 100644
--- a/tests/buildkite/cpu_only_pypkg.patch
+++ b/tests/buildkite/cpu_only_pypkg.patch
@@ -1,8 +1,8 @@
 diff --git python-package/README.rst python-package/README.rst
-index 1fc0bb5a0..f1c68470b 100644
+index 1fc0bb5a0..4475dbb26 100644
 --- python-package/README.rst
 +++ python-package/README.rst
-@@ -1,20 +1,15 @@
+@@ -1,20 +1,18 @@
 -======================
 -XGBoost Python Package
 -======================
@@ -14,27 +14,28 @@ index 1fc0bb5a0..f1c68470b 100644
 -Installation
 -============
-+The ``xgboost-cpu`` package provides for a minimal installation, with no support for the GPU algorithms
-+or federated learning. It is provided to allow XGBoost to be installed in a space-constrained
-+environments.
-
+-
 -From `PyPI <https://pypi.python.org/pypi/xgboost>`_
 ----------------------------------------------------
-+Note. ``xgboost-cpu`` package is only provided for x86_64 (amd64) Linux and Windows platforms.
-+For other platforms, please install ``xgboost`` from https://pypi.org/project/xgboost/.
-
+-
 -For a stable version, install using ``pip``::
 -
 -    pip install xgboost
--
--.. |PyPI version| image:: https://badge.fury.io/py/xgboost.svg
--    :target: http://badge.fury.io/py/xgboost
--
 -For building from source, see `build <https://xgboost.readthedocs.io/en/latest/build.html>`_.
++The ``xgboost-cpu`` package provides a minimal installation, with no support for the GPU algorithms
++or federated learning. It is provided to allow XGBoost to be installed in space-constrained
++environments.
+
+ .. |PyPI version| image:: https://badge.fury.io/py/xgboost.svg
+     :target: http://badge.fury.io/py/xgboost
+
 -For building from source, see `build <https://xgboost.readthedocs.io/en/latest/build.html>`_.
++Note. The ``xgboost-cpu`` package is only provided for x86_64 (amd64) Linux and Windows platforms.
++For other platforms, please install ``xgboost`` from https://pypi.org/project/xgboost/.
++
 +Note. ``xgboost-cpu`` does not provide an sdist (source distribution). You may install sdist
 +from https://pypi.org/project/xgboost/.
diff --git python-package/pyproject.toml python-package/pyproject.toml
-index 46c1451c2..c5dc908d9 100644
+index 05bbe998c..50e10bf26 100644
 --- python-package/pyproject.toml
 +++ python-package/pyproject.toml
@@ -6,7 +6,7 @@ backend-path = ["."]
diff --git a/tests/buildkite/pipeline-win64.yml b/tests/buildkite/pipeline-win64.yml
index 83a61981e716..2333226bd538 100644
--- a/tests/buildkite/pipeline-win64.yml
+++ b/tests/buildkite/pipeline-win64.yml
@@ -13,6 +13,11 @@ steps:
       key: build-win64-gpu
       agents:
         queue: windows-cpu
+  - label: ":windows: Build XGBoost for Windows (minimal)"
+    command: "tests/buildkite/build-win64-cpu.ps1"
+    key: build-win64-cpu
+    agents:
+      queue: windows-cpu
   - wait
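
A note on the endianness override introduced in R-package/configure.ac above: because ``AC_ARG_VAR`` registers ``USE_LITTLE_ENDIAN`` as a precious configure variable, the check can be skipped entirely from the command line; this is the escape hatch the ``AC_MSG_ERROR`` branch points at when ``AC_C_BIGENDIAN`` reports "unknown". A minimal sketch of how a packager might use it, assuming a Unix shell (the tarball name here is illustrative)::

    # Declare the target little-endian; configure then emits
    # ENDIAN_FLAG="-DDMLC_CMAKE_LITTLE_ENDIAN=1" without probing.
    USE_LITTLE_ENDIAN=1 R CMD INSTALL xgboost_2.1.1.tar.gz

    # Or pass the variable directly to the generated configure script;
    # 0 declares a big-endian target.
    ./configure USE_LITTLE_ENDIAN=0

Setting the variable to 1 or 0 maps one-to-one onto ``-DDMLC_CMAKE_LITTLE_ENDIAN=${USE_LITTLE_ENDIAN}``; leaving it unset falls back to the compile-time ``AC_C_BIGENDIAN`` probe.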
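The ``CONFIG_HEADERS`` plumbing added to the generated ``R-package/configure`` script is standard Autoconf 2.71 output for the new ``AC_CONFIG_HEADERS([config.h])`` line: ``confdefs.h`` is translated into the embedded ``defines.awk`` program, which substitutes each ``#define``/``#undef`` template entry when ``config.status`` runs, and ``DEFS`` collapses to a single ``-DHAVE_CONFIG_H``. A sketch of the regeneration workflow this enables, assuming a tree where configure has already been run::

    cd R-package
    ./config.status --header=config.h   # regenerate only the header
    ./config.status --help              # usage now lists --header=FILE[:TEMPLATE]

If the regenerated header is identical, config.status reports that ``config.h`` is unchanged and leaves the file untouched, avoiding needless rebuilds.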
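For end users, the net effect of the final patch is that the minimal variant described in doc/install.rst becomes installable by name on Windows as well as Linux::

    # Minimal build: no GPU algorithms, no federated learning.
    pip install xgboost-cpu

Since both distributions ship the same ``xgboost`` Python module, it is safest to keep only one of ``xgboost`` and ``xgboost-cpu`` installed in a given environment at a time.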
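The packaging flow in build-win64-cpu.ps1 is not Windows-specific at its core: the two patches perform the renaming, and pip does the rest. A sketch of reproducing the packaging step locally for debugging, assuming a Unix shell at the repository root (the resulting wheel file name will vary)::

    # Rename the package to xgboost-cpu and drop the NCCL dependency.
    patch -p0 < tests/buildkite/remove_nccl_dep.patch
    patch -p0 < tests/buildkite/cpu_only_pypkg.patch
    cd python-package
    pip wheel --no-deps -v . --wheel-dir dist/   # builds the renamed wheel

The CI job then retags the wheel with tests/ci_build/rename_whl.py to embed the commit hash and platform tag before upload.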