Skip to content

Commit

Permalink
Implement baseless monobase
Browse files Browse the repository at this point in the history
  • Loading branch information
nevillelyh committed Sep 30, 2024
1 parent c3cd7ba commit cfeebb4
Show file tree
Hide file tree
Showing 20 changed files with 1,001 additions and 0 deletions.
3 changes: 3 additions & 0 deletions .dockerignore
Original file line number Diff line number Diff line change
@@ -0,0 +1,3 @@
local/uv/cache
src/__pycache__
src/requirements-test
15 changes: 15 additions & 0 deletions .github/workflows/ci.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,15 @@
# CI workflow: triggered on every push and every pull request.
name: CI

on:
push:
pull_request:

jobs:
# Single job; the actual steps live in the reusable workflow referenced by `uses` below.
build:
name: Build + release
permissions:
contents: read
# NOTE(review): id-token write is presumably needed by the called workflow for OIDC auth — confirm.
id-token: write
# Reusable workflow from replicate/actions, tracked at the `main` ref (not pinned to a tag or SHA).
uses: replicate/actions/.github/workflows/buildx.yml@main
with:
# Image name handed to the reusable workflow.
image: monobase
4 changes: 4 additions & 0 deletions .gitignore
Original file line number Diff line number Diff line change
@@ -0,0 +1,4 @@
__pycache__
/cache
/local
/src/requirements-test
82 changes: 82 additions & 0 deletions Dockerfile
Original file line number Diff line number Diff line change
@@ -0,0 +1,82 @@
# vi: filetype=dockerfile

# One image, two roles; entrypoint.sh picks the role from the container hostname:
# - If hostname matches monobase-* (e.g. monobase-builder-*)
#   - Runs as a DaemonSet on each worker
#   - Builds /usr/local for model images
# - Else
#   - Runs as base image for models
#   - Mounts /usr/local as host path

FROM ubuntu:jammy

# Install prefix baked into the UV_* paths below at image build time.
# NOTE(review): entrypoint.sh and build.py default to /usr/local as well;
# overriding this ARG alone does not change them.
ARG PREFIX=/usr/local

ENV DEBIAN_FRONTEND=noninteractive
ENV TZ=Etc/UTC

# Keep all uv state (cache, managed Pythons, tools) under $PREFIX
ENV UV_CACHE_DIR=$PREFIX/uv/cache
ENV UV_PYTHON_INSTALL_DIR=$PREFIX/uv/python
ENV UV_TOOL_BIN_DIR=$PREFIX/bin
ENV UV_TOOL_DIR=$PREFIX/uv/tools

ENV UV_COMPILE_BYTECODE=true
ENV UV_LINK_MODE=hardlink
ENV UV_PYTHON_PREFERENCE=only-managed

# Builder tools:
# ca-certificates - HTTPS
# curl - download uv, PGET, etc.
# libxml2 - CUDA installer
# rdfind - find duplicate CUDA .so files
# xz-utils - CuDNN tarball
#
# The second group holds the remaining packages; each package is listed
# exactly once (ca-certificates, curl and xz-utils used to appear in both).
# NOTE(review): the second group is presumably what model images need at
# runtime — confirm before trimming it.

RUN apt-get update \
    && apt-get install -y \
        ca-certificates \
        curl \
        libxml2 \
        rdfind \
        xz-utils \
        # remaining packages, sorted alphabetically
        build-essential \
        cmake \
        ffmpeg \
        findutils \
        g++ \
        gcc \
        git \
        libavcodec-dev \
        libbz2-dev \
        libcairo2-dev \
        libffi-dev \
        libfontconfig1 \
        libgirepository1.0-dev \
        libgl1 \
        libgl1-mesa-glx \
        libglib2.0-0 \
        liblzma-dev \
        libncurses5-dev \
        libncursesw5-dev \
        libopencv-dev \
        libreadline-dev \
        libsm6 \
        libsndfile1 \
        libsqlite3-dev \
        libssl-dev \
        libunistring-dev \
        libxext6 \
        libxrender1 \
        llvm \
        make \
        sox \
        tk-dev \
        unzip \
        wget \
        zip \
        zlib1g-dev \
        zstd \
    && rm -rf /var/lib/apt/lists/*

# Builder sources and the entrypoint script that selects builder vs. model mode
COPY src /srv/r8/monobase
ENTRYPOINT [ "/srv/r8/monobase/entrypoint.sh" ]
7 changes: 7 additions & 0 deletions build.sh
Original file line number Diff line number Diff line change
@@ -0,0 +1,7 @@
#!/bin/bash

# Build the production image, tagged monobase:latest, for linux/amd64.
# The build context is the current directory.

set -euo pipefail

docker build \
    --platform=linux/amd64 \
    --tag monobase:latest \
    .
5 changes: 5 additions & 0 deletions mini.Dockerfile
Original file line number Diff line number Diff line change
@@ -0,0 +1,5 @@
# vi: filetype=dockerfile

# Layered on the locally tagged monobase:latest image (the tag is local, so it
# must already exist in the local image store when this is built).
FROM monobase:latest

# Bake the contents of ./local from the build context into /usr/local.
COPY local /usr/local
7 changes: 7 additions & 0 deletions mini.sh
Original file line number Diff line number Diff line change
@@ -0,0 +1,7 @@
#!/bin/bash

# Build the mini test image from mini.Dockerfile, tagged monobase:mini,
# for linux/amd64. The build context is the current directory.

set -euo pipefail

docker build \
    --file mini.Dockerfile \
    --platform=linux/amd64 \
    --tag monobase:mini \
    .
98 changes: 98 additions & 0 deletions src/build.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,98 @@
import argparse
import os
import os.path
import re
import shutil
import subprocess
import sys

from monogen import MONOGENS, MonoGen
from cuda import install_cuda, install_cudnn
from optimize import optimize_ld_cache, optimize_rdfind
from prune import clean_uv_cache, prune_cuda, prune_old_gen, prune_uv_cache
from util import is_done, logger, mark_done
from uv import install_venv


# Command-line interface of the monobase builder.
parser = argparse.ArgumentParser(description='Build monobase environment')
parser.add_argument('--environment', metavar='ENV', default='prod', choices=['test', 'prod'],
                    help='environment [test, prod], default=prod')
parser.add_argument('--min-gen-id', metavar='N', type=int, default=0,
                    help='minimum generation ID, default=0')
parser.add_argument('--max-gen-id', metavar='N', type=int, default=sys.maxsize,
                    help='maximum generation ID, default=inf')
parser.add_argument('--prefix', metavar='PATH', default='/usr/local',
                    help='prefix for monobase')
parser.add_argument('--cache', metavar='PATH', default='/var/cache/monobase',
                    help='cache for monobase')
parser.add_argument('--prune-old-gen', default=False, action='store_true',
                    help='prune old generations')
parser.add_argument('--prune-cuda', default=False, action='store_true',
                    help='prune unused CUDAs and CuDNNs')
# This option defaults to on, so a plain store_true flag could never turn it
# off. BooleanOptionalAction keeps --prune-uv-cache working unchanged and adds
# --no-prune-uv-cache as the way to disable it.
parser.add_argument('--prune-uv-cache', default=True, action=argparse.BooleanOptionalAction,
                    help='prune uv cache')
parser.add_argument('--clean-uv-cache', default=False, action='store_true',
                    help='clean uv cache')


def build_generation(args: argparse.Namespace, mg: MonoGen) -> None:
    """Build one monobase generation under ``<prefix>/monobase/gNNNNN``.

    Does nothing when the generation directory is already marked done.
    Otherwise the directory is recreated from scratch, every CUDA and CuDNN
    the generation lists is installed and symlinked into it, one venv is
    installed per (python, torch, cuda) combination, the optimize passes run,
    and finally the directory is marked done.
    """
    gen_dir = os.path.join(args.prefix, 'monobase', 'g%05d' % mg.id)
    if is_done(gen_dir):
        return

    logger.info(f'Building monobase generation {mg.id}...')
    # Start from an empty directory; anything left by an unfinished build is discarded
    shutil.rmtree(gen_dir, ignore_errors=True)
    os.makedirs(gen_dir)

    for cuda_name, cuda_version in mg.cuda.items():
        installed = install_cuda(args, cuda_version)
        dst = f'{gen_dir}/cuda{cuda_name}'
        # Link relative to the generation directory rather than by absolute path
        os.symlink(os.path.relpath(installed, gen_dir), dst)
        logger.info(f'CUDA symlinked in {dst}')

    # Strip the trailing ".<digits>" component from every CUDA key
    cuda_majors = {re.sub(r'\.\d+$', '', cuda_name) for cuda_name in mg.cuda.keys()}
    for cudnn_name, cudnn_version in mg.cudnn.items():
        for major in cuda_majors:
            installed = install_cudnn(args, cudnn_version, major)
            dst = f'{gen_dir}/cudnn{cudnn_name}-cuda{major}'
            os.symlink(os.path.relpath(installed, gen_dir), dst)
            logger.info(f'CuDNN symlinked in {dst}')

    # prod reads "requirements", any other environment "requirements-<env>"
    if args.environment == 'prod':
        suffix = ''
    else:
        suffix = f'-{args.environment}'
    req_dir = os.path.join('/srv/r8/monobase', f'requirements{suffix}', 'g%05d' % mg.id)
    for py, py_full in mg.python.items():
        for torch_version in mg.torch:
            for cuda_name in mg.cuda.keys():
                install_venv(args, req_dir, gen_dir, py, py_full, torch_version, cuda_name)

    optimize_ld_cache(args, gen_dir, mg)
    optimize_rdfind(args, gen_dir, mg)

    mark_done(gen_dir)
    logger.info(f'Generation {mg.id} installed in {gen_dir}')


def build(args: argparse.Namespace) -> None:
    """Build every selected generation, run the requested prune steps, report disk usage.

    Generations come from ``MONOGENS[args.environment]``; only those whose ID
    lies within ``[args.min_gen_id, args.max_gen_id]`` are built.
    """
    for generation in MONOGENS[args.environment]:
        if args.min_gen_id <= generation.id <= args.max_gen_id:
            build_generation(args, generation)

    # Optional cleanup steps, each behind its own command-line flag
    if args.prune_old_gen:
        prune_old_gen(args.min_gen_id)
    if args.prune_cuda:
        prune_cuda()
    if args.prune_uv_cache:
        prune_uv_cache()
    if args.clean_uv_cache:
        clean_uv_cache()

    logger.info(f'Calculating disk usage in {args.prefix}...')
    # du: human-readable sizes, one directory level deep, with a grand total
    subprocess.run(['du', '-ch', '-d', '1', args.prefix], check=True)


# Script entry point: parse the command line and run the build.
if __name__ == '__main__':
    build(parser.parse_args())
111 changes: 111 additions & 0 deletions src/cuda.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,111 @@
from collections import namedtuple
from urls import cuda_urls, cudnn_urls

import argparse
import os
import urllib.parse
import re
import shutil
import subprocess

from util import Version, is_done, logger, mark_done


# One CUDA installer: download URL, file name, and the CUDA / driver versions
# parsed out of that file name.
Cuda = namedtuple('Cuda', 'url filename cuda_version driver_version')
# One CuDNN archive: download URL, file name, the CuDNN version and the CUDA
# major version parsed out of that file name.
CuDNN = namedtuple('CuDNN', 'url filename cudnn_version cuda_major')


def build_cudas() -> dict[str, Cuda]:
    """Index the known CUDA installers by ``"<cuda>_<driver>"`` version key.

    The file name of each URL in ``cuda_urls`` must look like
    ``cuda_<cuda>_<driver>_linux.run``; both version parts are parsed with
    ``Version.parse``.

    Returns:
        Mapping from ``f'{cuda}_{driver}'`` to the matching ``Cuda`` record.

    Raises:
        ValueError: if a URL's file name does not match the expected pattern.
    """
    # The dot is escaped so only a literal ".run" suffix is accepted
    p = re.compile(r'^cuda_(?P<cuda>[^_]+)_(?P<driver>[^_]+)_linux\.run$')
    cudas = {}
    for u in cuda_urls:
        url = urllib.parse.urlparse(u)
        f = os.path.basename(url.path)
        m = p.search(f)
        if m is None:
            # Name the offending URL instead of failing with an AttributeError on None
            raise ValueError(f'Unrecognized CUDA installer file name in URL: {u}')
        cuda = Version.parse(m.group('cuda'))
        driver = Version.parse(m.group('driver'))
        cudas[f'{cuda}_{driver}'] = Cuda(u, f, cuda, driver)
    return cudas


def build_cudnns() -> dict[str, CuDNN]:
    """Index the known CuDNN archives by ``"<cudnn>-cuda<major>"`` key.

    The file name of each URL in ``cudnn_urls`` must look like
    ``cudnn-linux-x86_64-<cudnn>_cuda<major>-archive.tar.xz``. The CuDNN
    version is parsed with ``Version.parse``; the CUDA major is converted
    with ``int``.

    Returns:
        Mapping from ``f'{cudnn}-cuda{cuda_major}'`` to the matching ``CuDNN`` record.

    Raises:
        ValueError: if a URL's file name does not match the expected pattern,
            or the CUDA major part is not an integer.
    """
    # Dots are escaped so only a literal "-archive.tar.xz" suffix is accepted
    p = re.compile(
        r'^cudnn-linux-x86_64-(?P<cudnn>[^_]+)_cuda(?P<cuda_major>[^-]+)-archive\.tar\.xz$')
    cudnns = {}
    for u in cudnn_urls:
        url = urllib.parse.urlparse(u)
        f = os.path.basename(url.path)
        m = p.search(f)
        if m is None:
            # Name the offending URL instead of failing with an AttributeError on None
            raise ValueError(f'Unrecognized CuDNN archive file name in URL: {u}')
        cudnn = Version.parse(m.group('cudnn'))
        cuda_major = int(m.group('cuda_major'))
        cudnns[f'{cudnn}-cuda{cuda_major}'] = CuDNN(u, f, cudnn, cuda_major)
    return cudnns


# Lookup tables built once at import time from the URL lists in the urls module.
# install_cuda() indexes `cudas` by its version argument; install_cudnn()
# indexes `cudnns` by "<version>-cuda<cuda_major>".
cudas: dict[str, Cuda] = build_cudas()
cudnns: dict[str, CuDNN] = build_cudnns()


def install_cuda(args: argparse.Namespace, version: str) -> str:
    """Download and install one CUDA toolkit under ``<prefix>/cuda/cuda-<version>``.

    Returns immediately when the target directory is already marked done.

    Args:
        args: parsed command line; ``args.prefix`` and ``args.cache`` are used.
        version: key into the module-level ``cudas`` table.

    Returns:
        The installation directory.

    Raises:
        KeyError: if ``version`` is not in ``cudas``.
        subprocess.CalledProcessError: if the download, the installer or a
            cleanup command exits non-zero.
    """
    dir = f'{args.prefix}/cuda/cuda-{version}'
    if is_done(dir):
        return dir

    logger.info(f'Downloading CUDA {version}...')
    cuda = cudas[version]
    # Not marked done: discard whatever an interrupted earlier attempt left
    # behind, the same way build_generation resets its directory.
    shutil.rmtree(dir, ignore_errors=True)
    # curl does not create missing parent directories for -o
    os.makedirs(args.cache, exist_ok=True)
    file = os.path.join(args.cache, cuda.filename)
    try:
        cmd = ['curl', '-fsSL', cuda.url, '-o', file]
        subprocess.run(cmd, check=True)

        logger.info(f'Installing CUDA {version}...')
        cmd = [
            '/bin/sh', file, f'--installpath={dir}',
            '--toolkit', '--override', '--silent',
            '--no-opengl-libs', '--no-man-page', '--no-drm'
        ]
        subprocess.run(cmd, check=True)
    finally:
        # Remove the installer even when the download or install failed
        if os.path.exists(file):
            os.remove(file)

    # Remove unused files
    logger.info(f'Deleting unused files for CUDA {version}...')
    for unused in ('compute-sanitizer', 'extras', 'gds', 'libnvvp',
                   'nsightee_plugins', 'nvml', 'pkgconfig', 'tools'):
        shutil.rmtree(os.path.join(dir, unused), ignore_errors=True)

    # The shell is only needed for glob expansion. The directory is passed as
    # "$1" instead of being pasted into the command string, so a prefix with
    # spaces or shell metacharacters cannot change what gets deleted.
    cmd = ['/bin/sh', '-c', 'rm -rf "$1"/gds-* "$1"/nsight-*', 'sh', dir]
    subprocess.run(cmd, check=True)

    # Delete the static libraries
    cmd = ['find', dir, '-name', 'lib*.a', '-delete']
    subprocess.run(cmd, check=True)

    mark_done(dir)
    logger.info(f'CUDA {version} installed in {dir}')
    return dir


def install_cudnn(args: argparse.Namespace, version: str, cuda_major: str) -> str:
    """Download and unpack one CuDNN archive under ``<prefix>/cuda/cudnn-<version>-cuda<major>``.

    Returns immediately when the target directory is already marked done.

    Args:
        args: parsed command line; ``args.prefix`` and ``args.cache`` are used.
        version: CuDNN version part of the ``cudnns`` key.
        cuda_major: CUDA major part of the ``cudnns`` key.

    Returns:
        The installation directory.

    Raises:
        KeyError: if ``f'{version}-cuda{cuda_major}'`` is not in ``cudnns``.
        subprocess.CalledProcessError: if the download or extraction exits non-zero.
    """
    key = f'{version}-cuda{cuda_major}'
    dir = f'{args.prefix}/cuda/cudnn-{key}'
    if is_done(dir):
        return dir

    logger.info(f'Downloading CuDNN {key}...')
    cudnn = cudnns[key]
    # curl does not create missing parent directories for -o
    os.makedirs(args.cache, exist_ok=True)
    file = os.path.join(args.cache, cudnn.filename)
    try:
        cmd = ['curl', '-fsSL', cudnn.url, '-o', file]
        subprocess.run(cmd, check=True)

        logger.info(f'Installing CuDNN {key}...')
        # Not marked done: a directory left by an interrupted earlier attempt
        # would make os.makedirs raise FileExistsError on retry, so reset it
        # first, the same way build_generation resets its directory.
        shutil.rmtree(dir, ignore_errors=True)
        os.makedirs(dir)
        # Drop the archive's top-level directory and skip static libraries
        cmd = ['tar', '-xf', file, '--strip-components=1', '--exclude=lib*.a', '-C', dir]
        subprocess.run(cmd, check=True)
    finally:
        # Remove the tarball even when the download or extraction failed
        if os.path.exists(file):
            os.remove(file)

    mark_done(dir)
    logger.info(f'CuDNN {key} installed in {dir}')
    return dir
48 changes: 48 additions & 0 deletions src/entrypoint.sh
Original file line number Diff line number Diff line change
@@ -0,0 +1,48 @@
#!/bin/bash

# Entrypoint script
# Dispatches on $HOSTNAME (see the case statement at the bottom): runs the
# builder for monobase-* hosts, otherwise execs the given command as a model.

# Abort on errors, unset variables and failures anywhere in a pipeline
set -euo pipefail

# Root under which the builder installs bin/, cuda/, monobase/ and uv/
PREFIX="/usr/local"
# Python version passed to `uv run --python` for build.py
BUILDER_PYTHON="3.12"

# "latest" release downloads: the versions are not pinned
UV_URL="https://github.com/astral-sh/uv/releases/latest/download/uv-x86_64-unknown-linux-gnu.tar.gz"
PGET_URL="https://github.com/replicate/pget/releases/latest/download/pget_Linux_x86_64"

# Print all arguments on one line, prefixed with the current UTC time
# in ISO 8601 format (seconds precision).
log() {
    printf '%s %s\n' "$(date --iso-8601=seconds --utc)" "$*"
}

# Builder mode: optionally wipe $PREFIX, install the latest uv and pget into
# $PREFIX/bin, then run build.py under uv with the arguments given to the
# container.
builder() {
    # MONOBASE_CLEAN set to any non-empty value forces a rebuild from scratch
    if [ -n "${MONOBASE_CLEAN:-}" ]; then
        log "Cleaning up $PREFIX..."
        # ${PREFIX:?} aborts instead of expanding to /bin etc. if PREFIX is empty
        rm -rf "${PREFIX:?}"/{bin,cuda,monobase,uv}
    fi
    mkdir -p "$PREFIX/bin"

    # Always install latest uv and pget first

    log "Installing uv..."
    curl -fsSL "$UV_URL" | tar -xz --strip-components=1 -C "$PREFIX/bin"
    "$PREFIX/bin/uv" --version

    log "Installing pget..."
    # Download next to the final path and rename into place, so the installed
    # pget is never seen half-written and a failed download cannot replace a
    # working binary. A rename also avoids opening for writing a binary that
    # may be executing elsewhere.
    local pget_tmp
    pget_tmp="$(mktemp "$PREFIX/bin/.pget.XXXXXX")"
    if ! curl -fsSL -o "$pget_tmp" "$PGET_URL"; then
        rm -f "$pget_tmp"
        log "Failed to download pget from $PGET_URL"
        exit 1
    fi
    # mktemp creates the file with owner-only permissions; make it an ordinary executable
    chmod 0755 "$pget_tmp"
    mv -f "$pget_tmp" "$PREFIX/bin/pget"
    "$PREFIX/bin/pget" version

    log "Running builder..."
    # Use the uv installed above, independent of PATH
    "$PREFIX/bin/uv" run --python "$BUILDER_PYTHON" /srv/r8/monobase/build.py "$@"
}

# Model mode: load the monobase environment into this shell, then replace the
# shell with the requested command.
model() {
    # shellcheck disable=SC1091
    source /srv/r8/monobase/env.sh
    exec "$@"
}

# Hosts named monobase-* run the builder; every other host runs as a model.
if [[ "$HOSTNAME" == monobase-* ]]; then
    builder "$@"
else
    model "$@"
fi
Loading

0 comments on commit cfeebb4

Please sign in to comment.