Skip to content

Commit

Permalink
Support for nvidia MIG in Mesos containerizer
Browse files Browse the repository at this point in the history
  • Loading branch information
jblache committed Jan 30, 2023
1 parent a69bd75 commit 7cd3c46
Show file tree
Hide file tree
Showing 5 changed files with 525 additions and 71 deletions.
147 changes: 131 additions & 16 deletions src/slave/containerizer/mesos/isolators/gpu/allocator.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -85,11 +85,21 @@ static Try<set<Gpu>> enumerateGpus(
if (flags.nvidia_gpu_devices.isSome()) {
indices = flags.nvidia_gpu_devices.get();
} else {
for (size_t i = 0; i < resources.gpus().getOrElse(0); ++i) {
Try<unsigned int> available = nvml::deviceGetCount();
if (available.isError()) {
return Error("Failed to nvml::deviceGetCount: " + available.error());
}

for (unsigned int i = 0; i < available.get(); ++i) {
indices.push_back(i);
}
}

Try<unsigned int> caps_major = nvml::systemGetCapsMajor();
if (caps_major.isError()) {
return Error("Failed to get nvidia caps major: " + caps_major.error());
}

set<Gpu> gpus;

foreach (unsigned int index, indices) {
Expand All @@ -103,17 +113,91 @@ static Try<set<Gpu>> enumerateGpus(
return Error("Failed to nvml::deviceGetMinorNumber: " + minor.error());
}

Gpu gpu;
gpu.major = NVIDIA_MAJOR_DEVICE;
gpu.minor = minor.get();
Try<bool> ismig = nvml::deviceGetMigMode(handle.get());
if (ismig.isError()) {
return Error("Failed to nvml::deviceGetMigMode: " + ismig.error());
}

if (!ismig.get()) {
Gpu gpu;
gpu.major = NVIDIA_MAJOR_DEVICE;
gpu.minor = minor.get();

gpus.insert(gpu);

gpus.insert(gpu);
continue;
}

Try<unsigned int> migcount = nvml::deviceGetMigDeviceCount(handle.get());
if (migcount.isError()) {
return Error("Failed to nvml::deviceGetMigDeviceCount: " + migcount.error());
}

for (unsigned int migindex = 0; migindex < migcount.get(); migindex++) {
Try<nvmlDevice_t> mighandle = nvml::deviceGetMigDeviceHandleByIndex(handle.get(), migindex);
if (mighandle.isError()) {
return Error("Failed to nvml::deviceGetMigDeviceHandleByIndex: " + mighandle.error());
}

Try<unsigned int> gi_minor = nvml::deviceGetGpuInstanceMinor(mighandle.get());
if (gi_minor.isError()) {
return Error("Failed to nvml::deviceGetGpuInstanceMinor: " + gi_minor.error());
}

Try<unsigned int> ci_minor = nvml::deviceGetComputeInstanceMinor(mighandle.get());
if (ci_minor.isError()) {
return Error("Failed to nvml::deviceGetComputeInstanceMinor: " + ci_minor.error());
}

Gpu gpu;
gpu.major = NVIDIA_MAJOR_DEVICE;
gpu.minor = minor.get();
gpu.ismig = true;
gpu.caps_major = caps_major.get();
gpu.gi_minor = gi_minor.get();
gpu.ci_minor = ci_minor.get();

gpus.insert(gpu);
}
}

return gpus;
}


// Counts the GPU units provided by the NVML devices whose indices are
// listed in `devices`. A device that is not in MIG mode contributes
// exactly one unit; a device in MIG mode contributes whatever
// `nvml::deviceGetMigDeviceCount` reports for it.
//
// Returns the total, or an Error naming the first NVML call that
// failed.
static Try<unsigned int> countGpuInstancesForDevices(
    const vector<unsigned int>& devices)
{
  unsigned int total = 0;

  for (unsigned int index : devices) {
    Try<nvmlDevice_t> device = nvml::deviceGetHandleByIndex(index);
    if (device.isError()) {
      return Error(
          "Failed to nvml::deviceGetHandleByIndex: " + device.error());
    }

    Try<bool> migEnabled = nvml::deviceGetMigMode(device.get());
    if (migEnabled.isError()) {
      return Error(
          "Failed to nvml::deviceGetMigMode: " + migEnabled.error());
    }

    if (migEnabled.get()) {
      // The MIG device count is only queried for devices in MIG mode.
      Try<unsigned int> instances =
        nvml::deviceGetMigDeviceCount(device.get());
      if (instances.isError()) {
        return Error(
            "Failed to nvml::deviceGetMigDeviceCount: " + instances.error());
      }

      total += instances.get();
    } else {
      // A whole, unpartitioned GPU is a single unit.
      ++total;
    }
  }

  return total;
}


// To determine the proper number of GPU resources to return, we
// need to check both --resources and --nvidia_gpu_devices.
// There are two cases to consider:
Expand Down Expand Up @@ -174,11 +258,6 @@ static Try<Resources> enumerateGpuResources(const Flags& flags)
return Error("Failed to nvml::initialize: " + initialized.error());
}

Try<unsigned int> available = nvml::deviceGetCount();
if (available.isError()) {
return Error("Failed to nvml::deviceGetCount: " + available.error());
}

// The `Resources` wrapper does not allow us to distinguish between
// a user specifying "gpus:0" in the --resources flag and not
// specifying "gpus" at all. To help with this we short circuit
Expand Down Expand Up @@ -225,9 +304,11 @@ static Try<Resources> enumerateGpuResources(const Flags& flags)
return Error("'--nvidia_gpu_devices' contains duplicates");
}

if (flags.nvidia_gpu_devices->size() != resources.gpus().get()) {
return Error("'--resources' and '--nvidia_gpu_devices' specify"
" different numbers of GPU devices");
Try<unsigned int> available = countGpuInstancesForDevices(unique);
if (available.isError()) {
return Error("Failed to count all GPU instances for devices"
" specified by --nvidia_gpu_devices: "
+ available.error());
}

if (resources.gpus().get() > available.get()) {
Expand All @@ -238,6 +319,22 @@ static Try<Resources> enumerateGpuResources(const Flags& flags)
return resources;
}

Try<unsigned int> available = nvml::deviceGetCount();
if (available.isError()) {
return Error("Failed to nvml::deviceGetCount: " + available.error());
}

vector<unsigned int> indices;
for (unsigned int i = 0; i < available.get(); ++i) {
indices.push_back(i);
}

available = countGpuInstancesForDevices(indices);
if (available.isError()) {
return Error("Failed to count all GPU instances: "
+ available.error());
}

return Resources::parse(
"gpus",
stringify(available.get()),
Expand Down Expand Up @@ -378,7 +475,15 @@ Future<Nothing> NvidiaGpuAllocator::deallocate(const set<Gpu>& gpus)
// Orders GPUs lexicographically on (major, minor, ismig, gi_minor,
// ci_minor), where the GPU/compute instance minors only take part
// when both operands are MIG instances. `set<Gpu>` relies on this
// being a strict weak ordering.
bool operator<(const Gpu& left, const Gpu& right)
{
  if (left.major != right.major) {
    return left.major < right.major;
  }

  if (left.minor != right.minor) {
    return left.minor < right.minor;
  }

  // Same device node from here on. A whole (non-MIG) GPU sorts before
  // any MIG instance of that device; treating the two as equivalent
  // would make equivalence non-transitive, because distinct MIG
  // instances of one device are themselves ordered below.
  if (left.ismig != right.ismig) {
    return !left.ismig;
  }

  // Two non-MIG entries for the same device are equivalent; the
  // instance minors carry no meaning for them.
  if (!left.ismig) {
    return false;
  }

  if (left.gi_minor != right.gi_minor) {
    return left.gi_minor < right.gi_minor;
  }

  return left.ci_minor < right.ci_minor;
}
Expand All @@ -404,7 +509,14 @@ bool operator>=(const Gpu& left, const Gpu& right)

// Two GPUs are equal when they name the same device node and are
// either both whole (non-MIG) GPUs or both the same MIG instance,
// i.e. they also share GPU instance and compute instance minors.
// `caps_major` is deliberately not compared here.
bool operator==(const Gpu& left, const Gpu& right)
{
  if (left.major != right.major || left.minor != right.minor) {
    return false;
  }

  // A whole GPU never equals a MIG slice of the same device.
  if (left.ismig != right.ismig) {
    return false;
  }

  // For non-MIG entries the instance minors are not meaningful, so
  // matching major/minor is sufficient.
  if (!left.ismig) {
    return true;
  }

  return left.gi_minor == right.gi_minor &&
         left.ci_minor == right.ci_minor;
}


Expand All @@ -416,7 +528,10 @@ bool operator!=(const Gpu& left, const Gpu& right)

// Writes a GPU as "<major>.<minor>", followed by
// ":<gi_minor>.<ci_minor>" when the entry is a MIG instance.
ostream& operator<<(ostream& stream, const Gpu& gpu)
{
  stream << gpu.major << '.' << gpu.minor;

  if (gpu.ismig) {
    stream << ':' << gpu.gi_minor << '.' << gpu.ci_minor;
  }

  return stream;
}

} // namespace slave {
Expand Down
7 changes: 7 additions & 0 deletions src/slave/containerizer/mesos/isolators/gpu/allocator.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -41,8 +41,15 @@ namespace slave {
// abstraction in terms of it.
struct Gpu
{
  // GPU device: major/minor numbers of the device node.
  unsigned int major = 0;
  unsigned int minor = 0;

  // MIG support.
  //
  // Every field is default-initialized so that a `Gpu` describing a
  // whole (non-MIG) device, where only `major` and `minor` get
  // assigned, never exposes indeterminate values to the comparison
  // and stream operators, all of which read `ismig`.

  // True when this entry is a MIG instance rather than a whole GPU.
  bool ismig = false;

  // Value obtained from `nvml::systemGetCapsMajor()`; only set for
  // MIG instances.
  // NOTE(review): presumably the major number of the nvidia-caps
  // device nodes - confirm against the isolator that consumes it.
  unsigned int caps_major = 0;

  // Minors of the GPU instance and compute instance backing this MIG
  // device; only meaningful when `ismig` is true.
  unsigned int gi_minor = 0;
  unsigned int ci_minor = 0;
};


Expand Down
Loading

0 comments on commit 7cd3c46

Please sign in to comment.