# memory.py
# GPU memory utilities (from a fork of entmike/disco-diffusion-1)
from torch import cuda


def get_less_used_gpu(gpus=None, debug=False):
    """Inspect reserved and allocated memory on the specified gpus and return the id of the least used device."""
    warn = None
    if gpus is None:
        warn = "Falling back to default: all gpus"
        gpus = range(cuda.device_count())
    elif isinstance(gpus, str):
        gpus = [int(el) for el in gpus.split(",")]

    # Check the gpus arg against the gpus actually present on the system.
    sys_gpus = list(range(cuda.device_count()))
    if len(gpus) > len(sys_gpus):
        # Build the warning before overwriting `gpus`, so it reports the
        # requested count rather than the available count.
        warn = (
            f"WARNING: Specified {len(gpus)} gpus, but only {cuda.device_count()} available. "
            f"Falling back to default: all gpus.\nIDs:\t{sys_gpus}"
        )
        gpus = sys_gpus
    elif set(gpus).difference(sys_gpus):
        # Keep the correctly specified ids and pad with as many unused system
        # gpus as there were bad specifications.
        available_gpus = set(gpus).intersection(sys_gpus)
        unavailable_gpus = set(gpus).difference(sys_gpus)
        unused_gpus = set(sys_gpus).difference(gpus)
        gpus = list(available_gpus) + list(unused_gpus)[: len(unavailable_gpus)]
        warn = f"GPU ids {unavailable_gpus} not available. Falling back to {len(gpus)} device(s).\nIDs:\t{list(gpus)}"

    # Collect current and peak memory statistics per device.
    cur_allocated_mem = {}
    cur_cached_mem = {}
    max_allocated_mem = {}
    max_cached_mem = {}
    for i in gpus:
        cur_allocated_mem[i] = cuda.memory_allocated(i)
        cur_cached_mem[i] = cuda.memory_reserved(i)
        max_allocated_mem[i] = cuda.max_memory_allocated(i)
        max_cached_mem[i] = cuda.max_memory_reserved(i)
    min_allocated = min(cur_allocated_mem, key=cur_allocated_mem.get)
    if debug:
        if warn:
            print(warn)
        print("Current allocated memory:", {f"cuda:{k}": v for k, v in cur_allocated_mem.items()})
        print("Current reserved memory:", {f"cuda:{k}": v for k, v in cur_cached_mem.items()})
        print("Maximum allocated memory:", {f"cuda:{k}": v for k, v in max_allocated_mem.items()})
        print("Maximum reserved memory:", {f"cuda:{k}": v for k, v in max_cached_mem.items()})
        print("Suggested GPU:", min_allocated)
    return min_allocated
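
# Usage sketch (not part of the original module): route a new model to the
# least loaded device. `MyModel` is a hypothetical placeholder, not a class
# defined in this repo.
#
#   device_id = get_less_used_gpu(debug=True)
#   model = MyModel().to(f"cuda:{device_id}")
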
def free_memory(to_delete: list, debug=False):
    """Drop the named variables from the caller's scope and release cached CUDA memory."""
    import gc
    import inspect

    calling_namespace = inspect.currentframe().f_back
    if debug:
        print("Before:")
        get_less_used_gpu(debug=True)

    for _var in to_delete:
        # Note: this reliably removes names from module/notebook (global)
        # frames; inside a regular function, CPython exposes f_locals as a
        # snapshot, so popping from it may have no effect there.
        calling_namespace.f_locals.pop(_var, None)
    gc.collect()
    cuda.empty_cache()
    if debug:
        print("After:")
        get_less_used_gpu(debug=True)
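
# Usage sketch (illustrative, with hypothetical variable names): free large
# objects from a notebook or module scope between experiments.
#
#   out = model(batch)
#   free_memory(["model", "out"], debug=True)  # drops the names, runs gc, then empties the CUDA cache
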
if __name__ == "__main__":
    # Quick self-check when run directly: print per-device memory stats and
    # the suggested device. Guarded so importing the module has no side effects.
    get_less_used_gpu(debug=True)