core: slabs automover improvements
Overhaul of how page moving works for memory balancing.

Removes the `slab_reassign_evictions_nomem` case entirely. Instead of
sometimes evicting random items when reassigning memory, the page mover
now pulls from the LRU tail. If memory is not completely full, no items
are evicted while moving pages.
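
For illustration only, here is a minimal sketch of the rescue idea described
above, using the slabs_alloc() and lru_pull_tail() signatures that appear
elsewhere in this diff; the helper name and exact flow are hypothetical, and
the real logic lives in slabs_mover.c.

#include "memcached.h"  /* for item, slabs_alloc, lru_pull_tail, COLD_LRU */

/* Sketch, not the actual mover code: try to rescue a still-valid item out of
 * the page being reassigned into free memory; only if the class has no free
 * chunks, evict from that class's LRU tail instead of dropping the item. */
static item *rescue_sketch(item *src, unsigned int clsid, size_t ntotal) {
    item *dst = slabs_alloc(clsid, 0);      /* new signature: no size argument */
    if (dst == NULL) {
        /* free a chunk by evicting the coldest item in this class */
        lru_pull_tail(clsid, COLD_LRU, 0, LRU_PULL_EVICT, 0, NULL);
        dst = slabs_alloc(clsid, 0);
    }
    if (dst == NULL) {
        /* still nothing free: counted as slab_reassign_busy_nomem and retried */
        return NULL;
    }
    memcpy(dst, src, ntotal);  /* copy the item; relinking is omitted in this sketch */
    return dst;
}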

The extstore-related memory balancing algorithm has been simplified and
improved. A new stat, `extstore_memory_pressure`, has been added. It is
a percentage between 0 and 100: at 100, extstore will be forced to evict
items instead of simply moving them from memory to disk.
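
As a rough illustration of the idea only (the actual formula is internal to
the automover/extstore code, and the function and parameter names below are
hypothetical): the pressure value expresses how far free slab memory has
fallen below the configured free-ratio target.

#include <stdint.h>

/* Hypothetical sketch of what a 0-100 memory-pressure value expresses.
 * limit_bytes: total slab memory; free_bytes: currently free slab memory;
 * freeratio: fraction of memory the automover tries to keep free. */
static float memory_pressure_sketch(uint64_t limit_bytes, uint64_t free_bytes,
                                    double freeratio) {
    double target = (double)limit_bytes * freeratio;  /* bytes we want to keep free */
    if (target <= 0.0)
        return 0.0f;
    double p = (1.0 - (double)free_bytes / target) * 100.0;
    if (p < 0.0) p = 0.0;
    if (p > 100.0) p = 100.0;
    /* at 100, extstore evicts from the LRU tail rather than only
     * flushing items from memory to disk */
    return (float)p;
}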

Many performance improvements and code cleanups were also done.
dormando committed Dec 5, 2024
1 parent 5b84e42 commit 4c56c8d
Showing 20 changed files with 1,401 additions and 796 deletions.
1 change: 1 addition & 0 deletions Makefile.am
@@ -26,6 +26,7 @@ memcached_SOURCES = memcached.c memcached.h \
crawler.c crawler.h \
itoa_ljust.c itoa_ljust.h \
slab_automove.c slab_automove.h \
slabs_mover.c slabs_mover.h \
authfile.c authfile.h \
restart.c restart.h \
proto_text.c proto_text.h \
7 changes: 4 additions & 3 deletions doc/protocol.txt
@@ -1421,9 +1421,6 @@ integers separated by a colon (treat this as a floating point number).
| slab_global_page_pool | 32u | Slab pages returned to global pool for |
| | | reassignment to other slab classes. |
| slab_reassign_rescues | 64u | Items rescued from eviction in page move |
| slab_reassign_evictions_nomem |
| | 64u | Valid items evicted during a page move |
| | | (due to no free memory in slab) |
| slab_reassign_chunk_rescues |
| | 64u | Individual sections of an item rescued |
| | | during a page move. |
@@ -1434,6 +1431,10 @@ integers separated by a colon (treat this as a floating point number).
| slab_reassign_busy_items |
| | 64u | Items busy during page move, requiring a |
| | | retry before page can be moved. |
| slab_reassign_busy_nomem |
| | 64u | Times waiting for free slab memory before |
| | | being able to rescue valid items during |
| | | a page move. |
| slab_reassign_busy_deletes |
| | 64u | Items busy during page move, requiring |
| | | deletion before page can be moved. |
2 changes: 0 additions & 2 deletions globals.c
@@ -22,5 +22,3 @@ volatile rel_time_t current_time;
struct stats stats;
struct stats_state stats_state;
struct settings settings;
struct slab_rebalance slab_rebal;
volatile int slab_rebalance_signal;
81 changes: 3 additions & 78 deletions items.c
@@ -1,20 +1,14 @@
/* -*- Mode: C; tab-width: 4; c-basic-offset: 4; indent-tabs-mode: nil -*- */
#include "memcached.h"
#include "bipbuffer.h"
#include "slab_automove.h"
#include "storage.h"
#ifdef EXTSTORE
#include "slab_automove_extstore.h"
#endif
#include <sys/stat.h>
#include <sys/socket.h>
#include <sys/resource.h>
#include <fcntl.h>
#include <netinet/in.h>
#include <errno.h>
#include <stdlib.h>
#include <stdio.h>
#include <signal.h>
#include <string.h>
#include <time.h>
#include <assert.h>
@@ -178,7 +172,7 @@ item *do_item_alloc_pull(const size_t ntotal, const unsigned int id) {
if (!settings.lru_segmented) {
lru_pull_tail(id, COLD_LRU, 0, 0, 0, NULL);
}
it = slabs_alloc(ntotal, id, 0);
it = slabs_alloc(id, 0);

if (it == NULL) {
// We send '0' in for "total_bytes" as this routine is always
@@ -353,7 +347,6 @@ item *do_item_alloc(const char *key, const size_t nkey, const client_flags_t fla
}

void item_free(item *it) {
size_t ntotal = ITEM_ntotal(it);
unsigned int clsid;
assert((it->it_flags & ITEM_LINKED) == 0);
assert(it != heads[it->slabs_clsid]);
@@ -363,7 +356,7 @@ void item_free(item *it) {
/* so slab size changer can tell later if item is already free or not */
clsid = ITEM_clsid(it);
DEBUG_REFCNT(it, 'F');
slabs_free(it, ntotal, clsid);
slabs_free(it, clsid);
}

/**
@@ -694,7 +687,7 @@ char *item_cachedump(const unsigned int slabs_clsid, const unsigned int limit, u
return buffer;
}

/* With refactoring of the various stats code the automover won't need a
/* With refactoring of the various stats code the automover shouldn't need a
* custom function here.
*/
void fill_item_stats_automove(item_stats_automove *am) {
@@ -981,26 +974,6 @@ item *do_item_get(const char *key, const size_t nkey, const uint32_t hv, LIBEVEN
item *it = assoc_find(key, nkey, hv);
if (it != NULL) {
refcount_incr(it);
/* Optimization for slab reassignment. prevents popular items from
* jamming in busy wait. Can only do this here to satisfy lock order
* of item_lock, slabs_lock. */
/* This was made unsafe by removal of the cache_lock:
* slab_rebalance_signal and slab_rebal.* are modified in a separate
* thread under slabs_lock. If slab_rebalance_signal = 1, slab_start =
* NULL (0), but slab_end is still equal to some value, this would end
* up unlinking every item fetched.
* This is either an acceptable loss, or if slab_rebalance_signal is
* true, slab_start/slab_end should be put behind the slabs_lock.
* Which would cause a huge potential slowdown.
* Could also use a specific lock for slab_rebal.* and
* slab_rebalance_signal (shorter lock?)
*/
/*if (slab_rebalance_signal &&
((void *)it >= slab_rebal.slab_start && (void *)it < slab_rebal.slab_end)) {
do_item_unlink(it, hv);
do_item_remove(it);
it = NULL;
}*/
}
int was_found = 0;

@@ -1237,9 +1210,6 @@ int lru_pull_tail(const int orig_id, const int cur_lru,
STORAGE_delete(ext_storage, search);
do_item_unlink_nolock(search, hv);
removed++;
if (settings.slab_automove == 2) {
slabs_reassign(-1, orig_id);
}
} else if (flags & LRU_PULL_RETURN_ITEM) {
/* Keep a reference to this item and return it. */
ret_it->it = it;
@@ -1567,35 +1537,16 @@ static void lru_maintainer_crawler_check(struct crawler_expired_data *cdata, log
}
}

slab_automove_reg_t slab_automove_default = {
.init = slab_automove_init,
.free = slab_automove_free,
.run = slab_automove_run
};
#ifdef EXTSTORE
slab_automove_reg_t slab_automove_extstore = {
.init = slab_automove_extstore_init,
.free = slab_automove_extstore_free,
.run = slab_automove_extstore_run
};
#endif
static pthread_t lru_maintainer_tid;

#define MAX_LRU_MAINTAINER_SLEEP (1000000-1)
#define MIN_LRU_MAINTAINER_SLEEP 1000

static void *lru_maintainer_thread(void *arg) {
slab_automove_reg_t *sam = &slab_automove_default;
#ifdef EXTSTORE
void *storage = arg;
if (storage != NULL)
sam = &slab_automove_extstore;
#endif
int i;
useconds_t to_sleep = MIN_LRU_MAINTAINER_SLEEP;
useconds_t last_sleep = MIN_LRU_MAINTAINER_SLEEP;
rel_time_t last_crawler_check = 0;
rel_time_t last_automove_check = 0;
useconds_t next_juggles[MAX_NUMBER_OF_SLAB_CLASSES] = {0};
useconds_t backoff_juggles[MAX_NUMBER_OF_SLAB_CLASSES] = {0};
struct crawler_expired_data *cdata =
@@ -1612,9 +1563,6 @@ static void *lru_maintainer_thread(void *arg) {
abort();
}

double last_ratio = settings.slab_automove_ratio;
void *am = sam->init(&settings);

pthread_mutex_lock(&lru_maintainer_lock);
if (settings.verbose > 2)
fprintf(stderr, "Starting LRU maintainer background thread\n");
@@ -1672,31 +1620,8 @@ static void *lru_maintainer_thread(void *arg) {
lru_maintainer_crawler_check(cdata, l);
last_crawler_check = current_time;
}

if (settings.slab_automove == 1 && last_automove_check != current_time) {
if (last_ratio != settings.slab_automove_ratio) {
sam->free(am);
am = sam->init(&settings);
last_ratio = settings.slab_automove_ratio;
}
int src, dst;
sam->run(am, &src, &dst);
if (src != -1 && dst != -1) {
slabs_reassign(src, dst);
LOGGER_LOG(l, LOG_SYSEVENTS, LOGGER_SLAB_MOVE, NULL,
src, dst);
}
// dst == 0 means reclaim to global pool, be more aggressive
if (dst != 0) {
last_automove_check = current_time;
} else if (dst == 0) {
// also ensure we minimize the thread sleep
to_sleep = 1000;
}
}
}
pthread_mutex_unlock(&lru_maintainer_lock);
sam->free(am);
// LRU crawler *must* be stopped.
free(cdata);
if (settings.verbose > 2)
2 changes: 1 addition & 1 deletion logger.c
@@ -494,7 +494,7 @@ static const entry_details default_entries[] = {
"type=lru_crawler crawler=%d lru=%s low_mark=%llu next_reclaims=%llu since_run=%u next_run=%d elapsed=%u examined=%llu reclaimed=%llu"
},
[LOGGER_SLAB_MOVE] = {512, LOG_SYSEVENTS, _logger_log_text, _logger_parse_text,
"type=slab_move src=%d dst=%d"
"type=slab_move src=%d dst=%d state=%s"
},
[LOGGER_CONNECTION_NEW] = {512, LOG_CONNEVENTS, _logger_log_conn_event, _logger_parse_cne, NULL},
[LOGGER_CONNECTION_CLOSE] = {512, LOG_CONNEVENTS, _logger_log_conn_event, _logger_parse_cce, NULL},
17 changes: 9 additions & 8 deletions memcached.c
@@ -17,6 +17,7 @@
#include "storage.h"
#include "authfile.h"
#include "restart.h"
#include "slabs_mover.h"
#include <sys/stat.h>
#include <sys/socket.h>
#include <sys/un.h>
@@ -107,8 +108,6 @@ struct settings settings;
time_t process_started; /* when the process was started */
conn **conns;

struct slab_rebalance slab_rebal;
volatile int slab_rebalance_signal;
#ifdef EXTSTORE
/* hoping this is temporary; I'd prefer to cut globals, but will complete this
* battle another day.
@@ -258,8 +257,9 @@ static void settings_init(void) {
settings.hashpower_init = 0;
settings.slab_reassign = true;
settings.slab_automove = 1;
settings.slab_automove_version = 0;
settings.slab_automove_ratio = 0.8;
settings.slab_automove_window = 30;
settings.slab_automove_window = 10;
settings.shutdown_command = false;
settings.tail_repair_time = TAIL_REPAIR_TIME_DEFAULT;
settings.flush_enabled = true;
@@ -1872,10 +1872,10 @@ void server_stats(ADD_STAT add_stats, void *c) {
if (settings.slab_reassign) {
APPEND_STAT("slab_reassign_rescues", "%llu", stats.slab_reassign_rescues);
APPEND_STAT("slab_reassign_chunk_rescues", "%llu", stats.slab_reassign_chunk_rescues);
APPEND_STAT("slab_reassign_evictions_nomem", "%llu", stats.slab_reassign_evictions_nomem);
APPEND_STAT("slab_reassign_inline_reclaim", "%llu", stats.slab_reassign_inline_reclaim);
APPEND_STAT("slab_reassign_busy_items", "%llu", stats.slab_reassign_busy_items);
APPEND_STAT("slab_reassign_busy_deletes", "%llu", stats.slab_reassign_busy_deletes);
APPEND_STAT("slab_reassign_busy_nomem", "%llu", stats.slab_reassign_busy_nomem);
APPEND_STAT("slab_reassign_running", "%u", stats_state.slab_reassign_running);
APPEND_STAT("slabs_moved", "%llu", stats.slabs_moved);
}
@@ -6033,7 +6033,6 @@ int main (int argc, char **argv) {
}
#endif
#ifdef EXTSTORE
slabs_set_storage(storage);
memcached_thread_init(settings.num_threads, storage);
init_lru_crawler(storage);
#else
@@ -6075,9 +6074,11 @@
return 1;
}

if (settings.slab_reassign &&
start_slab_maintenance_thread() == -1) {
exit(EXIT_FAILURE);
if (settings.slab_reassign) {
settings.slab_rebal = start_slab_maintenance_thread(storage);
if (!settings.slab_rebal) {
exit(EXIT_FAILURE);
}
}

if (settings.idle_timeout && start_conn_timeout_thread() == -1) {
29 changes: 6 additions & 23 deletions memcached.h
@@ -46,6 +46,7 @@
#endif

#include "itoa_ljust.h"
#include "slabs_mover.h"
#include "protocol_binary.h"
#include "cache.h"
#include "logger.h"
@@ -394,11 +395,11 @@ struct stats {
uint64_t listen_disabled_num;
uint64_t slabs_moved; /* times slabs were moved around */
uint64_t slab_reassign_rescues; /* items rescued during slab move */
uint64_t slab_reassign_evictions_nomem; /* valid items lost during slab move */
uint64_t slab_reassign_inline_reclaim; /* valid items lost during slab move */
uint64_t slab_reassign_chunk_rescues; /* chunked-item chunks recovered */
uint64_t slab_reassign_busy_items; /* valid temporarily unmovable */
uint64_t slab_reassign_busy_deletes; /* refcounted items killed */
uint64_t slab_reassign_busy_nomem; /* valid items lost during slab move */
uint64_t lru_crawler_starts; /* Number of item crawlers kicked off */
uint64_t lru_maintainer_juggles; /* number of LRU bg pokes */
uint64_t time_in_listen_disabled_us; /* elapsed time in microseconds while server unable to process new connections */
@@ -432,6 +433,7 @@ struct stats_state {
uint64_t curr_bytes;
uint64_t curr_conns;
uint64_t hash_bytes; /* size used for hash tables */
float extstore_memory_pressure; /* when extstore might memory evict */
unsigned int conn_structs;
unsigned int reserved_fds;
unsigned int hash_power_level; /* Better hope it's not over 9000 */
@@ -483,7 +485,9 @@ struct settings {
bool slab_reassign; /* Whether or not slab reassignment is allowed */
bool ssl_enabled; /* indicates whether SSL is enabled */
int slab_automove; /* Whether or not to automatically move slabs */
unsigned int slab_automove_version; /* bump if AM config args change */
double slab_automove_ratio; /* youngest must be within pct of oldest */
double slab_automove_freeratio; /* % of memory to hold free as buffer */
unsigned int slab_automove_window; /* window mover for algorithm */
int hashpower_init; /* Starting hash power level */
bool shutdown_command; /* allow shutdown command */
@@ -507,6 +511,7 @@ struct settings {
bool drop_privileges; /* Whether or not to drop unnecessary process privileges */
bool watch_enabled; /* allows watch commands to be dropped */
bool relaxed_privileges; /* Relax process restrictions when running testapp */
struct slab_rebal_thread *slab_rebal; /* struct for page mover thread */
#ifdef EXTSTORE
unsigned int ext_io_threadcount; /* number of IO threads to run. */
unsigned int ext_page_size; /* size in megabytes of storage pages. */
@@ -519,7 +524,6 @@ struct settings {
unsigned int ext_drop_under; /* when fewer than this many pages, drop COLD items */
unsigned int ext_max_sleep; /* maximum sleep time for extstore bg threads, in us */
double ext_max_frag; /* ideal maximum page fragmentation */
double slab_automove_freeratio; /* % of memory to hold free as buffer */
bool ext_drop_unread; /* skip unread items during compaction */
/* start flushing to extstore after memory below this */
unsigned int ext_global_pool_min;
@@ -918,27 +922,6 @@ extern volatile bool is_paused;
extern volatile int64_t delta;
#endif

/* TODO: Move to slabs.h? */
extern volatile int slab_rebalance_signal;

struct slab_rebalance {
void *slab_start;
void *slab_end;
void *slab_pos;
int s_clsid;
int d_clsid;
uint32_t busy_items;
uint32_t rescues;
uint32_t evictions_nomem;
uint32_t inline_reclaim;
uint32_t chunk_rescues;
uint32_t busy_deletes;
uint32_t busy_loops;
uint8_t done;
uint8_t *completed;
};

extern struct slab_rebalance slab_rebal;
#ifdef EXTSTORE
extern void *ext_storage;
#endif
6 changes: 2 additions & 4 deletions memcached_dtrace.d
@@ -61,19 +61,17 @@ provider memcached {

/**
* Allocate memory from the slab allocator.
* @param size the requested size
* @param slabclass the allocation will be fulfilled in this class
* @param slabsize the size of each item in this class
* @param ptr pointer to allocated memory
*/
probe slabs__allocate(int size, int slabclass, int slabsize, void* ptr);
probe slabs__allocate(int slabclass, int slabsize, void* ptr);

/**
* Failed to allocate memory (out of memory).
* @param size the requested size
* @param slabclass the class that failed to fulfill the request
*/
probe slabs__allocate__failed(int size, int slabclass);
probe slabs__allocate__failed(int slabclass);

/**
* Fired when a slab class attempts to allocate more space.