Skip to content

Commit

Permalink
[6.8] [Monitoring] Only do a single date_histogram agg for get_nodes …
Browse files Browse the repository at this point in the history
…calls (#43481) (#44137)

* [Monitoring] Only do a single date_histogram agg for get_nodes calls (#43481)

* I think this is working now

* Add a way to uncovert, and then fix tests

* Remove unnecessary export

* Update snapshots

* normalize this across branches

* This is just interval in 6.8
  • Loading branch information
chrisronline authored Aug 28, 2019
1 parent 59be31e commit c814843
Show file tree
Hide file tree
Showing 8 changed files with 3,069 additions and 2,478 deletions.
Original file line number Diff line number Diff line change
@@ -0,0 +1,76 @@
/*
* Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one
* or more contributor license agreements. Licensed under the Elastic License;
* you may not use this file except in compliance with the Elastic License.
*/

import { cloneDeep } from 'lodash';
import { LISTING_METRICS_NAMES } from './nodes/get_nodes/nodes_listing_metrics';

// Every aggregation name rewritten by `convertMetricNames` starts with this explicit marker,
// so that `uncovertMetricNames` can recognize the converted names and strip the marker off again.
const CONVERTED_TOKEN = 'odh_';

/**
 * Background: https://github.com/elastic/kibana/issues/43477
 *
 * Historically, the `get_nodes` function created an aggregation with a separate `date_histogram`
 * sub aggregation for each metric aggregation. From a top down view, the whole request looked like:
 *   `terms` agg -> [`date_histogram` -> metric agg]x6
 * That is very inefficient: each `date_histogram` creates its own set of search buckets
 * unnecessarily, so users hit the `search.max_buckets` ceiling sooner.
 *
 * The fix is a single `date_histogram` with every metric agg as one of its sub aggregations.
 * The code that builds these aggregations is shared with the rest of the monitoring code base and
 * is not easily changed, so instead this function renames the aggregations it produces so that
 * they can all live side by side under one `date_histogram`.
 *
 * Each key `name` of `metricObj` becomes `${CONVERTED_TOKEN}${prefix}__${name}`. For keys containing
 * `_deriv` whose value has a `derivative` property, `derivative.buckets_path` receives the same
 * prefix so that it keeps pointing at the renamed sibling aggregation.
 *
 * @param {string} prefix - The aggregation name prefix; the rest of the name is the type of aggregation
 * @param {object} metricObj - The metric aggregation itself, keyed by aggregation name
 * @returns {object} A new object with renamed keys and deep-cloned values; `metricObj` is not modified
 */
export function convertMetricNames(prefix, metricObj) {
  const converted = {};

  for (const [aggName, aggBody] of Object.entries(metricObj)) {
    // Work on a deep copy so the rewrite below never leaks into the caller's object.
    const renamedBody = cloneDeep(aggBody);
    const namespace = `${CONVERTED_TOKEN}${prefix}__`;

    const isDerivative = aggName.includes('_deriv') && renamedBody.derivative;
    if (isDerivative) {
      // The pipeline agg references its sibling by name, and that sibling is renamed too.
      const originalPath = renamedBody.derivative.buckets_path;
      renamedBody.derivative.buckets_path = `${namespace}${originalPath}`;
    }

    converted[`${namespace}${aggName}`] = renamedBody;
  }

  return converted;
}

/**
 * Building upon the comment for `convertMetricNames`, we are dynamically changing the aggregation names to allow
 * the single `date_histogram` to work properly. Unfortunately, the code that looks at the response also needs to
 * understand the naming changes, and that code is likewise shared amongst the rest of the monitoring code base.
 * To circumvent this, we convert the changed aggregation names back to the original, expected names.
 * This feels messy, but it is possible because the original name is kept inside the converted aggregation name.
 *
 * For every name in `LISTING_METRICS_NAMES` the result holds `{ buckets: [...] }`, where each bucket carries
 * `key_as_string`, `key` and `doc_count` from the response bucket plus the aggregations that belong to that
 * metric, keyed by their original (unprefixed) names.
 *
 * @param {object} byDateBucketResponse - The response object from the single `date_histogram` bucket;
 *   its `buckets` array is read
 * @returns {object} An object keyed by metric name, each value being `{ buckets }` with the original names restored
 */
export function uncovertMetricNames(byDateBucketResponse) {
  const unconverted = {};
  for (const metricName of LISTING_METRICS_NAMES) {
    // Match on the full `<token><metric>__` prefix, separator included. Without the separator a
    // metric whose name is a prefix of another metric's name would also claim that metric's aggregations.
    const convertedPrefix = `${CONVERTED_TOKEN}${metricName}__`;

    unconverted[metricName] = {
      buckets: byDateBucketResponse.buckets.map(bucket => {
        const { key_as_string, key, doc_count, ...rest } = bucket; /* eslint-disable-line camelcase */

        const metrics = {};
        for (const [aggName, aggValue] of Object.entries(rest)) {
          if (aggName.startsWith(convertedPrefix)) {
            // Strip exactly the prefix that `convertMetricNames` added, so an original name that
            // itself contains `__` is restored in full instead of being cut at the separator.
            metrics[aggName.slice(convertedPrefix.length)] = aggValue;
          }
        }

        return {
          key_as_string, /* eslint-disable-line camelcase */
          key,
          doc_count, /* eslint-disable-line camelcase */
          ...metrics,
        };
      }),
    };
  }
  return unconverted;
}
Original file line number Diff line number Diff line change
Expand Up @@ -2,115 +2,79 @@

exports[`get metric aggs should create aggregations for "basic" metrics 1`] = `
Object {
"node_cpu_utilization": Object {
"aggs": Object {
"metric": Object {
"max": Object {
"field": "node_stats.process.cpu.percent",
},
},
"metric_deriv": Object {
"derivative": Object {
"buckets_path": "metric",
"unit": "1s",
},
},
"odh_node_cpu_utilization__metric": Object {
"max": Object {
"field": "node_stats.process.cpu.percent",
},
"date_histogram": Object {
"field": "timestamp",
"interval": "30s",
"min_doc_count": 1,
},
"odh_node_cpu_utilization__metric_deriv": Object {
"derivative": Object {
"buckets_path": "odh_node_cpu_utilization__metric",
"unit": "1s",
},
},
"node_jvm_mem_percent": Object {
"aggs": Object {
"metric": Object {
"max": Object {
"field": "node_stats.jvm.mem.heap_used_percent",
},
},
"metric_deriv": Object {
"derivative": Object {
"buckets_path": "metric",
"unit": "1s",
},
},
"odh_node_jvm_mem_percent__metric": Object {
"max": Object {
"field": "node_stats.jvm.mem.heap_used_percent",
},
"date_histogram": Object {
"field": "timestamp",
"interval": "30s",
"min_doc_count": 1,
},
"odh_node_jvm_mem_percent__metric_deriv": Object {
"derivative": Object {
"buckets_path": "odh_node_jvm_mem_percent__metric",
"unit": "1s",
},
},
}
`;

exports[`get metric aggs should incorporate a metric custom aggs 1`] = `
Object {
"node_index_latency": Object {
"aggs": Object {
"event_time_in_millis": Object {
"max": Object {
"field": "node_stats.indices.indexing.index_time_in_millis",
},
},
"event_time_in_millis_deriv": Object {
"derivative": Object {
"buckets_path": "event_time_in_millis",
"gap_policy": "skip",
"unit": "1s",
},
},
"event_total": Object {
"max": Object {
"field": "node_stats.indices.indexing.index_total",
},
},
"event_total_deriv": Object {
"derivative": Object {
"buckets_path": "event_total",
"gap_policy": "skip",
"unit": "1s",
},
},
"odh_node_index_latency__event_time_in_millis": Object {
"max": Object {
"field": "node_stats.indices.indexing.index_time_in_millis",
},
},
"odh_node_index_latency__event_time_in_millis_deriv": Object {
"derivative": Object {
"buckets_path": "odh_node_index_latency__event_time_in_millis",
"gap_policy": "skip",
"unit": "1s",
},
},
"odh_node_index_latency__event_total": Object {
"max": Object {
"field": "node_stats.indices.indexing.index_total",
},
"date_histogram": Object {
"field": "timestamp",
"interval": "30s",
"min_doc_count": 1,
},
"odh_node_index_latency__event_total_deriv": Object {
"derivative": Object {
"buckets_path": "odh_node_index_latency__event_total",
"gap_policy": "skip",
"unit": "1s",
},
},
"odh_node_query_latency__event_time_in_millis": Object {
"max": Object {
"field": "node_stats.indices.search.query_time_in_millis",
},
},
"node_query_latency": Object {
"aggs": Object {
"event_time_in_millis": Object {
"max": Object {
"field": "node_stats.indices.search.query_time_in_millis",
},
},
"event_time_in_millis_deriv": Object {
"derivative": Object {
"buckets_path": "event_time_in_millis",
"gap_policy": "skip",
"unit": "1s",
},
},
"event_total": Object {
"max": Object {
"field": "node_stats.indices.search.query_total",
},
},
"event_total_deriv": Object {
"derivative": Object {
"buckets_path": "event_total",
"gap_policy": "skip",
"unit": "1s",
},
},
"odh_node_query_latency__event_time_in_millis_deriv": Object {
"derivative": Object {
"buckets_path": "odh_node_query_latency__event_time_in_millis",
"gap_policy": "skip",
"unit": "1s",
},
"date_histogram": Object {
"field": "timestamp",
"interval": "30s",
"min_doc_count": 1,
},
"odh_node_query_latency__event_total": Object {
"max": Object {
"field": "node_stats.indices.search.query_total",
},
},
"odh_node_query_latency__event_total_deriv": Object {
"derivative": Object {
"buckets_path": "odh_node_query_latency__event_total",
"gap_policy": "skip",
"unit": "1s",
},
},
}
Expand Down
Loading

0 comments on commit c814843

Please sign in to comment.