Skip to content

Commit

Permalink
Allow CAggs with variable-sized buckets with origin/offset (#7005)
Browse files Browse the repository at this point in the history
On 2.15.x we added complete support for CAggs using time_bucket with
origin and/or offset, but we restricted their creation when the bucket size
is variable due to some uncertainty regarding monthly buckets.

When bucketing by month we always align with the beginning of the month,
even when the origin is defined with a day component. So, to be consistent
with the current implementation, we will not change this behavior and will
allow it to be used in Continuous Aggregates.

Disable-check: force-changelog-file

(cherry picked from commit af8ca2d)
  • Loading branch information
fabriziomello committed Jun 6, 2024
1 parent d6bd051 commit 9812a45
Show file tree
Hide file tree
Showing 13 changed files with 521 additions and 440 deletions.
34 changes: 0 additions & 34 deletions tsl/src/continuous_aggs/common.c
Original file line number Diff line number Diff line change
Expand Up @@ -967,47 +967,13 @@ cagg_validate_query(const Query *query, const bool finalized, const char *cagg_s
(errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
errmsg("cannot create continuous aggregate on hypertable with row security")));

/* Test for broken time_bucket configurations (variable width with offset and origin). We need to
* check only time based buckets since integer based buckets are always fixed. */
bool time_offset_or_origin_set =
(bucket_info.bf->bucket_time_offset != NULL) ||
(TIMESTAMP_NOT_FINITE(bucket_info.bf->bucket_time_origin) == false);

/* At this point, we should have a valid bucket function. Otherwise, we have errored out before.
*/
Ensure(OidIsValid(bucket_info.bf->bucket_function), "unable to find valid bucket function");

/* Ignore time_bucket_ng in this check, since offset and origin were allowed in the past */
FuncInfo *func_info = ts_func_cache_get_bucketing_func(bucket_info.bf->bucket_function);
Ensure(func_info != NULL, "bucket function is not found in function cache");
bool is_time_bucket_ng = func_info->origin == ORIGIN_TIMESCALE_EXPERIMENTAL;

/*
* Some time_bucket variants using variable-sized buckets and custom origin/offset values are
* not behaving correctly. To prevent misaligned buckets, these variants are blocked at the
* moment. This restriction can be removed as soon as time_bucket behaves correctly.
*
* --- Align with default origin ('midnight on January 1, 2000')
* test2=# SELECT time_bucket('1 month', '2000-01-01 01:05:00 UTC'::timestamptz,
* timezone=>'UTC'); time_bucket
* ------------------------
* 2000-01-01 00:00:00+00
*
* --- Using a custom origin
* test2=# SELECT time_bucket('1 month', '2000-01-01 01:05:00 UTC'::timestamptz,
* origin=>'2000-01-01 01:05:00 UTC'::timestamptz, timezone=>'UTC'); time_bucket
* ------------------------
* 2000-01-01 00:00:00+00 <--- Should be 2000-01-01 01:05:00+00
* (1 row)
*/
if (time_bucket_info_has_fixed_width(&bucket_info) == false && time_offset_or_origin_set &&
!is_time_bucket_ng)
{
ereport(ERROR,
(errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
errmsg("cannot create continuous aggregate with variable-width bucket using "
"offset or origin.")));
}

/* hierarchical cagg validations */
if (is_hierarchical)
Expand Down
41 changes: 17 additions & 24 deletions tsl/test/expected/cagg_ddl-13.out
Original file line number Diff line number Diff line change
Expand Up @@ -1811,36 +1811,29 @@ SELECT * FROM cashflows;
-- 3. test named ts
-- 4. test named bucket width
-- named origin
-- Currently not supported due to a bug in time_bucket (see comment in cagg_validate_query)
\set ON_ERROR_STOP 0
CREATE MATERIALIZED VIEW cagg_named_origin WITH
(timescaledb.continuous, timescaledb.materialized_only=false) AS
SELECT time_bucket('1h', time, 'UTC', origin => '2001-01-03 01:23:45') AS bucket,
avg(amount) as avg_amount
FROM transactions GROUP BY 1 WITH NO DATA;
ERROR: cannot create continuous aggregate with variable-width bucket using offset or origin.
-- named timezone
CREATE MATERIALIZED VIEW cagg_named_tz_origin WITH
(timescaledb.continuous, timescaledb.materialized_only=false) AS
SELECT time_bucket('1h', time, timezone => 'UTC', origin => '2001-01-03 01:23:45') AS bucket,
avg(amount) as avg_amount
FROM transactions GROUP BY 1 WITH NO DATA;
ERROR: cannot create continuous aggregate with variable-width bucket using offset or origin.
-- named ts
CREATE MATERIALIZED VIEW cagg_named_ts_tz_origin WITH
(timescaledb.continuous, timescaledb.materialized_only=false) AS
SELECT time_bucket('1h', ts => time, timezone => 'UTC', origin => '2001-01-03 01:23:45') AS bucket,
avg(amount) as avg_amount
FROM transactions GROUP BY 1 WITH NO DATA;
ERROR: cannot create continuous aggregate with variable-width bucket using offset or origin.
-- named bucket width
CREATE MATERIALIZED VIEW cagg_named_all WITH
(timescaledb.continuous, timescaledb.materialized_only=false) AS
SELECT time_bucket(bucket_width => '1h', ts => time, timezone => 'UTC', origin => '2001-01-03 01:23:45') AS bucket,
avg(amount) as avg_amount
FROM transactions GROUP BY 1 WITH NO DATA;
ERROR: cannot create continuous aggregate with variable-width bucket using offset or origin.
\set ON_ERROR_STOP 1
-- Refreshing from the beginning (NULL) of a CAGG with variable time bucket and
-- using an INTERVAL for the end timestamp (issue #5534)
CREATE MATERIALIZED VIEW transactions_montly
Expand Down Expand Up @@ -1923,7 +1916,7 @@ CREATE TABLE conditions (
SELECT create_hypertable('conditions', 'time');
create_hypertable
--------------------------
(48,public,conditions,t)
(52,public,conditions,t)
(1 row)

INSERT INTO conditions VALUES ( '2018-01-01 09:20:00-08', 'SFO', 55);
Expand Down Expand Up @@ -1953,10 +1946,10 @@ WITH NO DATA;
bucket | timestamp with time zone | | | | plain |
avg | double precision | | | | plain |
View definition:
SELECT _materialized_hypertable_49.location,
_materialized_hypertable_49.bucket,
_materialized_hypertable_49.avg
FROM _timescaledb_internal._materialized_hypertable_49;
SELECT _materialized_hypertable_53.location,
_materialized_hypertable_53.bucket,
_materialized_hypertable_53.avg
FROM _timescaledb_internal._materialized_hypertable_53;

-- Should return NO ROWS
SELECT * FROM conditions_daily ORDER BY bucket, avg;
Expand All @@ -1973,17 +1966,17 @@ ALTER MATERIALIZED VIEW conditions_daily SET (timescaledb.materialized_only=fals
bucket | timestamp with time zone | | | | plain |
avg | double precision | | | | plain |
View definition:
SELECT _materialized_hypertable_49.location,
_materialized_hypertable_49.bucket,
_materialized_hypertable_49.avg
FROM _timescaledb_internal._materialized_hypertable_49
WHERE _materialized_hypertable_49.bucket < COALESCE(_timescaledb_functions.to_timestamp(_timescaledb_functions.cagg_watermark(49)), '-infinity'::timestamp with time zone)
SELECT _materialized_hypertable_53.location,
_materialized_hypertable_53.bucket,
_materialized_hypertable_53.avg
FROM _timescaledb_internal._materialized_hypertable_53
WHERE _materialized_hypertable_53.bucket < COALESCE(_timescaledb_functions.to_timestamp(_timescaledb_functions.cagg_watermark(53)), '-infinity'::timestamp with time zone)
UNION ALL
SELECT conditions.location,
time_bucket('@ 1 day'::interval, conditions."time") AS bucket,
avg(conditions.temperature) AS avg
FROM conditions
WHERE conditions."time" >= COALESCE(_timescaledb_functions.to_timestamp(_timescaledb_functions.cagg_watermark(49)), '-infinity'::timestamp with time zone)
WHERE conditions."time" >= COALESCE(_timescaledb_functions.to_timestamp(_timescaledb_functions.cagg_watermark(53)), '-infinity'::timestamp with time zone)
GROUP BY conditions.location, (time_bucket('@ 1 day'::interval, conditions."time"));

-- Should return ROWS because now it is realtime
Expand All @@ -2008,10 +2001,10 @@ ALTER MATERIALIZED VIEW conditions_daily SET (timescaledb.materialized_only=true
bucket | timestamp with time zone | | | | plain |
avg | double precision | | | | plain |
View definition:
SELECT _materialized_hypertable_49.location,
_materialized_hypertable_49.bucket,
_materialized_hypertable_49.avg
FROM _timescaledb_internal._materialized_hypertable_49;
SELECT _materialized_hypertable_53.location,
_materialized_hypertable_53.bucket,
_materialized_hypertable_53.avg
FROM _timescaledb_internal._materialized_hypertable_53;

CALL refresh_continuous_aggregate('conditions_daily', NULL, NULL);
SELECT * FROM conditions_daily ORDER BY bucket, avg;
Expand Down Expand Up @@ -2088,15 +2081,15 @@ NOTICE: refreshing continuous aggregate "cagg1"
ALTER MATERIALIZED VIEW cagg1 SET (timescaledb.compress);
NOTICE: defaulting compress_orderby to time_bucket
WARNING: there was some uncertainty picking the default segment by for the hypertable: You do not have any indexes on columns that can be used for segment_by and thus we are not using segment_by for compression. Please make sure you are not missing any indexes
NOTICE: default segment by for hypertable "_materialized_hypertable_52" is set to ""
NOTICE: default segment by for hypertable "_materialized_hypertable_56" is set to ""
SELECT count(compress_chunk(ch)) FROM show_chunks('cagg1') ch;
count
-------
1
(1 row)

DROP MATERIALIZED VIEW cagg1;
NOTICE: drop cascades to table _timescaledb_internal._hyper_52_68_chunk
NOTICE: drop cascades to table _timescaledb_internal._hyper_56_68_chunk
SELECT * FROM _timescaledb_catalog.compression_settings;
relid | segmentby | orderby | orderby_desc | orderby_nullsfirst
-------+-----------+---------+--------------+--------------------
Expand Down
41 changes: 17 additions & 24 deletions tsl/test/expected/cagg_ddl-14.out
Original file line number Diff line number Diff line change
Expand Up @@ -1811,36 +1811,29 @@ SELECT * FROM cashflows;
-- 3. test named ts
-- 4. test named bucket width
-- named origin
-- Currently not supported due to a bug in time_bucket (see comment in cagg_validate_query)
\set ON_ERROR_STOP 0
CREATE MATERIALIZED VIEW cagg_named_origin WITH
(timescaledb.continuous, timescaledb.materialized_only=false) AS
SELECT time_bucket('1h', time, 'UTC', origin => '2001-01-03 01:23:45') AS bucket,
avg(amount) as avg_amount
FROM transactions GROUP BY 1 WITH NO DATA;
ERROR: cannot create continuous aggregate with variable-width bucket using offset or origin.
-- named timezone
CREATE MATERIALIZED VIEW cagg_named_tz_origin WITH
(timescaledb.continuous, timescaledb.materialized_only=false) AS
SELECT time_bucket('1h', time, timezone => 'UTC', origin => '2001-01-03 01:23:45') AS bucket,
avg(amount) as avg_amount
FROM transactions GROUP BY 1 WITH NO DATA;
ERROR: cannot create continuous aggregate with variable-width bucket using offset or origin.
-- named ts
CREATE MATERIALIZED VIEW cagg_named_ts_tz_origin WITH
(timescaledb.continuous, timescaledb.materialized_only=false) AS
SELECT time_bucket('1h', ts => time, timezone => 'UTC', origin => '2001-01-03 01:23:45') AS bucket,
avg(amount) as avg_amount
FROM transactions GROUP BY 1 WITH NO DATA;
ERROR: cannot create continuous aggregate with variable-width bucket using offset or origin.
-- named bucket width
CREATE MATERIALIZED VIEW cagg_named_all WITH
(timescaledb.continuous, timescaledb.materialized_only=false) AS
SELECT time_bucket(bucket_width => '1h', ts => time, timezone => 'UTC', origin => '2001-01-03 01:23:45') AS bucket,
avg(amount) as avg_amount
FROM transactions GROUP BY 1 WITH NO DATA;
ERROR: cannot create continuous aggregate with variable-width bucket using offset or origin.
\set ON_ERROR_STOP 1
-- Refreshing from the beginning (NULL) of a CAGG with variable time bucket and
-- using an INTERVAL for the end timestamp (issue #5534)
CREATE MATERIALIZED VIEW transactions_montly
Expand Down Expand Up @@ -1923,7 +1916,7 @@ CREATE TABLE conditions (
SELECT create_hypertable('conditions', 'time');
create_hypertable
--------------------------
(48,public,conditions,t)
(52,public,conditions,t)
(1 row)

INSERT INTO conditions VALUES ( '2018-01-01 09:20:00-08', 'SFO', 55);
Expand Down Expand Up @@ -1953,10 +1946,10 @@ WITH NO DATA;
bucket | timestamp with time zone | | | | plain |
avg | double precision | | | | plain |
View definition:
SELECT _materialized_hypertable_49.location,
_materialized_hypertable_49.bucket,
_materialized_hypertable_49.avg
FROM _timescaledb_internal._materialized_hypertable_49;
SELECT _materialized_hypertable_53.location,
_materialized_hypertable_53.bucket,
_materialized_hypertable_53.avg
FROM _timescaledb_internal._materialized_hypertable_53;

-- Should return NO ROWS
SELECT * FROM conditions_daily ORDER BY bucket, avg;
Expand All @@ -1973,17 +1966,17 @@ ALTER MATERIALIZED VIEW conditions_daily SET (timescaledb.materialized_only=fals
bucket | timestamp with time zone | | | | plain |
avg | double precision | | | | plain |
View definition:
SELECT _materialized_hypertable_49.location,
_materialized_hypertable_49.bucket,
_materialized_hypertable_49.avg
FROM _timescaledb_internal._materialized_hypertable_49
WHERE _materialized_hypertable_49.bucket < COALESCE(_timescaledb_functions.to_timestamp(_timescaledb_functions.cagg_watermark(49)), '-infinity'::timestamp with time zone)
SELECT _materialized_hypertable_53.location,
_materialized_hypertable_53.bucket,
_materialized_hypertable_53.avg
FROM _timescaledb_internal._materialized_hypertable_53
WHERE _materialized_hypertable_53.bucket < COALESCE(_timescaledb_functions.to_timestamp(_timescaledb_functions.cagg_watermark(53)), '-infinity'::timestamp with time zone)
UNION ALL
SELECT conditions.location,
time_bucket('@ 1 day'::interval, conditions."time") AS bucket,
avg(conditions.temperature) AS avg
FROM conditions
WHERE conditions."time" >= COALESCE(_timescaledb_functions.to_timestamp(_timescaledb_functions.cagg_watermark(49)), '-infinity'::timestamp with time zone)
WHERE conditions."time" >= COALESCE(_timescaledb_functions.to_timestamp(_timescaledb_functions.cagg_watermark(53)), '-infinity'::timestamp with time zone)
GROUP BY conditions.location, (time_bucket('@ 1 day'::interval, conditions."time"));

-- Should return ROWS because now it is realtime
Expand All @@ -2008,10 +2001,10 @@ ALTER MATERIALIZED VIEW conditions_daily SET (timescaledb.materialized_only=true
bucket | timestamp with time zone | | | | plain |
avg | double precision | | | | plain |
View definition:
SELECT _materialized_hypertable_49.location,
_materialized_hypertable_49.bucket,
_materialized_hypertable_49.avg
FROM _timescaledb_internal._materialized_hypertable_49;
SELECT _materialized_hypertable_53.location,
_materialized_hypertable_53.bucket,
_materialized_hypertable_53.avg
FROM _timescaledb_internal._materialized_hypertable_53;

CALL refresh_continuous_aggregate('conditions_daily', NULL, NULL);
SELECT * FROM conditions_daily ORDER BY bucket, avg;
Expand Down Expand Up @@ -2088,15 +2081,15 @@ NOTICE: refreshing continuous aggregate "cagg1"
ALTER MATERIALIZED VIEW cagg1 SET (timescaledb.compress);
NOTICE: defaulting compress_orderby to time_bucket
WARNING: there was some uncertainty picking the default segment by for the hypertable: You do not have any indexes on columns that can be used for segment_by and thus we are not using segment_by for compression. Please make sure you are not missing any indexes
NOTICE: default segment by for hypertable "_materialized_hypertable_52" is set to ""
NOTICE: default segment by for hypertable "_materialized_hypertable_56" is set to ""
SELECT count(compress_chunk(ch)) FROM show_chunks('cagg1') ch;
count
-------
1
(1 row)

DROP MATERIALIZED VIEW cagg1;
NOTICE: drop cascades to table _timescaledb_internal._hyper_52_68_chunk
NOTICE: drop cascades to table _timescaledb_internal._hyper_56_68_chunk
SELECT * FROM _timescaledb_catalog.compression_settings;
relid | segmentby | orderby | orderby_desc | orderby_nullsfirst
-------+-----------+---------+--------------+--------------------
Expand Down
Loading

0 comments on commit 9812a45

Please sign in to comment.