feat(anomaly detection): Maintain historical timeseries #1670

Merged · 7 commits · Jan 3, 2025
Changes from 5 commits
12 changes: 11 additions & 1 deletion src/celery_app/tasks.py
@@ -4,7 +4,11 @@

import seer.app # noqa: F401
from celery_app.app import celery_app as celery # noqa: F401
from seer.anomaly_detection.tasks import cleanup_disabled_alerts, cleanup_timeseries # noqa: F401
from seer.anomaly_detection.tasks import ( # noqa: F401
cleanup_disabled_alerts,
cleanup_old_timeseries_history,
cleanup_timeseries,
)
from seer.automation.autofix.tasks import check_and_mark_recent_autofix_runs
from seer.automation.tasks import delete_data_for_ttl
from seer.configuration import AppConfig
@@ -43,3 +47,9 @@ def setup_periodic_tasks(sender, config: AppConfig = injected, **kwargs):
cleanup_disabled_alerts.signature(kwargs={}, queue=config.CELERY_WORKER_QUEUE),
name="Clean up old disabled timeseries every week",
)

sender.add_periodic_task(
crontab(minute="0", hour="0", day_of_week="0"), # Run once a week on Sunday
cleanup_old_timeseries_history.signature(kwargs={}, queue=config.CELERY_WORKER_QUEUE),
name="Clean up old timeseries history every week",
)
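For reference, `crontab(minute="0", hour="0", day_of_week="0")` fires at 00:00 every Sunday (Celery numbers Sunday as 0), evaluated in the Celery app's configured timezone, UTC by default. A minimal standalone sketch:

```python
from celery.schedules import crontab

# day_of_week "0" is Sunday in Celery's crontab; minute/hour "0" means
# midnight, evaluated in the Celery app's timezone (UTC unless configured).
weekly_sunday_midnight = crontab(minute="0", hour="0", day_of_week="0")
```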
37 changes: 37 additions & 0 deletions src/migrations/versions/5a7ff418f726_migration.py
@@ -0,0 +1,37 @@
"""Migration

Revision ID: 5a7ff418f726
Revises: 3914e6cdb818
Create Date: 2024-12-30 23:36:51.280449

"""

import sqlalchemy as sa
from alembic import op

# revision identifiers, used by Alembic.
revision = "5a7ff418f726"
down_revision = "3914e6cdb818"
branch_labels = None
depends_on = None


def upgrade():
# ### commands auto generated by Alembic - please adjust! ###
op.create_table(
"dynamic_alert_time_series_history",
sa.Column("id", sa.Integer(), autoincrement=True, nullable=False),
Member: Curious: how come alert_id is BigInteger but this id is Integer? We will be having more rows in this table than in the alert table, right?

Member (Author): Good catch -- fixed
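The fix itself is not visible here ("Changes from 5 commits" predates it), but presumably it widens the column along these lines:

```python
import sqlalchemy as sa

# Hypothetical shape of the fix the author refers to: widen id to
# BigInteger so the history table's key space can exceed the alert table's.
id_column = sa.Column("id", sa.BigInteger(), autoincrement=True, nullable=False)
```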

sa.Column("alert_id", sa.BigInteger(), nullable=False),
sa.Column("timestamp", sa.DateTime(), nullable=False),
Member: Since we are cleaning up history based on timestamp, do we need an index on this field?

Member (Author): The id column is there to avoid duplicate rows, since the other columns can all be identical (including the timestamp) if points are added at the same time.

Member: I was asking about the timestamp column. Since the WHERE clause of the SQL will filter on timestamp, we need an index on it (not a primary index, as id will still be the primary key).
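If the reviewer's suggestion is adopted, a plain secondary index in the migration would look roughly like this (the index name is illustrative, not from the PR):

```python
from alembic import op


def upgrade():
    # Hypothetical follow-up migration step: a non-unique secondary index so
    # the weekly cleanup's WHERE clause on timestamp avoids a full table scan.
    op.create_index(
        "ix_dynamic_alert_time_series_history_timestamp",  # illustrative name
        "dynamic_alert_time_series_history",
        ["timestamp"],
        unique=False,
    )
```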

sa.Column("value", sa.Float(), nullable=False),
sa.Column("anomaly_type", sa.String(), nullable=False),
sa.Column("saved_at", sa.DateTime(), nullable=False),
sa.PrimaryKeyConstraint("id", "alert_id"),
)
# ### end Alembic commands ###


def downgrade():
# ### commands auto generated by Alembic - please adjust! ###
op.drop_table("dynamic_alert_time_series_history")
# ### end Alembic commands ###
41 changes: 40 additions & 1 deletion src/seer/anomaly_detection/tasks.py
@@ -1,6 +1,7 @@
import logging
from datetime import datetime, timedelta
from operator import and_, or_
from typing import List

import numpy as np
import sentry_sdk
@@ -10,7 +11,13 @@
from seer.anomaly_detection.detectors.anomaly_detectors import MPBatchAnomalyDetector
from seer.anomaly_detection.models import AlgoConfig, TimeSeries
from seer.anomaly_detection.models.external import AnomalyDetectionConfig
from seer.db import DbDynamicAlert, Session, TaskStatus
from seer.db import (
DbDynamicAlert,
DbDynamicAlertTimeSeries,
DbDynamicAlertTimeSeriesHistory,
Session,
TaskStatus,
)
from seer.dependency_injection import inject, injected

logger = logging.getLogger(__name__)
@@ -68,12 +75,30 @@ def delete_old_timeseries_points(alert: DbDynamicAlert, date_threshold: float):
for ts in alert.timeseries:
if ts.timestamp.timestamp() < date_threshold:
to_remove.append(ts)

# Save history records before removing
save_timeseries_history(alert, to_remove)

for ts in to_remove:
alert.timeseries.remove(ts)
deleted_count += 1
return deleted_count


def save_timeseries_history(alert: DbDynamicAlert, timeseries: List[DbDynamicAlertTimeSeries]):
with Session() as session:
for ts in timeseries:
history_record = DbDynamicAlertTimeSeriesHistory(
alert_id=alert.external_alert_id,
timestamp=ts.timestamp,
anomaly_type=ts.anomaly_type,
value=ts.value,
saved_at=datetime.now(),
)
session.add(history_record)
session.commit()


@inject
def update_matrix_profiles(
alert: DbDynamicAlert,
@@ -163,3 +188,17 @@ def cleanup_disabled_alerts():

session.commit()
logger.info(f"Deleted {deleted_count} alerts")


@celery_app.task
@sentry_sdk.trace
def cleanup_old_timeseries_history():
date_threshold = datetime.now() - timedelta(days=90)
with Session() as session:
deleted_count = (
session.query(DbDynamicAlertTimeSeriesHistory)
.filter(DbDynamicAlertTimeSeriesHistory.saved_at < date_threshold)
Member: This condition is applied across all alerts, so it can be a costly operation. Also, should we be filtering on timestamp instead of saved_at?

Member (Author): The reason I am filtering on saved_at instead of timestamp is that I thought we want the timeseries points to be available for 90 days after they are deleted, not after they are created. But if that is not the case, I can change it to timestamp instead.

Member (Author): Regarding the costly operation, is there another way to do this, since this table doesn't have any specific relationships to other tables aside from being a storage table?

Member: I was thinking of having 90 days of time series data regardless of when it was saved to seer. As to the deletion operation, can you check if SQLAlchemy has any batch deletion feature that we can leverage?

Member (Author): Filtering by timestamp and using the Delete object to remove values more efficiently (see the sketch after this file's diff):

.delete()
)
session.commit()
logger.info(f"Deleted {deleted_count} timeseries history records older than 90 days")
14 changes: 14 additions & 0 deletions src/seer/db.py
@@ -360,3 +360,17 @@ class DbIssueSummary(Base):
created_at: Mapped[datetime.datetime] = mapped_column(
DateTime, nullable=False, default=datetime.datetime.now(datetime.UTC)
)


class DbDynamicAlertTimeSeriesHistory(Base):
__tablename__ = "dynamic_alert_time_series_history"
id: Mapped[int] = mapped_column(Integer, primary_key=True, autoincrement=True)
alert_id: Mapped[int] = mapped_column(BigInteger, primary_key=True)
timestamp: Mapped[datetime.datetime] = mapped_column(
DateTime, nullable=False, default=datetime.datetime.now(datetime.UTC)
)
value: Mapped[float] = mapped_column(Float, nullable=False)
anomaly_type: Mapped[str] = mapped_column(String, nullable=False)
saved_at: Mapped[datetime.datetime] = mapped_column(
DateTime, nullable=False, default=datetime.datetime.now(datetime.UTC)
)
47 changes: 45 additions & 2 deletions tests/seer/anomaly_detection/test_cleanup_tasks.py
@@ -9,8 +9,18 @@
from seer.anomaly_detection.models import MPTimeSeriesAnomalies
from seer.anomaly_detection.models.external import AnomalyDetectionConfig, TimeSeriesPoint
from seer.anomaly_detection.models.timeseries import TimeSeries
from seer.anomaly_detection.tasks import cleanup_disabled_alerts, cleanup_timeseries
from seer.db import DbDynamicAlert, DbDynamicAlertTimeSeries, Session, TaskStatus
from seer.anomaly_detection.tasks import (
cleanup_disabled_alerts,
cleanup_old_timeseries_history,
cleanup_timeseries,
)
from seer.db import (
DbDynamicAlert,
DbDynamicAlertTimeSeries,
DbDynamicAlertTimeSeriesHistory,
Session,
TaskStatus,
)


class TestCleanupTasks(unittest.TestCase):
@@ -236,3 +246,36 @@ def test_cleanup_disabled_alerts(self):

assert alert is not None
assert len(alert.timeseries) == 500

def test_cleanup_old_timeseries_history(self):
# Create and save alert with 1000 points (all old)
external_alert_id, config, points, anomalies = self._save_alert(0, 1000, 0)
date_threshold = (datetime.now() - timedelta(days=28)).timestamp()
cleanup_timeseries(external_alert_id, date_threshold)

# Confirm the historical table is populated with 1000 points

with Session() as session:
history = (
session.query(DbDynamicAlertTimeSeriesHistory)
.filter(DbDynamicAlertTimeSeriesHistory.alert_id == external_alert_id)
.all()
)
assert len(history) == 1000

# Backdate saved_at so the history records count as saved over 90 days ago; cleanup should then delete them

with Session() as session:
session.query(DbDynamicAlertTimeSeriesHistory).update(
{DbDynamicAlertTimeSeriesHistory.saved_at: datetime.now() - timedelta(days=91)}
)
session.commit()

cleanup_old_timeseries_history()

history = (
session.query(DbDynamicAlertTimeSeriesHistory)
.filter(DbDynamicAlertTimeSeriesHistory.alert_id == external_alert_id)
.all()
)
assert len(history) == 0
3 changes: 3 additions & 0 deletions tests/test_celery.py
@@ -13,6 +13,7 @@ def test_detected_celery_jobs():
assert set(k for k in celery_app.tasks.keys() if not k.startswith("celery.")) == set(
[
"seer.anomaly_detection.tasks.cleanup_timeseries",
"seer.anomaly_detection.tasks.cleanup_old_timeseries_history",
"seer.anomaly_detection.tasks.cleanup_disabled_alerts",
"seer.automation.autofix.steps.change_describer_step.autofix_change_describer_task",
"seer.automation.autofix.steps.coding_step.autofix_coding_task",
@@ -32,6 +33,7 @@
"Check and mark recent autofix runs every hour",
"Delete old Automation runs for 90 day time-to-live",
"Clean up old disabled timeseries every week",
"Clean up old timeseries history every week",
]
)

@@ -48,6 +50,7 @@
assert set(k for k in app.conf.beat_schedule.keys()) == set(
[
"Clean up old disabled timeseries every week",
"Clean up old timeseries history every week",
]
)
