From b411d37f2b1373b08be7997b4fb177af8366f24b Mon Sep 17 00:00:00 2001 From: Yusuke Kuoka Date: Thu, 2 Apr 2020 09:51:40 +0900 Subject: [PATCH] fix: RunnerDeployment should clean up old RunnerReplicaSets ASAP Since the initial implementation of RunnerDeployment and until this change, any update to a runner deployment has been leaving old runner replicasets until the next resync interval. This fixes that, by continusouly retrying the reconcilation 10 seconds later to see if there are any old runner replicasets that can be removed. In addition to that, the cleanup of old runner replicasets has been improved to be deferred until all the runners of the newest replica set to be available. This gives you hopefully zero or at less downtime updates of runner deployments. Fixes #24 --- controllers/runnerdeployment_controller.go | 57 +++++++++++++++++----- 1 file changed, 45 insertions(+), 12 deletions(-) diff --git a/controllers/runnerdeployment_controller.go b/controllers/runnerdeployment_controller.go index 2c6c056e9b..28b04d81a2 100644 --- a/controllers/runnerdeployment_controller.go +++ b/controllers/runnerdeployment_controller.go @@ -20,7 +20,9 @@ import ( "context" "fmt" "hash/fnv" + "k8s.io/apimachinery/pkg/types" "sort" + "time" "github.com/davecgh/go-spew/spew" "github.com/go-logr/logr" @@ -58,7 +60,7 @@ type RunnerDeploymentReconciler struct { func (r *RunnerDeploymentReconciler) Reconcile(req ctrl.Request) (ctrl.Result, error) { ctx := context.Background() - log := r.Log.WithValues("runnerreplicaset", req.NamespacedName) + log := r.Log.WithValues("runnerdeployment", req.NamespacedName) var rd v1alpha1.RunnerDeployment if err := r.Get(ctx, req.NamespacedName, &rd); err != nil { @@ -130,12 +132,19 @@ func (r *RunnerDeploymentReconciler) Reconcile(req ctrl.Request) (ctrl.Result, e return ctrl.Result{}, err } - return ctrl.Result{}, nil + // We requeue in order to clean up old runner replica sets later. + // Otherwise, they aren't cleaned up until the next re-sync interval. + return ctrl.Result{RequeueAfter: 5 * time.Second}, nil } + const defaultReplicas = 1 + + currentDesiredReplicas := getIntOrDefault(newestSet.Spec.Replicas, defaultReplicas) + newDesiredReplicas := getIntOrDefault(desiredRS.Spec.Replicas, defaultReplicas) + // Please add more conditions that we can in-place update the newest runnerreplicaset without disruption - if newestSet.Spec.Replicas != desiredRS.Spec.Replicas { - newestSet.Spec.Replicas = desiredRS.Spec.Replicas + if currentDesiredReplicas != newDesiredReplicas { + newestSet.Spec.Replicas = &newDesiredReplicas if err := r.Client.Update(ctx, newestSet); err != nil { log.Error(err, "Failed to update runnerreplicaset resource") @@ -143,25 +152,49 @@ func (r *RunnerDeploymentReconciler) Reconcile(req ctrl.Request) (ctrl.Result, e return ctrl.Result{}, err } - return ctrl.Result{}, nil + return ctrl.Result{}, err } - for i := range oldSets { - rs := oldSets[i] + // Do we old runner replica sets that should eventually deleted? + if len(oldSets) > 0 { + readyReplicas := newestSet.Status.ReadyReplicas - if err := r.Client.Delete(ctx, &rs); err != nil { - log.Error(err, "Failed to delete runner resource") + if readyReplicas < currentDesiredReplicas { + log.WithValues("runnerreplicaset", types.NamespacedName{ + Namespace: newestSet.Namespace, + Name: newestSet.Name, + }). + Info("Waiting until the newest runner replica set to be 100% available") - return ctrl.Result{}, err + return ctrl.Result{RequeueAfter: 10 * time.Second}, nil } - r.Recorder.Event(&rd, corev1.EventTypeNormal, "RunnerReplicaSetDeleted", fmt.Sprintf("Deleted runnerreplicaset '%s'", rs.Name)) - log.Info("Deleted runnerreplicaset", "runnerdeployment", rd.ObjectMeta.Name, "runnerreplicaset", rs.Name) + for i := range oldSets { + rs := oldSets[i] + + if err := r.Client.Delete(ctx, &rs); err != nil { + log.Error(err, "Failed to delete runner resource") + + return ctrl.Result{}, err + } + + r.Recorder.Event(&rd, corev1.EventTypeNormal, "RunnerReplicaSetDeleted", fmt.Sprintf("Deleted runnerreplicaset '%s'", rs.Name)) + + log.Info("Deleted runnerreplicaset", "runnerdeployment", rd.ObjectMeta.Name, "runnerreplicaset", rs.Name) + } } return ctrl.Result{}, nil } +func getIntOrDefault(p *int, d int) int { + if p == nil { + return d + } + + return *p +} + func getTemplateHash(rs *v1alpha1.RunnerReplicaSet) (string, bool) { hash, ok := rs.Labels[LabelKeyRunnerTemplateHash]