Refactor queue code

This refactor is preparation for another commit: I want to add instrumentation
around our queues. The queue-handling code was spread throughout the code
base, which made adding such instrumentation needlessly complicated.

This centralizes the queue management logic in queue.go; the user only has to
provide a name, a handler, and, optionally, a custom rate limiter.
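
Roughly, the new usage looks like this (a sketch: the exact constructor shape
may differ, but the Enqueue call and the handler signature match the diff
below):

q := queue.New(
	workqueue.DefaultItemBasedRateLimiter(), // or pass a custom rate limiter here
	"syncPodStatusFromProvider",             // name, for the instrumentation to hang off of
	func(ctx context.Context, key string) error {
		// handler: invoked once per dequeued item
		return nil
	},
)
q.Enqueue("mynamespace/mypod")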

The lease code is moved into its own package to simplify testing: the
goroutine leak tester was triggering incorrectly when other tests ran
alongside it, because it picked up leaks from those tests.
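
With the lease tests isolated in their own package, the leak check only sees
goroutines started by those tests. A minimal sketch, assuming a leak tester
like go.uber.org/goleak (package name illustrative):

package lease

import (
	"testing"

	"go.uber.org/goleak"
)

// TestMain runs once for the whole package; goleak fails the run if any
// goroutines are still alive after the package's tests finish.
func TestMain(m *testing.M) {
	goleak.VerifyTestMain(m)
}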

This also identified buggy behaviour:

package main

import (
	"fmt"
	"k8s.io/client-go/util/workqueue"
)

func main() {
	wq := workqueue.NewNamedRateLimitingQueue(workqueue.DefaultItemBasedRateLimiter(), "test")
	wq.AddRateLimited("hi")
	fmt.Printf("Added hi, len: %d\n", wq.Len())

	wq.Forget("hi")
	fmt.Printf("Forgot hi, len: %d\n", wq.Len())

	wq.Done("hi")
	fmt.Printf("Done hi, len: %d\n", wq.Len())
}

---
Prints all 0s, because even non-delayed items are delayed. If you call Add
directly instead of AddRateLimited, the last line prints a len of 2.
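
To spell out where the 2 comes from (my annotation, based on the client-go
workqueue internals, not part of the original repro):

wq.Add("hi")    // skips the delaying logic, queued immediately: len 1
wq.Forget("hi") // only clears the rate limiter's tracking:      len 1
wq.Done("hi")   // "hi" is still in the dirty set and was never
                // handed out for processing, so Done re-queues
                // it:                                           len 2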

// Workqueue docs:
// Forget indicates that an item is finished being retried.  Doesn't matter whether it's for perm failing
// or for success, we'll stop the rate limiter from tracking it.  This only clears the `rateLimiter`, you
// still have to call `Done` on the queue.

^----- Even this seems untrue
Sargun Dhillon
2020-12-18 03:27:04 -08:00
parent 735eb34829
commit 1b8597647b
9 changed files with 391 additions and 227 deletions


@@ -22,6 +22,7 @@ import (
 	"github.com/google/go-cmp/cmp"
 	pkgerrors "github.com/pkg/errors"
 	"github.com/virtual-kubelet/virtual-kubelet/internal/podutils"
+	"github.com/virtual-kubelet/virtual-kubelet/internal/queue"
 	"github.com/virtual-kubelet/virtual-kubelet/log"
 	"github.com/virtual-kubelet/virtual-kubelet/trace"
 	corev1 "k8s.io/api/core/v1"
@@ -29,7 +30,6 @@ import (
 	metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
 	"k8s.io/apimachinery/pkg/util/wait"
 	"k8s.io/client-go/tools/cache"
-	"k8s.io/client-go/util/workqueue"
 )

 const (
@@ -264,8 +264,8 @@ func (pc *PodController) updatePodStatus(ctx context.Context, podFromKubernetes

 // enqueuePodStatusUpdate updates our pod status map, and marks the pod as dirty in the workqueue. The pod must be DeepCopy'd
 // prior to enqueuePodStatusUpdate.
-func (pc *PodController) enqueuePodStatusUpdate(ctx context.Context, q workqueue.RateLimitingInterface, pod *corev1.Pod) {
-	ctx, cancel := context.WithTimeout(ctx, notificationRetryPeriod*maxRetries)
+func (pc *PodController) enqueuePodStatusUpdate(ctx context.Context, pod *corev1.Pod) {
+	ctx, cancel := context.WithTimeout(ctx, notificationRetryPeriod*queue.MaxRetries)
 	defer cancel()

 	ctx, span := trace.StartSpan(ctx, "enqueuePodStatusUpdate")
@@ -330,11 +330,11 @@ func (pc *PodController) enqueuePodStatusUpdate(ctx context.Context, q workqueue
 	}
 	kpod.lastPodStatusReceivedFromProvider = pod
 	kpod.Unlock()
-	q.AddRateLimited(key)
+	pc.syncPodStatusFromProvider.Enqueue(key)
 }

-func (pc *PodController) podStatusHandler(ctx context.Context, key string) (retErr error) {
-	ctx, span := trace.StartSpan(ctx, "podStatusHandler")
+func (pc *PodController) syncPodStatusFromProviderHandler(ctx context.Context, key string) (retErr error) {
+	ctx, span := trace.StartSpan(ctx, "syncPodStatusFromProviderHandler")
 	defer span.End()

 	ctx = span.WithField(ctx, "key", key)
@@ -363,8 +363,8 @@ func (pc *PodController) podStatusHandler(ctx context.Context, key string) (retE
 	return pc.updatePodStatus(ctx, pod, key)
 }

-func (pc *PodController) deletePodHandler(ctx context.Context, key string) (retErr error) {
-	ctx, span := trace.StartSpan(ctx, "processDeletionReconcilationWorkItem")
+func (pc *PodController) deletePodsFromKubernetesHandler(ctx context.Context, key string) (retErr error) {
+	ctx, span := trace.StartSpan(ctx, "deletePodsFromKubernetesHandler")
 	defer span.End()

 	namespace, name, err := cache.SplitMetaNamespaceKey(key)