Replace golang workqueue with our own

This introduces an API that is fundamentally different from that of the
K8s workqueue and better suited to our needs. Specifically, we need a
simple queue that doesn't have complex features like delayed adds that
sit on "external" goroutines.

In addition, we need deep introspection into the workqueue's
operations. Although you can approximate this on top of the K8s
workqueue by implementing a custom rate limiter, the underlying
rate limiter's behaviour remains somewhat opaque.
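
For reference, the API surface exercised by the tests below looks
roughly like this (a sketch inferred from the tests, not the exact
signatures; the "queue" package name and the "example" queue name are
placeholders):

    ctx, cancel := context.WithCancel(context.Background())
    defer cancel()
    q := queue.New(workqueue.DefaultItemBasedRateLimiter(), "example",
        func(ctx context.Context, key string) error {
            return nil // handler: process key; a non-nil error triggers a retry
        })
    go q.Run(ctx, 10)                                      // start 10 workers
    q.Enqueue("foo")                                       // rate-limited add
    q.EnqueueWithoutRateLimit("bar")                       // immediate add
    q.EnqueueWithoutRateLimitWithDelay("baz", time.Second) // add after a fixed delay
    q.Forget("foo")                                        // stop tracking/retrying "foo"
    _ = q.Len()                                            // introspection: items currently queued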

The new implementation has essentially 100% test coverage.
Sargun Dhillon committed 2021-01-29 00:45:03 -08:00
parent fd3da8dcad
commit ac9a1af564
5 changed files with 661 additions and 96 deletions


@@ -5,18 +5,18 @@ import (
"errors"
"strconv"
"sync"
"sync/atomic"
"testing"
"time"
"github.com/sirupsen/logrus"
"github.com/virtual-kubelet/virtual-kubelet/log"
logruslogger "github.com/virtual-kubelet/virtual-kubelet/log/logrus"
"go.uber.org/goleak"
"golang.org/x/time/rate"
"gotest.tools/assert"
is "gotest.tools/assert/cmp"
"k8s.io/apimachinery/pkg/util/wait"
"k8s.io/client-go/util/workqueue"
"k8s.io/utils/clock"
)
func TestQueueMaxRetries(t *testing.T) {
@@ -43,7 +43,7 @@ func TestQueueMaxRetries(t *testing.T) {
}
assert.Assert(t, is.Equal(n, MaxRetries))
assert.Assert(t, is.Equal(0, wq.Len()))
}
func TestForget(t *testing.T) {
@@ -54,62 +54,400 @@ func TestForget(t *testing.T) {
wq := New(workqueue.DefaultItemBasedRateLimiter(), t.Name(), handler)
wq.Forget("val")
assert.Assert(t, is.Equal(0, wq.Len()))
v := "test"
wq.EnqueueWithoutRateLimit(v)
assert.Assert(t, is.Equal(1, wq.Len()))
}
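// TestQueueEmpty verifies that getNextItem blocks until the context deadline when nothing has been enqueued.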
func TestQueueEmpty(t *testing.T) {
t.Parallel()
ctx, cancel := context.WithTimeout(context.Background(), 100*time.Millisecond)
defer cancel()
q := New(workqueue.DefaultItemBasedRateLimiter(), t.Name(), func(ctx context.Context, key string) error {
return nil
})
item, err := q.getNextItem(ctx)
assert.Error(t, err, context.DeadlineExceeded.Error())
assert.Assert(t, is.Nil(item))
}
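// TestQueueItemNoSleep verifies that items whose ready time is already in the past are returned immediately and in insertion order.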
func TestQueueItemNoSleep(t *testing.T) {
t.Parallel()
ctx, cancel := context.WithTimeout(context.Background(), 1000*time.Millisecond)
defer cancel()
q := New(workqueue.DefaultItemBasedRateLimiter(), t.Name(), func(ctx context.Context, key string) error {
return nil
})
q.lock.Lock()
q.insert("foo", false, -1*time.Hour)
q.insert("bar", false, -1*time.Hour)
q.lock.Unlock()
item, err := q.getNextItem(ctx)
assert.NilError(t, err)
assert.Assert(t, is.Equal(item.key, "foo"))
item, err = q.getNextItem(ctx)
assert.NilError(t, err)
assert.Assert(t, is.Equal(item.key, "bar"))
}
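// TestQueueItemSleep verifies that an item inserted with a delay is still returned by getNextItem once its delay elapses.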
func TestQueueItemSleep(t *testing.T) {
t.Parallel()
ctx, cancel := context.WithTimeout(context.Background(), 1000*time.Millisecond)
defer cancel()
q := New(workqueue.DefaultItemBasedRateLimiter(), t.Name(), func(ctx context.Context, key string) error {
return nil
})
q.lock.Lock()
q.insert("foo", false, 100*time.Millisecond)
q.insert("bar", false, 100*time.Millisecond)
q.lock.Unlock()
item, err := q.getNextItem(ctx)
assert.NilError(t, err)
assert.Assert(t, is.Equal(item.key, "foo"))
}
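// TestQueueBackgroundAdd verifies that an item inserted from another goroutine wakes up a blocked getNextItem call.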
func TestQueueBackgroundAdd(t *testing.T) {
t.Parallel()
ctx, cancel := context.WithTimeout(context.Background(), 5000*time.Millisecond)
defer cancel()
q := New(workqueue.DefaultItemBasedRateLimiter(), t.Name(), func(ctx context.Context, key string) error {
return nil
})
start := time.Now()
time.AfterFunc(100*time.Millisecond, func() {
q.lock.Lock()
defer q.lock.Unlock()
q.insert("foo", false, 0)
})
item, err := q.getNextItem(ctx)
assert.NilError(t, err)
assert.Assert(t, is.Equal(item.key, "foo"))
assert.Assert(t, time.Since(start) > 100*time.Millisecond)
}
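// TestQueueBackgroundAdvance verifies that re-inserting an item with a shorter delay moves its ready time earlier, so a blocked getNextItem picks it up sooner.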
func TestQueueBackgroundAdvance(t *testing.T) {
t.Parallel()
ctx, cancel := context.WithTimeout(context.Background(), 5000*time.Millisecond)
defer cancel()
q := New(workqueue.DefaultItemBasedRateLimiter(), t.Name(), func(ctx context.Context, key string) error {
return nil
})
start := time.Now()
q.lock.Lock()
q.insert("foo", false, 10*time.Second)
q.lock.Unlock()
time.AfterFunc(200*time.Millisecond, func() {
q.lock.Lock()
defer q.lock.Unlock()
q.insert("foo", false, 0)
})
item, err := q.getNextItem(ctx)
assert.NilError(t, err)
assert.Assert(t, is.Equal(item.key, "foo"))
assert.Assert(t, time.Since(start) > 200*time.Millisecond)
assert.Assert(t, time.Since(start) < 5*time.Second)
}
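// TestQueueRedirty verifies that a key re-enqueued from inside its own handler is processed a second time.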
func TestQueueRedirty(t *testing.T) {
t.Parallel()
ctx, cancel := context.WithTimeout(context.Background(), 5000*time.Millisecond)
defer cancel()
var times int64
var q *Queue
q = New(workqueue.DefaultItemBasedRateLimiter(), t.Name(), func(ctx context.Context, key string) error {
assert.Assert(t, is.Equal(key, "foo"))
if atomic.AddInt64(&times, 1) == 1 {
q.EnqueueWithoutRateLimit("foo")
} else {
cancel()
}
return nil
})
q.EnqueueWithoutRateLimit("foo")
q.Run(ctx, 1)
for !q.Empty() {
time.Sleep(100 * time.Millisecond)
}
assert.Assert(t, is.Equal(atomic.LoadInt64(&times), int64(2)))
}
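// TestHeapConcurrency verifies that 20 items whose handlers each sleep for a second are processed concurrently by 20 workers, well within the 5 second deadline.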
func TestHeapConcurrency(t *testing.T) {
t.Parallel()
ctx, cancel := context.WithTimeout(context.Background(), 5000*time.Millisecond)
defer cancel()
start := time.Now()
seen := sync.Map{}
q := New(workqueue.DefaultItemBasedRateLimiter(), t.Name(), func(ctx context.Context, key string) error {
seen.Store(key, struct{}{})
time.Sleep(time.Second)
return nil
})
for i := 0; i < 20; i++ {
q.EnqueueWithoutRateLimit(strconv.Itoa(i))
}
assert.Assert(t, q.Len() == 20)
go q.Run(ctx, 20)
for q.Len() > 0 {
time.Sleep(100 * time.Millisecond)
}
for i := 0; i < 20; i++ {
_, ok := seen.Load(strconv.Itoa(i))
assert.Assert(t, ok, "Did not observe: %d", i)
}
assert.Assert(t, time.Since(start) < 5*time.Second)
}
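// checkConsistency asserts that the queue's internal item list remains sorted by plannedToStartWorkAt.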
func checkConsistency(t *testing.T, q *Queue) {
q.lock.Lock()
defer q.lock.Unlock()
for next := q.items.Front(); next != nil && next.Next() != nil; next = next.Next() {
qi := next.Value.(*queueItem)
qiNext := next.Next().Value.(*queueItem)
assert.Assert(t, qi.plannedToStartWorkAt.Before(qiNext.plannedToStartWorkAt) || qi.plannedToStartWorkAt.Equal(qiNext.plannedToStartWorkAt))
}
}
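// TestHeapOrder verifies that re-enqueueing existing keys with different delays keeps the internal list sorted.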
func TestHeapOrder(t *testing.T) {
q := New(workqueue.DefaultItemBasedRateLimiter(), t.Name(), func(ctx context.Context, key string) error {
return nil
})
q.clock = nonmovingClock{}
q.EnqueueWithoutRateLimitWithDelay("a", 1000)
q.EnqueueWithoutRateLimitWithDelay("b", 2000)
q.EnqueueWithoutRateLimitWithDelay("c", 3000)
q.EnqueueWithoutRateLimitWithDelay("d", 4000)
q.EnqueueWithoutRateLimitWithDelay("e", 5000)
checkConsistency(t, q)
t.Logf("%v", q)
q.EnqueueWithoutRateLimitWithDelay("d", 1000)
checkConsistency(t, q)
t.Logf("%v", q)
q.EnqueueWithoutRateLimitWithDelay("c", 1001)
checkConsistency(t, q)
t.Logf("%v", q)
q.EnqueueWithoutRateLimitWithDelay("e", 999)
checkConsistency(t, q)
t.Logf("%v", q)
}
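// rateLimitWrapper wraps a workqueue.RateLimiter and records When and Forget calls per item so tests can assert on rate limiter usage.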
type rateLimitWrapper struct {
addedMap sync.Map
forgottenMap sync.Map
rl workqueue.RateLimiter
}
func (r *rateLimitWrapper) When(item interface{}) time.Duration {
if _, ok := r.forgottenMap.Load(item); ok {
r.forgottenMap.Delete(item)
// Item was previously forgotten: reset its count, this When call is the first since the Forget
r.addedMap.Store(item, 1)
} else {
actual, loaded := r.addedMap.LoadOrStore(item, 1)
if loaded {
r.addedMap.Store(item, actual.(int)+1)
}
}
return r.rl.When(item)
}
func (r *rateLimitWrapper) Forget(item interface{}) {
r.forgottenMap.Store(item, struct{}{})
r.rl.Forget(item)
}
func (r *rateLimitWrapper) NumRequeues(item interface{}) int {
return r.rl.NumRequeues(item)
}
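// TestRateLimiter verifies that failing handlers are retried through the rate limiter and that each item is forgotten once it finally succeeds.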
func TestRateLimiter(t *testing.T) {
ctx, cancel := context.WithTimeout(context.Background(), 5000*time.Millisecond)
defer cancel()
syncMap := sync.Map{}
syncMap.Store("foo", 0)
syncMap.Store("bar", 0)
syncMap.Store("baz", 0)
syncMap.Store("quux", 0)
start := time.Now()
ratelimiter := &rateLimitWrapper{
rl: workqueue.NewItemFastSlowRateLimiter(1*time.Millisecond, 100*time.Millisecond, 1),
}
q := New(ratelimiter, t.Name(), func(ctx context.Context, key string) error {
oldValue, _ := syncMap.Load(key)
syncMap.Store(key, oldValue.(int)+1)
if oldValue.(int) < 9 {
return errors.New("test")
}
return nil
})
enqueued := 0
syncMap.Range(func(key, value interface{}) bool {
enqueued++
q.Enqueue(key.(string))
return true
})
assert.Assert(t, enqueued == 4)
go q.Run(ctx, 10)
incomplete := true
for incomplete {
time.Sleep(10 * time.Millisecond)
incomplete = false
// Wait for all items to finish processing.
syncMap.Range(func(key, value interface{}) bool {
if value.(int) < 10 {
incomplete = true
}
return true
})
}
// Make sure there were ~9 "slow" rate limits per item, and 1 fast
assert.Assert(t, time.Since(start) > 9*100*time.Millisecond)
// Make sure we didn't go off the deep end.
assert.Assert(t, time.Since(start) < 2*9*100*time.Millisecond)
// Make sure each item was seen. And Forgotten.
syncMap.Range(func(key, value interface{}) bool {
_, ok := ratelimiter.forgottenMap.Load(key)
assert.Assert(t, ok, "%s in forgotten map", key)
val, ok := ratelimiter.addedMap.Load(key)
assert.Assert(t, ok, "%s in added map", key)
assert.Assert(t, val == 10)
return true
})
q.lock.Lock()
defer q.lock.Unlock()
assert.Assert(t, len(q.itemsInQueue) == 0)
assert.Assert(t, len(q.itemsBeingProcessed) == 0)
assert.Assert(t, q.items.Len() == 0)
}
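// TestQueueForgetInProgress verifies that calling Forget on a key from inside its handler prevents the key from being retried even though the handler returns an error.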
func TestQueueForgetInProgress(t *testing.T) {
t.Parallel()
ctx, cancel := context.WithCancel(context.Background())
defer cancel()
var times int64
var q *Queue
q = New(workqueue.DefaultItemBasedRateLimiter(), t.Name(), func(ctx context.Context, key string) error {
assert.Assert(t, is.Equal(key, "foo"))
atomic.AddInt64(&times, 1)
q.Forget(key)
return errors.New("test")
})
q.EnqueueWithoutRateLimit("foo")
go q.Run(ctx, 1)
for !q.Empty() {
time.Sleep(100 * time.Millisecond)
}
assert.Assert(t, is.Equal(atomic.LoadInt64(&times), int64(1)))
}
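// TestQueueForgetBeforeStart verifies that a key which is forgotten before the workers start is never handed to the handler.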
func TestQueueForgetBeforeStart(t *testing.T) {
t.Parallel()
ctx, cancel := context.WithCancel(context.Background())
defer cancel()
q := New(workqueue.DefaultItemBasedRateLimiter(), t.Name(), func(ctx context.Context, key string) error {
panic("shouldn't be called")
})
q.EnqueueWithoutRateLimit("foo")
q.Forget("foo")
go q.Run(ctx, 1)
for !q.Empty() {
time.Sleep(100 * time.Millisecond)
}
}
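// TestQueueMoveItem verifies that re-inserting an existing key with a new delay moves it within the internal list while preserving the ordering invariant.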
func TestQueueMoveItem(t *testing.T) {
t.Parallel()
q := New(workqueue.DefaultItemBasedRateLimiter(), t.Name(), func(ctx context.Context, key string) error {
panic("shouldn't be called")
})
q.clock = nonmovingClock{}
q.insert("foo", false, 3000)
q.insert("bar", false, 2000)
q.insert("baz", false, 1000)
checkConsistency(t, q)
t.Log(q)
q.insert("foo", false, 2000)
checkConsistency(t, q)
t.Log(q)
q.insert("foo", false, 1999)
checkConsistency(t, q)
t.Log(q)
q.insert("foo", false, 999)
checkConsistency(t, q)
t.Log(q)
}
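// nonmovingClock is a stub clock whose Now never advances, making ordering tests deterministic; the unused methods simply panic.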
type nonmovingClock struct {
}
func (n nonmovingClock) Now() time.Time {
return time.Time{}
}
func (n nonmovingClock) Since(t time.Time) time.Duration {
return n.Now().Sub(t)
}
func (n nonmovingClock) After(d time.Duration) <-chan time.Time {
panic("implement me")
}
func (n nonmovingClock) NewTimer(d time.Duration) clock.Timer {
panic("implement me")
}
func (n nonmovingClock) Sleep(d time.Duration) {
panic("implement me")
}
func (n nonmovingClock) Tick(d time.Duration) <-chan time.Time {
panic("implement me")
}