Replace golang workqueue with our own

This introduces an API that is fundamentally different from that of the
K8s workqueue and better suited to our needs. Specifically, we need a
simple queue that doesn't have complex features like delayed adds that
sit on "external" goroutines.

In addition, we need deep introspection into the workqueue's
operations. Although you can approximate this on top of the K8s
workqueue by implementing a custom rate limiter, the underlying
rate limiter's behaviour remains somewhat opaque.
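
For reference, the API surface exercised by the tests below looks
roughly like this (a sketch inferred from the tests, not the exact
signatures; the "queue" package name and the "example" queue name are
placeholders):

    ctx, cancel := context.WithCancel(context.Background())
    defer cancel()
    q := queue.New(workqueue.DefaultItemBasedRateLimiter(), "example",
        func(ctx context.Context, key string) error {
            return nil // handler: process key; a non-nil error triggers a retry
        })
    go q.Run(ctx, 10)                                      // start 10 workers
    q.Enqueue("foo")                                       // rate-limited add
    q.EnqueueWithoutRateLimit("bar")                       // immediate add
    q.EnqueueWithoutRateLimitWithDelay("baz", time.Second) // add after a fixed delay
    q.Forget("foo")                                        // stop tracking/retrying "foo"
    _ = q.Len()                                            // introspection: items currently queued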

The new implementation has essentially 100% test coverage.
Sargun Dhillon committed 2021-01-29 00:45:03 -08:00
parent fd3da8dcad
commit ac9a1af564
5 changed files with 661 additions and 96 deletions


@@ -5,18 +5,18 @@ import (
"errors"
"strconv"
"sync"
"sync/atomic"
"testing"
"time"
"github.com/sirupsen/logrus"
"github.com/virtual-kubelet/virtual-kubelet/log"
logruslogger "github.com/virtual-kubelet/virtual-kubelet/log/logrus"
"go.uber.org/goleak"
"golang.org/x/time/rate"
"gotest.tools/assert"
is "gotest.tools/assert/cmp"
"k8s.io/apimachinery/pkg/util/wait"
"k8s.io/client-go/util/workqueue"
"k8s.io/utils/clock"
)
func TestQueueMaxRetries(t *testing.T) {
@@ -43,7 +43,7 @@ func TestQueueMaxRetries(t *testing.T) {
}
assert.Assert(t, is.Equal(n, MaxRetries))
assert.Assert(t, is.Equal(0, wq.Len()))
}
func TestForget(t *testing.T) {
@@ -54,62 +54,400 @@ func TestForget(t *testing.T) {
wq := New(workqueue.DefaultItemBasedRateLimiter(), t.Name(), handler)
wq.Forget("val")
assert.Assert(t, is.Equal(0, wq.Len()))
v := "test"
wq.EnqueueWithoutRateLimit(v)
assert.Assert(t, is.Equal(1, wq.Len()))
}
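// TestQueueEmpty verifies that getNextItem blocks until the context deadline when nothing has been enqueued.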
func TestQueueEmpty(t *testing.T) {
t.Parallel()
ctx, cancel := context.WithTimeout(context.Background(), 100*time.Millisecond)
defer cancel()
q := New(workqueue.DefaultItemBasedRateLimiter(), t.Name(), func(ctx context.Context, key string) error {
return nil
})
item, err := q.getNextItem(ctx)
assert.Error(t, err, context.DeadlineExceeded.Error())
assert.Assert(t, is.Nil(item))
}
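// TestQueueItemNoSleep verifies that items whose ready time is already in the past are returned immediately and in insertion order.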
func TestQueueItemNoSleep(t *testing.T) {
t.Parallel()
ctx, cancel := context.WithTimeout(context.Background(), 1000*time.Millisecond)
defer cancel()
q := New(workqueue.DefaultItemBasedRateLimiter(), t.Name(), func(ctx context.Context, key string) error {
return nil
})
q.lock.Lock()
q.insert("foo", false, -1*time.Hour)
q.insert("bar", false, -1*time.Hour)
q.lock.Unlock()
item, err := q.getNextItem(ctx)
assert.NilError(t, err)
assert.Assert(t, is.Equal(item.key, "foo"))
item, err = q.getNextItem(ctx)
assert.NilError(t, err)
assert.Assert(t, is.Equal(item.key, "bar"))
}
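// TestQueueItemSleep verifies that an item inserted with a delay is still returned by getNextItem once its delay elapses.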
func TestQueueItemSleep(t *testing.T) {
t.Parallel()
ctx, cancel := context.WithTimeout(context.Background(), 1000*time.Millisecond)
defer cancel()
q := New(workqueue.DefaultItemBasedRateLimiter(), t.Name(), func(ctx context.Context, key string) error {
return nil
})
q.lock.Lock()
q.insert("foo", false, 100*time.Millisecond)
q.insert("bar", false, 100*time.Millisecond)
q.lock.Unlock()
item, err := q.getNextItem(ctx)
assert.NilError(t, err)
assert.Assert(t, is.Equal(item.key, "foo"))
}
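// TestQueueBackgroundAdd verifies that an item inserted from another goroutine wakes up a blocked getNextItem call.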
func TestQueueBackgroundAdd(t *testing.T) {
t.Parallel()
ctx, cancel := context.WithTimeout(context.Background(), 5000*time.Millisecond)
defer cancel()
q := New(workqueue.DefaultItemBasedRateLimiter(), t.Name(), func(ctx context.Context, key string) error {
return nil
})
start := time.Now()
time.AfterFunc(100*time.Millisecond, func() {
q.lock.Lock()
defer q.lock.Unlock()
q.insert("foo", false, 0)
})
item, err := q.getNextItem(ctx)
assert.NilError(t, err)
assert.Assert(t, is.Equal(item.key, "foo"))
assert.Assert(t, time.Since(start) > 100*time.Millisecond)
}
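// TestQueueBackgroundAdvance verifies that re-inserting an item with a shorter delay moves its ready time earlier, so a blocked getNextItem picks it up sooner.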
func TestQueueBackgroundAdvance(t *testing.T) {
t.Parallel()
ctx, cancel := context.WithTimeout(context.Background(), 5000*time.Millisecond)
defer cancel()
q := New(workqueue.DefaultItemBasedRateLimiter(), t.Name(), func(ctx context.Context, key string) error {
return nil
})
start := time.Now()
q.lock.Lock()
q.insert("foo", false, 10*time.Second)
q.lock.Unlock()
time.AfterFunc(200*time.Millisecond, func() {
q.lock.Lock()
defer q.lock.Unlock()
q.insert("foo", false, 0)
})
item, err := q.getNextItem(ctx)
assert.NilError(t, err)
assert.Assert(t, is.Equal(item.key, "foo"))
assert.Assert(t, time.Since(start) > 200*time.Millisecond)
assert.Assert(t, time.Since(start) < 5*time.Second)
}
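// TestQueueRedirty verifies that a key re-enqueued from inside its own handler is processed a second time.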
func TestQueueRedirty(t *testing.T) {
t.Parallel()
ctx, cancel := context.WithTimeout(context.Background(), 5000*time.Millisecond)
defer cancel()
var times int64
var q *Queue
q = New(workqueue.DefaultItemBasedRateLimiter(), t.Name(), func(ctx context.Context, key string) error {
assert.Assert(t, is.Equal(key, "foo"))
if atomic.AddInt64(&times, 1) == 1 {
q.EnqueueWithoutRateLimit("foo")
} else {
cancel()
}
return nil
})
q.EnqueueWithoutRateLimit("foo")
q.Run(ctx, 1)
for !q.Empty() {
time.Sleep(100 * time.Millisecond)
}
assert.Assert(t, is.Equal(atomic.LoadInt64(&times), int64(2)))
}
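// TestHeapConcurrency verifies that 20 items whose handlers each sleep for a second are processed concurrently by 20 workers, well within the 5 second deadline.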
func TestHeapConcurrency(t *testing.T) {
t.Parallel()
ctx, cancel := context.WithTimeout(context.Background(), 5000*time.Millisecond)
defer cancel()
start := time.Now()
seen := sync.Map{}
q := New(workqueue.DefaultItemBasedRateLimiter(), t.Name(), func(ctx context.Context, key string) error {
seen.Store(key, struct{}{})
time.Sleep(time.Second)
return nil
})
for i := 0; i < 20; i++ {
q.EnqueueWithoutRateLimit(strconv.Itoa(i))
}
assert.Assert(t, q.Len() == 20)
go q.Run(ctx, 20)
for q.Len() > 0 {
time.Sleep(100 * time.Millisecond)
}
for i := 0; i < 20; i++ {
_, ok := seen.Load(strconv.Itoa(i))
assert.Assert(t, ok, "Did not observe: %d", i)
}
assert.Assert(t, time.Since(start) < 5*time.Second)
}
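// checkConsistency asserts that the queue's internal item list remains sorted by plannedToStartWorkAt.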
func checkConsistency(t *testing.T, q *Queue) {
q.lock.Lock()
defer q.lock.Unlock()
for next := q.items.Front(); next != nil && next.Next() != nil; next = next.Next() {
qi := next.Value.(*queueItem)
qiNext := next.Next().Value.(*queueItem)
assert.Assert(t, qi.plannedToStartWorkAt.Before(qiNext.plannedToStartWorkAt) || qi.plannedToStartWorkAt.Equal(qiNext.plannedToStartWorkAt))
}
}
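// TestHeapOrder verifies that re-enqueueing existing keys with different delays keeps the internal list sorted.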
func TestHeapOrder(t *testing.T) {
q := New(workqueue.DefaultItemBasedRateLimiter(), t.Name(), func(ctx context.Context, key string) error {
return nil
})
q.clock = nonmovingClock{}
q.EnqueueWithoutRateLimitWithDelay("a", 1000)
q.EnqueueWithoutRateLimitWithDelay("b", 2000)
q.EnqueueWithoutRateLimitWithDelay("c", 3000)
q.EnqueueWithoutRateLimitWithDelay("d", 4000)
q.EnqueueWithoutRateLimitWithDelay("e", 5000)
checkConsistency(t, q)
t.Logf("%v", q)
q.EnqueueWithoutRateLimitWithDelay("d", 1000)
checkConsistency(t, q)
t.Logf("%v", q)
q.EnqueueWithoutRateLimitWithDelay("c", 1001)
checkConsistency(t, q)
t.Logf("%v", q)
q.EnqueueWithoutRateLimitWithDelay("e", 999)
checkConsistency(t, q)
t.Logf("%v", q)
}
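// rateLimitWrapper wraps a workqueue.RateLimiter and records When and Forget calls per item so tests can assert on rate limiter usage.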
type rateLimitWrapper struct {
addedMap sync.Map
forgottenMap sync.Map
rl workqueue.RateLimiter
}
func (r *rateLimitWrapper) When(item interface{}) time.Duration {
if _, ok := r.forgottenMap.Load(item); ok {
r.forgottenMap.Delete(item)
// Item was previously forgotten: reset its count, this When call is the first since the Forget
r.addedMap.Store(item, 1)
} else {
actual, loaded := r.addedMap.LoadOrStore(item, 1)
if loaded {
r.addedMap.Store(item, actual.(int)+1)
}
}
return r.rl.When(item)
}
func (r *rateLimitWrapper) Forget(item interface{}) {
r.forgottenMap.Store(item, struct{}{})
r.rl.Forget(item)
}
func (r *rateLimitWrapper) NumRequeues(item interface{}) int {
return r.rl.NumRequeues(item)
}
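// TestRateLimiter verifies that failing handlers are retried through the rate limiter and that each item is forgotten once it finally succeeds.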
func TestRateLimiter(t *testing.T) {
ctx, cancel := context.WithTimeout(context.Background(), 5000*time.Millisecond)
defer cancel()
syncMap := sync.Map{}
syncMap.Store("foo", 0)
syncMap.Store("bar", 0)
syncMap.Store("baz", 0)
syncMap.Store("quux", 0)
start := time.Now()
ratelimiter := &rateLimitWrapper{
rl: workqueue.NewItemFastSlowRateLimiter(1*time.Millisecond, 100*time.Millisecond, 1),
}
q := New(ratelimiter, t.Name(), func(ctx context.Context, key string) error {
oldValue, _ := syncMap.Load(key)
syncMap.Store(key, oldValue.(int)+1)
if oldValue.(int) < 9 {
return errors.New("test")
}
return nil
})
enqueued := 0
syncMap.Range(func(key, value interface{}) bool {
enqueued++
q.Enqueue(key.(string))
return true
})
assert.Assert(t, enqueued == 4)
go q.Run(ctx, 10)
incomplete := true
for incomplete {
time.Sleep(10 * time.Millisecond)
incomplete = false
// Wait for all items to finish processing.
syncMap.Range(func(key, value interface{}) bool {
if value.(int) < 10 {
incomplete = true
}
return true
})
}
// Make sure there were ~9 "slow" rate limits per item, and 1 fast
assert.Assert(t, time.Since(start) > 9*100*time.Millisecond)
// Make sure we didn't go off the deep end.
assert.Assert(t, time.Since(start) < 2*9*100*time.Millisecond)
// Make sure each item was seen. And Forgotten.
syncMap.Range(func(key, value interface{}) bool {
_, ok := ratelimiter.forgottenMap.Load(key)
assert.Assert(t, ok, "%s in forgotten map", key)
val, ok := ratelimiter.addedMap.Load(key)
assert.Assert(t, ok, "%s in added map", key)
assert.Assert(t, val == 10)
return true
})
q.lock.Lock()
defer q.lock.Unlock()
assert.Assert(t, len(q.itemsInQueue) == 0)
assert.Assert(t, len(q.itemsBeingProcessed) == 0)
assert.Assert(t, q.items.Len() == 0)
}
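// TestQueueForgetInProgress verifies that calling Forget on a key from inside its handler prevents the key from being retried even though the handler returns an error.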
func TestQueueForgetInProgress(t *testing.T) {
t.Parallel()
ctx, cancel := context.WithCancel(context.Background())
defer cancel()
var times int64
var q *Queue
q = New(workqueue.DefaultItemBasedRateLimiter(), t.Name(), func(ctx context.Context, key string) error {
assert.Assert(t, is.Equal(key, "foo"))
atomic.AddInt64(&times, 1)
q.Forget(key)
return errors.New("test")
})
q.EnqueueWithoutRateLimit("foo")
go q.Run(ctx, 1)
for !q.Empty() {
time.Sleep(100 * time.Millisecond)
}
assert.Assert(t, is.Equal(atomic.LoadInt64(&times), int64(1)))
}
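// TestQueueForgetBeforeStart verifies that a key which is forgotten before the workers start is never handed to the handler.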
func TestQueueForgetBeforeStart(t *testing.T) {
t.Parallel()
ctx, cancel := context.WithCancel(context.Background())
defer cancel()
q := New(workqueue.DefaultItemBasedRateLimiter(), t.Name(), func(ctx context.Context, key string) error {
panic("shouldn't be called")
})
q.EnqueueWithoutRateLimit("foo")
q.Forget("foo")
go q.Run(ctx, 1)
for !q.Empty() {
time.Sleep(100 * time.Millisecond)
}
}
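// TestQueueMoveItem verifies that re-inserting an existing key with a new delay moves it within the internal list while preserving the ordering invariant.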
func TestQueueMoveItem(t *testing.T) {
t.Parallel()
q := New(workqueue.DefaultItemBasedRateLimiter(), t.Name(), func(ctx context.Context, key string) error {
panic("shouldn't be called")
})
q.clock = nonmovingClock{}
q.insert("foo", false, 3000)
q.insert("bar", false, 2000)
q.insert("baz", false, 1000)
checkConsistency(t, q)
t.Log(q)
q.insert("foo", false, 2000)
checkConsistency(t, q)
t.Log(q)
q.insert("foo", false, 1999)
checkConsistency(t, q)
t.Log(q)
q.insert("foo", false, 999)
checkConsistency(t, q)
t.Log(q)
}
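// nonmovingClock is a stub clock whose Now never advances, making ordering tests deterministic; the unused methods simply panic.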
type nonmovingClock struct {
}
func (n nonmovingClock) Now() time.Time {
return time.Time{}
}
func (n nonmovingClock) Since(t time.Time) time.Duration {
return n.Now().Sub(t)
}
func (n nonmovingClock) After(d time.Duration) <-chan time.Time {
panic("implement me")
}
func (n nonmovingClock) NewTimer(d time.Duration) clock.Timer {
panic("implement me")
}
func (n nonmovingClock) Sleep(d time.Duration) {
panic("implement me")
}
func (n nonmovingClock) Tick(d time.Duration) <-chan time.Time {
panic("implement me")
}