// Copyright © 2017 The virtual-kubelet authors
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
package node
import (
"context"
"strings"
"testing"
"time"
"gotest.tools/assert"
"gotest.tools/assert/cmp"
is "gotest.tools/assert/cmp"
coord "k8s.io/api/coordination/v1beta1"
corev1 "k8s.io/api/core/v1"
"k8s.io/apimachinery/pkg/api/errors"
metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
watch "k8s.io/apimachinery/pkg/watch"
testclient "k8s.io/client-go/kubernetes/fake"
"k8s.io/client-go/util/retry"
)
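// TestNodeRun exercises the node controller's run loop both with and without node leases enabled.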
func TestNodeRun(t *testing.T) {
t.Run("WithoutLease", func(t *testing.T) { testNodeRun(t, false) })
t.Run("WithLease", func(t *testing.T) { testNodeRun(t, true) })
}
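// testNodeRun starts a node controller against a fake clientset and verifies that node
// updates (and lease renewals, when leases are enabled) are observed on the watch channels,
// and that a provider-triggered status update shows up in the node's conditions.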
func testNodeRun(t *testing.T, enableLease bool) {
ctx, cancel := context.WithCancel(context.Background())
defer cancel()
c := testclient.NewSimpleClientset()
testP := &testNodeProvider{NodeProvider: &NaiveNodeProvider{}}
nodes := c.CoreV1().Nodes()
leases := c.CoordinationV1beta1().Leases(corev1.NamespaceNodeLease)
interval := 1 * time.Millisecond
opts := []NodeControllerOpt{
WithNodePingInterval(interval),
WithNodeStatusUpdateInterval(interval),
}
if enableLease {
opts = append(opts, WithNodeEnableLeaseV1Beta1(leases, nil))
}
testNode := testNode(t)
// We have to refer to testNodeCopy during the course of the test: testNode is modified by the
// node controller, so reading it directly here would trip the race detector.
testNodeCopy := testNode.DeepCopy()
node, err := NewNodeController(testP, testNode, nodes, opts...)
assert.NilError(t, err)
chErr := make(chan error)
defer func() {
cancel()
assert.NilError(t, <-chErr)
}()
go func() {
chErr <- node.Run(ctx)
close(chErr)
}()
nw := makeWatch(ctx, t, nodes, testNodeCopy.Name)
defer nw.Stop()
nr := nw.ResultChan()
lw := makeWatch(ctx, t, leases, testNodeCopy.Name)
defer lw.Stop()
lr := lw.ResultChan()
var (
lBefore *coord.Lease
nodeUpdates int
leaseUpdates int
iters = 50
expectAtLeast = iters / 5
)
timeout := time.After(30 * time.Second)
for i := 0; i < iters; i++ {
var l *coord.Lease
select {
case <-timeout:
t.Fatal("timed out waiting for expected events")
case <-time.After(time.Second):
t.Errorf("timeout waiting for event")
continue
case err := <-chErr:
t.Fatal(err) // if this returns at all it is an error, regardless of whether err is nil
case <-nr:
nodeUpdates++
continue
case le := <-lr:
l = le.Object.(*coord.Lease)
leaseUpdates++
assert.Assert(t, cmp.Equal(l.Spec.HolderIdentity != nil, true))
assert.NilError(t, err)
assert.Check(t, cmp.Equal(*l.Spec.HolderIdentity, testNodeCopy.Name))
if lBefore != nil {
assert.Check(t, before(lBefore.Spec.RenewTime.Time, l.Spec.RenewTime.Time))
}
lBefore = l
}
}
lw.Stop()
nw.Stop()
assert.Check(t, atLeast(nodeUpdates, expectAtLeast))
if enableLease {
assert.Check(t, atLeast(leaseUpdates, expectAtLeast))
} else {
assert.Check(t, cmp.Equal(leaseUpdates, 0))
}
// trigger an async node status update
n, err := nodes.Get(ctx, testNode.Name, metav1.GetOptions{})
assert.NilError(t, err)
newCondition := corev1.NodeCondition{
Type: corev1.NodeConditionType("UPDATED"),
LastTransitionTime: metav1.Now().Rfc3339Copy(),
}
n.Status.Conditions = append(n.Status.Conditions, newCondition)
nw = makeWatch(ctx, t, nodes, testNodeCopy.Name)
defer nw.Stop()
nr = nw.ResultChan()
testP.triggerStatusUpdate(n)
eCtx, eCancel := context.WithTimeout(ctx, 10*time.Second)
defer eCancel()
select {
case err := <-chErr:
t.Fatal(err) // if this returns at all it is an error, regardless of whether err is nil
case err := <-waitForEvent(eCtx, nr, func(e watch.Event) bool {
node := e.Object.(*corev1.Node)
if len(node.Status.Conditions) == 0 {
return false
}
// Check if this is the node update we are looking for. Since node updates happen
// periodically, some may occur before the status update we triggered.
c := node.Status.Conditions[len(n.Status.Conditions)-1]
if !c.LastTransitionTime.Equal(&newCondition.LastTransitionTime) {
return false
}
if c.Type != newCondition.Type {
return false
}
return true
}):
assert.NilError(t, err, "error waiting for updated node condition")
}
}
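// TestNodeCustomUpdateStatusErrorHandler verifies that the handler registered with
// WithNodeStatusUpdateErrorHandler is invoked when a status update fails (here because the
// node was deleted) and can shut the controller down by cancelling the context.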
func TestNodeCustomUpdateStatusErrorHandler(t *testing.T) {
c := testclient.NewSimpleClientset()
testP := &testNodeProvider{NodeProvider: &NaiveNodeProvider{}}
nodes := c.CoreV1().Nodes()
ctx, cancel := context.WithCancel(context.Background())
defer cancel()
node, err := NewNodeController(testP, testNode(t), nodes,
WithNodeStatusUpdateErrorHandler(func(_ context.Context, err error) error {
cancel()
return nil
}),
)
assert.NilError(t, err)
chErr := make(chan error, 1)
go func() {
chErr <- node.Run(ctx)
}()
timer := time.NewTimer(10 * time.Second)
defer timer.Stop()
// wait for the node to be ready
select {
case <-timer.C:
t.Fatal("timeout waiting for node to be ready")
case err := <-chErr:
t.Fatalf("node.Run returned earlier than expected: %v", err)
case <-node.Ready():
}
err = nodes.Delete(ctx, node.n.Name, metav1.DeleteOptions{})
assert.NilError(t, err)
testP.triggerStatusUpdate(node.n.DeepCopy())
timer = time.NewTimer(10 * time.Second)
defer timer.Stop()
select {
case err := <-chErr:
assert.Equal(t, err, nil)
case <-timer.C:
t.Fatal("timeout waiting for node shutdown")
}
}
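// TestEnsureLease exercises ensureLease against the fake clientset: both the initial call and
// a follow-up call with a newer renew time return a lease whose renew time matches the one
// passed in.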
func TestEnsureLease(t *testing.T) {
c := testclient.NewSimpleClientset().CoordinationV1beta1().Leases(corev1.NamespaceNodeLease)
n := testNode(t)
ctx := context.Background()
lease := newLease(ctx, nil, n, 1*time.Second)
l1, err := ensureLease(ctx, c, lease.DeepCopy())
assert.NilError(t, err)
assert.Check(t, timeEqual(l1.Spec.RenewTime.Time, lease.Spec.RenewTime.Time))
l1.Spec.RenewTime.Time = time.Now().Add(1 * time.Second)
l2, err := ensureLease(ctx, c, l1.DeepCopy())
assert.NilError(t, err)
assert.Check(t, timeEqual(l2.Spec.RenewTime.Time, l1.Spec.RenewTime.Time))
}
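// TestUpdateNodeStatus verifies that updateNodeStatus returns a NotFound error when the node
// does not exist and otherwise persists status changes.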
func TestUpdateNodeStatus(t *testing.T) {
n := testNode(t)
n.Status.Conditions = append(n.Status.Conditions, corev1.NodeCondition{
LastHeartbeatTime: metav1.Now().Rfc3339Copy(),
})
n.Status.Phase = corev1.NodePending
nodes := testclient.NewSimpleClientset().CoreV1().Nodes()
ctx := context.Background()
updated, err := updateNodeStatus(ctx, nodes, n.DeepCopy())
assert.Equal(t, errors.IsNotFound(err), true, err)
_, err = nodes.Create(ctx, n, metav1.CreateOptions{})
assert.NilError(t, err)
updated, err = updateNodeStatus(ctx, nodes, n.DeepCopy())
assert.NilError(t, err)
assert.Check(t, cmp.DeepEqual(n.Status, updated.Status))
n.Status.Phase = corev1.NodeRunning
updated, err = updateNodeStatus(ctx, nodes, n.DeepCopy())
assert.NilError(t, err)
assert.Check(t, cmp.DeepEqual(n.Status, updated.Status))
err = nodes.Delete(ctx, n.Name, metav1.DeleteOptions{})
assert.NilError(t, err)
_, err = nodes.Get(ctx, n.Name, metav1.GetOptions{})
assert.Equal(t, errors.IsNotFound(err), true, err)
_, err = updateNodeStatus(ctx, nodes, updated.DeepCopy())
assert.Equal(t, errors.IsNotFound(err), true, err)
}
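// TestUpdateNodeLease verifies that updateNodeLease creates the lease when it does not yet
// exist and persists an updated renew time on subsequent calls.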
func TestUpdateNodeLease(t *testing.T) {
ctx := context.Background()
leases := testclient.NewSimpleClientset().CoordinationV1beta1().Leases(corev1.NamespaceNodeLease)
n := testNode(t)
lease := newLease(ctx, nil, n, time.Duration(0))
l, err := updateNodeLease(ctx, leases, lease)
assert.NilError(t, err)
assert.Equal(t, l.Name, lease.Name)
assert.Assert(t, cmp.DeepEqual(l.Spec.HolderIdentity, lease.Spec.HolderIdentity))
compare, err := leases.Get(ctx, l.Name, emptyGetOptions)
assert.NilError(t, err)
assert.Equal(t, l.Spec.RenewTime.Time.Unix(), compare.Spec.RenewTime.Time.Unix())
assert.Equal(t, compare.Name, lease.Name)
assert.Assert(t, cmp.DeepEqual(compare.Spec.HolderIdentity, lease.Spec.HolderIdentity))
l.Spec.RenewTime.Time = time.Now().Add(10 * time.Second)
compare, err = updateNodeLease(ctx, leases, l.DeepCopy())
assert.NilError(t, err)
assert.Equal(t, compare.Spec.RenewTime.Time.Unix(), l.Spec.RenewTime.Time.Unix())
assert.Equal(t, compare.Name, lease.Name)
assert.Assert(t, cmp.DeepEqual(compare.Spec.HolderIdentity, lease.Spec.HolderIdentity))
}
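// TestFixNodeLeaseReferences verifies that newLease restores a missing owner reference on an
// existing lease, pointing it back at the node.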
func TestFixNodeLeaseReferences(t *testing.T) {
ctx := context.Background()
n := testNode(t)
lease1 := newLease(ctx, nil, n, time.Second)
// Let's break owner references
lease1.OwnerReferences = nil
time.Sleep(2 * time.Nanosecond)
lease2 := newLease(ctx, lease1, n, time.Second)
// Make sure that newLease actually did its job and renewed the lease
assert.Assert(t, lease2.Spec.RenewTime.Time.After(lease1.Spec.RenewTime.Time))
// Let's check if owner references got set
assert.Assert(t, is.Len(lease2.OwnerReferences, 1))
assert.Assert(t, is.Equal(lease2.OwnerReferences[0].UID, n.UID))
assert.Assert(t, is.Equal(lease2.OwnerReferences[0].Name, n.Name))
}
// TestPingAfterStatusUpdate checks that Ping continues to be called with the specified interval
// after a node status update occurs, when leases are disabled.
//
// Timing ratios used in this test:
// ping interval (10 ms)
// maximum allowed interval = 2.5 * ping interval
// status update interval = 6 * ping interval
//
// The allowed maximum time is 2.5 times the ping interval because
// the status update resets the ping interval timer, meaning
// that there can be a full two interval durations between
// successive calls to Ping. The extra half is to allow
// for timing variations when using such short durations.
//
// Once the node controller is ready:
// send status update after 10 * ping interval
// end test after another 10 * ping interval
func TestPingAfterStatusUpdate(t *testing.T) {
ctx, cancel := context.WithCancel(context.Background())
defer cancel()
c := testclient.NewSimpleClientset()
nodes := c.CoreV1().Nodes()
testP := &testNodeProviderPing{}
interval := 10 * time.Millisecond
maxAllowedInterval := time.Duration(2.5 * float64(interval.Nanoseconds()))
opts := []NodeControllerOpt{
WithNodePingInterval(interval),
WithNodeStatusUpdateInterval(interval * time.Duration(6)),
}
testNode := testNode(t)
testNodeCopy := testNode.DeepCopy()
node, err := NewNodeController(testP, testNode, nodes, opts...)
assert.NilError(t, err)
chErr := make(chan error, 1)
go func() {
chErr <- node.Run(ctx)
}()
timer := time.NewTimer(10 * time.Second)
defer timer.Stop()
// wait for the node to be ready
select {
case <-timer.C:
t.Fatal("timeout waiting for node to be ready")
case err := <-chErr:
t.Fatalf("node.Run returned earlier than expected: %v", err)
case <-node.Ready():
}
<-time.After(interval * time.Duration(10))
testP.triggerStatusUpdate(testNodeCopy)
<-time.After(interval * time.Duration(10))
assert.Assert(t, testP.maxPingInterval < maxAllowedInterval, "maximum time between node pings (%v) was greater than the maximum expected interval (%v)", testP.maxPingInterval, maxAllowedInterval)
}
// Are annotations that were created before the VK existed preserved?
func TestBeforeAnnotationsPreserved(t *testing.T) {
ctx, cancel := context.WithCancel(context.Background())
defer cancel()
c := testclient.NewSimpleClientset()
testP := &testNodeProvider{NodeProvider: &NaiveNodeProvider{}}
nodes := c.CoreV1().Nodes()
interval := 10 * time.Millisecond
opts := []NodeControllerOpt{
WithNodePingInterval(interval),
}
testNode := testNode(t)
testNodeCreateCopy := testNode.DeepCopy()
testNodeCreateCopy.Annotations = map[string]string{
"beforeAnnotation": "value",
}
_, err := nodes.Create(ctx, testNodeCreateCopy, metav1.CreateOptions{})
assert.NilError(t, err)
// We have to refer to testNodeCopy during the course of the test: testNode is modified by the
// node controller, so reading it directly here would trip the race detector.
testNodeCopy := testNode.DeepCopy()
node, err := NewNodeController(testP, testNode, nodes, opts...)
assert.NilError(t, err)
chErr := make(chan error)
defer func() {
cancel()
assert.NilError(t, <-chErr)
}()
go func() {
chErr <- node.Run(ctx)
close(chErr)
}()
nw := makeWatch(ctx, t, nodes, testNodeCopy.Name)
defer nw.Stop()
nr := nw.ResultChan()
t.Log("Waiting for node to exist")
assert.NilError(t, <-waitForEvent(ctx, nr, func(e watch.Event) bool {
return e.Object != nil
}))
testP.notifyNodeStatus(&corev1.Node{
ObjectMeta: metav1.ObjectMeta{
Annotations: map[string]string{
"testAnnotation": "value",
},
},
})
assert.NilError(t, <-waitForEvent(ctx, nr, func(e watch.Event) bool {
if e.Object == nil {
return false
}
_, ok := e.Object.(*corev1.Node).Annotations["testAnnotation"]
return ok
}))
newNode, err := nodes.Get(ctx, testNodeCopy.Name, emptyGetOptions)
assert.NilError(t, err)
assert.Assert(t, is.Contains(newNode.Annotations, "testAnnotation"))
assert.Assert(t, is.Contains(newNode.Annotations, "beforeAnnotation"))
}
// Are conditions set by systems outside of VK preserved?
func TestManualConditionsPreserved(t *testing.T) {
ctx, cancel := context.WithCancel(context.Background())
defer cancel()
c := testclient.NewSimpleClientset()
testP := &testNodeProvider{NodeProvider: &NaiveNodeProvider{}}
nodes := c.CoreV1().Nodes()
interval := 10 * time.Millisecond
opts := []NodeControllerOpt{
WithNodePingInterval(interval),
}
testNode := testNode(t)
// We have to refer to testNodeCopy during the course of the test: testNode is modified by the
// node controller, so reading it directly here would trip the race detector.
testNodeCopy := testNode.DeepCopy()
node, err := NewNodeController(testP, testNode, nodes, opts...)
assert.NilError(t, err)
chErr := make(chan error)
defer func() {
cancel()
assert.NilError(t, <-chErr)
}()
go func() {
chErr <- node.Run(ctx)
close(chErr)
}()
nw := makeWatch(ctx, t, nodes, testNodeCopy.Name)
defer nw.Stop()
nr := nw.ResultChan()
t.Log("Waiting for node to exist")
assert.NilError(t, <-waitForEvent(ctx, nr, func(e watch.Event) bool {
if e.Object == nil {
return false
}
receivedNode := e.Object.(*corev1.Node)
return len(receivedNode.Status.Conditions) == 0
}))
newNode, err := nodes.Get(ctx, testNodeCopy.Name, emptyGetOptions)
assert.NilError(t, err)
assert.Assert(t, is.Len(newNode.Status.Conditions, 0))
baseCondition := corev1.NodeCondition{
Type: "BaseCondition",
Status: "Ok",
Reason: "NA",
Message: "This is the base condition. It is set by VK, and should always be there.",
}
testP.notifyNodeStatus(&corev1.Node{
ObjectMeta: metav1.ObjectMeta{
Annotations: map[string]string{
"testAnnotation": "value",
},
},
Status: corev1.NodeStatus{
Conditions: []corev1.NodeCondition{
baseCondition,
},
},
})
// Wait for this (node with condition) to show up
assert.NilError(t, <-waitForEvent(ctx, nr, func(e watch.Event) bool {
receivedNode := e.Object.(*corev1.Node)
for _, condition := range receivedNode.Status.Conditions {
if condition.Type == baseCondition.Type {
return true
}
}
return false
}))
newNode, err = nodes.Get(ctx, testNodeCopy.Name, emptyGetOptions)
assert.NilError(t, err)
assert.Assert(t, is.Len(newNode.Status.Conditions, 1))
assert.Assert(t, is.Contains(newNode.Annotations, "testAnnotation"))
// Add a new event manually
manuallyAddedCondition := corev1.NodeCondition{
Type: "ManuallyAddedCondition",
Status: "Ok",
Reason: "NA",
Message: "This is a manually added condition. Outside of VK. It should not be removed.",
}
assert.NilError(t, retry.RetryOnConflict(retry.DefaultRetry, func() error {
newNode, err = nodes.Get(ctx, testNodeCopy.Name, emptyGetOptions)
if err != nil {
return err
}
newNode.Annotations["manuallyAddedAnnotation"] = "value"
newNode.Status.Conditions = append(newNode.Status.Conditions, manuallyAddedCondition)
_, err = nodes.UpdateStatus(ctx, newNode, metav1.UpdateOptions{})
return err
}))
assert.NilError(t, <-waitForEvent(ctx, nr, func(e watch.Event) bool {
receivedNode := e.Object.(*corev1.Node)
for _, condition := range receivedNode.Status.Conditions {
if condition.Type == manuallyAddedCondition.Type {
return true
}
}
assert.Assert(t, is.Contains(receivedNode.Annotations, "testAnnotation"))
assert.Assert(t, is.Contains(newNode.Annotations, "manuallyAddedAnnotation"))
return false
}))
// Now have the VK report a new condition.
newCondition := corev1.NodeCondition{
Type: "NewCondition",
Status: "Ok",
Reason: "NA",
Message: "This is a newly added condition. It should only show up *with* / *after* ManuallyAddedCondition. It is set by the VK.",
}
// Everything but node status is ignored here
testP.notifyNodeStatus(&corev1.Node{
// Annotations is left empty
Status: corev1.NodeStatus{
Conditions: []corev1.NodeCondition{
baseCondition,
newCondition,
},
},
})
i := 0
assert.NilError(t, <-waitForEvent(ctx, nr, func(e watch.Event) bool {
receivedNode := e.Object.(*corev1.Node)
for _, condition := range receivedNode.Status.Conditions {
if condition.Type == newCondition.Type {
// Wait until this condition has been seen in a few (more than 2) updates / patches
if i > 2 {
return true
}
i++
}
}
return false
}))
// Make sure that all three conditions are there.
newNode, err = nodes.Get(ctx, testNodeCopy.Name, emptyGetOptions)
assert.NilError(t, err)
seenConditionTypes := make([]corev1.NodeConditionType, len(newNode.Status.Conditions))
for idx := range newNode.Status.Conditions {
seenConditionTypes[idx] = newNode.Status.Conditions[idx].Type
}
assert.Assert(t, is.Contains(seenConditionTypes, baseCondition.Type))
assert.Assert(t, is.Contains(seenConditionTypes, newCondition.Type))
assert.Assert(t, is.Contains(seenConditionTypes, manuallyAddedCondition.Type))
assert.Assert(t, is.Equal(newNode.Annotations["testAnnotation"], ""))
assert.Assert(t, is.Contains(newNode.Annotations, "manuallyAddedAnnotation"))
t.Log(newNode.Status.Conditions)
}
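// testNode returns a minimal node whose name is derived from the current test's name.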
func testNode(t *testing.T) *corev1.Node {
n := &corev1.Node{}
n.Name = strings.ToLower(t.Name())
return n
}
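// testNodeProvider wraps a NodeProvider and records the NotifyNodeStatus callback so tests
// can push status updates into the controller on demand.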
type testNodeProvider struct {
NodeProvider
statusHandlers []func(*corev1.Node)
// Callback to VK
notifyNodeStatus func(*corev1.Node)
}
func (p *testNodeProvider) NotifyNodeStatus(ctx context.Context, h func(*corev1.Node)) {
p.notifyNodeStatus = h
}
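// triggerStatusUpdate runs any registered status handlers and then invokes the captured
// callback, simulating a provider-initiated node status change.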
func (p *testNodeProvider) triggerStatusUpdate(n *corev1.Node) {
for _, h := range p.statusHandlers {
h(n)
}
p.notifyNodeStatus(n)
}
// testNodeProviderPing tracks the maximum time interval between calls to Ping
type testNodeProviderPing struct {
testNodeProvider
lastPingTime time.Time
maxPingInterval time.Duration
}
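// Ping records the longest observed gap between successive calls so the test can assert on
// the maximum ping interval.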
func (tnp *testNodeProviderPing) Ping(ctx context.Context) error {
now := time.Now()
if tnp.lastPingTime.IsZero() {
tnp.lastPingTime = now
return nil
}
if now.Sub(tnp.lastPingTime) > tnp.maxPingInterval {
tnp.maxPingInterval = now.Sub(tnp.lastPingTime)
}
tnp.lastPingTime = now
return nil
}
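// watchGetter is the minimal client surface needed to establish a watch.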
type watchGetter interface {
Watch(context.Context, metav1.ListOptions) (watch.Interface, error)
}
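// makeWatch starts a watch for the named object and fails the test if the watch cannot be
// established.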
func makeWatch(ctx context.Context, t *testing.T, wc watchGetter, name string) watch.Interface {
t.Helper()
w, err := wc.Watch(ctx, metav1.ListOptions{FieldSelector: "name=" + name})
assert.NilError(t, err)
return w
}
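// atLeast is a comparison that succeeds when x is at least the given minimum.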
func atLeast(x, atLeast int) cmp.Comparison {
return func() cmp.Result {
if x < atLeast {
return cmp.ResultFailureTemplate(failTemplate("<"), map[string]interface{}{"x": x, "y": atLeast})
}
return cmp.ResultSuccess
}
}
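// before is a comparison that succeeds when x is strictly before y.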
func before(x, y time.Time) cmp.Comparison {
return func() cmp.Result {
if x.Before(y) {
return cmp.ResultSuccess
}
return cmp.ResultFailureTemplate(failTemplate(">="), map[string]interface{}{"x": x, "y": y})
}
}
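// timeEqual is a comparison that succeeds when the two times are equal.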
func timeEqual(x, y time.Time) cmp.Comparison {
return func() cmp.Result {
if x.Equal(y) {
return cmp.ResultSuccess
}
return cmp.ResultFailureTemplate(failTemplate("!="), map[string]interface{}{"x": x, "y": y})
}
}
// waitForEvent waits for the `check` function to return true.
// `check` is run each time an event is received.
// Cancelling the context cancels the wait, with the context error sent on
// the returned channel.
func waitForEvent(ctx context.Context, chEvent <-chan watch.Event, check func(watch.Event) bool) <-chan error {
chErr := make(chan error, 1)
go func() {
for {
select {
case e := <-chEvent:
if check(e) {
chErr <- nil
return
}
case <-ctx.Done():
chErr <- ctx.Err()
return
}
}
}()
return chErr
}
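// failTemplate builds the failure-message template used by the comparisons above, showing
// both values, their types, and the operator that failed.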
func failTemplate(op string) string {
return `
{{- .Data.x}} (
{{- with callArg 0 }}{{ formatNode . }} {{end -}}
{{- printf "%T" .Data.x -}}
) ` + op + ` {{ .Data.y}} (
{{- with callArg 1 }}{{ formatNode . }} {{end -}}
{{- printf "%T" .Data.y -}}
)`
}