Compare commits

...

3 Commits

Author SHA1 Message Date
Brian Goff
50f360d798 Merge pull request #795 from cpuguy83/cherry_picks_1.1.1
Cherry picks 1.1.1
2019-12-02 11:27:21 -08:00
Thomas Hartland
64873c0816 Add test for node ping interval
(cherry picked from commit 3783a39b26)
2019-11-15 14:33:02 -08:00
Thomas Hartland
c660940a7b After handling status update, reset update timer with correct duration
If the ping timer is being used, it should be reset with the ping update
interval. If the status update interval is used then Ping stops being
called for long enough to cause kubernetes to mark the node as NotReady.

(cherry picked from commit c258614d8f)
2019-11-15 14:32:56 -08:00
2 changed files with 98 additions and 1 deletion

View File

@@ -244,7 +244,12 @@ func (n *NodeController) controlLoop(ctx context.Context) error {
statusTimer := time.NewTimer(n.statusInterval)
defer statusTimer.Stop()
timerResetDuration := n.statusInterval
if n.disableLease {
// when resetting the timer after processing a status update, reset it to the ping interval
// (since it will be the ping timer as n.disableLease == true)
timerResetDuration = n.pingInterval
// hack to make sure this channel always blocks since we won't be using it
if !statusTimer.Stop() {
<-statusTimer.C
@@ -276,7 +281,7 @@ func (n *NodeController) controlLoop(ctx context.Context) error {
if err := n.updateStatus(ctx, false); err != nil {
log.G(ctx).WithError(err).Error("Error handling node status update")
}
t.Reset(n.statusInterval)
t.Reset(timerResetDuration)
case <-statusTimer.C:
if err := n.updateStatus(ctx, false); err != nil {
log.G(ctx).WithError(err).Error("Error handling node status update")

View File

@@ -302,6 +302,78 @@ func TestUpdateNodeLease(t *testing.T) {
assert.Assert(t, cmp.DeepEqual(compare.Spec.HolderIdentity, lease.Spec.HolderIdentity))
}
// TestPingAfterStatusUpdate checks that Ping continues to be called with the specified interval
// after a node status update occurs, when leases are disabled.
//
// Timing ratios used in this test:
// ping interval (10 ms)
// maximum allowed interval = 2.5 * ping interval
// status update interval = 6 * ping interval
//
// The allowed maximum time is 2.5 times the ping interval because
// the status update resets the ping interval timer, meaning
// that there can be a full two interval durations between
// successive calls to Ping. The extra half is to allow
// for timing variations when using such short durations.
//
// Once the node controller is ready:
// send status update after 10 * ping interval
// end test after another 10 * ping interval
func TestPingAfterStatusUpdate(t *testing.T) {
	ctx, cancel := context.WithCancel(context.Background())
	defer cancel()

	c := testclient.NewSimpleClientset()
	nodes := c.CoreV1().Nodes()

	testP := &testNodeProviderPing{}

	interval := 10 * time.Millisecond
	maxAllowedInterval := time.Duration(2.5 * float64(interval.Nanoseconds()))

	opts := []NodeControllerOpt{
		WithNodePingInterval(interval),
		WithNodeStatusUpdateInterval(6 * interval),
	}

	testNode := testNode(t)
	testNodeCopy := testNode.DeepCopy()

	node, err := NewNodeController(testP, testNode, nodes, opts...)
	assert.NilError(t, err)

	chErr := make(chan error, 1)
	go func() {
		chErr <- node.Run(ctx)
	}()

	timer := time.NewTimer(10 * time.Second)
	defer timer.Stop()

	// wait for the node to be ready
	select {
	case <-timer.C:
		t.Fatal("timeout waiting for node to be ready")
	case err := <-chErr:
		// Report the error actually received from node.Run; the outer err
		// (from NewNodeController) is already known to be nil here.
		t.Fatalf("node.Run returned earlier than expected: %v", err)
	case <-node.Ready():
	}

	// Let the controller ping for a while, then trigger a status update.
	// A single-case select on a receive is just a receive.
	<-time.After(10 * interval)
	testP.triggerStatusUpdate(testNodeCopy)

	// Keep pinging for another window after the status update so the
	// provider can observe the inter-ping gaps it is measuring.
	<-time.After(10 * interval)

	assert.Assert(t, testP.maxPingInterval < maxAllowedInterval, "maximum time between node pings (%v) was greater than the maximum expected interval (%v)", testP.maxPingInterval, maxAllowedInterval)
}
func testNode(t *testing.T) *corev1.Node {
n := &corev1.Node{}
n.Name = strings.ToLower(t.Name())
@@ -323,6 +395,26 @@ func (p *testNodeProvider) triggerStatusUpdate(n *corev1.Node) {
}
}
// testNodeProviderPing tracks the maximum time interval between calls to Ping
type testNodeProviderPing struct {
	testNodeProvider
	// lastPingTime is the wall-clock time of the most recent Ping call;
	// zero until the first call.
	lastPingTime time.Time
	// maxPingInterval is the largest gap observed between two successive
	// Ping calls.
	maxPingInterval time.Duration
}
// Ping records the moment of each call and keeps track of the largest
// gap seen between two successive calls. It always reports success.
func (tnp *testNodeProviderPing) Ping(ctx context.Context) error {
	now := time.Now()
	// Only measure a gap once a previous call has been recorded.
	if !tnp.lastPingTime.IsZero() {
		if gap := now.Sub(tnp.lastPingTime); gap > tnp.maxPingInterval {
			tnp.maxPingInterval = gap
		}
	}
	tnp.lastPingTime = now
	return nil
}
// watchGetter is satisfied by clients that can open a watch stream for a
// resource (e.g. a node client); used by tests to observe changes.
type watchGetter interface {
	Watch(metav1.ListOptions) (watch.Interface, error)
}