Set Node Leader Owner Reference

This sets / updates the node lease owner reference to the current
node. Previously, we did not set this, which had the interesting
problem of leaking node leases on clusters with node churn.
This commit is contained in:
Sargun Dhillon
2020-07-31 11:19:22 -07:00
parent 5a39c167a6
commit 4bdcba5b85
2 changed files with 83 additions and 25 deletions

View File

@@ -206,8 +206,7 @@ func (n *NodeController) Run(ctx context.Context) error {
return n.controlLoop(ctx)
}
n.lease = newLease(n.lease)
setLeaseAttrs(n.lease, n.n, n.pingInterval*5)
n.lease = newLease(ctx, n.lease, n.n, n.pingInterval)
l, err := ensureLease(ctx, n.leases, n.lease)
if err != nil {
@@ -347,7 +346,7 @@ func (n *NodeController) handlePing(ctx context.Context) (retErr error) {
}
func (n *NodeController) updateLease(ctx context.Context) error {
l, err := updateNodeLease(ctx, n.leases, newLease(n.lease))
l, err := updateNodeLease(ctx, n.leases, newLease(ctx, n.lease, n.n, n.pingInterval))
if err != nil {
return err
}
@@ -601,7 +600,8 @@ func updateNodeStatus(ctx context.Context, nodes v1.NodeInterface, nodeFromProvi
return updatedNode, nil
}
func newLease(base *coord.Lease) *coord.Lease {
// This will return a new lease. It will either update base lease (and the set the renewal time appropriately), or create a brand new lease
func newLease(ctx context.Context, base *coord.Lease, node *corev1.Node, leaseRenewalInterval time.Duration) *coord.Lease {
var lease *coord.Lease
if base == nil {
lease = &coord.Lease{}
@@ -610,23 +610,62 @@ func newLease(base *coord.Lease) *coord.Lease {
}
lease.Spec.RenewTime = &metav1.MicroTime{Time: time.Now()}
if lease.Spec.LeaseDurationSeconds == nil {
// This is 25 due to historical reasons. It was supposed to be * 5, but...reasons
d := int32(leaseRenewalInterval.Seconds()) * 25
lease.Spec.LeaseDurationSeconds = &d
}
if lease.Name == "" {
lease.Name = node.Name
}
if lease.Spec.HolderIdentity == nil {
// Let's do a copy here
name := node.Name
lease.Spec.HolderIdentity = &name
}
// Copied and pasted from: https://github.com/kubernetes/kubernetes/blob/442a69c3bdf6fe8e525b05887e57d89db1e2f3a5/pkg/kubelet/nodelease/controller.go#L213-L216
// Setting owner reference needs node's UID. Note that it is different from
// kubelet.nodeRef.UID. When lease is initially created, it is possible that
// the connection between master and node is not ready yet. So try to set
// owner reference every time when renewing the lease, until successful.
//
// We have a special case to deal with in the node may be deleted and
// come back with a different UID. In this case the lease object should
// be deleted due to a owner reference cascading deletion, and when we renew
// lease again updateNodeLease will call ensureLease, and establish a new
// lease with the right node ID
if l := len(lease.OwnerReferences); l == 0 {
lease.OwnerReferences = []metav1.OwnerReference{
{
APIVersion: corev1.SchemeGroupVersion.WithKind("Node").Version,
Kind: corev1.SchemeGroupVersion.WithKind("Node").Kind,
Name: node.Name,
UID: node.UID,
},
}
} else if l > 0 {
var foundThisNode, foundNode bool
for _, ref := range lease.OwnerReferences {
if ref.APIVersion == corev1.SchemeGroupVersion.WithKind("Node").Version && ref.Kind == corev1.SchemeGroupVersion.WithKind("Node").Kind {
foundNode = true
if node.UID == ref.UID && node.Name == ref.Name {
foundThisNode = true
}
}
}
if !foundThisNode && !foundNode {
log.G(ctx).Warn("Found that lease had owner references, but no nodes in owner references")
} else if foundNode {
log.G(ctx).Warn("Found that lease had owner references, but nodes in owner references that is not this node")
}
}
return lease
}
func setLeaseAttrs(l *coord.Lease, n *corev1.Node, dur time.Duration) {
if l.Name == "" {
l.Name = n.Name
}
if l.Spec.HolderIdentity == nil {
l.Spec.HolderIdentity = &n.Name
}
if l.Spec.LeaseDurationSeconds == nil {
d := int32(dur.Seconds()) * 5
l.Spec.LeaseDurationSeconds = &d
}
}
func updateNodeStatusHeartbeat(n *corev1.Node) {
now := metav1.NewTime(time.Now())
for i := range n.Status.Conditions {

View File

@@ -228,8 +228,7 @@ func TestEnsureLease(t *testing.T) {
n := testNode(t)
ctx := context.Background()
lease := newLease(nil)
setLeaseAttrs(lease, n, 1*time.Second)
lease := newLease(ctx, nil, n, 1*time.Second)
l1, err := ensureLease(ctx, c, lease.DeepCopy())
assert.NilError(t, err)
@@ -278,12 +277,13 @@ func TestUpdateNodeStatus(t *testing.T) {
}
func TestUpdateNodeLease(t *testing.T) {
leases := testclient.NewSimpleClientset().CoordinationV1beta1().Leases(corev1.NamespaceNodeLease)
lease := newLease(nil)
n := testNode(t)
setLeaseAttrs(lease, n, 0)
ctx := context.Background()
leases := testclient.NewSimpleClientset().CoordinationV1beta1().Leases(corev1.NamespaceNodeLease)
n := testNode(t)
lease := newLease(ctx, nil, n, time.Duration(0))
l, err := updateNodeLease(ctx, leases, lease)
assert.NilError(t, err)
assert.Equal(t, l.Name, lease.Name)
@@ -304,6 +304,25 @@ func TestUpdateNodeLease(t *testing.T) {
assert.Assert(t, cmp.DeepEqual(compare.Spec.HolderIdentity, lease.Spec.HolderIdentity))
}
func TestFixNodeLeaseReferences(t *testing.T) {
ctx := context.Background()
n := testNode(t)
lease1 := newLease(ctx, nil, n, time.Second)
// Let's break owner references
lease1.OwnerReferences = nil
time.Sleep(2 * time.Nanosecond)
lease2 := newLease(ctx, lease1, n, time.Second)
// Make sure that newLease actually did its jobs
assert.Assert(t, lease2.Spec.RenewTime.Nanosecond() > lease1.Spec.RenewTime.Nanosecond())
// Let's check if owner references got set
assert.Assert(t, is.Len(lease2.OwnerReferences, 1))
assert.Assert(t, is.Equal(lease2.OwnerReferences[0].UID, n.UID))
assert.Assert(t, is.Equal(lease2.OwnerReferences[0].Name, n.Name))
}
// TestPingAfterStatusUpdate checks that Ping continues to be called with the specified interval
// after a node status update occurs, when leases are disabled.
//