Merge pull request #955 from sargun/fix-pod-status-update

Fix pod status update
Authored by Sargun Dhillon, committed via GitHub on 2021-02-17 12:02:38 -08:00
4 changed files with 120 additions and 22 deletions
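What the diff below does, in brief: the pod controller used to skip pushing a provider update to the API server whenever the pod Status matched what it had last received from the provider, which silently dropped updates that changed only metadata (annotations, labels, finalizers). The skip decision moves to the enqueue side, where the whole pod received from the provider is compared against the previous one, the resync check in Run is widened to cover metadata, and a new envtest E2E test drives the provider's notify callback to cover this path. As a rough sketch of that callback path (assuming the PodNotifier extension interface from the virtual-kubelet node package; notifyingProvider and tagPod are made-up names for illustration only):

package sketch

import (
	"context"

	corev1 "k8s.io/api/core/v1"
)

// notifyingProvider mirrors the shape of the mock provider used in the tests
// below: it stores the callback handed to it by the pod controller.
type notifyingProvider struct {
	notify func(*corev1.Pod)
}

// NotifyPods is the PodNotifier hook through which the pod controller asks to
// be told about asynchronous pod changes (status or metadata).
func (p *notifyingProvider) NotifyPods(_ context.Context, cb func(*corev1.Pod)) {
	p.notify = cb
}

// tagPod is a hypothetical provider-side change that touches only metadata.
// Before this PR, such an update could be dropped because only the Status
// fields were compared when deciding whether to update the API server.
func (p *notifyingProvider) tagPod(pod *corev1.Pod) {
	updated := pod.DeepCopy()
	if updated.Annotations == nil {
		updated.Annotations = map[string]string{}
	}
	updated.Annotations["testannotation"] = "testvalue"
	p.notify(updated)
}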

Changed file 1 of 4

@@ -16,6 +16,7 @@ import (
corev1 "k8s.io/api/core/v1" corev1 "k8s.io/api/core/v1"
"k8s.io/apimachinery/pkg/api/errors" "k8s.io/apimachinery/pkg/api/errors"
metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
"k8s.io/apimachinery/pkg/fields"
"k8s.io/client-go/kubernetes" "k8s.io/client-go/kubernetes"
klogv2 "k8s.io/klog/v2" klogv2 "k8s.io/klog/v2"
"sigs.k8s.io/controller-runtime/pkg/envtest" "sigs.k8s.io/controller-runtime/pkg/envtest"
@@ -25,6 +26,9 @@ func TestEnvtest(t *testing.T) {
 if !*enableEnvTest || os.Getenv("VK_ENVTEST") != "" {
 t.Skip("test only runs when -envtest is passed or if VK_ENVTEST is set to a non-empty value")
 }
+ctx, cancel := context.WithCancel(context.Background())
+defer cancel()
 env := &envtest.Environment{}
 _, err := env.Start()
 assert.NilError(t, err)
@@ -33,15 +37,17 @@ func TestEnvtest(t *testing.T) {
 }()
 t.Log("Env test environment ready")
-t.Run("E2ERunWithoutLeases", func(t *testing.T) {
+t.Run("E2ERunWithoutLeases", wrapE2ETest(ctx, env, func(ctx context.Context, t *testing.T, environment *envtest.Environment) {
 testNodeE2ERun(t, env, false)
-})
-t.Run("E2ERunWithLeases", func(t *testing.T) {
+}))
+t.Run("E2ERunWithLeases", wrapE2ETest(ctx, env, func(ctx context.Context, t *testing.T, environment *envtest.Environment) {
 testNodeE2ERun(t, env, true)
-})
+}))
+t.Run("E2EPodStatusUpdate", wrapE2ETest(ctx, env, testPodStatusUpdate))
 }
-func nodeNameForTest(t *testing.T) string {
+func kubernetesNameForTest(t *testing.T) string {
 name := t.Name()
 name = strings.ToLower(name)
 name = strings.ReplaceAll(name, "/", "-")
@@ -49,16 +55,90 @@ func nodeNameForTest(t *testing.T) string {
 return name
 }
-func testNodeE2ERun(t *testing.T, env *envtest.Environment, withLeases bool) {
-ctx, cancel := context.WithCancel(context.Background())
-defer cancel()
+func wrapE2ETest(ctx context.Context, env *envtest.Environment, f func(context.Context, *testing.T, *envtest.Environment)) func(*testing.T) {
+return func(t *testing.T) {
+log.G(ctx)
 sl := logrus.StandardLogger()
 sl.SetLevel(logrus.DebugLevel)
 logger := logruslogger.FromLogrus(sl.WithField("test", t.Name()))
 ctx = log.WithLogger(ctx, logger)
+// The following requires that E2E tests are performed *sequentially*
 log.L = logger
 klogv2.SetLogger(logrusr.NewLogger(sl))
+f(ctx, t, env)
+}
+}
+func testPodStatusUpdate(ctx context.Context, t *testing.T, env *envtest.Environment) {
+provider := newMockProvider()
+clientset, err := kubernetes.NewForConfig(env.Config)
+assert.NilError(t, err)
+pods := clientset.CoreV1().Pods(testNamespace)
+assert.NilError(t, wireUpSystemWithClient(ctx, provider, clientset, func(ctx context.Context, s *system) {
+p := newPod(forRealAPIServer, nameBasedOnTest(t))
+// In real API server, we don't set the resource version
+p.ResourceVersion = ""
+newPod, err := pods.Create(ctx, p, metav1.CreateOptions{})
+assert.NilError(t, err)
+key, err := buildKey(newPod)
+assert.NilError(t, err)
+listOptions := metav1.ListOptions{
+FieldSelector: fields.OneTermEqualSelector("metadata.name", p.ObjectMeta.Name).String(),
+}
+// Setup a watch to check if the pod is in running
+watcher, err := s.client.CoreV1().Pods(testNamespace).Watch(ctx, listOptions)
+assert.NilError(t, err)
+defer watcher.Stop()
+// Start the pod controller
+assert.NilError(t, s.start(ctx))
+var serverPod *corev1.Pod
+for {
+select {
+case <-ctx.Done():
+t.Fatalf("Context ended early: %s", ctx.Err().Error())
+case ev := <-watcher.ResultChan():
+serverPod = ev.Object.(*corev1.Pod)
+if serverPod.Status.Phase == corev1.PodRunning {
+goto running
+}
+}
+}
+running:
+t.Log("Observed pod in running state")
+providerPod, ok := provider.pods.Load(key)
+assert.Assert(t, ok)
+providerPodCopy := providerPod.(*corev1.Pod).DeepCopy()
+providerPodCopy.Status = serverPod.Status
+if providerPodCopy.Annotations == nil {
+providerPodCopy.Annotations = make(map[string]string, 1)
+}
+providerPodCopy.Annotations["testannotation"] = "testvalue"
+provider.notifier(providerPodCopy)
+for {
+select {
+case <-ctx.Done():
+t.Fatalf("Context ended early: %s", ctx.Err().Error())
+case ev := <-watcher.ResultChan():
+annotations := ev.Object.(*corev1.Pod).Annotations
+if annotations != nil && annotations["testannotation"] == "testvalue" {
+return
+}
+}
+}
+}))
+}
+func testNodeE2ERun(t *testing.T, env *envtest.Environment, withLeases bool) {
+ctx, cancel := context.WithCancel(context.Background())
+defer cancel()
 clientset, err := kubernetes.NewForConfig(env.Config)
 assert.NilError(t, err)
@@ -70,7 +150,7 @@ func testNodeE2ERun(t *testing.T, env *envtest.Environment, withLeases bool) {
testNode := &corev1.Node{ testNode := &corev1.Node{
ObjectMeta: metav1.ObjectMeta{ ObjectMeta: metav1.ObjectMeta{
Name: nodeNameForTest(t), Name: kubernetesNameForTest(t),
}, },
} }
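The new testPodStatusUpdate above waits on the raw watch channel with two inline select loops. If more tests end up doing this, that loop could be factored into a small helper; the following is only a sketch built on the same client-go watch.Interface seen above, and waitForPod is a hypothetical name, not something added by this PR:

package sketch

import (
	"context"
	"testing"

	corev1 "k8s.io/api/core/v1"
	"k8s.io/apimachinery/pkg/watch"
)

// waitForPod consumes events from an already-established watch until cond
// returns true for a pod, or the test context ends. Hypothetical helper; the
// test in this PR inlines the equivalent two loops instead.
func waitForPod(ctx context.Context, t *testing.T, w watch.Interface, cond func(*corev1.Pod) bool) *corev1.Pod {
	t.Helper()
	for {
		select {
		case <-ctx.Done():
			t.Fatalf("Context ended early: %s", ctx.Err())
		case ev := <-w.ResultChan():
			if pod, ok := ev.Object.(*corev1.Pod); ok && cond(pod) {
				return pod
			}
		}
	}
}

With such a helper, the two loops would collapse into one call waiting for serverPod.Status.Phase == corev1.PodRunning and another waiting for the testannotation value to appear.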

Changed file 2 of 4

@@ -21,6 +21,7 @@ import (
"k8s.io/apimachinery/pkg/util/uuid" "k8s.io/apimachinery/pkg/util/uuid"
"k8s.io/apimachinery/pkg/watch" "k8s.io/apimachinery/pkg/watch"
kubeinformers "k8s.io/client-go/informers" kubeinformers "k8s.io/client-go/informers"
"k8s.io/client-go/kubernetes"
"k8s.io/client-go/kubernetes/fake" "k8s.io/client-go/kubernetes/fake"
corev1client "k8s.io/client-go/kubernetes/typed/core/v1" corev1client "k8s.io/client-go/kubernetes/typed/core/v1"
ktesting "k8s.io/client-go/testing" ktesting "k8s.io/client-go/testing"
@@ -226,7 +227,7 @@ func TestPodLifecycle(t *testing.T) {
 type testFunction func(ctx context.Context, s *system)
 type system struct {
 pc *PodController
-client *fake.Clientset
+client kubernetes.Interface
 podControllerConfig PodControllerConfig
 }
@@ -262,6 +263,13 @@ func wireUpSystem(ctx context.Context, provider PodLifecycleHandler, f testFunct
 return false, nil, nil
 })
+return wireUpSystemWithClient(ctx, provider, client, f)
+}
+func wireUpSystemWithClient(ctx context.Context, provider PodLifecycleHandler, client kubernetes.Interface, f testFunction) error {
+ctx, cancel := context.WithCancel(ctx)
+defer cancel()
 // This is largely copy and pasted code from the root command
 sharedInformerFactory := kubeinformers.NewSharedInformerFactoryWithOptions(
 client,
@@ -620,6 +628,17 @@ func randomizeName(pod *corev1.Pod) {
 pod.Name = name
 }
+func forRealAPIServer(pod *corev1.Pod) {
+pod.ResourceVersion = ""
+pod.ObjectMeta.UID = ""
+}
+func nameBasedOnTest(t *testing.T) podModifier {
+return func(pod *corev1.Pod) {
+pod.Name = kubernetesNameForTest(t)
+}
+}
 func newPod(podmodifiers ...podModifier) *corev1.Pod {
 var terminationGracePeriodSeconds int64 = 30
 pod := &corev1.Pod{

Changed file 3 of 4

@@ -215,14 +215,7 @@ func (pc *PodController) updatePodStatus(ctx context.Context, podFromKubernetes
 }
 kPod := obj.(*knownPod)
 kPod.Lock()
 podFromProvider := kPod.lastPodStatusReceivedFromProvider.DeepCopy()
-if cmp.Equal(podFromKubernetes.Status, podFromProvider.Status) && podFromProvider.DeletionTimestamp == nil {
-kPod.lastPodStatusUpdateSkipped = true
-kPod.Unlock()
-return nil
-}
-kPod.lastPodStatusUpdateSkipped = false
 kPod.Unlock()
 // Pod deleted by provider due some reasons. e.g. a K8s provider, pod created by deployment would be evicted when node is not ready.
 // If we do not delete pod in K8s, deployment would not create a new one.
@@ -326,9 +319,11 @@ func (pc *PodController) enqueuePodStatusUpdate(ctx context.Context, pod *corev1
 kpod := obj.(*knownPod)
 kpod.Lock()
 if cmp.Equal(kpod.lastPodStatusReceivedFromProvider, pod) {
+kpod.lastPodStatusUpdateSkipped = true
 kpod.Unlock()
 return
 }
+kpod.lastPodStatusUpdateSkipped = false
 kpod.lastPodStatusReceivedFromProvider = pod
 kpod.Unlock()
 pc.syncPodStatusFromProvider.Enqueue(ctx, key)

Changed file 4 of 4

@@ -339,7 +339,11 @@ func (pc *PodController) Run(ctx context.Context, podSyncWorkers int) (retErr er
 kPod := obj.(*knownPod)
 kPod.Lock()
-if kPod.lastPodStatusUpdateSkipped && !cmp.Equal(newPod.Status, kPod.lastPodStatusReceivedFromProvider.Status) {
+if kPod.lastPodStatusUpdateSkipped &&
+(!cmp.Equal(newPod.Status, kPod.lastPodStatusReceivedFromProvider.Status) ||
+!cmp.Equal(newPod.Annotations, kPod.lastPodStatusReceivedFromProvider.Annotations) ||
+!cmp.Equal(newPod.Labels, kPod.lastPodStatusReceivedFromProvider.Labels) ||
+!cmp.Equal(newPod.Finalizers, kPod.lastPodStatusReceivedFromProvider.Finalizers)) {
 // The last pod from the provider -> kube api server was skipped, but we see they no longer match.
 // This means that the pod in API server was changed by someone else [this can be okay], but we skipped
 // a status update on our side because we compared the status received from the provider to the status
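To make the effect of the last two files' hunks concrete, here is a small standalone illustration (not part of the PR) of why a Status-only cmp.Equal check misses metadata changes, and what the widened comparison in Run now catches, using the same go-cmp package:

package main

import (
	"fmt"

	"github.com/google/go-cmp/cmp"
	corev1 "k8s.io/api/core/v1"
)

func main() {
	// Two copies of a pod whose Status is identical but whose metadata differs.
	fromKubernetes := &corev1.Pod{Status: corev1.PodStatus{Phase: corev1.PodRunning}}
	fromProvider := fromKubernetes.DeepCopy()
	fromProvider.Annotations = map[string]string{"testannotation": "testvalue"}

	// The old check looked only at Status, so this reports "no change" and the
	// annotation never reached the API server.
	fmt.Println(cmp.Equal(fromKubernetes.Status, fromProvider.Status)) // true

	// The new resync condition also compares annotations, labels and
	// finalizers, so the difference is detected.
	fmt.Println(cmp.Equal(fromKubernetes.Annotations, fromProvider.Annotations)) // false
}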