Add the concept of startup timeout (#597)

This adds two concepts, where one encompasses the other. Startup timeout: Startup timeout is how long to wait for the entire kubelet to get into a functional state. Right now, this only waits for the pod informer cache for the pod controller to become in-sync with the API server, but this could be extended to other informers (like the secrets informer). Wait for startup: This changes the behaviour of the virtual kubelet to wait for the pod controller to start before registering the node. This avoids the race condition where the node is registered, but we cannot actually do any pod operations.
2019-05-06 09:25:00 -07:00
parent 74a16f7f9a
commit f1cb6a7bf6
5 changed files with 65 additions and 3 deletions
--- a/vkubelet/podcontroller.go
+++ b/vkubelet/podcontroller.go
@@ -54,6 +54,10 @@ type PodController struct {
 	workqueue workqueue.RateLimitingInterface
 	// recorder is an event recorder for recording Event resources to the Kubernetes API.
 	recorder record.EventRecorder
+
+	// inSyncCh is a channel which will be closed once the pod controller has become in-sync with the apiserver.
+	// It will never be closed if startup fails, or if the run context is cancelled prior to initialization completing.
+	inSyncCh chan struct{}
 }

 // NewPodController returns a new instance of PodController.
@@ -71,6 +75,7 @@ func NewPodController(server *Server) *PodController {
 		podsLister:   server.podInformer.Lister(),
 		workqueue:    workqueue.NewNamedRateLimitingQueue(workqueue.DefaultControllerRateLimiter(), "pods"),
 		recorder:     recorder,
+		inSyncCh:     make(chan struct{}),
 	}

 	// Set up event handlers for when Pod resources change.
@@ -123,6 +128,9 @@ func (pc *PodController) Run(ctx context.Context, threadiness int) error {
 	if ok := cache.WaitForCacheSync(ctx.Done(), pc.podsInformer.Informer().HasSynced); !ok {
 		return pkgerrors.New("failed to wait for caches to sync")
 	}
+	log.G(ctx).Info("Pod cache in-sync")
+
+	close(pc.inSyncCh)

 	// Perform a reconciliation step that deletes any dangling pods from the provider.
 	// This happens only when the virtual-kubelet is starting, and operates on a "best-effort" basis.