Do not close pod sync, use context cancel instead. (#402)

Closing the channel is racey and can lead to a panic on exit.
Instead rely on context cancellation to know if workers should exit.
This commit is contained in:
Brian Goff
2018-11-05 11:37:00 -08:00
committed by Robbie Zhang
parent 2743c07222
commit bec818bf3c
3 changed files with 47 additions and 20 deletions

View File

@@ -95,7 +95,6 @@ This allows users to schedule kubernetes workloads on nodes that aren't running
signal.Notify(sig, syscall.SIGINT, syscall.SIGTERM)
go func() {
<-sig
f.Stop()
cancel()
}()

View File

@@ -43,7 +43,13 @@ func (s *Server) onAddPod(ctx context.Context, obj interface{}) {
if s.resourceManager.UpdatePod(pod) {
span.Annotate(nil, "Add pod to synchronizer channel.")
s.podCh <- &podNotification{pod: pod, ctx: ctx}
select {
case <-ctx.Done():
logger = logger.WithField("pod", pod.GetName()).WithField("namespace", pod.GetNamespace())
logger.WithError(ctx.Err()).Debug("Cancel send pod event due to cancelled context")
return
case s.podCh <- &podNotification{pod: pod, ctx: ctx}:
}
}
}
@@ -65,7 +71,13 @@ func (s *Server) onUpdatePod(ctx context.Context, obj interface{}) {
if s.resourceManager.UpdatePod(pod) {
span.Annotate(nil, "Add pod to synchronizer channel.")
s.podCh <- &podNotification{pod: pod, ctx: ctx}
select {
case <-ctx.Done():
logger = logger.WithField("pod", pod.GetName()).WithField("namespace", pod.GetNamespace())
logger.WithError(ctx.Err()).Debug("Cancel send pod event due to cancelled context")
return
case s.podCh <- &podNotification{pod: pod, ctx: ctx}:
}
}
}
@@ -96,7 +108,13 @@ func (s *Server) onDeletePod(ctx context.Context, obj interface{}) {
if s.resourceManager.DeletePod(pod) {
span.Annotate(nil, "Add pod to synchronizer channel.")
s.podCh <- &podNotification{pod: pod, ctx: ctx}
select {
case <-ctx.Done():
logger = logger.WithField("pod", pod.GetName()).WithField("namespace", pod.GetNamespace())
logger.WithError(ctx.Err()).Debug("Cancel send pod event due to cancelled context")
return
case s.podCh <- &podNotification{pod: pod, ctx: ctx}:
}
}
}
@@ -104,11 +122,15 @@ func (s *Server) startPodSynchronizer(ctx context.Context, id int) {
logger := log.G(ctx).WithField("method", "startPodSynchronizer").WithField("podSynchronizer", id)
logger.Debug("Start pod synchronizer")
for event := range s.podCh {
s.syncPod(event.ctx, event.pod)
for {
select {
case <-ctx.Done():
logger.Info("Stop pod syncronizer")
return
case event := <-s.podCh:
s.syncPod(event.ctx, event.pod)
}
}
logger.Info("pod channel is closed.")
}
func (s *Server) syncPod(ctx context.Context, pod *corev1.Pod) {
@@ -217,6 +239,13 @@ func (s *Server) updatePodStatuses(ctx context.Context) {
span.AddAttributes(trace.Int64Attribute("nPods", int64(len(pods))))
for _, pod := range pods {
select {
case <-ctx.Done():
span.Annotate(nil, ctx.Err().Error())
return
default:
}
if pod.Status.Phase == corev1.PodSucceeded ||
pod.Status.Phase == corev1.PodFailed ||
pod.Status.Reason == podStatusReasonProviderFailed {
@@ -281,7 +310,7 @@ func (s *Server) watchForPodEvent(ctx context.Context) error {
time.Minute,
cache.ResourceEventHandlerFuncs{
AddFunc: func(obj interface{}) {
AddFunc: func(obj interface{}) {
s.onAddPod(ctx, obj)
},
UpdateFunc: func(oldObj, newObj interface{}) {

View File

@@ -104,11 +104,16 @@ func New(ctx context.Context, cfg Config) (s *Server, retErr error) {
tick := time.Tick(5 * time.Second)
go func() {
for range tick {
ctx, span := trace.StartSpan(ctx, "syncActualState")
s.updateNode(ctx)
s.updatePodStatuses(ctx)
span.End()
for {
select {
case <-ctx.Done():
return
case <-tick:
ctx, span := trace.StartSpan(ctx, "syncActualState")
s.updateNode(ctx)
s.updatePodStatuses(ctx)
span.End()
}
}
}()
@@ -128,12 +133,6 @@ func (s *Server) Run(ctx context.Context) error {
return nil
}
// Stop shutsdown the server.
// It does not shutdown pods assigned to the virtual node.
func (s *Server) Stop() {
close(s.podCh)
}
// reconcile is the main reconciliation loop that compares differences between Kubernetes and
// the active provider and reconciles the differences.
func (s *Server) reconcile(ctx context.Context) {