use shared informers and workqueue (#425)

* vendor: add vendored code

Signed-off-by: Paulo Pires <pjpires@gmail.com>

* controller: use shared informers and a work queue

Signed-off-by: Paulo Pires <pjpires@gmail.com>

* errors: use cpuguy83/strongerrors

Signed-off-by: Paulo Pires <pjpires@gmail.com>

* aci: fix test that uses resource manager

Signed-off-by: Paulo Pires <pjpires@gmail.com>

* readme: clarify skaffold run before e2e

Signed-off-by: Paulo Pires <pjpires@gmail.com>

* cmd: use root context everywhere

Signed-off-by: Paulo Pires <pjpires@gmail.com>

* sync: refactor pod lifecycle management

Signed-off-by: Paulo Pires <pjpires@gmail.com>

* e2e: fix race in test when observing deletions

Signed-off-by: Paulo Pires <pjpires@gmail.com>

* e2e: test pod forced deletion

Signed-off-by: Paulo Pires <pjpires@gmail.com>

* cmd: fix root context potential leak

Signed-off-by: Paulo Pires <pjpires@gmail.com>

* sync: rename metaKey

Signed-off-by: Paulo Pires <pjpires@gmail.com>

* sync: remove calls to HandleError

Signed-off-by: Paulo Pires <pjpires@gmail.com>

* Revert "errors: use cpuguy83/strongerrors"

This reverts commit f031fc6d.

Signed-off-by: Paulo Pires <pjpires@gmail.com>

* manager: remove redundant lister constraint

Signed-off-by: Paulo Pires <pjpires@gmail.com>

* sync: rename the pod event recorder

Signed-off-by: Paulo Pires <pjpires@gmail.com>

* sync: amend misleading comment

Signed-off-by: Paulo Pires <pjpires@gmail.com>

* mock: add tracing

Signed-off-by: Paulo Pires <pjpires@gmail.com>

* sync: add tracing

Signed-off-by: Paulo Pires <pjpires@gmail.com>

* test: observe timeouts

Signed-off-by: Paulo Pires <pjpires@gmail.com>

* trace: remove unnecessary comments

Signed-off-by: Paulo Pires <pjpires@gmail.com>

* sync: limit concurrency in deleteDanglingPods

Signed-off-by: Paulo Pires <pjpires@gmail.com>

* sync: never store context, always pass in calls

Signed-off-by: Paulo Pires <pjpires@gmail.com>

* sync: remove HandleCrash and just panic

Signed-off-by: Paulo Pires <pjpires@gmail.com>

* sync: don't sync succeeded pods

Signed-off-by: Paulo Pires <pjpires@gmail.com>

* sync: ensure pod deletion from kubernetes

Signed-off-by: Paulo Pires <pjpires@gmail.com>
This commit is contained in:
Paulo Pires
2018-11-30 23:53:58 +00:00
committed by Robbie Zhang
parent 0e9cfca585
commit 28a757f4da
419 changed files with 20138 additions and 14777 deletions

View File

@@ -6,17 +6,13 @@ import (
"time"
"github.com/cpuguy83/strongerrors/status/ocstatus"
pkgerrors "github.com/pkg/errors"
"github.com/virtual-kubelet/virtual-kubelet/log"
"go.opencensus.io/trace"
corev1 "k8s.io/api/core/v1"
"k8s.io/apimachinery/pkg/api/errors"
metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
"k8s.io/apimachinery/pkg/fields"
"k8s.io/apimachinery/pkg/runtime"
"k8s.io/apimachinery/pkg/watch"
"k8s.io/client-go/tools/cache"
"github.com/virtual-kubelet/virtual-kubelet/log"
)
func addPodAttributes(span *trace.Span, pod *corev1.Pod) {
@@ -29,138 +25,18 @@ func addPodAttributes(span *trace.Span, pod *corev1.Pod) {
)
}
func (s *Server) onAddPod(ctx context.Context, obj interface{}) {
ctx, span := trace.StartSpan(ctx, "onAddPod")
defer span.End()
logger := log.G(ctx).WithField("method", "onAddPod")
pod, ok := obj.(*corev1.Pod)
if !ok {
span.SetStatus(trace.Status{Code: trace.StatusCodeInvalidArgument, Message: fmt.Sprintf("Unexpected object from event: %T", obj)})
logger.Errorf("obj is not of a valid type: %T", obj)
return
func (s *Server) createOrUpdatePod(ctx context.Context, pod *corev1.Pod) error {
// Check if the pod is already known by the provider.
// NOTE: Some providers return a non-nil error from their GetPod implementation when the pod is not found, while others don't.
// Hence, we ignore the error and just act upon the pod if it is non-nil (meaning that the provider still knows about the pod).
if pp, _ := s.provider.GetPod(ctx, pod.Namespace, pod.Name); pp != nil {
// The pod has already been created in the provider.
// Hence, we return since pod updates are not yet supported.
log.G(ctx).Warnf("skipping update of pod %s as pod updates are not supported", pp.Name)
return nil
}
addPodAttributes(span, pod)
logger.Debugf("Receive added pod '%s/%s' ", pod.GetNamespace(), pod.GetName())
if s.resourceManager.UpdatePod(pod) {
span.Annotate(nil, "Add pod to synchronizer channel.")
select {
case <-ctx.Done():
logger = logger.WithField("pod", pod.GetName()).WithField("namespace", pod.GetNamespace())
logger.WithError(ctx.Err()).Debug("Cancel send pod event due to cancelled context")
return
case s.podCh <- &podNotification{pod: pod, ctx: ctx}:
}
}
}
// onUpdatePod handles a pod update event.
//
// obj is expected to be a *corev1.Pod. Any other type is recorded on the trace
// span as an invalid argument, logged as an error, and the event is dropped.
// When s.resourceManager.UpdatePod(pod) returns true, a podNotification
// carrying the pod and the traced context is sent on s.podCh; the send is
// abandoned if ctx is cancelled first.
func (s *Server) onUpdatePod(ctx context.Context, obj interface{}) {
	ctx, span := trace.StartSpan(ctx, "onUpdatePod")
	defer span.End()
	logger := log.G(ctx).WithField("method", "onUpdatePod")

	pod, isPod := obj.(*corev1.Pod)
	if !isPod {
		span.SetStatus(trace.Status{
			Code:    trace.StatusCodeInvalidArgument,
			Message: fmt.Sprintf("Unexpected object from event: %T", obj),
		})
		logger.Errorf("obj is not of a valid type: %T", obj)
		return
	}

	addPodAttributes(span, pod)
	logger.Debugf("Receive updated pod '%s/%s'", pod.GetNamespace(), pod.GetName())

	// Nothing to enqueue unless the resource manager reports true.
	if !s.resourceManager.UpdatePod(pod) {
		return
	}

	span.Annotate(nil, "Add pod to synchronizer channel.")
	notification := &podNotification{pod: pod, ctx: ctx}
	select {
	case s.podCh <- notification:
	case <-ctx.Done():
		logger = logger.WithField("pod", pod.GetName()).WithField("namespace", pod.GetNamespace())
		logger.WithError(ctx.Err()).Debug("Cancel send pod event due to cancelled context")
	}
}
// onDeletePod handles a pod deletion event.
//
// obj is expected to be either a *corev1.Pod or a
// cache.DeletedFinalStateUnknown whose Obj field holds a *corev1.Pod. Any
// other shape is recorded on the trace span as an invalid argument, logged as
// an error, and the event is dropped. When s.resourceManager.DeletePod(pod)
// returns true, a podNotification carrying the pod and the traced context is
// sent on s.podCh; the send is abandoned if ctx is cancelled first.
func (s *Server) onDeletePod(ctx context.Context, obj interface{}) {
	ctx, span := trace.StartSpan(ctx, "onDeletePod")
	defer span.End()
	logger := log.G(ctx).WithField("method", "onDeletePod")

	pod, ok := obj.(*corev1.Pod)
	if !ok {
		tombstone, ok := obj.(cache.DeletedFinalStateUnknown)
		if !ok {
			span.SetStatus(trace.Status{Code: trace.StatusCodeInvalidArgument, Message: fmt.Sprintf("Unexpected object from event: %T", obj)})
			logger.Errorf("obj is not of a valid type: %T", obj)
			return
		}
		if pod, ok = tombstone.Obj.(*corev1.Pod); !ok {
			// Report the type of the wrapped object: the tombstone wrapper
			// itself is an expected type here, so printing obj's type would
			// hide what was actually unexpected.
			span.SetStatus(trace.Status{Code: trace.StatusCodeInvalidArgument, Message: fmt.Sprintf("Unexpected object from event: %T", tombstone.Obj)})
			logger.Errorf("obj is not of a valid type: %T", tombstone.Obj)
			return
		}
	}

	addPodAttributes(span, pod)
	logger.Debugf("Receive deleted pod '%s/%s'", pod.GetNamespace(), pod.GetName())

	if s.resourceManager.DeletePod(pod) {
		span.Annotate(nil, "Add pod to synchronizer channel.")
		select {
		case <-ctx.Done():
			logger = logger.WithField("pod", pod.GetName()).WithField("namespace", pod.GetNamespace())
			logger.WithError(ctx.Err()).Debug("Cancel send pod event due to cancelled context")
			return
		case s.podCh <- &podNotification{pod: pod, ctx: ctx}:
		}
	}
}
// startPodSynchronizer runs a worker loop that receives notifications from
// s.podCh and passes each one's context and pod to s.syncPod. The loop exits
// when ctx is done. id is attached to the worker's log entries under the
// "podSynchronizer" field.
func (s *Server) startPodSynchronizer(ctx context.Context, id int) {
	logger := log.G(ctx).
		WithField("method", "startPodSynchronizer").
		WithField("podSynchronizer", id)
	logger.Debug("Start pod synchronizer")

	done := ctx.Done()
	for {
		select {
		case notification := <-s.podCh:
			s.syncPod(notification.ctx, notification.pod)
		case <-done:
			logger.Info("Stop pod syncronizer")
			return
		}
	}
}
// syncPod dispatches a pod to deletion or creation depending on whether its
// DeletionTimestamp is set. Errors from s.deletePod / s.createPod are logged
// and not returned.
func (s *Server) syncPod(ctx context.Context, pod *corev1.Pod) {
	ctx, span := trace.StartSpan(ctx, "syncPod")
	defer span.End()

	logger := log.G(ctx).
		WithField("pod", pod.GetName()).
		WithField("namespace", pod.GetNamespace())
	addPodAttributes(span, pod)

	// No deletion timestamp: treat the pod as one to create.
	if pod.DeletionTimestamp == nil {
		span.Annotate(nil, "Create pod")
		logger.Debugf("Creating pod")
		if createErr := s.createPod(ctx, pod); createErr != nil {
			logger.WithError(createErr).Errorf("Failed to create pod")
		}
		return
	}

	span.Annotate(nil, "Delete pod")
	logger.Debugf("Deleting pod")
	if deleteErr := s.deletePod(ctx, pod); deleteErr != nil {
		logger.WithError(deleteErr).Error("Failed to delete pod")
	}
}
func (s *Server) createPod(ctx context.Context, pod *corev1.Pod) error {
ctx, span := trace.StartSpan(ctx, "createPod")
ctx, span := trace.StartSpan(ctx, "createOrUpdatePod")
defer span.End()
addPodAttributes(span, pod)
@@ -199,7 +75,16 @@ func (s *Server) createPod(ctx context.Context, pod *corev1.Pod) error {
return nil
}
func (s *Server) deletePod(ctx context.Context, pod *corev1.Pod) error {
func (s *Server) deletePod(ctx context.Context, namespace, name string) error {
// Grab the pod as known by the provider.
// NOTE: Some providers return a non-nil error from their GetPod implementation when the pod is not found, while others don't.
// Hence, we ignore the error and just act upon the pod if it is non-nil (meaning that the provider still knows about the pod).
pod, _ := s.provider.GetPod(ctx, namespace, name)
if pod == nil {
// The provider is not aware of the pod, but we must still delete the Kubernetes API resource.
return s.forceDeletePodResource(ctx, namespace, name)
}
ctx, span := trace.StartSpan(ctx, "deletePod")
defer span.End()
addPodAttributes(span, pod)
@@ -213,26 +98,37 @@ func (s *Server) deletePod(ctx context.Context, pod *corev1.Pod) error {
logger := log.G(ctx).WithField("pod", pod.GetName()).WithField("namespace", pod.GetNamespace())
if !errors.IsNotFound(delErr) {
var grace int64
if err := s.k8sClient.CoreV1().Pods(pod.GetNamespace()).Delete(pod.GetName(), &metav1.DeleteOptions{GracePeriodSeconds: &grace}); err != nil {
if errors.IsNotFound(err) {
span.Annotate(nil, "Pod does not exist in k8s, nothing to delete")
return nil
}
span.SetStatus(trace.Status{Code: trace.StatusCodeUnknown, Message: err.Error()})
return fmt.Errorf("Failed to delete kubernetes pod: %s", err)
if err := s.forceDeletePodResource(ctx, namespace, name); err != nil {
span.SetStatus(ocstatus.FromError(err))
return err
}
span.Annotate(nil, "Deleted pod from k8s")
s.resourceManager.DeletePod(pod)
span.Annotate(nil, "Deleted pod from internal state")
logger.Info("Pod deleted")
}
return nil
}
// forceDeletePodResource deletes the pod identified by namespace and name
// through the Kubernetes client, passing a grace period of zero seconds.
// A "not found" response is annotated on the span and treated as success.
// Any other failure is recorded on the span and returned as an error.
func (s *Server) forceDeletePodResource(ctx context.Context, namespace, name string) error {
	ctx, span := trace.StartSpan(ctx, "forceDeletePodResource")
	defer span.End()
	span.AddAttributes(
		trace.StringAttribute("namespace", namespace),
		trace.StringAttribute("name", name),
	)

	// The zero value is intentional: it is sent as GracePeriodSeconds.
	var grace int64
	deleteOpts := &metav1.DeleteOptions{GracePeriodSeconds: &grace}

	err := s.k8sClient.CoreV1().Pods(namespace).Delete(name, deleteOpts)
	switch {
	case err == nil:
		return nil
	case errors.IsNotFound(err):
		span.Annotate(nil, "Pod does not exist in Kubernetes, nothing to delete")
		return nil
	default:
		span.SetStatus(trace.Status{Code: trace.StatusCodeUnknown, Message: err.Error()})
		return fmt.Errorf("Failed to delete Kubernetes pod: %s", err)
	}
}
// updatePodStatuses syncs the provider's pod status with the Kubernetes pod status.
func (s *Server) updatePodStatuses(ctx context.Context) {
ctx, span := trace.StartSpan(ctx, "updatePodStatuses")
@@ -310,69 +206,3 @@ func (s *Server) updatePodStatus(ctx context.Context, pod *corev1.Pod) error {
}, "updated pod status in kubernetes")
return nil
}
// watchForPodEvent lists the pods whose spec.nodeName equals s.nodeName, hands
// that list to the resource manager, runs s.reconcile, and then runs a pod
// informer whose add/update/delete callbacks are forwarded to s.onAddPod,
// s.onUpdatePod and s.onDeletePod. Before running the informer it starts
// s.podSyncWorkers pod synchronizer goroutines.
//
// The informer controller is run with ctx.Done() as its stop channel; once it
// returns, this function returns ctx.Err(). An error from the initial list is
// returned wrapped, before any goroutine is started.
func (s *Server) watchForPodEvent(ctx context.Context) error {
	opts := metav1.ListOptions{
		FieldSelector: fields.OneTermEqualSelector("spec.nodeName", s.nodeName).String(),
	}

	pods, err := s.k8sClient.CoreV1().Pods(s.namespace).List(opts)
	if err != nil {
		return pkgerrors.Wrap(err, "error getting pod list")
	}

	s.resourceManager.SetPods(pods)
	s.reconcile(ctx)

	opts.ResourceVersion = pods.ResourceVersion

	// controller is declared before NewInformer so the list/watch closures can
	// capture it; it is still nil while NewInformer is being constructed, hence
	// the nil checks inside the closures.
	var controller cache.Controller
	_, controller = cache.NewInformer(
		&cache.ListWatch{
			// NOTE(review): the options argument is ignored in both closures;
			// the captured opts (field selector plus the latest resource
			// version) is used instead. Confirm this is intended.
			ListFunc: func(options metav1.ListOptions) (runtime.Object, error) {
				if controller != nil {
					opts.ResourceVersion = controller.LastSyncResourceVersion()
				}
				return s.k8sClient.CoreV1().Pods(s.namespace).List(opts)
			},
			WatchFunc: func(options metav1.ListOptions) (watch.Interface, error) {
				if controller != nil {
					opts.ResourceVersion = controller.LastSyncResourceVersion()
				}
				return s.k8sClient.CoreV1().Pods(s.namespace).Watch(opts)
			},
		},
		&corev1.Pod{},
		time.Minute,
		cache.ResourceEventHandlerFuncs{
			AddFunc: func(obj interface{}) {
				s.onAddPod(ctx, obj)
			},
			// Only the new object is forwarded; the old one is not used.
			UpdateFunc: func(oldObj, newObj interface{}) {
				s.onUpdatePod(ctx, newObj)
			},
			DeleteFunc: func(obj interface{}) {
				s.onDeletePod(ctx, obj)
			},
		},
	)

	for i := 0; i < s.podSyncWorkers; i++ {
		go s.startPodSynchronizer(ctx, i)
	}

	log.G(ctx).Info("Start to run pod cache controller.")
	controller.Run(ctx.Done())

	return ctx.Err()
}