use shared informers and workqueue (#425)
* vendor: add vendored code
Signed-off-by: Paulo Pires <pjpires@gmail.com>
* controller: use shared informers and a work queue
Signed-off-by: Paulo Pires <pjpires@gmail.com>
* errors: use cpuguy83/strongerrors
Signed-off-by: Paulo Pires <pjpires@gmail.com>
* aci: fix test that uses resource manager
Signed-off-by: Paulo Pires <pjpires@gmail.com>
* readme: clarify skaffold run before e2e
Signed-off-by: Paulo Pires <pjpires@gmail.com>
* cmd: use root context everywhere
Signed-off-by: Paulo Pires <pjpires@gmail.com>
* sync: refactor pod lifecycle management
Signed-off-by: Paulo Pires <pjpires@gmail.com>
* e2e: fix race in test when observing deletions
Signed-off-by: Paulo Pires <pjpires@gmail.com>
* e2e: test pod forced deletion
Signed-off-by: Paulo Pires <pjpires@gmail.com>
* cmd: fix root context potential leak
Signed-off-by: Paulo Pires <pjpires@gmail.com>
* sync: rename metaKey
Signed-off-by: Paulo Pires <pjpires@gmail.com>
* sync: remove calls to HandleError
Signed-off-by: Paulo Pires <pjpires@gmail.com>
* Revert "errors: use cpuguy83/strongerrors"
This reverts commit f031fc6d.
Signed-off-by: Paulo Pires <pjpires@gmail.com>
* manager: remove redundant lister constraint
Signed-off-by: Paulo Pires <pjpires@gmail.com>
* sync: rename the pod event recorder
Signed-off-by: Paulo Pires <pjpires@gmail.com>
* sync: amend misleading comment
Signed-off-by: Paulo Pires <pjpires@gmail.com>
* mock: add tracing
Signed-off-by: Paulo Pires <pjpires@gmail.com>
* sync: add tracing
Signed-off-by: Paulo Pires <pjpires@gmail.com>
* test: observe timeouts
Signed-off-by: Paulo Pires <pjpires@gmail.com>
* trace: remove unnecessary comments
Signed-off-by: Paulo Pires <pjpires@gmail.com>
* sync: limit concurrency in deleteDanglingPods
Signed-off-by: Paulo Pires <pjpires@gmail.com>
* sync: never store context, always pass in calls
Signed-off-by: Paulo Pires <pjpires@gmail.com>
* sync: remove HandleCrash and just panic
Signed-off-by: Paulo Pires <pjpires@gmail.com>
* sync: don't sync succeeded pods
Signed-off-by: Paulo Pires <pjpires@gmail.com>
* sync: ensure pod deletion from kubernetes
Signed-off-by: Paulo Pires <pjpires@gmail.com>
This commit is contained in:
committed by
Robbie Zhang
parent
0e9cfca585
commit
28a757f4da
260
vkubelet/pod.go
260
vkubelet/pod.go
@@ -6,17 +6,13 @@ import (
|
||||
"time"
|
||||
|
||||
"github.com/cpuguy83/strongerrors/status/ocstatus"
|
||||
|
||||
pkgerrors "github.com/pkg/errors"
|
||||
"github.com/virtual-kubelet/virtual-kubelet/log"
|
||||
"go.opencensus.io/trace"
|
||||
corev1 "k8s.io/api/core/v1"
|
||||
"k8s.io/apimachinery/pkg/api/errors"
|
||||
metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
|
||||
"k8s.io/apimachinery/pkg/fields"
|
||||
"k8s.io/apimachinery/pkg/runtime"
|
||||
"k8s.io/apimachinery/pkg/watch"
|
||||
"k8s.io/client-go/tools/cache"
|
||||
|
||||
"github.com/virtual-kubelet/virtual-kubelet/log"
|
||||
)
|
||||
|
||||
func addPodAttributes(span *trace.Span, pod *corev1.Pod) {
|
||||
@@ -29,138 +25,18 @@ func addPodAttributes(span *trace.Span, pod *corev1.Pod) {
|
||||
)
|
||||
}
|
||||
|
||||
func (s *Server) onAddPod(ctx context.Context, obj interface{}) {
|
||||
ctx, span := trace.StartSpan(ctx, "onAddPod")
|
||||
defer span.End()
|
||||
logger := log.G(ctx).WithField("method", "onAddPod")
|
||||
|
||||
pod, ok := obj.(*corev1.Pod)
|
||||
if !ok {
|
||||
span.SetStatus(trace.Status{Code: trace.StatusCodeInvalidArgument, Message: fmt.Sprintf("Unexpected object from event: %T", obj)})
|
||||
logger.Errorf("obj is not of a valid type: %T", obj)
|
||||
return
|
||||
func (s *Server) createOrUpdatePod(ctx context.Context, pod *corev1.Pod) error {
|
||||
// Check if the pod is already known by the provider.
|
||||
// NOTE: Some providers return a non-nil error in their GetPod implementation when the pod is not found, while others don't.
|
||||
// Hence, we ignore the error and just act upon the pod if it is non-nil (meaning that the provider still knows about the pod).
|
||||
if pp, _ := s.provider.GetPod(ctx, pod.Namespace, pod.Name); pp != nil {
|
||||
// The pod has already been created in the provider.
|
||||
// Hence, we return since pod updates are not yet supported.
|
||||
log.G(ctx).Warnf("skipping update of pod %s as pod updates are not supported", pp.Name)
|
||||
return nil
|
||||
}
|
||||
|
||||
addPodAttributes(span, pod)
|
||||
|
||||
logger.Debugf("Receive added pod '%s/%s' ", pod.GetNamespace(), pod.GetName())
|
||||
|
||||
if s.resourceManager.UpdatePod(pod) {
|
||||
span.Annotate(nil, "Add pod to synchronizer channel.")
|
||||
select {
|
||||
case <-ctx.Done():
|
||||
logger = logger.WithField("pod", pod.GetName()).WithField("namespace", pod.GetNamespace())
|
||||
logger.WithError(ctx.Err()).Debug("Cancel send pod event due to cancelled context")
|
||||
return
|
||||
case s.podCh <- &podNotification{pod: pod, ctx: ctx}:
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
func (s *Server) onUpdatePod(ctx context.Context, obj interface{}) {
|
||||
ctx, span := trace.StartSpan(ctx, "onUpdatePod")
|
||||
defer span.End()
|
||||
logger := log.G(ctx).WithField("method", "onUpdatePod")
|
||||
|
||||
pod, ok := obj.(*corev1.Pod)
|
||||
if !ok {
|
||||
span.SetStatus(trace.Status{Code: trace.StatusCodeInvalidArgument, Message: fmt.Sprintf("Unexpected object from event: %T", obj)})
|
||||
logger.Errorf("obj is not of a valid type: %T", obj)
|
||||
return
|
||||
}
|
||||
|
||||
addPodAttributes(span, pod)
|
||||
|
||||
logger.Debugf("Receive updated pod '%s/%s'", pod.GetNamespace(), pod.GetName())
|
||||
|
||||
if s.resourceManager.UpdatePod(pod) {
|
||||
span.Annotate(nil, "Add pod to synchronizer channel.")
|
||||
select {
|
||||
case <-ctx.Done():
|
||||
logger = logger.WithField("pod", pod.GetName()).WithField("namespace", pod.GetNamespace())
|
||||
logger.WithError(ctx.Err()).Debug("Cancel send pod event due to cancelled context")
|
||||
return
|
||||
case s.podCh <- &podNotification{pod: pod, ctx: ctx}:
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
func (s *Server) onDeletePod(ctx context.Context, obj interface{}) {
|
||||
ctx, span := trace.StartSpan(ctx, "onDeletePod")
|
||||
defer span.End()
|
||||
logger := log.G(ctx).WithField("method", "onDeletePod")
|
||||
|
||||
pod, ok := obj.(*corev1.Pod)
|
||||
if !ok {
|
||||
delta, ok := obj.(cache.DeletedFinalStateUnknown)
|
||||
if !ok {
|
||||
span.SetStatus(trace.Status{Code: trace.StatusCodeInvalidArgument, Message: fmt.Sprintf("Unexpected object from event: %T", obj)})
|
||||
logger.Errorf("obj is not of a valid type: %T", obj)
|
||||
return
|
||||
}
|
||||
|
||||
if pod, ok = delta.Obj.(*corev1.Pod); !ok {
|
||||
span.SetStatus(trace.Status{Code: trace.StatusCodeInvalidArgument, Message: fmt.Sprintf("Unexpected object from event: %T", obj)})
|
||||
logger.Errorf("obj is not of a valid type: %T", obj)
|
||||
return
|
||||
}
|
||||
}
|
||||
|
||||
addPodAttributes(span, pod)
|
||||
|
||||
logger.Debugf("Receive deleted pod '%s/%s'", pod.GetNamespace(), pod.GetName())
|
||||
|
||||
if s.resourceManager.DeletePod(pod) {
|
||||
span.Annotate(nil, "Add pod to synchronizer channel.")
|
||||
select {
|
||||
case <-ctx.Done():
|
||||
logger = logger.WithField("pod", pod.GetName()).WithField("namespace", pod.GetNamespace())
|
||||
logger.WithError(ctx.Err()).Debug("Cancel send pod event due to cancelled context")
|
||||
return
|
||||
case s.podCh <- &podNotification{pod: pod, ctx: ctx}:
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
func (s *Server) startPodSynchronizer(ctx context.Context, id int) {
|
||||
logger := log.G(ctx).WithField("method", "startPodSynchronizer").WithField("podSynchronizer", id)
|
||||
logger.Debug("Start pod synchronizer")
|
||||
|
||||
for {
|
||||
select {
|
||||
case <-ctx.Done():
|
||||
logger.Info("Stop pod syncronizer")
|
||||
return
|
||||
case event := <-s.podCh:
|
||||
s.syncPod(event.ctx, event.pod)
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
func (s *Server) syncPod(ctx context.Context, pod *corev1.Pod) {
|
||||
ctx, span := trace.StartSpan(ctx, "syncPod")
|
||||
defer span.End()
|
||||
logger := log.G(ctx).WithField("pod", pod.GetName()).WithField("namespace", pod.GetNamespace())
|
||||
|
||||
addPodAttributes(span, pod)
|
||||
|
||||
if pod.DeletionTimestamp != nil {
|
||||
span.Annotate(nil, "Delete pod")
|
||||
logger.Debugf("Deleting pod")
|
||||
if err := s.deletePod(ctx, pod); err != nil {
|
||||
logger.WithError(err).Error("Failed to delete pod")
|
||||
}
|
||||
} else {
|
||||
span.Annotate(nil, "Create pod")
|
||||
logger.Debugf("Creating pod")
|
||||
if err := s.createPod(ctx, pod); err != nil {
|
||||
logger.WithError(err).Errorf("Failed to create pod")
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
func (s *Server) createPod(ctx context.Context, pod *corev1.Pod) error {
|
||||
ctx, span := trace.StartSpan(ctx, "createPod")
|
||||
ctx, span := trace.StartSpan(ctx, "createOrUpdatePod")
|
||||
defer span.End()
|
||||
addPodAttributes(span, pod)
|
||||
|
||||
@@ -199,7 +75,16 @@ func (s *Server) createPod(ctx context.Context, pod *corev1.Pod) error {
|
||||
return nil
|
||||
}
|
||||
|
||||
func (s *Server) deletePod(ctx context.Context, pod *corev1.Pod) error {
|
||||
func (s *Server) deletePod(ctx context.Context, namespace, name string) error {
|
||||
// Grab the pod as known by the provider.
|
||||
// NOTE: Some providers return a non-nil error in their GetPod implementation when the pod is not found, while others don't.
|
||||
// Hence, we ignore the error and just act upon the pod if it is non-nil (meaning that the provider still knows about the pod).
|
||||
pod, _ := s.provider.GetPod(ctx, namespace, name)
|
||||
if pod == nil {
|
||||
// The provider is not aware of the pod, but we must still delete the Kubernetes API resource.
|
||||
return s.forceDeletePodResource(ctx, namespace, name)
|
||||
}
|
||||
|
||||
ctx, span := trace.StartSpan(ctx, "deletePod")
|
||||
defer span.End()
|
||||
addPodAttributes(span, pod)
|
||||
@@ -213,26 +98,37 @@ func (s *Server) deletePod(ctx context.Context, pod *corev1.Pod) error {
|
||||
|
||||
logger := log.G(ctx).WithField("pod", pod.GetName()).WithField("namespace", pod.GetNamespace())
|
||||
if !errors.IsNotFound(delErr) {
|
||||
var grace int64
|
||||
if err := s.k8sClient.CoreV1().Pods(pod.GetNamespace()).Delete(pod.GetName(), &metav1.DeleteOptions{GracePeriodSeconds: &grace}); err != nil {
|
||||
if errors.IsNotFound(err) {
|
||||
span.Annotate(nil, "Pod does not exist in k8s, nothing to delete")
|
||||
return nil
|
||||
}
|
||||
|
||||
span.SetStatus(trace.Status{Code: trace.StatusCodeUnknown, Message: err.Error()})
|
||||
return fmt.Errorf("Failed to delete kubernetes pod: %s", err)
|
||||
if err := s.forceDeletePodResource(ctx, namespace, name); err != nil {
|
||||
span.SetStatus(ocstatus.FromError(err))
|
||||
return err
|
||||
}
|
||||
span.Annotate(nil, "Deleted pod from k8s")
|
||||
|
||||
s.resourceManager.DeletePod(pod)
|
||||
span.Annotate(nil, "Deleted pod from internal state")
|
||||
logger.Info("Pod deleted")
|
||||
}
|
||||
|
||||
return nil
|
||||
}
|
||||
|
||||
func (s *Server) forceDeletePodResource(ctx context.Context, namespace, name string) error {
|
||||
ctx, span := trace.StartSpan(ctx, "forceDeletePodResource")
|
||||
defer span.End()
|
||||
span.AddAttributes(
|
||||
trace.StringAttribute("namespace", namespace),
|
||||
trace.StringAttribute("name", name),
|
||||
)
|
||||
|
||||
var grace int64
|
||||
if err := s.k8sClient.CoreV1().Pods(namespace).Delete(name, &metav1.DeleteOptions{GracePeriodSeconds: &grace}); err != nil {
|
||||
if errors.IsNotFound(err) {
|
||||
span.Annotate(nil, "Pod does not exist in Kubernetes, nothing to delete")
|
||||
return nil
|
||||
}
|
||||
span.SetStatus(trace.Status{Code: trace.StatusCodeUnknown, Message: err.Error()})
|
||||
return fmt.Errorf("Failed to delete Kubernetes pod: %s", err)
|
||||
}
|
||||
return nil
|
||||
}
|
||||
|
||||
// updatePodStatuses syncs the provider's pod status with the Kubernetes pod status.
|
||||
func (s *Server) updatePodStatuses(ctx context.Context) {
|
||||
ctx, span := trace.StartSpan(ctx, "updatePodStatuses")
|
||||
@@ -310,69 +206,3 @@ func (s *Server) updatePodStatus(ctx context.Context, pod *corev1.Pod) error {
|
||||
}, "updated pod status in kubernetes")
|
||||
return nil
|
||||
}
|
||||
|
||||
// watchForPodEvent waits for pod changes from kubernetes and updates the details accordingly in the local state.
|
||||
// This returns after a single pod event.
|
||||
func (s *Server) watchForPodEvent(ctx context.Context) error {
|
||||
opts := metav1.ListOptions{
|
||||
FieldSelector: fields.OneTermEqualSelector("spec.nodeName", s.nodeName).String(),
|
||||
}
|
||||
|
||||
pods, err := s.k8sClient.CoreV1().Pods(s.namespace).List(opts)
|
||||
if err != nil {
|
||||
return pkgerrors.Wrap(err, "error getting pod list")
|
||||
}
|
||||
|
||||
s.resourceManager.SetPods(pods)
|
||||
s.reconcile(ctx)
|
||||
|
||||
opts.ResourceVersion = pods.ResourceVersion
|
||||
|
||||
var controller cache.Controller
|
||||
_, controller = cache.NewInformer(
|
||||
|
||||
&cache.ListWatch{
|
||||
|
||||
ListFunc: func(options metav1.ListOptions) (runtime.Object, error) {
|
||||
if controller != nil {
|
||||
opts.ResourceVersion = controller.LastSyncResourceVersion()
|
||||
}
|
||||
|
||||
return s.k8sClient.Core().Pods(s.namespace).List(opts)
|
||||
},
|
||||
|
||||
WatchFunc: func(options metav1.ListOptions) (watch.Interface, error) {
|
||||
if controller != nil {
|
||||
opts.ResourceVersion = controller.LastSyncResourceVersion()
|
||||
}
|
||||
|
||||
return s.k8sClient.Core().Pods(s.namespace).Watch(opts)
|
||||
},
|
||||
},
|
||||
|
||||
&corev1.Pod{},
|
||||
|
||||
time.Minute,
|
||||
|
||||
cache.ResourceEventHandlerFuncs{
|
||||
AddFunc: func(obj interface{}) {
|
||||
s.onAddPod(ctx, obj)
|
||||
},
|
||||
UpdateFunc: func(oldObj, newObj interface{}) {
|
||||
s.onUpdatePod(ctx, newObj)
|
||||
},
|
||||
DeleteFunc: func(obj interface{}) {
|
||||
s.onDeletePod(ctx, obj)
|
||||
},
|
||||
},
|
||||
)
|
||||
|
||||
for i := 0; i < s.podSyncWorkers; i++ {
|
||||
go s.startPodSynchronizer(ctx, i)
|
||||
}
|
||||
|
||||
log.G(ctx).Info("Start to run pod cache controller.")
|
||||
controller.Run(ctx.Done())
|
||||
|
||||
return ctx.Err()
|
||||
}
|
||||
|
||||
Reference in New Issue
Block a user