Merge pull request #390 from cpuguy83/update_on_provider_delete
Fix a case where provider pod status is not found
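For orientation, here is a minimal, self-contained sketch of the fallback behaviour this change introduces: when the provider no longer returns a status for a pod that was already running (or is more than a minute old), the pod is marked Failed with reason NotFound so Kubernetes will recreate it. The helper name markPodNotFound and the main function are hypothetical illustrations only; the real change lives in (*Server).updatePodStatus in the diff below.

package main

import (
	"fmt"
	"time"

	corev1 "k8s.io/api/core/v1"
	metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
)

// markPodNotFound (hypothetical helper) mirrors the fallback in the diff below:
// if the provider no longer knows the pod, flag it Failed/NotFound and
// terminate its container statuses so a replacement pod gets created.
func markPodNotFound(pod *corev1.Pod) {
	// Only touch pods that were running, or that are older than a minute,
	// to avoid racing with pod creation.
	if pod.Status.Phase != corev1.PodRunning &&
		!pod.ObjectMeta.CreationTimestamp.Add(time.Minute).Before(time.Now()) {
		return
	}
	pod.Status.Phase = corev1.PodFailed
	pod.Status.Reason = "NotFound"
	pod.Status.Message = "The pod status was not found and may have been deleted from the provider"
	for i, c := range pod.Status.ContainerStatuses {
		// Guard against a missing Running state before reading StartedAt.
		var startedAt metav1.Time
		if c.State.Running != nil {
			startedAt = c.State.Running.StartedAt
		}
		pod.Status.ContainerStatuses[i].State.Terminated = &corev1.ContainerStateTerminated{
			ExitCode:    -137,
			Reason:      "NotFound",
			Message:     "Container was not found and was likely deleted",
			FinishedAt:  metav1.NewTime(time.Now()),
			StartedAt:   startedAt,
			ContainerID: c.ContainerID,
		}
		pod.Status.ContainerStatuses[i].State.Running = nil
	}
}

func main() {
	pod := &corev1.Pod{
		ObjectMeta: metav1.ObjectMeta{Name: "example", CreationTimestamp: metav1.NewTime(time.Now())},
		Status:     corev1.PodStatus{Phase: corev1.PodRunning},
	}
	markPodNotFound(pod)
	fmt.Println(pod.Status.Phase, pod.Status.Reason) // Failed NotFound
}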
@@ -5,6 +5,8 @@ import (
 	"fmt"
 	"time"
 
+	"github.com/cpuguy83/strongerrors/status/ocstatus"
+
 	pkgerrors "github.com/pkg/errors"
 	"github.com/virtual-kubelet/virtual-kubelet/log"
 	"go.opencensus.io/trace"
@@ -19,9 +21,11 @@ import (
 
 func addPodAttributes(span *trace.Span, pod *corev1.Pod) {
 	span.AddAttributes(
-		trace.StringAttribute("uid", string(pod.UID)),
-		trace.StringAttribute("namespace", pod.Namespace),
-		trace.StringAttribute("name", pod.Name),
+		trace.StringAttribute("uid", string(pod.GetUID())),
+		trace.StringAttribute("namespace", pod.GetNamespace()),
+		trace.StringAttribute("name", pod.GetName()),
+		trace.StringAttribute("phase", string(pod.Status.Phase)),
+		trace.StringAttribute("reason", pod.Status.Reason),
 	)
 }
 
@@ -246,26 +250,67 @@ func (s *Server) updatePodStatuses(ctx context.Context) {
 		default:
 		}
 
+		if err := s.updatePodStatus(ctx, pod); err != nil {
+			logger := log.G(ctx).WithField("pod", pod.GetName()).WithField("namespace", pod.GetNamespace()).WithField("status", pod.Status.Phase).WithField("reason", pod.Status.Reason)
+			logger.Error(err)
+		}
+	}
+}
+
+func (s *Server) updatePodStatus(ctx context.Context, pod *corev1.Pod) error {
+	ctx, span := trace.StartSpan(ctx, "updatePodStatus")
+	defer span.End()
+	addPodAttributes(span, pod)
+
 	if pod.Status.Phase == corev1.PodSucceeded ||
 		pod.Status.Phase == corev1.PodFailed ||
 		pod.Status.Reason == podStatusReasonProviderFailed {
-		continue
+		return nil
 	}
 
 	status, err := s.provider.GetPodStatus(ctx, pod.Namespace, pod.Name)
 	if err != nil {
-		log.G(ctx).WithField("pod", pod.GetName()).WithField("namespace", pod.GetNamespace()).Error("Error retrieving pod status")
-		return
+		span.SetStatus(ocstatus.FromError(err))
+		return pkgerrors.Wrap(err, "error retreiving pod status")
 	}
 
 	// Update the pod's status
 	if status != nil {
 		pod.Status = *status
-		s.k8sClient.CoreV1().Pods(pod.Namespace).UpdateStatus(pod)
+	} else {
+		// Only change the status when the pod was already up
+		// Only doing so when the pod was successfully running makes sure we don't run into race conditions during pod creation.
+		if pod.Status.Phase == corev1.PodRunning || pod.ObjectMeta.CreationTimestamp.Add(time.Minute).Before(time.Now()) {
+			// Set the pod to failed, this makes sure if the underlying container implementation is gone that a new pod will be created.
+			pod.Status.Phase = corev1.PodFailed
+			pod.Status.Reason = "NotFound"
+			pod.Status.Message = "The pod status was not found and may have been deleted from the provider"
+			for i, c := range pod.Status.ContainerStatuses {
+				pod.Status.ContainerStatuses[i].State.Terminated = &corev1.ContainerStateTerminated{
+					ExitCode:    -137,
+					Reason:      "NotFound",
+					Message:     "Container was not found and was likely deleted",
+					FinishedAt:  metav1.NewTime(time.Now()),
+					StartedAt:   c.State.Running.StartedAt,
+					ContainerID: c.ContainerID,
+				}
+				pod.Status.ContainerStatuses[i].State.Running = nil
 			}
 		}
 	}
 
+	if _, err := s.k8sClient.CoreV1().Pods(pod.Namespace).UpdateStatus(pod); err != nil {
+		span.SetStatus(ocstatus.FromError(err))
+		return pkgerrors.Wrap(err, "error while updating pod status in kubernetes")
+	}
+
+	span.Annotate([]trace.Attribute{
+		trace.StringAttribute("new phase", string(pod.Status.Phase)),
+		trace.StringAttribute("new reason", pod.Status.Reason),
+	}, "updated pod status in kubernetes")
+	return nil
+}
+
 // watchForPodEvent waits for pod changes from kubernetes and updates the details accordingly in the local state.
 // This returns after a single pod event.
 func (s *Server) watchForPodEvent(ctx context.Context) error {
@@ -160,8 +160,8 @@ func (s *Server) reconcile(ctx context.Context) {
 
 	var failedDeleteCount int64
 	for _, pod := range deletePods {
-		logger := logger.WithField("pod", pod.Name)
-		logger.Debug("Deleting pod '%s'\n", pod.Name)
+		logger := logger.WithField("pod", pod.GetName()).WithField("namespace", pod.GetNamespace())
+		logger.Debug("Deleting pod")
 		if err := s.deletePod(ctx, pod); err != nil {
 			logger.WithError(err).Error("Error deleting pod")
 			failedDeleteCount++