Compare commits

...

38 Commits

Author SHA1 Message Date
Brian Goff
7c9bd20eea Merge pull request #975 from cpuguy83/node_manager
Make ControllerManager more useful
2021-09-14 10:26:48 -07:00
Brian Goff
c9c0d99064 Rename NewNodeFromClient to just NewNode
Since we now store the client on the config, we don't need to use a
custom client.
2021-09-14 17:10:17 +00:00
Brian Goff
4974e062d0 Add webhook and anon auth support
Auth is not automatically enabled because this requires some
bootstrapping to work.
I'll leave this for some future work.
In the meantime, people can use the current code similarly to how they used
the node-cli code to inject their own auth.
2021-09-14 17:10:17 +00:00
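The root command later in this compare wires this up through a `setAuth` helper. Below is a minimal sketch of that pattern, reusing only the helpers that appear in the diff (`NoAuth`, `WebhookAuth`, `WithAuth`, `InstrumentHandler`); the function name, package name, and the CA path argument are illustrative, and the exact signatures should be treated as assumptions.

```go
package vkauth

import (
	"github.com/virtual-kubelet/virtual-kubelet/node/api"
	"github.com/virtual-kubelet/virtual-kubelet/node/nodeutil"
	"k8s.io/apiserver/pkg/server/dynamiccertificates"
)

// withAuth mirrors the setAuth helper in this compare: with no client CA bundle
// configured we fall back to anonymous auth, otherwise we build webhook auth
// backed by the cluster. nodeName and caCertPath are illustrative inputs.
func withAuth(nodeName, caCertPath string) nodeutil.NodeOpt {
	return func(cfg *nodeutil.NodeConfig) error {
		if caCertPath == "" {
			// Anonymous auth: requests are let through without authentication.
			cfg.Handler = api.InstrumentHandler(nodeutil.WithAuth(nodeutil.NoAuth(), cfg.Handler))
			return nil
		}
		auth, err := nodeutil.WebhookAuth(cfg.Client, nodeName, func(wc *nodeutil.WebhookAuthConfig) error {
			var err error
			// Serve client-certificate authn from a reloadable CA bundle on disk.
			wc.AuthnConfig.ClientCertificateCAContentProvider, err =
				dynamiccertificates.NewDynamicCAContentFromFile("ca-cert-bundle", caCertPath)
			return err
		})
		if err != nil {
			return err
		}
		cfg.Handler = api.InstrumentHandler(nodeutil.WithAuth(auth, cfg.Handler))
		return nil
	}
}
```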
Brian Goff
e1342777d6 Add API config to node set
This moves API handling into the node object so now everything can be
done in one place.

TLS is required.
In the current form, auth must be set up by the caller.
2021-09-14 17:10:17 +00:00
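For reference, the TLS side of this is what the root command in this compare passes to `NewNode`. A minimal sketch, assuming the option helpers shown in the diff (`WithTLSConfig`, `WithKeyPairFromPath`, `WithCAFromPath`); the function and package names are illustrative.

```go
package vktls

import (
	"crypto/tls"

	"github.com/virtual-kubelet/virtual-kubelet/node/nodeutil"
)

// tlsOption builds the TLS node option the way the root command in this compare
// does: a server key pair is always required, and a client CA bundle is only
// wired in when a path is provided. The paths are assumed to come from the
// APISERVER_* environment variables used elsewhere in this change set.
func tlsOption(certPath, keyPath, caPath string) nodeutil.NodeOpt {
	maybeCA := func(*tls.Config) error { return nil } // no-op when no CA bundle is configured
	if caPath != "" {
		maybeCA = nodeutil.WithCAFromPath(caPath)
	}
	return nodeutil.WithTLSConfig(
		nodeutil.WithKeyPairFromPath(certPath, keyPath),
		maybeCA,
	)
}
```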
Brian Goff
597e7dc281 Make ControllerManager more useful
This changes `ControllerManager` to `Node`.

`Node` is created from a client, and the VK lib is responsible for
creating everything except the client (unless the client is nil, in which
case we use the env client).

This should be a good replacement for node-cli. It offers a simpler
API. *It only works with leases enabled*, since this always seems
desired; however, an option could be added to disable them if needed.

The intent of this is to provide a simpler way to get a vk node up and
running while also being extensible. We can slowly add options, but
they should be focused on a use case rather than trying to support
every possible scenario... in which case the user can just use the
controllers directly.
2021-09-14 17:10:14 +00:00
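A minimal sketch of the workflow this commit describes, using only calls that appear later in this compare (`NewNode`, `Run`, `WaitReady`, `Done`, `Err`). The node name and timeout are illustrative, the provider constructor is assumed to be supplied by the caller, and extra options (TLS, auth, routes) are omitted here.

```go
package vknode

import (
	"context"
	"time"

	"github.com/virtual-kubelet/virtual-kubelet/node"
	"github.com/virtual-kubelet/virtual-kubelet/node/nodeutil"
)

// runNode shows the new entrypoint in its simplest form: hand NewNode a provider
// constructor, let the library build the clients, informers, node/pod controllers
// and leases, then run and block until shutdown.
func runNode(ctx context.Context, newProvider func(nodeutil.ProviderConfig) (nodeutil.Provider, node.NodeProvider, error)) error {
	n, err := nodeutil.NewNode("my-virtual-node", newProvider)
	if err != nil {
		return err
	}

	go n.Run(ctx) //nolint:errcheck

	// Same readiness pattern as the root command later in this compare.
	if err := n.WaitReady(ctx, 5*time.Minute); err != nil {
		return err
	}

	<-n.Done()
	return n.Err()
}
```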
Brian Goff
a9a0ee50cf Remove create-after-delete node e2e test
This test is only testing the specific implementation details of the
mock CLI provided in this repo. The behavior is not inherent in the vk
lib.
2021-09-14 16:57:43 +00:00
Brian Goff
5fe8a7d000 Merge pull request #979 from cpuguy83/fix_ping_panic
Return early on ping error
2021-09-03 12:02:55 -07:00
Brian Goff
22f329fcf0 Add extra logging for pod status update skip 2021-09-03 18:02:35 +00:00
Brian Goff
09ad3fe644 Return early on ping error
Found that this caused a panic after many, many test runs.
It seems we should have returned early since the pingResult is nil.
We don't want to update a lease when ping fails.
2021-08-24 18:49:42 +00:00
Brian Goff
68347d4ed1 Merge pull request #967 from cpuguy83/controller_manager2
Move some boilerplate startup logic to nodeutil
2021-06-01 12:05:59 -07:00
Brian Goff
92f8661031 Merge pull request #973 from cpuguy83/ci_store_test_results
Output test results in junit and export to circle
2021-06-01 11:32:21 -07:00
Brian Goff
f63c23108f Move some boilerplate startup logic to nodeutil
This adds a controller that handles startup for the node and pod
controllers.
Later, if we add an "api controller", it can also be added here.

This is just part of reducing some of the boilerplate code so it is
easier to get off of node-cli.
2021-05-25 17:54:53 +00:00
Brian Goff
db5bf2b0d3 Output test results in junit and export to circle 2021-05-20 17:20:27 +00:00
Brian Goff
fbf6a1957f Merge pull request #970 from palexster/apa/add_liqo
Adding Liqo to README.md
2021-05-19 14:32:20 -07:00
Brian Goff
e6fc00e8dd Merge pull request #971 from champly/fix-jaeger-deprecated-config
fix jaeger deprecated config
2021-05-19 14:31:11 -07:00
champly
50f1346977 fix staticcheck 2021-05-19 09:17:06 +08:00
champly
66fc9d476f fix staticcheck 2021-05-19 09:13:28 +08:00
Brian Goff
0543245668 lifecycle test: timeout send goroutine on context
In error cases these goroutines never exit.
When trying to debug, we end up with a bunch of these goroutines stuck,
making it difficult to troubleshoot.

We could just use a buffered channel; however, that would make it less
clear, in case of an error, what is happening.
2021-05-18 23:06:55 +00:00
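The pattern being added is the standard context-aware channel send; the sketch below is a generic, self-contained illustration of it rather than the actual test code, and all names are illustrative.

```go
package main

import (
	"context"
	"time"
)

// sendResult sends on a channel but gives up when the context ends, so failed
// test cases do not leave the goroutine blocked forever.
func sendResult(ctx context.Context, ch chan<- error, err error) {
	select {
	case ch <- err:
	case <-ctx.Done():
		// The receiver went away (test failed or timed out); exit instead of
		// sitting blocked and cluttering goroutine dumps while debugging.
	}
}

func main() {
	ctx, cancel := context.WithTimeout(context.Background(), 50*time.Millisecond)
	defer cancel()

	results := make(chan error) // unbuffered on purpose, as the commit message argues
	go sendResult(ctx, results, nil)

	select {
	case err := <-results:
		_ = err
	case <-ctx.Done():
	}
}
```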
Brian Goff
d245d9b8cf Merge branch 'master' into apa/add_liqo 2021-05-18 13:36:53 -07:00
Brian Goff
4fe8496dd1 Fix TestMapReference needed an ordered mapping
In 405d5d63b1 we changed from an ordered
list to a map; however, the test is order-dependent and Go maps are
randomized.

Change the test to use a slice with an internal type (instead of pulling
back in k8s.io/kubernetes).

Without this change the test will fail occasionally and has no
guarantee of success because of the random order of maps.
2021-05-18 19:07:46 +00:00
Brian Goff
5cd25230c5 Merge pull request #972 from cpuguy83/remove_kk
Remove remaining deps on k8s.io/kubernetes
2021-05-18 09:22:30 -07:00
Brian Goff
04cdec767b Remove remaining deps on k8s.io/kubernetes
This is mostly helper code for setting up env vars. There is a single
file copied verbatim, although much of this downward API stuff is a
copy. We may want to pull this out and do a direct copy of the code
so it is easier to update, and work with upstream to have a shared
package for the downward API that lives outside of k8s.io/kubernetes.
2021-05-17 21:42:49 +00:00
champly
822dc8bb4a Merge branch 'master' into fix-jaeger-deprecated-config 2021-05-16 20:12:38 +08:00
Brian Goff
40b4425804 Merge pull request #811 from TBBle/patch-1
Fix non-linked reference to Providers in Usage
2021-05-14 11:01:37 -07:00
Brian Goff
be0a062aec Merge pull request #969 from cpuguy83/copy_metrics
Copy stats types from upstream.
2021-05-13 16:26:53 -07:00
Paul "Hampy" Hampson
a2515d859a Fix non-linked reference to Providers in Usage
Fixes a typo ("provides") and also replaces "listed above" with a link to the list, which, as it happens, had moved below.
2021-05-14 02:45:33 +10:00
champly
0df7ac4e80 fix jaeger deprecated config 2021-05-12 10:36:58 +08:00
Alex Palesandro
96eae1906b Adding Liqo to README.md 2021-05-07 22:58:46 +02:00
Brian Goff
8437e237be Copy stats types from upstream.
This drops another dependency on k8s.io/kubernetes.
This does have the unfortunate side effect that implementers will now
get a compile error until they update their code to use the new type.

Just as a note:

The stats types have moved to k8s.io/kubelet; however, they are only
there as of v1.20.
We currently support versions older than v1.20, and even our go.mod
imports from v1.19.

For now we copy the types in. Later we can remove the type defs and
change them to type aliases to the k8s.io/kubelet types (which prevents
another compile time issue).

Anything relying on type assertions to determine if something implements
this method will, unfortunately, be broken and it will be hard to notice
until runtime. We need to make sure to call this out in the release
notes.

Signed-off-by: Brian Goff <cpuguy83@gmail.com>
2021-05-05 23:01:52 +00:00
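The note about later converting the copied definitions into aliases rests on a standard Go distinction; here is a small illustrative sketch of it. The `upstreamSummary` type is a stand-in, not the real k8s.io/kubelet stats type.

```go
package main

import "fmt"

// upstreamSummary stands in for the k8s.io/kubelet stats type mentioned in the
// commit message.
type upstreamSummary struct{ Pods int }

// Distinct type: values of upstreamSummary are NOT assignable to it without a
// conversion, which is the compile-time break the commit message warns about.
type copiedSummary upstreamSummary

// Alias: identical to upstreamSummary, so switching over later is transparent.
type aliasedSummary = upstreamSummary

func main() {
	u := upstreamSummary{Pods: 3}

	// var c copiedSummary = u // would not compile: distinct types
	c := copiedSummary(u)    // explicit conversion required, i.e. a break for callers
	var a aliasedSummary = u // fine: the alias is the same type as upstream

	fmt.Println(c.Pods, a.Pods)
}
```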
Brian Goff
baa0e6e8fc Merge pull request #968 from cpuguy83/cleanup_some_kk
Don't import pod util package from k/k
2021-05-04 17:26:49 -07:00
Brian Goff
405d5d63b1 Don't import pod util package from k/k
These are all simple changes that will not change w/o breaking API
changes upstream anyway.

Signed-off-by: Brian Goff <cpuguy83@gmail.com>
2021-05-04 23:55:30 +00:00
Brian Goff
e1486ade00 Merge pull request #966 from sargun/upgrade-k8s
Upgrade k8s to v19
2021-04-25 07:18:39 -07:00
Sargun Dhillon
4c223a8cd9 Upgrade to Kubernetes 1.19.10
Kubernetes 1.18.x is deprecated and no longer receiving updates.

Signed-off-by: Sargun Dhillon <sargun@sargun.me>
2021-04-23 00:47:51 -07:00
Brian Goff
bf3a764409 Merge pull request #962 from sargun/expose-custom-retry 2021-04-15 15:35:34 -07:00
Sargun Dhillon
b259cb0548 Add the ability to dictate custom retries
Our current retry policy is naive and only does 20 retries. It is
also based on the rate limiter. If the user is somewhat aggressive with
rate limiting, but has a temporary outage on the API server, they
may want to continue to delay.

In fact, K8s has a built-in function to suggest delays:
https://pkg.go.dev/k8s.io/apimachinery/pkg/api/errors#SuggestsClientDelay

Signed-off-by: Sargun Dhillon <sargun@sargun.me>
2021-04-14 10:52:26 -07:00
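A hedged sketch of what such a policy could look like against the `ShouldRetryFunc` signature added in this change set, using `SuggestsClientDelay` from k8s.io/apimachinery; the 20-retry cap mirrors the existing `MaxRetries` default, and the package and function names are illustrative.

```go
package retrypolicy

import (
	"context"
	"fmt"
	"time"

	apierrors "k8s.io/apimachinery/pkg/api/errors"
)

const maxRetries = 20 // mirrors the queue's existing MaxRetries default

// suggestedDelayRetry matches the ShouldRetryFunc shape added in this change set:
// give up after maxRetries; otherwise retry and, when the API server suggests a
// client delay, wait that long instead of the rate limiter's default.
func suggestedDelayRetry(ctx context.Context, key string, timesTried int, originallyAdded time.Time, err error) (*time.Duration, error) {
	if timesTried >= maxRetries {
		return nil, fmt.Errorf("giving up on %q after %d attempts: %w", key, timesTried, err)
	}
	if seconds, ok := apierrors.SuggestsClientDelay(err); ok {
		d := time.Duration(seconds) * time.Second
		return &d, nil // retry, honoring the server-suggested delay
	}
	return nil, nil // retry with the queue's default delay
}
```

As the queue changes further down show, such a policy is handed to `New` as its fourth argument; passing nil keeps `DefaultRetryFunc`.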
Sargun Dhillon
e95023b76e Fix test
This starts watching for events prior to the start of the controller.
This smells like a bug in the fakeclient bits, but it seems to fix
the problem.

Signed-off-by: Sargun Dhillon <sargun@sargun.me>
2021-04-14 10:52:26 -07:00
Sargun Dhillon
5fd08d4619 Merge pull request #958 from sargun/fix-deletionQ
Remove errant double queue
2021-03-24 12:12:29 -07:00
Sargun Dhillon
c40a255eae Remove errant double queue
This seems to be a typo where we erroneously double-queue a deletion,
but one without the "key".
2021-03-24 10:21:27 -07:00
47 changed files with 2224 additions and 1106 deletions

View File

@@ -30,7 +30,7 @@ jobs:
test:
resource_class: xlarge
docker:
- image: circleci/golang:1.15
- image: circleci/golang:1.16
environment:
GO111MODULE: "on"
working_directory: /go/src/github.com/virtual-kubelet/virtual-kubelet
@@ -42,13 +42,22 @@ jobs:
- run:
name: Build
command: V=1 make build
- run: go install gotest.tools/gotestsum@latest
- run:
name: Tests
command: V=1 CI=1 make test envtest
environment:
GOTEST: gotestsum -- -timeout=9m
GOTESTSUM_JUNITFILE: output/unit/results.xml
GODEBUG: cgocheck=2
command: |
mkdir -p output/unit
V=1 make test envtest
- save_cache:
key: test-{{ checksum "go.mod" }}-{{ checksum "go.sum" }}
paths:
- "/go/pkg/mod"
- store_test_results:
path: output
e2e:
machine:
@@ -121,13 +130,21 @@ jobs:
- e2e-{{ checksum "go.mod" }}-{{ checksum "go.sum" }}-2
- run:
name: Run the end-to-end test suite
environment:
GOTEST: gotestsum --
command: |
mkdir $HOME/.go
export PATH=$HOME/.go/bin:${PATH}
curl -fsSL -o "/tmp/go.tar.gz" "https://dl.google.com/go/go1.15.6.linux-amd64.tar.gz"
curl -fsSL -o "/tmp/go.tar.gz" "https://dl.google.com/go/go1.16.4.linux-amd64.tar.gz"
tar -C $HOME/.go --strip-components=1 -xzf "/tmp/go.tar.gz"
go version
mkdir -p output/e2e
export GOTESTSUM_JUNITFILE="$(pwd)/output/e2e/results.xml"
export PATH="${GOPATH}/bin:${PATH}"
go install gotest.tools/gotestsum@latest
make e2e
- store_test_results:
path: output
- save_cache:
key: e2e-{{ checksum "go.mod" }}-{{ checksum "go.sum" }}-2
paths:

View File

@@ -6,6 +6,8 @@ run:
skip-dirs:
# This directory contains copy code from upstream kubernetes/kubernetes, skip it.
- internal/kubernetes
# This is mostly copied from upstream, rather than fixing that code here just ignore the errors.
- internal/podutils
linters:
enable:

View File

@@ -5,6 +5,8 @@ exec := $(DOCKER_IMAGE)
github_repo := virtual-kubelet/virtual-kubelet
binary := virtual-kubelet
GOTEST ?= go test $(if $V,-v)
export GO111MODULE ?= on
include Makefile.e2e
@@ -71,36 +73,28 @@ vet:
@echo "go vet'ing..."
ifndef CI
@echo "go vet'ing Outside CI..."
go vet $(allpackages)
go vet $(TESTDIRS)
else
@echo "go vet'ing in CI..."
mkdir -p test
( go vet $(allpackages); echo $$? ) | \
( go vet $(TESTDIRS); echo $$? ) | \
tee test/vet.txt | sed '$$ d'; exit $$(tail -1 test/vet.txt)
endif
test:
ifndef CI
@echo "Testing..."
go test $(if $V,-v) $(allpackages)
else
@echo "Testing in CI..."
mkdir -p test
( GODEBUG=cgocheck=2 go test -timeout=9m -v $(allpackages); echo $$? ) | \
tee test/output.txt | sed '$$ d'; exit $$(tail -1 test/output.txt)
endif
$(GOTEST) $(TESTDIRS)
list:
@echo "List..."
@echo $(allpackages)
@echo $(TESTDIRS)
cover: gocovmerge
@echo "Coverage Report..."
@echo "NOTE: make cover does not exit 1 on failure, don't use it to check for tests success!"
rm -f .GOPATH/cover/*.out cover/all.merged
$(if $V,@echo "-- go test -coverpkg=./... -coverprofile=cover/... ./...")
@for MOD in $(allpackages); do \
go test -coverpkg=`echo $(allpackages)|tr " " ","` \
@for MOD in $(TESTDIRS); do \
go test -coverpkg=`echo $(TESTDIRS)|tr " " ","` \
-coverprofile=cover/unit-`echo $$MOD|tr "/" "_"`.out \
$$MOD 2>&1 | grep -v "no packages being tested depend on"; \
done
@@ -142,11 +136,7 @@ VERSION := $(shell git describe --tags --always --dirty="-dev")
DATE := $(shell date -u '+%Y-%m-%d-%H:%M UTC')
VERSION_FLAGS := -ldflags='-X "main.buildVersion=$(VERSION)" -X "main.buildTime=$(DATE)"'
# assuming go 1.9 here!!
_allpackages = $(shell go list ./...)
# memoize allpackages, so that it's executed only once and only if used
allpackages = $(if $(__allpackages),,$(eval __allpackages := $$(_allpackages)))$(__allpackages)
TESTDIRS ?= ./...
.PHONY: goimports
goimports: $(gobin_tool)
@@ -187,7 +177,7 @@ kubebuilder_2.3.1_${TEST_OS}_${TEST_ARCH}: kubebuilder_2.3.1_${TEST_OS}_${TEST_A
envtest: kubebuilder_2.3.1_${TEST_OS}_${TEST_ARCH}
# You can add klog flags for debugging, like: -klog.v=10 -klog.logtostderr
# klogv2 flags just wraps our existing logrus.
KUBEBUILDER_ASSETS=$(PWD)/kubebuilder_2.3.1_${TEST_OS}_${TEST_ARCH}/bin go test -v -run=TestEnvtest ./node -envtest=true
KUBEBUILDER_ASSETS=$(PWD)/kubebuilder_2.3.1_${TEST_OS}_${TEST_ARCH}/bin $(GOTEST) -run=TestEnvtest ./node -envtest=true
.PHONY: fmt
fmt:
@@ -195,4 +185,4 @@ fmt:
.PHONY: lint
lint: $(gobin_tool)
gobin -run github.com/golangci/golangci-lint/cmd/golangci-lint@v1.33.0 run ./...
gobin -run github.com/golangci/golangci-lint/cmd/golangci-lint@v1.33.0 run ./...

View File

@@ -39,7 +39,7 @@ e2e: NODE_NAME := vkubelet-mock-0
e2e: export VK_BUILD_TAGS += mock_provider
e2e: e2e.clean bin/e2e/virtual-kubelet skaffold/run
@echo Running tests...
cd $(PWD)/internal/test/e2e && go test -v -timeout 5m -tags e2e ./... \
cd $(PWD)/internal/test/e2e && $(GOTEST) -timeout 5m -tags e2e ./... \
-kubeconfig=$(KUBECONFIG) \
-namespace=$(NAMESPACE) \
-node-name=$(NODE_NAME)

View File

@@ -23,6 +23,7 @@ The best description is "Kubernetes API on top, programmable back."
+ [AWS Fargate Provider](#aws-fargate-provider)
+ [Elotl Kip](#elotl-kip)
+ [HashiCorp Nomad](#hashicorp-nomad-provider)
+ [Liqo](#liqo-provider)
+ [OpenStack Zun](#openstack-zun-provider)
+ [Tensile Kube Provider](#tensile-kube-provider)
+ [Adding a New Provider via the Provider Interface](#adding-a-new-provider-via-the-provider-interface)
@@ -46,7 +47,7 @@ project to build a custom Kubernetes node agent.
See godoc for up to date instructions on consuming this project:
https://godoc.org/github.com/virtual-kubelet/virtual-kubelet
There are implementations available for several provides (listed above), see
There are implementations available for [several providers](#providers), see
those repos for details on how to deploy.
## Current Features
@@ -134,6 +135,12 @@ would on a Kubernetes node.
For detailed instructions, follow the guide [here](https://github.com/virtual-kubelet/nomad/blob/master/README.md).
### Liqo Provider
[Liqo](https://liqo.io) implements a provider for Virtual Kubelet designed to transparently offload pods and services to "peered" remote Kubernetes clusters. Liqo is capable of discovering neighbor clusters (using DNS or mDNS) and "peering" with them, or in other words, establishing a relationship to share part of the cluster resources. When a cluster has established a peering, a new instance of the Liqo Virtual Kubelet is spawned to seamlessly extend the capacity of the cluster by providing an abstraction of the resources of the remote cluster. The provider, combined with the Liqo network fabric, extends the cluster networking by enabling Pod-to-Pod traffic and multi-cluster east-west services, supporting endpoints on both clusters.
For detailed instructions, follow the guide [here](https://github.com/liqotech/liqo/blob/master/README.md).
### OpenStack Zun Provider
OpenStack [Zun](https://docs.openstack.org/zun/latest/) provider for Virtual Kubelet connects

View File

@@ -59,7 +59,11 @@ func (mv mapVar) Type() string {
func installFlags(flags *pflag.FlagSet, c *Opts) {
flags.StringVar(&c.KubeConfigPath, "kubeconfig", c.KubeConfigPath, "kube config file to use for connecting to the Kubernetes API server")
flags.StringVar(&c.KubeNamespace, "namespace", c.KubeNamespace, "kubernetes namespace (default is 'all')")
flags.MarkDeprecated("namespace", "Nodes must watch for pods in all namespaces. This option is now ignored.") //nolint:errcheck
flags.MarkHidden("namespace") //nolint:errcheck
flags.StringVar(&c.KubeClusterDomain, "cluster-domain", c.KubeClusterDomain, "kubernetes cluster-domain (default is 'cluster.local')")
flags.StringVar(&c.NodeName, "nodename", c.NodeName, "kubernetes node name")
flags.StringVar(&c.OperatingSystem, "os", c.OperatingSystem, "Operating System (Linux/Windows)")
@@ -68,11 +72,15 @@ func installFlags(flags *pflag.FlagSet, c *Opts) {
flags.StringVar(&c.MetricsAddr, "metrics-addr", c.MetricsAddr, "address to listen for metrics/stats requests")
flags.StringVar(&c.TaintKey, "taint", c.TaintKey, "Set node taint key")
flags.BoolVar(&c.DisableTaint, "disable-taint", c.DisableTaint, "disable the virtual-kubelet node taint")
flags.MarkDeprecated("taint", "Taint key should now be configured using the VK_TAINT_KEY environment variable") //nolint:errcheck
flags.IntVar(&c.PodSyncWorkers, "pod-sync-workers", c.PodSyncWorkers, `set the number of pod synchronization workers`)
flags.BoolVar(&c.EnableNodeLease, "enable-node-lease", c.EnableNodeLease, `use node leases (1.13) for node heartbeats`)
flags.MarkDeprecated("enable-node-lease", "leases are always enabled") //nolint:errcheck
flags.MarkHidden("enable-node-lease") //nolint:errcheck
flags.StringSliceVar(&c.TraceExporters, "trace-exporter", c.TraceExporters, fmt.Sprintf("sets the tracing exporter to use, available exporters: %s", AvailableTraceExporters()))
flags.StringVar(&c.TraceConfig.ServiceName, "trace-service-name", c.TraceConfig.ServiceName, "sets the name of the service used to register with the trace exporter")

View File

@@ -15,140 +15,15 @@
package root
import (
"context"
"crypto/tls"
"fmt"
"io"
"net"
"net/http"
"os"
"time"
"github.com/pkg/errors"
"github.com/virtual-kubelet/virtual-kubelet/cmd/virtual-kubelet/internal/provider"
"github.com/virtual-kubelet/virtual-kubelet/log"
"github.com/virtual-kubelet/virtual-kubelet/node/api"
)
// AcceptedCiphers is the list of accepted TLS ciphers, with known weak ciphers elided
// Note this list should be a moving target.
var AcceptedCiphers = []uint16{
tls.TLS_ECDHE_ECDSA_WITH_AES_256_CBC_SHA,
tls.TLS_ECDHE_ECDSA_WITH_AES_128_CBC_SHA,
tls.TLS_ECDHE_RSA_WITH_AES_256_CBC_SHA,
tls.TLS_ECDHE_RSA_WITH_AES_128_CBC_SHA,
tls.TLS_ECDHE_ECDSA_WITH_AES_256_GCM_SHA384,
tls.TLS_ECDHE_RSA_WITH_AES_256_GCM_SHA384,
tls.TLS_ECDHE_ECDSA_WITH_AES_128_GCM_SHA256,
tls.TLS_ECDHE_RSA_WITH_AES_128_GCM_SHA256,
}
func loadTLSConfig(certPath, keyPath string) (*tls.Config, error) {
cert, err := tls.LoadX509KeyPair(certPath, keyPath)
if err != nil {
return nil, errors.Wrap(err, "error loading tls certs")
}
return &tls.Config{
Certificates: []tls.Certificate{cert},
MinVersion: tls.VersionTLS12,
PreferServerCipherSuites: true,
CipherSuites: AcceptedCiphers,
}, nil
}
func setupHTTPServer(ctx context.Context, p provider.Provider, cfg *apiServerConfig, getPodsFromKubernetes api.PodListerFunc) (_ func(), retErr error) {
var closers []io.Closer
cancel := func() {
for _, c := range closers {
c.Close()
}
}
defer func() {
if retErr != nil {
cancel()
}
}()
if cfg.CertPath == "" || cfg.KeyPath == "" {
log.G(ctx).
WithField("certPath", cfg.CertPath).
WithField("keyPath", cfg.KeyPath).
Error("TLS certificates not provided, not setting up pod http server")
} else {
tlsCfg, err := loadTLSConfig(cfg.CertPath, cfg.KeyPath)
if err != nil {
return nil, err
}
l, err := tls.Listen("tcp", cfg.Addr, tlsCfg)
if err != nil {
return nil, errors.Wrap(err, "error setting up listener for pod http server")
}
mux := http.NewServeMux()
podRoutes := api.PodHandlerConfig{
RunInContainer: p.RunInContainer,
GetContainerLogs: p.GetContainerLogs,
GetPodsFromKubernetes: getPodsFromKubernetes,
GetPods: p.GetPods,
StreamIdleTimeout: cfg.StreamIdleTimeout,
StreamCreationTimeout: cfg.StreamCreationTimeout,
}
api.AttachPodRoutes(podRoutes, mux, true)
s := &http.Server{
Handler: mux,
TLSConfig: tlsCfg,
}
go serveHTTP(ctx, s, l, "pods")
closers = append(closers, s)
}
if cfg.MetricsAddr == "" {
log.G(ctx).Info("Pod metrics server not setup due to empty metrics address")
} else {
l, err := net.Listen("tcp", cfg.MetricsAddr)
if err != nil {
return nil, errors.Wrap(err, "could not setup listener for pod metrics http server")
}
mux := http.NewServeMux()
var summaryHandlerFunc api.PodStatsSummaryHandlerFunc
if mp, ok := p.(provider.PodMetricsProvider); ok {
summaryHandlerFunc = mp.GetStatsSummary
}
podMetricsRoutes := api.PodMetricsConfig{
GetStatsSummary: summaryHandlerFunc,
}
api.AttachPodMetricsRoutes(podMetricsRoutes, mux)
s := &http.Server{
Handler: mux,
}
go serveHTTP(ctx, s, l, "pod metrics")
closers = append(closers, s)
}
return cancel, nil
}
func serveHTTP(ctx context.Context, s *http.Server, l net.Listener, name string) {
if err := s.Serve(l); err != nil {
select {
case <-ctx.Done():
default:
log.G(ctx).WithError(err).Errorf("Error setting up %s http server", name)
}
}
l.Close()
}
type apiServerConfig struct {
CertPath string
KeyPath string
CACertPath string
Addr string
MetricsAddr string
StreamIdleTimeout time.Duration
@@ -157,8 +32,9 @@ type apiServerConfig struct {
func getAPIConfig(c Opts) (*apiServerConfig, error) {
config := apiServerConfig{
CertPath: os.Getenv("APISERVER_CERT_LOCATION"),
KeyPath: os.Getenv("APISERVER_KEY_LOCATION"),
CertPath: os.Getenv("APISERVER_CERT_LOCATION"),
KeyPath: os.Getenv("APISERVER_KEY_LOCATION"),
CACertPath: os.Getenv("APISERVER_CA_CERT_LOCATION"),
}
config.Addr = fmt.Sprintf(":%d", c.ListenPort)

View File

@@ -15,54 +15,10 @@
package root
import (
"context"
"strings"
"github.com/virtual-kubelet/virtual-kubelet/cmd/virtual-kubelet/internal/provider"
"github.com/virtual-kubelet/virtual-kubelet/errdefs"
corev1 "k8s.io/api/core/v1"
v1 "k8s.io/api/core/v1"
metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
)
const osLabel = "beta.kubernetes.io/os"
// NodeFromProvider builds a kubernetes node object from a provider
// This is a temporary solution until node stuff actually split off from the provider interface itself.
func NodeFromProvider(ctx context.Context, name string, taint *v1.Taint, p provider.Provider, version string) *v1.Node {
taints := make([]v1.Taint, 0)
if taint != nil {
taints = append(taints, *taint)
}
node := &v1.Node{
ObjectMeta: metav1.ObjectMeta{
Name: name,
Labels: map[string]string{
"type": "virtual-kubelet",
"kubernetes.io/role": "agent",
"kubernetes.io/hostname": name,
},
},
Spec: v1.NodeSpec{
Taints: taints,
},
Status: v1.NodeStatus{
NodeInfo: v1.NodeSystemInfo{
Architecture: "amd64",
KubeletVersion: version,
},
},
}
p.ConfigureNode(ctx, node)
if _, ok := node.ObjectMeta.Labels[osLabel]; !ok {
node.ObjectMeta.Labels[osLabel] = strings.ToLower(node.Status.NodeInfo.OperatingSystem)
}
return node
}
// getTaint creates a taint using the provided key/value.
// Taint effect is read from the environment
// The taint key/value may be overwritten by the environment.

View File

@@ -28,7 +28,7 @@ import (
// Defaults for root command options
const (
DefaultNodeName = "virtual-kubelet"
DefaultOperatingSystem = "Linux"
DefaultOperatingSystem = "linux"
DefaultInformerResyncPeriod = 1 * time.Minute
DefaultMetricsAddr = ":10255"
DefaultListenPort = 10250 // TODO(cpuguy83)(VK1.0): Change this to an addr instead of just a port.. we should not be listening on all interfaces.

View File

@@ -16,8 +16,10 @@ package root
import (
"context"
"crypto/tls"
"net/http"
"os"
"path"
"runtime"
"github.com/pkg/errors"
"github.com/spf13/cobra"
@@ -26,14 +28,10 @@ import (
"github.com/virtual-kubelet/virtual-kubelet/internal/manager"
"github.com/virtual-kubelet/virtual-kubelet/log"
"github.com/virtual-kubelet/virtual-kubelet/node"
"github.com/virtual-kubelet/virtual-kubelet/node/api"
"github.com/virtual-kubelet/virtual-kubelet/node/nodeutil"
corev1 "k8s.io/api/core/v1"
k8serrors "k8s.io/apimachinery/pkg/api/errors"
metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
kubeinformers "k8s.io/client-go/informers"
"k8s.io/client-go/kubernetes/scheme"
corev1client "k8s.io/client-go/kubernetes/typed/core/v1"
"k8s.io/client-go/tools/record"
"k8s.io/apiserver/pkg/server/dynamiccertificates"
)
// NewCommand creates a new top-level command.
@@ -75,30 +73,33 @@ func runRootCommand(ctx context.Context, s *provider.Store, c Opts) error {
}
}
client, err := nodeutil.ClientsetFromEnv(c.KubeConfigPath)
if err != nil {
return err
}
mux := http.NewServeMux()
newProvider := func(cfg nodeutil.ProviderConfig) (nodeutil.Provider, node.NodeProvider, error) {
rm, err := manager.NewResourceManager(cfg.Pods, cfg.Secrets, cfg.ConfigMaps, cfg.Services)
if err != nil {
return nil, nil, errors.Wrap(err, "could not create resource manager")
}
initConfig := provider.InitConfig{
ConfigPath: c.ProviderConfigPath,
NodeName: c.NodeName,
OperatingSystem: c.OperatingSystem,
ResourceManager: rm,
DaemonPort: c.ListenPort,
InternalIP: os.Getenv("VKUBELET_POD_IP"),
KubeClusterDomain: c.KubeClusterDomain,
}
pInit := s.Get(c.Provider)
if pInit == nil {
return nil, nil, errors.Errorf("provider %q not found", c.Provider)
}
// Create a shared informer factory for Kubernetes pods in the current namespace (if specified) and scheduled to the current node.
podInformerFactory := kubeinformers.NewSharedInformerFactoryWithOptions(
client,
c.InformerResyncPeriod,
kubeinformers.WithNamespace(c.KubeNamespace),
nodeutil.PodInformerFilter(c.NodeName),
)
podInformer := podInformerFactory.Core().V1().Pods()
// Create another shared informer factory for Kubernetes secrets and configmaps (not subject to any selectors).
scmInformerFactory := kubeinformers.NewSharedInformerFactoryWithOptions(client, c.InformerResyncPeriod)
// Create a secret informer and a config map informer so we can pass their listers to the resource manager.
secretInformer := scmInformerFactory.Core().V1().Secrets()
configMapInformer := scmInformerFactory.Core().V1().ConfigMaps()
serviceInformer := scmInformerFactory.Core().V1().Services()
rm, err := manager.NewResourceManager(podInformer.Lister(), secretInformer.Lister(), configMapInformer.Lister(), serviceInformer.Lister())
if err != nil {
return errors.Wrap(err, "could not create resource manager")
p, err := pInit(initConfig)
if err != nil {
return nil, nil, errors.Wrapf(err, "error initializing provider %s", c.Provider)
}
p.ConfigureNode(ctx, cfg.Node)
cfg.Node.Status.NodeInfo.KubeletVersion = c.Version
return p, nil, nil
}
apiConfig, err := getAPIConfig(c)
@@ -106,28 +107,39 @@ func runRootCommand(ctx context.Context, s *provider.Store, c Opts) error {
return err
}
if err := setupTracing(ctx, c); err != nil {
cm, err := nodeutil.NewNode(c.NodeName, newProvider, func(cfg *nodeutil.NodeConfig) error {
cfg.KubeconfigPath = c.KubeConfigPath
cfg.Handler = mux
cfg.InformerResyncPeriod = c.InformerResyncPeriod
if taint != nil {
cfg.NodeSpec.Spec.Taints = append(cfg.NodeSpec.Spec.Taints, *taint)
}
cfg.NodeSpec.Status.NodeInfo.Architecture = runtime.GOARCH
cfg.NodeSpec.Status.NodeInfo.OperatingSystem = c.OperatingSystem
cfg.HTTPListenAddr = apiConfig.Addr
cfg.StreamCreationTimeout = apiConfig.StreamCreationTimeout
cfg.StreamIdleTimeout = apiConfig.StreamIdleTimeout
cfg.DebugHTTP = true
cfg.NumWorkers = c.PodSyncWorkers
return nil
},
setAuth(c.NodeName, apiConfig),
nodeutil.WithTLSConfig(
nodeutil.WithKeyPairFromPath(apiConfig.CertPath, apiConfig.KeyPath),
maybeCA(apiConfig.CACertPath),
),
nodeutil.AttachProviderRoutes(mux),
)
if err != nil {
return err
}
initConfig := provider.InitConfig{
ConfigPath: c.ProviderConfigPath,
NodeName: c.NodeName,
OperatingSystem: c.OperatingSystem,
ResourceManager: rm,
DaemonPort: c.ListenPort,
InternalIP: os.Getenv("VKUBELET_POD_IP"),
KubeClusterDomain: c.KubeClusterDomain,
}
pInit := s.Get(c.Provider)
if pInit == nil {
return errors.Errorf("provider %q not found", c.Provider)
}
p, err := pInit(initConfig)
if err != nil {
return errors.Wrapf(err, "error initializing provider %s", c.Provider)
if err := setupTracing(ctx, c); err != nil {
return err
}
ctx = log.WithLogger(ctx, log.G(ctx).WithFields(log.Fields{
@@ -137,117 +149,54 @@ func runRootCommand(ctx context.Context, s *provider.Store, c Opts) error {
"watchedNamespace": c.KubeNamespace,
}))
pNode := NodeFromProvider(ctx, c.NodeName, taint, p, c.Version)
np := node.NewNaiveNodeProvider()
additionalOptions := []node.NodeControllerOpt{
node.WithNodeStatusUpdateErrorHandler(func(ctx context.Context, err error) error {
if !k8serrors.IsNotFound(err) {
return err
}
go cm.Run(ctx) //nolint:errcheck
log.G(ctx).Debug("node not found")
newNode := pNode.DeepCopy()
newNode.ResourceVersion = ""
_, err = client.CoreV1().Nodes().Create(ctx, newNode, metav1.CreateOptions{})
if err != nil {
return err
}
log.G(ctx).Debug("created new node")
return nil
}),
}
if c.EnableNodeLease {
leaseClient := nodeutil.NodeLeaseV1Client(client)
// 40 seconds is the default lease time in upstream kubelet
additionalOptions = append(additionalOptions, node.WithNodeEnableLeaseV1(leaseClient, 40))
}
nodeRunner, err := node.NewNodeController(
np,
pNode,
client.CoreV1().Nodes(),
additionalOptions...,
)
if err != nil {
log.G(ctx).Fatal(err)
}
defer func() {
log.G(ctx).Debug("Waiting for controllers to be done")
cancel()
<-cm.Done()
}()
eb := record.NewBroadcaster()
eb.StartLogging(log.G(ctx).Infof)
eb.StartRecordingToSink(&corev1client.EventSinkImpl{Interface: client.CoreV1().Events(c.KubeNamespace)})
pc, err := node.NewPodController(node.PodControllerConfig{
PodClient: client.CoreV1(),
PodInformer: podInformer,
EventRecorder: eb.NewRecorder(scheme.Scheme, corev1.EventSource{Component: path.Join(pNode.Name, "pod-controller")}),
Provider: p,
SecretInformer: secretInformer,
ConfigMapInformer: configMapInformer,
ServiceInformer: serviceInformer,
})
if err != nil {
return errors.Wrap(err, "error setting up pod controller")
}
go podInformerFactory.Start(ctx.Done())
go scmInformerFactory.Start(ctx.Done())
cancelHTTP, err := setupHTTPServer(ctx, p, apiConfig, func(context.Context) ([]*corev1.Pod, error) {
return rm.GetPods(), nil
})
if err != nil {
log.G(ctx).Info("Waiting for controller to be ready")
if err := cm.WaitReady(ctx, c.StartupTimeout); err != nil {
return err
}
defer cancelHTTP()
go func() {
if err := pc.Run(ctx, c.PodSyncWorkers); err != nil && errors.Cause(err) != context.Canceled {
log.G(ctx).Fatal(err)
}
}()
log.G(ctx).Info("Ready")
if c.StartupTimeout > 0 {
ctx, cancel := context.WithTimeout(ctx, c.StartupTimeout)
log.G(ctx).Info("Waiting for pod controller / VK to be ready")
select {
case <-ctx.Done():
cancel()
return ctx.Err()
case <-pc.Ready():
}
cancel()
if err := pc.Err(); err != nil {
return err
}
select {
case <-ctx.Done():
case <-cm.Done():
return cm.Err()
}
go func() {
if err := nodeRunner.Run(ctx); err != nil {
log.G(ctx).Fatal(err)
}
}()
setNodeReady(pNode)
if err := np.UpdateStatus(ctx, pNode); err != nil {
return errors.Wrap(err, "error marking the node as ready")
}
log.G(ctx).Info("Initialized")
<-ctx.Done()
return nil
}
func setNodeReady(n *corev1.Node) {
for i, c := range n.Status.Conditions {
if c.Type != "Ready" {
continue
func setAuth(node string, apiCfg *apiServerConfig) nodeutil.NodeOpt {
if apiCfg.CACertPath == "" {
return func(cfg *nodeutil.NodeConfig) error {
cfg.Handler = api.InstrumentHandler(nodeutil.WithAuth(nodeutil.NoAuth(), cfg.Handler))
return nil
}
}
c.Message = "Kubelet is ready"
c.Reason = "KubeletReady"
c.Status = corev1.ConditionTrue
c.LastHeartbeatTime = metav1.Now()
c.LastTransitionTime = metav1.Now()
n.Status.Conditions[i] = c
return
return func(cfg *nodeutil.NodeConfig) error {
auth, err := nodeutil.WebhookAuth(cfg.Client, node, func(cfg *nodeutil.WebhookAuthConfig) error {
var err error
cfg.AuthnConfig.ClientCertificateCAContentProvider, err = dynamiccertificates.NewDynamicCAContentFromFile("ca-cert-bundle", apiCfg.CACertPath)
return err
})
if err != nil {
return err
}
cfg.Handler = api.InstrumentHandler(nodeutil.WithAuth(auth, cfg.Handler))
return nil
}
}
func maybeCA(p string) func(*tls.Config) error {
if p == "" {
return func(*tls.Config) error { return nil }
}
return nodeutil.WithCAFromPath(p)
}

View File

@@ -18,6 +18,7 @@ package root
import (
"errors"
"fmt"
"os"
"contrib.go.opencensus.io/exporter/jaeger"
@@ -31,17 +32,21 @@ func init() {
// NewJaegerExporter creates a new opencensus tracing exporter.
func NewJaegerExporter(opts TracingExporterOptions) (trace.Exporter, error) {
jOpts := jaeger.Options{
Endpoint: os.Getenv("JAEGER_ENDPOINT"),
AgentEndpoint: os.Getenv("JAEGER_AGENT_ENDPOINT"),
Username: os.Getenv("JAEGER_USER"),
Password: os.Getenv("JAEGER_PASSWORD"),
Endpoint: os.Getenv("JAEGER_ENDPOINT"), // deprecated
CollectorEndpoint: os.Getenv("JAEGER_COLLECTOR_ENDPOINT"),
AgentEndpoint: os.Getenv("JAEGER_AGENT_ENDPOINT"),
Username: os.Getenv("JAEGER_USER"),
Password: os.Getenv("JAEGER_PASSWORD"),
Process: jaeger.Process{
ServiceName: opts.ServiceName,
},
}
if jOpts.Endpoint == "" && jOpts.AgentEndpoint == "" { // nolint:staticcheck
return nil, errors.New("Must specify either JAEGER_ENDPOINT or JAEGER_AGENT_ENDPOINT")
if jOpts.Endpoint != "" && jOpts.CollectorEndpoint == "" { // nolintlint:staticcheck
jOpts.CollectorEndpoint = fmt.Sprintf("%s/api/traces", jOpts.Endpoint) // nolintlint:staticcheck
}
if jOpts.CollectorEndpoint == "" && jOpts.AgentEndpoint == "" { // nolintlint:staticcheck
return nil, errors.New("Must specify either JAEGER_COLLECTOR_ENDPOINT or JAEGER_AGENT_ENDPOINT")
}
for k, v := range opts.Tags {

View File

@@ -13,11 +13,11 @@ import (
"github.com/virtual-kubelet/virtual-kubelet/errdefs"
"github.com/virtual-kubelet/virtual-kubelet/log"
"github.com/virtual-kubelet/virtual-kubelet/node/api"
stats "github.com/virtual-kubelet/virtual-kubelet/node/api/statsv1alpha1"
"github.com/virtual-kubelet/virtual-kubelet/trace"
v1 "k8s.io/api/core/v1"
"k8s.io/apimachinery/pkg/api/resource"
metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
stats "k8s.io/kubernetes/pkg/kubelet/apis/stats/v1alpha1"
)
const (
@@ -339,7 +339,7 @@ func (p *MockProvider) ConfigureNode(ctx context.Context, n *v1.Node) { // nolin
n.Status.DaemonEndpoints = p.nodeDaemonEndpoints()
os := p.operatingSystem
if os == "" {
os = "Linux"
os = "linux"
}
n.Status.NodeInfo.OperatingSystem = os
n.Status.NodeInfo.Architecture = "amd64"

View File

@@ -2,35 +2,15 @@ package provider
import (
"context"
"io"
"github.com/virtual-kubelet/virtual-kubelet/node"
"github.com/virtual-kubelet/virtual-kubelet/node/api"
"github.com/virtual-kubelet/virtual-kubelet/node/nodeutil"
v1 "k8s.io/api/core/v1"
stats "k8s.io/kubernetes/pkg/kubelet/apis/stats/v1alpha1"
)
// Provider contains the methods required to implement a virtual-kubelet provider.
//
// Errors produced by these methods should implement an interface from
// github.com/virtual-kubelet/virtual-kubelet/errdefs package in order for the
// core logic to be able to understand the type of failure.
// Provider wraps the core provider type with an extra function needed to bootstrap the node
type Provider interface {
node.PodLifecycleHandler
// GetContainerLogs retrieves the logs of a container by name from the provider.
GetContainerLogs(ctx context.Context, namespace, podName, containerName string, opts api.ContainerLogOpts) (io.ReadCloser, error)
// RunInContainer executes a command in a container in the pod, copying data
// between in/out/err and the container's stdin/stdout/stderr.
RunInContainer(ctx context.Context, namespace, podName, containerName string, cmd []string, attach api.AttachIO) error
nodeutil.Provider
// ConfigureNode enables a provider to configure the node object that
// will be used for Kubernetes.
ConfigureNode(context.Context, *v1.Node)
}
// PodMetricsProvider is an optional interface that providers can implement to expose pod stats
type PodMetricsProvider interface {
GetStatsSummary(context.Context) (*stats.Summary, error)
}

View File

@@ -2,9 +2,9 @@ package provider
const (
// OperatingSystemLinux is the configuration value for defining Linux.
OperatingSystemLinux = "Linux"
OperatingSystemLinux = "linux"
// OperatingSystemWindows is the configuration value for defining Windows.
OperatingSystemWindows = "Windows"
OperatingSystemWindows = "windows"
)
type OperatingSystems map[string]bool // nolint:golint

78
go.mod
View File

@@ -9,68 +9,26 @@ require (
github.com/docker/spdystream v0.0.0-20170912183627-bc6354cbbc29 // indirect
github.com/elazarl/goproxy v0.0.0-20190421051319-9d40249d3c2f // indirect
github.com/elazarl/goproxy/ext v0.0.0-20190711103511-473e67f1d7d2 // indirect
github.com/google/go-cmp v0.4.0
github.com/gorilla/mux v1.7.0
github.com/google/go-cmp v0.5.2
github.com/gorilla/mux v1.7.3
github.com/mitchellh/go-homedir v1.1.0
github.com/pkg/errors v0.8.1
github.com/prometheus/client_golang v1.0.0
github.com/sirupsen/logrus v1.4.2
github.com/spf13/cobra v0.0.5
github.com/pkg/errors v0.9.1
github.com/prometheus/client_golang v1.7.1
github.com/sirupsen/logrus v1.6.0
github.com/spf13/cobra v1.0.0
github.com/spf13/pflag v1.0.5
go.opencensus.io v0.21.0
golang.org/x/sync v0.0.0-20190911185100-cd5d95a43a6e
golang.org/x/sys v0.0.0-20200323222414-85ca7c5b95cd
golang.org/x/time v0.0.0-20190308202827-9d24e82272b4
go.opencensus.io v0.22.2
golang.org/x/sync v0.0.0-20201020160332-67f06af15bc9
golang.org/x/sys v0.0.0-20201112073958-5cba982894dd
golang.org/x/time v0.0.0-20200630173020-3af7569d3a1e
google.golang.org/api v0.15.1 // indirect
gotest.tools v2.2.0+incompatible
k8s.io/api v0.18.6
k8s.io/apimachinery v0.18.6
k8s.io/apiserver v0.18.4
k8s.io/client-go v0.18.6
k8s.io/api v0.19.10
k8s.io/apimachinery v0.19.10
k8s.io/apiserver v0.19.10
k8s.io/client-go v0.19.10
k8s.io/klog v1.0.0
k8s.io/klog/v2 v2.0.0
k8s.io/kubernetes v1.18.4
k8s.io/utils v0.0.0-20200603063816-c1c6865ac451
sigs.k8s.io/controller-runtime v0.6.3
k8s.io/klog/v2 v2.2.0
k8s.io/utils v0.0.0-20200912215256-4140de9c8800
sigs.k8s.io/controller-runtime v0.7.1
)
replace k8s.io/legacy-cloud-providers => k8s.io/legacy-cloud-providers v0.18.4
replace k8s.io/cloud-provider => k8s.io/cloud-provider v0.18.4
replace k8s.io/cli-runtime => k8s.io/cli-runtime v0.18.4
replace k8s.io/apiserver => k8s.io/apiserver v0.18.4
replace k8s.io/csi-translation-lib => k8s.io/csi-translation-lib v0.18.4
replace k8s.io/cri-api => k8s.io/cri-api v0.18.4
replace k8s.io/kube-aggregator => k8s.io/kube-aggregator v0.18.4
replace k8s.io/kubelet => k8s.io/kubelet v0.18.4
replace k8s.io/kube-controller-manager => k8s.io/kube-controller-manager v0.18.4
replace k8s.io/apimachinery => k8s.io/apimachinery v0.18.4
replace k8s.io/cluster-bootstrap => k8s.io/cluster-bootstrap v0.18.4
replace k8s.io/kube-proxy => k8s.io/kube-proxy v0.18.4
replace k8s.io/component-base => k8s.io/component-base v0.18.4
replace k8s.io/kube-scheduler => k8s.io/kube-scheduler v0.18.4
replace k8s.io/apiextensions-apiserver => k8s.io/apiextensions-apiserver v0.18.4
replace k8s.io/metrics => k8s.io/metrics v0.18.4
replace k8s.io/sample-apiserver => k8s.io/sample-apiserver v0.18.4
replace k8s.io/code-generator => k8s.io/code-generator v0.18.4
replace k8s.io/client-go => k8s.io/client-go v0.18.4
replace k8s.io/kubectl => k8s.io/kubectl v0.18.4
replace k8s.io/api => k8s.io/api v0.18.4

710
go.sum

File diff suppressed because it is too large

View File

@@ -56,6 +56,14 @@ rules:
verbs:
- create
- patch
- apiGroups:
- coordination.k8s.io
resources:
- leases
verbs:
- get
- create
- update
---
apiVersion: rbac.authorization.k8s.io/v1
kind: ClusterRoleBinding

View File

@@ -4,6 +4,8 @@ metadata:
name: vkubelet-mock-0
spec:
containers:
- name: jaeger-tracing
image: jaegertracing/all-in-one:1.22
- name: vkubelet-mock-0
image: virtual-kubelet
# "IfNotPresent" is used to prevent Minikube from trying to pull from the registry (and failing) in the first place.
@@ -23,18 +25,16 @@ spec:
- --klog.logtostderr
- --log-level
- debug
- --trace-exporter
- jaeger
- --trace-sample-rate=always
env:
- name: JAEGER_AGENT_ENDPOINT
value: localhost:6831
- name: KUBELET_PORT
value: "10250"
- name: VKUBELET_POD_IP
valueFrom:
fieldRef:
fieldPath: status.podIP
ports:
- name: metrics
containerPort: 10255
readinessProbe:
httpGet:
path: /stats/summary
port: metrics
serviceAccountName: virtual-kubelet

View File

@@ -2,24 +2,20 @@ package expansion
import (
"testing"
api "k8s.io/kubernetes/pkg/apis/core"
)
func TestMapReference(t *testing.T) {
envs := []api.EnvVar{
{
Name: "FOO",
Value: "bar",
},
{
Name: "ZOO",
Value: "$(FOO)-1",
},
{
Name: "BLU",
Value: "$(ZOO)-2",
},
// We use a struct here instead of a map because we need mappings to happen in order.
// Go maps are randomized.
type envVar struct {
Name string
Value string
}
envs := []envVar{
{"FOO", "bar"},
{"ZOO", "$(FOO)-1"},
{"BLU", "$(ZOO)-2"},
}
declaredEnv := map[string]string{

View File

@@ -0,0 +1,15 @@
Much of this is copied from k8s.io/kubernetes, even if it isn't a 1-1 copy of a
file. This exists so we do not have to import from k8s.io/kubernetes which is
currently problematic. Ideally most or all of this will go away and an upstream
solution is found so that we can share an implementation with Kubelet without
importing from k8s.io/kubernetes
| filename | upstream location |
|----------|-------------------|
| envvars.go | https://github.com/kubernetes/kubernetes/blob/98d5dc5d36d34a7ee13368a7893dcb400ec4e566/pkg/kubelet/envvars/envvars.go#L32 |
| helper.go#ConvertDownwardAPIFieldLabel | https://github.com/kubernetes/kubernetes/blob/98d5dc5d36d34a7ee13368a7893dcb400ec4e566/pkg/apis/core/pods/helpers.go#L65 |
| helper.go#ExtractFieldPathAsString | https://github.com/kubernetes/kubernetes/blob/98d5dc5d36d34a7ee13368a7893dcb400ec4e566/pkg/fieldpath/fieldpath.go#L46 |
| helper.go#SplitMaybeSubscriptedPath | https://github.com/kubernetes/kubernetes/blob/98d5dc5d36d34a7ee13368a7893dcb400ec4e566/pkg/fieldpath/fieldpath.go#L96 |
| helper.go#FormatMap | https://github.com/kubernetes/kubernetes/blob/ea0764452222146c47ec826977f49d7001b0ea8c/pkg/fieldpath/fieldpath.go#L29 |
| helper.go#IsServiceIPSet | https://github.com/kubernetes/kubernetes/blob/ea0764452222146c47ec826977f49d7001b0ea8c/pkg/apis/core/v1/helper/helpers.go#L139 |

View File

@@ -29,10 +29,6 @@ import (
"k8s.io/apimachinery/pkg/util/sets"
apivalidation "k8s.io/apimachinery/pkg/util/validation"
"k8s.io/client-go/tools/record"
podshelper "k8s.io/kubernetes/pkg/apis/core/pods"
v1helper "k8s.io/kubernetes/pkg/apis/core/v1/helper"
fieldpath "k8s.io/kubernetes/pkg/fieldpath"
"k8s.io/kubernetes/pkg/kubelet/envvars"
"k8s.io/utils/pointer"
)
@@ -139,7 +135,7 @@ func getServiceEnvVarMap(rm *manager.ResourceManager, ns string, enableServiceLi
for i := range services {
service := services[i]
// ignore services where ClusterIP is "None" or empty
if !v1helper.IsServiceIPSet(service) {
if !IsServiceIPSet(service) {
continue
}
serviceName := service.Name
@@ -162,7 +158,7 @@ func getServiceEnvVarMap(rm *manager.ResourceManager, ns string, enableServiceLi
mappedServices = append(mappedServices, serviceMap[key])
}
for _, e := range envvars.FromServices(mappedServices) {
for _, e := range FromServices(mappedServices) {
m[e.Name] = e.Value
}
return m, nil
@@ -486,7 +482,7 @@ func getEnvironmentVariableValueWithValueFromFieldRef(ctx context.Context, env *
// podFieldSelectorRuntimeValue returns the runtime value of the given
// selector for a pod.
func podFieldSelectorRuntimeValue(fs *corev1.ObjectFieldSelector, pod *corev1.Pod) (string, error) {
internalFieldPath, _, err := podshelper.ConvertDownwardAPIFieldLabel(fs.APIVersion, fs.FieldPath, "")
internalFieldPath, _, err := ConvertDownwardAPIFieldLabel(fs.APIVersion, fs.FieldPath, "")
if err != nil {
return "", err
}
@@ -497,5 +493,5 @@ func podFieldSelectorRuntimeValue(fs *corev1.ObjectFieldSelector, pod *corev1.Po
return pod.Spec.ServiceAccountName, nil
}
return fieldpath.ExtractFieldPathAsString(pod, internalFieldPath)
return ExtractFieldPathAsString(pod, internalFieldPath)
}

View File

@@ -0,0 +1,112 @@
/*
Copyright 2014 The Kubernetes Authors.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/
package podutils
import (
"fmt"
"net"
"strconv"
"strings"
v1 "k8s.io/api/core/v1"
)
// FromServices builds environment variables that a container is started with,
// which tell the container where to find the services it may need, which are
// provided as an argument.
func FromServices(services []*v1.Service) []v1.EnvVar {
var result []v1.EnvVar
for i := range services {
service := services[i]
// ignore services where ClusterIP is "None" or empty
// the services passed to this method should be pre-filtered
// only services that have the cluster IP set should be included here
if !IsServiceIPSet(service) {
continue
}
// Host
name := makeEnvVariableName(service.Name) + "_SERVICE_HOST"
result = append(result, v1.EnvVar{Name: name, Value: service.Spec.ClusterIP})
// First port - give it the backwards-compatible name
name = makeEnvVariableName(service.Name) + "_SERVICE_PORT"
result = append(result, v1.EnvVar{Name: name, Value: strconv.Itoa(int(service.Spec.Ports[0].Port))})
// All named ports (only the first may be unnamed, checked in validation)
for i := range service.Spec.Ports {
sp := &service.Spec.Ports[i]
if sp.Name != "" {
pn := name + "_" + makeEnvVariableName(sp.Name)
result = append(result, v1.EnvVar{Name: pn, Value: strconv.Itoa(int(sp.Port))})
}
}
// Docker-compatible vars.
result = append(result, makeLinkVariables(service)...)
}
return result
}
func makeEnvVariableName(str string) string {
// TODO: If we simplify to "all names are DNS1123Subdomains" this
// will need two tweaks:
// 1) Handle leading digits
// 2) Handle dots
return strings.ToUpper(strings.Replace(str, "-", "_", -1))
}
func makeLinkVariables(service *v1.Service) []v1.EnvVar {
prefix := makeEnvVariableName(service.Name)
all := []v1.EnvVar{}
for i := range service.Spec.Ports {
sp := &service.Spec.Ports[i]
protocol := string(v1.ProtocolTCP)
if sp.Protocol != "" {
protocol = string(sp.Protocol)
}
hostPort := net.JoinHostPort(service.Spec.ClusterIP, strconv.Itoa(int(sp.Port)))
if i == 0 {
// Docker special-cases the first port.
all = append(all, v1.EnvVar{
Name: prefix + "_PORT",
Value: fmt.Sprintf("%s://%s", strings.ToLower(protocol), hostPort),
})
}
portPrefix := fmt.Sprintf("%s_PORT_%d_%s", prefix, sp.Port, strings.ToUpper(protocol))
all = append(all, []v1.EnvVar{
{
Name: portPrefix,
Value: fmt.Sprintf("%s://%s", strings.ToLower(protocol), hostPort),
},
{
Name: portPrefix + "_PROTO",
Value: strings.ToLower(protocol),
},
{
Name: portPrefix + "_PORT",
Value: strconv.Itoa(int(sp.Port)),
},
{
Name: portPrefix + "_ADDR",
Value: service.Spec.ClusterIP,
},
}...)
}
return all
}

156
internal/podutils/helper.go Normal file
View File

@@ -0,0 +1,156 @@
/*
Copyright 2014 The Kubernetes Authors.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/
package podutils
import (
"fmt"
"strings"
corev1 "k8s.io/api/core/v1"
"k8s.io/apimachinery/pkg/api/meta"
"k8s.io/apimachinery/pkg/util/sets"
"k8s.io/apimachinery/pkg/util/validation"
)
// ConvertDownwardAPIFieldLabel converts the specified downward API field label
// and its value in the pod of the specified version to the internal version,
// and returns the converted label and value. This function returns an error if
// the conversion fails.
func ConvertDownwardAPIFieldLabel(version, label, value string) (string, string, error) {
if version != "v1" {
return "", "", fmt.Errorf("unsupported pod version: %s", version)
}
if path, _, ok := SplitMaybeSubscriptedPath(label); ok {
switch path {
case "metadata.annotations", "metadata.labels":
return label, value, nil
default:
return "", "", fmt.Errorf("field label does not support subscript: %s", label)
}
}
switch label {
case "metadata.annotations",
"metadata.labels",
"metadata.name",
"metadata.namespace",
"metadata.uid",
"spec.nodeName",
"spec.restartPolicy",
"spec.serviceAccountName",
"spec.schedulerName",
"status.phase",
"status.hostIP",
"status.podIP",
"status.podIPs":
return label, value, nil
// This is for backwards compatibility with old v1 clients which send spec.host
case "spec.host":
return "spec.nodeName", value, nil
default:
return "", "", fmt.Errorf("field label not supported: %s", label)
}
}
// ExtractFieldPathAsString extracts the field from the given object
// and returns it as a string. The object must be a pointer to an
// API type.
func ExtractFieldPathAsString(obj interface{}, fieldPath string) (string, error) {
accessor, err := meta.Accessor(obj)
if err != nil {
return "", err
}
if path, subscript, ok := SplitMaybeSubscriptedPath(fieldPath); ok {
switch path {
case "metadata.annotations":
if errs := validation.IsQualifiedName(strings.ToLower(subscript)); len(errs) != 0 {
return "", fmt.Errorf("invalid key subscript in %s: %s", fieldPath, strings.Join(errs, ";"))
}
return accessor.GetAnnotations()[subscript], nil
case "metadata.labels":
if errs := validation.IsQualifiedName(subscript); len(errs) != 0 {
return "", fmt.Errorf("invalid key subscript in %s: %s", fieldPath, strings.Join(errs, ";"))
}
return accessor.GetLabels()[subscript], nil
default:
return "", fmt.Errorf("fieldPath %q does not support subscript", fieldPath)
}
}
switch fieldPath {
case "metadata.annotations":
return FormatMap(accessor.GetAnnotations()), nil
case "metadata.labels":
return FormatMap(accessor.GetLabels()), nil
case "metadata.name":
return accessor.GetName(), nil
case "metadata.namespace":
return accessor.GetNamespace(), nil
case "metadata.uid":
return string(accessor.GetUID()), nil
}
return "", fmt.Errorf("unsupported fieldPath: %v", fieldPath)
}
// SplitMaybeSubscriptedPath checks whether the specified fieldPath is
// subscripted, and
// - if yes, this function splits the fieldPath into path and subscript, and
// returns (path, subscript, true).
// - if no, this function returns (fieldPath, "", false).
//
// Example inputs and outputs:
// - "metadata.annotations['myKey']" --> ("metadata.annotations", "myKey", true)
// - "metadata.annotations['a[b]c']" --> ("metadata.annotations", "a[b]c", true)
// - "metadata.labels['']" --> ("metadata.labels", "", true)
// - "metadata.labels" --> ("metadata.labels", "", false)
func SplitMaybeSubscriptedPath(fieldPath string) (string, string, bool) {
if !strings.HasSuffix(fieldPath, "']") {
return fieldPath, "", false
}
s := strings.TrimSuffix(fieldPath, "']")
parts := strings.SplitN(s, "['", 2)
if len(parts) < 2 {
return fieldPath, "", false
}
if len(parts[0]) == 0 {
return fieldPath, "", false
}
return parts[0], parts[1], true
}
// FormatMap formats map[string]string to a string.
func FormatMap(m map[string]string) (fmtStr string) {
// output with keys in sorted order to provide stable output
keys := sets.NewString()
for key := range m {
keys.Insert(key)
}
for _, key := range keys.List() {
fmtStr += fmt.Sprintf("%v=%q\n", key, m[key])
}
fmtStr = strings.TrimSuffix(fmtStr, "\n")
return
}
// IsServiceIPSet aims to check if the service's ClusterIP is set or not the objective is not to perform validation here
func IsServiceIPSet(service *corev1.Service) bool {
return service.Spec.ClusterIP != corev1.ClusterIPNone && service.Spec.ClusterIP != ""
}

View File

@@ -35,6 +35,9 @@ const (
MaxRetries = 20
)
// ShouldRetryFunc is a mechanism to have a custom retry policy
type ShouldRetryFunc func(ctx context.Context, key string, timesTried int, originallyAdded time.Time, err error) (*time.Duration, error)
// ItemHandler is a callback that handles a single key on the Queue
type ItemHandler func(ctx context.Context, key string) error
@@ -61,6 +64,8 @@ type Queue struct {
// wakeup
wakeupCh chan struct{}
retryFunc ShouldRetryFunc
}
type queueItem struct {
@@ -83,9 +88,12 @@ func (item *queueItem) String() string {
// New creates a queue
//
// It expects to get a item rate limiter, and a friendly name which is used in logs, and
// in the internal kubernetes metrics.
func New(ratelimiter workqueue.RateLimiter, name string, handler ItemHandler) *Queue {
// It expects to get an item rate limiter, and a friendly name which is used in logs, and in the internal kubernetes
// metrics. If retryFunc is nil, the default retry function is used.
func New(ratelimiter workqueue.RateLimiter, name string, handler ItemHandler, retryFunc ShouldRetryFunc) *Queue {
if retryFunc == nil {
retryFunc = DefaultRetryFunc
}
return &Queue{
clock: clock.RealClock{},
name: name,
@@ -96,6 +104,7 @@ func New(ratelimiter workqueue.RateLimiter, name string, handler ItemHandler) *Q
handler: handler,
wakeupCh: make(chan struct{}, 1),
waitForNextItemSemaphore: semaphore.NewWeighted(1),
retryFunc: retryFunc,
}
}
@@ -104,7 +113,7 @@ func (q *Queue) Enqueue(ctx context.Context, key string) {
q.lock.Lock()
defer q.lock.Unlock()
q.insert(ctx, key, true, 0)
q.insert(ctx, key, true, nil)
}
// EnqueueWithoutRateLimit enqueues the key without a rate limit
@@ -112,7 +121,7 @@ func (q *Queue) EnqueueWithoutRateLimit(ctx context.Context, key string) {
q.lock.Lock()
defer q.lock.Unlock()
q.insert(ctx, key, false, 0)
q.insert(ctx, key, false, nil)
}
// Forget forgets the key
@@ -142,9 +151,20 @@ func (q *Queue) Forget(ctx context.Context, key string) {
span.WithField(ctx, "status", "notfound")
}
func durationDeref(duration *time.Duration, def time.Duration) time.Duration {
if duration == nil {
return def
}
return *duration
}
// insert inserts a new item to be processed at time time. It will not further delay items if when is later than the
// original time the item was scheduled to be processed. If when is earlier, it will "bring it forward"
func (q *Queue) insert(ctx context.Context, key string, ratelimit bool, delay time.Duration) *queueItem {
// If ratelimit is specified, and delay is nil, then the ratelimiter's delay (return from When function) will be used
// If ratelimit is specified, and the delay is non-nil, then the delay value will be used
// If ratelimit is false, then only delay is used to schedule the work. If delay is nil, it will be considered 0.
func (q *Queue) insert(ctx context.Context, key string, ratelimit bool, delay *time.Duration) *queueItem {
ctx, span := trace.StartSpan(ctx, "insert")
defer span.End()
@@ -153,7 +173,9 @@ func (q *Queue) insert(ctx context.Context, key string, ratelimit bool, delay ti
"key": key,
"ratelimit": ratelimit,
})
if delay > 0 {
if delay == nil {
ctx = span.WithField(ctx, "delay", "nil")
} else {
ctx = span.WithField(ctx, "delay", delay.String())
}
@@ -167,7 +189,7 @@ func (q *Queue) insert(ctx context.Context, key string, ratelimit bool, delay ti
// First see if the item is already being processed
if item, ok := q.itemsBeingProcessed[key]; ok {
span.WithField(ctx, "status", "itemsBeingProcessed")
when := q.clock.Now().Add(delay)
when := q.clock.Now().Add(durationDeref(delay, 0))
// Is the item already been redirtied?
if item.redirtiedAt.IsZero() {
item.redirtiedAt = when
@@ -184,7 +206,7 @@ func (q *Queue) insert(ctx context.Context, key string, ratelimit bool, delay ti
if item, ok := q.itemsInQueue[key]; ok {
span.WithField(ctx, "status", "itemsInQueue")
qi := item.Value.(*queueItem)
when := q.clock.Now().Add(delay)
when := q.clock.Now().Add(durationDeref(delay, 0))
q.adjustPosition(qi, item, when)
return qi
}
@@ -198,15 +220,16 @@ func (q *Queue) insert(ctx context.Context, key string, ratelimit bool, delay ti
}
if ratelimit {
if delay > 0 {
panic("Non-zero delay with rate limiting not supported")
actualDelay := q.ratelimiter.When(key)
// Check if delay is overridden
if delay != nil {
actualDelay = *delay
}
ratelimitDelay := q.ratelimiter.When(key)
span.WithField(ctx, "delay", ratelimitDelay.String())
val.plannedToStartWorkAt = val.plannedToStartWorkAt.Add(ratelimitDelay)
val.delayedViaRateLimit = &ratelimitDelay
span.WithField(ctx, "delay", actualDelay.String())
val.plannedToStartWorkAt = val.plannedToStartWorkAt.Add(actualDelay)
val.delayedViaRateLimit = &actualDelay
} else {
val.plannedToStartWorkAt = val.plannedToStartWorkAt.Add(delay)
val.plannedToStartWorkAt = val.plannedToStartWorkAt.Add(durationDeref(delay, 0))
}
for item := q.items.Back(); item != nil; item = item.Prev() {
@@ -244,7 +267,7 @@ func (q *Queue) adjustPosition(qi *queueItem, element *list.Element, when time.T
func (q *Queue) EnqueueWithoutRateLimitWithDelay(ctx context.Context, key string, after time.Duration) {
q.lock.Lock()
defer q.lock.Unlock()
q.insert(ctx, key, false, after)
q.insert(ctx, key, false, &after)
}
// Empty returns if the queue has no items in it
@@ -423,25 +446,37 @@ func (q *Queue) handleQueueItemObject(ctx context.Context, qi *queueItem) error
}
if err != nil {
if qi.requeues+1 < MaxRetries {
ctx = span.WithField(ctx, "error", err.Error())
var delay *time.Duration
// Stash the original error for logging below
originalError := err
delay, err = q.retryFunc(ctx, qi.key, qi.requeues+1, qi.originallyAdded, err)
if err == nil {
// Put the item back on the work Queue to handle any transient errors.
log.G(ctx).WithError(err).Warnf("requeuing %q due to failed sync", qi.key)
newQI := q.insert(ctx, qi.key, true, 0)
log.G(ctx).WithError(originalError).Warnf("requeuing %q due to failed sync", qi.key)
newQI := q.insert(ctx, qi.key, true, delay)
newQI.requeues = qi.requeues + 1
newQI.originallyAdded = qi.originallyAdded
return nil
}
err = pkgerrors.Wrapf(err, "forgetting %q due to maximum retries reached", qi.key)
if !qi.redirtiedAt.IsZero() {
err = fmt.Errorf("temporarily (requeued) forgetting %q due to: %w", qi.key, err)
} else {
err = fmt.Errorf("forgetting %q due to: %w", qi.key, err)
}
}
// We've exceeded the maximum retries or we were successful.
q.ratelimiter.Forget(qi.key)
if !qi.redirtiedAt.IsZero() {
newQI := q.insert(ctx, qi.key, qi.redirtiedWithRatelimit, time.Until(qi.redirtiedAt))
delay := time.Until(qi.redirtiedAt)
newQI := q.insert(ctx, qi.key, qi.redirtiedWithRatelimit, &delay)
newQI.addedViaRedirty = true
}
span.SetStatus(err)
return err
}
@@ -456,3 +491,12 @@ func (q *Queue) String() string {
}
return fmt.Sprintf("<items:%s>", items)
}
// DefaultRetryFunc is the default function used for retries by the queue subsystem.
func DefaultRetryFunc(ctx context.Context, key string, timesTried int, originallyAdded time.Time, err error) (*time.Duration, error) {
if timesTried < MaxRetries {
return nil, nil
}
return nil, pkgerrors.Wrapf(err, "maximum retries (%d) reached", MaxRetries)
}
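As a hedged illustration of the new retry hook (written as if inside this package, since it is internal; the sentinel error, handler body, and 50ms delay are placeholders that roughly mirror the test further down):

var errTransient = errors.New("transient failure")

handler := func(ctx context.Context, key string) error {
    // ... do the work for key; return errTransient on a retryable failure ...
    return nil
}

retryFunc := func(ctx context.Context, key string, timesTried int, originallyAdded time.Time, err error) (*time.Duration, error) {
    if errors.Is(err, errTransient) {
        d := 50 * time.Millisecond
        return &d, nil // a non-nil delay overrides the rate limiter's When() on requeue
    }
    // Otherwise fall back to the default maximum-retries behavior.
    return DefaultRetryFunc(ctx, key, timesTried, originallyAdded, err)
}

q := New(workqueue.DefaultItemBasedRateLimiter(), "example", handler, retryFunc)
q.Enqueue(context.TODO(), "some-key")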


@@ -19,6 +19,10 @@ import (
"k8s.io/utils/clock"
)
func durationPtr(d time.Duration) *time.Duration {
return &d
}
func TestQueueMaxRetries(t *testing.T) {
ctx, cancel := context.WithCancel(context.Background())
defer cancel()
@@ -35,7 +39,7 @@ func TestQueueMaxRetries(t *testing.T) {
// The default upper bound is 1000 seconds. Let's not use that.
workqueue.NewItemExponentialFailureRateLimiter(5*time.Millisecond, 10*time.Millisecond),
&workqueue.BucketRateLimiter{Limiter: rate.NewLimiter(rate.Limit(10), 100)},
), t.Name(), handler)
), t.Name(), handler, nil)
wq.Enqueue(context.TODO(), "test")
for n < MaxRetries {
@@ -46,12 +50,63 @@ func TestQueueMaxRetries(t *testing.T) {
assert.Assert(t, is.Equal(0, wq.Len()))
}
func TestQueueCustomRetries(t *testing.T) {
ctx, cancel := context.WithCancel(context.Background())
defer cancel()
logger := logrus.New()
logger.SetLevel(logrus.DebugLevel)
ctx = log.WithLogger(ctx, logruslogger.FromLogrus(logrus.NewEntry(logger)))
n := 0
errorSeen := 0
retryTestError := errors.New("Error should be retried every 10 milliseconds")
handler := func(ctx context.Context, key string) error {
if key == "retrytest" {
n++
return retryTestError
}
return errors.New("Unknown error")
}
shouldRetryFunc := func(ctx context.Context, key string, timesTried int, originallyAdded time.Time, err error) (*time.Duration, error) {
var sleepTime *time.Duration
if errors.Is(err, retryTestError) {
errorSeen++
sleepTime = durationPtr(10 * time.Millisecond)
}
_, retErr := DefaultRetryFunc(ctx, key, timesTried, originallyAdded, err)
return sleepTime, retErr
}
wq := New(&workqueue.BucketRateLimiter{Limiter: rate.NewLimiter(rate.Limit(1000), 1000)}, t.Name(), handler, shouldRetryFunc)
timeTaken := func(key string) time.Duration {
start := time.Now()
wq.Enqueue(context.TODO(), key)
for i := 0; i < MaxRetries; i++ {
assert.Assert(t, wq.handleQueueItem(ctx))
}
return time.Since(start)
}
unknownTime := timeTaken("unknown")
assert.Assert(t, n == 0)
assert.Assert(t, unknownTime < 10*time.Millisecond)
retrytestTime := timeTaken("retrytest")
assert.Assert(t, is.Equal(n, MaxRetries))
assert.Assert(t, is.Equal(errorSeen, MaxRetries))
assert.Assert(t, is.Equal(0, wq.Len()))
assert.Assert(t, retrytestTime > 10*time.Millisecond*time.Duration(n-1))
assert.Assert(t, retrytestTime < 2*10*time.Millisecond*time.Duration(n-1))
}
func TestForget(t *testing.T) {
t.Parallel()
handler := func(ctx context.Context, key string) error {
panic("Should never be called")
}
wq := New(workqueue.DefaultItemBasedRateLimiter(), t.Name(), handler)
wq := New(workqueue.DefaultItemBasedRateLimiter(), t.Name(), handler, nil)
wq.Forget(context.TODO(), "val")
assert.Assert(t, is.Equal(0, wq.Len()))
@@ -68,7 +123,7 @@ func TestQueueEmpty(t *testing.T) {
q := New(workqueue.DefaultItemBasedRateLimiter(), t.Name(), func(ctx context.Context, key string) error {
return nil
})
}, nil)
item, err := q.getNextItem(ctx)
assert.Error(t, err, context.DeadlineExceeded.Error())
@@ -83,11 +138,11 @@ func TestQueueItemNoSleep(t *testing.T) {
q := New(workqueue.DefaultItemBasedRateLimiter(), t.Name(), func(ctx context.Context, key string) error {
return nil
})
}, nil)
q.lock.Lock()
q.insert(ctx, "foo", false, -1*time.Hour)
q.insert(ctx, "bar", false, -1*time.Hour)
q.insert(ctx, "foo", false, durationPtr(-1*time.Hour))
q.insert(ctx, "bar", false, durationPtr(-1*time.Hour))
q.lock.Unlock()
item, err := q.getNextItem(ctx)
@@ -107,10 +162,10 @@ func TestQueueItemSleep(t *testing.T) {
q := New(workqueue.DefaultItemBasedRateLimiter(), t.Name(), func(ctx context.Context, key string) error {
return nil
})
}, nil)
q.lock.Lock()
q.insert(ctx, "foo", false, 100*time.Millisecond)
q.insert(ctx, "bar", false, 100*time.Millisecond)
q.insert(ctx, "foo", false, durationPtr(100*time.Millisecond))
q.insert(ctx, "bar", false, durationPtr(100*time.Millisecond))
q.lock.Unlock()
item, err := q.getNextItem(ctx)
@@ -126,12 +181,12 @@ func TestQueueBackgroundAdd(t *testing.T) {
q := New(workqueue.DefaultItemBasedRateLimiter(), t.Name(), func(ctx context.Context, key string) error {
return nil
})
}, nil)
start := time.Now()
time.AfterFunc(100*time.Millisecond, func() {
q.lock.Lock()
defer q.lock.Unlock()
q.insert(ctx, "foo", false, 0)
q.insert(ctx, "foo", false, nil)
})
item, err := q.getNextItem(ctx)
@@ -148,16 +203,16 @@ func TestQueueBackgroundAdvance(t *testing.T) {
q := New(workqueue.DefaultItemBasedRateLimiter(), t.Name(), func(ctx context.Context, key string) error {
return nil
})
}, nil)
start := time.Now()
q.lock.Lock()
q.insert(ctx, "foo", false, 10*time.Second)
q.insert(ctx, "foo", false, durationPtr(10*time.Second))
q.lock.Unlock()
time.AfterFunc(200*time.Millisecond, func() {
q.lock.Lock()
defer q.lock.Unlock()
q.insert(ctx, "foo", false, 0)
q.insert(ctx, "foo", false, nil)
})
item, err := q.getNextItem(ctx)
@@ -183,7 +238,7 @@ func TestQueueRedirty(t *testing.T) {
cancel()
}
return nil
})
}, nil)
q.EnqueueWithoutRateLimit(context.TODO(), "foo")
q.Run(ctx, 1)
@@ -205,7 +260,7 @@ func TestHeapConcurrency(t *testing.T) {
seen.Store(key, struct{}{})
time.Sleep(time.Second)
return nil
})
}, nil)
for i := 0; i < 20; i++ {
q.EnqueueWithoutRateLimit(context.TODO(), strconv.Itoa(i))
}
@@ -238,7 +293,7 @@ func checkConsistency(t *testing.T, q *Queue) {
func TestHeapOrder(t *testing.T) {
q := New(workqueue.DefaultItemBasedRateLimiter(), t.Name(), func(ctx context.Context, key string) error {
return nil
})
}, nil)
q.clock = nonmovingClock{}
q.EnqueueWithoutRateLimitWithDelay(context.TODO(), "a", 1000)
@@ -311,7 +366,7 @@ func TestRateLimiter(t *testing.T) {
return errors.New("test")
}
return nil
})
}, nil)
enqueued := 0
syncMap.Range(func(key, value interface{}) bool {
@@ -371,7 +426,7 @@ func TestQueueForgetInProgress(t *testing.T) {
atomic.AddInt64(&times, 1)
q.Forget(context.TODO(), key)
return errors.New("test")
})
}, nil)
q.EnqueueWithoutRateLimit(context.TODO(), "foo")
go q.Run(ctx, 1)
@@ -388,7 +443,7 @@ func TestQueueForgetBeforeStart(t *testing.T) {
q := New(workqueue.DefaultItemBasedRateLimiter(), t.Name(), func(ctx context.Context, key string) error {
panic("shouldn't be called")
})
}, nil)
q.EnqueueWithoutRateLimit(context.TODO(), "foo")
q.Forget(context.TODO(), "foo")
@@ -405,24 +460,24 @@ func TestQueueMoveItem(t *testing.T) {
defer cancel()
q := New(workqueue.DefaultItemBasedRateLimiter(), t.Name(), func(ctx context.Context, key string) error {
panic("shouldn't be called")
})
}, nil)
q.clock = nonmovingClock{}
q.insert(ctx, "foo", false, 3000)
q.insert(ctx, "bar", false, 2000)
q.insert(ctx, "baz", false, 1000)
q.insert(ctx, "foo", false, durationPtr(3000))
q.insert(ctx, "bar", false, durationPtr(2000))
q.insert(ctx, "baz", false, durationPtr(1000))
checkConsistency(t, q)
t.Log(q)
q.insert(ctx, "foo", false, 2000)
q.insert(ctx, "foo", false, durationPtr(2000))
checkConsistency(t, q)
t.Log(q)
q.insert(ctx, "foo", false, 1999)
q.insert(ctx, "foo", false, durationPtr(1999))
checkConsistency(t, q)
t.Log(q)
q.insert(ctx, "foo", false, 999)
q.insert(ctx, "foo", false, durationPtr(999))
checkConsistency(t, q)
t.Log(q)
}


@@ -6,13 +6,13 @@ import (
"strings"
corev1 "k8s.io/api/core/v1"
v1 "k8s.io/api/core/v1"
metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
"k8s.io/apimachinery/pkg/fields"
"k8s.io/apimachinery/pkg/runtime"
watchapi "k8s.io/apimachinery/pkg/watch"
"k8s.io/client-go/tools/cache"
"k8s.io/client-go/tools/watch"
podutil "k8s.io/kubernetes/pkg/api/v1/pod"
)
// CreateDummyPodObjectWithPrefix creates a dummy pod object using the specified prefix as the value of .metadata.generateName.
@@ -101,10 +101,20 @@ func (f *Framework) WaitUntilPodCondition(namespace, name string, fn watch.Condi
func (f *Framework) WaitUntilPodReady(namespace, name string) (*corev1.Pod, error) {
return f.WaitUntilPodCondition(namespace, name, func(event watchapi.Event) (bool, error) {
pod := event.Object.(*corev1.Pod)
return pod.Status.Phase == corev1.PodRunning && podutil.IsPodReady(pod) && pod.Status.PodIP != "", nil
return pod.Status.Phase == corev1.PodRunning && IsPodReady(pod) && pod.Status.PodIP != "", nil
})
}
// IsPodReady returns true if a pod is ready.
func IsPodReady(pod *v1.Pod) bool {
for _, cond := range pod.Status.Conditions {
if cond.Type == v1.PodReady && cond.Status == v1.ConditionTrue {
return true
}
}
return false
}
// WaitUntilPodDeleted blocks until the pod with the specified name and namespace is deleted from apiserver.
func (f *Framework) WaitUntilPodDeleted(namespace, name string) (*corev1.Pod, error) {
return f.WaitUntilPodCondition(namespace, name, func(event watchapi.Event) (bool, error) {


@@ -3,10 +3,9 @@ package framework
import (
"context"
"encoding/json"
"strconv"
stats "github.com/virtual-kubelet/virtual-kubelet/node/api/statsv1alpha1"
"k8s.io/apimachinery/pkg/util/net"
stats "k8s.io/kubernetes/pkg/kubelet/apis/stats/v1alpha1"
)
// GetStatsSummary queries the /stats/summary endpoint of the virtual-kubelet and returns the Summary object obtained as a response.
@@ -18,7 +17,7 @@ func (f *Framework) GetStatsSummary(ctx context.Context) (*stats.Summary, error)
Namespace(f.Namespace).
Resource("pods").
SubResource("proxy").
Name(net.JoinSchemeNamePort("http", f.NodeName, strconv.Itoa(10255))).
Name(net.JoinSchemeNamePort("https", f.NodeName, "10250")).
Suffix("/stats/summary").DoRaw(ctx)
if err != nil {
return nil, err


@@ -27,7 +27,6 @@ import (
"github.com/virtual-kubelet/virtual-kubelet/internal/kubernetes/remotecommand"
"k8s.io/apimachinery/pkg/types"
remoteutils "k8s.io/client-go/tools/remotecommand"
api "k8s.io/kubernetes/pkg/apis/core"
)
// ContainerExecHandlerFunc defines the handler function used for "execing" into a
@@ -136,11 +135,18 @@ func HandleContainerExec(h ContainerExecHandlerFunc, opts ...ContainerExecHandle
})
}
const (
execTTYParam = "tty"
execStdinParam = "input"
execStdoutParam = "output"
execStderrParam = "error"
)
func getExecOptions(req *http.Request) (*remotecommand.Options, error) {
tty := req.FormValue(api.ExecTTYParam) == "1"
stdin := req.FormValue(api.ExecStdinParam) == "1"
stdout := req.FormValue(api.ExecStdoutParam) == "1"
stderr := req.FormValue(api.ExecStderrParam) == "1"
tty := req.FormValue(execTTYParam) == "1"
stdin := req.FormValue(execStdinParam) == "1"
stdout := req.FormValue(execStdoutParam) == "1"
stderr := req.FormValue(execStderrParam) == "1"
if tty && stderr {
return nil, errors.New("cannot exec with tty and stderr")
}


@@ -20,11 +20,11 @@ import (
"net/http"
"github.com/pkg/errors"
stats "k8s.io/kubernetes/pkg/kubelet/apis/stats/v1alpha1"
"github.com/virtual-kubelet/virtual-kubelet/node/api/statsv1alpha1"
)
// PodStatsSummaryHandlerFunc defines the handler for getting pod stats summaries
type PodStatsSummaryHandlerFunc func(context.Context) (*stats.Summary, error)
type PodStatsSummaryHandlerFunc func(context.Context) (*statsv1alpha1.Summary, error)
// HandlePodStatsSummary makes an HTTP handler for implementing the kubelet summary stats endpoint
func HandlePodStatsSummary(h PodStatsSummaryHandlerFunc) http.HandlerFunc {


@@ -0,0 +1,7 @@
These types are copied from the [k8s.io/kubelet](https://pkg.go.dev/k8s.io/kubelet@v0.21.0/pkg/apis/stats/v1alpha1) module.
They are referenced via a type alias in the API package.
It is done this way because the k8s.io/kubelet module is only available from 1.20 onward, but we are currently pinned to v1.19 and plan to continue supporting v1.19 for some time.
Likewise, we want to stop importing k8s.io/kubernetes (where the older type definitions live), since that transitively imports all of Kubernetes.
After the minimum supported version is v1.20, we can update the type alias to point to the module and remove these type definitions.
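For illustration, a hedged sketch of what that future swap could look like (the import path is the k8s.io/kubelet package linked above; the alias file itself is an assumption, not part of this change):

package statsv1alpha1

// Hypothetical future state once the minimum supported version is v1.20:
// the copied definitions are replaced with aliases so existing identifiers keep compiling.
import stats "k8s.io/kubelet/pkg/apis/stats/v1alpha1"

type Summary = stats.Summary
type NodeStats = stats.NodeStats
type PodStats = stats.PodStats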


@@ -0,0 +1,345 @@
/*
Copyright 2015 The Kubernetes Authors.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/
package statsv1alpha1
import (
metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
)
// Summary is a top-level container for holding NodeStats and PodStats.
type Summary struct {
// Overall node stats.
Node NodeStats `json:"node"`
// Per-pod stats.
Pods []PodStats `json:"pods"`
}
// NodeStats holds node-level unprocessed sample stats.
type NodeStats struct {
// Reference to the measured Node.
NodeName string `json:"nodeName"`
// Stats of system daemons tracked as raw containers.
// The system containers are named according to the SystemContainer* constants.
// +optional
// +patchMergeKey=name
// +patchStrategy=merge
SystemContainers []ContainerStats `json:"systemContainers,omitempty" patchStrategy:"merge" patchMergeKey:"name"`
// The time at which data collection for the node-scoped (i.e. aggregate) stats was (re)started.
StartTime metav1.Time `json:"startTime"`
// Stats pertaining to CPU resources.
// +optional
CPU *CPUStats `json:"cpu,omitempty"`
// Stats pertaining to memory (RAM) resources.
// +optional
Memory *MemoryStats `json:"memory,omitempty"`
// Stats pertaining to network resources.
// +optional
Network *NetworkStats `json:"network,omitempty"`
// Stats pertaining to total usage of filesystem resources on the rootfs used by node k8s components.
// NodeFs.Used is the total bytes used on the filesystem.
// +optional
Fs *FsStats `json:"fs,omitempty"`
// Stats about the underlying container runtime.
// +optional
Runtime *RuntimeStats `json:"runtime,omitempty"`
// Stats about the rlimit of system.
// +optional
Rlimit *RlimitStats `json:"rlimit,omitempty"`
}
// RlimitStats are stats rlimit of OS.
type RlimitStats struct {
Time metav1.Time `json:"time"`
// The max PID of OS.
MaxPID *int64 `json:"maxpid,omitempty"`
// The number of running process in the OS.
NumOfRunningProcesses *int64 `json:"curproc,omitempty"`
}
// RuntimeStats are stats pertaining to the underlying container runtime.
type RuntimeStats struct {
// Stats about the underlying filesystem where container images are stored.
// This filesystem could be the same as the primary (root) filesystem.
// Usage here refers to the total number of bytes occupied by images on the filesystem.
// +optional
ImageFs *FsStats `json:"imageFs,omitempty"`
}
const (
// SystemContainerKubelet is the container name for the system container tracking Kubelet usage.
SystemContainerKubelet = "kubelet"
// SystemContainerRuntime is the container name for the system container tracking the runtime (e.g. docker) usage.
SystemContainerRuntime = "runtime"
// SystemContainerMisc is the container name for the system container tracking non-kubernetes processes.
SystemContainerMisc = "misc"
// SystemContainerPods is the container name for the system container tracking user pods.
SystemContainerPods = "pods"
)
// ProcessStats are stats pertaining to processes.
type ProcessStats struct {
// Number of processes
// +optional
ProcessCount *uint64 `json:"process_count,omitempty"`
}
// PodStats holds pod-level unprocessed sample stats.
type PodStats struct {
// Reference to the measured Pod.
PodRef PodReference `json:"podRef"`
// The time at which data collection for the pod-scoped (e.g. network) stats was (re)started.
StartTime metav1.Time `json:"startTime"`
// Stats of containers in the measured pod.
// +patchMergeKey=name
// +patchStrategy=merge
Containers []ContainerStats `json:"containers" patchStrategy:"merge" patchMergeKey:"name"`
// Stats pertaining to CPU resources consumed by pod cgroup (which includes all containers' resource usage and pod overhead).
// +optional
CPU *CPUStats `json:"cpu,omitempty"`
// Stats pertaining to memory (RAM) resources consumed by pod cgroup (which includes all containers' resource usage and pod overhead).
// +optional
Memory *MemoryStats `json:"memory,omitempty"`
// Stats pertaining to network resources.
// +optional
Network *NetworkStats `json:"network,omitempty"`
// Stats pertaining to volume usage of filesystem resources.
// VolumeStats.UsedBytes is the number of bytes used by the Volume
// +optional
// +patchMergeKey=name
// +patchStrategy=merge
VolumeStats []VolumeStats `json:"volume,omitempty" patchStrategy:"merge" patchMergeKey:"name"`
// EphemeralStorage reports the total filesystem usage for the containers and emptyDir-backed volumes in the measured Pod.
// +optional
EphemeralStorage *FsStats `json:"ephemeral-storage,omitempty"`
// ProcessStats pertaining to processes.
// +optional
ProcessStats *ProcessStats `json:"process_stats,omitempty"`
}
// ContainerStats holds container-level unprocessed sample stats.
type ContainerStats struct {
// Reference to the measured container.
Name string `json:"name"`
// The time at which data collection for this container was (re)started.
StartTime metav1.Time `json:"startTime"`
// Stats pertaining to CPU resources.
// +optional
CPU *CPUStats `json:"cpu,omitempty"`
// Stats pertaining to memory (RAM) resources.
// +optional
Memory *MemoryStats `json:"memory,omitempty"`
// Metrics for Accelerators. Each Accelerator corresponds to one element in the array.
Accelerators []AcceleratorStats `json:"accelerators,omitempty"`
// Stats pertaining to container rootfs usage of filesystem resources.
// Rootfs.UsedBytes is the number of bytes used for the container write layer.
// +optional
Rootfs *FsStats `json:"rootfs,omitempty"`
// Stats pertaining to container logs usage of filesystem resources.
// Logs.UsedBytes is the number of bytes used for the container logs.
// +optional
Logs *FsStats `json:"logs,omitempty"`
// User defined metrics that are exposed by containers in the pod. Typically, we expect only one container in the pod to be exposing user defined metrics. In the event of multiple containers exposing metrics, they will be combined here.
// +patchMergeKey=name
// +patchStrategy=merge
UserDefinedMetrics []UserDefinedMetric `json:"userDefinedMetrics,omitempty" patchStrategy:"merge" patchMergeKey:"name"`
}
// PodReference contains enough information to locate the referenced pod.
type PodReference struct {
Name string `json:"name"`
Namespace string `json:"namespace"`
UID string `json:"uid"`
}
// InterfaceStats contains resource value data about interface.
type InterfaceStats struct {
// The name of the interface
Name string `json:"name"`
// Cumulative count of bytes received.
// +optional
RxBytes *uint64 `json:"rxBytes,omitempty"`
// Cumulative count of receive errors encountered.
// +optional
RxErrors *uint64 `json:"rxErrors,omitempty"`
// Cumulative count of bytes transmitted.
// +optional
TxBytes *uint64 `json:"txBytes,omitempty"`
// Cumulative count of transmit errors encountered.
// +optional
TxErrors *uint64 `json:"txErrors,omitempty"`
}
// NetworkStats contains data about network resources.
type NetworkStats struct {
// The time at which these stats were updated.
Time metav1.Time `json:"time"`
// Stats for the default interface, if found
InterfaceStats `json:",inline"`
Interfaces []InterfaceStats `json:"interfaces,omitempty"`
}
// CPUStats contains data about CPU usage.
type CPUStats struct {
// The time at which these stats were updated.
Time metav1.Time `json:"time"`
// Total CPU usage (sum of all cores) averaged over the sample window.
// The "core" unit can be interpreted as CPU core-nanoseconds per second.
// +optional
UsageNanoCores *uint64 `json:"usageNanoCores,omitempty"`
// Cumulative CPU usage (sum of all cores) since object creation.
// +optional
UsageCoreNanoSeconds *uint64 `json:"usageCoreNanoSeconds,omitempty"`
}
// MemoryStats contains data about memory usage.
type MemoryStats struct {
// The time at which these stats were updated.
Time metav1.Time `json:"time"`
// Available memory for use. This is defined as the memory limit - workingSetBytes.
// If memory limit is undefined, the available bytes is omitted.
// +optional
AvailableBytes *uint64 `json:"availableBytes,omitempty"`
// Total memory in use. This includes all memory regardless of when it was accessed.
// +optional
UsageBytes *uint64 `json:"usageBytes,omitempty"`
// The amount of working set memory. This includes recently accessed memory,
// dirty memory, and kernel memory. WorkingSetBytes is <= UsageBytes
// +optional
WorkingSetBytes *uint64 `json:"workingSetBytes,omitempty"`
// The amount of anonymous and swap cache memory (includes transparent
// hugepages).
// +optional
RSSBytes *uint64 `json:"rssBytes,omitempty"`
// Cumulative number of minor page faults.
// +optional
PageFaults *uint64 `json:"pageFaults,omitempty"`
// Cumulative number of major page faults.
// +optional
MajorPageFaults *uint64 `json:"majorPageFaults,omitempty"`
}
// AcceleratorStats contains stats for accelerators attached to the container.
type AcceleratorStats struct {
// Make of the accelerator (nvidia, amd, google etc.)
Make string `json:"make"`
// Model of the accelerator (tesla-p100, tesla-k80 etc.)
Model string `json:"model"`
// ID of the accelerator.
ID string `json:"id"`
// Total accelerator memory.
// unit: bytes
MemoryTotal uint64 `json:"memoryTotal"`
// Total accelerator memory allocated.
// unit: bytes
MemoryUsed uint64 `json:"memoryUsed"`
// Percent of time over the past sample period (10s) during which
// the accelerator was actively processing.
DutyCycle uint64 `json:"dutyCycle"`
}
// VolumeStats contains data about Volume filesystem usage.
type VolumeStats struct {
// Embedded FsStats
FsStats `json:",inline"`
// Name is the name given to the Volume
// +optional
Name string `json:"name,omitempty"`
// Reference to the PVC, if one exists
// +optional
PVCRef *PVCReference `json:"pvcRef,omitempty"`
}
// PVCReference contains enough information to describe the referenced PVC.
type PVCReference struct {
Name string `json:"name"`
Namespace string `json:"namespace"`
}
// FsStats contains data about filesystem usage.
type FsStats struct {
// The time at which these stats were updated.
Time metav1.Time `json:"time"`
// AvailableBytes represents the storage space available (bytes) for the filesystem.
// +optional
AvailableBytes *uint64 `json:"availableBytes,omitempty"`
// CapacityBytes represents the total capacity (bytes) of the filesystems underlying storage.
// +optional
CapacityBytes *uint64 `json:"capacityBytes,omitempty"`
// UsedBytes represents the bytes used for a specific task on the filesystem.
// This may differ from the total bytes used on the filesystem and may not equal CapacityBytes - AvailableBytes.
// e.g. For ContainerStats.Rootfs this is the bytes used by the container rootfs on the filesystem.
// +optional
UsedBytes *uint64 `json:"usedBytes,omitempty"`
// InodesFree represents the free inodes in the filesystem.
// +optional
InodesFree *uint64 `json:"inodesFree,omitempty"`
// Inodes represents the total inodes in the filesystem.
// +optional
Inodes *uint64 `json:"inodes,omitempty"`
// InodesUsed represents the inodes used by the filesystem
// This may not equal Inodes - InodesFree because this filesystem may share inodes with other "filesystems"
// e.g. For ContainerStats.Rootfs, this is the inodes used only by that container, and does not count inodes used by other containers.
InodesUsed *uint64 `json:"inodesUsed,omitempty"`
}
// UserDefinedMetricType defines how the metric should be interpreted by the user.
type UserDefinedMetricType string
const (
// MetricGauge is an instantaneous value. May increase or decrease.
MetricGauge UserDefinedMetricType = "gauge"
// MetricCumulative is a counter-like value that is only expected to increase.
MetricCumulative UserDefinedMetricType = "cumulative"
// MetricDelta is a rate over a time period.
MetricDelta UserDefinedMetricType = "delta"
)
// UserDefinedMetricDescriptor contains metadata that describes a user defined metric.
type UserDefinedMetricDescriptor struct {
// The name of the metric.
Name string `json:"name"`
// Type of the metric.
Type UserDefinedMetricType `json:"type"`
// Display Units for the stats.
Units string `json:"units"`
// Metadata labels associated with this metric.
// +optional
Labels map[string]string `json:"labels,omitempty"`
}
// UserDefinedMetric represents a metric defined and generated by users.
type UserDefinedMetric struct {
UserDefinedMetricDescriptor `json:",inline"`
// The time at which these stats were updated.
Time metav1.Time `json:"time"`
// Value of the metric. Float64s have 53 bit precision.
// We do not foresee any metrics exceeding that value.
Value float64 `json:"value"`
}
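As a rough, hedged illustration of how a provider might fill these in from GetStatsSummary (the provider type, node name, and numbers are placeholders; imports of time, metav1, and statsv1alpha1 are assumed):

func (p *myProvider) GetStatsSummary(ctx context.Context) (*statsv1alpha1.Summary, error) {
    now := metav1.NewTime(time.Now())
    cpu := uint64(250000000) // 0.25 cores, expressed in nanocores
    mem := uint64(64 << 20)  // 64 MiB working set
    return &statsv1alpha1.Summary{
        Node: statsv1alpha1.NodeStats{
            NodeName:  "my-virtual-node",
            StartTime: now,
            CPU:       &statsv1alpha1.CPUStats{Time: now, UsageNanoCores: &cpu},
            Memory:    &statsv1alpha1.MemoryStats{Time: now, WorkingSetBytes: &mem},
        },
        Pods: []statsv1alpha1.PodStats{{
            PodRef:    statsv1alpha1.PodReference{Name: "example", Namespace: "default", UID: "1234"},
            StartTime: now,
        }},
    }, nil
}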


@@ -114,6 +114,7 @@ func (c *leaseController) sync(ctx context.Context) {
pingResult, err := c.nodeController.nodePingController.getResult(ctx)
if err != nil {
log.G(ctx).WithError(err).Error("Could not get ping status")
return
}
if pingResult.error != nil {
log.G(ctx).WithError(pingResult.error).Error("Ping result is not clean, not updating lease")


@@ -367,6 +367,14 @@ func testDanglingPodScenario(ctx context.Context, t *testing.T, s *system, m tes
}
func sendErr(ctx context.Context, ch chan error, err error) {
select {
case <-ctx.Done():
log.G(ctx).WithError(err).Warn("timeout waiting to send test error")
case ch <- err:
}
}
func testDanglingPodScenarioWithDeletionTimestamp(ctx context.Context, t *testing.T, s *system, m testingProvider) {
t.Parallel()
@@ -390,18 +398,18 @@ func testDanglingPodScenarioWithDeletionTimestamp(ctx context.Context, t *testin
_, e := s.client.CoreV1().Pods(testNamespace).Create(ctx, podCopyWithDeletionTimestamp, metav1.CreateOptions{})
assert.NilError(t, e)
// Start the pod controller
assert.NilError(t, s.start(ctx))
watchErrCh := make(chan error)
go func() {
_, watchErr := watchutils.UntilWithoutRetry(ctx, watcher,
func(ev watch.Event) (bool, error) {
return ev.Type == watch.Deleted, nil
})
watchErrCh <- watchErr
sendErr(ctx, watchErrCh, watchErr)
}()
// Start the pod controller
assert.NilError(t, s.start(ctx))
select {
case <-ctx.Done():
t.Fatalf("Context ended early: %s", ctx.Err().Error())
@@ -436,7 +444,7 @@ func testCreateStartDeleteScenario(ctx context.Context, t *testing.T, s *system,
return pod.Name == p.ObjectMeta.Name, nil
})
watchErrCh <- watchErr
sendErr(ctx, watchErrCh, watchErr)
}()
// Create the Pod
@@ -465,7 +473,7 @@ func testCreateStartDeleteScenario(ctx context.Context, t *testing.T, s *system,
return pod.Status.Phase == corev1.PodRunning, nil
})
watchErrCh <- watchErr
sendErr(ctx, watchErrCh, watchErr)
}()
assert.NilError(t, s.start(ctx))
@@ -487,7 +495,7 @@ func testCreateStartDeleteScenario(ctx context.Context, t *testing.T, s *system,
_, watchDeleteErr := watchutils.UntilWithoutRetry(ctx, watcher2, func(ev watch.Event) (bool, error) {
return ev.Type == watch.Deleted, nil
})
waitDeleteCh <- watchDeleteErr
sendErr(ctx, waitDeleteCh, watchDeleteErr)
}()
// Setup a watch prior to pod deletion
@@ -495,7 +503,7 @@ func testCreateStartDeleteScenario(ctx context.Context, t *testing.T, s *system,
assert.NilError(t, err)
defer watcher.Stop()
go func() {
watchErrCh <- waitFunction(ctx, watcher)
sendErr(ctx, watchErrCh, waitFunction(ctx, watcher))
}()
// Delete the pod via deletiontimestamp
@@ -559,7 +567,7 @@ func testUpdatePodWhileRunningScenario(ctx context.Context, t *testing.T, s *sys
})
// This deepcopy is required to please the race detector
p = newPod.Object.(*corev1.Pod).DeepCopy()
watchErrCh <- watchErr
sendErr(ctx, watchErrCh, watchErr)
}()
// Start the pod controller


@@ -84,6 +84,7 @@ func NewNodeController(p NodeProvider, node *corev1.Node, nodes v1.NodeInterface
serverNode: node,
nodes: nodes,
chReady: make(chan struct{}),
chDone: make(chan struct{}),
}
for _, o := range opts {
if err := o(n); err != nil {
@@ -223,7 +224,12 @@ type NodeController struct { // nolint:golint
nodeStatusUpdateErrorHandler ErrorHandler
// chReady is closed once the controller is ready to start the control loop
chReady chan struct{}
// chDone is closed once the control loop has exited
chDone chan struct{}
errMu sync.Mutex
err error
nodePingController *nodePingController
pingTimeout *time.Duration
@@ -249,7 +255,14 @@ const (
// node status update (because some things still expect the node to be updated
// periodically), otherwise it will only use node status update with the configured
// ping interval.
func (n *NodeController) Run(ctx context.Context) error {
func (n *NodeController) Run(ctx context.Context) (retErr error) {
defer func() {
n.errMu.Lock()
n.err = retErr
n.errMu.Unlock()
close(n.chDone)
}()
n.chStatusUpdate = make(chan *corev1.Node, 1)
n.p.NotifyNodeStatus(ctx, func(node *corev1.Node) {
n.chStatusUpdate <- node
@@ -273,6 +286,22 @@ func (n *NodeController) Run(ctx context.Context) error {
return n.controlLoop(ctx, providerNode)
}
// Done signals to the caller when the controller is done and the control loop is exited.
//
// Call n.Err() to find out if there was an error.
func (n *NodeController) Done() <-chan struct{} {
return n.chDone
}
// Err returns any errors that have occurred that trigger the control loop to exit.
//
// Err only returns a non-nil error after `<-n.Done()` returns.
func (n *NodeController) Err() error {
n.errMu.Lock()
defer n.errMu.Unlock()
return n.err
}
func (n *NodeController) ensureNode(ctx context.Context, providerNode *corev1.Node) (err error) {
ctx, span := trace.StartSpan(ctx, "node.ensureNode")
defer span.End()
@@ -307,14 +336,12 @@ func (n *NodeController) ensureNode(ctx context.Context, providerNode *corev1.No
// Ready returns a channel that gets closed when the node is fully up and
// running. Note that if there is an error on startup this channel will never
// be started.
// be closed.
func (n *NodeController) Ready() <-chan struct{} {
return n.chReady
}
func (n *NodeController) controlLoop(ctx context.Context, providerNode *corev1.Node) error {
close(n.chReady)
defer n.group.Wait()
var sleepInterval time.Duration
@@ -355,6 +382,7 @@ func (n *NodeController) controlLoop(ctx context.Context, providerNode *corev1.N
return false
}
close(n.chReady)
for {
shouldTerminate := loop()
if shouldTerminate {


@@ -65,16 +65,13 @@ func testNodeRun(t *testing.T, enableLease bool) {
node, err := NewNodeController(testP, testNode, nodes, opts...)
assert.NilError(t, err)
chErr := make(chan error)
defer func() {
cancel()
assert.NilError(t, <-chErr)
<-node.Done()
assert.NilError(t, node.Err())
}()
go func() {
chErr <- node.Run(ctx)
close(chErr)
}()
go node.Run(ctx) // nolint:errcheck
nw := makeWatch(ctx, t, nodes, testNodeCopy.Name)
defer nw.Stop()
@@ -103,8 +100,8 @@ func testNodeRun(t *testing.T, enableLease bool) {
case <-time.After(time.Second):
t.Errorf("timeout waiting for event")
continue
case err := <-chErr:
t.Fatal(err) // if this returns at all it is an error regardless if err is nil
case <-node.Done():
t.Fatal(node.Err()) // if this returns at all it is an error regardless if err is nil
case <-nr:
nodeUpdates++
continue
@@ -152,8 +149,8 @@ func testNodeRun(t *testing.T, enableLease bool) {
defer eCancel()
select {
case err := <-chErr:
t.Fatal(err) // if this returns at all it is an error regardless if err is nil
case <-node.Done():
t.Fatal(node.Err()) // if this returns at all it is an error regardless if err is nil
case err := <-waitForEvent(eCtx, nr, func(e watch.Event) bool {
node := e.Object.(*corev1.Node)
if len(node.Status.Conditions) == 0 {
@@ -192,10 +189,7 @@ func TestNodeCustomUpdateStatusErrorHandler(t *testing.T) {
)
assert.NilError(t, err)
chErr := make(chan error, 1)
go func() {
chErr <- node.Run(ctx)
}()
go node.Run(ctx) // nolint:errcheck
timer := time.NewTimer(10 * time.Second)
defer timer.Stop()
@@ -204,8 +198,8 @@ func TestNodeCustomUpdateStatusErrorHandler(t *testing.T) {
select {
case <-timer.C:
t.Fatal("timeout waiting for node to be ready")
case <-chErr:
t.Fatalf("node.Run returned earlier than expected: %v", err)
case <-node.Done():
t.Fatalf("node.Run returned earlier than expected: %v", node.Err())
case <-node.Ready():
}
@@ -218,8 +212,8 @@ func TestNodeCustomUpdateStatusErrorHandler(t *testing.T) {
defer timer.Stop()
select {
case err := <-chErr:
assert.Equal(t, err, nil)
case <-node.Done():
assert.NilError(t, node.Err())
case <-timer.C:
t.Fatal("timeout waiting for node shutdown")
}
@@ -301,9 +295,11 @@ func TestPingAfterStatusUpdate(t *testing.T) {
node, err := NewNodeController(testP, testNode, nodes, opts...)
assert.NilError(t, err)
chErr := make(chan error, 1)
go func() {
chErr <- node.Run(ctx)
go node.Run(ctx) // nolint:errcheck
defer func() {
cancel()
<-node.Done()
assert.NilError(t, node.Err())
}()
timer := time.NewTimer(10 * time.Second)
@@ -313,10 +309,11 @@ func TestPingAfterStatusUpdate(t *testing.T) {
select {
case <-timer.C:
t.Fatal("timeout waiting for node to be ready")
case <-chErr:
t.Fatalf("node.Run returned earlier than expected: %v", err)
case <-node.Done():
t.Fatalf("node.Run returned earlier than expected: %v", node.Err())
case <-node.Ready():
}
timer.Stop()
notifyTimer := time.After(interval * time.Duration(10))
<-notifyTimer
@@ -360,16 +357,13 @@ func TestBeforeAnnotationsPreserved(t *testing.T) {
node, err := NewNodeController(testP, testNode, nodes, opts...)
assert.NilError(t, err)
chErr := make(chan error)
defer func() {
cancel()
assert.NilError(t, <-chErr)
<-node.Done()
assert.NilError(t, node.Err())
}()
go func() {
chErr <- node.Run(ctx)
close(chErr)
}()
go node.Run(ctx) // nolint:errcheck
nw := makeWatch(ctx, t, nodes, testNodeCopy.Name)
defer nw.Stop()
@@ -427,16 +421,13 @@ func TestManualConditionsPreserved(t *testing.T) {
node, err := NewNodeController(testP, testNode, nodes, opts...)
assert.NilError(t, err)
chErr := make(chan error)
defer func() {
cancel()
assert.NilError(t, <-chErr)
<-node.Done()
assert.NilError(t, node.Err())
}()
go func() {
chErr <- node.Run(ctx)
close(chErr)
}()
go node.Run(ctx) // nolint:errcheck
nw := makeWatch(ctx, t, nodes, testNodeCopy.Name)
defer nw.Stop()

node/nodeutil/auth.go

@@ -0,0 +1,220 @@
package nodeutil
import (
"context"
"net/http"
"strings"
"time"
"github.com/virtual-kubelet/virtual-kubelet/log"
"github.com/virtual-kubelet/virtual-kubelet/trace"
"k8s.io/apiserver/pkg/authentication/authenticator"
"k8s.io/apiserver/pkg/authentication/authenticatorfactory"
"k8s.io/apiserver/pkg/authentication/request/anonymous"
"k8s.io/apiserver/pkg/authentication/user"
"k8s.io/apiserver/pkg/authorization/authorizer"
"k8s.io/apiserver/pkg/authorization/authorizerfactory"
"k8s.io/client-go/kubernetes"
)
// Auth is the interface used to implement authn/authz for http requests
type Auth interface {
authenticator.Request
authorizer.RequestAttributesGetter
authorizer.Authorizer
}
type authWrapper struct {
authenticator.Request
authorizer.RequestAttributesGetter
authorizer.Authorizer
}
// InstrumentAuth wraps the provided Auth in a new instrumented Auth
//
// Note: You would only need this if you rolled your own auth.
// The Auth implementations defined in this package are already instrumented.
func InstrumentAuth(auth Auth) Auth {
if _, ok := auth.(*authWrapper); ok {
// This is already instrumented
return auth
}
return &authWrapper{
Request: auth,
RequestAttributesGetter: auth,
Authorizer: auth,
}
}
// NoAuth creates an Auth which allows anonymous access to all resources
func NoAuth() Auth {
return &authWrapper{
Request: anonymous.NewAuthenticator(),
RequestAttributesGetter: &NodeRequestAttr{},
Authorizer: authorizerfactory.NewAlwaysAllowAuthorizer(),
}
}
// WithAuth makes a new http handler which wraps the provided handler with authn/authz.
func WithAuth(auth Auth, h http.Handler) http.Handler {
return http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) {
handleAuth(auth, w, r, h)
})
}
func handleAuth(auth Auth, w http.ResponseWriter, r *http.Request, next http.Handler) {
ctx := r.Context()
ctx, span := trace.StartSpan(ctx, "vk.handleAuth")
defer span.End()
r = r.WithContext(ctx)
info, ok, err := auth.AuthenticateRequest(r)
if err != nil || !ok {
log.G(r.Context()).WithError(err).Error("Authorization error")
http.Error(w, "Unauthorized", http.StatusUnauthorized)
return
}
logger := log.G(ctx).WithFields(log.Fields{
"user-name": info.User.GetName(),
"user-id": info.User.GetUID(),
})
ctx = log.WithLogger(ctx, logger)
r = r.WithContext(ctx)
attrs := auth.GetRequestAttributes(info.User, r)
decision, _, err := auth.Authorize(ctx, attrs)
if err != nil {
log.G(r.Context()).WithError(err).Error("Authorization error")
http.Error(w, err.Error(), http.StatusInternalServerError)
return
}
if decision != authorizer.DecisionAllow {
http.Error(w, "Forbidden", http.StatusForbidden)
return
}
next.ServeHTTP(w, r)
}
// WebhookAuthOption is used as a functional argument to configure webhook auth.
type WebhookAuthOption func(*WebhookAuthConfig) error
// WebhookAuthConfig stores the configurations for authn/authz and is used by WebhookAuthOption to expose to callers.
type WebhookAuthConfig struct {
AuthnConfig authenticatorfactory.DelegatingAuthenticatorConfig
AuthzConfig authorizerfactory.DelegatingAuthorizerConfig
}
// WebhookAuth creates an Auth suitable to use with kubelet webhook auth.
// You must provide a CA provider to the authentication config, otherwise mTLS is disabled.
func WebhookAuth(client kubernetes.Interface, nodeName string, opts ...WebhookAuthOption) (Auth, error) {
cfg := WebhookAuthConfig{
AuthnConfig: authenticatorfactory.DelegatingAuthenticatorConfig{
CacheTTL: 2 * time.Minute, // default taken from k8s.io/kubernetes/pkg/kubelet/apis/config/v1beta1
// TODO: After upgrading k8s libs, we need to add the retry backoff option
},
AuthzConfig: authorizerfactory.DelegatingAuthorizerConfig{
AllowCacheTTL: 5 * time.Minute, // default taken from k8s.io/kubernetes/pkg/kubelet/apis/config/v1beta1
DenyCacheTTL: 30 * time.Second, // default taken from k8s.io/kubernetes/pkg/kubelet/apis/config/v1beta1
// TODO: After upgrading k8s libs, we need to add the retry backoff option
},
}
for _, o := range opts {
if err := o(&cfg); err != nil {
return nil, err
}
}
cfg.AuthnConfig.TokenAccessReviewClient = client.AuthenticationV1().TokenReviews()
cfg.AuthzConfig.SubjectAccessReviewClient = client.AuthorizationV1().SubjectAccessReviews()
authn, _, err := cfg.AuthnConfig.New()
if err != nil {
return nil, err
}
authz, err := cfg.AuthzConfig.New()
if err != nil {
return nil, err
}
return &authWrapper{
Request: authn,
RequestAttributesGetter: NodeRequestAttr{nodeName},
Authorizer: authz,
}, nil
}
func (w *authWrapper) AuthenticateRequest(r *http.Request) (*authenticator.Response, bool, error) {
ctx, span := trace.StartSpan(r.Context(), "AuthenticateRequest")
defer span.End()
return w.Request.AuthenticateRequest(r.WithContext(ctx))
}
func (w *authWrapper) Authorize(ctx context.Context, a authorizer.Attributes) (authorizer.Decision, string, error) {
ctx, span := trace.StartSpan(ctx, "Authorize")
defer span.End()
return w.Authorizer.Authorize(ctx, a)
}
// NodeRequestAttr is an authorizer.RequestAttributesGetter which can be used in the Auth interface.
type NodeRequestAttr struct {
NodeName string
}
// GetRequestAttributes satisfies the authorizer.RequestAttributesGetter interface for use with an `Auth`.
func (a NodeRequestAttr) GetRequestAttributes(u user.Info, r *http.Request) authorizer.Attributes {
return authorizer.AttributesRecord{
User: u,
Verb: getAPIVerb(r),
Namespace: "",
APIGroup: "",
APIVersion: "v1",
Resource: "nodes",
Name: a.NodeName,
ResourceRequest: true,
Path: r.URL.Path,
Subresource: getSubresource(r),
}
}
func getAPIVerb(r *http.Request) string {
switch r.Method {
case http.MethodPost:
return "create"
case http.MethodGet:
return "get"
case http.MethodPut:
return "update"
case http.MethodPatch:
return "patch"
case http.MethodDelete:
return "delete"
}
return ""
}
func isSubpath(subpath, path string) bool {
// Taken from k8s.io/kubernetes/pkg/kubelet/server/auth.go
return subpath == path || (strings.HasPrefix(subpath, path) && subpath[len(path)] == '/')
}
func getSubresource(r *http.Request) string {
if isSubpath(r.URL.Path, "/stats") {
return "stats"
}
if isSubpath(r.URL.Path, "/metrics") {
return "metrics"
}
if isSubpath(r.URL.Path, "/logs") {
// yes, "log", not "logs"
// per kubelet code: "log" to match other log subresources (pods/log, etc)
return "log"
}
return "proxy"
}
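A hedged sketch of how this might be wired up by a caller (the clientset, node name, and mux are placeholders; only WebhookAuth and WithAuth come from this file):

auth, err := nodeutil.WebhookAuth(client, "my-virtual-node")
if err != nil {
    return err
}
mux := http.NewServeMux()
// ... attach the kubelet API routes to mux ...
handler := nodeutil.WithAuth(auth, mux)
// handler can then be used as the node's HTTP handler (e.g. set as NodeConfig.Handler below).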

node/nodeutil/controller.go

@@ -0,0 +1,430 @@
package nodeutil
import (
"context"
"crypto/tls"
"fmt"
"net"
"net/http"
"os"
"path"
"runtime"
"time"
"github.com/pkg/errors"
"github.com/virtual-kubelet/virtual-kubelet/log"
"github.com/virtual-kubelet/virtual-kubelet/node"
v1 "k8s.io/api/core/v1"
metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
"k8s.io/client-go/informers"
"k8s.io/client-go/kubernetes"
"k8s.io/client-go/kubernetes/scheme"
corev1client "k8s.io/client-go/kubernetes/typed/core/v1"
corev1listers "k8s.io/client-go/listers/core/v1"
"k8s.io/client-go/tools/record"
)
// Node helps manage the startup/shutdown procedure for other controllers.
// It is intended as a convenience to reduce boilerplate code for starting up controllers.
//
// Must be created with constructor `NewNode`.
type Node struct {
nc *node.NodeController
pc *node.PodController
readyCb func(context.Context) error
ready chan struct{}
done chan struct{}
err error
podInformerFactory informers.SharedInformerFactory
scmInformerFactory informers.SharedInformerFactory
client kubernetes.Interface
listenAddr string
h http.Handler
tlsConfig *tls.Config
workers int
eb record.EventBroadcaster
}
// NodeController returns the configured node controller.
func (n *Node) NodeController() *node.NodeController {
return n.nc
}
// PodController returns the configured pod controller.
func (n *Node) PodController() *node.PodController {
return n.pc
}
func (n *Node) runHTTP(ctx context.Context) (func(), error) {
if n.tlsConfig == nil {
log.G(ctx).Warn("TLS config not provided, not starting up http service")
return func() {}, nil
}
if n.h == nil {
log.G(ctx).Debug("No http handler, not starting up http service")
return func() {}, nil
}
l, err := tls.Listen("tcp", n.listenAddr, n.tlsConfig)
if err != nil {
return nil, errors.Wrap(err, "error starting http listener")
}
log.G(ctx).Debug("Started TLS listener")
srv := &http.Server{Handler: n.h, TLSConfig: n.tlsConfig}
go srv.Serve(l) //nolint:errcheck
log.G(ctx).Debug("HTTP server running")
return func() {
srv.Close()
l.Close()
}, nil
}
// Run starts all the underlying controllers
func (n *Node) Run(ctx context.Context) (retErr error) {
ctx, cancel := context.WithCancel(ctx)
defer func() {
cancel()
n.err = retErr
close(n.done)
}()
if n.eb != nil {
n.eb.StartLogging(log.G(ctx).Infof)
n.eb.StartRecordingToSink(&corev1client.EventSinkImpl{Interface: n.client.CoreV1().Events(v1.NamespaceAll)})
defer n.eb.Shutdown()
log.G(ctx).Debug("Started event broadcaster")
}
cancelHTTP, err := n.runHTTP(ctx)
if err != nil {
return err
}
defer cancelHTTP()
go n.podInformerFactory.Start(ctx.Done())
go n.scmInformerFactory.Start(ctx.Done())
go n.pc.Run(ctx, n.workers) //nolint:errcheck
defer func() {
cancel()
<-n.pc.Done()
}()
select {
case <-ctx.Done():
return n.err
case <-n.pc.Ready():
case <-n.pc.Done():
return n.pc.Err()
}
log.G(ctx).Debug("pod controller ready")
go n.nc.Run(ctx) //nolint:errcheck
defer func() {
cancel()
<-n.nc.Done()
}()
select {
case <-ctx.Done():
n.err = ctx.Err()
return n.err
case <-n.nc.Ready():
case <-n.nc.Done():
return n.nc.Err()
}
log.G(ctx).Debug("node controller ready")
if n.readyCb != nil {
if err := n.readyCb(ctx); err != nil {
return err
}
}
close(n.ready)
select {
case <-n.nc.Done():
cancel()
return n.nc.Err()
case <-n.pc.Done():
cancel()
return n.pc.Err()
}
}
// WaitReady waits for the specified timeout for the controller to be ready.
//
// The timeout is for convenience so the caller doesn't have to juggle an extra context.
func (n *Node) WaitReady(ctx context.Context, timeout time.Duration) error {
if timeout > 0 {
var cancel func()
ctx, cancel = context.WithTimeout(ctx, timeout)
defer cancel()
}
select {
case <-n.ready:
return nil
case <-n.done:
return fmt.Errorf("controller exited before ready: %w", n.err)
case <-ctx.Done():
return ctx.Err()
}
}
// Ready returns a channel that will be closed after the controller is ready.
func (n *Node) Ready() <-chan struct{} {
return n.ready
}
// Done returns a channel that will be closed when the controller has exited.
func (n *Node) Done() <-chan struct{} {
return n.done
}
// Err returns any error that occurred with the controller.
//
// This always returns nil before `<-Done()`.
func (n *Node) Err() error {
select {
case <-n.Done():
return n.err
default:
return nil
}
}
// NodeOpt is used as a functional option when configuring a new node in NewNodeFromClient
type NodeOpt func(c *NodeConfig) error
// NodeConfig is used to hold configuration items for a Node.
// It gets used in conjunction with NodeOpt in NewNodeFromClient
type NodeConfig struct {
// Set the client to use, otherwise a client will be created from ClientsetFromEnv
Client kubernetes.Interface
// Set the node spec to register with Kubernetes
NodeSpec v1.Node
// Set the path to read a kubeconfig from for creating a client.
// This is ignored when a client is provided to NewNodeFromClient
KubeconfigPath string
// Set the period for a full resync for generated client-go informers
InformerResyncPeriod time.Duration
// Set the address to listen on for the http API
HTTPListenAddr string
// Set a custom API handler to use.
// You can use this to setup, for example, authentication middleware.
// If one is not provided a default one will be created.
//
// Note: If you provide your own handler, you'll need to handle all auth, routes, etc.
Handler http.Handler
// Set the timeout for idle http streams
StreamIdleTimeout time.Duration
// Set the timeout for creating http streams
StreamCreationTimeout time.Duration
// Enable http debugging routes
DebugHTTP bool
// Set the tls config to use for the http server
TLSConfig *tls.Config
// Specify the event recorder to use
// If this is not provided, a default one will be used.
EventRecorder record.EventRecorder
// Set the number of workers to reconcile pods
// The default value is derived from the number of cores available.
NumWorkers int
routeAttacher func(Provider, NodeConfig, corev1listers.PodLister)
}
// WithNodeConfig returns a NodeOpt which replaces the NodeConfig with the passed in value.
func WithNodeConfig(c NodeConfig) NodeOpt {
return func(orig *NodeConfig) error {
*orig = c
return nil
}
}
// WithClient return a NodeOpt that sets the client that will be used to create/manage the node.
func WithClient(c kubernetes.Interface) NodeOpt {
return func(cfg *NodeConfig) error {
cfg.Client = c
return nil
}
}
// NewNode creates a new node using the provided client and name.
// This is intended for high-level/low boiler-plate usage.
// Use the constructors in the `node` package for lower level configuration.
//
// Some basic values are set for node status, you'll almost certainly want to modify it.
//
// If client is nil, this will construct a client using ClientsetFromEnv
//
// It is up to the caller to configure auth on the HTTP handler.
func NewNode(name string, newProvider NewProviderFunc, opts ...NodeOpt) (*Node, error) {
cfg := NodeConfig{
NumWorkers: runtime.NumCPU(),
InformerResyncPeriod: time.Minute,
KubeconfigPath: os.Getenv("KUBECONFIG"),
HTTPListenAddr: ":10250",
NodeSpec: v1.Node{
ObjectMeta: metav1.ObjectMeta{
Name: name,
Labels: map[string]string{
"type": "virtual-kubelet",
"kubernetes.io/role": "agent",
"kubernetes.io/hostname": name,
},
},
Status: v1.NodeStatus{
Phase: v1.NodePending,
Conditions: []v1.NodeCondition{
{Type: v1.NodeReady},
{Type: v1.NodeDiskPressure},
{Type: v1.NodeMemoryPressure},
{Type: v1.NodePIDPressure},
{Type: v1.NodeNetworkUnavailable},
},
},
},
}
for _, o := range opts {
if err := o(&cfg); err != nil {
return nil, err
}
}
if _, _, err := net.SplitHostPort(cfg.HTTPListenAddr); err != nil {
return nil, errors.Wrap(err, "error parsing http listen address")
}
if cfg.Client == nil {
var err error
cfg.Client, err = ClientsetFromEnv(cfg.KubeconfigPath)
if err != nil {
return nil, errors.Wrap(err, "error creating clientset from env")
}
}
podInformerFactory := informers.NewSharedInformerFactoryWithOptions(
cfg.Client,
cfg.InformerResyncPeriod,
PodInformerFilter(name),
)
scmInformerFactory := informers.NewSharedInformerFactoryWithOptions(
cfg.Client,
cfg.InformerResyncPeriod,
)
podInformer := podInformerFactory.Core().V1().Pods()
secretInformer := scmInformerFactory.Core().V1().Secrets()
configMapInformer := scmInformerFactory.Core().V1().ConfigMaps()
serviceInformer := scmInformerFactory.Core().V1().Services()
p, np, err := newProvider(ProviderConfig{
Pods: podInformer.Lister(),
ConfigMaps: configMapInformer.Lister(),
Secrets: secretInformer.Lister(),
Services: serviceInformer.Lister(),
Node: &cfg.NodeSpec,
})
if err != nil {
return nil, errors.Wrap(err, "error creating provider")
}
if cfg.routeAttacher != nil {
cfg.routeAttacher(p, cfg, podInformer.Lister())
}
var readyCb func(context.Context) error
if np == nil {
nnp := node.NewNaiveNodeProvider()
np = nnp
readyCb = func(ctx context.Context) error {
setNodeReady(&cfg.NodeSpec)
err := nnp.UpdateStatus(ctx, &cfg.NodeSpec)
return errors.Wrap(err, "error marking node as ready")
}
}
nc, err := node.NewNodeController(
np,
&cfg.NodeSpec,
cfg.Client.CoreV1().Nodes(),
node.WithNodeEnableLeaseV1(NodeLeaseV1Client(cfg.Client), node.DefaultLeaseDuration),
)
if err != nil {
return nil, errors.Wrap(err, "error creating node controller")
}
var eb record.EventBroadcaster
if cfg.EventRecorder == nil {
eb = record.NewBroadcaster()
cfg.EventRecorder = eb.NewRecorder(scheme.Scheme, v1.EventSource{Component: path.Join(name, "pod-controller")})
}
pc, err := node.NewPodController(node.PodControllerConfig{
PodClient: cfg.Client.CoreV1(),
EventRecorder: cfg.EventRecorder,
Provider: p,
PodInformer: podInformer,
SecretInformer: secretInformer,
ConfigMapInformer: configMapInformer,
ServiceInformer: serviceInformer,
})
if err != nil {
return nil, errors.Wrap(err, "error creating pod controller")
}
return &Node{
nc: nc,
pc: pc,
readyCb: readyCb,
ready: make(chan struct{}),
done: make(chan struct{}),
eb: eb,
podInformerFactory: podInformerFactory,
scmInformerFactory: scmInformerFactory,
client: cfg.Client,
tlsConfig: cfg.TLSConfig,
h: cfg.Handler,
listenAddr: cfg.HTTPListenAddr,
workers: cfg.NumWorkers,
}, nil
}
func setNodeReady(n *v1.Node) {
n.Status.Phase = v1.NodeRunning
for i, c := range n.Status.Conditions {
if c.Type != "Ready" {
continue
}
c.Message = "Kubelet is ready"
c.Reason = "KubeletReady"
c.Status = v1.ConditionTrue
c.LastHeartbeatTime = metav1.Now()
c.LastTransitionTime = metav1.Now()
n.Status.Conditions[i] = c
return
}
}
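As a hedged end-to-end sketch of the intended usage (myProvider, the node name, and the certificate paths are placeholders; errors are simply returned):

newProvider := func(cfg nodeutil.ProviderConfig) (nodeutil.Provider, node.NodeProvider, error) {
    // myProvider is a hypothetical implementation of nodeutil.Provider.
    return &myProvider{pods: cfg.Pods}, nil, nil // nil NodeProvider => naive node provider + ready callback
}

n, err := nodeutil.NewNode("my-virtual-node", newProvider,
    nodeutil.WithTLSConfig(nodeutil.WithKeyPairFromPath("/etc/vk/cert.pem", "/etc/vk/key.pem")),
)
if err != nil {
    return err
}

go n.Run(ctx) //nolint:errcheck
if err := n.WaitReady(ctx, time.Minute); err != nil {
    return err
}
// ... node is registered and serving; block until shutdown ...
<-n.Done()
return n.Err()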

node/nodeutil/provider.go

@@ -0,0 +1,70 @@
package nodeutil
import (
"context"
"io"
"github.com/virtual-kubelet/virtual-kubelet/node"
"github.com/virtual-kubelet/virtual-kubelet/node/api"
"github.com/virtual-kubelet/virtual-kubelet/node/api/statsv1alpha1"
v1 "k8s.io/api/core/v1"
"k8s.io/apimachinery/pkg/labels"
corev1listers "k8s.io/client-go/listers/core/v1"
)
// Provider contains the methods required to implement a virtual-kubelet provider.
//
// Errors produced by these methods should implement an interface from
// github.com/virtual-kubelet/virtual-kubelet/errdefs package in order for the
// core logic to be able to understand the type of failure
type Provider interface {
node.PodLifecycleHandler
// GetContainerLogs retrieves the logs of a container by name from the provider.
GetContainerLogs(ctx context.Context, namespace, podName, containerName string, opts api.ContainerLogOpts) (io.ReadCloser, error)
// RunInContainer executes a command in a container in the pod, copying data
// between in/out/err and the container's stdin/stdout/stderr.
RunInContainer(ctx context.Context, namespace, podName, containerName string, cmd []string, attach api.AttachIO) error
// GetStatsSummary gets the stats for the node, including running pods
GetStatsSummary(context.Context) (*statsv1alpha1.Summary, error)
}
// ProviderConfig holds objects created by NewNodeFromClient that a provider may need to bootstrap itself.
type ProviderConfig struct {
Pods corev1listers.PodLister
ConfigMaps corev1listers.ConfigMapLister
Secrets corev1listers.SecretLister
Services corev1listers.ServiceLister
// Hack to allow the provider to set things on the node
// Since the provider is bootstrapped after the node object is configured
// Primarily this is due to carry-over from the pre-1.0 interfaces that expect the provider instead of the direct *caller* to configure the node.
Node *v1.Node
}
// NewProviderFunc is used from NewNodeFromClient to bootstrap a provider using the client/listers/etc created there.
// If a nil node provider is returned a default one will be used.
type NewProviderFunc func(ProviderConfig) (Provider, node.NodeProvider, error)
// AttachProviderRoutes returns a NodeOpt which uses api.PodHandler to attach the routes to the provider functions.
//
// Note this only attaches routes; you'll still need to set the handler in the node config.
func AttachProviderRoutes(mux api.ServeMux) NodeOpt {
return func(cfg *NodeConfig) error {
cfg.routeAttacher = func(p Provider, cfg NodeConfig, pods corev1listers.PodLister) {
mux.Handle("/", api.PodHandler(api.PodHandlerConfig{
RunInContainer: p.RunInContainer,
GetContainerLogs: p.GetContainerLogs,
GetPods: p.GetPods,
GetPodsFromKubernetes: func(context.Context) ([]*v1.Pod, error) {
return pods.List(labels.Everything())
},
GetStatsSummary: p.GetStatsSummary,
StreamIdleTimeout: cfg.StreamIdleTimeout,
StreamCreationTimeout: cfg.StreamCreationTimeout,
}, true))
}
return nil
}
}
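A hedged sketch of combining this with the node config (assuming, as elsewhere in the api package, that *http.ServeMux satisfies api.ServeMux; the inline NodeOpt that sets the handler is illustrative, not an option defined here):

mux := http.NewServeMux()
n, err := nodeutil.NewNode("my-virtual-node", newProvider,
    nodeutil.AttachProviderRoutes(mux),
    func(cfg *nodeutil.NodeConfig) error {
        // AttachProviderRoutes only registers routes; the handler still has to be set,
        // optionally wrapped with auth (see WithAuth above).
        cfg.Handler = mux
        return nil
    },
)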

node/nodeutil/tls.go

@@ -0,0 +1,83 @@
package nodeutil
import (
"crypto/tls"
"crypto/x509"
"fmt"
"io/ioutil"
)
// WithTLSConfig returns a NodeOpt which creates a base TLSConfig with the default cipher suites and TLS min version.
// The tls config can be modified through functional options.
func WithTLSConfig(opts ...func(*tls.Config) error) NodeOpt {
return func(cfg *NodeConfig) error {
tlsCfg := &tls.Config{
MinVersion: tls.VersionTLS12,
PreferServerCipherSuites: true,
CipherSuites: DefaultServerCiphers(),
ClientAuth: tls.RequestClientCert,
}
for _, o := range opts {
if err := o(tlsCfg); err != nil {
return err
}
}
cfg.TLSConfig = tlsCfg
return nil
}
}
// WithCAFromPath makes a TLS config option to set up client auth using the path to a PEM encoded CA cert.
func WithCAFromPath(p string) func(*tls.Config) error {
return func(cfg *tls.Config) error {
pem, err := ioutil.ReadFile(p)
if err != nil {
return fmt.Errorf("error reading ca cert pem: %w", err)
}
cfg.ClientAuth = tls.RequireAndVerifyClientCert
return WithCACert(pem)(cfg)
}
}
// WithKeyPairFromPath makes a TLS config option which loads the key pair from the provided paths and appends it to the tls config.
func WithKeyPairFromPath(cert, key string) func(*tls.Config) error {
return func(cfg *tls.Config) error {
cert, err := tls.LoadX509KeyPair(cert, key)
if err != nil {
return err
}
cfg.Certificates = append(cfg.Certificates, cert)
return nil
}
}
// WithCACert makes a TLS config option which appends the provided PEM encoded bytes to the TLS config's client CA pool.
// If a cert pool is not defined on the TLS config, an empty one will be created.
func WithCACert(pem []byte) func(*tls.Config) error {
return func(cfg *tls.Config) error {
if cfg.ClientCAs == nil {
cfg.ClientCAs = x509.NewCertPool()
}
if !cfg.ClientCAs.AppendCertsFromPEM(pem) {
return fmt.Errorf("could not parse ca cert pem")
}
return nil
}
}
// DefaultServerCiphers is the list of accepted TLS ciphers, with known weak ciphers elided.
// Note that this list should be treated as a moving target.
func DefaultServerCiphers() []uint16 {
return []uint16{
tls.TLS_ECDHE_ECDSA_WITH_AES_256_CBC_SHA,
tls.TLS_ECDHE_ECDSA_WITH_AES_128_CBC_SHA,
tls.TLS_ECDHE_RSA_WITH_AES_256_CBC_SHA,
tls.TLS_ECDHE_RSA_WITH_AES_128_CBC_SHA,
tls.TLS_ECDHE_ECDSA_WITH_AES_256_GCM_SHA384,
tls.TLS_ECDHE_RSA_WITH_AES_256_GCM_SHA384,
tls.TLS_ECDHE_ECDSA_WITH_AES_128_GCM_SHA256,
tls.TLS_ECDHE_RSA_WITH_AES_128_GCM_SHA256,
}
}
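
A hedged usage sketch of the options above, combined into a single NodeOpt. The file paths are placeholders, and the inline closure only shows that further *tls.Config tweaks can be layered on after the defaults.

package example

import (
	"crypto/tls"

	"github.com/virtual-kubelet/virtual-kubelet/node/nodeutil"
)

// tlsOpt builds a TLS 1.2+ base config with the default ciphers, client-cert
// verification against the CA at caPath (WithCAFromPath flips ClientAuth to
// RequireAndVerifyClientCert), and the serving key pair from certPath/keyPath.
func tlsOpt(caPath, certPath, keyPath string) nodeutil.NodeOpt {
	return nodeutil.WithTLSConfig(
		nodeutil.WithCAFromPath(caPath),
		nodeutil.WithKeyPairFromPath(certPath, keyPath),
		func(cfg *tls.Config) error {
			// Additional tweaks run after the defaults, e.g. a stricter minimum version.
			cfg.MinVersion = tls.VersionTLS13
			return nil
		},
	)
}
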

View File

@@ -20,10 +20,11 @@ import (
"strings"
"time"
"github.com/virtual-kubelet/virtual-kubelet/internal/queue"
"github.com/google/go-cmp/cmp"
pkgerrors "github.com/pkg/errors"
"github.com/virtual-kubelet/virtual-kubelet/internal/podutils"
"github.com/virtual-kubelet/virtual-kubelet/internal/queue"
"github.com/virtual-kubelet/virtual-kubelet/log"
"github.com/virtual-kubelet/virtual-kubelet/trace"
corev1 "k8s.io/api/core/v1"

View File

@@ -20,12 +20,11 @@ import (
"sync"
"time"
"github.com/virtual-kubelet/virtual-kubelet/internal/queue"
"github.com/google/go-cmp/cmp"
pkgerrors "github.com/pkg/errors"
"github.com/virtual-kubelet/virtual-kubelet/errdefs"
"github.com/virtual-kubelet/virtual-kubelet/internal/manager"
"github.com/virtual-kubelet/virtual-kubelet/internal/queue"
"github.com/virtual-kubelet/virtual-kubelet/log"
"github.com/virtual-kubelet/virtual-kubelet/trace"
corev1 "k8s.io/api/core/v1"
@@ -178,10 +177,18 @@ type PodControllerConfig struct {
// SyncPodsFromKubernetesRateLimiter defines the rate limit for the SyncPodsFromKubernetes queue
SyncPodsFromKubernetesRateLimiter workqueue.RateLimiter
// SyncPodsFromKubernetesShouldRetryFunc allows for a custom retry policy for the SyncPodsFromKubernetes queue
SyncPodsFromKubernetesShouldRetryFunc ShouldRetryFunc
// DeletePodsFromKubernetesRateLimiter defines the rate limit for the DeletePodsFromKubernetes queue
DeletePodsFromKubernetesRateLimiter workqueue.RateLimiter
// DeletePodsFromKubernetesShouldRetryFunc allows for a custom retry policy for the DeletePodsFromKubernetes queue
DeletePodsFromKubernetesShouldRetryFunc ShouldRetryFunc
// SyncPodStatusFromProviderRateLimiter defines the rate limit for the SyncPodStatusFromProvider queue
SyncPodStatusFromProviderRateLimiter workqueue.RateLimiter
// SyncPodStatusFromProviderShouldRetryFunc allows for a custom retry policy for the SyncPodStatusFromProvider queue
SyncPodStatusFromProviderShouldRetryFunc ShouldRetryFunc
// Add custom filtering for pod informer event handlers
// Use this for cases where the pod informer handles more than pods assigned to this node
@@ -240,9 +247,9 @@ func NewPodController(cfg PodControllerConfig) (*PodController, error) {
podEventFilterFunc: cfg.PodEventFilterFunc,
}
pc.syncPodsFromKubernetes = queue.New(cfg.SyncPodsFromKubernetesRateLimiter, "syncPodsFromKubernetes", pc.syncPodFromKubernetesHandler)
pc.deletePodsFromKubernetes = queue.New(cfg.DeletePodsFromKubernetesRateLimiter, "deletePodsFromKubernetes", pc.deletePodsFromKubernetesHandler)
pc.syncPodStatusFromProvider = queue.New(cfg.SyncPodStatusFromProviderRateLimiter, "syncPodStatusFromProvider", pc.syncPodStatusFromProviderHandler)
pc.syncPodsFromKubernetes = queue.New(cfg.SyncPodsFromKubernetesRateLimiter, "syncPodsFromKubernetes", pc.syncPodFromKubernetesHandler, cfg.SyncPodsFromKubernetesShouldRetryFunc)
pc.deletePodsFromKubernetes = queue.New(cfg.DeletePodsFromKubernetesRateLimiter, "deletePodsFromKubernetes", pc.deletePodsFromKubernetesHandler, cfg.DeletePodsFromKubernetesShouldRetryFunc)
pc.syncPodStatusFromProvider = queue.New(cfg.SyncPodStatusFromProviderRateLimiter, "syncPodStatusFromProvider", pc.syncPodStatusFromProviderHandler, cfg.SyncPodStatusFromProviderShouldRetryFunc)
return pc, nil
}
@@ -509,7 +516,6 @@ func (pc *PodController) syncPodInProvider(ctx context.Context, pod *corev1.Pod,
// more context is here: https://github.com/virtual-kubelet/virtual-kubelet/pull/760
if pod.DeletionTimestamp != nil && !running(&pod.Status) {
log.G(ctx).Debug("Force deleting pod from API Server as it is no longer running")
pc.deletePodsFromKubernetes.EnqueueWithoutRateLimit(ctx, key)
key = fmt.Sprintf("%v/%v", key, pod.UID)
pc.deletePodsFromKubernetes.EnqueueWithoutRateLimit(ctx, key)
return nil

34
node/queue.go Normal file
View File

@@ -0,0 +1,34 @@
package node
import (
"github.com/virtual-kubelet/virtual-kubelet/internal/queue"
)
// These re-export definitions from the internal queue package:
// ShouldRetryFunc is a mechanism to have a custom retry policy
//
// It is passed metadata about the work item when the handler returns an error. It is given the following:
// * The key
// * The number of attempts that this item has already had (and failed)
// * The (potentially wrapped) error from the queue handler.
//
// The return value is an error, and optionally an amount to delay the work.
// If an error is returned, the work will be aborted, and the returned error is bubbled up. It can be the error that
// was passed in or that error can be wrapped.
//
// If the work item is to be retried, a delay duration may be specified. The delay is used to schedule when
// the item should begin processing relative to now; it does not necessarily dictate when the item will start work.
// Items are processed in the order they are scheduled. If the delay is nil, the queue falls back to its default
// behaviour and uses the configured rate limiter to determine when to start work.
//
// If the delay is negative, the item will be scheduled "earlier" than now. This will result in the item being executed
// earlier than other items in the FIFO work order.
type ShouldRetryFunc = queue.ShouldRetryFunc
// DefaultRetryFunc is the default function used for retries by the queue subsystem. Its only policy is that it gives up
// after MaxRetries, and falls back to the rate limiter for all other retries.
var DefaultRetryFunc = queue.DefaultRetryFunc
// MaxRetries is the number of times we try to process a given key before permanently forgetting it.
var MaxRetries = queue.MaxRetries
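
Putting the new PodControllerConfig fields and these exported helpers together, a rough wiring sketch; it assumes the rest of the config (client, informers, provider handler) is populated elsewhere, and DefaultControllerRateLimiter is just one choice of workqueue.RateLimiter.

package example

import (
	"k8s.io/client-go/util/workqueue"

	"github.com/virtual-kubelet/virtual-kubelet/node"
)

// withRetryPolicy fills in the new queue knobs on an otherwise-populated
// PodControllerConfig. The rate limiters gate when a retry may start; the
// ShouldRetryFuncs decide whether to retry at all. DefaultRetryFunc gives up
// after MaxRetries and otherwise defers to the rate limiter.
func withRetryPolicy(cfg node.PodControllerConfig) node.PodControllerConfig {
	cfg.SyncPodsFromKubernetesRateLimiter = workqueue.DefaultControllerRateLimiter()
	cfg.SyncPodsFromKubernetesShouldRetryFunc = node.DefaultRetryFunc

	cfg.DeletePodsFromKubernetesRateLimiter = workqueue.DefaultControllerRateLimiter()
	cfg.DeletePodsFromKubernetesShouldRetryFunc = node.DefaultRetryFunc

	cfg.SyncPodStatusFromProviderRateLimiter = workqueue.DefaultControllerRateLimiter()
	cfg.SyncPodStatusFromProviderShouldRetryFunc = node.DefaultRetryFunc
	return cfg
}
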

View File

@@ -134,7 +134,12 @@ func (p *syncProviderWrapper) syncPodStatuses(ctx context.Context) {
for _, pod := range pods {
if shouldSkipPodStatusUpdate(pod) {
log.G(ctx).Debug("Skipping pod status update")
log.G(ctx).WithFields(log.Fields{
"pod": pod.Name,
"namespace": pod.Namespace,
"phase": pod.Status.Phase,
"status": pod.Status.Reason,
}).Debug("Skipping pod status update")
continue
}

View File

@@ -7,10 +7,10 @@ import (
"time"
"github.com/virtual-kubelet/virtual-kubelet/internal/podutils"
stats "github.com/virtual-kubelet/virtual-kubelet/node/api/statsv1alpha1"
"gotest.tools/assert"
v1 "k8s.io/api/core/v1"
apierrors "k8s.io/apimachinery/pkg/api/errors"
"k8s.io/kubernetes/pkg/kubelet/apis/stats/v1alpha1"
)
const (
@@ -401,7 +401,7 @@ func (ts *EndToEndTestSuite) TestCreatePodWithMandatoryInexistentConfigMap(t *te
// findPodInPodStats returns the index of the specified pod in the .pods field of the specified Summary object.
// It returns an error if the specified pod is not found.
func findPodInPodStats(summary *v1alpha1.Summary, pod *v1.Pod) (int, error) {
func findPodInPodStats(summary *stats.Summary, pod *v1.Pod) (int, error) {
for i, p := range summary.Pods {
if p.PodRef.Namespace == pod.Namespace && p.PodRef.Name == pod.Name && string(p.PodRef.UID) == string(pod.UID) {
return i, nil

View File

@@ -1,61 +0,0 @@
package e2e
import (
"context"
"testing"
"time"
"gotest.tools/assert"
is "gotest.tools/assert/cmp"
v1 "k8s.io/api/core/v1"
metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
"k8s.io/apimachinery/pkg/fields"
watchapi "k8s.io/apimachinery/pkg/watch"
)
// TestNodeCreateAfterDelete makes sure that a node is automatically recreated
// if it is deleted while VK is running.
func (ts *EndToEndTestSuite) TestNodeCreateAfterDelete(t *testing.T) {
ctx, cancel := context.WithCancel(context.Background())
defer cancel()
podList, err := f.KubeClient.CoreV1().Pods(f.Namespace).List(ctx, metav1.ListOptions{
FieldSelector: fields.OneTermEqualSelector("spec.nodeName", f.NodeName).String(),
})
assert.NilError(t, err)
assert.Assert(t, is.Len(podList.Items, 0), "Kubernetes does not allow node deletion with dependent objects (pods) in existence: %v")
chErr := make(chan error, 1)
originalNode, err := f.GetNode(ctx)
assert.NilError(t, err)
ctx, cancel = context.WithTimeout(ctx, time.Minute)
defer cancel()
go func() {
wait := func(e watchapi.Event) (bool, error) {
err = ctx.Err()
// Our timeout has expired
if err != nil {
return true, err
}
if e.Type == watchapi.Deleted || e.Type == watchapi.Error {
return false, nil
}
return originalNode.ObjectMeta.UID != e.Object.(*v1.Node).ObjectMeta.UID, nil
}
chErr <- f.WaitUntilNodeCondition(wait)
}()
assert.NilError(t, f.DeleteNode(ctx))
select {
case result := <-chErr:
assert.NilError(t, result, "Did not observe new node object created after deletion")
case <-ctx.Done():
t.Fatal("Test timed out while waiting for node object to be deleted / recreated")
}
}

View File

@@ -38,7 +38,7 @@ flags:
default: virtual-kubelet
- name: --os
arg: string
description: The operating system (must be `Linux` or `Windows`)
description: The operating system (must be `linux` or `windows`)
default: Linux
- name: --pod-sync-workers
arg: int

View File

@@ -19,6 +19,9 @@
tag: huawei-cci
- name: HashiCorp Nomad
tag: nomad
- name: Liqo
tag: liqo
org: liqotech
- name: OpenStack Zun
tag: openstack-zun
- name: Tencent Games Tensile Kube