From 45d2ef06b204ec6be1b0400684b07dbac4e26e38 Mon Sep 17 00:00:00 2001 From: Jeremy Rickard Date: Tue, 23 Apr 2019 12:43:48 -0600 Subject: [PATCH] Update ACI liveness/readiness probe handling to work with named ports (#333) * Update ACI liveness/readiness probe handling to work with named ports --- examples/nginx-pod-probes-named-ports.yaml | 32 +++++ examples/nginx-pod-probes-numbered-ports.yaml | 33 +++++ providers/azure/aci.go | 58 ++++++--- providers/azure/aci_test.go | 118 +++++++++++++----- 4 files changed, 192 insertions(+), 49 deletions(-) create mode 100644 examples/nginx-pod-probes-named-ports.yaml create mode 100644 examples/nginx-pod-probes-numbered-ports.yaml diff --git a/examples/nginx-pod-probes-named-ports.yaml b/examples/nginx-pod-probes-named-ports.yaml new file mode 100644 index 000000000..d24fadab9 --- /dev/null +++ b/examples/nginx-pod-probes-named-ports.yaml @@ -0,0 +1,32 @@ +apiVersion: v1 +kind: Pod +metadata: + name: nginx-named-ports +spec: + containers: + - image: nginx + imagePullPolicy: Always + name: nginx + ports: + - containerPort: 80 + name: http + protocol: TCP + - containerPort: 443 + name: https + livenessProbe: + httpGet: + path: / + port: http + readinessProbe: + httpGet: + path: / + port: http + nodeSelector: + kubernetes.io/role: agent + beta.kubernetes.io/os: linux + type: virtual-kubelet + tolerations: + - key: virtual-kubelet.io/provider + operator: Exists + - key: azure.com/aci + effect: NoSchedule diff --git a/examples/nginx-pod-probes-numbered-ports.yaml b/examples/nginx-pod-probes-numbered-ports.yaml new file mode 100644 index 000000000..bf1ea9c5b --- /dev/null +++ b/examples/nginx-pod-probes-numbered-ports.yaml @@ -0,0 +1,33 @@ +apiVersion: v1 +kind: Pod +metadata: + name: nginx-named-ports +spec: + containers: + - image: nginx + imagePullPolicy: Always + name: nginx + ports: + - containerPort: 80 + name: http + protocol: TCP + - containerPort: 443 + name: https + livenessProbe: + httpGet: + path: / + port: 80 + readinessProbe: + httpGet: + path: / + port: 80 + dnsPolicy: ClusterFirst + nodeSelector: + kubernetes.io/role: agent + beta.kubernetes.io/os: linux + type: virtual-kubelet + tolerations: + - key: virtual-kubelet.io/provider + operator: Exists + - key: azure.com/aci + effect: NoSchedule diff --git a/providers/azure/aci.go b/providers/azure/aci.go index 46f1c25f7..8267f86b4 100644 --- a/providers/azure/aci.go +++ b/providers/azure/aci.go @@ -31,6 +31,7 @@ import ( "k8s.io/apimachinery/pkg/api/resource" metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" "k8s.io/apimachinery/pkg/types" + "k8s.io/apimachinery/pkg/util/intstr" clientcmdv1 "k8s.io/client-go/tools/clientcmd/api/v1" "k8s.io/client-go/tools/remotecommand" stats "k8s.io/kubernetes/pkg/kubelet/apis/stats/v1alpha1" @@ -54,11 +55,10 @@ const ( ) const ( - gpuResourceName v1.ResourceName = "nvidia.com/gpu" - gpuTypeAnnotation = "virtual-kubelet.io/gpu-type" + gpuResourceName v1.ResourceName = "nvidia.com/gpu" + gpuTypeAnnotation = "virtual-kubelet.io/gpu-type" ) - // ACIProvider implements the virtual-kubelet provider interface and communicates with Azure's ACI APIs. type ACIProvider struct { aciClient *aci.Client @@ -324,37 +324,37 @@ func (p *ACIProvider) setupCapacity(ctx context.Context) error { defer span.End() logger := log.G(ctx).WithField("method", "setupCapacity") - // Set sane defaults for Capacity in case config is not supplied + // Set sane defaults for Capacity in case config is not supplied p.cpu = "800" p.memory = "4Ti" p.pods = "800" - if cpuQuota := os.Getenv("ACI_QUOTA_CPU"); cpuQuota != "" { + if cpuQuota := os.Getenv("ACI_QUOTA_CPU"); cpuQuota != "" { p.cpu = cpuQuota } - if memoryQuota := os.Getenv("ACI_QUOTA_MEMORY"); memoryQuota != "" { + if memoryQuota := os.Getenv("ACI_QUOTA_MEMORY"); memoryQuota != "" { p.memory = memoryQuota } - if podsQuota := os.Getenv("ACI_QUOTA_POD"); podsQuota != "" { + if podsQuota := os.Getenv("ACI_QUOTA_POD"); podsQuota != "" { p.pods = podsQuota } - metadata, err := p.aciClient.GetResourceProviderMetadata(ctx) + metadata, err := p.aciClient.GetResourceProviderMetadata(ctx) - if err != nil { + if err != nil { msg := "Unable to fetch the ACI metadata" logger.WithError(err).Error(msg) return err } - if metadata == nil || metadata.GPURegionalSKUs == nil { + if metadata == nil || metadata.GPURegionalSKUs == nil { logger.Warn("ACI GPU capacity is not enabled. GPU capacity will be disabled") return nil } - for _, regionalSKU := range metadata.GPURegionalSKUs { + for _, regionalSKU := range metadata.GPURegionalSKUs { if strings.EqualFold(regionalSKU.Location, p.region) && len(regionalSKU.SKUs) != 0 { p.gpu = "100" if gpu := os.Getenv("ACI_QUOTA_GPU"); gpu != "" { @@ -364,7 +364,7 @@ func (p *ACIProvider) setupCapacity(ctx context.Context) error { } } - return nil + return nil } func (p *ACIProvider) setupNetworkProfile(auth *client.Authentication) error { @@ -832,7 +832,7 @@ func (p *ACIProvider) ExecInContainer(name string, uid types.UID, container stri return err } - wsURI := xcrsp.WebSocketURI + wsURI := xcrsp.WebSocketURI password := xcrsp.Password c, _, _ := websocket.DefaultDialer.Dial(wsURI, nil) @@ -1238,7 +1238,7 @@ func (p *ACIProvider) getContainers(pod *v1.Pod) ([]aci.Container, error) { } if container.LivenessProbe != nil { - probe, err := getProbe(container.LivenessProbe) + probe, err := getProbe(container.LivenessProbe, container.Ports) if err != nil { return nil, err } @@ -1246,7 +1246,7 @@ func (p *ACIProvider) getContainers(pod *v1.Pod) ([]aci.Container, error) { } if container.ReadinessProbe != nil { - probe, err := getProbe(container.ReadinessProbe) + probe, err := getProbe(container.ReadinessProbe, container.Ports) if err != nil { return nil, err } @@ -1263,20 +1263,20 @@ func (p *ACIProvider) getGPUSKU(pod *v1.Pod) (aci.GPUSKU, error) { return "", fmt.Errorf("The pod requires GPU resource, but ACI doesn't provide GPU enabled container group in region %s", p.region) } - if desiredSKU, ok := pod.Annotations[gpuTypeAnnotation]; ok { + if desiredSKU, ok := pod.Annotations[gpuTypeAnnotation]; ok { for _, supportedSKU := range p.gpuSKUs { if strings.EqualFold(string(desiredSKU), string(supportedSKU)) { return supportedSKU, nil } } - return "", fmt.Errorf("The pod requires GPU SKU %s, but ACI only supports SKUs %v in region %s", desiredSKU, p.region, p.gpuSKUs) + return "", fmt.Errorf("The pod requires GPU SKU %s, but ACI only supports SKUs %v in region %s", desiredSKU, p.region, p.gpuSKUs) } - return p.gpuSKUs[0], nil + return p.gpuSKUs[0], nil } -func getProbe(probe *v1.Probe) (*aci.ContainerProbe, error) { +func getProbe(probe *v1.Probe, ports []v1.ContainerPort) (*aci.ContainerProbe, error) { if probe.Handler.Exec != nil && probe.Handler.HTTPGet != nil { return nil, fmt.Errorf("probe may not specify more than one of \"exec\" and \"httpGet\"") @@ -1298,8 +1298,26 @@ func getProbe(probe *v1.Probe) (*aci.ContainerProbe, error) { var httpGET *aci.ContainerHTTPGetProbe if probe.Handler.HTTPGet != nil { + var portValue int + port := probe.Handler.HTTPGet.Port + switch port.Type { + case intstr.Int: + portValue = port.IntValue() + case intstr.String: + portName := port.String() + for _, p := range ports { + if portName == p.Name { + portValue = int(p.ContainerPort) + break + } + } + if portValue == 0 { + return nil, fmt.Errorf("unable to find named port: %s", portName) + } + } + httpGET = &aci.ContainerHTTPGetProbe{ - Port: probe.Handler.HTTPGet.Port.IntValue(), + Port: portValue, Path: probe.Handler.HTTPGet.Path, Scheme: string(probe.Handler.HTTPGet.Scheme), } diff --git a/providers/azure/aci_test.go b/providers/azure/aci_test.go index a626d04e0..bef89de4f 100644 --- a/providers/azure/aci_test.go +++ b/providers/azure/aci_test.go @@ -205,32 +205,32 @@ func TestCreatePodWithGPU(t *testing.T) { aadServerMocker := NewAADMock() aciServerMocker := NewACIMock() - podName := "pod-" + uuid.New().String() + podName := "pod-" + uuid.New().String() podNamespace := "ns-" + uuid.New().String() gpuSKU := aci.GPUSKU("sku-" + uuid.New().String()) - aciServerMocker.OnGetRPManifest = func() (int, interface{}) { + aciServerMocker.OnGetRPManifest = func() (int, interface{}) { manifest := &aci.ResourceProviderManifest{ Metadata: &aci.ResourceProviderMetadata{ GPURegionalSKUs: []*aci.GPURegionalSKU{ &aci.GPURegionalSKU{ Location: fakeRegion, - SKUs: []aci.GPUSKU{gpuSKU, aci.K80, aci.P100}, + SKUs: []aci.GPUSKU{gpuSKU, aci.K80, aci.P100}, }, }, }, } - return http.StatusOK, manifest + return http.StatusOK, manifest } - provider, err := createTestProvider(aadServerMocker, aciServerMocker) + provider, err := createTestProvider(aadServerMocker, aciServerMocker) if err != nil { t.Fatalf("failed to create the test provider. %s", err.Error()) return } - aciServerMocker.OnCreate = func(subscription, resourceGroup, containerGroup string, cg *aci.ContainerGroup) (int, interface{}) { + aciServerMocker.OnCreate = func(subscription, resourceGroup, containerGroup string, cg *aci.ContainerGroup) (int, interface{}) { assert.Check(t, is.Equal(fakeSubscription, subscription), "Subscription doesn't match") assert.Check(t, is.Equal(fakeResourceGroup, resourceGroup), "Resource group doesn't match") assert.Check(t, is.Equal(podNamespace+"-"+podName, containerGroup), "Container group name is not expected") @@ -247,10 +247,10 @@ func TestCreatePodWithGPU(t *testing.T) { assert.Check(t, is.Equal(int32(10), cg.ContainerGroupProperties.Containers[0].Resources.Limits.GPU.Count), "Requests GPU Count is not expected") assert.Check(t, is.Equal(gpuSKU, cg.ContainerGroupProperties.Containers[0].Resources.Limits.GPU.SKU), "Requests GPU SKU is not expected") - return http.StatusOK, cg + return http.StatusOK, cg } - pod := &v1.Pod{ + pod := &v1.Pod{ ObjectMeta: metav1.ObjectMeta{ Name: podName, Namespace: podNamespace, @@ -273,42 +273,42 @@ func TestCreatePodWithGPU(t *testing.T) { }, } - if err := provider.CreatePod(context.Background(), pod); err != nil { + if err := provider.CreatePod(context.Background(), pod); err != nil { t.Fatal("Failed to create pod", err) } } - // Tests create pod with GPU SKU in annotation. +// Tests create pod with GPU SKU in annotation. func TestCreatePodWithGPUSKU(t *testing.T) { aadServerMocker := NewAADMock() aciServerMocker := NewACIMock() - podName := "pod-" + uuid.New().String() + podName := "pod-" + uuid.New().String() podNamespace := "ns-" + uuid.New().String() gpuSKU := aci.GPUSKU("sku-" + uuid.New().String()) - aciServerMocker.OnGetRPManifest = func() (int, interface{}) { + aciServerMocker.OnGetRPManifest = func() (int, interface{}) { manifest := &aci.ResourceProviderManifest{ Metadata: &aci.ResourceProviderMetadata{ GPURegionalSKUs: []*aci.GPURegionalSKU{ &aci.GPURegionalSKU{ Location: fakeRegion, - SKUs: []aci.GPUSKU{aci.K80, aci.P100, gpuSKU}, + SKUs: []aci.GPUSKU{aci.K80, aci.P100, gpuSKU}, }, }, }, } - return http.StatusOK, manifest + return http.StatusOK, manifest } - provider, err := createTestProvider(aadServerMocker, aciServerMocker) + provider, err := createTestProvider(aadServerMocker, aciServerMocker) if err != nil { t.Fatalf("failed to create the test provider. %s", err.Error()) return } - aciServerMocker.OnCreate = func(subscription, resourceGroup, containerGroup string, cg *aci.ContainerGroup) (int, interface{}) { + aciServerMocker.OnCreate = func(subscription, resourceGroup, containerGroup string, cg *aci.ContainerGroup) (int, interface{}) { assert.Check(t, is.Equal(fakeSubscription, subscription), "Subscription doesn't match") assert.Check(t, is.Equal(fakeResourceGroup, resourceGroup), "Resource group doesn't match") assert.Check(t, cg != nil, "Container group is nil") @@ -326,10 +326,10 @@ func TestCreatePodWithGPUSKU(t *testing.T) { assert.Check(t, is.Equal(int32(1), cg.ContainerGroupProperties.Containers[0].Resources.Limits.GPU.Count), "Requests GPU Count is not expected") assert.Check(t, is.Equal(gpuSKU, cg.ContainerGroupProperties.Containers[0].Resources.Limits.GPU.SKU), "Requests GPU SKU is not expected") - return http.StatusOK, cg + return http.StatusOK, cg } - pod := &v1.Pod{ + pod := &v1.Pod{ ObjectMeta: metav1.ObjectMeta{ Name: podName, Namespace: podNamespace, @@ -355,7 +355,7 @@ func TestCreatePodWithGPUSKU(t *testing.T) { }, } - if err := provider.CreatePod(context.Background(), pod); err != nil { + if err := provider.CreatePod(context.Background(), pod); err != nil { t.Fatal("Failed to create pod", err) } } @@ -577,19 +577,19 @@ func TestGetPodWithoutResourceRequestsLimits(t *testing.T) { func TestGetPodWithGPU(t *testing.T) { _, aciServerMocker, provider, err := prepareMocks() - if err != nil { + if err != nil { t.Fatal("Unable to prepare the mocks", err) } - podName := "pod-" + uuid.New().String() + podName := "pod-" + uuid.New().String() podNamespace := "ns-" + uuid.New().String() - aciServerMocker.OnGetContainerGroup = func(subscription, resourceGroup, containerGroup string) (int, interface{}) { + aciServerMocker.OnGetContainerGroup = func(subscription, resourceGroup, containerGroup string) (int, interface{}) { assert.Equal(t, fakeSubscription, subscription, "Subscription doesn't match") assert.Equal(t, fakeResourceGroup, resourceGroup, "Resource group doesn't match") assert.Equal(t, podNamespace+"-"+podName, containerGroup, "Container group name is not expected") - return http.StatusOK, aci.ContainerGroup{ + return http.StatusOK, aci.ContainerGroup{ Tags: map[string]string{ "NodeName": fakeNodeName, }, @@ -630,12 +630,12 @@ func TestGetPodWithGPU(t *testing.T) { } } - pod, err := provider.GetPod(context.Background(), podNamespace, podName) + pod, err := provider.GetPod(context.Background(), podNamespace, podName) if err != nil { t.Fatal("Failed to get pod", err) } - assert.Check(t, pod != nil, "Response pod should not be nil") + assert.Check(t, pod != nil, "Response pod should not be nil") assert.Check(t, pod.Spec.Containers != nil, "Containers should not be nil") assert.Check(t, pod.Spec.Containers[0].Resources.Requests != nil, "Containers[0].Resources.Requests should not be nil") assert.Check( @@ -786,7 +786,7 @@ func prepareMocks() (*AADMock, *ACIMock, *ACIProvider, error) { GPURegionalSKUs: []*aci.GPURegionalSKU{ &aci.GPURegionalSKU{ Location: fakeRegion, - SKUs: []aci.GPUSKU{aci.K80, aci.P100, aci.V100}, + SKUs: []aci.GPUSKU{aci.K80, aci.P100, aci.V100}, }, }, }, @@ -803,7 +803,7 @@ func prepareMocks() (*AADMock, *ACIMock, *ACIProvider, error) { return aadServerMocker, aciServerMocker, provider, nil } -func createTestProvider(aadServerMocker *AADMock, aciServerMocker*ACIMock) (*ACIProvider, error) { +func createTestProvider(aadServerMocker *AADMock, aciServerMocker *ACIMock) (*ACIProvider, error) { auth := azure.NewAuthentication( azure.PublicCloud.Name, fakeClientID, @@ -850,6 +850,66 @@ func ptrQuantity(q resource.Quantity) *resource.Quantity { return &q } +func TestCreatePodWithNamedLivenessProbe(t *testing.T) { + _, aciServerMocker, provider, err := prepareMocks() + + if err != nil { + t.Fatal("Unable to prepare the mocks", err) + } + + podName := "pod-" + uuid.New().String() + podNamespace := "ns-" + uuid.New().String() + + aciServerMocker.OnCreate = func(subscription, resourceGroup, containerGroup string, cg *aci.ContainerGroup) (int, interface{}) { + assert.Check(t, cg.Containers[0].LivenessProbe != nil, "Liveness probe expected") + assert.Check(t, is.Equal(10, cg.Containers[0].LivenessProbe.InitialDelaySeconds), "Initial Probe Delay doesn't match") + assert.Check(t, is.Equal(5, cg.Containers[0].LivenessProbe.Period), "Probe Period doesn't match") + assert.Check(t, is.Equal(60, cg.Containers[0].LivenessProbe.TimeoutSeconds), "Probe Timeout doesn't match") + assert.Check(t, is.Equal(3, cg.Containers[0].LivenessProbe.SuccessThreshold), "Probe Success Threshold doesn't match") + assert.Check(t, is.Equal(5, cg.Containers[0].LivenessProbe.FailureThreshold), "Probe Failure Threshold doesn't match") + assert.Check(t, cg.Containers[0].LivenessProbe.HTTPGet != nil, "Expected an HTTP Get Probe") + assert.Check(t, is.Equal(8080, cg.Containers[0].LivenessProbe.HTTPGet.Port), "Expected Port to be 8080") + pod := &v1.Pod{ + ObjectMeta: metav1.ObjectMeta{ + Name: podName, + Namespace: podNamespace, + }, + Spec: v1.PodSpec{ + Containers: []v1.Container{ + v1.Container{ + Name: "nginx", + Ports: []v1.ContainerPort{ + v1.ContainerPort{ + Name: "http", + ContainerPort: 8080, + }, + }, + LivenessProbe: &v1.Probe{ + Handler: v1.Handler{ + HTTPGet: &v1.HTTPGetAction{ + Port: intstr.FromString("http"), + Path: "/", + }, + }, + InitialDelaySeconds: 10, + PeriodSeconds: 5, + TimeoutSeconds: 60, + SuccessThreshold: 3, + FailureThreshold: 5, + }, + }, + }, + }, + } + + if err := provider.CreatePod(context.Background(), pod); err != nil { + t.Fatal("Failed to create pod", err) + } + + return http.StatusOK, cg + } +} + func TestCreatePodWithLivenessProbe(t *testing.T) { _, aciServerMocker, provider, err := prepareMocks() @@ -891,7 +951,7 @@ func TestCreatePodWithLivenessProbe(t *testing.T) { LivenessProbe: &v1.Probe{ Handler: v1.Handler{ HTTPGet: &v1.HTTPGetAction{ - Port: intstr.FromString("8080"), + Port: intstr.FromInt(8080), Path: "/", }, }, @@ -952,7 +1012,7 @@ func TestCreatePodWithReadinessProbe(t *testing.T) { ReadinessProbe: &v1.Probe{ Handler: v1.Handler{ HTTPGet: &v1.HTTPGetAction{ - Port: intstr.FromString("8080"), + Port: intstr.FromInt(8080), Path: "/", }, },