Update ACI liveness/readiness probe handling to work with named ports (#333)

* Update ACI liveness/readiness probe handling to work with named ports
This commit is contained in:
Jeremy Rickard
2019-04-23 12:43:48 -06:00
committed by Brian Goff
parent ceb9b16c5c
commit 45d2ef06b2
4 changed files with 192 additions and 49 deletions

View File

@@ -31,6 +31,7 @@ import (
"k8s.io/apimachinery/pkg/api/resource"
metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
"k8s.io/apimachinery/pkg/types"
"k8s.io/apimachinery/pkg/util/intstr"
clientcmdv1 "k8s.io/client-go/tools/clientcmd/api/v1"
"k8s.io/client-go/tools/remotecommand"
stats "k8s.io/kubernetes/pkg/kubelet/apis/stats/v1alpha1"
@@ -54,11 +55,10 @@ const (
)
const (
gpuResourceName v1.ResourceName = "nvidia.com/gpu"
gpuTypeAnnotation = "virtual-kubelet.io/gpu-type"
gpuResourceName v1.ResourceName = "nvidia.com/gpu"
gpuTypeAnnotation = "virtual-kubelet.io/gpu-type"
)
// ACIProvider implements the virtual-kubelet provider interface and communicates with Azure's ACI APIs.
type ACIProvider struct {
aciClient *aci.Client
@@ -324,37 +324,37 @@ func (p *ACIProvider) setupCapacity(ctx context.Context) error {
defer span.End()
logger := log.G(ctx).WithField("method", "setupCapacity")
// Set sane defaults for Capacity in case config is not supplied
// Set sane defaults for Capacity in case config is not supplied
p.cpu = "800"
p.memory = "4Ti"
p.pods = "800"
if cpuQuota := os.Getenv("ACI_QUOTA_CPU"); cpuQuota != "" {
if cpuQuota := os.Getenv("ACI_QUOTA_CPU"); cpuQuota != "" {
p.cpu = cpuQuota
}
if memoryQuota := os.Getenv("ACI_QUOTA_MEMORY"); memoryQuota != "" {
if memoryQuota := os.Getenv("ACI_QUOTA_MEMORY"); memoryQuota != "" {
p.memory = memoryQuota
}
if podsQuota := os.Getenv("ACI_QUOTA_POD"); podsQuota != "" {
if podsQuota := os.Getenv("ACI_QUOTA_POD"); podsQuota != "" {
p.pods = podsQuota
}
metadata, err := p.aciClient.GetResourceProviderMetadata(ctx)
metadata, err := p.aciClient.GetResourceProviderMetadata(ctx)
if err != nil {
if err != nil {
msg := "Unable to fetch the ACI metadata"
logger.WithError(err).Error(msg)
return err
}
if metadata == nil || metadata.GPURegionalSKUs == nil {
if metadata == nil || metadata.GPURegionalSKUs == nil {
logger.Warn("ACI GPU capacity is not enabled. GPU capacity will be disabled")
return nil
}
for _, regionalSKU := range metadata.GPURegionalSKUs {
for _, regionalSKU := range metadata.GPURegionalSKUs {
if strings.EqualFold(regionalSKU.Location, p.region) && len(regionalSKU.SKUs) != 0 {
p.gpu = "100"
if gpu := os.Getenv("ACI_QUOTA_GPU"); gpu != "" {
@@ -364,7 +364,7 @@ func (p *ACIProvider) setupCapacity(ctx context.Context) error {
}
}
return nil
return nil
}
func (p *ACIProvider) setupNetworkProfile(auth *client.Authentication) error {
@@ -832,7 +832,7 @@ func (p *ACIProvider) ExecInContainer(name string, uid types.UID, container stri
return err
}
wsURI := xcrsp.WebSocketURI
wsURI := xcrsp.WebSocketURI
password := xcrsp.Password
c, _, _ := websocket.DefaultDialer.Dial(wsURI, nil)
@@ -1238,7 +1238,7 @@ func (p *ACIProvider) getContainers(pod *v1.Pod) ([]aci.Container, error) {
}
if container.LivenessProbe != nil {
probe, err := getProbe(container.LivenessProbe)
probe, err := getProbe(container.LivenessProbe, container.Ports)
if err != nil {
return nil, err
}
@@ -1246,7 +1246,7 @@ func (p *ACIProvider) getContainers(pod *v1.Pod) ([]aci.Container, error) {
}
if container.ReadinessProbe != nil {
probe, err := getProbe(container.ReadinessProbe)
probe, err := getProbe(container.ReadinessProbe, container.Ports)
if err != nil {
return nil, err
}
@@ -1263,20 +1263,20 @@ func (p *ACIProvider) getGPUSKU(pod *v1.Pod) (aci.GPUSKU, error) {
return "", fmt.Errorf("The pod requires GPU resource, but ACI doesn't provide GPU enabled container group in region %s", p.region)
}
if desiredSKU, ok := pod.Annotations[gpuTypeAnnotation]; ok {
if desiredSKU, ok := pod.Annotations[gpuTypeAnnotation]; ok {
for _, supportedSKU := range p.gpuSKUs {
if strings.EqualFold(string(desiredSKU), string(supportedSKU)) {
return supportedSKU, nil
}
}
return "", fmt.Errorf("The pod requires GPU SKU %s, but ACI only supports SKUs %v in region %s", desiredSKU, p.region, p.gpuSKUs)
return "", fmt.Errorf("The pod requires GPU SKU %s, but ACI only supports SKUs %v in region %s", desiredSKU, p.region, p.gpuSKUs)
}
return p.gpuSKUs[0], nil
return p.gpuSKUs[0], nil
}
func getProbe(probe *v1.Probe) (*aci.ContainerProbe, error) {
func getProbe(probe *v1.Probe, ports []v1.ContainerPort) (*aci.ContainerProbe, error) {
if probe.Handler.Exec != nil && probe.Handler.HTTPGet != nil {
return nil, fmt.Errorf("probe may not specify more than one of \"exec\" and \"httpGet\"")
@@ -1298,8 +1298,26 @@ func getProbe(probe *v1.Probe) (*aci.ContainerProbe, error) {
var httpGET *aci.ContainerHTTPGetProbe
if probe.Handler.HTTPGet != nil {
var portValue int
port := probe.Handler.HTTPGet.Port
switch port.Type {
case intstr.Int:
portValue = port.IntValue()
case intstr.String:
portName := port.String()
for _, p := range ports {
if portName == p.Name {
portValue = int(p.ContainerPort)
break
}
}
if portValue == 0 {
return nil, fmt.Errorf("unable to find named port: %s", portName)
}
}
httpGET = &aci.ContainerHTTPGetProbe{
Port: probe.Handler.HTTPGet.Port.IntValue(),
Port: portValue,
Path: probe.Handler.HTTPGet.Path,
Scheme: string(probe.Handler.HTTPGet.Scheme),
}

View File

@@ -205,32 +205,32 @@ func TestCreatePodWithGPU(t *testing.T) {
aadServerMocker := NewAADMock()
aciServerMocker := NewACIMock()
podName := "pod-" + uuid.New().String()
podName := "pod-" + uuid.New().String()
podNamespace := "ns-" + uuid.New().String()
gpuSKU := aci.GPUSKU("sku-" + uuid.New().String())
aciServerMocker.OnGetRPManifest = func() (int, interface{}) {
aciServerMocker.OnGetRPManifest = func() (int, interface{}) {
manifest := &aci.ResourceProviderManifest{
Metadata: &aci.ResourceProviderMetadata{
GPURegionalSKUs: []*aci.GPURegionalSKU{
&aci.GPURegionalSKU{
Location: fakeRegion,
SKUs: []aci.GPUSKU{gpuSKU, aci.K80, aci.P100},
SKUs: []aci.GPUSKU{gpuSKU, aci.K80, aci.P100},
},
},
},
}
return http.StatusOK, manifest
return http.StatusOK, manifest
}
provider, err := createTestProvider(aadServerMocker, aciServerMocker)
provider, err := createTestProvider(aadServerMocker, aciServerMocker)
if err != nil {
t.Fatalf("failed to create the test provider. %s", err.Error())
return
}
aciServerMocker.OnCreate = func(subscription, resourceGroup, containerGroup string, cg *aci.ContainerGroup) (int, interface{}) {
aciServerMocker.OnCreate = func(subscription, resourceGroup, containerGroup string, cg *aci.ContainerGroup) (int, interface{}) {
assert.Check(t, is.Equal(fakeSubscription, subscription), "Subscription doesn't match")
assert.Check(t, is.Equal(fakeResourceGroup, resourceGroup), "Resource group doesn't match")
assert.Check(t, is.Equal(podNamespace+"-"+podName, containerGroup), "Container group name is not expected")
@@ -247,10 +247,10 @@ func TestCreatePodWithGPU(t *testing.T) {
assert.Check(t, is.Equal(int32(10), cg.ContainerGroupProperties.Containers[0].Resources.Limits.GPU.Count), "Requests GPU Count is not expected")
assert.Check(t, is.Equal(gpuSKU, cg.ContainerGroupProperties.Containers[0].Resources.Limits.GPU.SKU), "Requests GPU SKU is not expected")
return http.StatusOK, cg
return http.StatusOK, cg
}
pod := &v1.Pod{
pod := &v1.Pod{
ObjectMeta: metav1.ObjectMeta{
Name: podName,
Namespace: podNamespace,
@@ -273,42 +273,42 @@ func TestCreatePodWithGPU(t *testing.T) {
},
}
if err := provider.CreatePod(context.Background(), pod); err != nil {
if err := provider.CreatePod(context.Background(), pod); err != nil {
t.Fatal("Failed to create pod", err)
}
}
// Tests create pod with GPU SKU in annotation.
// Tests create pod with GPU SKU in annotation.
func TestCreatePodWithGPUSKU(t *testing.T) {
aadServerMocker := NewAADMock()
aciServerMocker := NewACIMock()
podName := "pod-" + uuid.New().String()
podName := "pod-" + uuid.New().String()
podNamespace := "ns-" + uuid.New().String()
gpuSKU := aci.GPUSKU("sku-" + uuid.New().String())
aciServerMocker.OnGetRPManifest = func() (int, interface{}) {
aciServerMocker.OnGetRPManifest = func() (int, interface{}) {
manifest := &aci.ResourceProviderManifest{
Metadata: &aci.ResourceProviderMetadata{
GPURegionalSKUs: []*aci.GPURegionalSKU{
&aci.GPURegionalSKU{
Location: fakeRegion,
SKUs: []aci.GPUSKU{aci.K80, aci.P100, gpuSKU},
SKUs: []aci.GPUSKU{aci.K80, aci.P100, gpuSKU},
},
},
},
}
return http.StatusOK, manifest
return http.StatusOK, manifest
}
provider, err := createTestProvider(aadServerMocker, aciServerMocker)
provider, err := createTestProvider(aadServerMocker, aciServerMocker)
if err != nil {
t.Fatalf("failed to create the test provider. %s", err.Error())
return
}
aciServerMocker.OnCreate = func(subscription, resourceGroup, containerGroup string, cg *aci.ContainerGroup) (int, interface{}) {
aciServerMocker.OnCreate = func(subscription, resourceGroup, containerGroup string, cg *aci.ContainerGroup) (int, interface{}) {
assert.Check(t, is.Equal(fakeSubscription, subscription), "Subscription doesn't match")
assert.Check(t, is.Equal(fakeResourceGroup, resourceGroup), "Resource group doesn't match")
assert.Check(t, cg != nil, "Container group is nil")
@@ -326,10 +326,10 @@ func TestCreatePodWithGPUSKU(t *testing.T) {
assert.Check(t, is.Equal(int32(1), cg.ContainerGroupProperties.Containers[0].Resources.Limits.GPU.Count), "Requests GPU Count is not expected")
assert.Check(t, is.Equal(gpuSKU, cg.ContainerGroupProperties.Containers[0].Resources.Limits.GPU.SKU), "Requests GPU SKU is not expected")
return http.StatusOK, cg
return http.StatusOK, cg
}
pod := &v1.Pod{
pod := &v1.Pod{
ObjectMeta: metav1.ObjectMeta{
Name: podName,
Namespace: podNamespace,
@@ -355,7 +355,7 @@ func TestCreatePodWithGPUSKU(t *testing.T) {
},
}
if err := provider.CreatePod(context.Background(), pod); err != nil {
if err := provider.CreatePod(context.Background(), pod); err != nil {
t.Fatal("Failed to create pod", err)
}
}
@@ -577,19 +577,19 @@ func TestGetPodWithoutResourceRequestsLimits(t *testing.T) {
func TestGetPodWithGPU(t *testing.T) {
_, aciServerMocker, provider, err := prepareMocks()
if err != nil {
if err != nil {
t.Fatal("Unable to prepare the mocks", err)
}
podName := "pod-" + uuid.New().String()
podName := "pod-" + uuid.New().String()
podNamespace := "ns-" + uuid.New().String()
aciServerMocker.OnGetContainerGroup = func(subscription, resourceGroup, containerGroup string) (int, interface{}) {
aciServerMocker.OnGetContainerGroup = func(subscription, resourceGroup, containerGroup string) (int, interface{}) {
assert.Equal(t, fakeSubscription, subscription, "Subscription doesn't match")
assert.Equal(t, fakeResourceGroup, resourceGroup, "Resource group doesn't match")
assert.Equal(t, podNamespace+"-"+podName, containerGroup, "Container group name is not expected")
return http.StatusOK, aci.ContainerGroup{
return http.StatusOK, aci.ContainerGroup{
Tags: map[string]string{
"NodeName": fakeNodeName,
},
@@ -630,12 +630,12 @@ func TestGetPodWithGPU(t *testing.T) {
}
}
pod, err := provider.GetPod(context.Background(), podNamespace, podName)
pod, err := provider.GetPod(context.Background(), podNamespace, podName)
if err != nil {
t.Fatal("Failed to get pod", err)
}
assert.Check(t, pod != nil, "Response pod should not be nil")
assert.Check(t, pod != nil, "Response pod should not be nil")
assert.Check(t, pod.Spec.Containers != nil, "Containers should not be nil")
assert.Check(t, pod.Spec.Containers[0].Resources.Requests != nil, "Containers[0].Resources.Requests should not be nil")
assert.Check(
@@ -786,7 +786,7 @@ func prepareMocks() (*AADMock, *ACIMock, *ACIProvider, error) {
GPURegionalSKUs: []*aci.GPURegionalSKU{
&aci.GPURegionalSKU{
Location: fakeRegion,
SKUs: []aci.GPUSKU{aci.K80, aci.P100, aci.V100},
SKUs: []aci.GPUSKU{aci.K80, aci.P100, aci.V100},
},
},
},
@@ -803,7 +803,7 @@ func prepareMocks() (*AADMock, *ACIMock, *ACIProvider, error) {
return aadServerMocker, aciServerMocker, provider, nil
}
func createTestProvider(aadServerMocker *AADMock, aciServerMocker*ACIMock) (*ACIProvider, error) {
func createTestProvider(aadServerMocker *AADMock, aciServerMocker *ACIMock) (*ACIProvider, error) {
auth := azure.NewAuthentication(
azure.PublicCloud.Name,
fakeClientID,
@@ -850,6 +850,66 @@ func ptrQuantity(q resource.Quantity) *resource.Quantity {
return &q
}
// TestCreatePodWithNamedLivenessProbe creates a pod whose HTTP liveness probe
// refers to its port by name ("http") instead of by number, and checks that
// the container group handed to the ACI mock carries the numeric container
// port (8080) together with the probe's timing and threshold settings.
func TestCreatePodWithNamedLivenessProbe(t *testing.T) {
	_, aciServerMocker, provider, err := prepareMocks()

	if err != nil {
		t.Fatal("Unable to prepare the mocks", err)
	}

	podName := "pod-" + uuid.New().String()
	podNamespace := "ns-" + uuid.New().String()

	// The OnCreate hook only inspects the container group it receives and
	// returns a response. The pod is built and submitted from the test body
	// below; doing that inside the hook would mean CreatePod is never called
	// and none of these assertions would ever run.
	aciServerMocker.OnCreate = func(subscription, resourceGroup, containerGroup string, cg *aci.ContainerGroup) (int, interface{}) {
		assert.Check(t, cg.Containers[0].LivenessProbe != nil, "Liveness probe expected")
		assert.Check(t, is.Equal(10, cg.Containers[0].LivenessProbe.InitialDelaySeconds), "Initial Probe Delay doesn't match")
		assert.Check(t, is.Equal(5, cg.Containers[0].LivenessProbe.Period), "Probe Period doesn't match")
		assert.Check(t, is.Equal(60, cg.Containers[0].LivenessProbe.TimeoutSeconds), "Probe Timeout doesn't match")
		assert.Check(t, is.Equal(3, cg.Containers[0].LivenessProbe.SuccessThreshold), "Probe Success Threshold doesn't match")
		assert.Check(t, is.Equal(5, cg.Containers[0].LivenessProbe.FailureThreshold), "Probe Failure Threshold doesn't match")
		assert.Check(t, cg.Containers[0].LivenessProbe.HTTPGet != nil, "Expected an HTTP Get Probe")
		assert.Check(t, is.Equal(8080, cg.Containers[0].LivenessProbe.HTTPGet.Port), "Expected Port to be 8080")

		return http.StatusOK, cg
	}

	pod := &v1.Pod{
		ObjectMeta: metav1.ObjectMeta{
			Name:      podName,
			Namespace: podNamespace,
		},
		Spec: v1.PodSpec{
			Containers: []v1.Container{
				v1.Container{
					Name: "nginx",
					// The probe below refers to this port by its name.
					Ports: []v1.ContainerPort{
						v1.ContainerPort{
							Name:          "http",
							ContainerPort: 8080,
						},
					},
					LivenessProbe: &v1.Probe{
						Handler: v1.Handler{
							HTTPGet: &v1.HTTPGetAction{
								Port: intstr.FromString("http"),
								Path: "/",
							},
						},
						InitialDelaySeconds: 10,
						PeriodSeconds:       5,
						TimeoutSeconds:      60,
						SuccessThreshold:    3,
						FailureThreshold:    5,
					},
				},
			},
		},
	}

	if err := provider.CreatePod(context.Background(), pod); err != nil {
		t.Fatal("Failed to create pod", err)
	}
}
func TestCreatePodWithLivenessProbe(t *testing.T) {
_, aciServerMocker, provider, err := prepareMocks()
@@ -891,7 +951,7 @@ func TestCreatePodWithLivenessProbe(t *testing.T) {
LivenessProbe: &v1.Probe{
Handler: v1.Handler{
HTTPGet: &v1.HTTPGetAction{
Port: intstr.FromString("8080"),
Port: intstr.FromInt(8080),
Path: "/",
},
},
@@ -952,7 +1012,7 @@ func TestCreatePodWithReadinessProbe(t *testing.T) {
ReadinessProbe: &v1.Probe{
Handler: v1.Handler{
HTTPGet: &v1.HTTPGetAction{
Port: intstr.FromString("8080"),
Port: intstr.FromInt(8080),
Path: "/",
},
},