Update ACI liveness/readiness probe handling to work with named ports (#333)
* Update ACI liveness/readiness probe handling to work with named ports
This commit is contained in:
committed by
Brian Goff
parent
ceb9b16c5c
commit
45d2ef06b2
@@ -31,6 +31,7 @@ import (
|
||||
"k8s.io/apimachinery/pkg/api/resource"
|
||||
metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
|
||||
"k8s.io/apimachinery/pkg/types"
|
||||
"k8s.io/apimachinery/pkg/util/intstr"
|
||||
clientcmdv1 "k8s.io/client-go/tools/clientcmd/api/v1"
|
||||
"k8s.io/client-go/tools/remotecommand"
|
||||
stats "k8s.io/kubernetes/pkg/kubelet/apis/stats/v1alpha1"
|
||||
@@ -54,11 +55,10 @@ const (
|
||||
)
|
||||
|
||||
const (
|
||||
gpuResourceName v1.ResourceName = "nvidia.com/gpu"
|
||||
gpuTypeAnnotation = "virtual-kubelet.io/gpu-type"
|
||||
gpuResourceName v1.ResourceName = "nvidia.com/gpu"
|
||||
gpuTypeAnnotation = "virtual-kubelet.io/gpu-type"
|
||||
)
|
||||
|
||||
|
||||
// ACIProvider implements the virtual-kubelet provider interface and communicates with Azure's ACI APIs.
|
||||
type ACIProvider struct {
|
||||
aciClient *aci.Client
|
||||
@@ -324,37 +324,37 @@ func (p *ACIProvider) setupCapacity(ctx context.Context) error {
|
||||
defer span.End()
|
||||
logger := log.G(ctx).WithField("method", "setupCapacity")
|
||||
|
||||
// Set sane defaults for Capacity in case config is not supplied
|
||||
// Set sane defaults for Capacity in case config is not supplied
|
||||
p.cpu = "800"
|
||||
p.memory = "4Ti"
|
||||
p.pods = "800"
|
||||
|
||||
if cpuQuota := os.Getenv("ACI_QUOTA_CPU"); cpuQuota != "" {
|
||||
if cpuQuota := os.Getenv("ACI_QUOTA_CPU"); cpuQuota != "" {
|
||||
p.cpu = cpuQuota
|
||||
}
|
||||
|
||||
if memoryQuota := os.Getenv("ACI_QUOTA_MEMORY"); memoryQuota != "" {
|
||||
if memoryQuota := os.Getenv("ACI_QUOTA_MEMORY"); memoryQuota != "" {
|
||||
p.memory = memoryQuota
|
||||
}
|
||||
|
||||
if podsQuota := os.Getenv("ACI_QUOTA_POD"); podsQuota != "" {
|
||||
if podsQuota := os.Getenv("ACI_QUOTA_POD"); podsQuota != "" {
|
||||
p.pods = podsQuota
|
||||
}
|
||||
|
||||
metadata, err := p.aciClient.GetResourceProviderMetadata(ctx)
|
||||
metadata, err := p.aciClient.GetResourceProviderMetadata(ctx)
|
||||
|
||||
if err != nil {
|
||||
if err != nil {
|
||||
msg := "Unable to fetch the ACI metadata"
|
||||
logger.WithError(err).Error(msg)
|
||||
return err
|
||||
}
|
||||
|
||||
if metadata == nil || metadata.GPURegionalSKUs == nil {
|
||||
if metadata == nil || metadata.GPURegionalSKUs == nil {
|
||||
logger.Warn("ACI GPU capacity is not enabled. GPU capacity will be disabled")
|
||||
return nil
|
||||
}
|
||||
|
||||
for _, regionalSKU := range metadata.GPURegionalSKUs {
|
||||
for _, regionalSKU := range metadata.GPURegionalSKUs {
|
||||
if strings.EqualFold(regionalSKU.Location, p.region) && len(regionalSKU.SKUs) != 0 {
|
||||
p.gpu = "100"
|
||||
if gpu := os.Getenv("ACI_QUOTA_GPU"); gpu != "" {
|
||||
@@ -364,7 +364,7 @@ func (p *ACIProvider) setupCapacity(ctx context.Context) error {
|
||||
}
|
||||
}
|
||||
|
||||
return nil
|
||||
return nil
|
||||
}
|
||||
|
||||
func (p *ACIProvider) setupNetworkProfile(auth *client.Authentication) error {
|
||||
@@ -832,7 +832,7 @@ func (p *ACIProvider) ExecInContainer(name string, uid types.UID, container stri
|
||||
return err
|
||||
}
|
||||
|
||||
wsURI := xcrsp.WebSocketURI
|
||||
wsURI := xcrsp.WebSocketURI
|
||||
password := xcrsp.Password
|
||||
|
||||
c, _, _ := websocket.DefaultDialer.Dial(wsURI, nil)
|
||||
@@ -1238,7 +1238,7 @@ func (p *ACIProvider) getContainers(pod *v1.Pod) ([]aci.Container, error) {
|
||||
}
|
||||
|
||||
if container.LivenessProbe != nil {
|
||||
probe, err := getProbe(container.LivenessProbe)
|
||||
probe, err := getProbe(container.LivenessProbe, container.Ports)
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
@@ -1246,7 +1246,7 @@ func (p *ACIProvider) getContainers(pod *v1.Pod) ([]aci.Container, error) {
|
||||
}
|
||||
|
||||
if container.ReadinessProbe != nil {
|
||||
probe, err := getProbe(container.ReadinessProbe)
|
||||
probe, err := getProbe(container.ReadinessProbe, container.Ports)
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
@@ -1263,20 +1263,20 @@ func (p *ACIProvider) getGPUSKU(pod *v1.Pod) (aci.GPUSKU, error) {
|
||||
return "", fmt.Errorf("The pod requires GPU resource, but ACI doesn't provide GPU enabled container group in region %s", p.region)
|
||||
}
|
||||
|
||||
if desiredSKU, ok := pod.Annotations[gpuTypeAnnotation]; ok {
|
||||
if desiredSKU, ok := pod.Annotations[gpuTypeAnnotation]; ok {
|
||||
for _, supportedSKU := range p.gpuSKUs {
|
||||
if strings.EqualFold(string(desiredSKU), string(supportedSKU)) {
|
||||
return supportedSKU, nil
|
||||
}
|
||||
}
|
||||
|
||||
return "", fmt.Errorf("The pod requires GPU SKU %s, but ACI only supports SKUs %v in region %s", desiredSKU, p.region, p.gpuSKUs)
|
||||
return "", fmt.Errorf("The pod requires GPU SKU %s, but ACI only supports SKUs %v in region %s", desiredSKU, p.region, p.gpuSKUs)
|
||||
}
|
||||
|
||||
return p.gpuSKUs[0], nil
|
||||
return p.gpuSKUs[0], nil
|
||||
}
|
||||
|
||||
func getProbe(probe *v1.Probe) (*aci.ContainerProbe, error) {
|
||||
func getProbe(probe *v1.Probe, ports []v1.ContainerPort) (*aci.ContainerProbe, error) {
|
||||
|
||||
if probe.Handler.Exec != nil && probe.Handler.HTTPGet != nil {
|
||||
return nil, fmt.Errorf("probe may not specify more than one of \"exec\" and \"httpGet\"")
|
||||
@@ -1298,8 +1298,26 @@ func getProbe(probe *v1.Probe) (*aci.ContainerProbe, error) {
|
||||
|
||||
var httpGET *aci.ContainerHTTPGetProbe
|
||||
if probe.Handler.HTTPGet != nil {
|
||||
var portValue int
|
||||
port := probe.Handler.HTTPGet.Port
|
||||
switch port.Type {
|
||||
case intstr.Int:
|
||||
portValue = port.IntValue()
|
||||
case intstr.String:
|
||||
portName := port.String()
|
||||
for _, p := range ports {
|
||||
if portName == p.Name {
|
||||
portValue = int(p.ContainerPort)
|
||||
break
|
||||
}
|
||||
}
|
||||
if portValue == 0 {
|
||||
return nil, fmt.Errorf("unable to find named port: %s", portName)
|
||||
}
|
||||
}
|
||||
|
||||
httpGET = &aci.ContainerHTTPGetProbe{
|
||||
Port: probe.Handler.HTTPGet.Port.IntValue(),
|
||||
Port: portValue,
|
||||
Path: probe.Handler.HTTPGet.Path,
|
||||
Scheme: string(probe.Handler.HTTPGet.Scheme),
|
||||
}
|
||||
|
||||
@@ -205,32 +205,32 @@ func TestCreatePodWithGPU(t *testing.T) {
|
||||
aadServerMocker := NewAADMock()
|
||||
aciServerMocker := NewACIMock()
|
||||
|
||||
podName := "pod-" + uuid.New().String()
|
||||
podName := "pod-" + uuid.New().String()
|
||||
podNamespace := "ns-" + uuid.New().String()
|
||||
gpuSKU := aci.GPUSKU("sku-" + uuid.New().String())
|
||||
|
||||
aciServerMocker.OnGetRPManifest = func() (int, interface{}) {
|
||||
aciServerMocker.OnGetRPManifest = func() (int, interface{}) {
|
||||
manifest := &aci.ResourceProviderManifest{
|
||||
Metadata: &aci.ResourceProviderMetadata{
|
||||
GPURegionalSKUs: []*aci.GPURegionalSKU{
|
||||
&aci.GPURegionalSKU{
|
||||
Location: fakeRegion,
|
||||
SKUs: []aci.GPUSKU{gpuSKU, aci.K80, aci.P100},
|
||||
SKUs: []aci.GPUSKU{gpuSKU, aci.K80, aci.P100},
|
||||
},
|
||||
},
|
||||
},
|
||||
}
|
||||
|
||||
return http.StatusOK, manifest
|
||||
return http.StatusOK, manifest
|
||||
}
|
||||
|
||||
provider, err := createTestProvider(aadServerMocker, aciServerMocker)
|
||||
provider, err := createTestProvider(aadServerMocker, aciServerMocker)
|
||||
if err != nil {
|
||||
t.Fatalf("failed to create the test provider. %s", err.Error())
|
||||
return
|
||||
}
|
||||
|
||||
aciServerMocker.OnCreate = func(subscription, resourceGroup, containerGroup string, cg *aci.ContainerGroup) (int, interface{}) {
|
||||
aciServerMocker.OnCreate = func(subscription, resourceGroup, containerGroup string, cg *aci.ContainerGroup) (int, interface{}) {
|
||||
assert.Check(t, is.Equal(fakeSubscription, subscription), "Subscription doesn't match")
|
||||
assert.Check(t, is.Equal(fakeResourceGroup, resourceGroup), "Resource group doesn't match")
|
||||
assert.Check(t, is.Equal(podNamespace+"-"+podName, containerGroup), "Container group name is not expected")
|
||||
@@ -247,10 +247,10 @@ func TestCreatePodWithGPU(t *testing.T) {
|
||||
assert.Check(t, is.Equal(int32(10), cg.ContainerGroupProperties.Containers[0].Resources.Limits.GPU.Count), "Requests GPU Count is not expected")
|
||||
assert.Check(t, is.Equal(gpuSKU, cg.ContainerGroupProperties.Containers[0].Resources.Limits.GPU.SKU), "Requests GPU SKU is not expected")
|
||||
|
||||
return http.StatusOK, cg
|
||||
return http.StatusOK, cg
|
||||
}
|
||||
|
||||
pod := &v1.Pod{
|
||||
pod := &v1.Pod{
|
||||
ObjectMeta: metav1.ObjectMeta{
|
||||
Name: podName,
|
||||
Namespace: podNamespace,
|
||||
@@ -273,42 +273,42 @@ func TestCreatePodWithGPU(t *testing.T) {
|
||||
},
|
||||
}
|
||||
|
||||
if err := provider.CreatePod(context.Background(), pod); err != nil {
|
||||
if err := provider.CreatePod(context.Background(), pod); err != nil {
|
||||
t.Fatal("Failed to create pod", err)
|
||||
}
|
||||
}
|
||||
|
||||
// Tests create pod with GPU SKU in annotation.
|
||||
// Tests create pod with GPU SKU in annotation.
|
||||
func TestCreatePodWithGPUSKU(t *testing.T) {
|
||||
aadServerMocker := NewAADMock()
|
||||
aciServerMocker := NewACIMock()
|
||||
|
||||
podName := "pod-" + uuid.New().String()
|
||||
podName := "pod-" + uuid.New().String()
|
||||
podNamespace := "ns-" + uuid.New().String()
|
||||
gpuSKU := aci.GPUSKU("sku-" + uuid.New().String())
|
||||
|
||||
aciServerMocker.OnGetRPManifest = func() (int, interface{}) {
|
||||
aciServerMocker.OnGetRPManifest = func() (int, interface{}) {
|
||||
manifest := &aci.ResourceProviderManifest{
|
||||
Metadata: &aci.ResourceProviderMetadata{
|
||||
GPURegionalSKUs: []*aci.GPURegionalSKU{
|
||||
&aci.GPURegionalSKU{
|
||||
Location: fakeRegion,
|
||||
SKUs: []aci.GPUSKU{aci.K80, aci.P100, gpuSKU},
|
||||
SKUs: []aci.GPUSKU{aci.K80, aci.P100, gpuSKU},
|
||||
},
|
||||
},
|
||||
},
|
||||
}
|
||||
|
||||
return http.StatusOK, manifest
|
||||
return http.StatusOK, manifest
|
||||
}
|
||||
|
||||
provider, err := createTestProvider(aadServerMocker, aciServerMocker)
|
||||
provider, err := createTestProvider(aadServerMocker, aciServerMocker)
|
||||
if err != nil {
|
||||
t.Fatalf("failed to create the test provider. %s", err.Error())
|
||||
return
|
||||
}
|
||||
|
||||
aciServerMocker.OnCreate = func(subscription, resourceGroup, containerGroup string, cg *aci.ContainerGroup) (int, interface{}) {
|
||||
aciServerMocker.OnCreate = func(subscription, resourceGroup, containerGroup string, cg *aci.ContainerGroup) (int, interface{}) {
|
||||
assert.Check(t, is.Equal(fakeSubscription, subscription), "Subscription doesn't match")
|
||||
assert.Check(t, is.Equal(fakeResourceGroup, resourceGroup), "Resource group doesn't match")
|
||||
assert.Check(t, cg != nil, "Container group is nil")
|
||||
@@ -326,10 +326,10 @@ func TestCreatePodWithGPUSKU(t *testing.T) {
|
||||
assert.Check(t, is.Equal(int32(1), cg.ContainerGroupProperties.Containers[0].Resources.Limits.GPU.Count), "Requests GPU Count is not expected")
|
||||
assert.Check(t, is.Equal(gpuSKU, cg.ContainerGroupProperties.Containers[0].Resources.Limits.GPU.SKU), "Requests GPU SKU is not expected")
|
||||
|
||||
return http.StatusOK, cg
|
||||
return http.StatusOK, cg
|
||||
}
|
||||
|
||||
pod := &v1.Pod{
|
||||
pod := &v1.Pod{
|
||||
ObjectMeta: metav1.ObjectMeta{
|
||||
Name: podName,
|
||||
Namespace: podNamespace,
|
||||
@@ -355,7 +355,7 @@ func TestCreatePodWithGPUSKU(t *testing.T) {
|
||||
},
|
||||
}
|
||||
|
||||
if err := provider.CreatePod(context.Background(), pod); err != nil {
|
||||
if err := provider.CreatePod(context.Background(), pod); err != nil {
|
||||
t.Fatal("Failed to create pod", err)
|
||||
}
|
||||
}
|
||||
@@ -577,19 +577,19 @@ func TestGetPodWithoutResourceRequestsLimits(t *testing.T) {
|
||||
func TestGetPodWithGPU(t *testing.T) {
|
||||
_, aciServerMocker, provider, err := prepareMocks()
|
||||
|
||||
if err != nil {
|
||||
if err != nil {
|
||||
t.Fatal("Unable to prepare the mocks", err)
|
||||
}
|
||||
|
||||
podName := "pod-" + uuid.New().String()
|
||||
podName := "pod-" + uuid.New().String()
|
||||
podNamespace := "ns-" + uuid.New().String()
|
||||
|
||||
aciServerMocker.OnGetContainerGroup = func(subscription, resourceGroup, containerGroup string) (int, interface{}) {
|
||||
aciServerMocker.OnGetContainerGroup = func(subscription, resourceGroup, containerGroup string) (int, interface{}) {
|
||||
assert.Equal(t, fakeSubscription, subscription, "Subscription doesn't match")
|
||||
assert.Equal(t, fakeResourceGroup, resourceGroup, "Resource group doesn't match")
|
||||
assert.Equal(t, podNamespace+"-"+podName, containerGroup, "Container group name is not expected")
|
||||
|
||||
return http.StatusOK, aci.ContainerGroup{
|
||||
return http.StatusOK, aci.ContainerGroup{
|
||||
Tags: map[string]string{
|
||||
"NodeName": fakeNodeName,
|
||||
},
|
||||
@@ -630,12 +630,12 @@ func TestGetPodWithGPU(t *testing.T) {
|
||||
}
|
||||
}
|
||||
|
||||
pod, err := provider.GetPod(context.Background(), podNamespace, podName)
|
||||
pod, err := provider.GetPod(context.Background(), podNamespace, podName)
|
||||
if err != nil {
|
||||
t.Fatal("Failed to get pod", err)
|
||||
}
|
||||
|
||||
assert.Check(t, pod != nil, "Response pod should not be nil")
|
||||
assert.Check(t, pod != nil, "Response pod should not be nil")
|
||||
assert.Check(t, pod.Spec.Containers != nil, "Containers should not be nil")
|
||||
assert.Check(t, pod.Spec.Containers[0].Resources.Requests != nil, "Containers[0].Resources.Requests should not be nil")
|
||||
assert.Check(
|
||||
@@ -786,7 +786,7 @@ func prepareMocks() (*AADMock, *ACIMock, *ACIProvider, error) {
|
||||
GPURegionalSKUs: []*aci.GPURegionalSKU{
|
||||
&aci.GPURegionalSKU{
|
||||
Location: fakeRegion,
|
||||
SKUs: []aci.GPUSKU{aci.K80, aci.P100, aci.V100},
|
||||
SKUs: []aci.GPUSKU{aci.K80, aci.P100, aci.V100},
|
||||
},
|
||||
},
|
||||
},
|
||||
@@ -803,7 +803,7 @@ func prepareMocks() (*AADMock, *ACIMock, *ACIProvider, error) {
|
||||
return aadServerMocker, aciServerMocker, provider, nil
|
||||
}
|
||||
|
||||
func createTestProvider(aadServerMocker *AADMock, aciServerMocker*ACIMock) (*ACIProvider, error) {
|
||||
func createTestProvider(aadServerMocker *AADMock, aciServerMocker *ACIMock) (*ACIProvider, error) {
|
||||
auth := azure.NewAuthentication(
|
||||
azure.PublicCloud.Name,
|
||||
fakeClientID,
|
||||
@@ -850,6 +850,66 @@ func ptrQuantity(q resource.Quantity) *resource.Quantity {
|
||||
return &q
|
||||
}
|
||||
|
||||
func TestCreatePodWithNamedLivenessProbe(t *testing.T) {
|
||||
_, aciServerMocker, provider, err := prepareMocks()
|
||||
|
||||
if err != nil {
|
||||
t.Fatal("Unable to prepare the mocks", err)
|
||||
}
|
||||
|
||||
podName := "pod-" + uuid.New().String()
|
||||
podNamespace := "ns-" + uuid.New().String()
|
||||
|
||||
aciServerMocker.OnCreate = func(subscription, resourceGroup, containerGroup string, cg *aci.ContainerGroup) (int, interface{}) {
|
||||
assert.Check(t, cg.Containers[0].LivenessProbe != nil, "Liveness probe expected")
|
||||
assert.Check(t, is.Equal(10, cg.Containers[0].LivenessProbe.InitialDelaySeconds), "Initial Probe Delay doesn't match")
|
||||
assert.Check(t, is.Equal(5, cg.Containers[0].LivenessProbe.Period), "Probe Period doesn't match")
|
||||
assert.Check(t, is.Equal(60, cg.Containers[0].LivenessProbe.TimeoutSeconds), "Probe Timeout doesn't match")
|
||||
assert.Check(t, is.Equal(3, cg.Containers[0].LivenessProbe.SuccessThreshold), "Probe Success Threshold doesn't match")
|
||||
assert.Check(t, is.Equal(5, cg.Containers[0].LivenessProbe.FailureThreshold), "Probe Failure Threshold doesn't match")
|
||||
assert.Check(t, cg.Containers[0].LivenessProbe.HTTPGet != nil, "Expected an HTTP Get Probe")
|
||||
assert.Check(t, is.Equal(8080, cg.Containers[0].LivenessProbe.HTTPGet.Port), "Expected Port to be 8080")
|
||||
pod := &v1.Pod{
|
||||
ObjectMeta: metav1.ObjectMeta{
|
||||
Name: podName,
|
||||
Namespace: podNamespace,
|
||||
},
|
||||
Spec: v1.PodSpec{
|
||||
Containers: []v1.Container{
|
||||
v1.Container{
|
||||
Name: "nginx",
|
||||
Ports: []v1.ContainerPort{
|
||||
v1.ContainerPort{
|
||||
Name: "http",
|
||||
ContainerPort: 8080,
|
||||
},
|
||||
},
|
||||
LivenessProbe: &v1.Probe{
|
||||
Handler: v1.Handler{
|
||||
HTTPGet: &v1.HTTPGetAction{
|
||||
Port: intstr.FromString("http"),
|
||||
Path: "/",
|
||||
},
|
||||
},
|
||||
InitialDelaySeconds: 10,
|
||||
PeriodSeconds: 5,
|
||||
TimeoutSeconds: 60,
|
||||
SuccessThreshold: 3,
|
||||
FailureThreshold: 5,
|
||||
},
|
||||
},
|
||||
},
|
||||
},
|
||||
}
|
||||
|
||||
if err := provider.CreatePod(context.Background(), pod); err != nil {
|
||||
t.Fatal("Failed to create pod", err)
|
||||
}
|
||||
|
||||
return http.StatusOK, cg
|
||||
}
|
||||
}
|
||||
|
||||
func TestCreatePodWithLivenessProbe(t *testing.T) {
|
||||
_, aciServerMocker, provider, err := prepareMocks()
|
||||
|
||||
@@ -891,7 +951,7 @@ func TestCreatePodWithLivenessProbe(t *testing.T) {
|
||||
LivenessProbe: &v1.Probe{
|
||||
Handler: v1.Handler{
|
||||
HTTPGet: &v1.HTTPGetAction{
|
||||
Port: intstr.FromString("8080"),
|
||||
Port: intstr.FromInt(8080),
|
||||
Path: "/",
|
||||
},
|
||||
},
|
||||
@@ -952,7 +1012,7 @@ func TestCreatePodWithReadinessProbe(t *testing.T) {
|
||||
ReadinessProbe: &v1.Probe{
|
||||
Handler: v1.Handler{
|
||||
HTTPGet: &v1.HTTPGetAction{
|
||||
Port: intstr.FromString("8080"),
|
||||
Port: intstr.FromInt(8080),
|
||||
Path: "/",
|
||||
},
|
||||
},
|
||||
|
||||
Reference in New Issue
Block a user