Update ACI liveness/readiness probe handling to work with named ports (#333)

* Update ACI liveness/readiness probe handling to work with named ports
This commit is contained in:
Jeremy Rickard
2019-04-23 12:43:48 -06:00
committed by Brian Goff
parent ceb9b16c5c
commit 45d2ef06b2
4 changed files with 192 additions and 49 deletions

View File

@@ -31,6 +31,7 @@ import (
"k8s.io/apimachinery/pkg/api/resource"
metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
"k8s.io/apimachinery/pkg/types"
"k8s.io/apimachinery/pkg/util/intstr"
clientcmdv1 "k8s.io/client-go/tools/clientcmd/api/v1"
"k8s.io/client-go/tools/remotecommand"
stats "k8s.io/kubernetes/pkg/kubelet/apis/stats/v1alpha1"
@@ -54,11 +55,10 @@ const (
)
// GPU-related identifiers used by the ACI provider.
//
// gpuResourceName is the v1.ResourceName "nvidia.com/gpu"; presumably the
// resource key under which a pod requests GPUs (usage not visible here — confirm).
// gpuTypeAnnotation is the pod annotation key that getGPUSKU looks up in
// pod.Annotations to pick the desired GPU SKU.
//
// NOTE(review): each name appears twice in this view — presumably the old line
// and the re-aligned new line of a whitespace-only diff hunk; confirm against
// the real file, where each constant must be declared only once.
const (
gpuResourceName v1.ResourceName = "nvidia.com/gpu"
gpuTypeAnnotation = "virtual-kubelet.io/gpu-type"
gpuResourceName v1.ResourceName = "nvidia.com/gpu"
gpuTypeAnnotation = "virtual-kubelet.io/gpu-type"
)
// ACIProvider implements the virtual-kubelet provider interface and communicates with Azure's ACI APIs.
type ACIProvider struct {
aciClient *aci.Client
@@ -324,37 +324,37 @@ func (p *ACIProvider) setupCapacity(ctx context.Context) error {
defer span.End()
logger := log.G(ctx).WithField("method", "setupCapacity")
// Set sane defaults for Capacity in case config is not supplied
// Set sane defaults for Capacity in case config is not supplied
p.cpu = "800"
p.memory = "4Ti"
p.pods = "800"
if cpuQuota := os.Getenv("ACI_QUOTA_CPU"); cpuQuota != "" {
if cpuQuota := os.Getenv("ACI_QUOTA_CPU"); cpuQuota != "" {
p.cpu = cpuQuota
}
if memoryQuota := os.Getenv("ACI_QUOTA_MEMORY"); memoryQuota != "" {
if memoryQuota := os.Getenv("ACI_QUOTA_MEMORY"); memoryQuota != "" {
p.memory = memoryQuota
}
if podsQuota := os.Getenv("ACI_QUOTA_POD"); podsQuota != "" {
if podsQuota := os.Getenv("ACI_QUOTA_POD"); podsQuota != "" {
p.pods = podsQuota
}
metadata, err := p.aciClient.GetResourceProviderMetadata(ctx)
metadata, err := p.aciClient.GetResourceProviderMetadata(ctx)
if err != nil {
if err != nil {
msg := "Unable to fetch the ACI metadata"
logger.WithError(err).Error(msg)
return err
}
if metadata == nil || metadata.GPURegionalSKUs == nil {
if metadata == nil || metadata.GPURegionalSKUs == nil {
logger.Warn("ACI GPU capacity is not enabled. GPU capacity will be disabled")
return nil
}
for _, regionalSKU := range metadata.GPURegionalSKUs {
for _, regionalSKU := range metadata.GPURegionalSKUs {
if strings.EqualFold(regionalSKU.Location, p.region) && len(regionalSKU.SKUs) != 0 {
p.gpu = "100"
if gpu := os.Getenv("ACI_QUOTA_GPU"); gpu != "" {
@@ -364,7 +364,7 @@ func (p *ACIProvider) setupCapacity(ctx context.Context) error {
}
}
return nil
return nil
}
func (p *ACIProvider) setupNetworkProfile(auth *client.Authentication) error {
@@ -832,7 +832,7 @@ func (p *ACIProvider) ExecInContainer(name string, uid types.UID, container stri
return err
}
wsURI := xcrsp.WebSocketURI
wsURI := xcrsp.WebSocketURI
password := xcrsp.Password
c, _, _ := websocket.DefaultDialer.Dial(wsURI, nil)
@@ -1238,7 +1238,7 @@ func (p *ACIProvider) getContainers(pod *v1.Pod) ([]aci.Container, error) {
}
if container.LivenessProbe != nil {
probe, err := getProbe(container.LivenessProbe)
probe, err := getProbe(container.LivenessProbe, container.Ports)
if err != nil {
return nil, err
}
@@ -1246,7 +1246,7 @@ func (p *ACIProvider) getContainers(pod *v1.Pod) ([]aci.Container, error) {
}
if container.ReadinessProbe != nil {
probe, err := getProbe(container.ReadinessProbe)
probe, err := getProbe(container.ReadinessProbe, container.Ports)
if err != nil {
return nil, err
}
@@ -1263,20 +1263,20 @@ func (p *ACIProvider) getGPUSKU(pod *v1.Pod) (aci.GPUSKU, error) {
return "", fmt.Errorf("The pod requires GPU resource, but ACI doesn't provide GPU enabled container group in region %s", p.region)
}
if desiredSKU, ok := pod.Annotations[gpuTypeAnnotation]; ok {
if desiredSKU, ok := pod.Annotations[gpuTypeAnnotation]; ok {
for _, supportedSKU := range p.gpuSKUs {
if strings.EqualFold(string(desiredSKU), string(supportedSKU)) {
return supportedSKU, nil
}
}
return "", fmt.Errorf("The pod requires GPU SKU %s, but ACI only supports SKUs %v in region %s", desiredSKU, p.region, p.gpuSKUs)
return "", fmt.Errorf("The pod requires GPU SKU %s, but ACI only supports SKUs %v in region %s", desiredSKU, p.region, p.gpuSKUs)
}
return p.gpuSKUs[0], nil
return p.gpuSKUs[0], nil
}
func getProbe(probe *v1.Probe) (*aci.ContainerProbe, error) {
func getProbe(probe *v1.Probe, ports []v1.ContainerPort) (*aci.ContainerProbe, error) {
if probe.Handler.Exec != nil && probe.Handler.HTTPGet != nil {
return nil, fmt.Errorf("probe may not specify more than one of \"exec\" and \"httpGet\"")
@@ -1298,8 +1298,26 @@ func getProbe(probe *v1.Probe) (*aci.ContainerProbe, error) {
var httpGET *aci.ContainerHTTPGetProbe
if probe.Handler.HTTPGet != nil {
var portValue int
port := probe.Handler.HTTPGet.Port
switch port.Type {
case intstr.Int:
portValue = port.IntValue()
case intstr.String:
portName := port.String()
for _, p := range ports {
if portName == p.Name {
portValue = int(p.ContainerPort)
break
}
}
if portValue == 0 {
return nil, fmt.Errorf("unable to find named port: %s", portName)
}
}
httpGET = &aci.ContainerHTTPGetProbe{
Port: probe.Handler.HTTPGet.Port.IntValue(),
Port: portValue,
Path: probe.Handler.HTTPGet.Path,
Scheme: string(probe.Handler.HTTPGet.Scheme),
}