GPU support in ACI provider (#563)

* GPU support in ACI provider
This commit is contained in:
Vipin Duleb
2019-04-02 18:11:35 -07:00
committed by Brian Goff
parent 1dadd46e20
commit bab9c59ac8
11 changed files with 557 additions and 63 deletions

View File

@@ -7,6 +7,7 @@ import (
"io/ioutil"
)
// NewContainerGroupDiagnostics creates a container group diagnostics object
func NewContainerGroupDiagnostics(logAnalyticsID, logAnalyticsKey string) (*ContainerGroupDiagnostics, error) {
if logAnalyticsID == "" || logAnalyticsKey == "" {
@@ -21,6 +22,7 @@ func NewContainerGroupDiagnostics(logAnalyticsID, logAnalyticsKey string) (*Cont
}, nil
}
// NewContainerGroupDiagnosticsFromFile creates a container group diagnostics object from the specified file
func NewContainerGroupDiagnosticsFromFile(filepath string) (*ContainerGroupDiagnostics, error) {
analyticsdata, err := ioutil.ReadFile(filepath)

View File

@@ -14,8 +14,8 @@ import (
const (
// BaseURI is the default URI used for compute services.
baseURI = "https://management.azure.com"
defaultUserAgent = "virtual-kubelet/azure-arm-aci/2018-09-01"
apiVersion = "2018-09-01"
defaultUserAgent = "virtual-kubelet/azure-arm-aci/2018-10-01"
apiVersion = "2018-10-01"
containerGroupURLPath = "subscriptions/{{.subscriptionId}}/resourceGroups/{{.resourceGroup}}/providers/Microsoft.ContainerInstance/containerGroups/{{.containerGroupName}}"
containerGroupListURLPath = "subscriptions/{{.subscriptionId}}/providers/Microsoft.ContainerInstance/containerGroups"

View File

@@ -11,12 +11,13 @@ import (
"github.com/virtual-kubelet/azure-aci/client/api"
)
// TerminalSizeRequest is the terminal size request.
// It is passed by value to Client.LaunchExec as the terminalSize argument.
type TerminalSizeRequest struct {
// Width is the requested terminal width (presumably in character columns; confirm against the ACI exec API).
Width int
// Height is the requested terminal height (presumably in character rows; confirm against the ACI exec API).
Height int
}
// Starts the exec command for a specified container instance in a specified resource group and container group.
// LaunchExec starts the exec command for a specified container instance in a specified resource group and container group.
// From: https://docs.microsoft.com/en-us/rest/api/container-instances/startcontainer/launchexec
func (c *Client) LaunchExec(resourceGroup, containerGroupName, containerName, command string, terminalSize TerminalSizeRequest) (ExecResponse, error) {
urlParams := url.Values{
@@ -35,7 +36,7 @@ func (c *Client) LaunchExec(resourceGroup, containerGroupName, containerName, co
var xcrsp ExecResponse
xcrsp.Password = ""
xcrsp.WebSocketUri = ""
xcrsp.WebSocketURI = ""
b := new(bytes.Buffer)

View File

@@ -14,7 +14,7 @@ import (
// GetContainerGroup gets an Azure Container Instance in the provided
// resource group with the given container group name.
// From: https://docs.microsoft.com/en-us/rest/api/container-instances/containergroups/get
func (c *Client) GetContainerGroup(ctx context.Context, resourceGroup, containerGroupName string) (*ContainerGroup, error, *int) {
func (c *Client) GetContainerGroup(ctx context.Context, resourceGroup, containerGroupName string) (*ContainerGroup, *int, error) {
urlParams := url.Values{
"api-version": []string{apiVersion},
}
@@ -26,7 +26,7 @@ func (c *Client) GetContainerGroup(ctx context.Context, resourceGroup, container
// Create the request.
req, err := http.NewRequest("GET", uri, nil)
if err != nil {
return nil, fmt.Errorf("Creating get container group uri request failed: %v", err), nil
return nil, nil, fmt.Errorf("Creating get container group uri request failed: %v", err)
}
req = req.WithContext(ctx)
@@ -36,29 +36,29 @@ func (c *Client) GetContainerGroup(ctx context.Context, resourceGroup, container
"resourceGroup": resourceGroup,
"containerGroupName": containerGroupName,
}); err != nil {
return nil, fmt.Errorf("Expanding URL with parameters failed: %v", err), nil
return nil, nil, fmt.Errorf("Expanding URL with parameters failed: %v", err)
}
// Send the request.
resp, err := c.hc.Do(req)
if err != nil {
return nil, fmt.Errorf("Sending get container group request failed: %v", err), nil
return nil, nil, fmt.Errorf("Sending get container group request failed: %v", err)
}
defer resp.Body.Close()
// 200 (OK) is a success response.
if err := api.CheckResponse(resp); err != nil {
return nil, err, &resp.StatusCode
return nil, &resp.StatusCode, err
}
// Decode the body from the response.
if resp.Body == nil {
return nil, errors.New("Get container group returned an empty body in the response"), &resp.StatusCode
return nil, &resp.StatusCode, errors.New("Get container group returned an empty body in the response")
}
var cg ContainerGroup
if err := json.NewDecoder(resp.Body).Decode(&cg); err != nil {
return nil, fmt.Errorf("Decoding get container group response body failed: %v", err), &resp.StatusCode
return nil, &resp.StatusCode, fmt.Errorf("Decoding get container group response body failed: %v", err)
}
return &cg, nil, &resp.StatusCode
return &cg, &resp.StatusCode, nil
}

View File

@@ -0,0 +1,76 @@
package aci
import (
"context"
"encoding/json"
"errors"
"fmt"
"net/http"
"net/url"
"github.com/virtual-kubelet/azure-aci/client/api"
)
const (
// resourceProviderURLPath is the path, resolved relative to the resource
// manager endpoint, that is requested to fetch the ACI resource provider manifest.
resourceProviderURLPath = "providers/Microsoft.ContainerInstance"
// resourceProviderAPIVersion is the "api-version" query value sent with the
// resource provider manifest request.
resourceProviderAPIVersion = "2018-02-01"
)
// GetResourceProviderMetadata gets the ACI resource provider metadata.
//
// It fetches the resource provider manifest and returns its Metadata field.
// An error is returned when fetching the manifest fails, when the manifest
// is nil, or when the manifest carries no metadata.
func (c *Client) GetResourceProviderMetadata(ctx context.Context) (*ResourceProviderMetadata, error) {
	providerManifest, fetchErr := c.getResourceProviderManifest(ctx)

	switch {
	case fetchErr != nil:
		// Propagate the fetch failure untouched.
		return nil, fetchErr
	case providerManifest == nil:
		return nil, fmt.Errorf("The resource provider manifest is empty")
	case providerManifest.Metadata == nil:
		return nil, fmt.Errorf("The resource provider metadata is empty")
	}

	return providerManifest.Metadata, nil
}
// getResourceProviderManifest issues a GET request for the ACI resource
// provider (resourceProviderURLPath resolved against the client's resource
// manager endpoint), asking for the "metadata" expansion, and decodes the
// JSON response body into a ResourceProviderManifest.
//
// An error is returned when the request cannot be built or sent, when
// api.CheckResponse rejects the response, or when the body cannot be decoded.
func (c *Client) getResourceProviderManifest(ctx context.Context) (*ResourceProviderManifest, error) {
	// Query string: the API version plus a request to expand the metadata section.
	query := url.Values{}
	query.Set("api-version", resourceProviderAPIVersion)
	query.Set("$expand", "metadata")

	// Full request URL.
	endpoint := api.ResolveRelative(c.auth.ResourceManagerEndpoint, resourceProviderURLPath) + "?" + query.Encode()

	// Build the request.
	request, err := http.NewRequest("GET", endpoint, nil)
	if err != nil {
		return nil, fmt.Errorf("Creating get resource provider manifest request failed: %v", err)
	}

	// Send it, bound to the caller's context.
	response, err := c.hc.Do(request.WithContext(ctx))
	if err != nil {
		return nil, fmt.Errorf("Sending get resource provider manifest request failed: %v", err)
	}
	defer response.Body.Close()

	// 200 (OK) is a success response.
	if checkErr := api.CheckResponse(response); checkErr != nil {
		return nil, checkErr
	}

	// Decode the body from the response.
	if response.Body == nil {
		return nil, errors.New("Get resource provider manifest returned an empty body in the response")
	}

	decoded := new(ResourceProviderManifest)
	if decodeErr := json.NewDecoder(response.Body).Decode(decoded); decodeErr != nil {
		return nil, fmt.Errorf("Decoding get resource provider manifest response body failed: %v", decodeErr)
	}

	return decoded, nil
}

View File

@@ -237,22 +237,35 @@ type Resource struct {
Tags map[string]string `json:"tags,omitempty"`
}
// ResourceLimits is the resource limits.
type ResourceLimits struct {
MemoryInGB float64 `json:"memoryInGB,omitempty"`
CPU float64 `json:"cpu,omitempty"`
// GPUSKU enumerates the values for GPU SKUs.
// It is a string type; the constants below are the values defined in this file.
type GPUSKU string
// GPU SKU values. Each constant's string value matches its name.
const (
// K80 specifies the K80 GPU SKU
K80 GPUSKU = "K80"
// P100 specifies the P100 GPU SKU
P100 GPUSKU = "P100"
// V100 specifies the V100 GPU SKU
V100 GPUSKU = "V100"
)
// GPUResource is the GPU resource for the container group.
// Neither field uses omitempty, so both keys are always written when the
// struct is marshalled to JSON.
type GPUResource struct {
// Count is serialized under the JSON key "count" (presumably the number of GPUs; confirm against the ACI API).
Count int32 `json:"count"`
// SKU is the GPU SKU, serialized under the JSON key "sku".
SKU GPUSKU `json:"sku"`
}
// ResourceRequests is the resource requests.
type ResourceRequests struct {
MemoryInGB float64 `json:"memoryInGB,omitempty"`
CPU float64 `json:"cpu,omitempty"`
// ComputeResources is the compute resource.
// The same type is used for both the Requests and Limits fields of
// ResourceRequirements. Every field is omitted from JSON when it holds its
// zero value (or, for GPU, when the pointer is nil).
type ComputeResources struct {
// MemoryInGB is serialized under the JSON key "memoryInGB".
MemoryInGB float64 `json:"memoryInGB,omitempty"`
// CPU is serialized under the JSON key "cpu" (presumably a core count; confirm against the ACI API).
CPU float64 `json:"cpu,omitempty"`
// GPU is the optional GPU resource; nil means no "gpu" key is emitted.
GPU *GPUResource `json:"gpu,omitempty"`
}
// ResourceRequirements is the resource requirements.
type ResourceRequirements struct {
Requests *ResourceRequests `json:"requests,omitempty"`
Limits *ResourceLimits `json:"limits,omitempty"`
Requests *ComputeResources `json:"requests,omitempty"`
Limits *ComputeResources `json:"limits,omitempty"`
}
// Usage is a single usage result
@@ -305,7 +318,7 @@ type ExecRequest struct {
// ExecResponse is a request for Launch Exec API response for ACI.
type ExecResponse struct {
WebSocketUri string `json:"webSocketUri,omitempty"`
WebSocketURI string `json:"webSocketUri,omitempty"`
Password string `json:"password,omitempty"`
}
@@ -488,3 +501,20 @@ const (
LogAnalyticsMetadataKeyNodeName string = "node-name"
LogAnalyticsMetadataKeyClusterResourceID string = "cluster-resource-id"
)
// GPURegionalSKU is the ACI GPU regional SKU.
// It pairs one location with a list of GPU SKUs.
type GPURegionalSKU struct {
// Location is serialized under the JSON key "location" (presumably an Azure region name; confirm).
Location string `json:"location"`
// SKUs lists the GPU SKUs associated with Location, under the JSON key "skus".
SKUs []GPUSKU `json:"skus"`
}
// ResourceProviderMetadata is the ACI resource provider metadata.
// It is the type of the Metadata field of ResourceProviderManifest.
type ResourceProviderMetadata struct {
// VNetSupportRegions is serialized under the JSON key "vnetSupportRegions"
// (presumably the regions where VNet is supported; confirm).
VNetSupportRegions []string `json:"vnetSupportRegions,omitempty"`
// GPURegionalSKUs holds the per-location GPU SKU entries, under the JSON key "gpuRegionalSkus".
GPURegionalSKUs []*GPURegionalSKU `json:"gpuRegionalSkus,omitempty"`
}
// ResourceProviderManifest is the ACI resource provider manifest.
// Only the "metadata" section of the JSON document is mapped here.
type ResourceProviderManifest struct {
// Metadata is decoded from the JSON key "metadata"; it stays nil when the key is absent or null.
Metadata *ResourceProviderMetadata `json:"metadata"`
}