519 lines
14 KiB
Go
519 lines
14 KiB
Go
package fargate
|
|
|
|
import (
|
|
"fmt"
|
|
"log"
|
|
"strconv"
|
|
"strings"
|
|
"time"
|
|
|
|
"github.com/aws/aws-sdk-go/aws"
|
|
"github.com/aws/aws-sdk-go/service/ecs"
|
|
|
|
corev1 "k8s.io/api/core/v1"
|
|
"k8s.io/apimachinery/pkg/api/resource"
|
|
metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
|
|
k8sTypes "k8s.io/apimachinery/pkg/types"
|
|
)
|
|
|
|
// Name prefixes for objects created in Fargate.
const (
	taskDefFamilyPrefix = "vk-podspec"
	taskTagPrefix       = "vk-pod"
)

// Task status strings, compared against a task's last known status.
const (
	taskStatusProvisioning = "PROVISIONING"
	taskStatusPending      = "PENDING"
	taskStatusRunning      = "RUNNING"
	taskStatusStopped      = "STOPPED"
)

// Task attachment type and detail name used to locate a task's private IPv4 address.
const (
	taskAttachmentENI                   = "ElasticNetworkInterface"
	taskAttachmentENIPrivateIPv4Address = "privateIPv4Address"
)

const (
	// taskGenericReason is the reason reported for task state changes.
	taskGenericReason = "Initiated by user"

	// taskRoleAnnotation is the pod annotation that configures the task role.
	taskRoleAnnotation = "iam.amazonaws.com/role"
)
|
|
|
|
// Pod is the representation of a Kubernetes pod in Fargate.
|
|
type Pod struct {
|
|
// Kubernetes pod properties.
|
|
namespace string
|
|
name string
|
|
uid k8sTypes.UID
|
|
|
|
// Fargate task properties.
|
|
cluster *Cluster
|
|
taskDefArn string
|
|
taskArn string
|
|
taskRoleArn string
|
|
taskStatus string
|
|
taskRefreshTime time.Time
|
|
taskCPU int64
|
|
taskMemory int64
|
|
containers map[string]*container
|
|
}
|
|
|
|
// NewPod creates a new Kubernetes pod on Fargate.
|
|
func NewPod(cluster *Cluster, pod *corev1.Pod) (*Pod, error) {
|
|
api := client.api
|
|
|
|
// Initialize the pod.
|
|
fgPod := &Pod{
|
|
namespace: pod.Namespace,
|
|
name: pod.Name,
|
|
uid: pod.UID,
|
|
cluster: cluster,
|
|
containers: make(map[string]*container),
|
|
}
|
|
|
|
tag := fgPod.buildTaskDefinitionTag()
|
|
|
|
// Create a task definition matching the pod spec.
|
|
taskDef := &ecs.RegisterTaskDefinitionInput{
|
|
Family: aws.String(tag),
|
|
RequiresCompatibilities: []*string{aws.String(ecs.CompatibilityFargate)},
|
|
NetworkMode: aws.String(ecs.NetworkModeAwsvpc),
|
|
ContainerDefinitions: []*ecs.ContainerDefinition{},
|
|
}
|
|
|
|
if cluster.executionRoleArn != "" {
|
|
taskDef.ExecutionRoleArn = aws.String(cluster.executionRoleArn)
|
|
}
|
|
|
|
// For each container in the pod...
|
|
for _, containerSpec := range pod.Spec.Containers {
|
|
// Create a container definition.
|
|
cntr, err := newContainer(&containerSpec)
|
|
if err != nil {
|
|
return nil, err
|
|
}
|
|
|
|
if cluster.cloudWatchLogGroupName != "" {
|
|
// Configure container logs to be sent to the configured Cloudwatch Logs Log Group.
|
|
cntr.definition.LogConfiguration = &ecs.LogConfiguration{
|
|
LogDriver: aws.String(ecs.LogDriverAwslogs),
|
|
Options: map[string]*string{
|
|
"awslogs-group": aws.String(cluster.cloudWatchLogGroupName),
|
|
"awslogs-region": aws.String(cluster.region),
|
|
"awslogs-stream-prefix": aws.String(fmt.Sprintf("%s_%s", tag, containerSpec.Name)),
|
|
},
|
|
}
|
|
}
|
|
|
|
// Add the container's resource requirements to its pod's total resource requirements.
|
|
fgPod.taskCPU += *cntr.definition.Cpu
|
|
fgPod.taskMemory += *cntr.definition.Memory
|
|
|
|
// Insert the container to its pod.
|
|
fgPod.containers[containerSpec.Name] = cntr
|
|
|
|
// Insert container definition to the task definition.
|
|
taskDef.ContainerDefinitions = append(taskDef.ContainerDefinitions, &cntr.definition)
|
|
}
|
|
|
|
// Set task resource limits.
|
|
err := fgPod.mapTaskSize()
|
|
if err != nil {
|
|
return nil, err
|
|
}
|
|
|
|
taskDef.Cpu = aws.String(strconv.Itoa(int(fgPod.taskCPU)))
|
|
taskDef.Memory = aws.String(strconv.Itoa(int(fgPod.taskMemory)))
|
|
|
|
if val, ok := pod.Annotations[taskRoleAnnotation]; ok {
|
|
taskDef.TaskRoleArn = aws.String(val)
|
|
fgPod.taskRoleArn = *taskDef.TaskRoleArn
|
|
}
|
|
|
|
// Register the task definition with Fargate.
|
|
log.Printf("RegisterTaskDefinition input:%+v", taskDef)
|
|
output, err := api.RegisterTaskDefinition(taskDef)
|
|
log.Printf("RegisterTaskDefinition err:%+v output:%+v", err, output)
|
|
if err != nil {
|
|
err = fmt.Errorf("failed to register task definition: %v", err)
|
|
return nil, err
|
|
}
|
|
|
|
// Save the registered task definition ARN.
|
|
fgPod.taskDefArn = *output.TaskDefinition.TaskDefinitionArn
|
|
|
|
if cluster != nil {
|
|
cluster.InsertPod(fgPod, tag)
|
|
}
|
|
|
|
return fgPod, nil
|
|
}
|
|
|
|
// NewPodFromTag creates a new pod identified by a tag.
|
|
func NewPodFromTag(cluster *Cluster, tag string) (*Pod, error) {
|
|
data := strings.Split(tag, "_")
|
|
|
|
if len(data) < 4 ||
|
|
data[0] != taskDefFamilyPrefix ||
|
|
data[1] != cluster.name {
|
|
return nil, fmt.Errorf("invalid tag")
|
|
}
|
|
|
|
pod := &Pod{
|
|
namespace: data[2],
|
|
name: data[3],
|
|
cluster: cluster,
|
|
containers: make(map[string]*container),
|
|
}
|
|
|
|
return pod, nil
|
|
}
|
|
|
|
// Start deploys and runs a Kubernetes pod on Fargate.
|
|
func (pod *Pod) Start() error {
|
|
api := client.api
|
|
|
|
// Pods always get an ENI with a private IPv4 address in customer subnet.
|
|
// Assign a public IPv4 address to the ENI only if requested.
|
|
assignPublicIPAddress := ecs.AssignPublicIpDisabled
|
|
if pod.cluster.assignPublicIPv4Address {
|
|
assignPublicIPAddress = ecs.AssignPublicIpEnabled
|
|
}
|
|
|
|
// Start the task.
|
|
runTaskInput := &ecs.RunTaskInput{
|
|
Cluster: aws.String(pod.cluster.name),
|
|
Count: aws.Int64(1),
|
|
LaunchType: aws.String(ecs.LaunchTypeFargate),
|
|
NetworkConfiguration: &ecs.NetworkConfiguration{
|
|
AwsvpcConfiguration: &ecs.AwsVpcConfiguration{
|
|
AssignPublicIp: aws.String(assignPublicIPAddress),
|
|
SecurityGroups: aws.StringSlice(pod.cluster.securityGroups),
|
|
Subnets: aws.StringSlice(pod.cluster.subnets),
|
|
},
|
|
},
|
|
PlatformVersion: aws.String(pod.cluster.platformVersion),
|
|
StartedBy: aws.String(pod.buildTaskTag()),
|
|
TaskDefinition: aws.String(pod.taskDefArn),
|
|
}
|
|
|
|
log.Printf("RunTask input:%+v", runTaskInput)
|
|
runTaskOutput, err := api.RunTask(runTaskInput)
|
|
log.Printf("RunTask err:%+v output:%+v", err, runTaskOutput)
|
|
if err != nil || len(runTaskOutput.Tasks) == 0 {
|
|
err = fmt.Errorf("failed to run task: %v", err)
|
|
return err
|
|
}
|
|
|
|
// Save the task ARN.
|
|
pod.taskArn = *runTaskOutput.Tasks[0].TaskArn
|
|
|
|
return nil
|
|
}
|
|
|
|
// Stop stops a running Kubernetes pod on Fargate.
|
|
func (pod *Pod) Stop() error {
|
|
api := client.api
|
|
|
|
// Stop the task.
|
|
stopTaskInput := &ecs.StopTaskInput{
|
|
Cluster: aws.String(pod.cluster.name),
|
|
Reason: aws.String(taskGenericReason),
|
|
Task: aws.String(pod.taskArn),
|
|
}
|
|
|
|
log.Printf("StopTask input:%+v", stopTaskInput)
|
|
stopTaskOutput, err := api.StopTask(stopTaskInput)
|
|
log.Printf("StopTask err:%+v output:%+v", err, stopTaskOutput)
|
|
if err != nil {
|
|
err = fmt.Errorf("failed to stop task: %v", err)
|
|
return err
|
|
}
|
|
|
|
// Deregister the task definition.
|
|
_, err = api.DeregisterTaskDefinition(&ecs.DeregisterTaskDefinitionInput{
|
|
TaskDefinition: aws.String(pod.taskDefArn),
|
|
})
|
|
if err != nil {
|
|
log.Printf("Failed to deregister task definition: %v", err)
|
|
}
|
|
|
|
// Remove the pod from its cluster.
|
|
if pod.cluster != nil {
|
|
pod.cluster.RemovePod(pod.buildTaskDefinitionTag())
|
|
}
|
|
|
|
return nil
|
|
}
|
|
|
|
// GetSpec returns the specification of a Kubernetes pod on Fargate.
|
|
func (pod *Pod) GetSpec() (*corev1.Pod, error) {
|
|
task, err := pod.describe()
|
|
if err != nil {
|
|
return nil, err
|
|
}
|
|
|
|
return pod.getSpec(task)
|
|
}
|
|
|
|
// GetStatus returns the status of a Kubernetes pod on Fargate.
|
|
func (pod *Pod) GetStatus() corev1.PodStatus {
|
|
task, err := pod.describe()
|
|
if err != nil {
|
|
return corev1.PodStatus{Phase: corev1.PodUnknown}
|
|
}
|
|
|
|
return pod.getStatus(task)
|
|
}
|
|
|
|
// BuildTaskDefinitionTag returns the task definition tag for this pod.
|
|
func (pod *Pod) buildTaskDefinitionTag() string {
|
|
return buildTaskDefinitionTag(pod.cluster.name, pod.namespace, pod.name)
|
|
}
|
|
|
|
// buildTaskDefinitionTag builds a task definition tag from its components.
|
|
func buildTaskDefinitionTag(clusterName string, namespace string, name string) string {
|
|
// vk-podspec_cluster_namespacae_podname
|
|
return fmt.Sprintf("%s_%s_%s_%s", taskDefFamilyPrefix, clusterName, namespace, name)
|
|
}
|
|
|
|
// BuildTaskTag returns the pod's task tag, used for mapping a task back to its pod.
|
|
func (pod *Pod) buildTaskTag() string {
|
|
return fmt.Sprintf("%s", pod.uid)
|
|
}
|
|
|
|
// mapTaskSize maps Kubernetes pod resource requirements to a Fargate task size.
|
|
func (pod *Pod) mapTaskSize() error {
|
|
//
|
|
// Kubernetes pods do not have explicit resource requirements; their containers do. Pod resource
|
|
// requirements are the sum of the pod's containers' requirements.
|
|
//
|
|
// Fargate tasks have explicit CPU and memory limits. Both are required and specify the maximum
|
|
// amount of resources for the task. The limits must match a task size on taskSizeTable.
|
|
//
|
|
var cpu int64
|
|
var memory int64
|
|
|
|
// Find the smallest Fargate task size that can satisfy the total resource request.
|
|
for _, row := range taskSizeTable {
|
|
if pod.taskCPU <= row.cpu {
|
|
for mem := row.memory.min; mem <= row.memory.max; mem += row.memory.inc {
|
|
if pod.taskMemory <= mem/MiB {
|
|
cpu = row.cpu
|
|
memory = mem / MiB
|
|
break
|
|
}
|
|
}
|
|
|
|
if cpu != 0 {
|
|
break
|
|
}
|
|
}
|
|
}
|
|
|
|
log.Printf("Mapped resource requirements (cpu:%v, memory:%v) to task size (cpu:%v, memory:%v)",
|
|
pod.taskCPU, pod.taskMemory, cpu, memory)
|
|
|
|
// Fail if the resource requirements cannot be satisfied by any Fargate task size.
|
|
if cpu == 0 {
|
|
return fmt.Errorf("resource requirements (cpu:%v, memory:%v) are too high",
|
|
pod.taskCPU, pod.taskMemory)
|
|
}
|
|
|
|
// Fargate task CPU size is specified in vCPU/1024s and memory size is specified in MiBs.
|
|
pod.taskCPU = cpu
|
|
pod.taskMemory = memory
|
|
|
|
return nil
|
|
}
|
|
|
|
// Describe retrieves the status of a Kubernetes pod from Fargate.
|
|
func (pod *Pod) describe() (*ecs.Task, error) {
|
|
api := client.api
|
|
|
|
// Describe the task.
|
|
describeTasksInput := &ecs.DescribeTasksInput{
|
|
Cluster: aws.String(pod.cluster.name),
|
|
Tasks: []*string{aws.String(pod.taskArn)},
|
|
}
|
|
|
|
describeTasksOutput, err := api.DescribeTasks(describeTasksInput)
|
|
if err != nil {
|
|
return nil, err
|
|
}
|
|
|
|
task := describeTasksOutput.Tasks[0]
|
|
|
|
pod.taskStatus = *task.LastStatus
|
|
pod.taskRefreshTime = time.Now()
|
|
|
|
return task, nil
|
|
}
|
|
|
|
// GetSpec returns the specification of a Kubernetes pod on Fargate.
|
|
func (pod *Pod) getSpec(task *ecs.Task) (*corev1.Pod, error) {
|
|
containers := make([]corev1.Container, 0, len(task.Containers))
|
|
|
|
for _, c := range task.Containers {
|
|
cntrDef := pod.containers[*c.Name].definition
|
|
|
|
cntr := corev1.Container{
|
|
Name: *c.Name,
|
|
Image: *cntrDef.Image,
|
|
Command: aws.StringValueSlice(cntrDef.EntryPoint),
|
|
Args: aws.StringValueSlice(cntrDef.Command),
|
|
Resources: corev1.ResourceRequirements{
|
|
Limits: corev1.ResourceList{
|
|
corev1.ResourceCPU: resource.MustParse(fmt.Sprintf("%d", *cntrDef.Cpu)),
|
|
corev1.ResourceMemory: resource.MustParse(fmt.Sprintf("%dMi", *cntrDef.Memory)),
|
|
},
|
|
Requests: corev1.ResourceList{
|
|
corev1.ResourceCPU: resource.MustParse(fmt.Sprintf("%d", *cntrDef.Cpu)),
|
|
corev1.ResourceMemory: resource.MustParse(fmt.Sprintf("%dMi", *cntrDef.MemoryReservation)),
|
|
},
|
|
},
|
|
Ports: make([]corev1.ContainerPort, 0, len(cntrDef.PortMappings)),
|
|
Env: make([]corev1.EnvVar, 0, len(cntrDef.Environment)),
|
|
}
|
|
|
|
if cntrDef.WorkingDirectory != nil {
|
|
cntr.WorkingDir = *cntrDef.WorkingDirectory
|
|
}
|
|
|
|
for _, mapping := range cntrDef.PortMappings {
|
|
cntr.Ports = append(cntr.Ports, corev1.ContainerPort{
|
|
ContainerPort: int32(*mapping.ContainerPort),
|
|
HostPort: int32(*mapping.HostPort),
|
|
Protocol: corev1.ProtocolTCP,
|
|
})
|
|
}
|
|
|
|
for _, env := range cntrDef.Environment {
|
|
cntr.Env = append(cntr.Env, corev1.EnvVar{
|
|
Name: *env.Name,
|
|
Value: *env.Value,
|
|
})
|
|
}
|
|
|
|
containers = append(containers, cntr)
|
|
}
|
|
|
|
annotations := make(map[string]string)
|
|
|
|
if pod.taskRoleArn != "" {
|
|
annotations[taskRoleAnnotation] = pod.taskRoleArn
|
|
}
|
|
|
|
podSpec := corev1.Pod{
|
|
TypeMeta: metav1.TypeMeta{
|
|
Kind: "Pod",
|
|
APIVersion: "v1",
|
|
},
|
|
ObjectMeta: metav1.ObjectMeta{
|
|
Namespace: pod.namespace,
|
|
Name: pod.name,
|
|
UID: pod.uid,
|
|
Annotations: annotations,
|
|
},
|
|
Spec: corev1.PodSpec{
|
|
NodeName: pod.cluster.nodeName,
|
|
Volumes: []corev1.Volume{},
|
|
Containers: containers,
|
|
},
|
|
Status: pod.getStatus(task),
|
|
}
|
|
|
|
return &podSpec, nil
|
|
}
|
|
|
|
// GetStatus returns the status of a Kubernetes pod on Fargate.
|
|
func (pod *Pod) getStatus(task *ecs.Task) corev1.PodStatus {
|
|
// Translate task status to pod phase.
|
|
phase := corev1.PodUnknown
|
|
|
|
switch pod.taskStatus {
|
|
case taskStatusProvisioning:
|
|
phase = corev1.PodPending
|
|
case taskStatusPending:
|
|
phase = corev1.PodPending
|
|
case taskStatusRunning:
|
|
phase = corev1.PodRunning
|
|
case taskStatusStopped:
|
|
phase = corev1.PodSucceeded
|
|
}
|
|
|
|
// Set pod conditions based on task's last known status.
|
|
isScheduled := corev1.ConditionFalse
|
|
isInitialized := corev1.ConditionFalse
|
|
isReady := corev1.ConditionFalse
|
|
|
|
switch pod.taskStatus {
|
|
case taskStatusProvisioning:
|
|
isScheduled = corev1.ConditionTrue
|
|
case taskStatusPending:
|
|
isScheduled = corev1.ConditionTrue
|
|
case taskStatusRunning:
|
|
isScheduled = corev1.ConditionTrue
|
|
isInitialized = corev1.ConditionTrue
|
|
isReady = corev1.ConditionTrue
|
|
case taskStatusStopped:
|
|
isScheduled = corev1.ConditionTrue
|
|
isInitialized = corev1.ConditionTrue
|
|
isReady = corev1.ConditionTrue
|
|
}
|
|
|
|
conditions := []corev1.PodCondition{
|
|
corev1.PodCondition{
|
|
Type: corev1.PodScheduled,
|
|
Status: isScheduled,
|
|
},
|
|
corev1.PodCondition{
|
|
Type: corev1.PodInitialized,
|
|
Status: isInitialized,
|
|
},
|
|
corev1.PodCondition{
|
|
Type: corev1.PodReady,
|
|
Status: isReady,
|
|
},
|
|
}
|
|
|
|
// Set the pod start time as the task creation time.
|
|
var startTime metav1.Time
|
|
if task.CreatedAt != nil {
|
|
startTime = metav1.NewTime(*task.CreatedAt)
|
|
}
|
|
|
|
// Set the pod IP address from the task ENI information.
|
|
privateIPv4Address := ""
|
|
for _, attachment := range task.Attachments {
|
|
if *attachment.Type == taskAttachmentENI {
|
|
for _, detail := range attachment.Details {
|
|
if *detail.Name == taskAttachmentENIPrivateIPv4Address {
|
|
privateIPv4Address = *detail.Value
|
|
}
|
|
}
|
|
}
|
|
}
|
|
|
|
// Get statuses from all containers in this pod.
|
|
containerStatuses := make([]corev1.ContainerStatus, 0, len(task.Containers))
|
|
for _, cntr := range task.Containers {
|
|
containerStatuses = append(containerStatuses, pod.containers[*cntr.Name].getStatus(cntr))
|
|
}
|
|
|
|
// Build the pod status structure to be reported.
|
|
status := corev1.PodStatus{
|
|
Phase: phase,
|
|
Conditions: conditions,
|
|
Message: "",
|
|
Reason: "",
|
|
HostIP: privateIPv4Address,
|
|
PodIP: privateIPv4Address,
|
|
StartTime: &startTime,
|
|
InitContainerStatuses: nil,
|
|
ContainerStatuses: containerStatuses,
|
|
QOSClass: corev1.PodQOSBestEffort,
|
|
}
|
|
|
|
return status
|
|
}
|