Create a provider to use Azure Batch (#133)
* Started work on provider * WIP Adding batch provider * Working basic call into pool client. Need to parameterize the baseurl * Fixed job creation by manipulating the content-type * WIP Kicking off containers. Dirty * [wip] More meat around scheduling simple containers. * Working on basic task wrapper to co-schedule pods * WIP on task wrapper * WIP * Working pod minimal wrapper for batch * Integrate pod template code into provider * Cleaning up * Move to docker without gpu * WIP batch integration * partially working * Working logs * Tidy code * WIP: Testing and readme * Added readme and terraform deployment for GPU Azure Batch pool. * Update to enable low priority nodes for gpu * Fix log formatting bug. Return node logs when container not yet started * Moved to golang v1.10 * Fix cri test * Fix up minor docs Issue. Add provider to readme. Add var for vk image.
This commit is contained in:
committed by
Robbie Zhang
parent
1ad6fb434e
commit
d6e8b3daf7
21
vendor/github.com/lawrencegripper/pod2docker/LICENSE
generated
vendored
Normal file
21
vendor/github.com/lawrencegripper/pod2docker/LICENSE
generated
vendored
Normal file
@@ -0,0 +1,21 @@
|
||||
MIT License

Copyright (c) 2018 Lawrence Gripper

Permission is hereby granted, free of charge, to any person obtaining a copy
of this software and associated documentation files (the "Software"), to deal
in the Software without restriction, including without limitation the rights
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
copies of the Software, and to permit persons to whom the Software is
furnished to do so, subject to the following conditions:

The above copyright notice and this permission notice shall be included in all
copies or substantial portions of the Software.

THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
SOFTWARE.
|
||||
105
vendor/github.com/lawrencegripper/pod2docker/pod2docker.go
generated
vendored
Normal file
105
vendor/github.com/lawrencegripper/pod2docker/pod2docker.go
generated
vendored
Normal file
@@ -0,0 +1,105 @@
|
||||
package pod2docker
|
||||
|
||||
import (
|
||||
"bytes"
|
||||
"strings"
|
||||
"text/template"
|
||||
|
||||
"k8s.io/api/core/v1"
|
||||
)
|
||||
|
||||
// ImageRegistryCredential holds the login details for one image registry.
// The generated script passes these values to "docker login" before any
// container image is pulled.
type ImageRegistryCredential struct {
	// Server is the registry address given to "docker login".
	Server string `json:"server,omitempty"`
	// Username is passed to "docker login" via -u.
	Username string `json:"username,omitempty"`
	// Password is passed to "docker login" via -p.
	Password string `json:"password,omitempty"`
}
|
||||
|
||||
// PodComponents provides details to run a pod
|
||||
type PodComponents struct {
|
||||
PullCredentials []ImageRegistryCredential
|
||||
InitContainers []v1.Container
|
||||
Containers []v1.Container
|
||||
Volumes []v1.Volume
|
||||
PodName string
|
||||
}
|
||||
|
||||
// GetBashCommand generates the bash script to execute the pod
|
||||
func GetBashCommand(p PodComponents) (string, error) {
|
||||
template := template.New("run.sh.tmpl").Option("missingkey=error").Funcs(template.FuncMap{
|
||||
"getLaunchCommand": getLaunchCommand,
|
||||
"isHostPathVolume": isHostPathVolume,
|
||||
"isEmptyDirVolume": isEmptyDirVolume,
|
||||
"isPullAlways": isPullAlways,
|
||||
"getValidVolumeMounts": getValidVolumeMounts,
|
||||
"isNvidiaRuntime": isNvidiaRuntime,
|
||||
})
|
||||
|
||||
template, err := template.Parse(azureBatchPodTemplate)
|
||||
if err != nil {
|
||||
return "", err
|
||||
}
|
||||
var output bytes.Buffer
|
||||
err = template.Execute(&output, p)
|
||||
return output.String(), err
|
||||
}
|
||||
|
||||
func getLaunchCommand(container v1.Container) (cmd string) {
|
||||
if len(container.Command) > 0 {
|
||||
cmd += strings.Join(container.Command, " ")
|
||||
}
|
||||
if len(cmd) > 0 {
|
||||
cmd += " "
|
||||
}
|
||||
if len(container.Args) > 0 {
|
||||
cmd += strings.Join(container.Args, " ")
|
||||
}
|
||||
return
|
||||
}
|
||||
|
||||
func isNvidiaRuntime(c v1.Container) bool {
|
||||
if _, exists := c.Resources.Limits["nvidia.com/gpu"]; exists {
|
||||
return true
|
||||
}
|
||||
return false
|
||||
}
|
||||
|
||||
func isHostPathVolume(v v1.Volume) bool {
|
||||
if v.HostPath == nil {
|
||||
return false
|
||||
}
|
||||
return true
|
||||
}
|
||||
|
||||
func isEmptyDirVolume(v v1.Volume) bool {
|
||||
if v.EmptyDir == nil {
|
||||
return false
|
||||
}
|
||||
return true
|
||||
}
|
||||
|
||||
func isPullAlways(c v1.Container) bool {
|
||||
if c.ImagePullPolicy == v1.PullAlways {
|
||||
return true
|
||||
}
|
||||
return false
|
||||
}
|
||||
|
||||
func getValidVolumeMounts(container v1.Container, volumes []v1.Volume) []v1.VolumeMount {
|
||||
volDic := make(map[string]v1.Volume)
|
||||
for _, vol := range volumes {
|
||||
volDic[vol.Name] = vol
|
||||
}
|
||||
var mounts []v1.VolumeMount
|
||||
for _, mount := range container.VolumeMounts {
|
||||
vol, ok := volDic[mount.Name]
|
||||
if !ok {
|
||||
continue
|
||||
}
|
||||
if vol.EmptyDir == nil && vol.HostPath == nil {
|
||||
continue
|
||||
}
|
||||
mounts = append(mounts, mount)
|
||||
}
|
||||
return mounts
|
||||
}
|
||||
167
vendor/github.com/lawrencegripper/pod2docker/pod2docker_template.go
generated
vendored
Normal file
167
vendor/github.com/lawrencegripper/pod2docker/pod2docker_template.go
generated
vendored
Normal file
@@ -0,0 +1,167 @@
|
||||
package pod2docker
|
||||
|
||||
// azureBatchPodTemplate is the text/template source of the bash script that
// runs a pod with plain docker. It is executed against a PodComponents value
// and relies on the helper functions getLaunchCommand, isHostPathVolume,
// isEmptyDirVolume, isPullAlways, getValidVolumeMounts and isNvidiaRuntime
// being registered on the template.
//
// The rendered script:
//   - exits early when docker is not installed;
//   - runs "docker login" for every pull credential;
//   - creates a docker network and a pause container named after the pod;
//   - creates docker volumes for the pod's HostPath and EmptyDir volumes;
//   - runs each init container in the foreground, then starts every regular
//     container detached, all attached to the pause container's network and
//     IPC namespaces;
//   - symlinks each container's docker log file to ./<container name>.log;
//   - waits until at least one container has exited, then exits with the
//     exit code of the last container found with a non-zero code (0 if none).
//
// A cleanup function is installed as an EXIT trap; it copies the container
// logs and removes the containers, the pause container, the network and the
// volumes.
//
// Environment variables are passed to "docker run" as -e "NAME=VALUE", which
// is the form docker requires for the value to be set in the container.
//
// The script body only uses single quotes for its own string literals, apart
// from the --cidfile and -e arguments.
//
// TODO: Investigate a better way to inline this template - especially when escaping the backticks.
// Consider: https://mattjibson.com/blog/2014/11/19/esc-embedding-static-assets/
const azureBatchPodTemplate = `
#!/bin/bash
set -eE
trap cleanup EXIT

if ! type 'docker' > /dev/null; then
    echo 'Docker not installed... exiting'
    exit 1
fi

{{/* Vars */}}
{{$podName := .PodName}}
{{$volumes := .Volumes}}

{{/* Login to required image repositories */}}
{{range .PullCredentials }}
docker login -u {{.Username}} -p {{.Password}} {{.Server}}
{{end}}

function cleanup(){
    {{/* Take a copy of the container log, as it is removed when the container is deleted */}}
    echo 'Pod Exited: Copying logs'
    {{range $index, $container := .InitContainers}}
    if [[ -f ./initcontainer-{{$index}}.cid ]]; then
        container_{{$index}}_ID=$(<./initcontainer-{{$index}}.cid)
        container_{{$index}}_Log_Path=$(docker inspect --format='{{"{{.LogPath}}"}}' $container_{{$index}}_ID)
        cp $container_{{$index}}_Log_Path ./{{$container.Name}}.log

        docker rm -f $container_{{$index}}_ID
        rm -f ./initcontainer-{{$index}}.cid
    fi
    {{end}}

    {{range $index, $container := .Containers}}
    if [[ -f ./{{$container.Name}}.log && -f ./container-{{$index}}.cid ]]; then
        container_{{$index}}_ID=$(<./container-{{$index}}.cid)
        container_{{$index}}_Log_Path=$(docker inspect --format='{{"{{.LogPath}}"}}' $container_{{$index}}_ID)
        rm ./{{$container.Name}}.log {{/* Remove the existing symlink */}}
        cp $container_{{$index}}_Log_Path ./{{$container.Name}}.log
    fi
    {{end}}

    {{/* Remove the containers, network and volumes */}}

    echo 'Pod Exited: Removing all containers'
    if ls container-* 1> /dev/null 2>&1; then
        for line in ` + "`ls container-*`" + `
        do
            id=$(cat $line)
            echo '-Logs container..'
            docker logs $id
            echo '-Removing container..'
            docker rm -f $id
            rm -f $line
        done
    fi
    echo '-Removing pause container..'
    docker rm -f {{$podName}} || echo 'Remove pause container failed'
    rm -f ./pauseid.cid
    echo '-Removing network container..'
    docker network rm {{$podName}} || echo 'Remove network failed'

    echo '-Removing volumes..'
    {{range .Volumes}}
    docker volume rm -f {{$podName}}_{{.Name}} || echo 'Remove volume failed'
    {{end}}
}

{{/* Create Pod network and start it */}}
docker network create {{$podName}}
docker run -d --network {{$podName}} --name {{$podName}} --cidfile="./pauseid.cid" gcr.io/google_containers/pause:1.0

{{/* Handle volumes */}}
{{range .Volumes}}
{{if isHostPathVolume .}}
docker volume create --name {{$podName}}_{{.Name}} --opt type=none --opt device={{.VolumeSource.HostPath.Path}} --opt o=bind
{{end}}
{{if isEmptyDirVolume .}}
docker volume create {{$podName}}_{{.Name}}
{{end}}
{{end}}

{{/* Run the init containers in the Pod. Attaching to shared namespace */}}
{{range $index, $container := .InitContainers}}
echo 'Running init container {{$index}}..'
{{if isPullAlways .}}
docker pull {{$container.Image}}
{{end}}
docker run --network container:{{$podName}} --ipc container:{{$podName}} \
    {{- if isNvidiaRuntime $container}}
    --runtime nvidia \
    {{- end}}
    {{- range $index, $envs := $container.Env}}
    -e "{{$envs.Name}}={{$envs.Value}}" \
    {{- end}}
    {{- range $index, $mount := getValidVolumeMounts $container $volumes}}
    -v {{$podName}}_{{$mount.Name}}:{{$mount.MountPath}} \
    {{- end}}
    --cidfile=./initcontainer-{{$index}}.cid {{$container.Image}} {{getLaunchCommand $container}}
{{end}}


{{/* Run the containers in the Pod. Attaching to shared namespace */}}
{{range $index, $container := .Containers}}
{{if isPullAlways .}}
docker pull {{$container.Image}}
{{end}}
docker run -d --network container:{{$podName}} --ipc container:{{$podName}} \
    {{- if isNvidiaRuntime $container}}
    --runtime nvidia \
    {{- end}}
    {{- range $index, $envs := $container.Env}}
    -e "{{$envs.Name}}={{$envs.Value}}" \
    {{- end}}
    {{- range $index, $mount := getValidVolumeMounts $container $volumes}}
    -v {{$podName}}_{{$mount.Name}}:{{$mount.MountPath}} \
    {{- end}}
    --cidfile=./container-{{$index}}.cid {{$container.Image}} {{getLaunchCommand $container}}
{{end}}

{{/* Symlink all container logs files to task directory */}}
{{range $index, $container := .Containers}}
container_{{$index}}_ID=$(<./container-{{$index}}.cid)
container_{{$index}}_Log_Path=$(docker inspect --format='{{"{{.LogPath}}"}}' $container_{{$index}}_ID)
ln -f -s $container_{{$index}}_Log_Path ./{{$container.Name}}.log
{{end}}

echo 'Running Pod: {{.PodName}}'

{{/* Wait until any of these containers stop */}}
echo 'Waiting for any of the containers to exit'
for line in ` + "`ls container-*`" + `
do
    id=$(cat $line)
    docker wait $id &
done

while [ $(jobs -p | wc -l) == {{.Containers | len}} ]
do
    sleep 2
done


{{/* Get exit codes from containers */}}
echo 'Checking container exit codes'
overallExitCode=0
for line in ` + "`ls container-*`" + `
do
    id=$(cat $line)
    echo 'Getting exitcode'
    exitCode=$(docker inspect -f {{"{{.State.ExitCode}}"}} $id)

    echo 'ID: ' $id ' ExitCode: ' $exitCode
    echo 'Checking exitcode'
    if (($exitCode != 0))
    then
        echo 'Assigning exitcode'
        overallExitCode=$exitCode
    fi
done

exit $overallExitCode
`
|
||||
Reference in New Issue
Block a user