Initial commit

This commit is contained in:
Ria Bhatia
2017-12-04 13:32:57 -06:00
committed by Erik St. Martin
commit 0075e5b0f3
9056 changed files with 2523100 additions and 0 deletions

View File

@@ -0,0 +1,510 @@
// +build linux,cgo
package native
import (
"fmt"
"path/filepath"
"strings"
"syscall"
"github.com/hyperhq/hypercli/daemon/execdriver"
derr "github.com/hyperhq/hypercli/errors"
"github.com/hyperhq/hypercli/pkg/mount"
"github.com/hyperhq/hypercli/profiles/seccomp"
"github.com/hyperhq/hypercli/volume"
"github.com/opencontainers/runc/libcontainer/apparmor"
"github.com/opencontainers/runc/libcontainer/configs"
"github.com/opencontainers/runc/libcontainer/devices"
)
// createContainer populates and configures the container type with the
// data provided by the execdriver.Command
func (d *Driver) createContainer(c *execdriver.Command, hooks execdriver.Hooks) (container *configs.Config, err error) {
container = execdriver.InitContainer(c)
if err := d.createIpc(container, c); err != nil {
return nil, err
}
if err := d.createPid(container, c); err != nil {
return nil, err
}
if err := d.createUTS(container, c); err != nil {
return nil, err
}
if err := d.setupRemappedRoot(container, c); err != nil {
return nil, err
}
if err := d.createNetwork(container, c, hooks); err != nil {
return nil, err
}
if c.ProcessConfig.Privileged {
if !container.Readonlyfs {
// clear readonly for /sys
for i := range container.Mounts {
if container.Mounts[i].Destination == "/sys" {
container.Mounts[i].Flags &= ^syscall.MS_RDONLY
}
}
container.ReadonlyPaths = nil
}
// clear readonly for cgroup
for i := range container.Mounts {
if container.Mounts[i].Device == "cgroup" {
container.Mounts[i].Flags &= ^syscall.MS_RDONLY
}
}
container.MaskPaths = nil
if err := d.setPrivileged(container); err != nil {
return nil, err
}
} else {
if err := d.setCapabilities(container, c); err != nil {
return nil, err
}
if c.SeccompProfile == "" {
container.Seccomp = seccomp.GetDefaultProfile()
}
}
// add CAP_ prefix to all caps for new libcontainer update to match
// the spec format.
for i, s := range container.Capabilities {
if !strings.HasPrefix(s, "CAP_") {
container.Capabilities[i] = fmt.Sprintf("CAP_%s", s)
}
}
container.AdditionalGroups = c.GroupAdd
if c.AppArmorProfile != "" {
container.AppArmorProfile = c.AppArmorProfile
}
if c.SeccompProfile != "" && c.SeccompProfile != "unconfined" {
container.Seccomp, err = seccomp.LoadProfile(c.SeccompProfile)
if err != nil {
return nil, err
}
}
if err := execdriver.SetupCgroups(container, c); err != nil {
return nil, err
}
container.OomScoreAdj = c.OomScoreAdj
if container.Readonlyfs {
for i := range container.Mounts {
switch container.Mounts[i].Destination {
case "/proc", "/dev", "/dev/pts":
continue
}
container.Mounts[i].Flags |= syscall.MS_RDONLY
}
/* These paths must be remounted as r/o */
container.ReadonlyPaths = append(container.ReadonlyPaths, "/dev")
}
if err := d.setupMounts(container, c); err != nil {
return nil, err
}
d.setupLabels(container, c)
d.setupRlimits(container, c)
return container, nil
}
func (d *Driver) createNetwork(container *configs.Config, c *execdriver.Command, hooks execdriver.Hooks) error {
if c.Network == nil {
return nil
}
if c.Network.ContainerID != "" {
d.Lock()
active := d.activeContainers[c.Network.ContainerID]
d.Unlock()
if active == nil {
return fmt.Errorf("%s is not a valid running container to join", c.Network.ContainerID)
}
state, err := active.State()
if err != nil {
return err
}
container.Namespaces.Add(configs.NEWNET, state.NamespacePaths[configs.NEWNET])
return nil
}
if c.Network.NamespacePath != "" {
container.Namespaces.Add(configs.NEWNET, c.Network.NamespacePath)
return nil
}
// only set up prestart hook if the namespace path is not set (this should be
// all cases *except* for --net=host shared networking)
container.Hooks = &configs.Hooks{
Prestart: []configs.Hook{
configs.NewFunctionHook(func(s configs.HookState) error {
if len(hooks.PreStart) > 0 {
for _, fnHook := range hooks.PreStart {
// A closed channel for OOM is returned here as it will be
// non-blocking and return the correct result when read.
chOOM := make(chan struct{})
close(chOOM)
if err := fnHook(&c.ProcessConfig, s.Pid, chOOM); err != nil {
return err
}
}
}
return nil
}),
},
}
return nil
}
func (d *Driver) createIpc(container *configs.Config, c *execdriver.Command) error {
if c.Ipc.HostIpc {
container.Namespaces.Remove(configs.NEWIPC)
return nil
}
if c.Ipc.ContainerID != "" {
d.Lock()
active := d.activeContainers[c.Ipc.ContainerID]
d.Unlock()
if active == nil {
return fmt.Errorf("%s is not a valid running container to join", c.Ipc.ContainerID)
}
state, err := active.State()
if err != nil {
return err
}
container.Namespaces.Add(configs.NEWIPC, state.NamespacePaths[configs.NEWIPC])
}
return nil
}
func (d *Driver) createPid(container *configs.Config, c *execdriver.Command) error {
if c.Pid.HostPid {
container.Namespaces.Remove(configs.NEWPID)
return nil
}
return nil
}
func (d *Driver) createUTS(container *configs.Config, c *execdriver.Command) error {
if c.UTS.HostUTS {
container.Namespaces.Remove(configs.NEWUTS)
container.Hostname = ""
return nil
}
return nil
}
func (d *Driver) setupRemappedRoot(container *configs.Config, c *execdriver.Command) error {
if c.RemappedRoot.UID == 0 {
container.Namespaces.Remove(configs.NEWUSER)
return nil
}
// convert the Docker daemon id map to the libcontainer variant of the same struct
// this keeps us from having to import libcontainer code across Docker client + daemon packages
cuidMaps := []configs.IDMap{}
cgidMaps := []configs.IDMap{}
for _, idMap := range c.UIDMapping {
cuidMaps = append(cuidMaps, configs.IDMap(idMap))
}
for _, idMap := range c.GIDMapping {
cgidMaps = append(cgidMaps, configs.IDMap(idMap))
}
container.UidMappings = cuidMaps
container.GidMappings = cgidMaps
for _, node := range container.Devices {
node.Uid = uint32(c.RemappedRoot.UID)
node.Gid = uint32(c.RemappedRoot.GID)
}
// TODO: until a kernel/mount solution exists for handling remount in a user namespace,
// we must clear the readonly flag for the cgroups mount (@mrunalp concurs)
for i := range container.Mounts {
if container.Mounts[i].Device == "cgroup" {
container.Mounts[i].Flags &= ^syscall.MS_RDONLY
}
}
return nil
}
func (d *Driver) setPrivileged(container *configs.Config) (err error) {
container.Capabilities = execdriver.GetAllCapabilities()
container.Cgroups.Resources.AllowAllDevices = true
hostDevices, err := devices.HostDevices()
if err != nil {
return err
}
container.Devices = hostDevices
if apparmor.IsEnabled() {
container.AppArmorProfile = "unconfined"
}
return nil
}
func (d *Driver) setCapabilities(container *configs.Config, c *execdriver.Command) (err error) {
container.Capabilities, err = execdriver.TweakCapabilities(container.Capabilities, c.CapAdd, c.CapDrop)
return err
}
func (d *Driver) setupRlimits(container *configs.Config, c *execdriver.Command) {
if c.Resources == nil {
return
}
for _, rlimit := range c.Resources.Rlimits {
container.Rlimits = append(container.Rlimits, configs.Rlimit{
Type: rlimit.Type,
Hard: rlimit.Hard,
Soft: rlimit.Soft,
})
}
}
// If rootfs mount propagation is RPRIVATE, that means all the volumes are
// going to be private anyway. There is no need to apply per volume
// propagation on top. This is just an optimization so that cost of per volume
// propagation is paid only if user decides to make some volume non-private
// which will force rootfs mount propagation to be non RPRIVATE.
func checkResetVolumePropagation(container *configs.Config) {
if container.RootPropagation != mount.RPRIVATE {
return
}
for _, m := range container.Mounts {
m.PropagationFlags = nil
}
}
func getMountInfo(mountinfo []*mount.Info, dir string) *mount.Info {
for _, m := range mountinfo {
if m.Mountpoint == dir {
return m
}
}
return nil
}
// Get the source mount point of directory passed in as argument. Also return
// optional fields.
func getSourceMount(source string) (string, string, error) {
// Ensure any symlinks are resolved.
sourcePath, err := filepath.EvalSymlinks(source)
if err != nil {
return "", "", err
}
mountinfos, err := mount.GetMounts()
if err != nil {
return "", "", err
}
mountinfo := getMountInfo(mountinfos, sourcePath)
if mountinfo != nil {
return sourcePath, mountinfo.Optional, nil
}
path := sourcePath
for {
path = filepath.Dir(path)
mountinfo = getMountInfo(mountinfos, path)
if mountinfo != nil {
return path, mountinfo.Optional, nil
}
if path == "/" {
break
}
}
// If we are here, we did not find parent mount. Something is wrong.
return "", "", fmt.Errorf("Could not find source mount of %s", source)
}
// Ensure mount point on which path is mounted, is shared.
func ensureShared(path string) error {
sharedMount := false
sourceMount, optionalOpts, err := getSourceMount(path)
if err != nil {
return err
}
// Make sure source mount point is shared.
optsSplit := strings.Split(optionalOpts, " ")
for _, opt := range optsSplit {
if strings.HasPrefix(opt, "shared:") {
sharedMount = true
break
}
}
if !sharedMount {
return fmt.Errorf("Path %s is mounted on %s but it is not a shared mount.", path, sourceMount)
}
return nil
}
// Ensure mount point on which path is mounted, is either shared or slave.
func ensureSharedOrSlave(path string) error {
sharedMount := false
slaveMount := false
sourceMount, optionalOpts, err := getSourceMount(path)
if err != nil {
return err
}
// Make sure source mount point is shared.
optsSplit := strings.Split(optionalOpts, " ")
for _, opt := range optsSplit {
if strings.HasPrefix(opt, "shared:") {
sharedMount = true
break
} else if strings.HasPrefix(opt, "master:") {
slaveMount = true
break
}
}
if !sharedMount && !slaveMount {
return fmt.Errorf("Path %s is mounted on %s but it is not a shared or slave mount.", path, sourceMount)
}
return nil
}
func (d *Driver) setupMounts(container *configs.Config, c *execdriver.Command) error {
userMounts := make(map[string]struct{})
for _, m := range c.Mounts {
userMounts[m.Destination] = struct{}{}
}
// Filter out mounts that are overridden by user supplied mounts
var defaultMounts []*configs.Mount
_, mountDev := userMounts["/dev"]
for _, m := range container.Mounts {
if _, ok := userMounts[m.Destination]; !ok {
if mountDev && strings.HasPrefix(m.Destination, "/dev/") {
container.Devices = nil
continue
}
defaultMounts = append(defaultMounts, m)
}
}
container.Mounts = defaultMounts
mountPropagationMap := map[string]int{
"private": mount.PRIVATE,
"rprivate": mount.RPRIVATE,
"shared": mount.SHARED,
"rshared": mount.RSHARED,
"slave": mount.SLAVE,
"rslave": mount.RSLAVE,
}
for _, m := range c.Mounts {
for _, cm := range container.Mounts {
if cm.Destination == m.Destination {
return derr.ErrorCodeMountDup.WithArgs(m.Destination)
}
}
if m.Source == "tmpfs" {
var (
data = "size=65536k"
flags = syscall.MS_NOEXEC | syscall.MS_NOSUID | syscall.MS_NODEV
err error
)
if m.Data != "" {
flags, data, err = mount.ParseTmpfsOptions(m.Data)
if err != nil {
return err
}
}
container.Mounts = append(container.Mounts, &configs.Mount{
Source: m.Source,
Destination: m.Destination,
Data: data,
Device: "tmpfs",
Flags: flags,
PropagationFlags: []int{mountPropagationMap[volume.DefaultPropagationMode]},
})
continue
}
flags := syscall.MS_BIND | syscall.MS_REC
var pFlag int
if !m.Writable {
flags |= syscall.MS_RDONLY
}
// Determine property of RootPropagation based on volume
// properties. If a volume is shared, then keep root propagation
// shared. This should work for slave and private volumes too.
//
// For slave volumes, it can be either [r]shared/[r]slave.
//
// For private volumes any root propagation value should work.
pFlag = mountPropagationMap[m.Propagation]
if pFlag == mount.SHARED || pFlag == mount.RSHARED {
if err := ensureShared(m.Source); err != nil {
return err
}
rootpg := container.RootPropagation
if rootpg != mount.SHARED && rootpg != mount.RSHARED {
execdriver.SetRootPropagation(container, mount.SHARED)
}
} else if pFlag == mount.SLAVE || pFlag == mount.RSLAVE {
if err := ensureSharedOrSlave(m.Source); err != nil {
return err
}
rootpg := container.RootPropagation
if rootpg != mount.SHARED && rootpg != mount.RSHARED && rootpg != mount.SLAVE && rootpg != mount.RSLAVE {
execdriver.SetRootPropagation(container, mount.RSLAVE)
}
}
mount := &configs.Mount{
Source: m.Source,
Destination: m.Destination,
Device: "bind",
Flags: flags,
}
if pFlag != 0 {
mount.PropagationFlags = []int{pFlag}
}
container.Mounts = append(container.Mounts, mount)
}
checkResetVolumePropagation(container)
return nil
}
func (d *Driver) setupLabels(container *configs.Config, c *execdriver.Command) {
container.ProcessLabel = c.ProcessLabel
container.MountLabel = c.MountLabel
}

View File

@@ -0,0 +1,582 @@
// +build linux,cgo
package native
import (
"fmt"
"io"
"io/ioutil"
"os"
"os/exec"
"path/filepath"
"strings"
"sync"
"syscall"
"time"
"github.com/Sirupsen/logrus"
"github.com/hyperhq/hypercli/daemon/execdriver"
"github.com/hyperhq/hypercli/pkg/parsers"
"github.com/hyperhq/hypercli/pkg/pools"
"github.com/hyperhq/hypercli/pkg/reexec"
sysinfo "github.com/hyperhq/hypercli/pkg/system"
"github.com/hyperhq/hypercli/pkg/term"
aaprofile "github.com/hyperhq/hypercli/profiles/apparmor"
"github.com/opencontainers/runc/libcontainer"
"github.com/opencontainers/runc/libcontainer/apparmor"
"github.com/opencontainers/runc/libcontainer/cgroups/systemd"
"github.com/opencontainers/runc/libcontainer/configs"
"github.com/opencontainers/runc/libcontainer/system"
"github.com/opencontainers/runc/libcontainer/utils"
)
// Define constants for native driver
const (
DriverName = "native"
Version = "0.2"
defaultApparmorProfile = "docker-default"
)
// Driver contains all information for native driver,
// it implements execdriver.Driver.
type Driver struct {
root string
activeContainers map[string]libcontainer.Container
machineMemory int64
factory libcontainer.Factory
sync.Mutex
}
// NewDriver returns a new native driver, called from NewDriver of execdriver.
func NewDriver(root string, options []string) (*Driver, error) {
meminfo, err := sysinfo.ReadMemInfo()
if err != nil {
return nil, err
}
if err := sysinfo.MkdirAll(root, 0700); err != nil {
return nil, err
}
if apparmor.IsEnabled() {
if err := aaprofile.InstallDefault(defaultApparmorProfile); err != nil {
apparmorProfiles := []string{defaultApparmorProfile}
// Allow daemon to run if loading failed, but are active
// (possibly through another run, manually, or via system startup)
for _, policy := range apparmorProfiles {
if err := aaprofile.IsLoaded(policy); err != nil {
return nil, fmt.Errorf("AppArmor enabled on system but the %s profile could not be loaded.", policy)
}
}
}
}
// choose cgroup manager
// this makes sure there are no breaking changes to people
// who upgrade from versions without native.cgroupdriver opt
cgm := libcontainer.Cgroupfs
// parse the options
for _, option := range options {
key, val, err := parsers.ParseKeyValueOpt(option)
if err != nil {
return nil, err
}
key = strings.ToLower(key)
switch key {
case "native.cgroupdriver":
// override the default if they set options
switch val {
case "systemd":
if systemd.UseSystemd() {
cgm = libcontainer.SystemdCgroups
} else {
// warn them that they chose the wrong driver
logrus.Warn("You cannot use systemd as native.cgroupdriver, using cgroupfs instead")
}
case "cgroupfs":
cgm = libcontainer.Cgroupfs
default:
return nil, fmt.Errorf("Unknown native.cgroupdriver given %q. try cgroupfs or systemd", val)
}
default:
return nil, fmt.Errorf("Unknown option %s\n", key)
}
}
f, err := libcontainer.New(
root,
cgm,
libcontainer.InitPath(reexec.Self(), DriverName),
)
if err != nil {
return nil, err
}
return &Driver{
root: root,
activeContainers: make(map[string]libcontainer.Container),
machineMemory: meminfo.MemTotal,
factory: f,
}, nil
}
type execOutput struct {
exitCode int
err error
}
// Run implements the exec driver Driver interface,
// it calls libcontainer APIs to run a container.
func (d *Driver) Run(c *execdriver.Command, pipes *execdriver.Pipes, hooks execdriver.Hooks) (execdriver.ExitStatus, error) {
destroyed := false
var err error
c.TmpDir, err = ioutil.TempDir("", c.ID)
if err != nil {
return execdriver.ExitStatus{ExitCode: -1}, err
}
defer os.RemoveAll(c.TmpDir)
// take the Command and populate the libcontainer.Config from it
container, err := d.createContainer(c, hooks)
if err != nil {
return execdriver.ExitStatus{ExitCode: -1}, err
}
p := &libcontainer.Process{
Args: append([]string{c.ProcessConfig.Entrypoint}, c.ProcessConfig.Arguments...),
Env: c.ProcessConfig.Env,
Cwd: c.WorkingDir,
User: c.ProcessConfig.User,
}
if err := setupPipes(container, &c.ProcessConfig, p, pipes); err != nil {
return execdriver.ExitStatus{ExitCode: -1}, err
}
cont, err := d.factory.Create(c.ID, container)
if err != nil {
return execdriver.ExitStatus{ExitCode: -1}, err
}
d.Lock()
d.activeContainers[c.ID] = cont
d.Unlock()
defer func() {
if !destroyed {
cont.Destroy()
}
d.cleanContainer(c.ID)
}()
if err := cont.Start(p); err != nil {
return execdriver.ExitStatus{ExitCode: -1}, err
}
// 'oom' is used to emit 'oom' events to the eventstream, 'oomKilled' is used
// to set the 'OOMKilled' flag in state
oom := notifyOnOOM(cont)
oomKilled := notifyOnOOM(cont)
if hooks.Start != nil {
pid, err := p.Pid()
if err != nil {
p.Signal(os.Kill)
p.Wait()
return execdriver.ExitStatus{ExitCode: -1}, err
}
hooks.Start(&c.ProcessConfig, pid, oom)
}
waitF := p.Wait
if nss := cont.Config().Namespaces; !nss.Contains(configs.NEWPID) {
// we need such hack for tracking processes with inherited fds,
// because cmd.Wait() waiting for all streams to be copied
waitF = waitInPIDHost(p, cont)
}
ps, err := waitF()
if err != nil {
execErr, ok := err.(*exec.ExitError)
if !ok {
return execdriver.ExitStatus{ExitCode: -1}, err
}
ps = execErr.ProcessState
}
cont.Destroy()
destroyed = true
// oomKilled will have an oom event if any process within the container was
// OOM killed at any time, not only if the init process OOMed.
//
// Perhaps we only want the OOMKilled flag to be set if the OOM
// resulted in a container death, but there isn't a good way to do this
// because the kernel's cgroup oom notification does not provide information
// such as the PID. This could be heuristically done by checking that the OOM
// happened within some very small time slice for the container dying (and
// optionally exit-code 137), but I don't think the cgroup oom notification
// can be used to reliably determine this
//
// Even if there were multiple OOMs, it's sufficient to read one value
// because libcontainer's oom notify will discard the channel after the
// cgroup is destroyed
_, oomKill := <-oomKilled
return execdriver.ExitStatus{ExitCode: utils.ExitStatus(ps.Sys().(syscall.WaitStatus)), OOMKilled: oomKill}, nil
}
// notifyOnOOM returns a channel that signals if the container received an OOM notification
// for any process. If it is unable to subscribe to OOM notifications then a closed
// channel is returned as it will be non-blocking and return the correct result when read.
func notifyOnOOM(container libcontainer.Container) <-chan struct{} {
oom, err := container.NotifyOOM()
if err != nil {
logrus.Warnf("Your kernel does not support OOM notifications: %s", err)
c := make(chan struct{})
close(c)
return c
}
return oom
}
func killCgroupProcs(c libcontainer.Container) {
var procs []*os.Process
if err := c.Pause(); err != nil {
logrus.Warn(err)
}
pids, err := c.Processes()
if err != nil {
// don't care about childs if we can't get them, this is mostly because cgroup already deleted
logrus.Warnf("Failed to get processes from container %s: %v", c.ID(), err)
}
for _, pid := range pids {
if p, err := os.FindProcess(pid); err == nil {
procs = append(procs, p)
if err := p.Kill(); err != nil {
logrus.Warn(err)
}
}
}
if err := c.Resume(); err != nil {
logrus.Warn(err)
}
for _, p := range procs {
if _, err := p.Wait(); err != nil {
logrus.Warn(err)
}
}
}
func waitInPIDHost(p *libcontainer.Process, c libcontainer.Container) func() (*os.ProcessState, error) {
return func() (*os.ProcessState, error) {
pid, err := p.Pid()
if err != nil {
return nil, err
}
process, err := os.FindProcess(pid)
s, err := process.Wait()
if err != nil {
execErr, ok := err.(*exec.ExitError)
if !ok {
return s, err
}
s = execErr.ProcessState
}
killCgroupProcs(c)
p.Wait()
return s, err
}
}
// Kill implements the exec driver Driver interface.
func (d *Driver) Kill(c *execdriver.Command, sig int) error {
d.Lock()
active := d.activeContainers[c.ID]
d.Unlock()
if active == nil {
return fmt.Errorf("active container for %s does not exist", c.ID)
}
state, err := active.State()
if err != nil {
return err
}
return syscall.Kill(state.InitProcessPid, syscall.Signal(sig))
}
// Pause implements the exec driver Driver interface,
// it calls libcontainer API to pause a container.
func (d *Driver) Pause(c *execdriver.Command) error {
d.Lock()
active := d.activeContainers[c.ID]
d.Unlock()
if active == nil {
return fmt.Errorf("active container for %s does not exist", c.ID)
}
return active.Pause()
}
// Unpause implements the exec driver Driver interface,
// it calls libcontainer API to unpause a container.
func (d *Driver) Unpause(c *execdriver.Command) error {
d.Lock()
active := d.activeContainers[c.ID]
d.Unlock()
if active == nil {
return fmt.Errorf("active container for %s does not exist", c.ID)
}
return active.Resume()
}
// Terminate implements the exec driver Driver interface.
func (d *Driver) Terminate(c *execdriver.Command) error {
defer d.cleanContainer(c.ID)
container, err := d.factory.Load(c.ID)
if err != nil {
return err
}
defer container.Destroy()
state, err := container.State()
if err != nil {
return err
}
pid := state.InitProcessPid
currentStartTime, err := system.GetProcessStartTime(pid)
if err != nil {
return err
}
if state.InitProcessStartTime == currentStartTime {
err = syscall.Kill(pid, 9)
syscall.Wait4(pid, nil, 0, nil)
}
return err
}
// Name implements the exec driver Driver interface.
func (d *Driver) Name() string {
return fmt.Sprintf("%s-%s", DriverName, Version)
}
// GetPidsForContainer implements the exec driver Driver interface.
func (d *Driver) GetPidsForContainer(id string) ([]int, error) {
d.Lock()
active := d.activeContainers[id]
d.Unlock()
if active == nil {
return nil, fmt.Errorf("active container for %s does not exist", id)
}
return active.Processes()
}
func (d *Driver) cleanContainer(id string) error {
d.Lock()
delete(d.activeContainers, id)
d.Unlock()
return os.RemoveAll(filepath.Join(d.root, id))
}
func (d *Driver) createContainerRoot(id string) error {
return os.MkdirAll(filepath.Join(d.root, id), 0655)
}
// Clean implements the exec driver Driver interface.
func (d *Driver) Clean(id string) error {
return os.RemoveAll(filepath.Join(d.root, id))
}
// Stats implements the exec driver Driver interface.
func (d *Driver) Stats(id string) (*execdriver.ResourceStats, error) {
d.Lock()
c := d.activeContainers[id]
d.Unlock()
if c == nil {
return nil, execdriver.ErrNotRunning
}
now := time.Now()
stats, err := c.Stats()
if err != nil {
return nil, err
}
memoryLimit := c.Config().Cgroups.Resources.Memory
// if the container does not have any memory limit specified set the
// limit to the machines memory
if memoryLimit == 0 {
memoryLimit = d.machineMemory
}
return &execdriver.ResourceStats{
Stats: stats,
Read: now,
MemoryLimit: memoryLimit,
}, nil
}
// Update updates configs for a container
func (d *Driver) Update(c *execdriver.Command) error {
d.Lock()
cont := d.activeContainers[c.ID]
d.Unlock()
if cont == nil {
return execdriver.ErrNotRunning
}
config := cont.Config()
if err := execdriver.SetupCgroups(&config, c); err != nil {
return err
}
if err := cont.Set(config); err != nil {
return err
}
return nil
}
// TtyConsole implements the exec driver Terminal interface.
type TtyConsole struct {
console libcontainer.Console
}
// NewTtyConsole returns a new TtyConsole struct.
func NewTtyConsole(console libcontainer.Console, pipes *execdriver.Pipes) (*TtyConsole, error) {
tty := &TtyConsole{
console: console,
}
if err := tty.AttachPipes(pipes); err != nil {
tty.Close()
return nil, err
}
return tty, nil
}
// Resize implements Resize method of Terminal interface
func (t *TtyConsole) Resize(h, w int) error {
return term.SetWinsize(t.console.Fd(), &term.Winsize{Height: uint16(h), Width: uint16(w)})
}
// AttachPipes attaches given pipes to TtyConsole
func (t *TtyConsole) AttachPipes(pipes *execdriver.Pipes) error {
go func() {
if wb, ok := pipes.Stdout.(interface {
CloseWriters() error
}); ok {
defer wb.CloseWriters()
}
pools.Copy(pipes.Stdout, t.console)
}()
if pipes.Stdin != nil {
go func() {
pools.Copy(t.console, pipes.Stdin)
pipes.Stdin.Close()
}()
}
return nil
}
// Close implements Close method of Terminal interface
func (t *TtyConsole) Close() error {
return t.console.Close()
}
func setupPipes(container *configs.Config, processConfig *execdriver.ProcessConfig, p *libcontainer.Process, pipes *execdriver.Pipes) error {
rootuid, err := container.HostUID()
if err != nil {
return err
}
if processConfig.Tty {
cons, err := p.NewConsole(rootuid)
if err != nil {
return err
}
term, err := NewTtyConsole(cons, pipes)
if err != nil {
return err
}
processConfig.Terminal = term
return nil
}
// not a tty--set up stdio pipes
term := &execdriver.StdConsole{}
processConfig.Terminal = term
// if we are not in a user namespace, there is no reason to go through
// the hassle of setting up os-level pipes with proper (remapped) ownership
// so we will do the prior shortcut for non-userns containers
if rootuid == 0 {
p.Stdout = pipes.Stdout
p.Stderr = pipes.Stderr
r, w, err := os.Pipe()
if err != nil {
return err
}
if pipes.Stdin != nil {
go func() {
io.Copy(w, pipes.Stdin)
w.Close()
}()
p.Stdin = r
}
return nil
}
// if we have user namespaces enabled (rootuid != 0), we will set
// up os pipes for stderr, stdout, stdin so we can chown them to
// the proper ownership to allow for proper access to the underlying
// fds
var fds []int
//setup stdout
r, w, err := os.Pipe()
if err != nil {
return err
}
fds = append(fds, int(r.Fd()), int(w.Fd()))
if pipes.Stdout != nil {
go io.Copy(pipes.Stdout, r)
}
term.Closers = append(term.Closers, r)
p.Stdout = w
//setup stderr
r, w, err = os.Pipe()
if err != nil {
return err
}
fds = append(fds, int(r.Fd()), int(w.Fd()))
if pipes.Stderr != nil {
go io.Copy(pipes.Stderr, r)
}
term.Closers = append(term.Closers, r)
p.Stderr = w
//setup stdin
r, w, err = os.Pipe()
if err != nil {
return err
}
fds = append(fds, int(r.Fd()), int(w.Fd()))
if pipes.Stdin != nil {
go func() {
io.Copy(w, pipes.Stdin)
w.Close()
}()
p.Stdin = r
}
for _, fd := range fds {
if err := syscall.Fchown(fd, rootuid, rootuid); err != nil {
return fmt.Errorf("Failed to chown pipes fd: %v", err)
}
}
return nil
}
// SupportsHooks implements the execdriver Driver interface.
// The libcontainer/runC-based native execdriver does exploit the hook mechanism
func (d *Driver) SupportsHooks() bool {
return true
}

View File

@@ -0,0 +1,14 @@
// +build !linux
package native
import (
"fmt"
"github.com/hyperhq/hypercli/daemon/execdriver"
)
// NewDriver returns a new native driver, called from NewDriver of execdriver.
func NewDriver(root string, options []string) (execdriver.Driver, error) {
return nil, fmt.Errorf("native driver not supported on non-linux")
}

View File

@@ -0,0 +1,14 @@
// +build linux,!cgo
package native
import (
"fmt"
"github.com/hyperhq/hypercli/daemon/execdriver"
)
// NewDriver returns a new native driver, called from NewDriver of execdriver.
func NewDriver(root string, options []string) (execdriver.Driver, error) {
return nil, fmt.Errorf("native driver not supported on non-linux")
}

View File

@@ -0,0 +1,87 @@
// +build linux
package native
import (
"fmt"
"os"
"os/exec"
"strings"
"syscall"
"github.com/hyperhq/hypercli/daemon/execdriver"
"github.com/opencontainers/runc/libcontainer"
// Blank import 'nsenter' so that init in that package will call c
// function 'nsexec()' to do 'setns' before Go runtime take over,
// it's used for join to exist ns like 'docker exec' command.
_ "github.com/opencontainers/runc/libcontainer/nsenter"
"github.com/opencontainers/runc/libcontainer/utils"
)
// Exec implements the exec driver Driver interface,
// it calls libcontainer APIs to execute a container.
func (d *Driver) Exec(c *execdriver.Command, processConfig *execdriver.ProcessConfig, pipes *execdriver.Pipes, hooks execdriver.Hooks) (int, error) {
active := d.activeContainers[c.ID]
if active == nil {
return -1, fmt.Errorf("No active container exists with ID %s", c.ID)
}
user := processConfig.User
if c.RemappedRoot.UID != 0 && user == "" {
//if user namespaces are enabled, set user explicitly so uid/gid is set to 0
//otherwise we end up with the overflow id and no permissions (65534)
user = "0"
}
p := &libcontainer.Process{
Args: append([]string{processConfig.Entrypoint}, processConfig.Arguments...),
Env: c.ProcessConfig.Env,
Cwd: c.WorkingDir,
User: user,
}
if processConfig.Privileged {
p.Capabilities = execdriver.GetAllCapabilities()
}
// add CAP_ prefix to all caps for new libcontainer update to match
// the spec format.
for i, s := range p.Capabilities {
if !strings.HasPrefix(s, "CAP_") {
p.Capabilities[i] = fmt.Sprintf("CAP_%s", s)
}
}
config := active.Config()
if err := setupPipes(&config, processConfig, p, pipes); err != nil {
return -1, err
}
if err := active.Start(p); err != nil {
return -1, err
}
if hooks.Start != nil {
pid, err := p.Pid()
if err != nil {
p.Signal(os.Kill)
p.Wait()
return -1, err
}
// A closed channel for OOM is returned here as it will be
// non-blocking and return the correct result when read.
chOOM := make(chan struct{})
close(chOOM)
hooks.Start(&c.ProcessConfig, pid, chOOM)
}
ps, err := p.Wait()
if err != nil {
exitErr, ok := err.(*exec.ExitError)
if !ok {
return -1, err
}
ps = exitErr.ProcessState
}
return utils.ExitStatus(ps.Sys().(syscall.WaitStatus)), nil
}

View File

@@ -0,0 +1,45 @@
// +build linux
package native
import (
"fmt"
"os"
"runtime"
"github.com/hyperhq/hypercli/pkg/reexec"
"github.com/opencontainers/runc/libcontainer"
)
func init() {
reexec.Register(DriverName, initializer)
}
func fatal(err error) {
if lerr, ok := err.(libcontainer.Error); ok {
lerr.Detail(os.Stderr)
os.Exit(1)
}
fmt.Fprintln(os.Stderr, err)
os.Exit(1)
}
func initializer() {
runtime.GOMAXPROCS(1)
runtime.LockOSThread()
factory, err := libcontainer.New("")
if err != nil {
fatal(err)
}
if err := factory.StartInitialization(); err != nil {
fatal(err)
}
panic("unreachable")
}
func writeError(err error) {
fmt.Fprint(os.Stderr, err)
os.Exit(1)
}

View File

@@ -0,0 +1,100 @@
package template
import (
"syscall"
"github.com/opencontainers/runc/libcontainer/apparmor"
"github.com/opencontainers/runc/libcontainer/configs"
)
const defaultMountFlags = syscall.MS_NOEXEC | syscall.MS_NOSUID | syscall.MS_NODEV
// New returns the docker default configuration for libcontainer
func New() *configs.Config {
container := &configs.Config{
Capabilities: []string{
"CHOWN",
"DAC_OVERRIDE",
"FSETID",
"FOWNER",
"MKNOD",
"NET_RAW",
"SETGID",
"SETUID",
"SETFCAP",
"SETPCAP",
"NET_BIND_SERVICE",
"SYS_CHROOT",
"KILL",
"AUDIT_WRITE",
},
Namespaces: configs.Namespaces([]configs.Namespace{
{Type: "NEWNS"},
{Type: "NEWUTS"},
{Type: "NEWIPC"},
{Type: "NEWPID"},
{Type: "NEWNET"},
{Type: "NEWUSER"},
}),
Cgroups: &configs.Cgroup{
ScopePrefix: "docker", // systemd only
Resources: &configs.Resources{
AllowAllDevices: false,
MemorySwappiness: -1,
},
},
Mounts: []*configs.Mount{
{
Source: "proc",
Destination: "/proc",
Device: "proc",
Flags: defaultMountFlags,
},
{
Source: "tmpfs",
Destination: "/dev",
Device: "tmpfs",
Flags: syscall.MS_NOSUID | syscall.MS_STRICTATIME,
Data: "mode=755",
},
{
Source: "devpts",
Destination: "/dev/pts",
Device: "devpts",
Flags: syscall.MS_NOSUID | syscall.MS_NOEXEC,
Data: "newinstance,ptmxmode=0666,mode=0620,gid=5",
},
{
Source: "sysfs",
Destination: "/sys",
Device: "sysfs",
Flags: defaultMountFlags | syscall.MS_RDONLY,
},
{
Source: "cgroup",
Destination: "/sys/fs/cgroup",
Device: "cgroup",
Flags: defaultMountFlags | syscall.MS_RDONLY,
},
},
MaskPaths: []string{
"/proc/kcore",
"/proc/latency_stats",
"/proc/timer_stats",
},
ReadonlyPaths: []string{
"/proc/asound",
"/proc/bus",
"/proc/fs",
"/proc/irq",
"/proc/sys",
"/proc/sysrq-trigger",
},
}
if apparmor.IsEnabled() {
container.AppArmorProfile = "docker-default"
}
return container
}

View File

@@ -0,0 +1,3 @@
// +build !linux
package template