virtual-kubelet/vendor/github.com/vmware/vic/lib/tether/ops_linux.go

// Copyright 2016-2017 VMware, Inc. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//    http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

package tether

import (
	"context"
	"errors"
	"fmt"
	"io/ioutil"
	"net"
	"net/url"
	"os"
	"path"
	"path/filepath"
	"strconv"
	"strings"
	"sync"
	"syscall"
	"time"

	log "github.com/Sirupsen/logrus"
	"github.com/d2g/dhcp4"
	"github.com/docker/docker/pkg/archive"

	// need to use libcontainer for user validation, for os/user package cannot find user here if container image is busybox
	"github.com/opencontainers/runc/libcontainer/user"
	"github.com/vishvananda/netlink"

	"github.com/vmware/vic/lib/dhcp"
	"github.com/vmware/vic/lib/dhcp/client"
	"github.com/vmware/vic/lib/etcconf"
	"github.com/vmware/vic/pkg/ip"
	"github.com/vmware/vic/pkg/trace"
	"github.com/vmware/vmw-guestinfo/rpcout"
)

var (
	defaultExecUser = &user.ExecUser{
		Uid:  syscall.Getuid(),
		Gid:  syscall.Getgid(),
		Home: "/",
	}

	filesForMinOSLinux = map[string]os.FileMode{
		"/etc/hostname":            0644,
		"/etc/hosts":               0644,
		"/etc/resolv.conf":         0644,
		"/.tether/etc/hostname":    0644,
		"/.tether/etc/hosts":       0644,
		"/.tether/etc/resolv.conf": 0644,
	}
)

const (
	hostnameFile          = "/etc/hostname"
	hostnameFileBindSrc   = "/.tether/etc/hostname"
	hostsPathBindSrc      = "/.tether/etc/hosts"
	resolvConfPathBindSrc = "/.tether/etc/resolv.conf"
	byLabelDir            = "/dev/disk/by-label"

	pciDevPath         = "/sys/bus/pci/devices"
	nfsFileSystemType  = "nfs"
	ext4FileSystemType = "ext4"
	bridgeTableNumber  = 201

	// directory in which to perform the direct mount of disk for bind mount to actual
	// target
	diskBindBase = "/.filesystem-by-label/"
	// used to isolate applications from the lost+found in the root of ext4
	volumeDataDir = "/.vic.vol.data"

	// temp directory to copy existing data to mounts
	bindDir = "/.bind"
)

type BaseOperations struct {
	dhcpLoops    map[string]chan struct{}
	dynEndpoints map[string][]*NetworkEndpoint
	config       Config

	// Exclusive access to utilityPids table
	utilityPidMutex sync.Mutex
	// set of child PIDs for one-off non-persistent processes
	utilityPids map[int]chan int
}

// NetLink gives us an interface to the netlink calls used so that
// we can test the calling code.
type Netlink interface {
	LinkByName(string) (netlink.Link, error)
	LinkSetName(netlink.Link, string) error
	LinkSetDown(netlink.Link) error
	LinkSetUp(netlink.Link) error
	LinkSetAlias(netlink.Link, string) error
	AddrList(netlink.Link, int) ([]netlink.Addr, error)
	AddrAdd(netlink.Link, *netlink.Addr) error
	AddrDel(netlink.Link, *netlink.Addr) error
	RouteAdd(*netlink.Route) error
	RouteDel(*netlink.Route) error
	RuleList(family int) ([]netlink.Rule, error)
	// Not quite netlink, but tightly associated

	LinkBySlot(slot int32) (netlink.Link, error)
}

func (t *BaseOperations) LinkByName(name string) (netlink.Link, error) {
	return netlink.LinkByName(name)
}

func (t *BaseOperations) LinkSetName(link netlink.Link, name string) error {
	return netlink.LinkSetName(link, name)
}

func (t *BaseOperations) LinkSetDown(link netlink.Link) error {
	return netlink.LinkSetDown(link)
}

func (t *BaseOperations) LinkSetUp(link netlink.Link) error {
	return netlink.LinkSetUp(link)
}

func (t *BaseOperations) LinkSetAlias(link netlink.Link, alias string) error {
	return netlink.LinkSetAlias(link, alias)
}

func (t *BaseOperations) AddrList(link netlink.Link, family int) ([]netlink.Addr, error) {
	return netlink.AddrList(link, family)
}

func (t *BaseOperations) AddrAdd(link netlink.Link, addr *netlink.Addr) error {
	return netlink.AddrAdd(link, addr)
}

func (t *BaseOperations) AddrDel(link netlink.Link, addr *netlink.Addr) error {
	return netlink.AddrDel(link, addr)
}

func (t *BaseOperations) RouteAdd(route *netlink.Route) error {
	return netlink.RouteAdd(route)
}

func (t *BaseOperations) RouteDel(route *netlink.Route) error {
	return netlink.RouteDel(route)
}

func (t *BaseOperations) RuleList(family int) ([]netlink.Rule, error) {
	return netlink.RuleList(family)
}

func (t *BaseOperations) LinkBySlot(slot int32) (netlink.Link, error) {
	pciPath, err := slotToPCIPath(slot, 0)
	if err != nil {
		return nil, err
	}

	name, err := pciToLinkName(pciPath)
	if err != nil {
		return nil, err
	}

	log.Debugf("got link name: %#v", name)
	return t.LinkByName(name)
}

// SetHostname sets both the kernel hostname and /etc/hostname to the specified string
func (t *BaseOperations) SetHostname(hostname string, aliases ...string) error {
	defer trace.End(trace.Begin("setting hostname to " + hostname))

	old, err := os.Hostname()
	if err != nil {
		log.Warnf("Unable to get current hostname - will not be able to revert on failure: %s", err)
	}

	err = Sys.Syscall.Sethostname([]byte(hostname))
	if err != nil {
		log.Errorf("Unable to set hostname: %s", err)
		return err
	}
	log.Debugf("Updated kernel hostname")

	// bind-mount /.tether/etc/hostname to /etc/hostname
	if err = bindMount(hostnameFileBindSrc, hostnameFile); err != nil {
		return err
	}

	// update /etc/hostname to match
	err = ioutil.WriteFile(hostnameFile, []byte(hostname), 0644)
	if err != nil {
		log.Errorf("Failed to update hostname in %s", hostnameFile)

		// revert the hostname
		if old != "" {
			log.Warnf("Reverting kernel hostname to %s", old)
			err2 := Sys.Syscall.Sethostname([]byte(old))
			if err2 != nil {
				log.Errorf("Unable to revert kernel hostname - kernel and hostname file are out of sync! Error: %s", err2)
			}
		}

		return err
	}

	if err := BindSys.Hosts.Load(); err != nil {
		log.Errorf("Unable to load existing /etc/hosts file - modifications since last load will be overwritten: %s", err)
	}

	// add entry to hosts for resolution without nameservers
	lo4 := net.IPv4(127, 0, 1, 1)
	for _, a := range append(aliases, hostname) {
		BindSys.Hosts.SetHost(a, lo4)
		BindSys.Hosts.SetHost(a, net.IPv6loopback)
	}

	if err = bindMountAndSave(Sys.Hosts, BindSys.Hosts); err != nil {
		return err
	}

	return nil
}

func slotToPCIPath(pciSlot int32, fun int32) (string, error) {
	// see https://kb.vmware.com/kb/2047927
	dev := pciSlot & 0x1f        // DDDDD
	bus := (pciSlot >> 5) & 0x1f // BBBBB
	if fun == 0 {
		fun = (pciSlot >> 10) & 0x7 // FFF
	}
	if bus == 0 {
		return path.Join(pciDevPath, fmt.Sprintf("0000:%02x:%02x.%d", bus, dev, fun)), nil
	}

	// device on secondary bus, prepend pci bridge address, pciBridge0.pciSlotNumber is "17" aka "0x11"
	bridgeSlot := 0x11 + (bus - 1)
	bridgeAddr, err := slotToPCIPath(bridgeSlot, fun)
	if err != nil {
		return "", err
	}

	return path.Join(bridgeAddr, fmt.Sprintf("0000:*:%02x.0", dev)), nil
}

func pciToLinkName(pciPath string) (string, error) {
	p := filepath.Join(pciPath, "net", "*")
	matches, err := filepath.Glob(p)
	if err != nil {
		return "", err
	}

	if len(matches) != 1 {
		return "", fmt.Errorf("%d eth interfaces match %s (%v)", len(matches), p, matches)
	}

	return path.Base(matches[0]), nil
}

func renameLink(t Netlink, link netlink.Link, slot int32, endpoint *NetworkEndpoint) (rlink netlink.Link, err error) {
	rlink = link
	defer func() {
		// we still need to ensure that the link is up irrespective of path
		err = t.LinkSetUp(link)
		if err != nil {
			err = fmt.Errorf("failed to bring link %s up: %s", link.Attrs().Name, err)
		}
	}()

	if link.Attrs().Name == endpoint.Name || link.Attrs().Alias == endpoint.Name || endpoint.Name == "" {
		// if the network is already identified, whether as primary name or alias it doesn't need repeating.
		// if the name is empty then it should not be aliases or named directly. IPAM data should still be applied.
		return link, nil
	}

	if strings.HasPrefix(link.Attrs().Name, "eth") {
		log.Infof("Renaming link %s to %s", link.Attrs().Name, endpoint.Name)

		err := t.LinkSetDown(link)
		if err != nil {
			detail := fmt.Sprintf("failed to set link %s down for rename: %s", link.Attrs().Name, err)
			return nil, errors.New(detail)
		}

		err = t.LinkSetName(link, endpoint.Name)
		if err != nil {
			return nil, err
		}

		// reacquire link with updated attributes
		link, err := t.LinkBySlot(slot)
		if err != nil {
			detail := fmt.Sprintf("unable to reacquire link %s after rename pass: %s", link.Attrs().Name, err)
			return nil, errors.New(detail)
		}

		return link, nil
	}

	if link.Attrs().Alias == "" {
		log.Infof("Aliasing link %s to %s", link.Attrs().Name, endpoint.Name)
		err := t.LinkSetAlias(link, endpoint.Name)
		if err != nil {
			return nil, err
		}

		// reacquire link with updated attributes
		link, err := t.LinkBySlot(slot)
		if err != nil {
			detail := fmt.Sprintf("unable to reacquire link %s after rename pass: %s", link.Attrs().Name, err)
			return nil, errors.New(detail)
		}

		return link, nil
	}

	log.Warnf("Unable to add additional alias on link %s for %s", link.Attrs().Name, endpoint.Name)
	return link, nil
}

func getDynamicIP(t Netlink, link netlink.Link, endpoint *NetworkEndpoint) (client.Client, error) {
	var ack *dhcp.Packet
	var err error

	// use dhcp to acquire address
	dc, err := client.NewClient(link.Attrs().Index, link.Attrs().HardwareAddr)
	if err != nil {
		return nil, err
	}

	params := []byte{byte(dhcp4.OptionSubnetMask)}
	if ip.IsUnspecifiedIP(endpoint.Network.Gateway.IP) {
		params = append(params, byte(dhcp4.OptionRouter))
	}
	if len(endpoint.Network.Nameservers) == 0 {
		params = append(params, byte(dhcp4.OptionDomainNameServer))
	}

	dc.SetParameterRequestList(params...)

	err = dc.Request()
	if err != nil {
		log.Errorf("error sending dhcp request: %s", err)
		return nil, err
	}

	ack = dc.LastAck()
	if ack.YourIP() == nil || ack.SubnetMask() == nil {
		err = fmt.Errorf("dhcp assigned nil ip or subnet mask")
		log.Error(err)
		return nil, err
	}

	log.Infof("DHCP response: IP=%s, SubnetMask=%s, Gateway=%s, DNS=%s, Lease Time=%s", ack.YourIP(), ack.SubnetMask(), ack.Gateway(), ack.DNS(), ack.LeaseTime())
	defer func() {
		if err != nil && ack != nil {
			dc.Release()
		}
	}()

	return dc, nil
}

func updateEndpoint(newIP *net.IPNet, endpoint *NetworkEndpoint) {
	log.Debugf("updateEndpoint(%s, %+v)", newIP, endpoint)

	dhcp := endpoint.DHCP
	if dhcp == nil {
		endpoint.Assigned = *newIP
		endpoint.Network.Assigned.Gateway = endpoint.Network.Gateway
		endpoint.Network.Assigned.Nameservers = endpoint.Network.Nameservers
		return
	}

	endpoint.Assigned = dhcp.Assigned
	endpoint.Network.Assigned.Gateway = dhcp.Gateway
	if len(dhcp.Nameservers) > 0 {
		endpoint.Network.Assigned.Nameservers = dhcp.Nameservers
	}
}

func linkAddrUpdate(old, new *net.IPNet, t Netlink, link netlink.Link) error {
	log.Infof("setting ip address %s for link %s", new, link.Attrs().Name)

	if old != nil && !old.IP.Equal(new.IP) {
		log.Debugf("removing old address %s", old)
		if err := t.AddrDel(link, &netlink.Addr{IPNet: old}); err != nil {
			if errno, ok := err.(syscall.Errno); !ok || errno != syscall.EADDRNOTAVAIL {
				log.Errorf("failed to remove existing address %s: %s", old, err)
				return err
			}
		}

		log.Debugf("removed old address %s for link %s", old, link.Attrs().Name)
	}

	// assign IP to NIC
	if err := t.AddrAdd(link, &netlink.Addr{IPNet: new}); err != nil {
		if errno, ok := err.(syscall.Errno); !ok || errno != syscall.EEXIST {
			log.Errorf("failed to assign ip %s for link %s", new, link.Attrs().Name)
			return err

		}

		log.Warnf("address %s already set on interface %s", new, link.Attrs().Name)
	}

	log.Debugf("added address %s to link %s", new, link.Attrs().Name)
	return nil
}

func updateRoutes(newIP *net.IPNet, t Netlink, link netlink.Link, endpoint *NetworkEndpoint) error {
	gw := endpoint.Network.Assigned.Gateway
	if ip.IsUnspecifiedIP(gw.IP) {
		return nil
	}

	if endpoint.Network.Default {
		return updateDefaultRoute(newIP, t, link, endpoint)
	}

	for _, d := range endpoint.Network.Destinations {
		r := &netlink.Route{
			LinkIndex: link.Attrs().Index,
			Dst:       &d,
			Gw:        gw.IP,
		}

		if err := t.RouteAdd(r); err != nil && !os.IsNotExist(err) {
			log.Errorf("failed to add route for destination %s via gateway %s", d, gw.IP)
		}
	}

	return nil
}

func bridgeTableExists(t Netlink) bool {
	rules, err := t.RuleList(syscall.AF_INET)
	if err != nil {
		return false
	}

	for _, r := range rules {
		if r.Table == bridgeTableNumber {
			return true
		}
	}

	return false
}

func updateDefaultRoute(newIP *net.IPNet, t Netlink, link netlink.Link, endpoint *NetworkEndpoint) error {
	gw := endpoint.Network.Assigned.Gateway

	// Add routes
	if !endpoint.Network.Default || ip.IsUnspecifiedIP(gw.IP) {
		log.Debugf("not setting route for network: default=%v gateway=%s", endpoint.Network.Default, gw.IP)
		return nil
	}

	// #nosec: Errors unhandled.
	_, defaultNet, _ := net.ParseCIDR("0.0.0.0/0")
	// delete default route first
	if err := t.RouteDel(&netlink.Route{LinkIndex: link.Attrs().Index, Dst: defaultNet}); err != nil {
		if errno, ok := err.(syscall.Errno); !ok || errno != syscall.ESRCH {
			return fmt.Errorf("could not update default route: %s", err)
		}
	}

	// delete the default route for the bridge.out table, if it exists
	bTablePresent := bridgeTableExists(t)
	if bTablePresent {
		if err := t.RouteDel(&netlink.Route{LinkIndex: link.Attrs().Index, Dst: defaultNet, Table: bridgeTableNumber}); err != nil {
			if errno, ok := err.(syscall.Errno); !ok || errno != syscall.ESRCH {
				return fmt.Errorf("could not update default route for bridge.out table: %s", err)
			}
		}
	}

	log.Infof("Setting default gateway to %s", gw.IP)
	route := &netlink.Route{LinkIndex: link.Attrs().Index, Dst: defaultNet, Gw: gw.IP}
	if err := t.RouteAdd(route); err != nil {
		return fmt.Errorf("failed to add gateway route for endpoint %s: %s", endpoint.Network.Name, err)
	}

	if bTablePresent {
		// Gateway IP may not contain a network mask, so it is taken from the assigned interface configuration
		// where network mask has to be defined.
		gwNet := &net.IPNet{
			IP:   gw.IP.Mask(newIP.Mask),
			Mask: newIP.Mask,
		}

		log.Debugf("Adding route to default gateway network: %s/%s", gwNet.IP, gwNet.Mask)

		route = &netlink.Route{LinkIndex: link.Attrs().Index, Dst: gwNet, Table: bridgeTableNumber}
		if err := t.RouteAdd(route); err != nil {
			// if IP address has changed and it stays within the same subnet it will cause already exists error,
			// so we can safely ignore it.
			errno, ok := err.(syscall.Errno)
			if !ok || errno != syscall.EEXIST {
				return fmt.Errorf(
					"failed to add gateway network route for table bridge.out for endpoint %s: %s",
					endpoint.Network.Name, err)
			}
		}

		route = &netlink.Route{LinkIndex: link.Attrs().Index, Dst: defaultNet, Gw: gw.IP, Table: bridgeTableNumber}
		if err := t.RouteAdd(route); err != nil {
			return fmt.Errorf("failed to add gateway route for table bridge.out for endpoint %s: %s", endpoint.Network.Name, err)
		}
	}

	log.Infof("updated default route to %s interface, gateway: %s", endpoint.Network.Name, gw.IP)
	return nil
}

func (t *BaseOperations) updateHosts(endpoint *NetworkEndpoint) error {
	log.Debugf("%+v", endpoint)
	// Add /etc/hosts entry
	if endpoint.Network.Name == "" {
		return nil
	}

	if err := BindSys.Hosts.Load(); err != nil {
		log.Errorf("Unable to load existing /etc/hosts file - modifications since last load will be overwritten: %s", err)
	}

	BindSys.Hosts.SetHost(fmt.Sprintf("%s.localhost", endpoint.Network.Name), endpoint.Assigned.IP)

	if err := bindMountAndSave(Sys.Hosts, BindSys.Hosts); err != nil {
		return err
	}

	return nil
}

func (t *BaseOperations) updateNameservers(endpoint *NetworkEndpoint) error {
	gw := endpoint.Network.Assigned.Gateway
	ns := endpoint.Network.Assigned.Nameservers
	// if `--dns-server` option is supplied at VCH creation, do not overwrite with
	// dhcp-provided name servers, and make sure they appear at the top of the list
	if len(endpoint.Network.Nameservers) > 0 {
		ns = append(endpoint.Network.Nameservers, ns...)
	}
	// Add nameservers
	// This is incredibly trivial for now - should be updated to a less messy approach
	if len(ns) > 0 {
		BindSys.ResolvConf.AddNameservers(ns...)
		log.Infof("Added nameservers: %+v", ns)
	} else if !ip.IsUnspecifiedIP(gw.IP) {
		BindSys.ResolvConf.AddNameservers(gw.IP)
		log.Infof("Added nameserver: %s", gw.IP)
	}

	if err := bindMountAndSave(Sys.ResolvConf, BindSys.ResolvConf); err != nil {
		return err
	}

	return nil
}

func (t *BaseOperations) Apply(endpoint *NetworkEndpoint) error {
	defer trace.End(trace.Begin("applying endpoint configuration for " + endpoint.Network.Name))

	return ApplyEndpoint(t, t, endpoint)
}

func ApplyEndpoint(nl Netlink, t *BaseOperations, endpoint *NetworkEndpoint) error {
	if endpoint.configured {
		log.Infof("skipping applying config for network %s as it has been applied already", endpoint.Network.Name)
		return nil // already applied
	}

	// Locate interface
	slot, err := strconv.Atoi(endpoint.ID)
	if err != nil {
		return fmt.Errorf("endpoint ID must be a base10 numeric pci slot identifier: %s", err)
	}

	defer func() {
		if err == nil {
			log.Infof("successfully applied config for network %s", endpoint.Network.Name)
			endpoint.configured = true
		}
	}()

	var link netlink.Link
	link, err = nl.LinkBySlot(int32(slot))
	if err != nil {
		return fmt.Errorf("unable to acquire reference to link %s: %s", endpoint.ID, err)
	}

	// rename the link if needed
	link, err = renameLink(nl, link, int32(slot), endpoint)
	if err != nil {
		return fmt.Errorf("unable to reacquire link %s after rename pass: %s", endpoint.ID, err)
	}

	var dc client.Client
	defer func() {
		if err != nil && dc != nil {
			dc.Release()
		}
	}()

	var newIP *net.IPNet

	if endpoint.IsDynamic() && endpoint.DHCP == nil {
		if e, ok := t.dynEndpoints[endpoint.ID]; ok {
			// endpoint shares NIC, copy over DHCP
			endpoint.DHCP = e[0].DHCP
		}
	}

	log.Debugf("%+v", endpoint)
	if endpoint.IsDynamic() {
		if endpoint.DHCP == nil {
			dc, err = getDynamicIP(nl, link, endpoint)
			if err != nil {
				return err
			}

			ack := dc.LastAck()
			endpoint.DHCP = &DHCPInfo{
				Assigned:    net.IPNet{IP: ack.YourIP(), Mask: ack.SubnetMask()},
				Nameservers: ack.DNS(),
				Gateway:     net.IPNet{IP: ack.Gateway(), Mask: ack.SubnetMask()},
			}
		}
		newIP = &endpoint.DHCP.Assigned
	} else {
		newIP = endpoint.IP
		if newIP.IP.Equal(net.IPv4zero) {
			// managed externally
			return nil
		}
	}

	var old *net.IPNet
	if !ip.IsUnspecifiedIP(endpoint.Assigned.IP) {
		old = &endpoint.Assigned
	}

	if err = linkAddrUpdate(old, newIP, nl, link); err != nil {
		return err
	}

	updateEndpoint(newIP, endpoint)

	if err = updateRoutes(newIP, nl, link, endpoint); err != nil {
		return err
	}

	if err = t.updateHosts(endpoint); err != nil {
		return err
	}

	Sys.ResolvConf.RemoveNameservers(endpoint.Network.Assigned.Nameservers...)
	if err = t.updateNameservers(endpoint); err != nil {
		return err
	}

	if endpoint.IsDynamic() {
		eps := t.dynEndpoints[endpoint.ID]
		found := false
		for _, e := range eps {
			if e == endpoint {
				found = true
				break
			}
		}

		if !found {
			eps = append(eps, endpoint)
			t.dynEndpoints[endpoint.ID] = eps
		}
	}

	// add renew/release loop if necessary
	if dc != nil {
		if _, ok := t.dhcpLoops[endpoint.ID]; !ok {
			stop := make(chan struct{})
			if err != nil {
				log.Errorf("could not make DHCP client id for link %s: %s", link.Attrs().Name, err)
			} else {
				t.dhcpLoops[endpoint.ID] = stop
				go t.dhcpLoop(stop, endpoint, dc)
			}
		}
	}

	return nil
}

func (t *BaseOperations) dhcpLoop(stop chan struct{}, e *NetworkEndpoint, dc client.Client) {
	divisor := time.Duration(2)
	exp := time.After(dc.LastAck().LeaseTime() / 2)
	for {
		select {
		case <-stop:
			// release the ip
			log.Infof("releasing IP address for network %s", e.Name)
			dc.Release()
			return

		case <-exp:
			log.Infof("renewing IP address for network %s", e.Name)
			err := dc.Renew()
			if err != nil {
				log.Errorf("failed to renew ip address for network %s: %s", e.Name, err)

				// wait half of the remaining lease time before trying again
				divisor *= 2
				duration := dc.LastAck().LeaseTime() / divisor

				// for now go with a minimum retry of 1min
				if duration < time.Minute {
					duration = time.Minute
				}

				exp = time.After(duration)

				continue
			}

			ack := dc.LastAck()
			log.Infof("successfully renewed ip address: IP=%s, SubnetMask=%s, Gateway=%s, DNS=%s, Lease Time=%s", ack.YourIP(), ack.SubnetMask(), ack.Gateway(), ack.DNS(), ack.LeaseTime())

			e.DHCP = &DHCPInfo{
				Assigned:    net.IPNet{IP: ack.YourIP(), Mask: ack.SubnetMask()},
				Gateway:     net.IPNet{IP: ack.Gateway(), Mask: ack.SubnetMask()},
				Nameservers: ack.DNS(),
			}

			// TODO: determine if there are actually any changes to apply before performing updates
			e.configured = false
			t.Apply(e)
			if err = t.config.UpdateNetworkEndpoint(e); err != nil {
				log.Error(err)
			}
			// update any endpoints that share this NIC
			for _, d := range t.dynEndpoints[e.ID] {
				if e == d {
					// already applied above
					continue
				}

				d.DHCP = e.DHCP
				d.configured = false
				t.Apply(d)
				if err = t.config.UpdateNetworkEndpoint(d); err != nil {
					log.Error(err)
				}
			}

			t.config.Flush()

			exp = time.After(ack.LeaseTime() / 2)
		}
	}
}

// MountLabel performs a mount with the label and target being absolute paths
func (t *BaseOperations) MountLabel(ctx context.Context, label, target string) error {
	defer trace.End(trace.Begin(fmt.Sprintf("Mounting %s on %s", label, target)))

	bindTarget := path.Join(BindSys.Root, diskBindBase, label)

	fi, err := os.Stat(target)
	if err != nil {
		// #nosec
		if err := os.MkdirAll(target, 0755); err != nil {
			// same as MountFileSystem error for consistency
			return fmt.Errorf("unable to create mount point %s: %s", target, err)
		}
	}

	// convert the label to a filesystem path
	label = "/dev/disk/by-label/" + label

	var e1, e2 error
	_, e1 = os.Stat(bindTarget)
	if e1 == nil {
		// bindTarget exists, check whether or not bindTarget is a mount point
		e2 = os.Remove(bindTarget)
	}

	if (e1 == nil && e2 == nil) || os.IsNotExist(e1) {
		// #nosec
		if err := os.MkdirAll(bindTarget, 0755); err != nil {
			return fmt.Errorf("unable to create mount point %s: %s", bindTarget, err)
		}
		if err := mountDeviceLabel(ctx, label, bindTarget); err != nil {
			return err
		}
	}

	// at this point bindTarget should be mounted successfully
	mntsrc := path.Join(bindTarget, volumeDataDir)
	mnttype := "bind"
	mntflags := uintptr(syscall.MS_BIND)
	// if the volume contains a volumeDataDir directory then mount that instead of the root of the filesystem
	// if we cannot read it we go with the root of the filesystem
	_, err = os.Stat(mntsrc)
	if err != nil {
		if os.IsNotExist(err) {
			// if there's not such directory then revert to using the device directly
			log.Info("No %s data directory in volume, mounting filesystem directly", volumeDataDir)
			mntsrc = label
			mnttype = ext4FileSystemType
			mntflags = syscall.MS_NOATIME
		} else {
			return fmt.Errorf("unable to determine whether lost+found masking is required: %s", err)
		}
	}

	if err := Sys.Syscall.Mount(mntsrc, target, mnttype, mntflags, ""); err != nil {
		// consistent with MountFileSystem
		detail := fmt.Sprintf("mounting %s on %s failed: %s", label, target, err)
		return errors.New(detail)
	}

	// change the ownership of the target directory to the original uid/gid
	if fi != nil {
		sys, ok := fi.Sys().(*syscall.Stat_t)
		if !ok {
			return fmt.Errorf("unable to get uid/gid from existing target directory %s", target)
		}
		uid := int(sys.Uid)
		gid := int(sys.Gid)

		log.Debugf("Setting target uid/gid to the mount source as %d/%d", uid, gid)
		if err := os.Chown(target, uid, gid); err != nil {
			return fmt.Errorf("unable to change the owner of the mount point %s: %s", target, err)
		}

		log.Debugf("Setting target %s permissions to the mount source as: %#o",
			target, fi.Mode())
		if err := os.Chmod(target, fi.Mode()); err != nil {
			return fmt.Errorf("failed to set target %s mount permissions as %#o: %s",
				target, fi.Mode(), err)

		}
	}
	return nil
}

// mountDeviceLabel mounts the device to target
func mountDeviceLabel(ctx context.Context, label string, target string) error {
	// wait for device label to show up until the ctx deadline exceeds.
WaitForDevice:
	for {
		select {
		case <-ctx.Done():
			detail := fmt.Sprintf("timed out waiting for %s to appear", label)
			return errors.New(detail)
		default:
			_, err := os.Stat(label)
			if err == nil || !os.IsNotExist(err) {
				break WaitForDevice
			}
			// sleep for 1 ms to reduce pressure on cpu
			time.Sleep(time.Millisecond)
		}
	}

	if err := Sys.Syscall.Mount(label, target, ext4FileSystemType, syscall.MS_NOATIME, ""); err != nil {
		// consistent with MountFileSystem
		detail := fmt.Sprintf("Actual mount: mounting %s on %s failed: %s", label, target, err)
		return errors.New(detail)
	}

	return nil
}

// MountTarget performs a mount based on the target path from the source url
// This assumes that the source url is valid and available.
func (t *BaseOperations) MountTarget(ctx context.Context, source url.URL, target string, mountOptions string) error {
	defer trace.End(trace.Begin(fmt.Sprintf("Mounting %s on %s", source.String(), target)))

	// #nosec
	if err := os.MkdirAll(target, 0755); err != nil {
		// same as MountLabel error for consistency
		return fmt.Errorf("unable to create mount point %s: %s", target, err)
	}

	rawSource := source.Hostname() + ":/" + source.Path
	// NOTE: by default we are supporting "NOATIME" and it can be configurable later. this must be specfied as a flag.
	// Additionally, we must parse out the "ro" option and supply it as a flag as well for this flavor of the mount call.
	if err := Sys.Syscall.Mount(rawSource, target, nfsFileSystemType, syscall.MS_NOATIME, mountOptions); err != nil {
		log.Errorf("mounting %s on %s failed: %s", source.String(), target, err)
		return err
	}

	return nil
}

// CopyExistingContent copies the underlying files shadowed by a mount on a directory
// to the volume mounted on the directory
// see bug https://github.com/vmware/vic/issues/3482
func (t *BaseOperations) CopyExistingContent(source string) error {
	defer trace.End(trace.Begin(fmt.Sprintf("copyExistingContent from %s", source)))

	source = filepath.Clean(source)

	bind := path.Join(BindSys.Root, bindDir)

	// if mounted volume is not empty skip the copy task
	if empty, err := isEmpty(source); err != nil || !empty {
		if err != nil {
			log.Errorf("error checking directory for contents %s: %+v", source, err)
			return err
		}
		log.Debugf("Skipping copy as volume %s is not empty", source)
		return nil
	}

	log.Debugf("creating directory %s", bind)
	// #nosec
	if err := os.MkdirAll(bind, 0755); err != nil {
		log.Errorf("error creating directory %s: %+v", bind, err)
		return err
	}

	// remove dir
	defer func() {
		log.Debugf("removing %s", bind)
		if err := os.Remove(bind); err != nil {
			log.Errorf("error removing directory %s: %+v", bind, err)
		}
	}()

	parentDir := filepath.Dir(source)
	// mount the parent directory of the source to bind
	// e.g if source is /foo/bar, mount /foo to ./bind
	log.Debugf("mounting %s on %s", parentDir, bind)
	if err := Sys.Syscall.Mount(parentDir, bind, ext4FileSystemType, syscall.MS_BIND, ""); err != nil {
		log.Errorf("error mounting to %s: %+v", bind, err)
		return err
	}

	// unmount
	defer func() {
		log.Debugf("unmounting %s", bind)
		if err := Sys.Syscall.Unmount(bind, syscall.MNT_DETACH); err != nil {
			log.Errorf("error unmounting %+v", err)
		}
	}()

	mountedSource := filepath.Join(bind, filepath.Base(source))
	// copy data from the bind to the source
	// e.g if source is /foo/bar, copy ./bind/bar to /foo/bar
	log.Debugf("copying contents from to %s to %s", mountedSource, source)
	if err := archive.CopyWithTar(mountedSource, source); err != nil {
		log.Errorf("err copying %s to %s: %+v", mountedSource, source, err)
		return err
	}

	return nil
}

// ProcessEnv does OS specific checking and munging on the process environment prior to launch
func (t *BaseOperations) ProcessEnv(env []string) []string {
	// TODO: figure out how we're going to specify user and pass all the settings along
	// in the meantime, hardcode HOME to /root
	homeIndex := -1
	for i, tuple := range env {
		if strings.HasPrefix(tuple, "HOME=") {
			homeIndex = i
			break
		}
	}
	if homeIndex == -1 {
		return append(env, "HOME=/root")
	}

	return env
}

// Fork triggers vmfork and handles the necessary pre/post OS level operations
func (t *BaseOperations) Fork() error {
	// unload vmxnet3 module

	// fork
	out, ok, err := rpcout.SendOne("vmfork-begin -1 -1")
	if err != nil {
		detail := fmt.Sprintf("error while calling vmfork: err=%s, out=%s, ok=%t", err, out, ok)
		log.Error(detail)
		return errors.New(detail)
	}

	if !ok {
		detail := fmt.Sprintf("failed to vmfork: %s", out)
		log.Error(detail)
		return errors.New(detail)
	}

	log.Infof("vmfork call succeeded: %s", out)

	// update system time

	// rescan scsi bus

	// reload vmxnet3 module

	// ensure memory and cores are brought online if not using udev

	return nil
}

func (t *BaseOperations) Setup(config Config) error {
	if err := createBindSrcTarget(filesForMinOSLinux); err != nil {
		return err
	}

	err := Sys.Hosts.Load()
	if err != nil {
		return err
	}

	// Seed the working copy of the hosts file with that from the image
	BindSys.Hosts.Copy(Sys.Hosts)

	// make sure localhost entries are present
	entries := []struct {
		hostname string
		addr     net.IP
	}{
		{"localhost", net.ParseIP("127.0.0.1")},
		{"localhost4", net.ParseIP("127.0.0.1")},
		{"localhost.localdomain", net.ParseIP("127.0.0.1")},
		{"localhost4.localdomain4", net.ParseIP("127.0.0.1")},
		{"ip6-localhost", net.ParseIP("::1")},
		{"localhost", net.ParseIP("::1")},
		{"localhost.localdomain", net.ParseIP("::1")},
		{"localhost6.localdomain6", net.ParseIP("::1")},
		{"ip6-loopback", net.ParseIP("::1")},
		{"ip6-localnet", net.ParseIP("fe00::0")},
		{"ip6-mcastprefix", net.ParseIP("ff00::0")},
		{"ip6-allnodes", net.ParseIP("ff02::1")},
		{"ip6-allrouters", net.ParseIP("ff02::2")},
	}

	for _, e := range entries {
		BindSys.Hosts.SetHost(e.hostname, e.addr)
	}

	if err := bindMountAndSave(Sys.Hosts, BindSys.Hosts); err != nil {
		return err
	}

	t.dynEndpoints = make(map[string][]*NetworkEndpoint)
	t.dhcpLoops = make(map[string]chan struct{})
	t.config = config

	return nil
}

func (t *BaseOperations) Cleanup() error {
	for _, stop := range t.dhcpLoops {
		close(stop)
	}

	return nil
}

func chrootSysProcAttr(attr *syscall.SysProcAttr, chroot string) *syscall.SysProcAttr {
	if attr == nil {
		attr = &syscall.SysProcAttr{}
	}
	attr.Chroot = chroot

	return attr
}

// Need to put this here because Windows does not support SysProcAttr.Credential
// getUserSysProcAttr relies on docker user package to verify user specification
// Examples of valid user specifications are:
//     * ""
//     * "user"
//     * "uid"
//     * "user:group"
//     * "uid:gid
//     * "user:gid"
//     * "uid:group"
func getUserSysProcAttr(uid, gid string) (*syscall.SysProcAttr, error) {
	if uid == "" && gid == "" {
		log.Debugf("no user id or group id specified")
		return nil, nil
	}

	userGroup := uid
	if gid != "" {
		userGroup = fmt.Sprintf("%s:%s", uid, gid)
	}
	passwdPath, err := user.GetPasswdPath()
	if err != nil {
		return nil, err
	}
	groupPath, err := user.GetGroupPath()
	if err != nil {
		return nil, err
	}
	execUser, err := user.GetExecUserPath(userGroup, defaultExecUser, passwdPath, groupPath)
	if err != nil {
		return nil, err
	}

	sysProc := &syscall.SysProcAttr{
		Credential: &syscall.Credential{
			Uid: uint32(execUser.Uid),
			Gid: uint32(execUser.Gid),
		},
		Setsid: true,
	}
	for _, sgid := range execUser.Sgids {
		sysProc.Credential.Groups = append(sysProc.Credential.Groups, uint32(sgid))
	}
	return sysProc, nil
}

// isEmpty returns true if the directory is empty or contains a lost+found folder
func isEmpty(name string) (bool, error) {
	files, err := readDir(name)
	if err != nil || len(files) > 0 {
		return false, err
	}
	return true, nil
}

// readDir reads a directory and hides a specific dir "lost+found"
func readDir(dir string) ([]os.FileInfo, error) {
	lostnfound := "lost+found"
	files, err := ioutil.ReadDir(dir)
	if err != nil {
		return nil, err
	}

	result := files[:0]
	for _, f := range files {
		if f.Name() != lostnfound {
			result = append(result, f)
		}
	}

	return result, nil
}

func bindMount(src, target string) error {
	// no need to return if unmount fails; it's possible that the target is not mounted previously
	log.Infof("unmounting %s", target)
	if err := Sys.Syscall.Unmount(target, syscall.MNT_DETACH); err != nil {
		if err.Error() == os.ErrInvalid.Error() {
			log.Debug("path is not currently a bindmount target")
		} else {
			log.Errorf("failed to unmount %s: %s", target, err)
		}
	}

	// bind mount src to target
	log.Infof("bind-mounting %s on %s", src, target)
	if err := Sys.Syscall.Mount(src, target, "bind", syscall.MS_BIND, ""); err != nil {
		detail := fmt.Errorf("failed to mount %s to %s: %s", src, target, err)
		log.Error(detail)
		return detail
	}

	// make sure the file is readable
	// #nosec: Expect file permissions to be 0600 or less
	if err := os.Chmod(target, 0644); err != nil {
		return err
	}

	return nil
}

func bindMountAndSave(conf etcconf.Conf, bind etcconf.Conf) error {
	if err := bind.Save(); err != nil {
		return err
	}

	return bindMount(bind.Path(), conf.Path())
}

// Create necessary directories/files as the src/target for bind mount.
// See https://github.com/vmware/vic/issues/489
func createBindSrcTarget(files map[string]os.FileMode) error {
	// The directory has to exist before creating the new file
	for filePath, fmode := range files {
		// this allows us to prefix arbitrary path so as to support unit testing
		filePath = path.Join(Sys.Root, filePath)

		dir := path.Dir(filePath)
		if _, err := os.Stat(dir); os.IsNotExist(err) {
			// #nosec: Expect file permissions to be 0600 or less
			if err := os.MkdirAll(dir, 0755); err != nil {
				return fmt.Errorf("failed to create directory %s: %s", dir, err)
			}
		}
		f, err := os.OpenFile(filePath, os.O_CREATE, fmode)
		if err != nil {
			return fmt.Errorf("failed to open file %s: %s", filePath, err)
		}

		if err = f.Close(); err != nil {
			return fmt.Errorf("failed to close file %s: %s", filePath, err)
		}
	}

	return nil
}

// LaunchUtility allows for starting, blocking on, and receiving the exit code of a
// process while in the presence of an embedded child reaper.
// The function passed in will be launched under lock and MUST NOT wait for the process to
// exit. It's expected the function be a closure wrapped around StartProcess or similar.
func (t *BaseOperations) LaunchUtility(fn UtilityFn) (<-chan int, error) {
	return launchUtility(t, fn)
}

func launchUtility(t *BaseOperations, fn UtilityFn) (<-chan int, error) {
	t.utilityPidMutex.Lock()
	defer t.utilityPidMutex.Unlock()

	if t.utilityPids == nil {
		t.utilityPids = make(map[int]chan int)
		log.Debug("Initialized utility process pid map")
	}

	log.Debug("Launching utility process")
	proc, err := fn()
	if err != nil {
		return nil, err
	}

	pid := proc.Pid
	exitChan := make(chan int, 1)
	t.utilityPids[pid] = exitChan
	log.Debugf("Registered utility pid: %d", pid)

	return exitChan, nil
}

func (t *BaseOperations) HandleUtilityExit(pid, exitCode int) bool {
	return handleUtilityExit(t, pid, exitCode)
}

func handleUtilityExit(t *BaseOperations, pid, exitCode int) bool {
	t.utilityPidMutex.Lock()
	defer t.utilityPidMutex.Unlock()

	pidchannel, ok := t.utilityPids[pid]
	if !ok {
		return false
	}

	delete(t.utilityPids, pid)
	pidchannel <- exitCode
	close(pidchannel)

	return true
}