* Add Virtual Kubelet provider for VIC Initial virtual kubelet provider for VMware VIC. This provider currently handles creating and starting of a pod VM via the VIC portlayer and persona server. Image store handling via the VIC persona server. This provider currently requires the feature/wolfpack branch of VIC. * Added pod stop and delete. Also added node capacity. Added the ability to stop and delete pod VMs via VIC. Also retrieve node capacity information from the VCH. * Cleanup and readme file Some file clean up and added a Readme.md markdown file for the VIC provider. * Cleaned up errors, added function comments, moved operation code 1. Cleaned up error handling. Set standard for creating errors. 2. Added method prototype comments for all interface functions. 3. Moved PodCreator, PodStarter, PodStopper, and PodDeleter to a new folder. * Add mocking code and unit tests for podcache, podcreator, and podstarter Used the unit test framework used in VIC to handle assertions in the provider's unit test. Mocking code generated using OSS project mockery, which is compatible with the testify assertion framework. * Vendored packages for the VIC provider Requires feature/wolfpack branch of VIC and a few specific commit sha of projects used within VIC. * Implementation of POD Stopper and Deleter unit tests (#4) * Updated files for initial PR
642 lines
14 KiB
Go
642 lines
14 KiB
Go
// Copyright 2016-2017 VMware, Inc. All Rights Reserved.
|
||
//
|
||
// Licensed under the Apache License, Version 2.0 (the "License");
|
||
// you may not use this file except in compliance with the License.
|
||
// You may obtain a copy of the License at
|
||
//
|
||
// http://www.apache.org/licenses/LICENSE-2.0
|
||
//
|
||
// Unless required by applicable law or agreed to in writing, software
|
||
// distributed under the License is distributed on an "AS IS" BASIS,
|
||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||
// See the License for the specific language governing permissions and
|
||
// limitations under the License.
|
||
|
||
package dns
|
||
|
||
import (
|
||
"bufio"
|
||
"fmt"
|
||
"math/rand"
|
||
"net"
|
||
"os"
|
||
"strings"
|
||
|
||
"sync"
|
||
"time"
|
||
|
||
log "github.com/Sirupsen/logrus"
|
||
|
||
"github.com/vmware/vic/lib/portlayer/network"
|
||
"github.com/vmware/vic/pkg/trace"
|
||
|
||
mdns "github.com/miekg/dns"
|
||
)
|
||
|
||
const (
|
||
DefaultIP = "127.0.0.1"
|
||
DefaultPort = 53
|
||
DefaultTTL = 600 * time.Second
|
||
DefaultCacheSize = 1024
|
||
DefaultTimeout = 4 * time.Second
|
||
)
|
||
|
||
var (
|
||
options = ServerOptions{}
|
||
random *rand.Rand
|
||
)
|
||
|
||
func init() {
|
||
random = rand.New(rand.NewSource(time.Now().UnixNano()))
|
||
}
|
||
|
||
// ServerOptions represents the server options
|
||
type ServerOptions struct {
|
||
IP string
|
||
Port int
|
||
Interface string
|
||
|
||
Nameservers flagMultipleVar
|
||
|
||
Timeout time.Duration
|
||
|
||
TTL time.Duration
|
||
CacheSize int
|
||
|
||
Debug bool
|
||
}
|
||
|
||
// Server represents udp/tcp server and clients
|
||
type Server struct {
|
||
ServerOptions
|
||
|
||
// used for serving dns
|
||
udpserver *mdns.Server
|
||
udpconn *net.UDPConn
|
||
|
||
tcpserver *mdns.Server
|
||
tcplisten *net.TCPListener
|
||
|
||
// used for forwarding queries
|
||
udpclient *mdns.Client
|
||
tcpclient *mdns.Client
|
||
|
||
// used for speeding up external lookups
|
||
cache *Cache
|
||
wg *sync.WaitGroup
|
||
}
|
||
|
||
type flagMultipleVar []string
|
||
|
||
func (i *flagMultipleVar) String() string {
|
||
return fmt.Sprint(*i)
|
||
}
|
||
|
||
func (i *flagMultipleVar) Set(value string) error {
|
||
*i = append(*i, value)
|
||
return nil
|
||
}
|
||
|
||
// NewServer returns a new Server
|
||
func NewServer(options ServerOptions) *Server {
|
||
var err error
|
||
|
||
// Default TTL
|
||
if options.TTL == 0 {
|
||
options.TTL = DefaultTTL
|
||
}
|
||
|
||
// Default port
|
||
if options.Port == 0 {
|
||
options.Port = DefaultPort
|
||
}
|
||
|
||
// Default nameservers
|
||
if len(options.Nameservers) == 0 {
|
||
options.Nameservers = resolvconf()
|
||
}
|
||
|
||
// Default cache size
|
||
if options.CacheSize == 0 {
|
||
options.CacheSize = DefaultCacheSize
|
||
}
|
||
|
||
// Default timeout
|
||
if options.Timeout == 0 {
|
||
options.Timeout = DefaultTimeout
|
||
}
|
||
|
||
server := &Server{
|
||
ServerOptions: options,
|
||
cache: NewCache(CacheOptions{options.CacheSize, options.TTL}),
|
||
wg: new(sync.WaitGroup),
|
||
}
|
||
|
||
udpaddr := &net.UDPAddr{
|
||
IP: net.ParseIP(server.IP),
|
||
Port: server.Port,
|
||
}
|
||
|
||
server.udpconn, err = net.ListenUDP("udp", udpaddr)
|
||
if err != nil {
|
||
log.Errorf("ListenUDP failed %s", err)
|
||
return nil
|
||
}
|
||
|
||
tcpaddr := &net.TCPAddr{
|
||
IP: net.ParseIP(server.IP),
|
||
Port: server.Port,
|
||
}
|
||
|
||
server.tcplisten, err = net.ListenTCP("tcp", tcpaddr)
|
||
if err != nil {
|
||
log.Errorf("ListenTCP failed %s", err)
|
||
return nil
|
||
}
|
||
|
||
server.udpclient = &mdns.Client{
|
||
Net: "udp",
|
||
ReadTimeout: server.Timeout,
|
||
WriteTimeout: server.Timeout,
|
||
SingleInflight: true,
|
||
}
|
||
|
||
server.tcpclient = &mdns.Client{
|
||
Net: "tcp",
|
||
ReadTimeout: server.Timeout,
|
||
WriteTimeout: server.Timeout,
|
||
SingleInflight: true,
|
||
}
|
||
|
||
return server
|
||
}
|
||
|
||
// Addr returns the ip:port of the server
|
||
func (s *Server) Addr() string {
|
||
return fmt.Sprintf("%s:%d", s.IP, s.Port)
|
||
}
|
||
|
||
func respServerFailure(w mdns.ResponseWriter, r *mdns.Msg) error {
|
||
m := new(mdns.Msg)
|
||
m.SetRcode(r, mdns.RcodeServerFailure)
|
||
// Does not matter if this write fails
|
||
return w.WriteMsg(m)
|
||
}
|
||
|
||
func respNotImplemented(w mdns.ResponseWriter, r *mdns.Msg) error {
|
||
m := &mdns.Msg{
|
||
MsgHdr: mdns.MsgHdr{
|
||
Authoritative: false,
|
||
RecursionDesired: false,
|
||
RecursionAvailable: false,
|
||
Rcode: mdns.RcodeNotImplemented,
|
||
},
|
||
Compress: true,
|
||
}
|
||
m.SetReply(r)
|
||
|
||
if err := w.WriteMsg(m); err != nil {
|
||
log.Errorf("Error writing RcodeNotImplemented response, %s", err)
|
||
return err
|
||
}
|
||
return nil
|
||
}
|
||
|
||
func resolvconf() []string {
|
||
var servers []string
|
||
|
||
file, err := os.Open("/etc/resolv.conf")
|
||
if err != nil {
|
||
return nil
|
||
}
|
||
defer file.Close()
|
||
|
||
scanner := bufio.NewScanner(file)
|
||
for scanner.Scan() {
|
||
line := scanner.Text()
|
||
|
||
// skip comments
|
||
if len(line) > 0 && (line[0] == ';' || line[0] == '#') {
|
||
continue
|
||
}
|
||
f := strings.SplitN(line, " ", 2)
|
||
|
||
if len(f) < 1 {
|
||
continue
|
||
}
|
||
|
||
if f[0] == "nameserver" {
|
||
if len(f) > 1 && len(servers) < 3 { // small, but the standard limit
|
||
// One more check: make sure server name is
|
||
// just an IP address. Otherwise we need DNS
|
||
// to look it up.
|
||
if net.ParseIP(f[1]) != nil {
|
||
servers = append(servers, f[1])
|
||
}
|
||
}
|
||
}
|
||
}
|
||
if err := scanner.Err(); err != nil {
|
||
return nil
|
||
}
|
||
return servers
|
||
}
|
||
|
||
// SeenBefore returns the cached response
|
||
func (s *Server) SeenBefore(w mdns.ResponseWriter, r *mdns.Msg) (bool, error) {
|
||
defer trace.End(trace.Begin(r.String()))
|
||
|
||
// Do we have it in the cache
|
||
if m := s.cache.Get(r); m != nil {
|
||
log.Debugf("Cache hit for %q", r.String())
|
||
|
||
// Overwrite the ID with the request's ID
|
||
m.Id = r.Id
|
||
m.Compress = true
|
||
m.Truncated = false
|
||
|
||
if err := w.WriteMsg(m); err != nil {
|
||
log.Errorf("Error writing response: %q", err)
|
||
return true, err
|
||
}
|
||
return true, nil
|
||
}
|
||
return false, nil
|
||
}
|
||
|
||
// HandleForwarding forwards a request to the nameservers and returns the response
|
||
func (s *Server) HandleForwarding(w mdns.ResponseWriter, r *mdns.Msg) (bool, error) {
|
||
defer trace.End(trace.Begin(r.String()))
|
||
|
||
var m *mdns.Msg
|
||
var err error
|
||
var try int
|
||
|
||
if len(s.Nameservers) == 0 {
|
||
log.Errorf("No nameservers defined, can not forward")
|
||
return false, respServerFailure(w, r)
|
||
}
|
||
|
||
// which protocol are they talking
|
||
tcp := false
|
||
if _, ok := w.RemoteAddr().(*net.TCPAddr); ok {
|
||
tcp = true
|
||
}
|
||
|
||
// Use request ID for "random" nameserver selection.
|
||
nsid := int(r.Id) % len(s.Nameservers)
|
||
|
||
Redo:
|
||
nameserver := s.Nameservers[nsid]
|
||
if i := strings.Index(nameserver, ":"); i < 0 {
|
||
nameserver += ":53"
|
||
}
|
||
|
||
if tcp {
|
||
m, _, err = s.tcpclient.Exchange(r, nameserver)
|
||
} else {
|
||
m, _, err = s.udpclient.Exchange(r, nameserver)
|
||
}
|
||
if err != nil {
|
||
// Seen an error, this can only mean, "server not reached", try again but only if we have not exausted our nameservers.
|
||
if try < len(s.Nameservers) {
|
||
try++
|
||
nsid = (nsid + 1) % len(s.Nameservers)
|
||
goto Redo
|
||
}
|
||
|
||
log.Errorf("Failure to forward request: %q", err)
|
||
return false, respServerFailure(w, r)
|
||
}
|
||
|
||
// We have a response so cache it
|
||
s.cache.Add(m)
|
||
|
||
m.Compress = true
|
||
if err := w.WriteMsg(m); err != nil {
|
||
log.Errorf("Error writing response: %q", err)
|
||
return true, err
|
||
}
|
||
return true, nil
|
||
}
|
||
|
||
// lookupByAlias looks up a container by alias, given the requesting container
|
||
// and the network of the incoming request. It first does a container-scoped
|
||
// alias search, followed by a network-scoped aliased search for alias. The
|
||
// result is a collection of endpoints for the matching container that are
|
||
// only on the same network as the requesting container.
|
||
func lookupByAlias(netCtx *network.Context, scope *network.Scope, reqc *network.Container, alias string) []*network.Endpoint {
|
||
// container specific alias search
|
||
cons := netCtx.ContainersByAlias(network.ScopedAliasName(scope.Name(), reqc.Name(), alias))
|
||
if len(cons) == 0 {
|
||
// scope-wide alias search
|
||
cons = netCtx.ContainersByAlias(network.ScopedAliasName(scope.Name(), "", alias))
|
||
}
|
||
|
||
if len(cons) == 0 {
|
||
return nil
|
||
}
|
||
|
||
var eps []*network.Endpoint
|
||
for _, c := range cons {
|
||
if e := c.Endpoint(scope); e != nil {
|
||
eps = append(eps, e)
|
||
}
|
||
}
|
||
|
||
return eps
|
||
}
|
||
|
||
// lookupByName looks up a container by name given the requesting container. It returns a collection
|
||
// of endpoints on networks that both the requesting and matching container share.
|
||
func lookupByName(netCtx *network.Context, reqc *network.Container, name string) []*network.Endpoint {
|
||
var eps []*network.Endpoint
|
||
if m := netCtx.Container(name); m != nil {
|
||
// look if networks overlap for the requesting container and the
|
||
// matched container
|
||
for _, ec := range reqc.Endpoints() {
|
||
if em := m.Endpoint(ec.Scope()); em != nil {
|
||
eps = append(eps, em)
|
||
}
|
||
}
|
||
}
|
||
|
||
return eps
|
||
}
|
||
|
||
// HandleVIC returns a response to a container name/id request
|
||
func (s *Server) HandleVIC(w mdns.ResponseWriter, r *mdns.Msg) (bool, error) {
|
||
defer trace.End(trace.Begin(r.String()))
|
||
|
||
question := r.Question[0]
|
||
|
||
netCtx := network.DefaultContext
|
||
if netCtx == nil {
|
||
log.Errorf("DefaultContext is not initialized")
|
||
return false, fmt.Errorf("DefaultContext is not initialized")
|
||
}
|
||
|
||
clientIP, _, err := net.SplitHostPort(w.RemoteAddr().String())
|
||
if err != nil {
|
||
log.Errorf("SplitHostPort failed: %q", err)
|
||
return false, err
|
||
}
|
||
|
||
log.Debugf("RemoteAddr: %s", clientIP)
|
||
ip := net.ParseIP(clientIP)
|
||
|
||
var name string
|
||
var domain string
|
||
|
||
name = strings.TrimSuffix(question.Name, ".")
|
||
// Do we have a domain?
|
||
i := strings.IndexRune(name, '.')
|
||
if i >= 0 {
|
||
name, domain = name[:i], name[i+1:]
|
||
}
|
||
|
||
// get the requesting container's endpoint
|
||
e := netCtx.ContainerByAddr(ip)
|
||
if e == nil {
|
||
return false, fmt.Errorf("Could not find requesting container with ip %s", ip)
|
||
}
|
||
|
||
if domain != "" && e.Scope().Name() != domain {
|
||
return false, fmt.Errorf("Intra-scope request for container %s in %s from %s", name, domain, e.Scope().Name())
|
||
}
|
||
|
||
eps := lookupByAlias(netCtx, e.Scope(), e.Container(), name)
|
||
if len(eps) == 0 {
|
||
// lookup container by name
|
||
eps = lookupByName(netCtx, e.Container(), name)
|
||
}
|
||
|
||
if len(eps) == 0 {
|
||
log.Debugf("Can't find the container: %q", name)
|
||
return false, fmt.Errorf("Can't find the container: %q", name)
|
||
}
|
||
|
||
// FIXME: Add AAAA when we support it
|
||
answer := make([]mdns.RR, len(eps))
|
||
// shuffle eps (Fisher–Yates shuffle)
|
||
for i := len(eps) - 1; i > 0; i-- {
|
||
j := random.Intn(i + 1)
|
||
eps[i], eps[j] = eps[j], eps[i]
|
||
}
|
||
|
||
for i, e := range eps {
|
||
if e.IP().IsUnspecified() {
|
||
return false, fmt.Errorf("No ip for container %q", name)
|
||
}
|
||
|
||
answer[i] = &mdns.A{
|
||
Hdr: mdns.RR_Header{
|
||
Name: question.Name,
|
||
Rrtype: mdns.TypeA,
|
||
Class: mdns.ClassINET,
|
||
Ttl: uint32(DefaultTTL.Seconds()),
|
||
},
|
||
A: e.IP(),
|
||
}
|
||
}
|
||
|
||
// Start crafting reply msg
|
||
m := &mdns.Msg{
|
||
MsgHdr: mdns.MsgHdr{
|
||
Authoritative: true,
|
||
RecursionAvailable: true,
|
||
},
|
||
Compress: true,
|
||
}
|
||
m.SetReply(r)
|
||
|
||
m.Answer = append(m.Answer, answer...)
|
||
|
||
// Which protocol we are talking
|
||
tcp := false
|
||
if _, ok := w.LocalAddr().(*net.TCPAddr); ok {
|
||
tcp = true
|
||
}
|
||
|
||
// 512 byte payload guarantees that DNS packets can be reassembled if fragmented in transit.
|
||
bufsize := 512
|
||
|
||
// With EDNS0 in use a larger payload size can be specified.
|
||
if o := r.IsEdns0(); o != nil {
|
||
bufsize = int(o.UDPSize())
|
||
}
|
||
|
||
// Make sure we are not smaller than 512
|
||
if bufsize < 512 {
|
||
bufsize = 512
|
||
}
|
||
|
||
// With TCP we can send up to 64K
|
||
if tcp {
|
||
bufsize = mdns.MaxMsgSize - 1
|
||
}
|
||
|
||
// Trim the answer RRs one by one till the whole message fits within the reply size
|
||
if m.Len() > bufsize {
|
||
if tcp {
|
||
m.Truncated = true
|
||
}
|
||
|
||
for m.Len() > bufsize {
|
||
m.Answer = m.Answer[:len(m.Answer)-1]
|
||
}
|
||
}
|
||
|
||
if err := w.WriteMsg(m); err != nil {
|
||
log.Errorf("Error writing response, %s", err)
|
||
return true, err
|
||
}
|
||
w.Close()
|
||
|
||
return true, nil
|
||
}
|
||
|
||
// ServeDNS implements the handler interface
|
||
func (s *Server) ServeDNS(w mdns.ResponseWriter, r *mdns.Msg) {
|
||
defer trace.End(trace.Begin(r.String()))
|
||
|
||
if r == nil || len(r.Question) == 0 {
|
||
return
|
||
}
|
||
|
||
// Reject multi-question query
|
||
if len(r.Question) != 1 {
|
||
log.Errorf("Rejected multi-question query")
|
||
|
||
respServerFailure(w, r)
|
||
return
|
||
}
|
||
q := r.Question[0]
|
||
|
||
// Reject non-INET type query
|
||
if q.Qclass != mdns.ClassINET {
|
||
log.Errorf("Rejected non-inet query")
|
||
|
||
respNotImplemented(w, r)
|
||
return
|
||
}
|
||
|
||
// Reject ANY type query
|
||
if q.Qtype == mdns.TypeANY {
|
||
log.Errorf("Rejected ANY query")
|
||
|
||
respNotImplemented(w, r)
|
||
return
|
||
}
|
||
|
||
// Check VIC first
|
||
// Currently VIC can only answer ipv4 "A" queries
|
||
if q.Qtype == mdns.TypeA {
|
||
ok, err := s.HandleVIC(w, r)
|
||
if ok {
|
||
if err != nil {
|
||
log.Errorf("HandleVIC returned: %q", err)
|
||
}
|
||
return
|
||
}
|
||
}
|
||
|
||
// Do we have the response in our cache?
|
||
ok, err := s.SeenBefore(w, r)
|
||
if ok {
|
||
if err != nil {
|
||
log.Errorf("SeenBefore returned: %q", err)
|
||
}
|
||
return
|
||
}
|
||
|
||
// Then forward
|
||
ok, err = s.HandleForwarding(w, r)
|
||
if ok {
|
||
if err != nil {
|
||
log.Errorf("HandleForwarding returned: %q", err)
|
||
}
|
||
return
|
||
}
|
||
|
||
}
|
||
|
||
// Start starts the DNS server
|
||
func (s *Server) Start() {
|
||
// Call BindToDevice if IP is empty and Interface is set
|
||
if s.IP == "" && s.Interface != "" {
|
||
uf, err := s.udpconn.File()
|
||
if err != nil {
|
||
log.Errorf("Getting the fd failed with: %s", err)
|
||
} else {
|
||
// BindToDevice binds the socket associated with fd to device.
|
||
if err := BindToDevice(int(uf.Fd()), s.Interface); err != nil {
|
||
log.Errorf("Calling BindToDevice failed with: %s", err)
|
||
}
|
||
}
|
||
}
|
||
|
||
udpserver := &mdns.Server{
|
||
Handler: s,
|
||
PacketConn: s.udpconn,
|
||
}
|
||
s.udpserver = udpserver
|
||
|
||
s.wg.Add(1)
|
||
go func() {
|
||
defer s.wg.Done()
|
||
|
||
udpserver.ActivateAndServe()
|
||
log.Debugf("UDP server exited")
|
||
}()
|
||
log.Infof("Ready for queries on udp://%s", s.Addr())
|
||
|
||
// Call BindToDevice if IP is empty and Interface is set
|
||
if s.IP == "" && s.Interface != "" {
|
||
tf, err := s.tcplisten.File()
|
||
if err != nil {
|
||
log.Errorf("Getting the fd failed with: %s", err)
|
||
} else {
|
||
// BindToDevice binds the socket associated with fd to device.
|
||
if err := BindToDevice(int(tf.Fd()), s.Interface); err != nil {
|
||
log.Errorf("Calling BindToDevice failed with: %s", err)
|
||
}
|
||
}
|
||
}
|
||
|
||
tcpserver := &mdns.Server{Handler: s, Listener: s.tcplisten}
|
||
s.tcpserver = tcpserver
|
||
|
||
s.wg.Add(1)
|
||
go func() {
|
||
defer s.wg.Done()
|
||
|
||
tcpserver.ActivateAndServe()
|
||
log.Debugf("TCP server exited")
|
||
}()
|
||
log.Infof("Ready for queries on tcp://%s", s.Addr())
|
||
|
||
}
|
||
|
||
// Stop stops the DNS server gracefully
|
||
func (s *Server) Stop() {
|
||
if s.udpserver != nil {
|
||
log.Debugf("Shutting down udpserver")
|
||
s.udpserver.Shutdown()
|
||
}
|
||
s.udpconn = nil
|
||
s.udpserver = nil
|
||
|
||
if s.tcpserver != nil {
|
||
log.Debugf("Shutting down tcpserver")
|
||
s.tcpserver.Shutdown()
|
||
}
|
||
s.tcplisten = nil
|
||
s.tcpserver = nil
|
||
}
|
||
|
||
// Wait block until wg returns
|
||
func (s *Server) Wait() {
|
||
s.wg.Wait()
|
||
}
|