Add support for tracing via OpenCencus

This adds a few flags for configuring the tracer.
Includes support for jaeger tracing (built into OC).
This commit is contained in:
Brian Goff
2018-09-19 18:01:39 -07:00
parent 43d32d2301
commit 682b2bccf8
139 changed files with 19834 additions and 42 deletions

View File

@@ -7,6 +7,7 @@ import (
"github.com/pkg/errors"
"github.com/virtual-kubelet/virtual-kubelet/providers/azure/client/aci"
"go.opencensus.io/trace"
"golang.org/x/sync/errgroup"
"k8s.io/api/core/v1"
metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
@@ -15,12 +16,19 @@ import (
// GetStatsSummary returns the stats summary for pods running on ACI
func (p *ACIProvider) GetStatsSummary(ctx context.Context) (summary *stats.Summary, err error) {
ctx, span := trace.StartSpan(ctx, "GetSummaryStats")
defer span.End()
addAzureAttributes(span, p)
p.metricsSync.Lock()
defer p.metricsSync.Unlock()
span.Annotate(nil, "acquired metrics mutex")
if time.Now().Sub(p.metricsSyncTime) < time.Minute {
span.AddAttributes(trace.BoolAttribute("preCachedResult", true), trace.StringAttribute("cachedResultSampleTime", p.metricsSyncTime.String()))
return p.lastMetric, nil
}
span.AddAttributes(trace.BoolAttribute("preCachedResult", false), trace.StringAttribute("cachedResultSampleTime", p.metricsSyncTime.String()))
select {
case <-ctx.Done():
@@ -37,6 +45,7 @@ func (p *ACIProvider) GetStatsSummary(ctx context.Context) (summary *stats.Summa
}()
pods := p.resourceManager.GetPods()
var errGroup errgroup.Group
chResult := make(chan stats.PodStats, len(pods))
@@ -48,7 +57,16 @@ func (p *ACIProvider) GetStatsSummary(ctx context.Context) (summary *stats.Summa
if pod.Status.Phase != v1.PodRunning {
continue
}
pod := pod
errGroup.Go(func() error {
ctx, span := trace.StartSpan(ctx, "getPodMetrics")
defer span.End()
span.AddAttributes(
trace.StringAttribute("UID", string(pod.UID)),
trace.StringAttribute("Name", pod.Name),
trace.StringAttribute("Namespace", pod.Namespace),
)
select {
case <-ctx.Done():
return ctx.Err()
@@ -58,6 +76,8 @@ func (p *ACIProvider) GetStatsSummary(ctx context.Context) (summary *stats.Summa
<-sema
}()
span.Annotate(nil, "Acquired semaphore")
cgName := containerGroupName(pod)
// cpu/mem and net stats are split because net stats do not support container level detail
systemStats, err := p.aciClient.GetContainerGroupMetrics(ctx, p.resourceGroup, cgName, aci.MetricsRequest{
@@ -68,8 +88,10 @@ func (p *ACIProvider) GetStatsSummary(ctx context.Context) (summary *stats.Summa
Types: []aci.MetricType{aci.MetricTypeCPUUsage, aci.MetricTypeMemoryUsage},
})
if err != nil {
span.SetStatus(trace.Status{Code: trace.StatusCodeUnknown, Message: err.Error()})
return errors.Wrapf(err, "error fetching cpu/mem stats for container group %s", cgName)
}
span.Annotate(nil, "Got system stats")
netStats, err := p.aciClient.GetContainerGroupMetrics(ctx, p.resourceGroup, cgName, aci.MetricsRequest{
Start: start,
@@ -78,8 +100,10 @@ func (p *ACIProvider) GetStatsSummary(ctx context.Context) (summary *stats.Summa
Types: []aci.MetricType{aci.MetricTyperNetworkBytesRecievedPerSecond, aci.MetricTyperNetworkBytesTransmittedPerSecond},
})
if err != nil {
span.SetStatus(trace.Status{Code: trace.StatusCodeUnknown, Message: err.Error()})
return errors.Wrapf(err, "error fetching network stats for container group %s", cgName)
}
span.Annotate(nil, "Got network stats")
chResult <- collectMetrics(pod, systemStats, netStats)
return nil
@@ -90,6 +114,7 @@ func (p *ACIProvider) GetStatsSummary(ctx context.Context) (summary *stats.Summa
return nil, errors.Wrap(err, "error in request to fetch container group metrics")
}
close(chResult)
span.Annotate([]trace.Attribute{trace.Int64Attribute("nPods", int64(len(pods)))}, "Collected stats from Azure")
var s stats.Summary
s.Node = stats.NodeStats{