Add support for tracing via OpenCencus
This adds a few flags for configuring the tracer. Includes support for jaeger tracing (built into OC).
This commit is contained in:
@@ -7,6 +7,7 @@ import (
|
||||
|
||||
"github.com/pkg/errors"
|
||||
"github.com/virtual-kubelet/virtual-kubelet/providers/azure/client/aci"
|
||||
"go.opencensus.io/trace"
|
||||
"golang.org/x/sync/errgroup"
|
||||
"k8s.io/api/core/v1"
|
||||
metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
|
||||
@@ -15,12 +16,19 @@ import (
|
||||
|
||||
// GetStatsSummary returns the stats summary for pods running on ACI
|
||||
func (p *ACIProvider) GetStatsSummary(ctx context.Context) (summary *stats.Summary, err error) {
|
||||
ctx, span := trace.StartSpan(ctx, "GetSummaryStats")
|
||||
defer span.End()
|
||||
addAzureAttributes(span, p)
|
||||
|
||||
p.metricsSync.Lock()
|
||||
defer p.metricsSync.Unlock()
|
||||
span.Annotate(nil, "acquired metrics mutex")
|
||||
|
||||
if time.Now().Sub(p.metricsSyncTime) < time.Minute {
|
||||
span.AddAttributes(trace.BoolAttribute("preCachedResult", true), trace.StringAttribute("cachedResultSampleTime", p.metricsSyncTime.String()))
|
||||
return p.lastMetric, nil
|
||||
}
|
||||
span.AddAttributes(trace.BoolAttribute("preCachedResult", false), trace.StringAttribute("cachedResultSampleTime", p.metricsSyncTime.String()))
|
||||
|
||||
select {
|
||||
case <-ctx.Done():
|
||||
@@ -37,6 +45,7 @@ func (p *ACIProvider) GetStatsSummary(ctx context.Context) (summary *stats.Summa
|
||||
}()
|
||||
|
||||
pods := p.resourceManager.GetPods()
|
||||
|
||||
var errGroup errgroup.Group
|
||||
chResult := make(chan stats.PodStats, len(pods))
|
||||
|
||||
@@ -48,7 +57,16 @@ func (p *ACIProvider) GetStatsSummary(ctx context.Context) (summary *stats.Summa
|
||||
if pod.Status.Phase != v1.PodRunning {
|
||||
continue
|
||||
}
|
||||
pod := pod
|
||||
errGroup.Go(func() error {
|
||||
ctx, span := trace.StartSpan(ctx, "getPodMetrics")
|
||||
defer span.End()
|
||||
span.AddAttributes(
|
||||
trace.StringAttribute("UID", string(pod.UID)),
|
||||
trace.StringAttribute("Name", pod.Name),
|
||||
trace.StringAttribute("Namespace", pod.Namespace),
|
||||
)
|
||||
|
||||
select {
|
||||
case <-ctx.Done():
|
||||
return ctx.Err()
|
||||
@@ -58,6 +76,8 @@ func (p *ACIProvider) GetStatsSummary(ctx context.Context) (summary *stats.Summa
|
||||
<-sema
|
||||
}()
|
||||
|
||||
span.Annotate(nil, "Acquired semaphore")
|
||||
|
||||
cgName := containerGroupName(pod)
|
||||
// cpu/mem and net stats are split because net stats do not support container level detail
|
||||
systemStats, err := p.aciClient.GetContainerGroupMetrics(ctx, p.resourceGroup, cgName, aci.MetricsRequest{
|
||||
@@ -68,8 +88,10 @@ func (p *ACIProvider) GetStatsSummary(ctx context.Context) (summary *stats.Summa
|
||||
Types: []aci.MetricType{aci.MetricTypeCPUUsage, aci.MetricTypeMemoryUsage},
|
||||
})
|
||||
if err != nil {
|
||||
span.SetStatus(trace.Status{Code: trace.StatusCodeUnknown, Message: err.Error()})
|
||||
return errors.Wrapf(err, "error fetching cpu/mem stats for container group %s", cgName)
|
||||
}
|
||||
span.Annotate(nil, "Got system stats")
|
||||
|
||||
netStats, err := p.aciClient.GetContainerGroupMetrics(ctx, p.resourceGroup, cgName, aci.MetricsRequest{
|
||||
Start: start,
|
||||
@@ -78,8 +100,10 @@ func (p *ACIProvider) GetStatsSummary(ctx context.Context) (summary *stats.Summa
|
||||
Types: []aci.MetricType{aci.MetricTyperNetworkBytesRecievedPerSecond, aci.MetricTyperNetworkBytesTransmittedPerSecond},
|
||||
})
|
||||
if err != nil {
|
||||
span.SetStatus(trace.Status{Code: trace.StatusCodeUnknown, Message: err.Error()})
|
||||
return errors.Wrapf(err, "error fetching network stats for container group %s", cgName)
|
||||
}
|
||||
span.Annotate(nil, "Got network stats")
|
||||
|
||||
chResult <- collectMetrics(pod, systemStats, netStats)
|
||||
return nil
|
||||
@@ -90,6 +114,7 @@ func (p *ACIProvider) GetStatsSummary(ctx context.Context) (summary *stats.Summa
|
||||
return nil, errors.Wrap(err, "error in request to fetch container group metrics")
|
||||
}
|
||||
close(chResult)
|
||||
span.Annotate([]trace.Attribute{trace.Int64Attribute("nPods", int64(len(pods)))}, "Collected stats from Azure")
|
||||
|
||||
var s stats.Summary
|
||||
s.Node = stats.NodeStats{
|
||||
|
||||
Reference in New Issue
Block a user