Make tracing interface to coalesce logging/tracing (#519)
* Define and use an interface for logging. This allows alternative implementations to use whatever logging package they want. Currently the interface just mimics what logrus already implements, with minor modifications to not rely on logrus itself. I think the interface is pretty solid in terms of logging implementations being able to do what they need to. * Make tracing interface to coalesce logging/tracing. Allows us to share data between the tracer and the logger so we can simplify log/trace handling where we generally want data to go to both places.
This commit is contained in:
@@ -19,15 +19,14 @@ import (
|
||||
"sync"
|
||||
"time"
|
||||
|
||||
"github.com/Sirupsen/logrus"
|
||||
"github.com/gorilla/websocket"
|
||||
"github.com/virtual-kubelet/virtual-kubelet/log"
|
||||
"github.com/virtual-kubelet/virtual-kubelet/manager"
|
||||
client "github.com/virtual-kubelet/virtual-kubelet/providers/azure/client"
|
||||
"github.com/virtual-kubelet/virtual-kubelet/providers/azure/client/aci"
|
||||
"github.com/virtual-kubelet/virtual-kubelet/providers/azure/client/network"
|
||||
"go.opencensus.io/trace"
|
||||
"k8s.io/api/core/v1"
|
||||
"github.com/virtual-kubelet/virtual-kubelet/trace"
|
||||
v1 "k8s.io/api/core/v1"
|
||||
k8serr "k8s.io/apimachinery/pkg/api/errors"
|
||||
"k8s.io/apimachinery/pkg/api/resource"
|
||||
metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
|
||||
@@ -480,11 +479,11 @@ func getKubeProxyExtension(secretPath, masterURI, clusterCIDR string) (*aci.Exte
|
||||
return &extension, nil
|
||||
}
|
||||
|
||||
func addAzureAttributes(span *trace.Span, p *ACIProvider) {
|
||||
span.AddAttributes(
|
||||
trace.StringAttribute("azure.resourceGroup", p.resourceGroup),
|
||||
trace.StringAttribute("azure.region", p.region),
|
||||
)
|
||||
// addAzureAttributes records the provider's Azure resource group and region
// on span under the keys "azure.resourceGroup" and "azure.region" by calling
// span.WithFields, and returns the context produced by that call.
//
// Callers should keep using the returned context rather than the one passed in.
// NOTE(review): presumably the returned context also carries these fields for
// log.G(ctx), so logs and traces share them — confirm against the trace package.
func addAzureAttributes(ctx context.Context, span trace.Span, p *ACIProvider) context.Context {
|
||||
return span.WithFields(ctx, log.Fields{
|
||||
"azure.resourceGroup": p.resourceGroup,
|
||||
|
||||
"azure.region": p.region,
|
||||
})
|
||||
}
|
||||
|
||||
// CreatePod accepts a Pod definition and creates
|
||||
@@ -492,7 +491,7 @@ func addAzureAttributes(span *trace.Span, p *ACIProvider) {
|
||||
func (p *ACIProvider) CreatePod(ctx context.Context, pod *v1.Pod) error {
|
||||
ctx, span := trace.StartSpan(ctx, "aci.CreatePod")
|
||||
defer span.End()
|
||||
addAzureAttributes(span, p)
|
||||
ctx = addAzureAttributes(ctx, span, p)
|
||||
|
||||
var containerGroup aci.ContainerGroup
|
||||
containerGroup.Location = p.region
|
||||
@@ -694,7 +693,7 @@ func (p *ACIProvider) UpdatePod(ctx context.Context, pod *v1.Pod) error {
|
||||
func (p *ACIProvider) DeletePod(ctx context.Context, pod *v1.Pod) error {
|
||||
ctx, span := trace.StartSpan(ctx, "aci.DeletePod")
|
||||
defer span.End()
|
||||
addAzureAttributes(span, p)
|
||||
ctx = addAzureAttributes(ctx, span, p)
|
||||
|
||||
err := p.aciClient.DeleteContainerGroup(ctx, p.resourceGroup, fmt.Sprintf("%s-%s", pod.Namespace, pod.Name))
|
||||
return wrapError(err)
|
||||
@@ -705,7 +704,7 @@ func (p *ACIProvider) DeletePod(ctx context.Context, pod *v1.Pod) error {
|
||||
func (p *ACIProvider) GetPod(ctx context.Context, namespace, name string) (*v1.Pod, error) {
|
||||
ctx, span := trace.StartSpan(ctx, "aci.GetPod")
|
||||
defer span.End()
|
||||
addAzureAttributes(span, p)
|
||||
ctx = addAzureAttributes(ctx, span, p)
|
||||
|
||||
cg, err, status := p.aciClient.GetContainerGroup(ctx, p.resourceGroup, fmt.Sprintf("%s-%s", namespace, name))
|
||||
if err != nil {
|
||||
@@ -726,7 +725,7 @@ func (p *ACIProvider) GetPod(ctx context.Context, namespace, name string) (*v1.P
|
||||
func (p *ACIProvider) GetContainerLogs(ctx context.Context, namespace, podName, containerName string, tail int) (string, error) {
|
||||
ctx, span := trace.StartSpan(ctx, "aci.GetContainerLogs")
|
||||
defer span.End()
|
||||
addAzureAttributes(span, p)
|
||||
ctx = addAzureAttributes(ctx, span, p)
|
||||
|
||||
logContent := ""
|
||||
cg, err, _ := p.aciClient.GetContainerGroup(ctx, p.resourceGroup, fmt.Sprintf("%s-%s", namespace, podName))
|
||||
@@ -744,7 +743,6 @@ func (p *ACIProvider) GetContainerLogs(ctx context.Context, namespace, podName,
|
||||
cLogs, err := p.aciClient.GetContainerLogs(ctx, p.resourceGroup, cg.Name, containerName, tail)
|
||||
if err != nil {
|
||||
log.G(ctx).WithField("method", "GetContainerLogs").WithError(err).Debug("Error getting container logs, retrying")
|
||||
span.Annotate(nil, "Error getting container logs, retrying")
|
||||
time.Sleep(5000 * time.Millisecond)
|
||||
} else {
|
||||
logContent = cLogs.Content
|
||||
@@ -841,7 +839,7 @@ func (p *ACIProvider) ExecInContainer(name string, uid types.UID, container stri
|
||||
func (p *ACIProvider) GetPodStatus(ctx context.Context, namespace, name string) (*v1.PodStatus, error) {
|
||||
ctx, span := trace.StartSpan(ctx, "aci.GetPodStatus")
|
||||
defer span.End()
|
||||
addAzureAttributes(span, p)
|
||||
ctx = addAzureAttributes(ctx, span, p)
|
||||
|
||||
pod, err := p.GetPod(ctx, namespace, name)
|
||||
if err != nil {
|
||||
@@ -859,7 +857,7 @@ func (p *ACIProvider) GetPodStatus(ctx context.Context, namespace, name string)
|
||||
func (p *ACIProvider) GetPods(ctx context.Context) ([]*v1.Pod, error) {
|
||||
ctx, span := trace.StartSpan(ctx, "aci.GetPods")
|
||||
defer span.End()
|
||||
addAzureAttributes(span, p)
|
||||
ctx = addAzureAttributes(ctx, span, p)
|
||||
|
||||
cgs, err := p.aciClient.ListContainerGroups(ctx, p.resourceGroup)
|
||||
if err != nil {
|
||||
@@ -875,7 +873,7 @@ func (p *ACIProvider) GetPods(ctx context.Context) ([]*v1.Pod, error) {
|
||||
|
||||
p, err := containerGroupToPod(&c)
|
||||
if err != nil {
|
||||
log.G(context.TODO()).WithFields(logrus.Fields{
|
||||
log.G(ctx).WithFields(log.Fields{
|
||||
"name": c.Name,
|
||||
"id": c.ID,
|
||||
}).WithError(err).Error("error converting container group to pod")
|
||||
|
||||
@@ -25,7 +25,7 @@ type AcsCredential struct {
|
||||
// NewAcsCredential returns an AcsCredential struct from file path
|
||||
func NewAcsCredential(p string) (*AcsCredential, error) {
|
||||
logger := log.G(context.TODO()).WithField("method", "NewAcsCredential").WithField("file", p)
|
||||
log.Trace(logger, "Reading ACS credential file")
|
||||
logger.Debug("Reading ACS credential file")
|
||||
|
||||
b, err := ioutil.ReadFile(p)
|
||||
if err != nil {
|
||||
@@ -38,6 +38,6 @@ func NewAcsCredential(p string) (*AcsCredential, error) {
|
||||
return nil, err
|
||||
}
|
||||
|
||||
log.Trace(logger, "Load ACS credential file successfully")
|
||||
logger.Debug("Load ACS credential file successfully")
|
||||
return &cred, nil
|
||||
}
|
||||
|
||||
@@ -7,10 +7,11 @@ import (
|
||||
|
||||
"github.com/cpuguy83/strongerrors/status/ocstatus"
|
||||
"github.com/pkg/errors"
|
||||
"github.com/virtual-kubelet/virtual-kubelet/log"
|
||||
"github.com/virtual-kubelet/virtual-kubelet/providers/azure/client/aci"
|
||||
"go.opencensus.io/trace"
|
||||
"github.com/virtual-kubelet/virtual-kubelet/trace"
|
||||
"golang.org/x/sync/errgroup"
|
||||
"k8s.io/api/core/v1"
|
||||
v1 "k8s.io/api/core/v1"
|
||||
metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
|
||||
stats "k8s.io/kubernetes/pkg/kubelet/apis/stats/v1alpha1"
|
||||
)
|
||||
@@ -19,17 +20,24 @@ import (
|
||||
func (p *ACIProvider) GetStatsSummary(ctx context.Context) (summary *stats.Summary, err error) {
|
||||
ctx, span := trace.StartSpan(ctx, "GetSummaryStats")
|
||||
defer span.End()
|
||||
addAzureAttributes(span, p)
|
||||
ctx = addAzureAttributes(ctx, span, p)
|
||||
|
||||
p.metricsSync.Lock()
|
||||
defer p.metricsSync.Unlock()
|
||||
span.Annotate(nil, "acquired metrics mutex")
|
||||
|
||||
log.G(ctx).Debug("acquired metrics mutex")
|
||||
|
||||
if time.Now().Sub(p.metricsSyncTime) < time.Minute {
|
||||
span.AddAttributes(trace.BoolAttribute("preCachedResult", true), trace.StringAttribute("cachedResultSampleTime", p.metricsSyncTime.String()))
|
||||
span.WithFields(ctx, log.Fields{
|
||||
"preCachedResult": true,
|
||||
"cachedResultSampleTime": p.metricsSyncTime.String(),
|
||||
})
|
||||
return p.lastMetric, nil
|
||||
}
|
||||
span.AddAttributes(trace.BoolAttribute("preCachedResult", false), trace.StringAttribute("cachedResultSampleTime", p.metricsSyncTime.String()))
|
||||
ctx = span.WithFields(ctx, log.Fields{
|
||||
"preCachedResult": false,
|
||||
"cachedResultSampleTime": p.metricsSyncTime.String(),
|
||||
})
|
||||
|
||||
select {
|
||||
case <-ctx.Done():
|
||||
@@ -62,11 +70,11 @@ func (p *ACIProvider) GetStatsSummary(ctx context.Context) (summary *stats.Summa
|
||||
errGroup.Go(func() error {
|
||||
ctx, span := trace.StartSpan(ctx, "getPodMetrics")
|
||||
defer span.End()
|
||||
span.AddAttributes(
|
||||
trace.StringAttribute("UID", string(pod.UID)),
|
||||
trace.StringAttribute("Name", pod.Name),
|
||||
trace.StringAttribute("Namespace", pod.Namespace),
|
||||
)
|
||||
logger := log.G(ctx).WithFields(log.Fields{
|
||||
"UID": string(pod.UID),
|
||||
"Name": pod.Name,
|
||||
"Namespace": pod.Namespace,
|
||||
})
|
||||
|
||||
select {
|
||||
case <-ctx.Done():
|
||||
@@ -77,7 +85,7 @@ func (p *ACIProvider) GetStatsSummary(ctx context.Context) (summary *stats.Summa
|
||||
<-sema
|
||||
}()
|
||||
|
||||
span.Annotate(nil, "Acquired semaphore")
|
||||
logger.Debug("Acquired semaphore")
|
||||
|
||||
cgName := containerGroupName(pod)
|
||||
// cpu/mem and net stats are split because net stats do not support container level detail
|
||||
@@ -92,7 +100,7 @@ func (p *ACIProvider) GetStatsSummary(ctx context.Context) (summary *stats.Summa
|
||||
span.SetStatus(ocstatus.FromError(err))
|
||||
return errors.Wrapf(err, "error fetching cpu/mem stats for container group %s", cgName)
|
||||
}
|
||||
span.Annotate(nil, "Got system stats")
|
||||
logger.Debug("Got system stats")
|
||||
|
||||
netStats, err := p.aciClient.GetContainerGroupMetrics(ctx, p.resourceGroup, cgName, aci.MetricsRequest{
|
||||
Start: start,
|
||||
@@ -104,7 +112,7 @@ func (p *ACIProvider) GetStatsSummary(ctx context.Context) (summary *stats.Summa
|
||||
span.SetStatus(ocstatus.FromError(err))
|
||||
return errors.Wrapf(err, "error fetching network stats for container group %s", cgName)
|
||||
}
|
||||
span.Annotate(nil, "Got network stats")
|
||||
logger.Debug("Got network stats")
|
||||
|
||||
chResult <- collectMetrics(pod, systemStats, netStats)
|
||||
return nil
|
||||
@@ -112,10 +120,11 @@ func (p *ACIProvider) GetStatsSummary(ctx context.Context) (summary *stats.Summa
|
||||
}
|
||||
|
||||
if err := errGroup.Wait(); err != nil {
|
||||
span.SetStatus(ocstatus.FromError(err))
|
||||
return nil, errors.Wrap(err, "error in request to fetch container group metrics")
|
||||
}
|
||||
close(chResult)
|
||||
span.Annotate([]trace.Attribute{trace.Int64Attribute("nPods", int64(len(pods)))}, "Collected stats from Azure")
|
||||
log.G(ctx).Debugf("Collected status from azure for %d pods", len(pods))
|
||||
|
||||
var s stats.Summary
|
||||
s.Node = stats.NodeStats{
|
||||
|
||||
Reference in New Issue
Block a user