index of Stackdriver timeseries by region

preventing cross-region queries in a scrape

After the last post I decided to add a little bit more safety by querying by location. The idea is that data within a location is stored together, so five location-specific queries have higher availability than one location-agnostic query (which might time out). I don’t know if that’s right, but let’s try it.

The problem is that location goes with resource type, and some resource types use location while others use zone and others use region. However, we can cheat: Stackdriver’s timeseries.list ignores labels in the group-by that don’t exist. So we’ll just group by all the possible location-specific labels and see what happens:

// TimeSeriesDescriptor identifies one Stackdriver time series: the project
// and metric type it belongs to, the monitored-resource type, and — when the
// resource is location-scoped — the label that carries its location together
// with that label's value.
type TimeSeriesDescriptor struct {
    Project       string
    MetricType    string
    ResourceType  string
    LocationLabel string // "zone", "region", or "location"; empty when the resource has no location label
    LocationValue string
}

// String renders the descriptor as a human-readable tuple, for example
// "(my-project, gce_instance, compute.googleapis.com/instance/uptime, zone=us-east1-b)".
// The trailing label=value pair is omitted when LocationLabel is empty.
func (descriptor TimeSeriesDescriptor) String() string {
    var b strings.Builder
    fmt.Fprintf(&b, "(%s, %s, %s", descriptor.Project, descriptor.ResourceType, descriptor.MetricType)
    if descriptor.LocationLabel != "" {
        fmt.Fprintf(&b, ", %s=%s", descriptor.LocationLabel, descriptor.LocationValue)
    }
    b.WriteByte(')')
    return b.String()
}

// Filter builds the Stackdriver monitoring filter expression that matches
// exactly this time series, narrowing by the location label when one is set.
func (descriptor TimeSeriesDescriptor) Filter() string {
    var b strings.Builder
    fmt.Fprintf(&b, `project="%s" resource.type="%s" metric.type="%s"`,
        descriptor.Project, descriptor.ResourceType, descriptor.MetricType)
    if descriptor.LocationLabel != "" {
        fmt.Fprintf(&b, ` resource.labels."%s"="%s"`, descriptor.LocationLabel, descriptor.LocationValue)
    }
    return b.String()
}

// fetchResourceTypesInternal discovers, for one metric descriptor, every
// (project, resource type, location) combination that has data in the
// start..end window. It issues a HEADERS-only ListTimeSeries call grouped by
// every candidate location label (zone/region/location) — Stackdriver ignores
// group-by labels a resource type doesn't have — and sends one
// TimeSeriesDescriptor per returned header on out. An iteration error is
// reported on e and stops the scan.
func fetchResourceTypesInternal(ctx context.Context, client *monitoring.MetricClient,
    workspace string, start, end time.Time, in *metricpb.MetricDescriptor,
    out chan<- interface{}, e chan<- error) {
    // Descriptor names look like "projects/<id>/metricDescriptors/<type>";
    // the second path segment is the project id.
    parts := strings.SplitN(in.GetName(), "/", 3)
    request := monitoringpb.ListTimeSeriesRequest{
        Name:   workspace,
        View:   monitoringpb.ListTimeSeriesRequest_HEADERS,
        Filter: fmt.Sprintf(`project="%s" metric.type="%s"`, parts[1], in.GetType()),
        Interval: &monitoringpb.TimeInterval{
            StartTime: &googlepb.Timestamp{Seconds: start.Unix()},
            EndTime:   &googlepb.Timestamp{Seconds: end.Unix()},
        },
        Aggregation: &monitoringpb.Aggregation{GroupByFields: []string{
            "resource.labels.project_id",
            "resource.labels.zone", // THE IMPORTANT PARTS
            "resource.labels.region",
            "resource.labels.location"}},
    }
    // Gauges need an aligner for the HEADERS query to be accepted.
    if in.GetMetricKind() == metricpb.MetricDescriptor_GAUGE {
        request.Aggregation.PerSeriesAligner = monitoringpb.Aggregation_ALIGN_NEXT_OLDER
    }

    it := client.ListTimeSeries(ctx, &request)
    for {
        header, err := it.Next()
        if err == iterator.Done {
            return
        }
        if err != nil {
            e <- errors.Wrapf(err, "failed to fetch time series descriptors for %s", in.GetType())
            return
        }
        labels := header.GetResource().GetLabels()
        descriptor := TimeSeriesDescriptor{
            Project:      labels["project_id"],
            MetricType:   in.GetType(),
            ResourceType: header.GetResource().GetType(),
        }
        // A resource type carries at most one of these labels; record the
        // first one present (checked in the same order as before).
        for _, candidate := range []string{"zone", "region", "location"} {
            if value, ok := labels[candidate]; ok {
                descriptor.LocationLabel = candidate
                descriptor.LocationValue = value
                break
            }
        }
        out <- descriptor
    }
}

This worked, but my empty project now takes 16s to scrape. Next is to take all the time series IDs, make the actual raw data queries, and store them somewhere. Maybe Cloud Bigtable?

Published using 376 words.