Skip to content

Commit 5f5fc23

Browse files
committed
add additional scope level stats matcher to enable/disable stats (#43923)
Signed-off-by: wbpcode/wangbaiping <wbphub@gmail.com>
1 parent 820d3f4 commit 5f5fc23

11 files changed

Lines changed: 399 additions & 16 deletions

File tree

changelogs/current.yaml

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -20,5 +20,11 @@ new_features:
2020
- area: stats
2121
change: |
2222
Added support to limit the number of stats stored in each stats scope in the stats library.
23+
- area: stats
24+
change: |
25+
Added support for cluster-level stats matcher, allowing more granular control over which stats
26+
are enabled and reported at the cluster level. The stats matcher can be configured via
27+
the xDS API dynamically on a per-cluster basis.
28+
See :ref:`envoy.stats_matcher <well_known_metadata_envoy_stats_matcher>` for more details.
2329
2430
deprecated:

docs/root/configuration/advanced/metadata_configurations.rst

Lines changed: 58 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -17,3 +17,61 @@ modifying Envoy's core API or implementation.
1717
For instance, users can add extra attributes to routes, such as the route owner or upstream service maintainer, to metadata.
1818
They can then enable Envoy to log these attributes to the access log or report them to StatsD, among other possibilities.
1919
Moreover, users can write a filter/extension to read these attributes and execute any specific logic.
20+
21+
.. _well_known_metadata:
22+
23+
Well-Known Metadata
24+
-------------------
25+
26+
The following ``typed_filter_metadata`` or ``filter_metadata`` keys are recognized by Envoy and control built-in behavior.
27+
Each entry specifies where the metadata can be configured.
28+
29+
.. _well_known_metadata_envoy_stats_matcher:
30+
31+
``envoy.stats_matcher``
32+
~~~~~~~~~~~~~~~~~~~~~~~
33+
34+
**Type:** :ref:`envoy.config.metrics.v3.StatsMatcher <envoy_v3_api_msg_config.metrics.v3.StatsMatcher>`
35+
36+
**Applicable to:** Upstream cluster (:ref:`Cluster.metadata <envoy_v3_api_field_config.cluster.v3.Cluster.metadata>`)
37+
38+
**Fields:** ``typed_filter_metadata``
39+
40+
When present in a cluster's ``typed_filter_metadata``, Envoy uses the provided
41+
:ref:`StatsMatcher <envoy_v3_api_msg_config.metrics.v3.StatsMatcher>` as the stats matcher for that
42+
cluster's stats scope. This per-cluster matcher **replaces** (not supplements) the global stats
43+
matcher configured in the bootstrap :ref:`StatsConfig
44+
<envoy_v3_api_msg_config.metrics.v3.StatsConfig>`. Child scopes created under the cluster scope
45+
inherit the matcher unless overridden.
46+
47+
This allows fine-grained control over which stats are created per cluster — for example, enabling a
48+
minimal set of stats on high-cardinality clusters to reduce memory and CPU overhead.
49+
50+
Example:
51+
52+
.. code-block:: yaml
53+
54+
  clusters:
  - name: my_cluster
    connect_timeout: 0.25s
    type: STATIC
    lb_policy: ROUND_ROBIN
    metadata:
      typed_filter_metadata:
        envoy.stats_matcher:
          "@type": type.googleapis.com/envoy.config.metrics.v3.StatsMatcher
          inclusion_list:
            patterns:
            - prefix: "cluster.my_cluster.upstream_cx"
    load_assignment:
      cluster_name: my_cluster
      endpoints:
      - lb_endpoints:
        - endpoint:
            address:
              socket_address:
                address: 127.0.0.1
                port_value: 10001
75+
76+
In this example, only stats whose names start with ``cluster.my_cluster.upstream_cx`` are created
77+
for ``my_cluster``; all other cluster stats are suppressed.

source/common/stats/stats_matcher_impl.cc

Lines changed: 7 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -14,28 +14,28 @@ namespace Stats {
1414

1515
// TODO(ambuc): Refactor this into common/matchers.cc, since StatsMatcher is really just a thin
1616
// wrapper around what might be called a StringMatcherList.
17-
StatsMatcherImpl::StatsMatcherImpl(const envoy::config::metrics::v3::StatsConfig& config,
17+
StatsMatcherImpl::StatsMatcherImpl(const envoy::config::metrics::v3::StatsMatcher& stats_matcher,
1818
SymbolTable& symbol_table,
1919
Server::Configuration::CommonFactoryContext& context)
2020
: symbol_table_(symbol_table), stat_name_pool_(std::make_unique<StatNamePool>(symbol_table)) {
2121

22-
switch (config.stats_matcher().stats_matcher_case()) {
22+
switch (stats_matcher.stats_matcher_case()) {
2323
case envoy::config::metrics::v3::StatsMatcher::StatsMatcherCase::kRejectAll:
2424
// In this scenario, there are no matchers to store.
25-
is_inclusive_ = !config.stats_matcher().reject_all();
25+
is_inclusive_ = !stats_matcher.reject_all();
2626
break;
2727
case envoy::config::metrics::v3::StatsMatcher::StatsMatcherCase::kInclusionList:
2828
// If we have an inclusion list, we are being default-exclusive.
29-
for (const auto& stats_matcher : config.stats_matcher().inclusion_list().patterns()) {
30-
matchers_.push_back(Matchers::StringMatcherImpl(stats_matcher, context));
29+
for (const auto& pattern : stats_matcher.inclusion_list().patterns()) {
30+
matchers_.push_back(Matchers::StringMatcherImpl(pattern, context));
3131
optimizeLastMatcher();
3232
}
3333
is_inclusive_ = false;
3434
break;
3535
case envoy::config::metrics::v3::StatsMatcher::StatsMatcherCase::kExclusionList:
3636
// If we have an exclusion list, we are being default-inclusive.
37-
for (const auto& stats_matcher : config.stats_matcher().exclusion_list().patterns()) {
38-
matchers_.push_back(Matchers::StringMatcherImpl(stats_matcher, context));
37+
for (const auto& pattern : stats_matcher.exclusion_list().patterns()) {
38+
matchers_.push_back(Matchers::StringMatcherImpl(pattern, context));
3939
optimizeLastMatcher();
4040
}
4141
FALLTHRU;

source/common/stats/stats_matcher_impl.h

Lines changed: 4 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -21,7 +21,10 @@ namespace Stats {
2121
class StatsMatcherImpl : public StatsMatcher {
2222
public:
2323
StatsMatcherImpl(const envoy::config::metrics::v3::StatsConfig& config, SymbolTable& symbol_table,
24-
Server::Configuration::CommonFactoryContext& context);
24+
Server::Configuration::CommonFactoryContext& context)
25+
: StatsMatcherImpl(config.stats_matcher(), symbol_table, context) {}
26+
StatsMatcherImpl(const envoy::config::metrics::v3::StatsMatcher& stats_matcher,
27+
SymbolTable& symbol_table, Server::Configuration::CommonFactoryContext& context);
2528

2629
// Default constructor simply allows everything.
2730
StatsMatcherImpl() = default;

source/common/upstream/BUILD

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -397,6 +397,7 @@ envoy_cc_library(
397397
"@envoy_api//envoy/config/core/v3:pkg_cc_proto",
398398
"//source/common/stats:deferred_creation",
399399
"@envoy_api//envoy/config/endpoint/v3:pkg_cc_proto",
400+
"@envoy_api//envoy/config/metrics/v3:pkg_cc_proto",
400401
"@envoy_api//envoy/config/upstream/local_address_selector/v3:pkg_cc_proto",
401402
"@envoy_api//envoy/extensions/filters/http/upstream_codec/v3:pkg_cc_proto",
402403
"@envoy_api//envoy/extensions/transport_sockets/raw_buffer/v3:pkg_cc_proto",
@@ -424,6 +425,7 @@ envoy_cc_library(
424425
"//source/common/protobuf",
425426
"//source/common/protobuf:utility_lib",
426427
"//source/common/runtime:runtime_lib",
428+
"//source/common/stats:stats_matcher_lib",
427429
"//source/server:transport_socket_config_lib",
428430
],
429431
)

source/common/upstream/prod_cluster_info_factory.cc

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -13,7 +13,7 @@ namespace Upstream {
1313
ClusterInfoConstSharedPtr
1414
ProdClusterInfoFactory::createClusterInfo(const CreateClusterInfoParams& params) {
1515
Envoy::Stats::ScopeSharedPtr scope =
16-
params.stats_.createScope(fmt::format("cluster.{}.", params.cluster_.name()));
16+
generateStatsScope(params.cluster_, params.server_context_, false);
1717

1818
Envoy::Server::Configuration::TransportSocketFactoryContextImpl factory_context(
1919
params.server_context_, *scope, params.server_context_.messageValidationVisitor());

source/common/upstream/upstream_impl.cc

Lines changed: 37 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -16,6 +16,8 @@
1616
#include "envoy/config/core/v3/health_check.pb.h"
1717
#include "envoy/config/core/v3/protocol.pb.h"
1818
#include "envoy/config/endpoint/v3/endpoint_components.pb.h"
19+
#include "envoy/config/metrics/v3/stats.pb.h"
20+
#include "envoy/config/metrics/v3/stats.pb.validate.h"
1921
#include "envoy/config/upstream/local_address_selector/v3/default_local_address_selector.pb.h"
2022
#include "envoy/event/dispatcher.h"
2123
#include "envoy/event/timer.h"
@@ -55,6 +57,7 @@
5557
#include "source/common/runtime/runtime_features.h"
5658
#include "source/common/runtime/runtime_impl.h"
5759
#include "source/common/stats/deferred_creation.h"
60+
#include "source/common/stats/stats_matcher_impl.h"
5861
#include "source/common/upstream/cluster_factory_impl.h"
5962
#include "source/common/upstream/health_checker_impl.h"
6063
#include "source/common/upstream/locality_pool.h"
@@ -174,12 +177,6 @@ HostVector filterHosts(const absl::node_hash_set<HostSharedPtr>& hosts,
174177
return net_hosts;
175178
}
176179

177-
Stats::ScopeSharedPtr generateStatsScope(const envoy::config::cluster::v3::Cluster& config,
178-
Stats::Store& stats) {
179-
return stats.createScope(fmt::format(
180-
"cluster.{}.", config.alt_stat_name().empty() ? config.name() : config.alt_stat_name()));
181-
}
182-
183180
Network::ConnectionSocket::OptionsSharedPtr
184181
buildBaseSocketOptions(const envoy::config::cluster::v3::Cluster& cluster_config,
185182
const envoy::config::core::v3::BindConfig& bootstrap_bind_config) {
@@ -419,6 +416,39 @@ const absl::string_view ClusterImplBase::DoNotValidateAlpnRuntimeKey =
419416
const absl::string_view ClusterImplBase::DropOverloadRuntimeKey =
420417
"load_balancing_policy.drop_overload_limit";
421418

419+
constexpr absl::string_view StatsMatcherMetadataKey = "envoy.stats_matcher";
420+
421+
Stats::ScopeSharedPtr
422+
generateStatsScope(const envoy::config::cluster::v3::Cluster& config,
423+
Server::Configuration::ServerFactoryContext& server_context,
424+
bool use_alt_stat_name) {
425+
auto& stats = server_context.serverScope().store();
426+
Stats::StatsMatcherSharedPtr scope_matcher;
427+
428+
// Check for a per-cluster stats matcher in typed_filter_metadata under the specific key. If
429+
// present, unpack it as StatsMatcher and use it to restrict which stats are created for this
430+
// cluster's scope.
431+
const auto& typed_meta = config.metadata().typed_filter_metadata();
432+
if (auto it = typed_meta.find(StatsMatcherMetadataKey); it != typed_meta.end()) {
433+
envoy::config::metrics::v3::StatsMatcher stats_matcher_proto;
434+
if (auto status = MessageUtil::unpackTo(it->second, stats_matcher_proto); status.ok()) {
435+
MessageUtil::validate(stats_matcher_proto, server_context.messageValidationVisitor());
436+
scope_matcher = std::make_shared<Stats::StatsMatcherImpl>(
437+
stats_matcher_proto, stats.symbolTable(), server_context);
438+
} else {
439+
ENVOY_LOG_TO_LOGGER(Envoy::Logger::Registry::getLog(Envoy::Logger::Id::upstream), warn,
440+
"Failed to unpack stats matcher for cluster {}: {}", config.name(),
441+
status.message());
442+
}
443+
}
444+
445+
return stats.createScope(
446+
fmt::format("cluster.{}.", (!config.alt_stat_name().empty() && use_alt_stat_name)
447+
? config.alt_stat_name()
448+
: config.name()),
449+
false, {}, std::move(scope_matcher));
450+
}
451+
422452
// TODO(pianiststickman): this implementation takes a lock on the hot path and puts a copy of the
423453
// stat name into every host that receives a copy of that metric. This can be improved by putting
424454
// a single copy of the stat name into a thread-local key->index map so that the lock can be avoided
@@ -1554,7 +1584,7 @@ ClusterImplBase::ClusterImplBase(const envoy::config::cluster::v3::Cluster& clus
15541584
cluster_context.serverFactoryContext().mainThreadDispatcher())) {
15551585
auto& server_context = cluster_context.serverFactoryContext();
15561586

1557-
auto stats_scope = generateStatsScope(cluster, server_context.serverScope().store());
1587+
auto stats_scope = generateStatsScope(cluster, server_context);
15581588
transport_factory_context_ =
15591589
std::make_unique<Server::Configuration::TransportSocketFactoryContextImpl>(
15601590
server_context, *stats_scope, cluster_context.messageValidationVisitor());

source/common/upstream/upstream_impl.h

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -82,6 +82,11 @@ using UpstreamNetworkFilterConfigProviderManager =
8282
Filter::FilterConfigProviderManager<Network::FilterFactoryCb,
8383
Server::Configuration::UpstreamFactoryContext>;
8484

85+
/**
 * Creates the stats scope for a cluster, named "cluster.<name>.".
 *
 * If the cluster's typed_filter_metadata contains an "envoy.stats_matcher" entry that unpacks as
 * a StatsMatcher, that matcher is attached to the created scope.
 *
 * @param config the cluster configuration supplying the name, alt_stat_name and metadata.
 * @param server_context the server factory context; supplies the stats store and the validation
 *        visitor used for the matcher proto.
 * @param use_alt_stat_name when true (the default) and the cluster has a non-empty
 *        alt_stat_name, that name is used in the scope prefix instead of the cluster name.
 * @return the newly created scope.
 */
Stats::ScopeSharedPtr
86+
generateStatsScope(const envoy::config::cluster::v3::Cluster& config,
87+
Server::Configuration::ServerFactoryContext& server_context,
88+
bool use_alt_stat_name = true);
89+
8590
class LegacyLbPolicyConfigHelper {
8691
public:
8792
struct Result {

test/common/upstream/BUILD

Lines changed: 16 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -560,6 +560,22 @@ envoy_cc_test(
560560
],
561561
)
562562

563+
envoy_cc_test(
564+
name = "prod_cluster_info_factory_test",
565+
srcs = ["prod_cluster_info_factory_test.cc"],
566+
rbe_pool = "6gig",
567+
deps = [
568+
":utility_lib",
569+
"//source/common/upstream:prod_cluster_info_factory_lib",
570+
"//source/extensions/transport_sockets/raw_buffer:config",
571+
"//test/mocks/runtime:runtime_mocks",
572+
"//test/mocks/server:server_factory_context_mocks",
573+
"//test/test_common:utility_lib",
574+
"@envoy_api//envoy/config/cluster/v3:pkg_cc_proto",
575+
"@envoy_api//envoy/config/metrics/v3:pkg_cc_proto",
576+
],
577+
)
578+
563579
envoy_cc_test(
564580
name = "upstream_impl_test",
565581
srcs = ["upstream_impl_test.cc"],
Lines changed: 138 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,138 @@
1+
#include "envoy/config/cluster/v3/cluster.pb.h"
2+
#include "envoy/config/metrics/v3/stats.pb.h"
3+
4+
#include "source/common/upstream/prod_cluster_info_factory.h"
5+
#include "source/extensions/transport_sockets/raw_buffer/config.h"
6+
7+
#include "test/common/upstream/utility.h"
8+
#include "test/mocks/runtime/mocks.h"
9+
#include "test/mocks/server/server_factory_context.h"
10+
#include "test/test_common/utility.h"
11+
12+
#include "gmock/gmock.h"
13+
#include "gtest/gtest.h"
14+
15+
using testing::NiceMock;
16+
using testing::ReturnRef;
17+
18+
namespace Envoy {
19+
namespace Upstream {
20+
namespace {
21+
22+
class ProdClusterInfoFactoryTest : public testing::Test {
23+
protected:
24+
ProdClusterInfoFactoryTest() { ON_CALL(server_context_, api()).WillByDefault(ReturnRef(*api_)); }
25+
26+
ClusterInfoConstSharedPtr createClusterInfo(const envoy::config::cluster::v3::Cluster& cluster) {
27+
return factory_.createClusterInfo({server_context_, cluster, bind_config_,
28+
server_context_.store_, server_context_.ssl_context_manager_,
29+
false, server_context_.thread_local_});
30+
}
31+
32+
NiceMock<Server::Configuration::MockServerFactoryContext> server_context_;
33+
NiceMock<Random::MockRandomGenerator> random_;
34+
Api::ApiPtr api_ = Api::createApiForTest(server_context_.store_, random_);
35+
envoy::config::core::v3::BindConfig bind_config_;
36+
ProdClusterInfoFactory factory_;
37+
};
38+
39+
// Verify that a cluster without a stats matcher in metadata creates all stats normally.
40+
TEST_F(ProdClusterInfoFactoryTest, NoMetadataStatsMatcher) {
41+
const std::string yaml = R"EOF(
42+
name: my_cluster
43+
connect_timeout: 0.25s
44+
type: STATIC
45+
lb_policy: ROUND_ROBIN
46+
load_assignment:
47+
endpoints:
48+
- lb_endpoints:
49+
- endpoint:
50+
address:
51+
socket_address:
52+
address: 127.0.0.1
53+
port_value: 11001
54+
)EOF";
55+
56+
auto info = createClusterInfo(parseClusterFromV3Yaml(yaml));
57+
ASSERT_NE(nullptr, info);
58+
info->trafficStats();
59+
60+
// Without a scope matcher, stats of any name are accepted.
61+
EXPECT_NE("", info->statsScope().counterFromString("upstream_cx_total").name());
62+
EXPECT_NE("", info->statsScope().counterFromString("upstream_rq_total").name());
63+
}
64+
65+
// Verify that a cluster with typed_filter_metadata["envoy.stats_matcher"] applies an inclusion
66+
// list: only stats matching the prefix are created; all others are rejected.
67+
TEST_F(ProdClusterInfoFactoryTest, MetadataStatsMatcherInclusionList) {
68+
const std::string yaml = R"EOF(
69+
name: my_cluster
70+
connect_timeout: 0.25s
71+
type: STATIC
72+
lb_policy: ROUND_ROBIN
73+
metadata:
74+
typed_filter_metadata:
75+
envoy.stats_matcher:
76+
"@type": type.googleapis.com/envoy.config.metrics.v3.StatsMatcher
77+
inclusion_list:
78+
patterns:
79+
- prefix: "cluster.my_cluster.upstream_cx"
80+
load_assignment:
81+
endpoints:
82+
- lb_endpoints:
83+
- endpoint:
84+
address:
85+
socket_address:
86+
address: 127.0.0.1
87+
port_value: 11001
88+
)EOF";
89+
90+
auto info = createClusterInfo(parseClusterFromV3Yaml(yaml));
91+
ASSERT_NE(nullptr, info);
92+
info->trafficStats();
93+
94+
// "cluster.my_cluster.upstream_cx_total" starts with the inclusion prefix — accepted.
95+
EXPECT_NE("", info->statsScope().counterFromString("upstream_cx_total").name());
96+
97+
// "cluster.my_cluster.upstream_rq_total" does not match the prefix — rejected.
98+
EXPECT_EQ("", info->statsScope().counterFromString("upstream_rq_total").name());
99+
}
100+
101+
// Verify that a cluster with an exclusion list rejects only the listed stats.
102+
TEST_F(ProdClusterInfoFactoryTest, MetadataStatsMatcherExclusionList) {
103+
const std::string yaml = R"EOF(
104+
name: my_cluster
105+
connect_timeout: 0.25s
106+
type: STATIC
107+
lb_policy: ROUND_ROBIN
108+
metadata:
109+
typed_filter_metadata:
110+
envoy.stats_matcher:
111+
"@type": type.googleapis.com/envoy.config.metrics.v3.StatsMatcher
112+
exclusion_list:
113+
patterns:
114+
- prefix: "cluster.my_cluster.upstream_rq"
115+
load_assignment:
116+
endpoints:
117+
- lb_endpoints:
118+
- endpoint:
119+
address:
120+
socket_address:
121+
address: 127.0.0.1
122+
port_value: 11001
123+
)EOF";
124+
125+
auto info = createClusterInfo(parseClusterFromV3Yaml(yaml));
126+
ASSERT_NE(nullptr, info);
127+
info->trafficStats();
128+
129+
// "cluster.my_cluster.upstream_cx_total" does not match the exclusion prefix — accepted.
130+
EXPECT_NE("", info->statsScope().counterFromString("upstream_cx_total").name());
131+
132+
// "cluster.my_cluster.upstream_rq_total" matches the exclusion prefix — rejected.
133+
EXPECT_EQ("", info->statsScope().counterFromString("upstream_rq_total").name());
134+
}
135+
136+
} // namespace
137+
} // namespace Upstream
138+
} // namespace Envoy

0 commit comments

Comments
 (0)