summary | refs | log | tree | commit | diff
diff options
context:
space:
mode:
author: Tao Cui <cuitao@kylinos.cn> 2026-05-14 14:50:32 +0800
committer: Tejun Heo <tj@kernel.org> 2026-05-14 11:22:55 -1000
commit: 009bcbd0b201d4dc125eb960a61cb6d4d9fdfc72 (patch)
tree: 1e47f948c6159c55a219ba2272991e9fa0c565c0
parent: c68095c4a4c919cbd7de016fdfa25d19fa918a74 (diff)
cgroup/rdma: add rdma.events to track resource limit exhaustion
Add per-device hierarchical event counters to track when RDMA resource limits are exceeded.

The rdma.events file reports max event counts, propagated upward from the cgroup whose limit was hit to all of its ancestors. This mirrors the design of pids.events, where events are attributed to the cgroup that imposed the limit, not necessarily the cgroup where the allocation was attempted.

Userspace can monitor this file via poll/epoll for real-time notification of resource exhaustion.

Signed-off-by: Tao Cui <cuitao@kylinos.cn>
Signed-off-by: Tejun Heo <tj@kernel.org>
-rw-r--r-- include/linux/cgroup_rdma.h 3
-rw-r--r-- kernel/cgroup/rdma.c 72
2 files changed, 73 insertions, 2 deletions
diff --git a/include/linux/cgroup_rdma.h b/include/linux/cgroup_rdma.h
index 80edae03c313..ac691fe7d3f5 100644
--- a/include/linux/cgroup_rdma.h
+++ b/include/linux/cgroup_rdma.h
@@ -24,6 +24,9 @@ struct rdma_cgroup {
* that belongs to this cgroup.
*/
struct list_head rpools;
+
+ /* Handle for rdma.events */
+ struct cgroup_file events_file;
};
struct rdmacg_device {
diff --git a/kernel/cgroup/rdma.c b/kernel/cgroup/rdma.c
index 4e3bf0bade18..927bbf1eb949 100644
--- a/kernel/cgroup/rdma.c
+++ b/kernel/cgroup/rdma.c
@@ -81,6 +81,9 @@ struct rdmacg_resource_pool {
u64 usage_sum;
/* total number counts which are set to max */
int num_max_cnt;
+
+ /* per-resource hierarchical max event counters */
+ u64 events_max[RDMACG_RESOURCE_MAX];
};
static struct rdma_cgroup *css_rdmacg(struct cgroup_subsys_state *css)
@@ -214,7 +217,8 @@ uncharge_cg_locked(struct rdma_cgroup *cg,
* watermark even after all resources are freed.
*/
for (i = 0; i < RDMACG_RESOURCE_MAX; i++) {
- if (rpool->resources[i].peak)
+ if (rpool->resources[i].peak ||
+ READ_ONCE(rpool->events_max[i]))
return;
}
/*
@@ -226,6 +230,34 @@ uncharge_cg_locked(struct rdma_cgroup *cg,
}
/**
+ * rdmacg_event_locked - fire hierarchical max event when resource limit is hit
+ * @over_cg: cgroup whose limit was exceeded; the upward walk starts here
+ * @device: rdma device whose per-cgroup resource pool is updated
+ * @index: resource type index, used to select the events_max[] slot
+ *
+ * Must be called under rdmacg_mutex; this is checked with
+ * lockdep_assert_held().
+ *
+ * Starting at @over_cg itself and following parent_rdmacg() upward, the
+ * function increments events_max[@index] in each visited cgroup's resource
+ * pool for @device and then calls cgroup_file_notify() on that cgroup's
+ * events_file to notify userspace. The walk ends at the first cgroup that
+ * has no parent, and that cgroup is not counted.
+ *
+ * The pool for each level is obtained with get_cg_rpool_locked(). When that
+ * call returns an error pointer the level is skipped silently (no count, no
+ * notification) and the walk continues with its parent.
+ */
+static void rdmacg_event_locked(struct rdma_cgroup *over_cg,
+ struct rdmacg_device *device,
+ enum rdmacg_resource_type index)
+{
+ struct rdmacg_resource_pool *rpool;
+ struct rdma_cgroup *p;
+
+ lockdep_assert_held(&rdmacg_mutex);
+
+ for (p = over_cg; parent_rdmacg(p); p = parent_rdmacg(p)) {
+ rpool = get_cg_rpool_locked(p, device);
+ if (!IS_ERR(rpool)) {
+ rpool->events_max[index]++;
+ cgroup_file_notify(&p->events_file);
+ }
+ }
+}
+
+/**
* rdmacg_uncharge_hierarchy - hierarchically uncharge rdma resource count
* @cg: pointer to cg to uncharge and all parents in hierarchy
* @device: pointer to rdmacg device
@@ -335,6 +367,8 @@ int rdmacg_try_charge(struct rdma_cgroup **rdmacg,
return 0;
err:
+ if (ret == -EAGAIN)
+ rdmacg_event_locked(p, device, index);
mutex_unlock(&rdmacg_mutex);
rdmacg_uncharge_hierarchy(cg, device, p, index);
return ret;
@@ -494,7 +528,8 @@ static ssize_t rdmacg_resource_set_max(struct kernfs_open_file *of,
int i;
for (i = 0; i < RDMACG_RESOURCE_MAX; i++) {
- if (rpool->resources[i].peak)
+ if (rpool->resources[i].peak ||
+ READ_ONCE(rpool->events_max[i]))
goto dev_err;
}
/*
@@ -569,6 +604,33 @@ static int rdmacg_resource_read(struct seq_file *sf, void *v)
return 0;
}
/**
+ * rdmacg_events_show - seq_show handler that prints per-device max event counts
+ * @sf: seq_file to print into; seq_css(@sf) identifies the cgroup being read
+ * @v: unused
+ *
+ * Takes rdmacg_mutex for the whole walk over the rdmacg_devices list. For
+ * every device on that list one line is emitted:
+ *
+ *   <device name> <resource name>.max=<count> <resource name>.max=<count> ...
+ *
+ * with one "<resource name>.max=<count>" entry per index below
+ * RDMACG_RESOURCE_MAX, separated by single spaces and terminated by a
+ * newline. Counts come from the cgroup's resource pool for that device;
+ * when find_cg_rpool_locked() finds no pool the device is still listed and
+ * every count is printed as 0.
+ *
+ * Return: always 0.
+ */
+static int rdmacg_events_show(struct seq_file *sf, void *v)
+{
+ struct rdma_cgroup *cg = css_rdmacg(seq_css(sf));
+ struct rdmacg_resource_pool *rpool;
+ struct rdmacg_device *device;
+ int i;
+
+ mutex_lock(&rdmacg_mutex);
+
+ list_for_each_entry(device, &rdmacg_devices, dev_node) {
+ rpool = find_cg_rpool_locked(cg, device); /* may be NULL; handled when printing below */
+
+ seq_printf(sf, "%s ", device->name);
+ for (i = 0; i < RDMACG_RESOURCE_MAX; i++) {
+ seq_printf(sf, "%s.max=%llu",
+ rdmacg_resource_names[i],
+ rpool ? READ_ONCE(rpool->events_max[i]) : 0ULL);
+ if (i < RDMACG_RESOURCE_MAX - 1)
+ seq_putc(sf, ' ');
+ }
+ seq_putc(sf, '\n');
+ }
+
+ mutex_unlock(&rdmacg_mutex);
+ return 0;
+}
+
static struct cftype rdmacg_files[] = {
{
.name = "max",
@@ -589,6 +651,12 @@ static struct cftype rdmacg_files[] = {
.private = RDMACG_RESOURCE_TYPE_PEAK,
.flags = CFTYPE_NOT_ON_ROOT,
},
+ {
+ .name = "events",
+ .seq_show = rdmacg_events_show,
+ .file_offset = offsetof(struct rdma_cgroup, events_file),
+ .flags = CFTYPE_NOT_ON_ROOT,
+ },
{ } /* terminate */
};