1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488
489
490
491
492
|
#!/usr/bin/env python3
# SPDX-License-Identifier: (LGPL-2.1 OR BSD-2-Clause)
import argparse
import math
import os
from typing import Optional
from common_metrics import Cycles
from metric import (d_ratio, has_event, max, Event, JsonEncodeMetric,
JsonEncodeMetricGroupDescriptions, Literal, LoadEvents,
Metric, MetricGroup, Select)
# Global command line arguments.
_args = None
_zen_model: int = 1
interval_sec = Event("duration_time")
ins = Event("instructions")
cycles = Event("cycles")
# Number of CPU cycles scaled for SMT.
smt_cycles = Select(cycles / 2, Literal("#smt_on"), cycles)
def AmdBr():
def Total() -> MetricGroup:
br = Event("ex_ret_brn")
br_m_all = Event("ex_ret_brn_misp")
br_clr = Event("ex_ret_brn_cond_misp",
"ex_ret_msprd_brnch_instr_dir_msmtch",
"ex_ret_brn_resync")
br_r = d_ratio(br, interval_sec)
ins_r = d_ratio(ins, br)
misp_r = d_ratio(br_m_all, br)
clr_r = d_ratio(br_clr, interval_sec)
return MetricGroup("lpm_br_total", [
Metric("lpm_br_total_retired",
"The number of branch instructions retired per second.", br_r,
"insn/s"),
Metric(
"lpm_br_total_mispred",
"The number of branch instructions retired, of any type, that were "
"not correctly predicted as a percentage of all branch instrucions.",
misp_r, "100%"),
Metric("lpm_br_total_insn_between_branches",
"The number of instructions divided by the number of branches.",
ins_r, "insn"),
Metric("lpm_br_total_insn_fe_resteers",
"The number of resync branches per second.", clr_r, "req/s")
])
def Taken() -> MetricGroup:
br = Event("ex_ret_brn_tkn")
br_m_tk = Event("ex_ret_brn_tkn_misp")
br_r = d_ratio(br, interval_sec)
ins_r = d_ratio(ins, br)
misp_r = d_ratio(br_m_tk, br)
return MetricGroup("lpm_br_taken", [
Metric("lpm_br_taken_retired",
"The number of taken branches that were retired per second.",
br_r, "insn/s"),
Metric(
"lpm_br_taken_mispred",
"The number of retired taken branch instructions that were "
"mispredicted as a percentage of all taken branches.", misp_r,
"100%"),
Metric(
"lpm_br_taken_insn_between_branches",
"The number of instructions divided by the number of taken branches.",
ins_r, "insn"),
])
def Conditional() -> Optional[MetricGroup]:
global _zen_model
br = Event("ex_ret_brn_cond", "ex_ret_cond")
br_r = d_ratio(br, interval_sec)
ins_r = d_ratio(ins, br)
metrics = [
Metric("lpm_br_cond_retired", "Retired conditional branch instructions.",
br_r, "insn/s"),
Metric("lpm_br_cond_insn_between_branches",
"The number of instructions divided by the number of conditional "
"branches.", ins_r, "insn"),
]
if _zen_model == 2:
br_m_cond = Event("ex_ret_cond_misp")
misp_r = d_ratio(br_m_cond, br)
metrics += [
Metric("lpm_br_cond_mispred",
"Retired conditional branch instructions mispredicted as a "
"percentage of all conditional branches.", misp_r, "100%"),
]
return MetricGroup("lpm_br_cond", metrics)
def Fused() -> MetricGroup:
br = Event("ex_ret_fused_instr", "ex_ret_fus_brnch_inst")
br_r = d_ratio(br, interval_sec)
ins_r = d_ratio(ins, br)
return MetricGroup("lpm_br_cond", [
Metric("lpm_br_fused_retired",
"Retired fused branch instructions per second.", br_r, "insn/s"),
Metric(
"lpm_br_fused_insn_between_branches",
"The number of instructions divided by the number of fused "
"branches.", ins_r, "insn"),
])
def Far() -> MetricGroup:
br = Event("ex_ret_brn_far")
br_r = d_ratio(br, interval_sec)
ins_r = d_ratio(ins, br)
return MetricGroup("lpm_br_far", [
Metric("lpm_br_far_retired", "Retired far control transfers per second.",
br_r, "insn/s"),
Metric(
"lpm_br_far_insn_between_branches",
"The number of instructions divided by the number of far branches.",
ins_r, "insn"),
])
return MetricGroup("lpm_br", [Total(), Taken(), Conditional(), Fused(), Far()],
description="breakdown of retired branch instructions")
def AmdCtxSw() -> MetricGroup:
cs = Event("context\\-switches")
metrics = [
Metric("lpm_cs_rate", "Context switches per second",
d_ratio(cs, interval_sec), "ctxsw/s")
]
ev = Event("instructions")
metrics.append(Metric("lpm_cs_instr", "Instructions per context switch",
d_ratio(ev, cs), "instr/cs"))
ev = Event("cycles")
metrics.append(Metric("lpm_cs_cycles", "Cycles per context switch",
d_ratio(ev, cs), "cycles/cs"))
ev = Event("ls_dispatch.pure_ld", "ls_dispatch.ld_dispatch")
metrics.append(Metric("lpm_cs_loads", "Loads per context switch",
d_ratio(ev, cs), "loads/cs"))
ev = Event("ls_dispatch.pure_st", "ls_dispatch.store_dispatch")
metrics.append(Metric("lpm_cs_stores", "Stores per context switch",
d_ratio(ev, cs), "stores/cs"))
ev = Event("ex_ret_brn_tkn")
metrics.append(Metric("lpm_cs_br_taken", "Branches taken per context switch",
d_ratio(ev, cs), "br_taken/cs"))
return MetricGroup("lpm_cs", metrics,
description=("Number of context switches per second, instructions "
"retired & core cycles between context switches"))
def AmdDtlb() -> Optional[MetricGroup]:
global _zen_model
if _zen_model >= 4:
return None
d_dat = Event("ls_dc_accesses") if _zen_model <= 3 else None
d_h4k = Event("ls_l1_d_tlb_miss.tlb_reload_4k_l2_hit")
d_hcoal = Event(
"ls_l1_d_tlb_miss.tlb_reload_coalesced_page_hit") if _zen_model >= 2 else 0
d_h2m = Event("ls_l1_d_tlb_miss.tlb_reload_2m_l2_hit")
d_h1g = Event("ls_l1_d_tlb_miss.tlb_reload_1g_l2_hit")
d_m4k = Event("ls_l1_d_tlb_miss.tlb_reload_4k_l2_miss")
d_mcoal = Event(
"ls_l1_d_tlb_miss.tlb_reload_coalesced_page_miss") if _zen_model >= 2 else 0
d_m2m = Event("ls_l1_d_tlb_miss.tlb_reload_2m_l2_miss")
d_m1g = Event("ls_l1_d_tlb_miss.tlb_reload_1g_l2_miss")
d_w0 = Event("ls_tablewalker.dc_type0") if _zen_model <= 3 else None
d_w1 = Event("ls_tablewalker.dc_type1") if _zen_model <= 3 else None
walks = d_w0 + d_w1
walks_r = d_ratio(walks, interval_sec)
ins_w = d_ratio(ins, walks)
l1 = d_dat
l1_r = d_ratio(l1, interval_sec)
l2_hits = d_h4k + d_hcoal + d_h2m + d_h1g
l2_miss = d_m4k + d_mcoal + d_m2m + d_m1g
l2_r = d_ratio(l2_hits + l2_miss, interval_sec)
l1_miss = l2_hits + l2_miss + walks
l1_hits = max(l1 - l1_miss, 0)
ins_l = d_ratio(ins, l1_miss)
return MetricGroup("lpm_dtlb", [
MetricGroup("lpm_dtlb_ov", [
Metric("lpm_dtlb_ov_insn_bt_l1_miss",
"DTLB overview: instructions between l1 misses.", ins_l,
"insns"),
Metric("lpm_dtlb_ov_insn_bt_walks",
"DTLB overview: instructions between dtlb page table walks.",
ins_w, "insns"),
]),
MetricGroup("lpm_dtlb_l1", [
Metric("lpm_dtlb_l1_hits",
"DTLB L1 hits as percentage of all DTLB L1 accesses.",
d_ratio(l1_hits, l1), "100%"),
Metric("lpm_dtlb_l1_miss",
"DTLB L1 misses as percentage of all DTLB L1 accesses.",
d_ratio(l1_miss, l1), "100%"),
Metric("lpm_dtlb_l1_reqs", "DTLB L1 accesses per second.", l1_r,
"insns/s"),
]),
MetricGroup("lpm_dtlb_l2", [
Metric("lpm_dtlb_l2_hits",
"DTLB L2 hits as percentage of all DTLB L2 accesses.",
d_ratio(l2_hits, l2_hits + l2_miss), "100%"),
Metric("lpm_dtlb_l2_miss",
"DTLB L2 misses as percentage of all DTLB L2 accesses.",
d_ratio(l2_miss, l2_hits + l2_miss), "100%"),
Metric("lpm_dtlb_l2_reqs", "DTLB L2 accesses per second.", l2_r,
"insns/s"),
MetricGroup("lpm_dtlb_l2_4kb", [
Metric(
"lpm_dtlb_l2_4kb_hits",
"DTLB L2 4kb page size hits as percentage of all DTLB L2 4kb "
"accesses.", d_ratio(d_h4k, d_h4k + d_m4k), "100%"),
Metric(
"lpm_dtlb_l2_4kb_miss",
"DTLB L2 4kb page size misses as percentage of all DTLB L2 4kb"
"accesses.", d_ratio(d_m4k, d_h4k + d_m4k), "100%")
]),
MetricGroup("lpm_dtlb_l2_coalesced", [
Metric(
"lpm_dtlb_l2_coal_hits",
"DTLB L2 coalesced page (16kb) hits as percentage of all DTLB "
"L2 coalesced accesses.", d_ratio(d_hcoal,
d_hcoal + d_mcoal), "100%"),
Metric(
"lpm_dtlb_l2_coal_miss",
"DTLB L2 coalesced page (16kb) misses as percentage of all "
"DTLB L2 coalesced accesses.",
d_ratio(d_mcoal, d_hcoal + d_mcoal), "100%")
]),
MetricGroup("lpm_dtlb_l2_2mb", [
Metric(
"lpm_dtlb_l2_2mb_hits",
"DTLB L2 2mb page size hits as percentage of all DTLB L2 2mb "
"accesses.", d_ratio(d_h2m, d_h2m + d_m2m), "100%"),
Metric(
"lpm_dtlb_l2_2mb_miss",
"DTLB L2 2mb page size misses as percentage of all DTLB L2 "
"accesses.", d_ratio(d_m2m, d_h2m + d_m2m), "100%")
]),
MetricGroup("lpm_dtlb_l2_1g", [
Metric(
"lpm_dtlb_l2_1g_hits",
"DTLB L2 1gb page size hits as percentage of all DTLB L2 1gb "
"accesses.", d_ratio(d_h1g, d_h1g + d_m1g), "100%"),
Metric(
"lpm_dtlb_l2_1g_miss",
"DTLB L2 1gb page size misses as percentage of all DTLB L2 "
"1gb accesses.", d_ratio(d_m1g, d_h1g + d_m1g), "100%")
]),
]),
MetricGroup("lpm_dtlb_walks", [
Metric("lpm_dtlb_walks_reqs", "DTLB page table walks per second.",
walks_r, "walks/s"),
]),
], description="Data TLB metrics")
def AmdItlb():
global _zen_model
l2h = Event("bp_l1_tlb_miss_l2_tlb_hit", "bp_l1_tlb_miss_l2_hit")
l2m = Event("l2_itlb_misses")
l2r = l2h + l2m
itlb_l1_mg = None
l1m = l2r
if _zen_model <= 3:
l1r = Event("ic_fw32")
l1h = max(l1r - l1m, 0)
itlb_l1_mg = MetricGroup("lpm_itlb_l1", [
Metric("lpm_itlb_l1_hits",
"L1 ITLB hits as a perecentage of L1 ITLB accesses.",
d_ratio(l1h, l1h + l1m), "100%"),
Metric("lpm_itlb_l1_miss",
"L1 ITLB misses as a perecentage of L1 ITLB accesses.",
d_ratio(l1m, l1h + l1m), "100%"),
Metric("lpm_itlb_l1_reqs",
"The number of 32B fetch windows transferred from IC pipe to DE "
"instruction decoder per second.", d_ratio(
l1r, interval_sec),
"windows/sec"),
])
return MetricGroup("lpm_itlb", [
MetricGroup("lpm_itlb_ov", [
Metric("lpm_itlb_ov_insn_bt_l1_miss",
"Number of instructions between l1 misses", d_ratio(
ins, l1m), "insns"),
Metric("lpm_itlb_ov_insn_bt_l2_miss",
"Number of instructions between l2 misses", d_ratio(
ins, l2m), "insns"),
]),
itlb_l1_mg,
MetricGroup("lpm_itlb_l2", [
Metric("lpm_itlb_l2_hits",
"L2 ITLB hits as a percentage of all L2 ITLB accesses.",
d_ratio(l2h, l2r), "100%"),
Metric("lpm_itlb_l2_miss",
"L2 ITLB misses as a percentage of all L2 ITLB accesses.",
d_ratio(l2m, l2r), "100%"),
Metric("lpm_itlb_l2_reqs", "ITLB accesses per second.",
d_ratio(l2r, interval_sec), "accesses/sec"),
]),
], description="Instruction TLB breakdown")
def AmdLdSt() -> MetricGroup:
ldst_ld = Event("ls_dispatch.pure_ld", "ls_dispatch.ld_dispatch")
ldst_st = Event("ls_dispatch.pure_st", "ls_dispatch.store_dispatch")
ldst_ldc1 = Event(f"{ldst_ld}/cmask=1/")
ldst_stc1 = Event(f"{ldst_st}/cmask=1/")
ldst_ldc2 = Event(f"{ldst_ld}/cmask=2/")
ldst_stc2 = Event(f"{ldst_st}/cmask=2/")
ldst_ldc3 = Event(f"{ldst_ld}/cmask=3/")
ldst_stc3 = Event(f"{ldst_st}/cmask=3/")
ldst_cyc = Event("ls_not_halted_cyc")
ld_rate = d_ratio(ldst_ld, interval_sec)
st_rate = d_ratio(ldst_st, interval_sec)
ld_v1 = max(ldst_ldc1 - ldst_ldc2, 0)
ld_v2 = max(ldst_ldc2 - ldst_ldc3, 0)
ld_v3 = ldst_ldc3
st_v1 = max(ldst_stc1 - ldst_stc2, 0)
st_v2 = max(ldst_stc2 - ldst_stc3, 0)
st_v3 = ldst_stc3
return MetricGroup("lpm_ldst", [
MetricGroup("lpm_ldst_total", [
Metric("lpm_ldst_total_ld", "Number of loads dispatched per second.",
ld_rate, "insns/sec"),
Metric("lpm_ldst_total_st", "Number of stores dispatched per second.",
st_rate, "insns/sec"),
]),
MetricGroup("lpm_ldst_percent_insn", [
Metric("lpm_ldst_percent_insn_ld",
"Load instructions as a percentage of all instructions.",
d_ratio(ldst_ld, ins), "100%"),
Metric("lpm_ldst_percent_insn_st",
"Store instructions as a percentage of all instructions.",
d_ratio(ldst_st, ins), "100%"),
]),
MetricGroup("lpm_ldst_ret_loads_per_cycle", [
Metric(
"lpm_ldst_ret_loads_per_cycle_1",
"Load instructions retiring in 1 cycle as a percentage of all "
"unhalted cycles.", d_ratio(ld_v1, ldst_cyc), "100%"),
Metric(
"lpm_ldst_ret_loads_per_cycle_2",
"Load instructions retiring in 2 cycles as a percentage of all "
"unhalted cycles.", d_ratio(ld_v2, ldst_cyc), "100%"),
Metric(
"lpm_ldst_ret_loads_per_cycle_3",
"Load instructions retiring in 3 or more cycles as a percentage"
"of all unhalted cycles.", d_ratio(ld_v3, ldst_cyc), "100%"),
]),
MetricGroup("lpm_ldst_ret_stores_per_cycle", [
Metric(
"lpm_ldst_ret_stores_per_cycle_1",
"Store instructions retiring in 1 cycle as a percentage of all "
"unhalted cycles.", d_ratio(st_v1, ldst_cyc), "100%"),
Metric(
"lpm_ldst_ret_stores_per_cycle_2",
"Store instructions retiring in 2 cycles as a percentage of all "
"unhalted cycles.", d_ratio(st_v2, ldst_cyc), "100%"),
Metric(
"lpm_ldst_ret_stores_per_cycle_3",
"Store instructions retiring in 3 or more cycles as a percentage"
"of all unhalted cycles.", d_ratio(st_v3, ldst_cyc), "100%"),
]),
MetricGroup("lpm_ldst_insn_bt", [
Metric("lpm_ldst_insn_bt_ld", "Number of instructions between loads.",
d_ratio(ins, ldst_ld), "insns"),
Metric("lpm_ldst_insn_bt_st", "Number of instructions between stores.",
d_ratio(ins, ldst_st), "insns"),
])
], description="Breakdown of load/store instructions")
def AmdUpc() -> Metric:
ops = Event("ex_ret_ops", "ex_ret_cops")
upc = d_ratio(ops, smt_cycles)
return Metric("lpm_upc", "Micro-ops retired per core cycle (higher is better)",
upc, "uops/cycle")
def Idle() -> Metric:
cyc = Event("msr/mperf/")
tsc = Event("msr/tsc/")
low = max(tsc - cyc, 0)
return Metric(
"lpm_idle",
"Percentage of total wallclock cycles where CPUs are in low power state (C1 or deeper sleep state)",
d_ratio(low, tsc), "100%")
def Rapl() -> MetricGroup:
"""Processor socket power consumption estimate.
Use events from the running average power limit (RAPL) driver.
"""
# Watts = joules/second
# Currently only energy-pkg is supported by AMD:
# https://lore.kernel.org/lkml/20220105185659.643355-1-eranian@google.com/
pkg = Event("power/energy\\-pkg/")
cond_pkg = Select(pkg, has_event(pkg), math.nan)
scale = 2.3283064365386962890625e-10
metrics = [
Metric("lpm_cpu_power_pkg", "",
d_ratio(cond_pkg * scale, interval_sec), "Watts"),
]
return MetricGroup("lpm_cpu_power", metrics,
description="Processor socket power consumption estimates")
def UncoreL3():
acc = Event("l3_lookup_state.all_coherent_accesses_to_l3",
"l3_lookup_state.all_l3_req_typs")
miss = Event("l3_lookup_state.l3_miss",
"l3_comb_clstr_state.request_miss")
acc = max(acc, miss)
hits = acc - miss
return MetricGroup("lpm_l3", [
Metric("lpm_l3_accesses", "L3 victim cache accesses",
d_ratio(acc, interval_sec), "accesses/sec"),
Metric("lpm_l3_hits", "L3 victim cache hit rate",
d_ratio(hits, acc), "100%"),
Metric("lpm_l3_miss", "L3 victim cache miss rate", d_ratio(miss, acc),
"100%"),
], description="L3 cache breakdown per CCX")
def main() -> None:
global _args
global _zen_model
def dir_path(path: str) -> str:
"""Validate path is a directory for argparse."""
if os.path.isdir(path):
return path
raise argparse.ArgumentTypeError(
f'\'{path}\' is not a valid directory')
parser = argparse.ArgumentParser(description="AMD perf json generator")
parser.add_argument(
"-metricgroups", help="Generate metricgroups data", action='store_true')
parser.add_argument("model", help="e.g. amdzen[123]")
parser.add_argument(
'events_path',
type=dir_path,
help='Root of tree containing architecture directories containing json files'
)
_args = parser.parse_args()
directory = f"{_args.events_path}/x86/{_args.model}/"
LoadEvents(directory)
_zen_model = int(_args.model[6:])
all_metrics = MetricGroup("", [
AmdBr(),
AmdCtxSw(),
AmdDtlb(),
AmdItlb(),
AmdLdSt(),
AmdUpc(),
Cycles(),
Idle(),
Rapl(),
UncoreL3(),
])
if _args.metricgroups:
print(JsonEncodeMetricGroupDescriptions(all_metrics))
else:
print(JsonEncodeMetric(all_metrics))
if __name__ == '__main__':
main()
|