diff options
| author | Linus Torvalds <torvalds@linux-foundation.org> | 2026-06-15 13:51:27 +0530 |
|---|---|---|
| committer | Linus Torvalds <torvalds@linux-foundation.org> | 2026-06-15 13:51:27 +0530 |
| commit | 2d6d57f889f3a5e7d19009c560ea2002cdde9fb8 (patch) | |
| tree | aa1e373320daae5eaf98d0a58a2ae65858087cd8 /kernel | |
| parent | a53fcff8fc7530f59a8171824ed586200df724a0 (diff) | |
| parent | bc484a5096732cd858771cccd3164ec985bdc03d (diff) | |
Merge tag 'timers-ptp-2026-06-13' of gitolite.kernel.org:pub/scm/linux/kernel/git/tip/tip
Pull timekeeping updates from Thomas Gleixner:
"Updates for NTP/timekeeping and PTP:
- Expand timekeeping snapshot mechanisms
The various snapshot functions are mostly used for PTP to collect
"atomic" snapshots of various involved clocks.
They lack support for the recently introduced AUX clocks and do not
provide the underlying counter value (e.g. TSC) to user space.
Exposing the counter value snapshot allows for better control and
steering.
Convert the hard wired ktime_get_snapshot() to take a clock ID,
which allows the caller to select the clock ID to be captured along
with CLOCK_MONONOTONIC_RAW. Additionally capture the underlying
hardware counter value and the clock source ID of the counter.
Expand the hardware based snapshot capture where devices provide a
mechanism to snapshot the hardware PTP clock and the system counter
(usually via PCI/PTM) to support AUX clocks and also provide the
captured counter value back to the caller and not only the clock
timestamps derived from it.
- Add a new optional read_snapshot() callback to clocksources
That is required to capture atomic snapshots from clocksources
which are derived from TSC with a scaling mechanism (e.g. Hyper-V,
KVMclock).
The value pair is handed back in the snapshot structure to the
callers, so they can do the necessary correlations in a more
precise way.
This touches usage sites of the affected functions and data structure
all over the tree, but stays fully backwards compatible for the
existing user space exposed interfaces. New PTP IOCTLs will provide
access to the extended functionality in later kernel versions"
* tag 'timers-ptp-2026-06-13' of gitolite.kernel.org:pub/scm/linux/kernel/git/tip/tip: (28 commits)
ptp: vmclock: Use hw_cycles from snapshot for precise TSC pairing
x86/kvmclock: Implement read_snapshot() for kvmclock clocksource
clocksource/hyperv: Implement read_snapshot() for TSC page clocksource
timekeeping: Add clocksource read_snapshot() method and hw_cycles to snapshot
ptp: Switch to ktime_get_snapshot_id() for pre/post timestamps
timekeeping: Add support for AUX clock cross timestamping
timekeeping: Remove system_device_crosststamp::sys_realtime
ALSA: hda/common: Use system_device_crosststamp::sys_systime
wifi: iwlwifi: Use system_device_crosststamp::sys_systime
ptp: Use system_device_crosststamp::sys_systime
timekeeping: Prepare for cross timestamps on arbitrary clock IDs
timekeeping: Remove ktime_get_snapshot()
virtio_rtc: Use provided clock ID for history snapshot
net/mlx5: Use provided clock ID for history snapshot
igc: Use provided clock ID for history snapshot
ice/ptp: Use provided clock ID for history snapshot
wifi: iwlwifi: Adopt PTP cross timestamps to core changes
timekeeping: Add CLOCK ID to system_device_crosststamp
timekeeping: Add system_counterval_t to struct system_device_crosststamp
timekeeping: Add CLOCK_AUX support for ktime_get_snapshot_id()
...
Diffstat (limited to 'kernel')
| -rw-r--r-- | kernel/time/timekeeping.c | 235 |
1 files changed, 158 insertions, 77 deletions
diff --git a/kernel/time/timekeeping.c b/kernel/time/timekeeping.c index c493a4010305..0d5b67f609bb 100644 --- a/kernel/time/timekeeping.c +++ b/kernel/time/timekeeping.c @@ -67,6 +67,7 @@ static inline bool tk_is_aux(const struct timekeeper *tk) { return tk->id >= TIMEKEEPER_AUX_FIRST && tk->id <= TIMEKEEPER_AUX_LAST; } +static inline struct tk_data *aux_get_tk_data(clockid_t id); #else static inline bool tk_get_aux_ts64(unsigned int tkid, struct timespec64 *ts) { @@ -77,6 +78,10 @@ static inline bool tk_is_aux(const struct timekeeper *tk) { return false; } +static inline struct tk_data *aux_get_tk_data(clockid_t id) +{ + return NULL; +} #endif static inline void tk_update_aux_offs(struct timekeeper *tk, ktime_t offs) @@ -315,6 +320,7 @@ static __always_inline u64 tk_clock_read(const struct tk_read_base *tkr) return clock->read(clock); } + static inline void clocksource_disable_inline_read(void) { } static inline void clocksource_enable_inline_read(void) { } #endif @@ -1182,44 +1188,107 @@ noinstr time64_t __ktime_get_real_seconds(void) return tk->xtime_sec; } -/** - * ktime_get_snapshot - snapshots the realtime/monotonic raw clocks with counter - * @systime_snapshot: pointer to struct receiving the system time snapshot - */ -void ktime_get_snapshot(struct system_time_snapshot *systime_snapshot) +static inline u64 tk_clock_read_snapshot(const struct tk_read_base *tkr, + struct clocksource_hw_snapshot *chs) { - struct timekeeper *tk = &tk_core.timekeeper; + struct clocksource *clock = READ_ONCE(tkr->clock); + + if (unlikely(clock->read_snapshot)) + return clock->read_snapshot(clock, chs); + + return clock->read(clock); +} + + +/** + * ktime_get_snapshot_id - Simultaneously snapshot a given clock ID with + * CLOCK_MONOTONIC_RAW and the underlying + * clocksource counter value. + * @clock_id: The clock ID to snapshot + * @systime_snapshot: Pointer to struct receiving the system time snapshot + */ +void ktime_get_snapshot_id(clockid_t clock_id, struct system_time_snapshot *systime_snapshot) +{ + ktime_t base_raw, base_sys, offs_sys, *offs, offs_zero = 0; + u64 nsec_raw, nsec_sys, now; + struct timekeeper *tk; + struct tk_data *tkd; unsigned int seq; - ktime_t base_raw; - ktime_t base_real; - ktime_t base_boot; - u64 nsec_raw; - u64 nsec_real; - u64 now; - WARN_ON_ONCE(timekeeping_suspended); + /* Invalidate the snapshot for all failure cases */ + systime_snapshot->valid = false; + + if (WARN_ON_ONCE(timekeeping_suspended)) + return; + + switch (clock_id) { + case CLOCK_REALTIME: + tkd = &tk_core; + offs = &tk_core.timekeeper.offs_real; + break; + /* Map RAW to MONOTONIC so the loop below is trivial */ + case CLOCK_MONOTONIC_RAW: + case CLOCK_MONOTONIC: + tkd = &tk_core; + offs = &offs_zero; + break; + case CLOCK_BOOTTIME: + tkd = &tk_core; + offs = &tk_core.timekeeper.offs_boot; + break; + case CLOCK_AUX ... CLOCK_AUX_LAST: + tkd = aux_get_tk_data(clock_id); + if (!tkd) + return; + offs = &tkd->timekeeper.offs_aux; + break; + default: + WARN_ON_ONCE(1); + return; + } + + tk = &tkd->timekeeper; do { - seq = read_seqcount_begin(&tk_core.seq); - now = tk_clock_read(&tk->tkr_mono); + struct clocksource_hw_snapshot chs = { }; + + seq = read_seqcount_begin(&tkd->seq); + + /* Aux clocks can be invalid */ + if (!tk->clock_valid) + return; + + now = tk_clock_read_snapshot(&tk->tkr_mono, &chs); systime_snapshot->cs_id = tk->tkr_mono.clock->id; + + systime_snapshot->hw_cycles = chs.hw_cycles; + systime_snapshot->hw_csid = chs.hw_csid; + systime_snapshot->cs_was_changed_seq = tk->cs_was_changed_seq; systime_snapshot->clock_was_set_seq = tk->clock_was_set_seq; - base_real = ktime_add(tk->tkr_mono.base, - tk_core.timekeeper.offs_real); - base_boot = ktime_add(tk->tkr_mono.base, - tk_core.timekeeper.offs_boot); + + base_sys = tk->tkr_mono.base; + offs_sys = *offs; base_raw = tk->tkr_raw.base; - nsec_real = timekeeping_cycles_to_ns(&tk->tkr_mono, now); - nsec_raw = timekeeping_cycles_to_ns(&tk->tkr_raw, now); - } while (read_seqcount_retry(&tk_core.seq, seq)); + + nsec_sys = timekeeping_cycles_to_ns(&tk->tkr_mono, now); + nsec_raw = timekeeping_cycles_to_ns(&tk->tkr_raw, now); + } while (read_seqcount_retry(&tkd->seq, seq)); systime_snapshot->cycles = now; - systime_snapshot->real = ktime_add_ns(base_real, nsec_real); - systime_snapshot->boot = ktime_add_ns(base_boot, nsec_real); - systime_snapshot->raw = ktime_add_ns(base_raw, nsec_raw); + systime_snapshot->systime = ktime_add_ns(base_sys, offs_sys + nsec_sys); + systime_snapshot->monoraw = ktime_add_ns(base_raw, nsec_raw); + + /* + * Special case for PTP. Just transfer the raw time into sys, + * so the call sites can consistently use snap::systime. + */ + if (clock_id == CLOCK_MONOTONIC_RAW) + systime_snapshot->systime = systime_snapshot->monoraw; + /* Tell the consumer that this snapshot is valid */ + systime_snapshot->valid = true; } -EXPORT_SYMBOL_GPL(ktime_get_snapshot); +EXPORT_SYMBOL_GPL(ktime_get_snapshot_id); /* Scale base by mult/div checking for overflow */ static int scale64_check_overflow(u64 mult, u64 div, u64 *base) @@ -1262,7 +1331,7 @@ static int adjust_historical_crosststamp(struct system_time_snapshot *history, struct system_device_crosststamp *ts) { struct timekeeper *tk = &tk_core.timekeeper; - u64 corr_raw, corr_real; + u64 corr_raw, corr_sys; bool interp_forward; int ret; @@ -1279,8 +1348,7 @@ static int adjust_historical_crosststamp(struct system_time_snapshot *history, * Scale the monotonic raw time delta by: * partial_history_cycles / total_history_cycles */ - corr_raw = (u64)ktime_to_ns( - ktime_sub(ts->sys_monoraw, history->raw)); + corr_raw = (u64)ktime_to_ns(ktime_sub(ts->sys_monoraw, history->monoraw)); ret = scale64_check_overflow(partial_history_cycles, total_history_cycles, &corr_raw); if (ret) @@ -1288,30 +1356,29 @@ static int adjust_historical_crosststamp(struct system_time_snapshot *history, /* * If there is a discontinuity in the history, scale monotonic raw - * correction by: - * mult(real)/mult(raw) yielding the realtime correction - * Otherwise, calculate the realtime correction similar to monotonic - * raw calculation + * correction by: + * mult(sys)/mult(raw) yielding the system time correction + * + * Otherwise, calculate the system time correction similar to monotonic + * raw calculation */ if (discontinuity) { - corr_real = mul_u64_u32_div - (corr_raw, tk->tkr_mono.mult, tk->tkr_raw.mult); + corr_sys = mul_u64_u32_div(corr_raw, tk->tkr_mono.mult, tk->tkr_raw.mult); } else { - corr_real = (u64)ktime_to_ns( - ktime_sub(ts->sys_realtime, history->real)); - ret = scale64_check_overflow(partial_history_cycles, - total_history_cycles, &corr_real); + corr_sys = (u64)ktime_to_ns(ktime_sub(ts->sys_systime, history->systime)); + ret = scale64_check_overflow(partial_history_cycles, total_history_cycles, + &corr_sys); if (ret) return ret; } - /* Fixup monotonic raw and real time time values */ + /* Fixup monotonic raw and system time time values */ if (interp_forward) { - ts->sys_monoraw = ktime_add_ns(history->raw, corr_raw); - ts->sys_realtime = ktime_add_ns(history->real, corr_real); + ts->sys_monoraw = ktime_add_ns(history->monoraw, corr_raw); + ts->sys_systime = ktime_add_ns(history->systime, corr_sys); } else { ts->sys_monoraw = ktime_sub_ns(ts->sys_monoraw, corr_raw); - ts->sys_realtime = ktime_sub_ns(ts->sys_realtime, corr_real); + ts->sys_systime = ktime_sub_ns(ts->sys_systime, corr_sys); } return 0; @@ -1368,6 +1435,8 @@ static bool convert_base_to_cs(struct system_counterval_t *scv) return false; scv->cycles += base->offset; + /* Set the clocksource ID as scv::cycles is now clocksource based */ + scv->cs_id = cs->id; return true; } @@ -1435,11 +1504,11 @@ EXPORT_SYMBOL_GPL(ktime_real_to_base_clock); /** * get_device_system_crosststamp - Synchronously capture system/device timestamp - * @get_time_fn: Callback to get simultaneous device time and - * system counter from the device driver + * @get_time_fn: Callback to get simultaneous device time and system counter + * from the device driver * @ctx: Context passed to get_time_fn() - * @history_begin: Historical reference point used to interpolate system - * time when counter provided by the driver is before the current interval + * @history_begin: Historical reference point used to interpolate system time when + * the counter value provided by the driver is before the current interval * @xtstamp: Receives simultaneously captured system and device time * * Reads a timestamp from a device and correlates it to system time @@ -1452,36 +1521,54 @@ int get_device_system_crosststamp(int (*get_time_fn) struct system_time_snapshot *history_begin, struct system_device_crosststamp *xtstamp) { - struct system_counterval_t system_counterval = {}; - struct timekeeper *tk = &tk_core.timekeeper; - u64 cycles, now, interval_start; - unsigned int clock_was_set_seq = 0; - ktime_t base_real, base_raw; - u64 nsec_real, nsec_raw; + u64 syscnt_cycles, cycles, now, interval_start; + unsigned int seq, clock_was_set_seq = 0; + ktime_t base_sys, base_raw, *offs; + u64 nsec_sys, nsec_raw; u8 cs_was_changed_seq; - unsigned int seq; bool do_interp; + struct timekeeper *tk; + struct tk_data *tkd; int ret; + switch (xtstamp->clock_id) { + case CLOCK_REALTIME: + tkd = &tk_core; + offs = &tk_core.timekeeper.offs_real; + break; + case CLOCK_AUX ... CLOCK_AUX_LAST: + tkd = aux_get_tk_data(xtstamp->clock_id); + if (!tkd) + return -ENODEV; + offs = &tkd->timekeeper.offs_aux; + break; + default: + WARN_ON_ONCE(1); + return -ENODEV; + } + + tk = &tkd->timekeeper; + do { - seq = read_seqcount_begin(&tk_core.seq); + seq = read_seqcount_begin(&tkd->seq); /* * Try to synchronously capture device time and a system * counter value calling back into the device driver */ - ret = get_time_fn(&xtstamp->device, &system_counterval, ctx); + ret = get_time_fn(&xtstamp->device, &xtstamp->sys_counter, ctx); if (ret) return ret; /* * Verify that the clocksource ID associated with the captured * system counter value is the same as for the currently - * installed timekeeper clocksource + * installed timekeeper clocksource and convert to it. */ - if (system_counterval.cs_id == CSID_GENERIC || - !convert_base_to_cs(&system_counterval)) + if (xtstamp->sys_counter.cs_id == CSID_GENERIC || + !convert_base_to_cs(&xtstamp->sys_counter)) return -ENODEV; - cycles = system_counterval.cycles; + + cycles = syscnt_cycles = xtstamp->sys_counter.cycles; /* * Check whether the system counter value provided by the @@ -1498,15 +1585,14 @@ int get_device_system_crosststamp(int (*get_time_fn) do_interp = false; } - base_real = ktime_add(tk->tkr_mono.base, - tk_core.timekeeper.offs_real); + base_sys = ktime_add(tk->tkr_mono.base, *offs); base_raw = tk->tkr_raw.base; - nsec_real = timekeeping_cycles_to_ns(&tk->tkr_mono, cycles); + nsec_sys = timekeeping_cycles_to_ns(&tk->tkr_mono, cycles); nsec_raw = timekeeping_cycles_to_ns(&tk->tkr_raw, cycles); - } while (read_seqcount_retry(&tk_core.seq, seq)); + } while (read_seqcount_retry(&tkd->seq, seq)); - xtstamp->sys_realtime = ktime_add_ns(base_real, nsec_real); + xtstamp->sys_systime = ktime_add_ns(base_sys, nsec_sys); xtstamp->sys_monoraw = ktime_add_ns(base_raw, nsec_raw); /* @@ -1523,24 +1609,19 @@ int get_device_system_crosststamp(int (*get_time_fn) * clocksource change */ if (!history_begin || - !timestamp_in_interval(history_begin->cycles, - cycles, system_counterval.cycles) || + !timestamp_in_interval(history_begin->cycles, cycles, syscnt_cycles) || history_begin->cs_was_changed_seq != cs_was_changed_seq) return -EINVAL; - partial_history_cycles = cycles - system_counterval.cycles; + + partial_history_cycles = cycles - syscnt_cycles; total_history_cycles = cycles - history_begin->cycles; - discontinuity = - history_begin->clock_was_set_seq != clock_was_set_seq; + discontinuity = history_begin->clock_was_set_seq != clock_was_set_seq; - ret = adjust_historical_crosststamp(history_begin, - partial_history_cycles, - total_history_cycles, - discontinuity, xtstamp); - if (ret) - return ret; + ret = adjust_historical_crosststamp(history_begin, partial_history_cycles, + total_history_cycles, discontinuity, xtstamp); } - return 0; + return ret; } EXPORT_SYMBOL_GPL(get_device_system_crosststamp); |
