Implement wake system capabilities using `timerfd_*` interface (linux)
Motivation
Since systemd v254 (next release), local user sessions will be granted the `CAP_WAKE_ALARM`
capability by default (link). Therefore applications will get access to the `CLOCK_REALTIME_ALARM`
option in the Linux kernel timer interface (aka POSIX timers), which signals the kernel to automatically wake the system upon timer expiration.
Implementation
The minimal applicable implementation consists of 2 syscalls, `timerfd_create` and `timerfd_settime`; see the examples below. It can individually manage multiple timer instances, referenced by fds, thus avoiding the need for centralized queue logic and blind altering of the hardware RTC.
Examples
How to run examples without `cap_wake_alarm`
$ # Required capability can be set with capsh(1).
$ capsh --current # List current capabilities
Current: =
Current IAB:
$ sudo capsh --user=$USER --{caps,inh,addamb}='cap_wake_alarm+epi' -- -c "capsh --current"
Current: cap_wake_alarm=eip
Current IAB: ^cap_wake_alarm
$ sudo capsh --user=$USER --{caps,inh,addamb}='cap_wake_alarm+epi' -- -c "<executable>"
...
Simple single timer
$ gcc main.c -o ./a.out
$ ... "time ./a.out 2"
________________________
Executed in 2.00 secs
$ cat main.c
/*
* System wake up example using POSIX-like timer (linux only)
*
* Program sets a timer and then waits for an alarm.
* Note that `CAP_WAKE_ALARM` capability must be set.
*/
#include <assert.h>
#include <stdint.h>
#include <unistd.h>
#include <errno.h>
#include <stdio.h>
#include <stdlib.h>
#include <time.h>
#include <sys/timerfd.h>
/*
 * Arm a one-shot `CLOCK_BOOTTIME_ALARM` timer and block until it expires.
 *
 * argv[1] (optional) is the relative timeout in seconds. When it is missing
 * or parses to 0 the timer stays disarmed and the program waits forever.
 *
 * Returns 0 once the alarm has fired, otherwise the errno value of the
 * failing call.
 */
int main(int argc, char* argv[]) {
    time_t seconds = 0;
    if (argc > 1) {
        seconds = atol(argv[1]);
    }

    // `timerfd_create(2)`
    int timer_fd = timerfd_create(CLOCK_BOOTTIME_ALARM, 0);
    if (timer_fd < 0) {
        // Save errno first: perror() is allowed to overwrite it
        int err = errno;
        perror("timerfd_create");
        // `EPERM` - `CAP_WAKE_ALARM` capability is not set
        // `EINVAL` - Alarm clock is not supported
        return err;
    }

    // Relative expiration time (named `spec` so it does not shadow `time()`)
    struct itimerspec spec = {
        .it_value.tv_sec = seconds,
    };
    if (seconds == 0) {
        printf("Alarm will never ring\n");
    }

    // `timerfd_settime(2)`
    if (timerfd_settime(timer_fd, 0, &spec, 0) != 0) {
        int err = errno;
        perror("timerfd_settime");
        close(timer_fd);
        return err;
    }

    // Wait for the alarm. The read must not live inside assert(): building
    // with -DNDEBUG would remove the call and the program would not wait.
    uint64_t expirations = 0;
    ssize_t got = read(timer_fd, &expirations, sizeof(expirations));
    if (got != (ssize_t)sizeof(expirations)) {
        int err = errno;
        if (got < 0) {
            perror("read");
        } else {
            err = EIO;
            fprintf(stderr, "read: unexpected short read\n");
        }
        close(timer_fd);
        return err;
    }

    close(timer_fd);
    return 0;
}
Multiple timers awaiting in parallel + demonstrating cancel behavior
$ # prefix timeout with "c" - to close timer fd, and/or "d" - to disarm timer, after arming
$ # program will exit when all not closed or disarmed timers expire
$ gcc main.c -o ./a.out
$ ... "./a.out cd1 c2 d3 4"
Disarm timer 0
Disarm timer 2
Close timer 0
Close timer 1
Timer 3 expired after 4 second(s)
$ cat main.c
#include <stdbool.h>
#include <assert.h>
#include <stdint.h>
#include <unistd.h>
#include <errno.h>
#include <stdio.h>
#include <stdlib.h>
#include <time.h>
#include <sys/timerfd.h>
#include <sys/select.h>
/*
 * Arm (or disarm) a timerfd with a relative timeout.
 *
 * timer_fd - descriptor obtained from `timerfd_create(2)`
 * seconds  - relative expiration time in seconds; 0 disarms the timer
 *
 * Returns 0 on success, or the negated errno of `timerfd_settime(2)` after
 * printing a diagnostic to stderr.
 */
int arm_timer(int timer_fd, long int seconds) {
    // Relative expiration time (named `spec` so it does not shadow `time()`)
    struct itimerspec spec = {
        .it_value.tv_sec = seconds,
    };
    // `timerfd_settime(2)`
    if (timerfd_settime(timer_fd, 0, &spec, 0) != 0) {
        // Capture errno before perror(), which may overwrite it
        int err = errno;
        perror("timerfd_settime");
        return -err;
    }
    return 0;
}
/*
 * Create a new `CLOCK_BOOTTIME_ALARM` timerfd.
 *
 * Returns the new file descriptor (>= 0) on success, or the negated errno of
 * `timerfd_create(2)` after printing a diagnostic to stderr.
 */
int init_timer() {
    // `timerfd_create(2)`
    int timer_fd = timerfd_create(CLOCK_BOOTTIME_ALARM, 0);
    if (timer_fd < 0) {
        // Capture errno before perror(), which may overwrite it
        int err = errno;
        perror("timerfd_create");
        // `EPERM` - `CAP_WAKE_ALARM` capability is not set
        // `EINVAL` - Alarm clock is not supported
        return -err;
    }
    return timer_fd;
}
/*
 * Demo driver: creates one alarm timer per command-line argument and waits
 * for those that were neither closed nor disarmed.
 *
 * Each argument has the form `[c|d]*<seconds>`: a `c` prefix closes the timer
 * fd after arming, a `d` prefix disarms the timer after arming. The program
 * exits once no armed, open timer is left.
 *
 * Returns 0 on success and 1 on any failure (a diagnostic is printed).
 */
int main(int argc, char* argv[]) {
    int rc = 1;
    time_t start = time(0);
    int total = argc - 1;
    // Number of live entries at the front of the parallel arrays below
    int len = 0;

    // Nothing to wait for
    if (total <= 0) {
        return 0;
    }

    int* timers = calloc(total, sizeof(*timers));
    int* timers_id = calloc(total, sizeof(*timers_id));
    bool* timers_disarm = calloc(total, sizeof(*timers_disarm));
    bool* timers_close = calloc(total, sizeof(*timers_close));
    if (!timers || !timers_id || !timers_disarm || !timers_close) {
        perror("calloc");
        goto cleanup;
    }

    // Parse arguments, create and arm every timer
    for (int i = 0; i < total; ++i) {
        bool want_disarm = false;
        bool want_close = false;
        const char* arg = argv[i + 1];
        // Consume any mix of leading 'd' / 'c' flags
        for (;; ++arg) {
            if (*arg == 'd') {
                want_disarm = true;
            } else if (*arg == 'c') {
                want_close = true;
            } else {
                break;
            }
        }
        long seconds = atol(arg);
        if (seconds == 0) {
            printf("Alarm will never ring for timer %i\n", i);
        }
        int fd = init_timer();
        if (fd < 0) {
            goto cleanup;
        }
        // Register the fd before anything else can fail so cleanup closes it
        timers[len] = fd;
        timers_id[len] = i;
        timers_disarm[len] = want_disarm;
        timers_close[len] = want_close;
        len += 1;
        // FD_SET() on a descriptor >= FD_SETSIZE is undefined behavior
        if (fd >= FD_SETSIZE) {
            fprintf(stderr, "timer fd %i does not fit into fd_set\n", fd);
            goto cleanup;
        }
        if (arm_timer(fd, seconds) != 0) {
            goto cleanup;
        }
    }

    // Disarm first, then close, so a "cd" timer exercises both paths
    for (int i = 0; i < len; ++i) {
        if (timers_disarm[i]) {
            if (arm_timer(timers[i], 0) != 0) {
                goto cleanup;
            }
            printf("Disarm timer %i\n", timers_id[i]);
        }
    }
    for (int i = 0; i < len; ++i) {
        if (timers_close[i]) {
            close(timers[i]);
            // Mark as closed so cleanup does not close it a second time
            timers[i] = -1;
            printf("Close timer %i\n", timers_id[i]);
        }
    }

    while (len > 0) {
        int pending = 0;
        int max_fd = -1;
        fd_set fds;
        FD_ZERO(&fds);
        for (int i = 0; i < len; ++i) {
            if (!timers_close[i] && !timers_disarm[i]) {
                pending += 1;
            }
            if (!timers_close[i]) {
                FD_SET(timers[i], &fds);
                if (timers[i] > max_fd) {
                    max_fd = timers[i];
                }
            }
        }
        // Only closed or disarmed timers are left: none of them can fire
        if (pending == 0) {
            break;
        }

        int nfds = select(max_fd + 1, &fds, 0, 0, 0);
        if (nfds < 0) {
            if (errno == EINTR) {
                continue;
            }
            perror("select: ");
            goto cleanup;
        }

        // Report every expired timer in index order and drop it from the arrays
        for (int j = 0; j < len;) {
            if (timers_close[j] || !FD_ISSET(timers[j], &fds)) {
                ++j;
                continue;
            }
            uint64_t e;
            ssize_t r = read(timers[j], &e, sizeof(e));
            if (r < 0) {
                perror("read: ");
                goto cleanup;
            }
            printf("Timer %i expired after %li second(s)\n", timers_id[j], (long)(time(0) - start));
            close(timers[j]);
            // Shift the tail down; index j now holds the next candidate
            for (int k = j + 1; k < len; ++k) {
                timers[k-1] = timers[k];
                timers_id[k-1] = timers_id[k];
                timers_disarm[k-1] = timers_disarm[k];
                timers_close[k-1] = timers_close[k];
            }
            len -= 1;
        }
    }
    rc = 0;

cleanup:
    // Close whatever is still open (e.g. disarmed timers, or after an error)
    for (int i = 0; i < len; ++i) {
        if (timers[i] >= 0) {
            close(timers[i]);
        }
    }
    free(timers);
    free(timers_id);
    free(timers_disarm);
    free(timers_close);
    return rc;
}
Notes
- Absolute timeout can be set using `CLOCK_REALTIME_ALARM` and `TFD_TIMER_ABSTIME` flags, see `timerfd_settime(2)`.
- A timer created with `timerfd_create` will be disarmed if the associated fd is closed (e.g. when the process terminates).
- Internally, the kernel, while going to suspend, simply sets the RTC to send a wake interrupt on the nearest `*_ALARM` timer expiration.
- The systemd scheduler uses the exact same kernel interface if the `WakeSystem` option is set to `true` in `systemd.timer(5)`.
- You may find relevant information in the discussion of my proposal to implement a similar feature in KDE KAlarm (link).