ca41d3572e85ea5e1b8ba1744fa4551bedb87c07
[multipath-tools/.git] / libmultipath / uevent.c
1 /*
2  * uevent.c - trigger upon netlink uevents from the kernel
3  *
4  *      Only kernels from version 2.6.10* on provide the uevent netlink socket.
5  *      Until the libc-kernel-headers are updated, you need to compile with:
6  *
7  *        gcc -I /lib/modules/`uname -r`/build/include -o uevent_listen uevent_listen.c
8  *
9  * Copyright (C) 2004 Kay Sievers <kay.sievers@vrfy.org>
10  *
11  *      This program is free software; you can redistribute it and/or modify it
12  *      under the terms of the GNU General Public License as published by the
13  *      Free Software Foundation version 2 of the License.
14  *
15  *      This program is distributed in the hope that it will be useful, but
16  *      WITHOUT ANY WARRANTY; without even the implied warranty of
17  *      MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
18  *      General Public License for more details.
19  *
20  *      You should have received a copy of the GNU General Public License along
21  *      with this program.  If not, see <http://www.gnu.org/licenses/>.
22  *
23  */
24
25 #include <unistd.h>
26 #include <stdio.h>
27 #include <errno.h>
28 #include <stdlib.h>
29 #include <stddef.h>
30 #include <string.h>
31 #include <fcntl.h>
32 #include <time.h>
33 #include <sys/socket.h>
34 #include <sys/user.h>
35 #include <sys/un.h>
36 #include <sys/poll.h>
37 #include <linux/types.h>
38 #include <linux/netlink.h>
39 #include <pthread.h>
40 #include <signal.h>
41 #include <limits.h>
42 #include <sys/mman.h>
43 #include <libudev.h>
44 #include <errno.h>
45
46 #include "memory.h"
47 #include "debug.h"
48 #include "list.h"
49 #include "uevent.h"
50 #include "vector.h"
51
/*
 * Signature of the callback run for each queued uevent. trigger_data is
 * the opaque pointer that was handed to uevent_dispatch().
 */
typedef int (uev_trigger)(struct uevent *, void * trigger_data);

/* NOTE(review): never assigned in this file - presumably set by the thread creator; confirm */
pthread_t uevq_thr;
/* Queue of received uevents waiting to be serviced by uevent_dispatch() */
LIST_HEAD(uevq);
/* Mutex taken around accesses to uevq; used through uevq_lockp */
pthread_mutex_t uevq_lock = PTHREAD_MUTEX_INITIALIZER;
pthread_mutex_t *uevq_lockp = &uevq_lock;
/* Signalled when events are appended to uevq and when the queue is stopped */
pthread_cond_t uev_cond = PTHREAD_COND_INITIALIZER;
pthread_cond_t *uev_condp = &uev_cond;
/* Callback installed by uevent_dispatch(); reset to NULL by uevq_stop() */
uev_trigger *my_uev_trigger;
/* Opaque argument passed along to my_uev_trigger */
void * my_trigger_data;
/* Set to 1 by uevent_dispatch() once it has picked up a batch, 0 before it waits */
int servicing_uev;
63
64 int is_uevent_busy(void)
65 {
66         int empty;
67
68         pthread_mutex_lock(uevq_lockp);
69         empty = list_empty(&uevq);
70         pthread_mutex_unlock(uevq_lockp);
71         return (!empty || servicing_uev);
72 }
73
74 struct uevent * alloc_uevent (void)
75 {
76         struct uevent *uev = MALLOC(sizeof(struct uevent));
77
78         if (uev)
79                 INIT_LIST_HEAD(&uev->node);
80
81         return uev;
82 }
83
/*
 * Initialise a thread attribute object.
 *
 * @attr:      attribute object to initialise
 * @stacksize: requested stack size; raised to PTHREAD_STACK_MIN if smaller
 * @detached:  non-zero to create threads in the detached state
 *
 * Any failure is fatal: a message is printed to stderr and the process
 * exits with status 1.
 *
 * The pthread_attr_*() functions return the error number directly and
 * do not set errno, so the returned value is what gets passed to
 * strerror().
 */
void
setup_thread_attr(pthread_attr_t *attr, size_t stacksize, int detached)
{
        int rc;

        rc = pthread_attr_init(attr);
        if (rc) {
                fprintf(stderr, "can't initialize thread attr: %s\n",
                        strerror(rc));
                exit(1);
        }
        if (stacksize < PTHREAD_STACK_MIN)
                stacksize = PTHREAD_STACK_MIN;

        rc = pthread_attr_setstacksize(attr, stacksize);
        if (rc) {
                fprintf(stderr, "can't set thread stack size to %lu: %s\n",
                        (unsigned long)stacksize, strerror(rc));
                exit(1);
        }
        if (detached) {
                rc = pthread_attr_setdetachstate(attr,
                                                 PTHREAD_CREATE_DETACHED);
                if (rc) {
                        fprintf(stderr, "can't set thread to detached: %s\n",
                                strerror(rc));
                        exit(1);
                }
        }
}
107
108 /*
109  * Called with uevq_lockp held
110  */
111 void
112 service_uevq(struct list_head *tmpq)
113 {
114         struct uevent *uev, *tmp;
115
116         list_for_each_entry_safe(uev, tmp, tmpq, node) {
117                 list_del_init(&uev->node);
118
119                 if (my_uev_trigger && my_uev_trigger(uev, my_trigger_data))
120                         condlog(0, "uevent trigger error");
121
122                 if (uev->udev)
123                         udev_device_unref(uev->udev);
124                 FREE(uev);
125         }
126 }
127
128 static void uevq_stop(void *arg)
129 {
130         struct udev *udev = arg;
131
132         condlog(3, "Stopping uev queue");
133         pthread_mutex_lock(uevq_lockp);
134         my_uev_trigger = NULL;
135         pthread_cond_signal(uev_condp);
136         pthread_mutex_unlock(uevq_lockp);
137         udev_unref(udev);
138 }
139
140 void
141 uevq_cleanup(struct list_head *tmpq)
142 {
143         struct uevent *uev, *tmp;
144
145         list_for_each_entry_safe(uev, tmp, tmpq, node) {
146                 list_del_init(&uev->node);
147                 FREE(uev);
148         }
149 }
150
151 /*
152  * Service the uevent queue.
153  */
154 int uevent_dispatch(int (*uev_trigger)(struct uevent *, void * trigger_data),
155                     void * trigger_data)
156 {
157         my_uev_trigger = uev_trigger;
158         my_trigger_data = trigger_data;
159
160         mlockall(MCL_CURRENT | MCL_FUTURE);
161
162         while (1) {
163                 LIST_HEAD(uevq_tmp);
164
165                 pthread_mutex_lock(uevq_lockp);
166                 servicing_uev = 0;
167                 /*
168                  * Condition signals are unreliable,
169                  * so make sure we only wait if we have to.
170                  */
171                 if (list_empty(&uevq)) {
172                         pthread_cond_wait(uev_condp, uevq_lockp);
173                 }
174                 servicing_uev = 1;
175                 list_splice_init(&uevq, &uevq_tmp);
176                 pthread_mutex_unlock(uevq_lockp);
177                 if (!my_uev_trigger)
178                         break;
179                 service_uevq(&uevq_tmp);
180         }
181         condlog(3, "Terminating uev service queue");
182         uevq_cleanup(&uevq);
183         return 0;
184 }
185
186 struct uevent *uevent_from_buffer(char *buf, ssize_t buflen)
187 {
188         struct uevent *uev;
189         char *buffer;
190         size_t bufpos;
191         int i;
192         char *pos;
193
194         uev = alloc_uevent();
195         if (!uev) {
196                 condlog(1, "lost uevent, oom");
197                 return NULL;
198         }
199
200         if ((size_t)buflen > sizeof(buf)-1)
201                 buflen = sizeof(buf)-1;
202
203         /*
204          * Copy the shared receive buffer contents to buffer private
205          * to this uevent so we can immediately reuse the shared buffer.
206          */
207         memcpy(uev->buffer, buf, HOTPLUG_BUFFER_SIZE + OBJECT_SIZE);
208         buffer = uev->buffer;
209         buffer[buflen] = '\0';
210
211         /* save start of payload */
212         bufpos = strlen(buffer) + 1;
213
214         /* action string */
215         uev->action = buffer;
216         pos = strchr(buffer, '@');
217         if (!pos) {
218                 condlog(3, "bad action string '%s'", buffer);
219                 FREE(uev);
220                 return NULL;
221         }
222         pos[0] = '\0';
223
224         /* sysfs path */
225         uev->devpath = &pos[1];
226
227         /* hotplug events have the environment attached - reconstruct envp[] */
228         for (i = 0; (bufpos < (size_t)buflen) && (i < HOTPLUG_NUM_ENVP-1); i++) {
229                 int keylen;
230                 char *key;
231
232                 key = &buffer[bufpos];
233                 keylen = strlen(key);
234                 uev->envp[i] = key;
235                 /* Filter out sequence number */
236                 if (strncmp(key, "SEQNUM=", 7) == 0) {
237                         char *eptr;
238
239                         uev->seqnum = strtoul(key + 7, &eptr, 10);
240                         if (eptr == key + 7)
241                                 uev->seqnum = -1;
242                 }
243                 bufpos += keylen + 1;
244         }
245         uev->envp[i] = NULL;
246
247         condlog(3, "uevent %ld '%s' from '%s'", uev->seqnum,
248                 uev->action, uev->devpath);
249         uev->kernel = strrchr(uev->devpath, '/');
250         if (uev->kernel)
251                 uev->kernel++;
252
253         /* print payload environment */
254         for (i = 0; uev->envp[i] != NULL; i++)
255                 condlog(5, "%s", uev->envp[i]);
256
257         return uev;
258 }
259
260 int failback_listen(void)
261 {
262         int sock;
263         struct sockaddr_nl snl;
264         struct sockaddr_un sun;
265         socklen_t addrlen;
266         int retval;
267         int rcvbufsz = 128*1024;
268         int rcvsz = 0;
269         int rcvszsz = sizeof(rcvsz);
270         unsigned int *prcvszsz = (unsigned int *)&rcvszsz;
271         const int feature_on = 1;
272         /*
273          * First check whether we have a udev socket
274          */
275         memset(&sun, 0x00, sizeof(struct sockaddr_un));
276         sun.sun_family = AF_LOCAL;
277         strcpy(&sun.sun_path[1], "/org/kernel/dm/multipath_event");
278         addrlen = offsetof(struct sockaddr_un, sun_path) + strlen(sun.sun_path+1) + 1;
279
280         sock = socket(AF_LOCAL, SOCK_DGRAM, 0);
281         if (sock >= 0) {
282
283                 condlog(3, "reading events from udev socket.");
284
285                 /* the bind takes care of ensuring only one copy running */
286                 retval = bind(sock, (struct sockaddr *) &sun, addrlen);
287                 if (retval < 0) {
288                         condlog(0, "bind failed, exit");
289                         goto exit;
290                 }
291
292                 /* enable receiving of the sender credentials */
293                 retval = setsockopt(sock, SOL_SOCKET, SO_PASSCRED,
294                                     &feature_on, sizeof(feature_on));
295                 if (retval < 0) {
296                         condlog(0, "failed to enable credential passing, exit");
297                         goto exit;
298                 }
299
300         } else {
301                 /* Fallback to read kernel netlink events */
302                 memset(&snl, 0x00, sizeof(struct sockaddr_nl));
303                 snl.nl_family = AF_NETLINK;
304                 snl.nl_pid = getpid();
305                 snl.nl_groups = 0x01;
306
307                 sock = socket(PF_NETLINK, SOCK_DGRAM, NETLINK_KOBJECT_UEVENT);
308                 if (sock == -1) {
309                         condlog(0, "error getting socket, exit");
310                         return 1;
311                 }
312
313                 condlog(3, "reading events from kernel.");
314
315                 /*
316                  * try to avoid dropping uevents, even so, this is not a guarantee,
317                  * but it does help to change the netlink uevent socket's
318                  * receive buffer threshold from the default value of 106,496 to
319                  * the maximum value of 262,142.
320                  */
321                 retval = setsockopt(sock, SOL_SOCKET, SO_RCVBUF, &rcvbufsz,
322                                     sizeof(rcvbufsz));
323
324                 if (retval < 0) {
325                         condlog(0, "error setting receive buffer size for socket, exit");
326                         exit(1);
327                 }
328                 retval = getsockopt(sock, SOL_SOCKET, SO_RCVBUF, &rcvsz, prcvszsz);
329                 if (retval < 0) {
330                         condlog(0, "error setting receive buffer size for socket, exit");
331                         exit(1);
332                 }
333                 condlog(3, "receive buffer size for socket is %u.", rcvsz);
334
335                 /* enable receiving of the sender credentials */
336                 if (setsockopt(sock, SOL_SOCKET, SO_PASSCRED,
337                                &feature_on, sizeof(feature_on)) < 0) {
338                         condlog(0, "error on enabling credential passing for socket");
339                         exit(1);
340                 }
341
342                 retval = bind(sock, (struct sockaddr *) &snl,
343                               sizeof(struct sockaddr_nl));
344                 if (retval < 0) {
345                         condlog(0, "bind failed, exit");
346                         goto exit;
347                 }
348         }
349
350         while (1) {
351                 size_t bufpos;
352                 ssize_t buflen;
353                 struct uevent *uev;
354                 struct msghdr smsg;
355                 struct iovec iov;
356                 char cred_msg[CMSG_SPACE(sizeof(struct ucred))];
357                 struct cmsghdr *cmsg;
358                 struct ucred *cred;
359                 static char buf[HOTPLUG_BUFFER_SIZE + OBJECT_SIZE];
360
361                 memset(buf, 0x00, sizeof(buf));
362                 iov.iov_base = &buf;
363                 iov.iov_len = sizeof(buf);
364                 memset (&smsg, 0x00, sizeof(struct msghdr));
365                 smsg.msg_iov = &iov;
366                 smsg.msg_iovlen = 1;
367                 smsg.msg_control = cred_msg;
368                 smsg.msg_controllen = sizeof(cred_msg);
369
370                 buflen = recvmsg(sock, &smsg, 0);
371                 if (buflen < 0) {
372                         if (errno != EINTR)
373                                 condlog(0, "error receiving message, errno %d", errno);
374                         continue;
375                 }
376
377                 cmsg = CMSG_FIRSTHDR(&smsg);
378                 if (cmsg == NULL || cmsg->cmsg_type != SCM_CREDENTIALS) {
379                         condlog(3, "no sender credentials received, message ignored");
380                         continue;
381                 }
382
383                 cred = (struct ucred *)CMSG_DATA(cmsg);
384                 if (cred->uid != 0) {
385                         condlog(3, "sender uid=%d, message ignored", cred->uid);
386                         continue;
387                 }
388
389                 /* skip header */
390                 bufpos = strlen(buf) + 1;
391                 if (bufpos < sizeof("a@/d") || bufpos >= sizeof(buf)) {
392                         condlog(3, "invalid message length");
393                         continue;
394                 }
395
396                 /* check message header */
397                 if (strstr(buf, "@/") == NULL) {
398                         condlog(3, "unrecognized message header");
399                         continue;
400                 }
401                 if ((size_t)buflen > sizeof(buf)-1) {
402                         condlog(2, "buffer overflow for received uevent");
403                         buflen = sizeof(buf)-1;
404                 }
405
406                 uev = uevent_from_buffer(buf, buflen);
407                 if (!uev)
408                         continue;
409                 /*
410                  * Queue uevent and poke service pthread.
411                  */
412                 pthread_mutex_lock(uevq_lockp);
413                 list_add_tail(&uev->node, &uevq);
414                 pthread_cond_signal(uev_condp);
415                 pthread_mutex_unlock(uevq_lockp);
416         }
417
418 exit:
419         close(sock);
420         return 1;
421 }
422
423 struct uevent *uevent_from_udev_device(struct udev_device *dev)
424 {
425         struct uevent *uev;
426         int i = 0;
427         char *pos, *end;
428         struct udev_list_entry *list_entry;
429
430         uev = alloc_uevent();
431         if (!uev) {
432                 udev_device_unref(dev);
433                 condlog(1, "lost uevent, oom");
434                 return NULL;
435         }
436         pos = uev->buffer;
437         end = pos + HOTPLUG_BUFFER_SIZE + OBJECT_SIZE - 1;
438         udev_list_entry_foreach(list_entry, udev_device_get_properties_list_entry(dev)) {
439                 const char *name, *value;
440                 int bytes;
441
442                 name = udev_list_entry_get_name(list_entry);
443                 if (!name)
444                         name = "(null)";
445                 value = udev_list_entry_get_value(list_entry);
446                 if (!value)
447                         value = "(null)";
448                 bytes = snprintf(pos, end - pos, "%s=%s", name, value);
449                 if (pos + bytes >= end) {
450                         condlog(2, "buffer overflow for uevent");
451                         break;
452                 }
453                 uev->envp[i] = pos;
454                 pos += bytes;
455                 *pos = '\0';
456                 pos++;
457                 if (strcmp(name, "DEVPATH") == 0)
458                         uev->devpath = uev->envp[i] + 8;
459                 if (strcmp(name, "ACTION") == 0)
460                         uev->action = uev->envp[i] + 7;
461                 i++;
462                 if (i == HOTPLUG_NUM_ENVP - 1)
463                         break;
464         }
465         uev->udev = dev;
466         uev->envp[i] = NULL;
467
468         condlog(3, "uevent '%s' from '%s'", uev->action, uev->devpath);
469         uev->kernel = strrchr(uev->devpath, '/');
470         if (uev->kernel)
471                 uev->kernel++;
472
473         /* print payload environment */
474         for (i = 0; uev->envp[i] != NULL; i++)
475                 condlog(5, "%s", uev->envp[i]);
476         return uev;
477 }
478
479 int uevent_listen(struct udev *udev)
480 {
481         int err = 2;
482         struct udev_monitor *monitor = NULL;
483         int fd, socket_flags, events;
484         int need_failback = 1;
485         int timeout = 30;
486         LIST_HEAD(uevlisten_tmp);
487
488         /*
489          * Queue uevents for service by dedicated thread so that the uevent
490          * listening thread does not block on multipathd locks (vecs->lock)
491          * thereby not getting to empty the socket's receive buffer queue
492          * often enough.
493          */
494         if (!udev) {
495                 condlog(1, "no udev context");
496                 return 1;
497         }
498         udev_ref(udev);
499         pthread_cleanup_push(uevq_stop, udev);
500
501         monitor = udev_monitor_new_from_netlink(udev, "udev");
502         if (!monitor) {
503                 condlog(2, "failed to create udev monitor");
504                 goto out;
505         }
506 #ifdef LIBUDEV_API_RECVBUF
507         if (udev_monitor_set_receive_buffer_size(monitor, 128 * 1024 * 1024))
508                 condlog(2, "failed to increase buffer size");
509 #endif
510         fd = udev_monitor_get_fd(monitor);
511         if (fd < 0) {
512                 condlog(2, "failed to get monitor fd");
513                 goto out;
514         }
515         socket_flags = fcntl(fd, F_GETFL);
516         if (socket_flags < 0) {
517                 condlog(2, "failed to get monitor socket flags : %s",
518                         strerror(errno));
519                 goto out;
520         }
521         if (fcntl(fd, F_SETFL, socket_flags & ~O_NONBLOCK) < 0) {
522                 condlog(2, "failed to set monitor socket flags : %s",
523                         strerror(errno));
524                 goto out;
525         }
526         err = udev_monitor_filter_add_match_subsystem_devtype(monitor, "block",
527                                                               NULL);
528         if (err)
529                 condlog(2, "failed to create filter : %s", strerror(-err));
530         err = udev_monitor_enable_receiving(monitor);
531         if (err) {
532                 condlog(2, "failed to enable receiving : %s", strerror(-err));
533                 goto out;
534         }
535
536         events = 0;
537         while (1) {
538                 struct uevent *uev;
539                 struct udev_device *dev;
540                 struct pollfd ev_poll;
541                 int poll_timeout;
542                 int fdcount;
543
544                 memset(&ev_poll, 0, sizeof(struct pollfd));
545                 ev_poll.fd = fd;
546                 ev_poll.events = POLLIN;
547                 poll_timeout = timeout * 1000;
548                 errno = 0;
549                 fdcount = poll(&ev_poll, 1, poll_timeout);
550                 if (fdcount && ev_poll.revents & POLLIN) {
551                         timeout = 0;
552                         dev = udev_monitor_receive_device(monitor);
553                         if (!dev) {
554                                 condlog(0, "failed getting udev device");
555                                 continue;
556                         }
557                         uev = uevent_from_udev_device(dev);
558                         if (!uev)
559                                 continue;
560                         list_add_tail(&uev->node, &uevlisten_tmp);
561                         events++;
562                         continue;
563                 }
564                 if (fdcount < 0) {
565                         if (errno == EINTR)
566                                 continue;
567
568                         condlog(0, "error receiving "
569                                 "uevent message: %m");
570                         err = -errno;
571                         break;
572                 }
573                 if (!list_empty(&uevlisten_tmp)) {
574                         /*
575                          * Queue uevents and poke service pthread.
576                          */
577                         condlog(3, "Forwarding %d uevents", events);
578                         pthread_mutex_lock(uevq_lockp);
579                         list_splice_tail_init(&uevlisten_tmp, &uevq);
580                         pthread_cond_signal(uev_condp);
581                         pthread_mutex_unlock(uevq_lockp);
582                         events = 0;
583                 }
584                 timeout = 30;
585         }
586         need_failback = 0;
587 out:
588         if (monitor)
589                 udev_monitor_unref(monitor);
590         if (need_failback)
591                 err = failback_listen();
592         pthread_cleanup_pop(1);
593         return err;
594 }
595
596 extern int
597 uevent_get_major(struct uevent *uev)
598 {
599         char *p, *q;
600         int i, major = -1;
601
602         for (i = 0; uev->envp[i] != NULL; i++) {
603                 if (!strncmp(uev->envp[i], "MAJOR", 5) && strlen(uev->envp[i]) > 6) {
604                         p = uev->envp[i] + 6;
605                         major = strtoul(p, &q, 10);
606                         if (p == q) {
607                                 condlog(2, "invalid major '%s'", p);
608                                 major = -1;
609                         }
610                         break;
611                 }
612         }
613         return major;
614 }
615
616 extern int
617 uevent_get_minor(struct uevent *uev)
618 {
619         char *p, *q;
620         int i, minor = -1;
621
622         for (i = 0; uev->envp[i] != NULL; i++) {
623                 if (!strncmp(uev->envp[i], "MINOR", 5) && strlen(uev->envp[i]) > 6) {
624                         p = uev->envp[i] + 6;
625                         minor = strtoul(p, &q, 10);
626                         if (p == q) {
627                                 condlog(2, "invalid minor '%s'", p);
628                                 minor = -1;
629                         }
630                         break;
631                 }
632         }
633         return minor;
634 }
635
636 extern int
637 uevent_get_disk_ro(struct uevent *uev)
638 {
639         char *p, *q;
640         int i, ro = -1;
641
642         for (i = 0; uev->envp[i] != NULL; i++) {
643                 if (!strncmp(uev->envp[i], "DISK_RO", 6) && strlen(uev->envp[i]) > 7) {
644                         p = uev->envp[i] + 8;
645                         ro = strtoul(p, &q, 10);
646                         if (p == q) {
647                                 condlog(2, "invalid read_only setting '%s'", p);
648                                 ro = -1;
649                         }
650                         break;
651                 }
652         }
653         return ro;
654 }
655
656 extern char *
657 uevent_get_dm_name(struct uevent *uev)
658 {
659         char *p = NULL;
660         int i;
661
662         for (i = 0; uev->envp[i] != NULL; i++) {
663                 if (!strncmp(uev->envp[i], "DM_NAME", 6) &&
664                     strlen(uev->envp[i]) > 7) {
665                         p = MALLOC(strlen(uev->envp[i] + 8) + 1);
666                         strcpy(p, uev->envp[i] + 8);
667                         break;
668                 }
669         }
670         return p;
671 }