libmultipath: Remove an incorrect comment
[multipath-tools/.git] / libmultipath / uevent.c
1 /*
2  * uevent.c - trigger upon netlink uevents from the kernel
3  *
4  *      Only kernels from version 2.6.10* on provide the uevent netlink socket.
5  *      Until the libc-kernel-headers are updated, you need to compile with:
6  *
7  *        gcc -I /lib/modules/`uname -r`/build/include -o uevent_listen uevent_listen.c
8  *
9  * Copyright (C) 2004 Kay Sievers <kay.sievers@vrfy.org>
10  *
11  *      This program is free software; you can redistribute it and/or modify it
12  *      under the terms of the GNU General Public License as published by the
13  *      Free Software Foundation version 2 of the License.
14  *
15  *      This program is distributed in the hope that it will be useful, but
16  *      WITHOUT ANY WARRANTY; without even the implied warranty of
17  *      MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
18  *      General Public License for more details.
19  *
20  *      You should have received a copy of the GNU General Public License along
21  *      with this program.  If not, see <http://www.gnu.org/licenses/>.
22  *
23  */
24
25 #include <unistd.h>
26 #include <stdio.h>
27 #include <errno.h>
28 #include <stdlib.h>
29 #include <stddef.h>
30 #include <string.h>
31 #include <fcntl.h>
32 #include <time.h>
33 #include <sys/socket.h>
34 #include <sys/user.h>
35 #include <sys/un.h>
36 #include <poll.h>
37 #include <linux/types.h>
38 #include <linux/netlink.h>
39 #include <pthread.h>
40 #include <limits.h>
41 #include <sys/mman.h>
42 #include <libudev.h>
43 #include <errno.h>
44
45 #include "memory.h"
46 #include "debug.h"
47 #include "list.h"
48 #include "uevent.h"
49 #include "vector.h"
50
/* Signature of the handler invoked for every dequeued uevent. */
typedef int (uev_trigger)(struct uevent *, void *trigger_data);

/*
 * NOTE(review): not referenced in this file - presumably the handle of
 * the dispatcher thread, set by the caller; confirm against users.
 */
pthread_t uevq_thr;

/* Queue of received-but-unserviced uevents and the lock protecting it. */
LIST_HEAD(uevq);
pthread_mutex_t uevq_lock = PTHREAD_MUTEX_INITIALIZER;
pthread_mutex_t *uevq_lockp = &uevq_lock;

/* Signalled by the listeners whenever events are appended to uevq. */
pthread_cond_t uev_cond = PTHREAD_COND_INITIALIZER;
pthread_cond_t *uev_condp = &uev_cond;

/* Handler and opaque argument registered through uevent_dispatch(). */
uev_trigger *my_uev_trigger;
void *my_trigger_data;

/* Set to 1 by uevent_dispatch() while a batch is handled, 0 while it waits. */
int servicing_uev;
62
63 int is_uevent_busy(void)
64 {
65         int empty;
66
67         pthread_mutex_lock(uevq_lockp);
68         empty = list_empty(&uevq);
69         pthread_mutex_unlock(uevq_lockp);
70         return (!empty || servicing_uev);
71 }
72
73 struct uevent * alloc_uevent (void)
74 {
75         struct uevent *uev = MALLOC(sizeof(struct uevent));
76
77         if (uev)
78                 INIT_LIST_HEAD(&uev->node);
79
80         return uev;
81 }
82
83 void
84 setup_thread_attr(pthread_attr_t *attr, size_t stacksize, int detached)
85 {
86         if (pthread_attr_init(attr)) {
87                 fprintf(stderr, "can't initialize thread attr: %s\n",
88                         strerror(errno));
89                 exit(1);
90         }
91         if (stacksize < PTHREAD_STACK_MIN)
92                 stacksize = PTHREAD_STACK_MIN;
93
94         if (pthread_attr_setstacksize(attr, stacksize)) {
95                 fprintf(stderr, "can't set thread stack size to %lu: %s\n",
96                         (unsigned long)stacksize, strerror(errno));
97                 exit(1);
98         }
99         if (detached && pthread_attr_setdetachstate(attr,
100                                                     PTHREAD_CREATE_DETACHED)) {
101                 fprintf(stderr, "can't set thread to detached: %s\n",
102                         strerror(errno));
103                 exit(1);
104         }
105 }
106
107 void
108 service_uevq(struct list_head *tmpq)
109 {
110         struct uevent *uev, *tmp;
111
112         list_for_each_entry_safe(uev, tmp, tmpq, node) {
113                 list_del_init(&uev->node);
114
115                 if (my_uev_trigger && my_uev_trigger(uev, my_trigger_data))
116                         condlog(0, "uevent trigger error");
117
118                 if (uev->udev)
119                         udev_device_unref(uev->udev);
120                 FREE(uev);
121         }
122 }
123
/*
 * Thread cleanup handler installed by uevent_listen(): releases the
 * udev context reference passed in as arg.
 */
static void uevent_cleanup(void *arg)
{
        condlog(3, "Releasing uevent_listen() resources");
        udev_unref((struct udev *)arg);
}
131
132 void
133 uevq_cleanup(struct list_head *tmpq)
134 {
135         struct uevent *uev, *tmp;
136
137         list_for_each_entry_safe(uev, tmp, tmpq, node) {
138                 list_del_init(&uev->node);
139                 FREE(uev);
140         }
141 }
142
143 /*
144  * Service the uevent queue.
145  */
146 int uevent_dispatch(int (*uev_trigger)(struct uevent *, void * trigger_data),
147                     void * trigger_data)
148 {
149         my_uev_trigger = uev_trigger;
150         my_trigger_data = trigger_data;
151
152         mlockall(MCL_CURRENT | MCL_FUTURE);
153
154         while (1) {
155                 LIST_HEAD(uevq_tmp);
156
157                 pthread_mutex_lock(uevq_lockp);
158                 servicing_uev = 0;
159                 /*
160                  * Condition signals are unreliable,
161                  * so make sure we only wait if we have to.
162                  */
163                 if (list_empty(&uevq)) {
164                         pthread_cond_wait(uev_condp, uevq_lockp);
165                 }
166                 servicing_uev = 1;
167                 list_splice_init(&uevq, &uevq_tmp);
168                 pthread_mutex_unlock(uevq_lockp);
169                 if (!my_uev_trigger)
170                         break;
171                 service_uevq(&uevq_tmp);
172         }
173         condlog(3, "Terminating uev service queue");
174         uevq_cleanup(&uevq);
175         return 0;
176 }
177
178 struct uevent *uevent_from_buffer(char *buf, ssize_t buflen)
179 {
180         struct uevent *uev;
181         char *buffer;
182         size_t bufpos;
183         int i;
184         char *pos;
185
186         uev = alloc_uevent();
187         if (!uev) {
188                 condlog(1, "lost uevent, oom");
189                 return NULL;
190         }
191
192         if ((size_t)buflen > sizeof(buf)-1)
193                 buflen = sizeof(buf)-1;
194
195         /*
196          * Copy the shared receive buffer contents to buffer private
197          * to this uevent so we can immediately reuse the shared buffer.
198          */
199         memcpy(uev->buffer, buf, HOTPLUG_BUFFER_SIZE + OBJECT_SIZE);
200         buffer = uev->buffer;
201         buffer[buflen] = '\0';
202
203         /* save start of payload */
204         bufpos = strlen(buffer) + 1;
205
206         /* action string */
207         uev->action = buffer;
208         pos = strchr(buffer, '@');
209         if (!pos) {
210                 condlog(3, "bad action string '%s'", buffer);
211                 FREE(uev);
212                 return NULL;
213         }
214         pos[0] = '\0';
215
216         /* sysfs path */
217         uev->devpath = &pos[1];
218
219         /* hotplug events have the environment attached - reconstruct envp[] */
220         for (i = 0; (bufpos < (size_t)buflen) && (i < HOTPLUG_NUM_ENVP-1); i++) {
221                 int keylen;
222                 char *key;
223
224                 key = &buffer[bufpos];
225                 keylen = strlen(key);
226                 uev->envp[i] = key;
227                 /* Filter out sequence number */
228                 if (strncmp(key, "SEQNUM=", 7) == 0) {
229                         char *eptr;
230
231                         uev->seqnum = strtoul(key + 7, &eptr, 10);
232                         if (eptr == key + 7)
233                                 uev->seqnum = -1;
234                 }
235                 bufpos += keylen + 1;
236         }
237         uev->envp[i] = NULL;
238
239         condlog(3, "uevent %ld '%s' from '%s'", uev->seqnum,
240                 uev->action, uev->devpath);
241         uev->kernel = strrchr(uev->devpath, '/');
242         if (uev->kernel)
243                 uev->kernel++;
244
245         /* print payload environment */
246         for (i = 0; uev->envp[i] != NULL; i++)
247                 condlog(5, "%s", uev->envp[i]);
248
249         return uev;
250 }
251
252 int failback_listen(void)
253 {
254         int sock;
255         struct sockaddr_nl snl;
256         struct sockaddr_un sun;
257         socklen_t addrlen;
258         int retval;
259         int rcvbufsz = 128*1024;
260         int rcvsz = 0;
261         int rcvszsz = sizeof(rcvsz);
262         unsigned int *prcvszsz = (unsigned int *)&rcvszsz;
263         const int feature_on = 1;
264         /*
265          * First check whether we have a udev socket
266          */
267         memset(&sun, 0x00, sizeof(struct sockaddr_un));
268         sun.sun_family = AF_LOCAL;
269         strcpy(&sun.sun_path[1], "/org/kernel/dm/multipath_event");
270         addrlen = offsetof(struct sockaddr_un, sun_path) + strlen(sun.sun_path+1) + 1;
271
272         sock = socket(AF_LOCAL, SOCK_DGRAM, 0);
273         if (sock >= 0) {
274
275                 condlog(3, "reading events from udev socket.");
276
277                 /* the bind takes care of ensuring only one copy running */
278                 retval = bind(sock, (struct sockaddr *) &sun, addrlen);
279                 if (retval < 0) {
280                         condlog(0, "bind failed, exit");
281                         goto exit;
282                 }
283
284                 /* enable receiving of the sender credentials */
285                 retval = setsockopt(sock, SOL_SOCKET, SO_PASSCRED,
286                                     &feature_on, sizeof(feature_on));
287                 if (retval < 0) {
288                         condlog(0, "failed to enable credential passing, exit");
289                         goto exit;
290                 }
291
292         } else {
293                 /* Fallback to read kernel netlink events */
294                 memset(&snl, 0x00, sizeof(struct sockaddr_nl));
295                 snl.nl_family = AF_NETLINK;
296                 snl.nl_pid = getpid();
297                 snl.nl_groups = 0x01;
298
299                 sock = socket(PF_NETLINK, SOCK_DGRAM, NETLINK_KOBJECT_UEVENT);
300                 if (sock == -1) {
301                         condlog(0, "error getting socket, exit");
302                         return 1;
303                 }
304
305                 condlog(3, "reading events from kernel.");
306
307                 /*
308                  * try to avoid dropping uevents, even so, this is not a guarantee,
309                  * but it does help to change the netlink uevent socket's
310                  * receive buffer threshold from the default value of 106,496 to
311                  * the maximum value of 262,142.
312                  */
313                 retval = setsockopt(sock, SOL_SOCKET, SO_RCVBUF, &rcvbufsz,
314                                     sizeof(rcvbufsz));
315
316                 if (retval < 0) {
317                         condlog(0, "error setting receive buffer size for socket, exit");
318                         exit(1);
319                 }
320                 retval = getsockopt(sock, SOL_SOCKET, SO_RCVBUF, &rcvsz, prcvszsz);
321                 if (retval < 0) {
322                         condlog(0, "error setting receive buffer size for socket, exit");
323                         exit(1);
324                 }
325                 condlog(3, "receive buffer size for socket is %u.", rcvsz);
326
327                 /* enable receiving of the sender credentials */
328                 if (setsockopt(sock, SOL_SOCKET, SO_PASSCRED,
329                                &feature_on, sizeof(feature_on)) < 0) {
330                         condlog(0, "error on enabling credential passing for socket");
331                         exit(1);
332                 }
333
334                 retval = bind(sock, (struct sockaddr *) &snl,
335                               sizeof(struct sockaddr_nl));
336                 if (retval < 0) {
337                         condlog(0, "bind failed, exit");
338                         goto exit;
339                 }
340         }
341
342         while (1) {
343                 size_t bufpos;
344                 ssize_t buflen;
345                 struct uevent *uev;
346                 struct msghdr smsg;
347                 struct iovec iov;
348                 char cred_msg[CMSG_SPACE(sizeof(struct ucred))];
349                 struct cmsghdr *cmsg;
350                 struct ucred *cred;
351                 static char buf[HOTPLUG_BUFFER_SIZE + OBJECT_SIZE];
352
353                 memset(buf, 0x00, sizeof(buf));
354                 iov.iov_base = &buf;
355                 iov.iov_len = sizeof(buf);
356                 memset (&smsg, 0x00, sizeof(struct msghdr));
357                 smsg.msg_iov = &iov;
358                 smsg.msg_iovlen = 1;
359                 smsg.msg_control = cred_msg;
360                 smsg.msg_controllen = sizeof(cred_msg);
361
362                 buflen = recvmsg(sock, &smsg, 0);
363                 if (buflen < 0) {
364                         if (errno != EINTR)
365                                 condlog(0, "error receiving message, errno %d", errno);
366                         continue;
367                 }
368
369                 cmsg = CMSG_FIRSTHDR(&smsg);
370                 if (cmsg == NULL || cmsg->cmsg_type != SCM_CREDENTIALS) {
371                         condlog(3, "no sender credentials received, message ignored");
372                         continue;
373                 }
374
375                 cred = (struct ucred *)CMSG_DATA(cmsg);
376                 if (cred->uid != 0) {
377                         condlog(3, "sender uid=%d, message ignored", cred->uid);
378                         continue;
379                 }
380
381                 /* skip header */
382                 bufpos = strlen(buf) + 1;
383                 if (bufpos < sizeof("a@/d") || bufpos >= sizeof(buf)) {
384                         condlog(3, "invalid message length");
385                         continue;
386                 }
387
388                 /* check message header */
389                 if (strstr(buf, "@/") == NULL) {
390                         condlog(3, "unrecognized message header");
391                         continue;
392                 }
393                 if ((size_t)buflen > sizeof(buf)-1) {
394                         condlog(2, "buffer overflow for received uevent");
395                         buflen = sizeof(buf)-1;
396                 }
397
398                 uev = uevent_from_buffer(buf, buflen);
399                 if (!uev)
400                         continue;
401                 /*
402                  * Queue uevent and poke service pthread.
403                  */
404                 pthread_mutex_lock(uevq_lockp);
405                 list_add_tail(&uev->node, &uevq);
406                 pthread_cond_signal(uev_condp);
407                 pthread_mutex_unlock(uevq_lockp);
408         }
409
410 exit:
411         close(sock);
412         return 1;
413 }
414
415 struct uevent *uevent_from_udev_device(struct udev_device *dev)
416 {
417         struct uevent *uev;
418         int i = 0;
419         char *pos, *end;
420         struct udev_list_entry *list_entry;
421
422         uev = alloc_uevent();
423         if (!uev) {
424                 udev_device_unref(dev);
425                 condlog(1, "lost uevent, oom");
426                 return NULL;
427         }
428         pos = uev->buffer;
429         end = pos + HOTPLUG_BUFFER_SIZE + OBJECT_SIZE - 1;
430         udev_list_entry_foreach(list_entry, udev_device_get_properties_list_entry(dev)) {
431                 const char *name, *value;
432                 int bytes;
433
434                 name = udev_list_entry_get_name(list_entry);
435                 if (!name)
436                         name = "(null)";
437                 value = udev_list_entry_get_value(list_entry);
438                 if (!value)
439                         value = "(null)";
440                 bytes = snprintf(pos, end - pos, "%s=%s", name, value);
441                 if (pos + bytes >= end) {
442                         condlog(2, "buffer overflow for uevent");
443                         break;
444                 }
445                 uev->envp[i] = pos;
446                 pos += bytes;
447                 *pos = '\0';
448                 pos++;
449                 if (strcmp(name, "DEVPATH") == 0)
450                         uev->devpath = uev->envp[i] + 8;
451                 if (strcmp(name, "ACTION") == 0)
452                         uev->action = uev->envp[i] + 7;
453                 i++;
454                 if (i == HOTPLUG_NUM_ENVP - 1)
455                         break;
456         }
457         uev->udev = dev;
458         uev->envp[i] = NULL;
459
460         condlog(3, "uevent '%s' from '%s'", uev->action, uev->devpath);
461         uev->kernel = strrchr(uev->devpath, '/');
462         if (uev->kernel)
463                 uev->kernel++;
464
465         /* print payload environment */
466         for (i = 0; uev->envp[i] != NULL; i++)
467                 condlog(5, "%s", uev->envp[i]);
468         return uev;
469 }
470
471 int uevent_listen(struct udev *udev)
472 {
473         int err = 2;
474         struct udev_monitor *monitor = NULL;
475         int fd, socket_flags, events;
476         int need_failback = 1;
477         int timeout = 30;
478         LIST_HEAD(uevlisten_tmp);
479
480         /*
481          * Queue uevents for service by dedicated thread so that the uevent
482          * listening thread does not block on multipathd locks (vecs->lock)
483          * thereby not getting to empty the socket's receive buffer queue
484          * often enough.
485          */
486         if (!udev) {
487                 condlog(1, "no udev context");
488                 return 1;
489         }
490         udev_ref(udev);
491         pthread_cleanup_push(uevent_cleanup, udev);
492
493         monitor = udev_monitor_new_from_netlink(udev, "udev");
494         if (!monitor) {
495                 condlog(2, "failed to create udev monitor");
496                 goto out;
497         }
498 #ifdef LIBUDEV_API_RECVBUF
499         if (udev_monitor_set_receive_buffer_size(monitor, 128 * 1024 * 1024))
500                 condlog(2, "failed to increase buffer size");
501 #endif
502         fd = udev_monitor_get_fd(monitor);
503         if (fd < 0) {
504                 condlog(2, "failed to get monitor fd");
505                 goto out;
506         }
507         socket_flags = fcntl(fd, F_GETFL);
508         if (socket_flags < 0) {
509                 condlog(2, "failed to get monitor socket flags : %s",
510                         strerror(errno));
511                 goto out;
512         }
513         if (fcntl(fd, F_SETFL, socket_flags & ~O_NONBLOCK) < 0) {
514                 condlog(2, "failed to set monitor socket flags : %s",
515                         strerror(errno));
516                 goto out;
517         }
518         err = udev_monitor_filter_add_match_subsystem_devtype(monitor, "block",
519                                                               NULL);
520         if (err)
521                 condlog(2, "failed to create filter : %s", strerror(-err));
522         err = udev_monitor_enable_receiving(monitor);
523         if (err) {
524                 condlog(2, "failed to enable receiving : %s", strerror(-err));
525                 goto out;
526         }
527
528         events = 0;
529         while (1) {
530                 struct uevent *uev;
531                 struct udev_device *dev;
532                 struct pollfd ev_poll;
533                 int poll_timeout;
534                 int fdcount;
535
536                 memset(&ev_poll, 0, sizeof(struct pollfd));
537                 ev_poll.fd = fd;
538                 ev_poll.events = POLLIN;
539                 poll_timeout = timeout * 1000;
540                 errno = 0;
541                 fdcount = poll(&ev_poll, 1, poll_timeout);
542                 if (fdcount && ev_poll.revents & POLLIN) {
543                         timeout = 0;
544                         dev = udev_monitor_receive_device(monitor);
545                         if (!dev) {
546                                 condlog(0, "failed getting udev device");
547                                 continue;
548                         }
549                         uev = uevent_from_udev_device(dev);
550                         if (!uev)
551                                 continue;
552                         list_add_tail(&uev->node, &uevlisten_tmp);
553                         events++;
554                         continue;
555                 }
556                 if (fdcount < 0) {
557                         if (errno == EINTR)
558                                 continue;
559
560                         condlog(0, "error receiving "
561                                 "uevent message: %m");
562                         err = -errno;
563                         break;
564                 }
565                 if (!list_empty(&uevlisten_tmp)) {
566                         /*
567                          * Queue uevents and poke service pthread.
568                          */
569                         condlog(3, "Forwarding %d uevents", events);
570                         pthread_mutex_lock(uevq_lockp);
571                         list_splice_tail_init(&uevlisten_tmp, &uevq);
572                         pthread_cond_signal(uev_condp);
573                         pthread_mutex_unlock(uevq_lockp);
574                         events = 0;
575                 }
576                 timeout = 30;
577         }
578         need_failback = 0;
579 out:
580         if (monitor)
581                 udev_monitor_unref(monitor);
582         if (need_failback)
583                 err = failback_listen();
584         pthread_cleanup_pop(1);
585         return err;
586 }
587
588 extern int
589 uevent_get_major(struct uevent *uev)
590 {
591         char *p, *q;
592         int i, major = -1;
593
594         for (i = 0; uev->envp[i] != NULL; i++) {
595                 if (!strncmp(uev->envp[i], "MAJOR", 5) && strlen(uev->envp[i]) > 6) {
596                         p = uev->envp[i] + 6;
597                         major = strtoul(p, &q, 10);
598                         if (p == q) {
599                                 condlog(2, "invalid major '%s'", p);
600                                 major = -1;
601                         }
602                         break;
603                 }
604         }
605         return major;
606 }
607
608 extern int
609 uevent_get_minor(struct uevent *uev)
610 {
611         char *p, *q;
612         int i, minor = -1;
613
614         for (i = 0; uev->envp[i] != NULL; i++) {
615                 if (!strncmp(uev->envp[i], "MINOR", 5) && strlen(uev->envp[i]) > 6) {
616                         p = uev->envp[i] + 6;
617                         minor = strtoul(p, &q, 10);
618                         if (p == q) {
619                                 condlog(2, "invalid minor '%s'", p);
620                                 minor = -1;
621                         }
622                         break;
623                 }
624         }
625         return minor;
626 }
627
628 extern int
629 uevent_get_disk_ro(struct uevent *uev)
630 {
631         char *p, *q;
632         int i, ro = -1;
633
634         for (i = 0; uev->envp[i] != NULL; i++) {
635                 if (!strncmp(uev->envp[i], "DISK_RO", 6) && strlen(uev->envp[i]) > 7) {
636                         p = uev->envp[i] + 8;
637                         ro = strtoul(p, &q, 10);
638                         if (p == q) {
639                                 condlog(2, "invalid read_only setting '%s'", p);
640                                 ro = -1;
641                         }
642                         break;
643                 }
644         }
645         return ro;
646 }
647
648 extern char *
649 uevent_get_dm_name(struct uevent *uev)
650 {
651         char *p = NULL;
652         int i;
653
654         for (i = 0; uev->envp[i] != NULL; i++) {
655                 if (!strncmp(uev->envp[i], "DM_NAME", 6) &&
656                     strlen(uev->envp[i]) > 7) {
657                         p = MALLOC(strlen(uev->envp[i] + 8) + 1);
658                         strcpy(p, uev->envp[i] + 8);
659                         break;
660                 }
661         }
662         return p;
663 }