multipath: libudev cleanup and bugfixes
[multipath-tools/.git] / libmultipath / uevent.c
1 /*
2  * uevent.c - trigger upon netlink uevents from the kernel
3  *
4  *      Only kernels from version 2.6.10* on provide the uevent netlink socket.
5  *      Until the libc-kernel-headers are updated, you need to compile with:
6  *
7  *        gcc -I /lib/modules/`uname -r`/build/include -o uevent_listen uevent_listen.c
8  *
9  * Copyright (C) 2004 Kay Sievers <kay.sievers@vrfy.org>
10  *
11  *      This program is free software; you can redistribute it and/or modify it
12  *      under the terms of the GNU General Public License as published by the
13  *      Free Software Foundation version 2 of the License.
14  *
15  *      This program is distributed in the hope that it will be useful, but
16  *      WITHOUT ANY WARRANTY; without even the implied warranty of
17  *      MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
18  *      General Public License for more details.
19  *
20  *      You should have received a copy of the GNU General Public License along
21  *      with this program; if not, write to the Free Software Foundation, Inc.,
22  *      675 Mass Ave, Cambridge, MA 02139, USA.
23  *
24  */
25
26 #include <unistd.h>
27 #include <stdio.h>
28 #include <errno.h>
29 #include <stdlib.h>
30 #include <stddef.h>
31 #include <string.h>
32 #include <fcntl.h>
33 #include <time.h>
34 #include <sys/socket.h>
35 #include <sys/user.h>
36 #include <sys/un.h>
37 #include <linux/types.h>
38 #include <linux/netlink.h>
39 #include <pthread.h>
40 #include <limits.h>
41 #include <sys/mman.h>
42 #include <libudev.h>
43 #include <errno.h>
44
45 #include "memory.h"
46 #include "debug.h"
47 #include "list.h"
48 #include "uevent.h"
49 #include "vector.h"
50 #include "config.h"
51
/* Signature of the callback run once for every queued uevent. */
typedef int (uev_trigger)(struct uevent *, void * trigger_data);

/* Thread handle; not referenced by any function in this file. */
pthread_t uevq_thr;

/* Pending uevents: appended by the listeners, drained by uevent_dispatch(). */
LIST_HEAD(uevq);

/* Mutex guarding uevq, plus the pointer all users go through. */
pthread_mutex_t uevq_lock;
pthread_mutex_t *uevq_lockp = &uevq_lock;

/* Condition signalled when an event is queued or the queue is stopped. */
pthread_cond_t uev_cond;
pthread_cond_t *uev_condp = &uev_cond;

/* Callback and its opaque argument, installed by uevent_dispatch(). */
uev_trigger *my_uev_trigger;
void *my_trigger_data;

/* Set to 1 by uevent_dispatch() while it is processing a batch of events. */
int servicing_uev;
61
62 int is_uevent_busy(void)
63 {
64         int empty;
65
66         pthread_mutex_lock(uevq_lockp);
67         empty = list_empty(&uevq);
68         pthread_mutex_unlock(uevq_lockp);
69         return (!empty || servicing_uev);
70 }
71
72 struct uevent * alloc_uevent (void)
73 {
74         struct uevent *uev = MALLOC(sizeof(struct uevent));
75
76         if (uev)
77                 INIT_LIST_HEAD(&uev->node);
78
79         return uev;
80 }
81
82 void
83 setup_thread_attr(pthread_attr_t *attr, size_t stacksize, int detached)
84 {
85         if (pthread_attr_init(attr)) {
86                 fprintf(stderr, "can't initialize thread attr: %s\n",
87                         strerror(errno));
88                 exit(1);
89         }
90         if (stacksize < PTHREAD_STACK_MIN)
91                 stacksize = PTHREAD_STACK_MIN;
92
93         if (pthread_attr_setstacksize(attr, stacksize)) {
94                 fprintf(stderr, "can't set thread stack size to %lu: %s\n",
95                         (unsigned long)stacksize, strerror(errno));
96                 exit(1);
97         }
98         if (detached && pthread_attr_setdetachstate(attr,
99                                                     PTHREAD_CREATE_DETACHED)) {
100                 fprintf(stderr, "can't set thread to detached: %s\n",
101                         strerror(errno));
102                 exit(1);
103         }
104 }
105
106 /*
107  * Called with uevq_lockp held
108  */
109 void
110 service_uevq(struct list_head *tmpq)
111 {
112         struct uevent *uev, *tmp;
113
114         list_for_each_entry_safe(uev, tmp, tmpq, node) {
115                 list_del_init(&uev->node);
116
117                 if (my_uev_trigger && my_uev_trigger(uev, my_trigger_data))
118                         condlog(0, "uevent trigger error");
119
120                 if (uev->udev)
121                         udev_device_unref(uev->udev);
122                 FREE(uev);
123         }
124 }
125
126 static void uevq_stop(void *arg)
127 {
128         condlog(3, "Stopping uev queue");
129         pthread_mutex_lock(uevq_lockp);
130         my_uev_trigger = NULL;
131         pthread_cond_signal(uev_condp);
132         pthread_mutex_unlock(uevq_lockp);
133 }
134
135 /*
136  * Service the uevent queue.
137  */
138 int uevent_dispatch(int (*uev_trigger)(struct uevent *, void * trigger_data),
139                     void * trigger_data)
140 {
141         my_uev_trigger = uev_trigger;
142         my_trigger_data = trigger_data;
143
144         mlockall(MCL_CURRENT | MCL_FUTURE);
145
146         while (1) {
147                 LIST_HEAD(uevq_tmp);
148
149                 pthread_mutex_lock(uevq_lockp);
150                 servicing_uev = 0;
151                 /*
152                  * Condition signals are unreliable,
153                  * so make sure we only wait if we have to.
154                  */
155                 if (list_empty(&uevq)) {
156                         pthread_cond_wait(uev_condp, uevq_lockp);
157                 }
158                 servicing_uev = 1;
159                 list_splice_init(&uevq, &uevq_tmp);
160                 pthread_mutex_unlock(uevq_lockp);
161                 if (!my_uev_trigger)
162                         break;
163                 service_uevq(&uevq_tmp);
164         }
165         condlog(3, "Terminating uev service queue");
166         return 0;
167 }
168
169 int failback_listen(void)
170 {
171         int sock;
172         struct sockaddr_nl snl;
173         struct sockaddr_un sun;
174         socklen_t addrlen;
175         int retval;
176         int rcvbufsz = 128*1024;
177         int rcvsz = 0;
178         int rcvszsz = sizeof(rcvsz);
179         unsigned int *prcvszsz = (unsigned int *)&rcvszsz;
180         const int feature_on = 1;
181         /*
182          * First check whether we have a udev socket
183          */
184         memset(&sun, 0x00, sizeof(struct sockaddr_un));
185         sun.sun_family = AF_LOCAL;
186         strcpy(&sun.sun_path[1], "/org/kernel/dm/multipath_event");
187         addrlen = offsetof(struct sockaddr_un, sun_path) + strlen(sun.sun_path+1) + 1;
188
189         sock = socket(AF_LOCAL, SOCK_DGRAM, 0);
190         if (sock >= 0) {
191
192                 condlog(3, "reading events from udev socket.");
193
194                 /* the bind takes care of ensuring only one copy running */
195                 retval = bind(sock, (struct sockaddr *) &sun, addrlen);
196                 if (retval < 0) {
197                         condlog(0, "bind failed, exit");
198                         goto exit;
199                 }
200
201                 /* enable receiving of the sender credentials */
202                 setsockopt(sock, SOL_SOCKET, SO_PASSCRED,
203                            &feature_on, sizeof(feature_on));
204
205         } else {
206                 /* Fallback to read kernel netlink events */
207                 memset(&snl, 0x00, sizeof(struct sockaddr_nl));
208                 snl.nl_family = AF_NETLINK;
209                 snl.nl_pid = getpid();
210                 snl.nl_groups = 0x01;
211
212                 sock = socket(PF_NETLINK, SOCK_DGRAM, NETLINK_KOBJECT_UEVENT);
213                 if (sock == -1) {
214                         condlog(0, "error getting socket, exit");
215                         return 1;
216                 }
217
218                 condlog(3, "reading events from kernel.");
219
220                 /*
221                  * try to avoid dropping uevents, even so, this is not a guarantee,
222                  * but it does help to change the netlink uevent socket's
223                  * receive buffer threshold from the default value of 106,496 to
224                  * the maximum value of 262,142.
225                  */
226                 retval = setsockopt(sock, SOL_SOCKET, SO_RCVBUF, &rcvbufsz,
227                                     sizeof(rcvbufsz));
228
229                 if (retval < 0) {
230                         condlog(0, "error setting receive buffer size for socket, exit");
231                         exit(1);
232                 }
233                 retval = getsockopt(sock, SOL_SOCKET, SO_RCVBUF, &rcvsz, prcvszsz);
234                 if (retval < 0) {
235                         condlog(0, "error setting receive buffer size for socket, exit");
236                         exit(1);
237                 }
238                 condlog(3, "receive buffer size for socket is %u.", rcvsz);
239
240                 /* enable receiving of the sender credentials */
241                 setsockopt(sock, SOL_SOCKET, SO_PASSCRED,
242                            &feature_on, sizeof(feature_on));
243
244                 retval = bind(sock, (struct sockaddr *) &snl,
245                               sizeof(struct sockaddr_nl));
246                 if (retval < 0) {
247                         condlog(0, "bind failed, exit");
248                         goto exit;
249                 }
250         }
251
252         while (1) {
253                 int i;
254                 char *pos;
255                 size_t bufpos;
256                 ssize_t buflen;
257                 struct uevent *uev;
258                 char *buffer;
259                 struct msghdr smsg;
260                 struct iovec iov;
261                 char cred_msg[CMSG_SPACE(sizeof(struct ucred))];
262                 struct cmsghdr *cmsg;
263                 struct ucred *cred;
264                 static char buf[HOTPLUG_BUFFER_SIZE + OBJECT_SIZE];
265
266                 memset(buf, 0x00, sizeof(buf));
267                 iov.iov_base = &buf;
268                 iov.iov_len = sizeof(buf);
269                 memset (&smsg, 0x00, sizeof(struct msghdr));
270                 smsg.msg_iov = &iov;
271                 smsg.msg_iovlen = 1;
272                 smsg.msg_control = cred_msg;
273                 smsg.msg_controllen = sizeof(cred_msg);
274
275                 buflen = recvmsg(sock, &smsg, 0);
276                 if (buflen < 0) {
277                         if (errno != EINTR)
278                                 condlog(0, "error receiving message, errno %d", errno);
279                         continue;
280                 }
281
282                 cmsg = CMSG_FIRSTHDR(&smsg);
283                 if (cmsg == NULL || cmsg->cmsg_type != SCM_CREDENTIALS) {
284                         condlog(3, "no sender credentials received, message ignored");
285                         continue;
286                 }
287
288                 cred = (struct ucred *)CMSG_DATA(cmsg);
289                 if (cred->uid != 0) {
290                         condlog(3, "sender uid=%d, message ignored", cred->uid);
291                         continue;
292                 }
293
294                 /* skip header */
295                 bufpos = strlen(buf) + 1;
296                 if (bufpos < sizeof("a@/d") || bufpos >= sizeof(buf)) {
297                         condlog(3, "invalid message length");
298                         continue;
299                 }
300
301                 /* check message header */
302                 if (strstr(buf, "@/") == NULL) {
303                         condlog(3, "unrecognized message header");
304                         continue;
305                 }
306                 if ((size_t)buflen > sizeof(buf)-1) {
307                         condlog(2, "buffer overflow for received uevent");
308                         buflen = sizeof(buf)-1;
309                 }
310
311                 uev = alloc_uevent();
312
313                 if (!uev) {
314                         condlog(1, "lost uevent, oom");
315                         continue;
316                 }
317
318                 if ((size_t)buflen > sizeof(buf)-1)
319                         buflen = sizeof(buf)-1;
320
321                 /*
322                  * Copy the shared receive buffer contents to buffer private
323                  * to this uevent so we can immediately reuse the shared buffer.
324                  */
325                 memcpy(uev->buffer, buf, HOTPLUG_BUFFER_SIZE + OBJECT_SIZE);
326                 buffer = uev->buffer;
327                 buffer[buflen] = '\0';
328
329                 /* save start of payload */
330                 bufpos = strlen(buffer) + 1;
331
332                 /* action string */
333                 uev->action = buffer;
334                 pos = strchr(buffer, '@');
335                 if (!pos) {
336                         condlog(3, "bad action string '%s'", buffer);
337                         continue;
338                 }
339                 pos[0] = '\0';
340
341                 /* sysfs path */
342                 uev->devpath = &pos[1];
343
344                 /* hotplug events have the environment attached - reconstruct envp[] */
345                 for (i = 0; (bufpos < (size_t)buflen) && (i < HOTPLUG_NUM_ENVP-1); i++) {
346                         int keylen;
347                         char *key;
348
349                         key = &buffer[bufpos];
350                         keylen = strlen(key);
351                         uev->envp[i] = key;
352                         bufpos += keylen + 1;
353                 }
354                 uev->envp[i] = NULL;
355
356                 condlog(3, "uevent '%s' from '%s'", uev->action, uev->devpath);
357                 uev->kernel = strrchr(uev->devpath, '/');
358                 if (uev->kernel)
359                         uev->kernel++;
360
361                 /* print payload environment */
362                 for (i = 0; uev->envp[i] != NULL; i++)
363                         condlog(5, "%s", uev->envp[i]);
364
365                 /*
366                  * Queue uevent and poke service pthread.
367                  */
368                 pthread_mutex_lock(uevq_lockp);
369                 list_add_tail(&uev->node, &uevq);
370                 pthread_cond_signal(uev_condp);
371                 pthread_mutex_unlock(uevq_lockp);
372         }
373
374 exit:
375         close(sock);
376         return 1;
377 }
378
379 int uevent_listen(void)
380 {
381         int err;
382         struct udev_monitor *monitor = NULL;
383         int fd, socket_flags;
384         int need_failback = 1;
385         /*
386          * Queue uevents for service by dedicated thread so that the uevent
387          * listening thread does not block on multipathd locks (vecs->lock)
388          * thereby not getting to empty the socket's receive buffer queue
389          * often enough.
390          */
391         INIT_LIST_HEAD(&uevq);
392
393         pthread_mutex_init(uevq_lockp, NULL);
394         pthread_cond_init(uev_condp, NULL);
395         pthread_cleanup_push(uevq_stop, NULL);
396
397         monitor = udev_monitor_new_from_netlink(conf->udev, "udev");
398         if (!monitor) {
399                 condlog(2, "failed to create udev monitor");
400                 goto out;
401         }
402 #ifdef LIBUDEV_API_RECVBUF
403         if (udev_monitor_set_receive_buffer_size(monitor, 128 * 1024 * 1024))
404                 condlog(2, "failed to increase buffer size");
405 #endif
406         fd = udev_monitor_get_fd(monitor);
407         if (fd < 0) {
408                 condlog(2, "failed to get monitor fd");
409                 goto out;
410         }
411         socket_flags = fcntl(fd, F_GETFL);
412         if (socket_flags < 0) {
413                 condlog(2, "failed to get monitor socket flags : %s",
414                         strerror(errno));
415                 goto out;
416         }
417         if (fcntl(fd, F_SETFL, socket_flags & ~O_NONBLOCK) < 0) {
418                 condlog(2, "failed to set monitor socket flags : %s",
419                         strerror(errno));
420                 goto out;
421         }
422         err = udev_monitor_filter_add_match_subsystem_devtype(monitor, "block",
423                                                               NULL);
424         if (err)
425                 condlog(2, "failed to create filter : %s\n", strerror(-err));
426         err = udev_monitor_enable_receiving(monitor);
427         if (err) {
428                 condlog(2, "failed to enable receiving : %s\n", strerror(-err));
429                 goto out;
430         }
431         while (1) {
432                 int i = 0;
433                 char *pos, *end;
434                 struct uevent *uev;
435                 struct udev_device *dev;
436                 struct udev_list_entry *list_entry;
437
438                 dev = udev_monitor_receive_device(monitor);
439                 if (!dev) {
440                         condlog(0, "failed getting udev device");
441                         continue;
442                 }
443
444                 uev = alloc_uevent();
445                 if (!uev) {
446                         udev_device_unref(dev);
447                         condlog(1, "lost uevent, oom");
448                         continue;
449                 }
450                 pos = uev->buffer;
451                 end = pos + HOTPLUG_BUFFER_SIZE + OBJECT_SIZE - 1;
452                 udev_list_entry_foreach(list_entry, udev_device_get_properties_list_entry(dev)) {
453                         const char *name, *value;
454                         int bytes;
455
456                         name = udev_list_entry_get_name(list_entry);
457                         if (!name)
458                                 name = "(null)";
459                         value = udev_list_entry_get_value(list_entry);
460                         if (!value)
461                                 value = "(null)";
462                         bytes = snprintf(pos, end - pos, "%s=%s", name,
463                                         value);
464                         if (pos + bytes >= end) {
465                                 condlog(2, "buffer overflow for uevent");
466                                 break;
467                         }
468                         uev->envp[i] = pos;
469                         pos += bytes;
470                         *pos = '\0';
471                         pos++;
472                         if (strcmp(name, "DEVPATH") == 0)
473                                 uev->devpath = uev->envp[i] + 8;
474                         if (strcmp(name, "ACTION") == 0)
475                                 uev->action = uev->envp[i] + 7;
476                         i++;
477                         if (i == HOTPLUG_NUM_ENVP - 1)
478                                 break;
479                 }
480                 uev->udev = dev;
481                 uev->envp[i] = NULL;
482
483                 condlog(3, "uevent '%s' from '%s'", uev->action, uev->devpath);
484                 uev->kernel = strrchr(uev->devpath, '/');
485                 if (uev->kernel)
486                         uev->kernel++;
487
488                 /* print payload environment */
489                 for (i = 0; uev->envp[i] != NULL; i++)
490                         condlog(5, "%s", uev->envp[i]);
491
492                 /*
493                  * Queue uevent and poke service pthread.
494                  */
495                 pthread_mutex_lock(uevq_lockp);
496                 list_add_tail(&uev->node, &uevq);
497                 pthread_cond_signal(uev_condp);
498                 pthread_mutex_unlock(uevq_lockp);
499         }
500         need_failback = 0;
501 out:
502         if (monitor)
503                 udev_monitor_unref(monitor);
504         if (need_failback)
505                 err = failback_listen();
506         pthread_cleanup_pop(1);
507         pthread_mutex_destroy(uevq_lockp);
508         pthread_cond_destroy(uev_condp);
509         return err;
510 }
511
512 extern int
513 uevent_get_major(struct uevent *uev)
514 {
515         char *p, *q;
516         int i, major = -1;
517
518         for (i = 0; uev->envp[i] != NULL; i++) {
519                 if (!strncmp(uev->envp[i], "MAJOR", 5) && strlen(uev->envp[i]) > 6) {
520                         p = uev->envp[i] + 6;
521                         major = strtoul(p, &q, 10);
522                         if (p == q) {
523                                 condlog(2, "invalid major '%s'", p);
524                                 major = -1;
525                         }
526                         break;
527                 }
528         }
529         return major;
530 }
531
532 extern int
533 uevent_get_minor(struct uevent *uev)
534 {
535         char *p, *q;
536         int i, minor = -1;
537
538         for (i = 0; uev->envp[i] != NULL; i++) {
539                 if (!strncmp(uev->envp[i], "MINOR", 5) && strlen(uev->envp[i]) > 6) {
540                         p = uev->envp[i] + 6;
541                         minor = strtoul(p, &q, 10);
542                         if (p == q) {
543                                 condlog(2, "invalid minor '%s'", p);
544                                 minor = -1;
545                         }
546                         break;
547                 }
548         }
549         return minor;
550 }
551
552 extern int
553 uevent_get_disk_ro(struct uevent *uev)
554 {
555         char *p, *q;
556         int i, ro = -1;
557
558         for (i = 0; uev->envp[i] != NULL; i++) {
559                 if (!strncmp(uev->envp[i], "DISK_RO", 6) && strlen(uev->envp[i]) > 7) {
560                         p = uev->envp[i] + 8;
561                         ro = strtoul(p, &q, 10);
562                         if (p == q) {
563                                 condlog(2, "invalid read_only setting '%s'", p);
564                                 ro = -1;
565                         }
566                         break;
567                 }
568         }
569         return ro;
570 }
571
572 extern char *
573 uevent_get_dm_name(struct uevent *uev)
574 {
575         char *p = NULL;
576         int i;
577
578         for (i = 0; uev->envp[i] != NULL; i++) {
579                 if (!strncmp(uev->envp[i], "DM_NAME", 6) &&
580                     strlen(uev->envp[i]) > 7) {
581                         p = MALLOC(strlen(uev->envp[i] + 8) + 1);
582                         strcpy(p, uev->envp[i] + 8);
583                         break;
584                 }
585         }
586         return p;
587 }