0643e14e851f95f50885f151bce4abc970d120c2
[multipath-tools/.git] / libmultipath / uevent.c
1 /*
2  * uevent.c - trigger upon netlink uevents from the kernel
3  *
4  *      Only kernels from version 2.6.10* on provide the uevent netlink socket.
5  *      Until the libc-kernel-headers are updated, you need to compile with:
6  *
7  *        gcc -I /lib/modules/`uname -r`/build/include -o uevent_listen uevent_listen.c
8  *
9  * Copyright (C) 2004 Kay Sievers <kay.sievers@vrfy.org>
10  *
11  *      This program is free software; you can redistribute it and/or modify it
12  *      under the terms of the GNU General Public License as published by the
13  *      Free Software Foundation version 2 of the License.
14  *
15  *      This program is distributed in the hope that it will be useful, but
16  *      WITHOUT ANY WARRANTY; without even the implied warranty of
17  *      MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
18  *      General Public License for more details.
19  *
20  *      You should have received a copy of the GNU General Public License along
21  *      with this program; if not, write to the Free Software Foundation, Inc.,
22  *      675 Mass Ave, Cambridge, MA 02139, USA.
23  *
24  */
25
26 #include <unistd.h>
27 #include <stdio.h>
28 #include <errno.h>
29 #include <stdlib.h>
30 #include <stddef.h>
31 #include <string.h>
32 #include <fcntl.h>
33 #include <time.h>
34 #include <sys/socket.h>
35 #include <sys/user.h>
36 #include <sys/un.h>
37 #include <linux/types.h>
38 #include <linux/netlink.h>
39 #include <pthread.h>
40 #include <limits.h>
41 #include <sys/mman.h>
42 #include <libudev.h>
43 #include <errno.h>
44
45 #include "memory.h"
46 #include "debug.h"
47 #include "list.h"
48 #include "uevent.h"
49 #include "vector.h"
50 #include "config.h"
51
/* Signature of the callback invoked for each uevent taken off the queue. */
typedef int (uev_trigger)(struct uevent *, void * trigger_data);

/* Not referenced by the code in this file. */
pthread_t uevq_thr;
/*
 * Queue of received uevents awaiting service. The listeners append to it
 * and uevent_dispatch() drains it, both while holding uevq_lockp.
 */
LIST_HEAD(uevq);
/* Mutex protecting uevq; always used through the uevq_lockp pointer. */
pthread_mutex_t uevq_lock = PTHREAD_MUTEX_INITIALIZER;
pthread_mutex_t *uevq_lockp = &uevq_lock;
/* Condition signalled when an event is queued or the queue is stopped. */
pthread_cond_t uev_cond = PTHREAD_COND_INITIALIZER;
pthread_cond_t *uev_condp = &uev_cond;
/* Callback and its opaque argument, installed by uevent_dispatch(). */
uev_trigger *my_uev_trigger;
void * my_trigger_data;
/*
 * Set to 1 by uevent_dispatch() before it handles a batch of events and
 * back to 0 at the start of its next loop iteration.
 */
int servicing_uev;
63
64 int is_uevent_busy(void)
65 {
66         int empty;
67
68         pthread_mutex_lock(uevq_lockp);
69         empty = list_empty(&uevq);
70         pthread_mutex_unlock(uevq_lockp);
71         return (!empty || servicing_uev);
72 }
73
74 struct uevent * alloc_uevent (void)
75 {
76         struct uevent *uev = MALLOC(sizeof(struct uevent));
77
78         if (uev)
79                 INIT_LIST_HEAD(&uev->node);
80
81         return uev;
82 }
83
/*
 * Initialise a thread attribute object.
 *
 * @attr:      attribute object to initialise
 * @stacksize: requested stack size; raised to PTHREAD_STACK_MIN if smaller
 * @detached:  non-zero to create the thread in the detached state
 *
 * Any failure is reported on stderr and terminates the process with
 * exit status 1.
 */
void
setup_thread_attr(pthread_attr_t *attr, size_t stacksize, int detached)
{
	int rc;

	/*
	 * The pthread_attr_*() functions return the error number directly
	 * and do not set errno, so the message must be built from the
	 * return value.
	 */
	rc = pthread_attr_init(attr);
	if (rc) {
		fprintf(stderr, "can't initialize thread attr: %s\n",
			strerror(rc));
		exit(1);
	}
	if (stacksize < (size_t)PTHREAD_STACK_MIN)
		stacksize = (size_t)PTHREAD_STACK_MIN;

	rc = pthread_attr_setstacksize(attr, stacksize);
	if (rc) {
		fprintf(stderr, "can't set thread stack size to %lu: %s\n",
			(unsigned long)stacksize, strerror(rc));
		exit(1);
	}
	if (detached) {
		rc = pthread_attr_setdetachstate(attr,
						 PTHREAD_CREATE_DETACHED);
		if (rc) {
			fprintf(stderr, "can't set thread to detached: %s\n",
				strerror(rc));
			exit(1);
		}
	}
}
107
108 /*
109  * Called with uevq_lockp held
110  */
111 void
112 service_uevq(struct list_head *tmpq)
113 {
114         struct uevent *uev, *tmp;
115
116         list_for_each_entry_safe(uev, tmp, tmpq, node) {
117                 list_del_init(&uev->node);
118
119                 if (my_uev_trigger && my_uev_trigger(uev, my_trigger_data))
120                         condlog(0, "uevent trigger error");
121
122                 if (uev->udev)
123                         udev_device_unref(uev->udev);
124                 FREE(uev);
125         }
126 }
127
128 static void uevq_stop(void *arg)
129 {
130         condlog(3, "Stopping uev queue");
131         pthread_mutex_lock(uevq_lockp);
132         my_uev_trigger = NULL;
133         pthread_cond_signal(uev_condp);
134         pthread_mutex_unlock(uevq_lockp);
135 }
136
137 void
138 uevq_cleanup(struct list_head *tmpq)
139 {
140         struct uevent *uev, *tmp;
141
142         list_for_each_entry_safe(uev, tmp, tmpq, node) {
143                 list_del_init(&uev->node);
144                 FREE(uev);
145         }
146 }
147
148 /*
149  * Service the uevent queue.
150  */
151 int uevent_dispatch(int (*uev_trigger)(struct uevent *, void * trigger_data),
152                     void * trigger_data)
153 {
154         my_uev_trigger = uev_trigger;
155         my_trigger_data = trigger_data;
156
157         mlockall(MCL_CURRENT | MCL_FUTURE);
158
159         while (1) {
160                 LIST_HEAD(uevq_tmp);
161
162                 pthread_mutex_lock(uevq_lockp);
163                 servicing_uev = 0;
164                 /*
165                  * Condition signals are unreliable,
166                  * so make sure we only wait if we have to.
167                  */
168                 if (list_empty(&uevq)) {
169                         pthread_cond_wait(uev_condp, uevq_lockp);
170                 }
171                 servicing_uev = 1;
172                 list_splice_init(&uevq, &uevq_tmp);
173                 pthread_mutex_unlock(uevq_lockp);
174                 if (!my_uev_trigger)
175                         break;
176                 service_uevq(&uevq_tmp);
177         }
178         condlog(3, "Terminating uev service queue");
179         uevq_cleanup(&uevq);
180         return 0;
181 }
182
183 int failback_listen(void)
184 {
185         int sock;
186         struct sockaddr_nl snl;
187         struct sockaddr_un sun;
188         socklen_t addrlen;
189         int retval;
190         int rcvbufsz = 128*1024;
191         int rcvsz = 0;
192         int rcvszsz = sizeof(rcvsz);
193         unsigned int *prcvszsz = (unsigned int *)&rcvszsz;
194         const int feature_on = 1;
195         /*
196          * First check whether we have a udev socket
197          */
198         memset(&sun, 0x00, sizeof(struct sockaddr_un));
199         sun.sun_family = AF_LOCAL;
200         strcpy(&sun.sun_path[1], "/org/kernel/dm/multipath_event");
201         addrlen = offsetof(struct sockaddr_un, sun_path) + strlen(sun.sun_path+1) + 1;
202
203         sock = socket(AF_LOCAL, SOCK_DGRAM, 0);
204         if (sock >= 0) {
205
206                 condlog(3, "reading events from udev socket.");
207
208                 /* the bind takes care of ensuring only one copy running */
209                 retval = bind(sock, (struct sockaddr *) &sun, addrlen);
210                 if (retval < 0) {
211                         condlog(0, "bind failed, exit");
212                         goto exit;
213                 }
214
215                 /* enable receiving of the sender credentials */
216                 setsockopt(sock, SOL_SOCKET, SO_PASSCRED,
217                            &feature_on, sizeof(feature_on));
218
219         } else {
220                 /* Fallback to read kernel netlink events */
221                 memset(&snl, 0x00, sizeof(struct sockaddr_nl));
222                 snl.nl_family = AF_NETLINK;
223                 snl.nl_pid = getpid();
224                 snl.nl_groups = 0x01;
225
226                 sock = socket(PF_NETLINK, SOCK_DGRAM, NETLINK_KOBJECT_UEVENT);
227                 if (sock == -1) {
228                         condlog(0, "error getting socket, exit");
229                         return 1;
230                 }
231
232                 condlog(3, "reading events from kernel.");
233
234                 /*
235                  * try to avoid dropping uevents, even so, this is not a guarantee,
236                  * but it does help to change the netlink uevent socket's
237                  * receive buffer threshold from the default value of 106,496 to
238                  * the maximum value of 262,142.
239                  */
240                 retval = setsockopt(sock, SOL_SOCKET, SO_RCVBUF, &rcvbufsz,
241                                     sizeof(rcvbufsz));
242
243                 if (retval < 0) {
244                         condlog(0, "error setting receive buffer size for socket, exit");
245                         exit(1);
246                 }
247                 retval = getsockopt(sock, SOL_SOCKET, SO_RCVBUF, &rcvsz, prcvszsz);
248                 if (retval < 0) {
249                         condlog(0, "error setting receive buffer size for socket, exit");
250                         exit(1);
251                 }
252                 condlog(3, "receive buffer size for socket is %u.", rcvsz);
253
254                 /* enable receiving of the sender credentials */
255                 setsockopt(sock, SOL_SOCKET, SO_PASSCRED,
256                            &feature_on, sizeof(feature_on));
257
258                 retval = bind(sock, (struct sockaddr *) &snl,
259                               sizeof(struct sockaddr_nl));
260                 if (retval < 0) {
261                         condlog(0, "bind failed, exit");
262                         goto exit;
263                 }
264         }
265
266         while (1) {
267                 int i;
268                 char *pos;
269                 size_t bufpos;
270                 ssize_t buflen;
271                 struct uevent *uev;
272                 char *buffer;
273                 struct msghdr smsg;
274                 struct iovec iov;
275                 char cred_msg[CMSG_SPACE(sizeof(struct ucred))];
276                 struct cmsghdr *cmsg;
277                 struct ucred *cred;
278                 static char buf[HOTPLUG_BUFFER_SIZE + OBJECT_SIZE];
279
280                 memset(buf, 0x00, sizeof(buf));
281                 iov.iov_base = &buf;
282                 iov.iov_len = sizeof(buf);
283                 memset (&smsg, 0x00, sizeof(struct msghdr));
284                 smsg.msg_iov = &iov;
285                 smsg.msg_iovlen = 1;
286                 smsg.msg_control = cred_msg;
287                 smsg.msg_controllen = sizeof(cred_msg);
288
289                 buflen = recvmsg(sock, &smsg, 0);
290                 if (buflen < 0) {
291                         if (errno != EINTR)
292                                 condlog(0, "error receiving message, errno %d", errno);
293                         continue;
294                 }
295
296                 cmsg = CMSG_FIRSTHDR(&smsg);
297                 if (cmsg == NULL || cmsg->cmsg_type != SCM_CREDENTIALS) {
298                         condlog(3, "no sender credentials received, message ignored");
299                         continue;
300                 }
301
302                 cred = (struct ucred *)CMSG_DATA(cmsg);
303                 if (cred->uid != 0) {
304                         condlog(3, "sender uid=%d, message ignored", cred->uid);
305                         continue;
306                 }
307
308                 /* skip header */
309                 bufpos = strlen(buf) + 1;
310                 if (bufpos < sizeof("a@/d") || bufpos >= sizeof(buf)) {
311                         condlog(3, "invalid message length");
312                         continue;
313                 }
314
315                 /* check message header */
316                 if (strstr(buf, "@/") == NULL) {
317                         condlog(3, "unrecognized message header");
318                         continue;
319                 }
320                 if ((size_t)buflen > sizeof(buf)-1) {
321                         condlog(2, "buffer overflow for received uevent");
322                         buflen = sizeof(buf)-1;
323                 }
324
325                 uev = alloc_uevent();
326
327                 if (!uev) {
328                         condlog(1, "lost uevent, oom");
329                         continue;
330                 }
331
332                 if ((size_t)buflen > sizeof(buf)-1)
333                         buflen = sizeof(buf)-1;
334
335                 /*
336                  * Copy the shared receive buffer contents to buffer private
337                  * to this uevent so we can immediately reuse the shared buffer.
338                  */
339                 memcpy(uev->buffer, buf, HOTPLUG_BUFFER_SIZE + OBJECT_SIZE);
340                 buffer = uev->buffer;
341                 buffer[buflen] = '\0';
342
343                 /* save start of payload */
344                 bufpos = strlen(buffer) + 1;
345
346                 /* action string */
347                 uev->action = buffer;
348                 pos = strchr(buffer, '@');
349                 if (!pos) {
350                         condlog(3, "bad action string '%s'", buffer);
351                         continue;
352                 }
353                 pos[0] = '\0';
354
355                 /* sysfs path */
356                 uev->devpath = &pos[1];
357
358                 /* hotplug events have the environment attached - reconstruct envp[] */
359                 for (i = 0; (bufpos < (size_t)buflen) && (i < HOTPLUG_NUM_ENVP-1); i++) {
360                         int keylen;
361                         char *key;
362
363                         key = &buffer[bufpos];
364                         keylen = strlen(key);
365                         uev->envp[i] = key;
366                         /* Filter out sequence number */
367                         if (strncmp(key, "SEQNUM=", 7) == 0) {
368                                 char *eptr;
369
370                                 uev->seqnum = strtoul(key + 7, &eptr, 10);
371                                 if (eptr == key + 7)
372                                         uev->seqnum = -1;
373                         }
374                         bufpos += keylen + 1;
375                 }
376                 uev->envp[i] = NULL;
377
378                 condlog(3, "uevent %ld '%s' from '%s'", uev->seqnum,
379                         uev->action, uev->devpath);
380                 uev->kernel = strrchr(uev->devpath, '/');
381                 if (uev->kernel)
382                         uev->kernel++;
383
384                 /* print payload environment */
385                 for (i = 0; uev->envp[i] != NULL; i++)
386                         condlog(5, "%s", uev->envp[i]);
387
388                 /*
389                  * Queue uevent and poke service pthread.
390                  */
391                 pthread_mutex_lock(uevq_lockp);
392                 list_add_tail(&uev->node, &uevq);
393                 pthread_cond_signal(uev_condp);
394                 pthread_mutex_unlock(uevq_lockp);
395         }
396
397 exit:
398         close(sock);
399         return 1;
400 }
401
402 int uevent_listen(void)
403 {
404         int err;
405         struct udev_monitor *monitor = NULL;
406         int fd, socket_flags;
407         int need_failback = 1;
408         /*
409          * Queue uevents for service by dedicated thread so that the uevent
410          * listening thread does not block on multipathd locks (vecs->lock)
411          * thereby not getting to empty the socket's receive buffer queue
412          * often enough.
413          */
414         pthread_cleanup_push(uevq_stop, NULL);
415
416         monitor = udev_monitor_new_from_netlink(conf->udev, "udev");
417         if (!monitor) {
418                 condlog(2, "failed to create udev monitor");
419                 goto out;
420         }
421 #ifdef LIBUDEV_API_RECVBUF
422         if (udev_monitor_set_receive_buffer_size(monitor, 128 * 1024 * 1024))
423                 condlog(2, "failed to increase buffer size");
424 #endif
425         fd = udev_monitor_get_fd(monitor);
426         if (fd < 0) {
427                 condlog(2, "failed to get monitor fd");
428                 goto out;
429         }
430         socket_flags = fcntl(fd, F_GETFL);
431         if (socket_flags < 0) {
432                 condlog(2, "failed to get monitor socket flags : %s",
433                         strerror(errno));
434                 goto out;
435         }
436         if (fcntl(fd, F_SETFL, socket_flags & ~O_NONBLOCK) < 0) {
437                 condlog(2, "failed to set monitor socket flags : %s",
438                         strerror(errno));
439                 goto out;
440         }
441         err = udev_monitor_filter_add_match_subsystem_devtype(monitor, "block",
442                                                               NULL);
443         if (err)
444                 condlog(2, "failed to create filter : %s", strerror(-err));
445         err = udev_monitor_enable_receiving(monitor);
446         if (err) {
447                 condlog(2, "failed to enable receiving : %s", strerror(-err));
448                 goto out;
449         }
450         while (1) {
451                 int i = 0;
452                 char *pos, *end;
453                 struct uevent *uev;
454                 struct udev_device *dev;
455                 struct udev_list_entry *list_entry;
456
457                 dev = udev_monitor_receive_device(monitor);
458                 if (!dev) {
459                         condlog(0, "failed getting udev device");
460                         continue;
461                 }
462
463                 uev = alloc_uevent();
464                 if (!uev) {
465                         udev_device_unref(dev);
466                         condlog(1, "lost uevent, oom");
467                         continue;
468                 }
469                 pos = uev->buffer;
470                 end = pos + HOTPLUG_BUFFER_SIZE + OBJECT_SIZE - 1;
471                 udev_list_entry_foreach(list_entry, udev_device_get_properties_list_entry(dev)) {
472                         const char *name, *value;
473                         int bytes;
474
475                         name = udev_list_entry_get_name(list_entry);
476                         if (!name)
477                                 name = "(null)";
478                         value = udev_list_entry_get_value(list_entry);
479                         if (!value)
480                                 value = "(null)";
481                         bytes = snprintf(pos, end - pos, "%s=%s", name,
482                                         value);
483                         if (pos + bytes >= end) {
484                                 condlog(2, "buffer overflow for uevent");
485                                 break;
486                         }
487                         uev->envp[i] = pos;
488                         pos += bytes;
489                         *pos = '\0';
490                         pos++;
491                         if (strcmp(name, "DEVPATH") == 0)
492                                 uev->devpath = uev->envp[i] + 8;
493                         if (strcmp(name, "ACTION") == 0)
494                                 uev->action = uev->envp[i] + 7;
495                         i++;
496                         if (i == HOTPLUG_NUM_ENVP - 1)
497                                 break;
498                 }
499                 uev->udev = dev;
500                 uev->envp[i] = NULL;
501
502                 condlog(3, "uevent '%s' from '%s'", uev->action, uev->devpath);
503                 uev->kernel = strrchr(uev->devpath, '/');
504                 if (uev->kernel)
505                         uev->kernel++;
506
507                 /* print payload environment */
508                 for (i = 0; uev->envp[i] != NULL; i++)
509                         condlog(5, "%s", uev->envp[i]);
510
511                 /*
512                  * Queue uevent and poke service pthread.
513                  */
514                 pthread_mutex_lock(uevq_lockp);
515                 list_add_tail(&uev->node, &uevq);
516                 pthread_cond_signal(uev_condp);
517                 pthread_mutex_unlock(uevq_lockp);
518         }
519         need_failback = 0;
520 out:
521         if (monitor)
522                 udev_monitor_unref(monitor);
523         if (need_failback)
524                 err = failback_listen();
525         pthread_cleanup_pop(1);
526         return err;
527 }
528
529 extern int
530 uevent_get_major(struct uevent *uev)
531 {
532         char *p, *q;
533         int i, major = -1;
534
535         for (i = 0; uev->envp[i] != NULL; i++) {
536                 if (!strncmp(uev->envp[i], "MAJOR", 5) && strlen(uev->envp[i]) > 6) {
537                         p = uev->envp[i] + 6;
538                         major = strtoul(p, &q, 10);
539                         if (p == q) {
540                                 condlog(2, "invalid major '%s'", p);
541                                 major = -1;
542                         }
543                         break;
544                 }
545         }
546         return major;
547 }
548
549 extern int
550 uevent_get_minor(struct uevent *uev)
551 {
552         char *p, *q;
553         int i, minor = -1;
554
555         for (i = 0; uev->envp[i] != NULL; i++) {
556                 if (!strncmp(uev->envp[i], "MINOR", 5) && strlen(uev->envp[i]) > 6) {
557                         p = uev->envp[i] + 6;
558                         minor = strtoul(p, &q, 10);
559                         if (p == q) {
560                                 condlog(2, "invalid minor '%s'", p);
561                                 minor = -1;
562                         }
563                         break;
564                 }
565         }
566         return minor;
567 }
568
569 extern int
570 uevent_get_disk_ro(struct uevent *uev)
571 {
572         char *p, *q;
573         int i, ro = -1;
574
575         for (i = 0; uev->envp[i] != NULL; i++) {
576                 if (!strncmp(uev->envp[i], "DISK_RO", 6) && strlen(uev->envp[i]) > 7) {
577                         p = uev->envp[i] + 8;
578                         ro = strtoul(p, &q, 10);
579                         if (p == q) {
580                                 condlog(2, "invalid read_only setting '%s'", p);
581                                 ro = -1;
582                         }
583                         break;
584                 }
585         }
586         return ro;
587 }
588
589 extern char *
590 uevent_get_dm_name(struct uevent *uev)
591 {
592         char *p = NULL;
593         int i;
594
595         for (i = 0; uev->envp[i] != NULL; i++) {
596                 if (!strncmp(uev->envp[i], "DM_NAME", 6) &&
597                     strlen(uev->envp[i]) > 7) {
598                         p = MALLOC(strlen(uev->envp[i] + 8) + 1);
599                         strcpy(p, uev->envp[i] + 8);
600                         break;
601                 }
602         }
603         return p;
604 }