9ee3ade64d898269476d8c22f3024c2f1eeb39fa
[multipath-tools/.git] / libmultipath / uevent.c
1 /*
2  * uevent.c - trigger upon netlink uevents from the kernel
3  *
4  *      Only kernels from version 2.6.10* on provide the uevent netlink socket.
5  *      Until the libc-kernel-headers are updated, you need to compile with:
6  *
7  *        gcc -I /lib/modules/`uname -r`/build/include -o uevent_listen uevent_listen.c
8  *
9  * Copyright (C) 2004 Kay Sievers <kay.sievers@vrfy.org>
10  *
11  *      This program is free software; you can redistribute it and/or modify it
12  *      under the terms of the GNU General Public License as published by the
13  *      Free Software Foundation version 2 of the License.
14  *
15  *      This program is distributed in the hope that it will be useful, but
16  *      WITHOUT ANY WARRANTY; without even the implied warranty of
17  *      MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
18  *      General Public License for more details.
19  *
20  *      You should have received a copy of the GNU General Public License along
21  *      with this program; if not, write to the Free Software Foundation, Inc.,
22  *      675 Mass Ave, Cambridge, MA 02139, USA.
23  *
24  */
25
26 #include <unistd.h>
27 #include <stdio.h>
28 #include <errno.h>
29 #include <stdlib.h>
30 #include <stddef.h>
31 #include <string.h>
32 #include <fcntl.h>
33 #include <time.h>
34 #include <sys/socket.h>
35 #include <sys/user.h>
36 #include <sys/un.h>
37 #include <linux/types.h>
38 #include <linux/netlink.h>
39 #include <pthread.h>
40 #include <limits.h>
41 #include <sys/mman.h>
42 #include <libudev.h>
43 #include <errno.h>
44
45 #include "memory.h"
46 #include "debug.h"
47 #include "list.h"
48 #include "uevent.h"
49 #include "vector.h"
50
/* Signature of the callback run by service_uevq() for each dequeued uevent. */
typedef int (uev_trigger)(struct uevent *, void * trigger_data);

pthread_t uevq_thr;
/* Pending uevents; the listeners append to it and uevent_dispatch() drains it. */
LIST_HEAD(uevq);
/* Mutex taken around every access to uevq in this file. */
pthread_mutex_t uevq_lock = PTHREAD_MUTEX_INITIALIZER;
pthread_mutex_t *uevq_lockp = &uevq_lock;
/* Signalled after an event is queued and by uevq_stop(). */
pthread_cond_t uev_cond = PTHREAD_COND_INITIALIZER;
pthread_cond_t *uev_condp = &uev_cond;
/* Callback and its opaque argument, installed by uevent_dispatch(). */
uev_trigger *my_uev_trigger;
void * my_trigger_data;
/* Set to 1 by uevent_dispatch() once it has taken a batch, 0 while it waits. */
int servicing_uev;
62
63 int is_uevent_busy(void)
64 {
65         int empty;
66
67         pthread_mutex_lock(uevq_lockp);
68         empty = list_empty(&uevq);
69         pthread_mutex_unlock(uevq_lockp);
70         return (!empty || servicing_uev);
71 }
72
73 struct uevent * alloc_uevent (void)
74 {
75         struct uevent *uev = MALLOC(sizeof(struct uevent));
76
77         if (uev)
78                 INIT_LIST_HEAD(&uev->node);
79
80         return uev;
81 }
82
/*
 * Initialise a thread attribute object.
 *
 * attr      - attribute object to initialise
 * stacksize - requested stack size; raised to PTHREAD_STACK_MIN if smaller
 * detached  - non-zero to request a detached thread
 *
 * Any failure is reported on stderr and terminates the process with
 * exit(1). The pthread_attr_* functions return the error number rather
 * than setting errno, so the returned value is what gets passed to
 * strerror().
 */
void
setup_thread_attr(pthread_attr_t *attr, size_t stacksize, int detached)
{
        int rc;

        rc = pthread_attr_init(attr);
        if (rc) {
                fprintf(stderr, "can't initialize thread attr: %s\n",
                        strerror(rc));
                exit(1);
        }
        if (stacksize < (size_t)PTHREAD_STACK_MIN)
                stacksize = (size_t)PTHREAD_STACK_MIN;

        rc = pthread_attr_setstacksize(attr, stacksize);
        if (rc) {
                fprintf(stderr, "can't set thread stack size to %lu: %s\n",
                        (unsigned long)stacksize, strerror(rc));
                exit(1);
        }
        if (detached) {
                rc = pthread_attr_setdetachstate(attr,
                                                 PTHREAD_CREATE_DETACHED);
                if (rc) {
                        fprintf(stderr, "can't set thread to detached: %s\n",
                                strerror(rc));
                        exit(1);
                }
        }
}
106
107 /*
108  * Called with uevq_lockp held
109  */
110 void
111 service_uevq(struct list_head *tmpq)
112 {
113         struct uevent *uev, *tmp;
114
115         list_for_each_entry_safe(uev, tmp, tmpq, node) {
116                 list_del_init(&uev->node);
117
118                 if (my_uev_trigger && my_uev_trigger(uev, my_trigger_data))
119                         condlog(0, "uevent trigger error");
120
121                 if (uev->udev)
122                         udev_device_unref(uev->udev);
123                 FREE(uev);
124         }
125 }
126
127 static void uevq_stop(void *arg)
128 {
129         struct udev *udev = arg;
130
131         condlog(3, "Stopping uev queue");
132         pthread_mutex_lock(uevq_lockp);
133         my_uev_trigger = NULL;
134         pthread_cond_signal(uev_condp);
135         pthread_mutex_unlock(uevq_lockp);
136         udev_unref(udev);
137 }
138
139 void
140 uevq_cleanup(struct list_head *tmpq)
141 {
142         struct uevent *uev, *tmp;
143
144         list_for_each_entry_safe(uev, tmp, tmpq, node) {
145                 list_del_init(&uev->node);
146                 FREE(uev);
147         }
148 }
149
150 /*
151  * Service the uevent queue.
152  */
153 int uevent_dispatch(int (*uev_trigger)(struct uevent *, void * trigger_data),
154                     void * trigger_data)
155 {
156         my_uev_trigger = uev_trigger;
157         my_trigger_data = trigger_data;
158
159         mlockall(MCL_CURRENT | MCL_FUTURE);
160
161         while (1) {
162                 LIST_HEAD(uevq_tmp);
163
164                 pthread_mutex_lock(uevq_lockp);
165                 servicing_uev = 0;
166                 /*
167                  * Condition signals are unreliable,
168                  * so make sure we only wait if we have to.
169                  */
170                 if (list_empty(&uevq)) {
171                         pthread_cond_wait(uev_condp, uevq_lockp);
172                 }
173                 servicing_uev = 1;
174                 list_splice_init(&uevq, &uevq_tmp);
175                 pthread_mutex_unlock(uevq_lockp);
176                 if (!my_uev_trigger)
177                         break;
178                 service_uevq(&uevq_tmp);
179         }
180         condlog(3, "Terminating uev service queue");
181         uevq_cleanup(&uevq);
182         return 0;
183 }
184
185 int failback_listen(void)
186 {
187         int sock;
188         struct sockaddr_nl snl;
189         struct sockaddr_un sun;
190         socklen_t addrlen;
191         int retval;
192         int rcvbufsz = 128*1024;
193         int rcvsz = 0;
194         int rcvszsz = sizeof(rcvsz);
195         unsigned int *prcvszsz = (unsigned int *)&rcvszsz;
196         const int feature_on = 1;
197         /*
198          * First check whether we have a udev socket
199          */
200         memset(&sun, 0x00, sizeof(struct sockaddr_un));
201         sun.sun_family = AF_LOCAL;
202         strcpy(&sun.sun_path[1], "/org/kernel/dm/multipath_event");
203         addrlen = offsetof(struct sockaddr_un, sun_path) + strlen(sun.sun_path+1) + 1;
204
205         sock = socket(AF_LOCAL, SOCK_DGRAM, 0);
206         if (sock >= 0) {
207
208                 condlog(3, "reading events from udev socket.");
209
210                 /* the bind takes care of ensuring only one copy running */
211                 retval = bind(sock, (struct sockaddr *) &sun, addrlen);
212                 if (retval < 0) {
213                         condlog(0, "bind failed, exit");
214                         goto exit;
215                 }
216
217                 /* enable receiving of the sender credentials */
218                 setsockopt(sock, SOL_SOCKET, SO_PASSCRED,
219                            &feature_on, sizeof(feature_on));
220
221         } else {
222                 /* Fallback to read kernel netlink events */
223                 memset(&snl, 0x00, sizeof(struct sockaddr_nl));
224                 snl.nl_family = AF_NETLINK;
225                 snl.nl_pid = getpid();
226                 snl.nl_groups = 0x01;
227
228                 sock = socket(PF_NETLINK, SOCK_DGRAM, NETLINK_KOBJECT_UEVENT);
229                 if (sock == -1) {
230                         condlog(0, "error getting socket, exit");
231                         return 1;
232                 }
233
234                 condlog(3, "reading events from kernel.");
235
236                 /*
237                  * try to avoid dropping uevents, even so, this is not a guarantee,
238                  * but it does help to change the netlink uevent socket's
239                  * receive buffer threshold from the default value of 106,496 to
240                  * the maximum value of 262,142.
241                  */
242                 retval = setsockopt(sock, SOL_SOCKET, SO_RCVBUF, &rcvbufsz,
243                                     sizeof(rcvbufsz));
244
245                 if (retval < 0) {
246                         condlog(0, "error setting receive buffer size for socket, exit");
247                         exit(1);
248                 }
249                 retval = getsockopt(sock, SOL_SOCKET, SO_RCVBUF, &rcvsz, prcvszsz);
250                 if (retval < 0) {
251                         condlog(0, "error setting receive buffer size for socket, exit");
252                         exit(1);
253                 }
254                 condlog(3, "receive buffer size for socket is %u.", rcvsz);
255
256                 /* enable receiving of the sender credentials */
257                 setsockopt(sock, SOL_SOCKET, SO_PASSCRED,
258                            &feature_on, sizeof(feature_on));
259
260                 retval = bind(sock, (struct sockaddr *) &snl,
261                               sizeof(struct sockaddr_nl));
262                 if (retval < 0) {
263                         condlog(0, "bind failed, exit");
264                         goto exit;
265                 }
266         }
267
268         while (1) {
269                 int i;
270                 char *pos;
271                 size_t bufpos;
272                 ssize_t buflen;
273                 struct uevent *uev;
274                 char *buffer;
275                 struct msghdr smsg;
276                 struct iovec iov;
277                 char cred_msg[CMSG_SPACE(sizeof(struct ucred))];
278                 struct cmsghdr *cmsg;
279                 struct ucred *cred;
280                 static char buf[HOTPLUG_BUFFER_SIZE + OBJECT_SIZE];
281
282                 memset(buf, 0x00, sizeof(buf));
283                 iov.iov_base = &buf;
284                 iov.iov_len = sizeof(buf);
285                 memset (&smsg, 0x00, sizeof(struct msghdr));
286                 smsg.msg_iov = &iov;
287                 smsg.msg_iovlen = 1;
288                 smsg.msg_control = cred_msg;
289                 smsg.msg_controllen = sizeof(cred_msg);
290
291                 buflen = recvmsg(sock, &smsg, 0);
292                 if (buflen < 0) {
293                         if (errno != EINTR)
294                                 condlog(0, "error receiving message, errno %d", errno);
295                         continue;
296                 }
297
298                 cmsg = CMSG_FIRSTHDR(&smsg);
299                 if (cmsg == NULL || cmsg->cmsg_type != SCM_CREDENTIALS) {
300                         condlog(3, "no sender credentials received, message ignored");
301                         continue;
302                 }
303
304                 cred = (struct ucred *)CMSG_DATA(cmsg);
305                 if (cred->uid != 0) {
306                         condlog(3, "sender uid=%d, message ignored", cred->uid);
307                         continue;
308                 }
309
310                 /* skip header */
311                 bufpos = strlen(buf) + 1;
312                 if (bufpos < sizeof("a@/d") || bufpos >= sizeof(buf)) {
313                         condlog(3, "invalid message length");
314                         continue;
315                 }
316
317                 /* check message header */
318                 if (strstr(buf, "@/") == NULL) {
319                         condlog(3, "unrecognized message header");
320                         continue;
321                 }
322                 if ((size_t)buflen > sizeof(buf)-1) {
323                         condlog(2, "buffer overflow for received uevent");
324                         buflen = sizeof(buf)-1;
325                 }
326
327                 uev = alloc_uevent();
328
329                 if (!uev) {
330                         condlog(1, "lost uevent, oom");
331                         continue;
332                 }
333
334                 if ((size_t)buflen > sizeof(buf)-1)
335                         buflen = sizeof(buf)-1;
336
337                 /*
338                  * Copy the shared receive buffer contents to buffer private
339                  * to this uevent so we can immediately reuse the shared buffer.
340                  */
341                 memcpy(uev->buffer, buf, HOTPLUG_BUFFER_SIZE + OBJECT_SIZE);
342                 buffer = uev->buffer;
343                 buffer[buflen] = '\0';
344
345                 /* save start of payload */
346                 bufpos = strlen(buffer) + 1;
347
348                 /* action string */
349                 uev->action = buffer;
350                 pos = strchr(buffer, '@');
351                 if (!pos) {
352                         condlog(3, "bad action string '%s'", buffer);
353                         continue;
354                 }
355                 pos[0] = '\0';
356
357                 /* sysfs path */
358                 uev->devpath = &pos[1];
359
360                 /* hotplug events have the environment attached - reconstruct envp[] */
361                 for (i = 0; (bufpos < (size_t)buflen) && (i < HOTPLUG_NUM_ENVP-1); i++) {
362                         int keylen;
363                         char *key;
364
365                         key = &buffer[bufpos];
366                         keylen = strlen(key);
367                         uev->envp[i] = key;
368                         /* Filter out sequence number */
369                         if (strncmp(key, "SEQNUM=", 7) == 0) {
370                                 char *eptr;
371
372                                 uev->seqnum = strtoul(key + 7, &eptr, 10);
373                                 if (eptr == key + 7)
374                                         uev->seqnum = -1;
375                         }
376                         bufpos += keylen + 1;
377                 }
378                 uev->envp[i] = NULL;
379
380                 condlog(3, "uevent %ld '%s' from '%s'", uev->seqnum,
381                         uev->action, uev->devpath);
382                 uev->kernel = strrchr(uev->devpath, '/');
383                 if (uev->kernel)
384                         uev->kernel++;
385
386                 /* print payload environment */
387                 for (i = 0; uev->envp[i] != NULL; i++)
388                         condlog(5, "%s", uev->envp[i]);
389
390                 /*
391                  * Queue uevent and poke service pthread.
392                  */
393                 pthread_mutex_lock(uevq_lockp);
394                 list_add_tail(&uev->node, &uevq);
395                 pthread_cond_signal(uev_condp);
396                 pthread_mutex_unlock(uevq_lockp);
397         }
398
399 exit:
400         close(sock);
401         return 1;
402 }
403
404 int uevent_listen(struct udev *udev)
405 {
406         int err;
407         struct udev_monitor *monitor = NULL;
408         int fd, socket_flags;
409         int need_failback = 1;
410         /*
411          * Queue uevents for service by dedicated thread so that the uevent
412          * listening thread does not block on multipathd locks (vecs->lock)
413          * thereby not getting to empty the socket's receive buffer queue
414          * often enough.
415          */
416         if (!udev) {
417                 condlog(1, "no udev context");
418                 return 1;
419         }
420         udev_ref(udev);
421         pthread_cleanup_push(uevq_stop, udev);
422
423         monitor = udev_monitor_new_from_netlink(udev, "udev");
424         if (!monitor) {
425                 condlog(2, "failed to create udev monitor");
426                 err = 2;
427                 goto out;
428         }
429 #ifdef LIBUDEV_API_RECVBUF
430         if (udev_monitor_set_receive_buffer_size(monitor, 128 * 1024 * 1024))
431                 condlog(2, "failed to increase buffer size");
432 #endif
433         fd = udev_monitor_get_fd(monitor);
434         if (fd < 0) {
435                 condlog(2, "failed to get monitor fd");
436                 goto out;
437         }
438         socket_flags = fcntl(fd, F_GETFL);
439         if (socket_flags < 0) {
440                 condlog(2, "failed to get monitor socket flags : %s",
441                         strerror(errno));
442                 goto out;
443         }
444         if (fcntl(fd, F_SETFL, socket_flags & ~O_NONBLOCK) < 0) {
445                 condlog(2, "failed to set monitor socket flags : %s",
446                         strerror(errno));
447                 goto out;
448         }
449         err = udev_monitor_filter_add_match_subsystem_devtype(monitor, "block",
450                                                               NULL);
451         if (err)
452                 condlog(2, "failed to create filter : %s", strerror(-err));
453         err = udev_monitor_enable_receiving(monitor);
454         if (err) {
455                 condlog(2, "failed to enable receiving : %s", strerror(-err));
456                 goto out;
457         }
458         while (1) {
459                 int i = 0;
460                 char *pos, *end;
461                 struct uevent *uev;
462                 struct udev_device *dev;
463                 struct udev_list_entry *list_entry;
464
465                 dev = udev_monitor_receive_device(monitor);
466                 if (!dev) {
467                         condlog(0, "failed getting udev device");
468                         continue;
469                 }
470
471                 uev = alloc_uevent();
472                 if (!uev) {
473                         udev_device_unref(dev);
474                         condlog(1, "lost uevent, oom");
475                         continue;
476                 }
477                 pos = uev->buffer;
478                 end = pos + HOTPLUG_BUFFER_SIZE + OBJECT_SIZE - 1;
479                 udev_list_entry_foreach(list_entry, udev_device_get_properties_list_entry(dev)) {
480                         const char *name, *value;
481                         int bytes;
482
483                         name = udev_list_entry_get_name(list_entry);
484                         if (!name)
485                                 name = "(null)";
486                         value = udev_list_entry_get_value(list_entry);
487                         if (!value)
488                                 value = "(null)";
489                         bytes = snprintf(pos, end - pos, "%s=%s", name,
490                                         value);
491                         if (pos + bytes >= end) {
492                                 condlog(2, "buffer overflow for uevent");
493                                 break;
494                         }
495                         uev->envp[i] = pos;
496                         pos += bytes;
497                         *pos = '\0';
498                         pos++;
499                         if (strcmp(name, "DEVPATH") == 0)
500                                 uev->devpath = uev->envp[i] + 8;
501                         if (strcmp(name, "ACTION") == 0)
502                                 uev->action = uev->envp[i] + 7;
503                         i++;
504                         if (i == HOTPLUG_NUM_ENVP - 1)
505                                 break;
506                 }
507                 uev->udev = dev;
508                 uev->envp[i] = NULL;
509
510                 condlog(3, "uevent '%s' from '%s'", uev->action, uev->devpath);
511                 uev->kernel = strrchr(uev->devpath, '/');
512                 if (uev->kernel)
513                         uev->kernel++;
514
515                 /* print payload environment */
516                 for (i = 0; uev->envp[i] != NULL; i++)
517                         condlog(5, "%s", uev->envp[i]);
518
519                 /*
520                  * Queue uevent and poke service pthread.
521                  */
522                 pthread_mutex_lock(uevq_lockp);
523                 list_add_tail(&uev->node, &uevq);
524                 pthread_cond_signal(uev_condp);
525                 pthread_mutex_unlock(uevq_lockp);
526         }
527         need_failback = 0;
528 out:
529         if (monitor)
530                 udev_monitor_unref(monitor);
531         if (need_failback)
532                 err = failback_listen();
533         pthread_cleanup_pop(1);
534         return err;
535 }
536
537 extern int
538 uevent_get_major(struct uevent *uev)
539 {
540         char *p, *q;
541         int i, major = -1;
542
543         for (i = 0; uev->envp[i] != NULL; i++) {
544                 if (!strncmp(uev->envp[i], "MAJOR", 5) && strlen(uev->envp[i]) > 6) {
545                         p = uev->envp[i] + 6;
546                         major = strtoul(p, &q, 10);
547                         if (p == q) {
548                                 condlog(2, "invalid major '%s'", p);
549                                 major = -1;
550                         }
551                         break;
552                 }
553         }
554         return major;
555 }
556
557 extern int
558 uevent_get_minor(struct uevent *uev)
559 {
560         char *p, *q;
561         int i, minor = -1;
562
563         for (i = 0; uev->envp[i] != NULL; i++) {
564                 if (!strncmp(uev->envp[i], "MINOR", 5) && strlen(uev->envp[i]) > 6) {
565                         p = uev->envp[i] + 6;
566                         minor = strtoul(p, &q, 10);
567                         if (p == q) {
568                                 condlog(2, "invalid minor '%s'", p);
569                                 minor = -1;
570                         }
571                         break;
572                 }
573         }
574         return minor;
575 }
576
577 extern int
578 uevent_get_disk_ro(struct uevent *uev)
579 {
580         char *p, *q;
581         int i, ro = -1;
582
583         for (i = 0; uev->envp[i] != NULL; i++) {
584                 if (!strncmp(uev->envp[i], "DISK_RO", 6) && strlen(uev->envp[i]) > 7) {
585                         p = uev->envp[i] + 8;
586                         ro = strtoul(p, &q, 10);
587                         if (p == q) {
588                                 condlog(2, "invalid read_only setting '%s'", p);
589                                 ro = -1;
590                         }
591                         break;
592                 }
593         }
594         return ro;
595 }
596
597 extern char *
598 uevent_get_dm_name(struct uevent *uev)
599 {
600         char *p = NULL;
601         int i;
602
603         for (i = 0; uev->envp[i] != NULL; i++) {
604                 if (!strncmp(uev->envp[i], "DM_NAME", 6) &&
605                     strlen(uev->envp[i]) > 7) {
606                         p = MALLOC(strlen(uev->envp[i] + 8) + 1);
607                         strcpy(p, uev->envp[i] + 8);
608                         break;
609                 }
610         }
611         return p;
612 }