0d68390442c199fed60701ff9b607606c102ec91
[multipath-tools/.git] / libmultipath / uevent.c
1 /*
2  * uevent.c - trigger upon netlink uevents from the kernel
3  *
4  *      Only kernels from version 2.6.10* on provide the uevent netlink socket.
5  *      Until the libc-kernel-headers are updated, you need to compile with:
6  *
7  *        gcc -I /lib/modules/`uname -r`/build/include -o uevent_listen uevent_listen.c
8  *
9  * Copyright (C) 2004 Kay Sievers <kay.sievers@vrfy.org>
10  *
11  *      This program is free software; you can redistribute it and/or modify it
12  *      under the terms of the GNU General Public License as published by the
13  *      Free Software Foundation version 2 of the License.
14  *
15  *      This program is distributed in the hope that it will be useful, but
16  *      WITHOUT ANY WARRANTY; without even the implied warranty of
17  *      MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
18  *      General Public License for more details.
19  *
20  *      You should have received a copy of the GNU General Public License along
21  *      with this program; if not, write to the Free Software Foundation, Inc.,
22  *      675 Mass Ave, Cambridge, MA 02139, USA.
23  *
24  */
25
26 #include <unistd.h>
27 #include <stdio.h>
28 #include <errno.h>
29 #include <stdlib.h>
30 #include <stddef.h>
31 #include <string.h>
32 #include <fcntl.h>
33 #include <time.h>
34 #include <sys/socket.h>
35 #include <sys/user.h>
36 #include <sys/un.h>
37 #include <linux/types.h>
38 #include <linux/netlink.h>
39 #include <pthread.h>
40 #include <limits.h>
41 #include <sys/mman.h>
42
43 #include "memory.h"
44 #include "debug.h"
45 #include "uevent.h"
46
/* Signature of the callback invoked for every dequeued uevent. */
typedef int uev_trigger(struct uevent *uev, void *trigger_data);

/* Thread that drains the uevent queue (see uevq_thread()). */
pthread_t uevq_thr;

/* Head and tail of the singly linked queue of pending uevents. */
struct uevent *uevqhp;
struct uevent *uevqtp;

/* Lock taken around every access to the queue head/tail pointers. */
pthread_mutex_t uevq_lock;
pthread_mutex_t *uevq_lockp = &uevq_lock;

/* Mutex paired with uev_cond for waking the service thread. */
pthread_mutex_t uevc_lock;
pthread_mutex_t *uevc_lockp = &uevc_lock;

/* Signalled by the listener each time a uevent has been queued. */
pthread_cond_t uev_cond;
pthread_cond_t *uev_condp = &uev_cond;

/* Callback and opaque argument registered through uevent_listen(). */
uev_trigger *my_uev_trigger;
void *my_trigger_data;

/* Set to 1 by the service thread while it is draining the queue. */
int servicing_uev;
57
58 int is_uevent_busy(void)
59 {
60         return (uevqhp != NULL || servicing_uev);
61 }
62
63 void
64 setup_thread_attr(pthread_attr_t *attr, size_t stacksize, int detached)
65 {
66         if (pthread_attr_init(attr)) {
67                 fprintf(stderr, "can't initialize thread attr: %s\n",
68                         strerror(errno));
69                 exit(1);
70         }
71         if (stacksize < PTHREAD_STACK_MIN)
72                 stacksize = PTHREAD_STACK_MIN;
73
74         if (pthread_attr_setstacksize(attr, stacksize)) {
75                 fprintf(stderr, "can't set thread stack size to %lu: %s\n",
76                         (unsigned long)stacksize, strerror(errno));
77                 exit(1);
78         }
79         if (detached && pthread_attr_setdetachstate(attr,
80                                                     PTHREAD_CREATE_DETACHED)) {
81                 fprintf(stderr, "can't set thread to detached: %s\n",
82                         strerror(errno));
83                 exit(1);
84         }
85 }
86
87 static struct uevent * alloc_uevent (void)
88 {
89         return (struct uevent *)MALLOC(sizeof(struct uevent));
90 }
91
92 void
93 service_uevq(void)
94 {
95         int empty;
96         struct uevent *uev;
97
98         do {
99                 pthread_mutex_lock(uevq_lockp);
100                 empty = (uevqhp == NULL);
101                 if (!empty) {
102                         uev = uevqhp;
103                         uevqhp = uev->next;
104                         if (uevqtp == uev)
105                                 uevqtp = uev->next;
106                         pthread_mutex_unlock(uevq_lockp);
107
108                         if (my_uev_trigger && my_uev_trigger(uev,
109                                                         my_trigger_data))
110                                 condlog(0, "uevent trigger error");
111
112                         FREE(uev);
113                 }
114                 else {
115                         pthread_mutex_unlock(uevq_lockp);
116                 }
117         } while (empty == 0);
118 }
119
120 /*
121  * Service the uevent queue.
122  */
123 static void *
124 uevq_thread(void * et)
125 {
126         mlockall(MCL_CURRENT | MCL_FUTURE);
127
128         while (1) {
129                 pthread_mutex_lock(uevc_lockp);
130                 servicing_uev = 0;
131                 pthread_cond_wait(uev_condp, uevc_lockp);
132                 servicing_uev = 1;
133                 pthread_mutex_unlock(uevc_lockp);
134
135                 service_uevq();
136         }
137         return NULL;
138 }
139
140 int uevent_listen(int (*uev_trigger)(struct uevent *, void * trigger_data),
141                   void * trigger_data)
142 {
143         int sock;
144         struct sockaddr_nl snl;
145         struct sockaddr_un sun;
146         socklen_t addrlen;
147         int retval;
148         int rcvbufsz = 128*1024;
149         int rcvsz = 0;
150         int rcvszsz = sizeof(rcvsz);
151         unsigned int *prcvszsz = (unsigned int *)&rcvszsz;
152         pthread_attr_t attr;
153         const int feature_on = 1;
154
155         my_uev_trigger = uev_trigger;
156         my_trigger_data = trigger_data;
157
158         /*
159          * Queue uevents for service by dedicated thread so that the uevent
160          * listening thread does not block on multipathd locks (vecs->lock)
161          * thereby not getting to empty the socket's receive buffer queue
162          * often enough.
163          */
164         uevqhp = uevqtp = NULL;
165
166         pthread_mutex_init(uevq_lockp, NULL);
167         pthread_mutex_init(uevc_lockp, NULL);
168         pthread_cond_init(uev_condp, NULL);
169
170         setup_thread_attr(&attr, 64 * 1024, 0);
171         pthread_create(&uevq_thr, &attr, uevq_thread, NULL);
172
173         /*
174          * First check whether we have a udev socket
175          */
176         memset(&sun, 0x00, sizeof(struct sockaddr_un));
177         sun.sun_family = AF_LOCAL;
178         strcpy(&sun.sun_path[1], "/org/kernel/dm/multipath_event");
179         addrlen = offsetof(struct sockaddr_un, sun_path) + strlen(sun.sun_path+1) + 1;
180
181         sock = socket(AF_LOCAL, SOCK_DGRAM, 0);
182         if (sock >= 0) {
183
184                 condlog(3, "reading events from udev socket.");
185
186                 /* the bind takes care of ensuring only one copy running */
187                 retval = bind(sock, (struct sockaddr *) &sun, addrlen);
188                 if (retval < 0) {
189                         condlog(0, "bind failed, exit");
190                         goto exit;
191                 }
192
193                 /* enable receiving of the sender credentials */
194                 setsockopt(sock, SOL_SOCKET, SO_PASSCRED,
195                            &feature_on, sizeof(feature_on));
196
197         } else {
198                 /* Fallback to read kernel netlink events */
199                 memset(&snl, 0x00, sizeof(struct sockaddr_nl));
200                 snl.nl_family = AF_NETLINK;
201                 snl.nl_pid = getpid();
202                 snl.nl_groups = 0x01;
203
204                 sock = socket(PF_NETLINK, SOCK_DGRAM, NETLINK_KOBJECT_UEVENT);
205                 if (sock == -1) {
206                         condlog(0, "error getting socket, exit");
207                         return 1;
208                 }
209
210                 condlog(3, "reading events from kernel.");
211
212                 /*
213                  * try to avoid dropping uevents, even so, this is not a guarantee,
214                  * but it does help to change the netlink uevent socket's
215                  * receive buffer threshold from the default value of 106,496 to
216                  * the maximum value of 262,142.
217                  */
218                 retval = setsockopt(sock, SOL_SOCKET, SO_RCVBUF, &rcvbufsz,
219                                     sizeof(rcvbufsz));
220
221                 if (retval < 0) {
222                         condlog(0, "error setting receive buffer size for socket, exit");
223                         exit(1);
224                 }
225                 retval = getsockopt(sock, SOL_SOCKET, SO_RCVBUF, &rcvsz, prcvszsz);
226                 if (retval < 0) {
227                         condlog(0, "error setting receive buffer size for socket, exit");
228                         exit(1);
229                 }
230                 condlog(3, "receive buffer size for socket is %u.", rcvsz);
231
232                 /* enable receiving of the sender credentials */
233                 setsockopt(sock, SOL_SOCKET, SO_PASSCRED,
234                            &feature_on, sizeof(feature_on));
235
236                 retval = bind(sock, (struct sockaddr *) &snl,
237                               sizeof(struct sockaddr_nl));
238                 if (retval < 0) {
239                         condlog(0, "bind failed, exit");
240                         goto exit;
241                 }
242         }
243
244         while (1) {
245                 int i;
246                 char *pos;
247                 size_t bufpos;
248                 ssize_t buflen;
249                 struct uevent *uev;
250                 char *buffer;
251                 struct msghdr smsg;
252                 struct iovec iov;
253                 char cred_msg[CMSG_SPACE(sizeof(struct ucred))];
254                 struct cmsghdr *cmsg;
255                 struct ucred *cred;
256                 static char buf[HOTPLUG_BUFFER_SIZE + OBJECT_SIZE];
257
258                 memset(buf, 0x00, sizeof(buf));
259                 iov.iov_base = &buf;
260                 iov.iov_len = sizeof(buf);
261                 memset (&smsg, 0x00, sizeof(struct msghdr));
262                 smsg.msg_iov = &iov;
263                 smsg.msg_iovlen = 1;
264                 smsg.msg_control = cred_msg;
265                 smsg.msg_controllen = sizeof(cred_msg);
266
267                 buflen = recvmsg(sock, &smsg, 0);
268                 if (buflen < 0) {
269                         if (errno != EINTR)
270                                 condlog(0, "error receiving message");
271                         continue;
272                 }
273
274                 cmsg = CMSG_FIRSTHDR(&smsg);
275                 if (cmsg == NULL || cmsg->cmsg_type != SCM_CREDENTIALS) {
276                         condlog(3, "no sender credentials received, message ignored");
277                         continue;
278                 }
279
280                 cred = (struct ucred *)CMSG_DATA(cmsg);
281                 if (cred->uid != 0) {
282                         condlog(3, "sender uid=%d, message ignored", cred->uid);
283                         continue;
284                 }
285
286                 /* skip header */
287                 bufpos = strlen(buf) + 1;
288                 if (bufpos < sizeof("a@/d") || bufpos >= sizeof(buf)) {
289                         condlog(3, "invalid message length");
290                         continue;
291                 }
292
293                 /* check message header */
294                 if (strstr(buf, "@/") == NULL) {
295                         condlog(3, "unrecognized message header");
296                         continue;
297                 }
298
299                 uev = alloc_uevent();
300
301                 if (!uev) {
302                         condlog(1, "lost uevent, oom");
303                         continue;
304                 }
305
306                 if ((size_t)buflen > sizeof(buf)-1)
307                         buflen = sizeof(buf)-1;
308
309                 /*
310                  * Copy the shared receive buffer contents to buffer private
311                  * to this uevent so we can immediately reuse the shared buffer.
312                  */
313                 memcpy(uev->buffer, buf, HOTPLUG_BUFFER_SIZE + OBJECT_SIZE);
314                 buffer = uev->buffer;
315                 buffer[buflen] = '\0';
316
317                 /* save start of payload */
318                 bufpos = strlen(buffer) + 1;
319
320                 /* action string */
321                 uev->action = buffer;
322                 pos = strchr(buffer, '@');
323                 if (!pos) {
324                         condlog(3, "bad action string '%s'", buffer);
325                         continue;
326                 }
327                 pos[0] = '\0';
328
329                 /* sysfs path */
330                 uev->devpath = &pos[1];
331
332                 /* hotplug events have the environment attached - reconstruct envp[] */
333                 for (i = 0; (bufpos < (size_t)buflen) && (i < HOTPLUG_NUM_ENVP-1); i++) {
334                         int keylen;
335                         char *key;
336
337                         key = &buffer[bufpos];
338                         keylen = strlen(key);
339                         uev->envp[i] = key;
340                         bufpos += keylen + 1;
341                 }
342                 uev->envp[i] = NULL;
343
344                 condlog(3, "uevent '%s' from '%s'", uev->action, uev->devpath);
345
346                 /* print payload environment */
347                 for (i = 0; uev->envp[i] != NULL; i++)
348                         condlog(3, "%s", uev->envp[i]);
349
350                 /*
351                  * Queue uevent and poke service pthread.
352                  */
353                 pthread_mutex_lock(uevq_lockp);
354                 if (uevqtp)
355                         uevqtp->next = uev;
356                 else
357                         uevqhp = uev;
358                 uevqtp = uev;
359                 uev->next = NULL;
360                 pthread_mutex_unlock(uevq_lockp);
361
362                 pthread_mutex_lock(uevc_lockp);
363                 pthread_cond_signal(uev_condp);
364                 pthread_mutex_unlock(uevc_lockp);
365         }
366
367 exit:
368         close(sock);
369
370         pthread_mutex_lock(uevq_lockp);
371         pthread_cancel(uevq_thr);
372         pthread_mutex_unlock(uevq_lockp);
373
374         pthread_mutex_destroy(uevq_lockp);
375         pthread_mutex_destroy(uevc_lockp);
376         pthread_cond_destroy(uev_condp);
377
378         return 1;
379 }