libmultipath: fix marginal paths queueing errors
multipath-tools: multipathd/main.c
1 /*
2  * Copyright (c) 2004, 2005 Christophe Varoqui
3  * Copyright (c) 2005 Kiyoshi Ueda, NEC
4  * Copyright (c) 2005 Benjamin Marzinski, Redhat
5  * Copyright (c) 2005 Edward Goggin, EMC
6  */
7 #include <unistd.h>
8 #include <sys/stat.h>
9 #include <libdevmapper.h>
10 #include <sys/wait.h>
11 #include <sys/mman.h>
12 #include <sys/types.h>
13 #include <fcntl.h>
14 #include <errno.h>
15 #include <limits.h>
16 #include <linux/oom.h>
17 #include <libudev.h>
18 #include <urcu.h>
19 #ifdef USE_SYSTEMD
20 #include <systemd/sd-daemon.h>
21 #endif
22 #include <semaphore.h>
23 #include <time.h>
24 #include <stdbool.h>
25
26 /*
27  * libmultipath
28  */
29 #include "time-util.h"
30
31 /*
32  * libcheckers
33  */
34 #include "checkers.h"
35
36 #ifdef USE_SYSTEMD
37 static int use_watchdog;
38 #endif
39
40 /*
41  * libmultipath
42  */
43 #include "parser.h"
44 #include "vector.h"
45 #include "memory.h"
46 #include "config.h"
47 #include "util.h"
48 #include "hwtable.h"
49 #include "defaults.h"
50 #include "structs.h"
51 #include "blacklist.h"
52 #include "structs_vec.h"
53 #include "dmparser.h"
54 #include "devmapper.h"
55 #include "sysfs.h"
56 #include "dict.h"
57 #include "discovery.h"
58 #include "debug.h"
59 #include "propsel.h"
60 #include "uevent.h"
61 #include "switchgroup.h"
62 #include "print.h"
63 #include "configure.h"
64 #include "prio.h"
65 #include "wwids.h"
66 #include "pgpolicies.h"
67 #include "uevent.h"
68 #include "log.h"
69 #include "uxsock.h"
70
71 #include "mpath_cmd.h"
72 #include "mpath_persist.h"
73
74 #include "prioritizers/alua_rtpg.h"
75
76 #include "main.h"
77 #include "pidfile.h"
78 #include "uxlsnr.h"
79 #include "uxclnt.h"
80 #include "cli.h"
81 #include "cli_handlers.h"
82 #include "lock.h"
83 #include "waiter.h"
84 #include "dmevents.h"
85 #include "io_err_stat.h"
86 #include "wwids.h"
87 #include "foreign.h"
88 #include "../third-party/valgrind/drd.h"
89
90 #define FILE_NAME_SIZE 256
91 #define CMDSIZE 160
92
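/*
 * Log a path checker message at level 'lvl' if the path belongs to a map,
 * a checker has been selected and 'lvl' does not exceed the verbosity.
 * Offline paths get a fixed "path offline" message; otherwise the checker
 * message is printed, if there is one.
 */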
93 #define LOG_MSG(lvl, verb, pp)                                  \
94 do {                                                            \
95         if (pp->mpp && checker_selected(&pp->checker) &&        \
96             lvl <= verb) {                                      \
97                 if (pp->offline)                                \
98                         condlog(lvl, "%s: %s - path offline",   \
99                                 pp->mpp->alias, pp->dev);       \
100                 else  {                                         \
101                         const char *__m =                       \
102                                 checker_message(&pp->checker);  \
103                                                                 \
104                         if (strlen(__m))                              \
105                                 condlog(lvl, "%s: %s - %s checker%s", \
106                                         pp->mpp->alias,               \
107                                         pp->dev,                      \
108                                         checker_name(&pp->checker),   \
109                                         __m);                         \
110                 }                                                     \
111         }                                                             \
112 } while(0)
113
114 struct mpath_event_param
115 {
116         char * devname;
117         struct multipath *mpp;
118 };
119
120 int logsink;
121 int uxsock_timeout;
122 int verbosity;
123 int bindings_read_only;
124 int ignore_new_devs;
125 #ifdef NO_DMEVENTS_POLL
126 int poll_dmevents = 0;
127 #else
128 int poll_dmevents = 1;
129 #endif
130 enum daemon_status running_state = DAEMON_INIT;
131 pid_t daemon_pid;
132 pthread_mutex_t config_lock = PTHREAD_MUTEX_INITIALIZER;
133 pthread_cond_t config_cond;
134
135 /*
136  * global copy of vecs for use in sig handlers
137  */
138 struct vectors * gvecs;
139
140 struct udev * udev;
141
142 struct config *multipath_conf;
143
144 /* Local variables */
145 static volatile sig_atomic_t exit_sig;
146 static volatile sig_atomic_t reconfig_sig;
147 static volatile sig_atomic_t log_reset_sig;
148
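/* Human readable name of the current daemon state. */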
149 const char *
150 daemon_status(void)
151 {
152         switch (running_state) {
153         case DAEMON_INIT:
154                 return "init";
155         case DAEMON_START:
156                 return "startup";
157         case DAEMON_CONFIGURE:
158                 return "configure";
159         case DAEMON_IDLE:
160                 return "idle";
161         case DAEMON_RUNNING:
162                 return "running";
163         case DAEMON_SHUTDOWN:
164                 return "shutdown";
165         }
166         return NULL;
167 }
168
169 /*
170  * I love you too, systemd ...
171  */
172 const char *
173 sd_notify_status(void)
174 {
175         switch (running_state) {
176         case DAEMON_INIT:
177                 return "STATUS=init";
178         case DAEMON_START:
179                 return "STATUS=startup";
180         case DAEMON_CONFIGURE:
181                 return "STATUS=configure";
182         case DAEMON_IDLE:
183         case DAEMON_RUNNING:
184                 return "STATUS=up";
185         case DAEMON_SHUTDOWN:
186                 return "STATUS=shutdown";
187         }
188         return NULL;
189 }
190
191 #ifdef USE_SYSTEMD
192 static void do_sd_notify(enum daemon_status old_state)
193 {
194         /*
195          * Checkerloop switches back and forth between idle and running state.
196          * No need to tell systemd each time.
197          * These notifications cause a lot of overhead on dbus.
198          */
199         if ((running_state == DAEMON_IDLE || running_state == DAEMON_RUNNING) &&
200             (old_state == DAEMON_IDLE || old_state == DAEMON_RUNNING))
201                 return;
202         sd_notify(0, sd_notify_status());
203 }
204 #endif
205
206 static void config_cleanup(void *arg)
207 {
208         pthread_mutex_unlock(&config_lock);
209 }
210
211 static void __post_config_state(enum daemon_status state)
212 {
213         if (state != running_state && running_state != DAEMON_SHUTDOWN) {
214                 enum daemon_status old_state = running_state;
215
216                 running_state = state;
217                 pthread_cond_broadcast(&config_cond);
218 #ifdef USE_SYSTEMD
219                 do_sd_notify(old_state);
220 #endif
221         }
222 }
223
224 void post_config_state(enum daemon_status state)
225 {
226         pthread_mutex_lock(&config_lock);
227         pthread_cleanup_push(config_cleanup, NULL);
228         __post_config_state(state);
229         pthread_cleanup_pop(1);
230 }
231
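/*
 * Move the daemon to the requested state. If it is neither idle nor
 * shutting down, wait up to one second for the current transition to
 * finish first. Returns 0 on success, EINVAL once shutdown has started,
 * or ETIMEDOUT if the wait expired.
 */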
232 int set_config_state(enum daemon_status state)
233 {
234         int rc = 0;
235
236         pthread_cleanup_push(config_cleanup, NULL);
237         pthread_mutex_lock(&config_lock);
238         if (running_state != state) {
239                 enum daemon_status old_state = running_state;
240
241                 if (running_state == DAEMON_SHUTDOWN)
242                         rc = EINVAL;
243                 else if (running_state != DAEMON_IDLE) {
244                         struct timespec ts;
245
246                         if (clock_gettime(CLOCK_MONOTONIC, &ts) == 0) {
247                                 ts.tv_sec += 1;
248                                 rc = pthread_cond_timedwait(&config_cond,
249                                                             &config_lock, &ts);
250                         }
251                 }
252                 if (!rc) {
253                         running_state = state;
254                         pthread_cond_broadcast(&config_cond);
255 #ifdef USE_SYSTEMD
256                         do_sd_notify(old_state);
257 #endif
258                 }
259         }
260         pthread_cleanup_pop(1);
261         return rc;
262 }
263
264 struct config *get_multipath_config(void)
265 {
266         rcu_read_lock();
267         return rcu_dereference(multipath_conf);
268 }
269
270 void put_multipath_config(void *arg)
271 {
272         rcu_read_unlock();
273 }
274
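/*
 * Return 1 if the active path group should be switched, i.e. the best
 * path group differs from the one the kernel will use next. Optionally
 * refresh the path priorities first. Maps with manual failback never
 * request a switch.
 */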
275 static int
276 need_switch_pathgroup (struct multipath * mpp, int refresh)
277 {
278         struct pathgroup * pgp;
279         struct path * pp;
280         unsigned int i, j;
281         struct config *conf;
282         int bestpg;
283
284         if (!mpp)
285                 return 0;
286
287         /*
288          * Refresh path priority values
289          */
290         if (refresh) {
291                 vector_foreach_slot (mpp->pg, pgp, i) {
292                         vector_foreach_slot (pgp->paths, pp, j) {
293                                 conf = get_multipath_config();
294                                 pthread_cleanup_push(put_multipath_config,
295                                                      conf);
296                                 pathinfo(pp, conf, DI_PRIO);
297                                 pthread_cleanup_pop(1);
298                         }
299                 }
300         }
301
302         if (!mpp->pg || VECTOR_SIZE(mpp->paths) == 0)
303                 return 0;
304
305         bestpg = select_path_group(mpp);
306         if (mpp->pgfailback == -FAILBACK_MANUAL)
307                 return 0;
308
309         mpp->bestpg = bestpg;
310         if (mpp->bestpg != mpp->nextpg)
311                 return 1;
312
313         return 0;
314 }
315
316 static void
317 switch_pathgroup (struct multipath * mpp)
318 {
319         mpp->stat_switchgroup++;
320         dm_switchgroup(mpp->alias, mpp->bestpg);
321         condlog(2, "%s: switch to path group #%i",
322                  mpp->alias, mpp->bestpg);
323 }
324
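/*
 * Arm event monitoring for a map: either register it with the dmevents
 * polling code or start a dedicated waiter thread.
 */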
325 static int
326 wait_for_events(struct multipath *mpp, struct vectors *vecs)
327 {
328         if (poll_dmevents)
329                 return watch_dmevents(mpp->alias);
330         else
331                 return start_waiter_thread(mpp, vecs);
332 }
333
334 static void
335 remove_map_and_stop_waiter(struct multipath *mpp, struct vectors *vecs)
336 {
337         /* devices are automatically removed by the dmevent polling code,
338          * so they don't need to be manually removed here */
339         condlog(3, "%s: removing map from internal tables", mpp->alias);
340         if (!poll_dmevents)
341                 stop_waiter_thread(mpp, vecs);
342         remove_map(mpp, vecs, PURGE_VEC);
343 }
344
345 static void
346 remove_maps_and_stop_waiters(struct vectors *vecs)
347 {
348         int i;
349         struct multipath * mpp;
350
351         if (!vecs)
352                 return;
353
354         if (!poll_dmevents) {
355                 vector_foreach_slot(vecs->mpvec, mpp, i)
356                         stop_waiter_thread(mpp, vecs);
357         }
358         else
359                 unwatch_all_dmevents();
360
361         remove_maps(vecs);
362 }
363
364 static void
365 set_multipath_wwid (struct multipath * mpp)
366 {
367         if (strlen(mpp->wwid))
368                 return;
369
370         dm_get_uuid(mpp->alias, mpp->wwid);
371 }
372
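/*
 * Bring the map's queue_if_no_path feature in line with its no_path_retry
 * setting, and enter recovery mode (time-limited queueing) when a retry
 * count is configured, the map is queueing and no usable paths are left.
 */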
373 static void set_no_path_retry(struct multipath *mpp)
374 {
375         char is_queueing = 0;
376
377         mpp->nr_active = pathcount(mpp, PATH_UP) + pathcount(mpp, PATH_GHOST);
378         if (mpp->features && strstr(mpp->features, "queue_if_no_path"))
379                 is_queueing = 1;
380
381         switch (mpp->no_path_retry) {
382         case NO_PATH_RETRY_UNDEF:
383                 break;
384         case NO_PATH_RETRY_FAIL:
385                 if (is_queueing)
386                         dm_queue_if_no_path(mpp->alias, 0);
387                 break;
388         case NO_PATH_RETRY_QUEUE:
389                 if (!is_queueing)
390                         dm_queue_if_no_path(mpp->alias, 1);
391                 break;
392         default:
393                 if (mpp->nr_active > 0) {
394                         mpp->retry_tick = 0;
395                         dm_queue_if_no_path(mpp->alias, 1);
396                 } else if (is_queueing && mpp->retry_tick == 0)
397                         enter_recovery_mode(mpp);
398                 break;
399         }
400 }
401
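/*
 * Refresh a map's device-mapper info and table/status strings; with
 * 'reset', the queueing state is re-evaluated as well. On failure the map
 * is removed from the internal tables and 1 is returned.
 */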
402 int __setup_multipath(struct vectors *vecs, struct multipath *mpp,
403                       int reset)
404 {
405         if (dm_get_info(mpp->alias, &mpp->dmi)) {
406                 /* Error accessing table */
407                 condlog(3, "%s: cannot access table", mpp->alias);
408                 goto out;
409         }
410
411         if (update_multipath_strings(mpp, vecs->pathvec, 1)) {
412                 condlog(0, "%s: failed to setup multipath", mpp->alias);
413                 goto out;
414         }
415
416         if (reset) {
417                 set_no_path_retry(mpp);
418                 if (VECTOR_SIZE(mpp->paths) != 0)
419                         dm_cancel_deferred_remove(mpp);
420         }
421
422         return 0;
423 out:
424         remove_map_and_stop_waiter(mpp, vecs);
425         return 1;
426 }
427
428 int update_multipath (struct vectors *vecs, char *mapname, int reset)
429 {
430         struct multipath *mpp;
431         struct pathgroup  *pgp;
432         struct path *pp;
433         int i, j;
434
435         mpp = find_mp_by_alias(vecs->mpvec, mapname);
436
437         if (!mpp) {
438                 condlog(3, "%s: multipath map not found", mapname);
439                 return 2;
440         }
441
442         if (__setup_multipath(vecs, mpp, reset))
443                 return 1; /* mpp freed in __setup_multipath */
444
445         /*
446          * compare checkers states with DM states
447          */
448         vector_foreach_slot (mpp->pg, pgp, i) {
449                 vector_foreach_slot (pgp->paths, pp, j) {
450                         if (pp->dmstate != PSTATE_FAILED)
451                                 continue;
452
453                         if (pp->state != PATH_DOWN) {
454                                 struct config *conf;
455                                 int oldstate = pp->state;
456                                 int checkint;
457
458                                 conf = get_multipath_config();
459                                 checkint = conf->checkint;
460                                 put_multipath_config(conf);
461                                 condlog(2, "%s: mark as failed", pp->dev);
462                                 mpp->stat_path_failures++;
463                                 pp->state = PATH_DOWN;
464                                 if (oldstate == PATH_UP ||
465                                     oldstate == PATH_GHOST)
466                                         update_queue_mode_del_path(mpp);
467
468                                 /*
469                                  * if opportune,
470                                  * schedule the next check earlier
471                                  */
472                                 if (pp->tick > checkint)
473                                         pp->tick = checkint;
474                         }
475                 }
476         }
477         return 0;
478 }
479
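/*
 * Re-adopt paths and reload the map's device-mapper table, retrying a
 * failed reload up to three times. A newly created map is removed again
 * if the reload keeps failing or event monitoring cannot be started.
 */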
480 static int
481 update_map (struct multipath *mpp, struct vectors *vecs, int new_map)
482 {
483         int retries = 3;
484         char params[PARAMS_SIZE] = {0};
485
486 retry:
487         condlog(4, "%s: updating new map", mpp->alias);
488         if (adopt_paths(vecs->pathvec, mpp)) {
489                 condlog(0, "%s: failed to adopt paths for new map update",
490                         mpp->alias);
491                 retries = -1;
492                 goto fail;
493         }
494         verify_paths(mpp, vecs);
495         mpp->action = ACT_RELOAD;
496
497         if (setup_map(mpp, params, PARAMS_SIZE, vecs)) {
498                 condlog(0, "%s: failed to setup new map in update", mpp->alias);
499                 retries = -1;
500                 goto fail;
501         }
502         if (domap(mpp, params, 1) == DOMAP_FAIL && retries-- > 0) {
503                 condlog(0, "%s: map_update sleep", mpp->alias);
504                 sleep(1);
505                 goto retry;
506         }
507         dm_lib_release();
508
509 fail:
510         if (new_map && (retries < 0 || wait_for_events(mpp, vecs))) {
511                 condlog(0, "%s: failed to create new map", mpp->alias);
512                 remove_map(mpp, vecs, 1);
513                 return 1;
514         }
515
516         if (setup_multipath(vecs, mpp))
517                 return 1;
518
519         sync_map_state(mpp);
520
521         if (retries < 0)
522                 condlog(0, "%s: failed reload in new map update", mpp->alias);
523         return 0;
524 }
525
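/*
 * Set up a multipath entry for a map that already exists in the kernel
 * but for which no path has been seen yet (e.g. one reported by a
 * uevent). Returns the new entry, or NULL on failure.
 */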
526 static struct multipath *
527 add_map_without_path (struct vectors *vecs, const char *alias)
528 {
529         struct multipath * mpp = alloc_multipath();
530         struct config *conf;
531
532         if (!mpp)
533                 return NULL;
534         if (!alias) {
535                 FREE(mpp);
536                 return NULL;
537         }
538
539         mpp->alias = STRDUP(alias);
540
541         if (dm_get_info(mpp->alias, &mpp->dmi)) {
542                 condlog(3, "%s: cannot access table", mpp->alias);
543                 goto out;
544         }
545         set_multipath_wwid(mpp);
546         conf = get_multipath_config();
547         mpp->mpe = find_mpe(conf->mptable, mpp->wwid);
548         put_multipath_config(conf);
549
550         if (update_multipath_table(mpp, vecs->pathvec, 1))
551                 goto out;
552         if (update_multipath_status(mpp))
553                 goto out;
554
555         if (!vector_alloc_slot(vecs->mpvec))
556                 goto out;
557
558         vector_set_slot(vecs->mpvec, mpp);
559
560         if (update_map(mpp, vecs, 1) != 0) /* map removed */
561                 return NULL;
562
563         return mpp;
564 out:
565         remove_map(mpp, vecs, PURGE_VEC);
566         return NULL;
567 }
568
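/*
 * Flush maps that are not allowed by the current configuration. Maps that
 * cannot be flushed (for instance because they are still open) are kept
 * and moved to the new map vector; the rest are removed.
 */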
569 static int
570 coalesce_maps(struct vectors *vecs, vector nmpv)
571 {
572         struct multipath * ompp;
573         vector ompv = vecs->mpvec;
574         unsigned int i, reassign_maps;
575         struct config *conf;
576
577         conf = get_multipath_config();
578         reassign_maps = conf->reassign_maps;
579         put_multipath_config(conf);
580         vector_foreach_slot (ompv, ompp, i) {
581                 condlog(3, "%s: coalesce map", ompp->alias);
582                 if (!find_mp_by_wwid(nmpv, ompp->wwid)) {
583                         /*
584                          * remove all current maps not allowed by the
585                          * current configuration
586                          */
587                         if (dm_flush_map(ompp->alias)) {
588                                 condlog(0, "%s: unable to flush devmap",
589                                         ompp->alias);
590                                 /*
591                                  * may be just because the device is open
592                                  */
593                                 if (setup_multipath(vecs, ompp) != 0) {
594                                         i--;
595                                         continue;
596                                 }
597                                 if (!vector_alloc_slot(nmpv))
598                                         return 1;
599
600                                 vector_set_slot(nmpv, ompp);
601
602                                 vector_del_slot(ompv, i);
603                                 i--;
604                         }
605                         else {
606                                 dm_lib_release();
607                                 condlog(2, "%s devmap removed", ompp->alias);
608                         }
609                 } else if (reassign_maps) {
610                         condlog(3, "%s: Reassign existing device-mapper"
611                                 " devices", ompp->alias);
612                         dm_reassign(ompp->alias);
613                 }
614         }
615         return 0;
616 }
617
618 static void
619 sync_maps_state(vector mpvec)
620 {
621         unsigned int i;
622         struct multipath *mpp;
623
624         vector_foreach_slot (mpvec, mpp, i)
625                 sync_map_state(mpp);
626 }
627
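/*
 * Remove a map from the kernel, orphan its paths and drop it from the
 * internal tables. With 'nopaths', a deferred remove may be scheduled
 * instead if the device is still open. Returns nonzero if the kernel map
 * could not be removed right away.
 */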
628 static int
629 flush_map(struct multipath * mpp, struct vectors * vecs, int nopaths)
630 {
631         int r;
632
633         if (nopaths)
634                 r = dm_flush_map_nopaths(mpp->alias, mpp->deferred_remove);
635         else
636                 r = dm_flush_map(mpp->alias);
637         /*
638          * clear references to this map before flushing so we can ignore
639          * the spurious uevent we may generate with the dm_flush_map call below
640          */
641         if (r) {
642                 /*
643                  * May not really be an error -- if the map was already flushed
644                  * from the device mapper by dmsetup(8) for instance.
645                  */
646                 if (r == 1)
647                         condlog(0, "%s: can't flush", mpp->alias);
648                 else {
649                         condlog(2, "%s: devmap deferred remove", mpp->alias);
650                         mpp->deferred_remove = DEFERRED_REMOVE_IN_PROGRESS;
651                 }
652                 return r;
653         }
654         else {
655                 dm_lib_release();
656                 condlog(2, "%s: map flushed", mpp->alias);
657         }
658
659         orphan_paths(vecs->pathvec, mpp, "map flushed");
660         remove_map_and_stop_waiter(mpp, vecs);
661
662         return 0;
663 }
664
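/*
 * Handle a uevent for a dm device: resolve the map name (from DM_NAME or
 * from the major:minor numbers) and pass it to ev_add_map().
 */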
665 static int
666 uev_add_map (struct uevent * uev, struct vectors * vecs)
667 {
668         char *alias;
669         int major = -1, minor = -1, rc;
670
671         condlog(3, "%s: add map (uevent)", uev->kernel);
672         alias = uevent_get_dm_name(uev);
673         if (!alias) {
674                 condlog(3, "%s: No DM_NAME in uevent", uev->kernel);
675                 major = uevent_get_major(uev);
676                 minor = uevent_get_minor(uev);
677                 alias = dm_mapname(major, minor);
678                 if (!alias) {
679                         condlog(2, "%s: mapname not found for %d:%d",
680                                 uev->kernel, major, minor);
681                         return 1;
682                 }
683         }
684         pthread_cleanup_push(cleanup_lock, &vecs->lock);
685         lock(&vecs->lock);
686         pthread_testcancel();
687         rc = ev_add_map(uev->kernel, alias, vecs);
688         lock_cleanup_pop(vecs->lock);
689         FREE(alias);
690         return rc;
691 }
692
693 /*
694  * ev_add_map expects that the multipath device already exists in kernel
695  * before it is called. It just adds a device to multipathd or updates an
696  * existing device.
697  */
698 int
699 ev_add_map (char * dev, const char * alias, struct vectors * vecs)
700 {
701         struct multipath * mpp;
702         int delayed_reconfig, reassign_maps;
703         struct config *conf;
704
705         if (dm_is_mpath(alias) != 1) {
706                 condlog(4, "%s: not a multipath map", alias);
707                 return 0;
708         }
709
710         mpp = find_mp_by_alias(vecs->mpvec, alias);
711
712         if (mpp) {
713                 if (mpp->wait_for_udev > 1) {
714                         condlog(2, "%s: performing delayed actions",
715                                 mpp->alias);
716                         if (update_map(mpp, vecs, 0))
717                                 /* setup_multipath removed the map */
718                                 return 1;
719                 }
720                 conf = get_multipath_config();
721                 delayed_reconfig = conf->delayed_reconfig;
722                 reassign_maps = conf->reassign_maps;
723                 put_multipath_config(conf);
724                 if (mpp->wait_for_udev) {
725                         mpp->wait_for_udev = 0;
726                         if (delayed_reconfig &&
727                             !need_to_delay_reconfig(vecs)) {
728                                 condlog(2, "reconfigure (delayed)");
729                                 set_config_state(DAEMON_CONFIGURE);
730                                 return 0;
731                         }
732                 }
733                 /*
734                  * Not really an error -- we generate our own uevent
735                  * if we create a multipath mapped device as a result
736                  * of uev_add_path
737                  */
738                 if (reassign_maps) {
739                         condlog(3, "%s: Reassign existing device-mapper devices",
740                                 alias);
741                         dm_reassign(alias);
742                 }
743                 return 0;
744         }
745         condlog(2, "%s: adding map", alias);
746
747         /*
748          * now we can register the map
749          */
750         if ((mpp = add_map_without_path(vecs, alias))) {
751                 sync_map_state(mpp);
752                 condlog(2, "%s: devmap %s registered", alias, dev);
753                 return 0;
754         } else {
755                 condlog(2, "%s: ev_add_map failed", dev);
756                 return 1;
757         }
758 }
759
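/*
 * Handle a "remove" uevent for a dm device: drop the corresponding map
 * from the internal tables if its name and minor number match.
 */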
760 static int
761 uev_remove_map (struct uevent * uev, struct vectors * vecs)
762 {
763         char *alias;
764         int minor;
765         struct multipath *mpp;
766
767         condlog(3, "%s: remove map (uevent)", uev->kernel);
768         alias = uevent_get_dm_name(uev);
769         if (!alias) {
770                 condlog(3, "%s: No DM_NAME in uevent, ignoring", uev->kernel);
771                 return 0;
772         }
773         minor = uevent_get_minor(uev);
774
775         pthread_cleanup_push(cleanup_lock, &vecs->lock);
776         lock(&vecs->lock);
777         pthread_testcancel();
778         mpp = find_mp_by_minor(vecs->mpvec, minor);
779
780         if (!mpp) {
781                 condlog(2, "%s: devmap not registered, can't remove",
782                         uev->kernel);
783                 goto out;
784         }
785         if (strcmp(mpp->alias, alias)) {
786                 condlog(2, "%s: map alias mismatch: have \"%s\", got \"%s\"",
787                         uev->kernel, mpp->alias, alias);
788                 goto out;
789         }
790
791         remove_map_and_stop_waiter(mpp, vecs);
792 out:
793         lock_cleanup_pop(vecs->lock);
794         FREE(alias);
795         return 0;
796 }
797
798 /* Called from CLI handler */
799 int
800 ev_remove_map (char * devname, char * alias, int minor, struct vectors * vecs)
801 {
802         struct multipath * mpp;
803
804         mpp = find_mp_by_minor(vecs->mpvec, minor);
805
806         if (!mpp) {
807                 condlog(2, "%s: devmap not registered, can't remove",
808                         devname);
809                 return 1;
810         }
811         if (strcmp(mpp->alias, alias)) {
812                 condlog(2, "%s: minor number mismatch (map %d, event %d)",
813                         mpp->alias, mpp->dmi->minor, minor);
814                 return 1;
815         }
816         return flush_map(mpp, vecs, 0);
817 }
818
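/*
 * Handle an "add" uevent for a path device. A known path that is not yet
 * part of a map is reinitialized (or dropped if it is now blacklisted);
 * a new path is probed with pathinfo(), stored in the pathvec and passed
 * on to ev_add_path().
 */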
819 static int
820 uev_add_path (struct uevent *uev, struct vectors * vecs, int need_do_map)
821 {
822         struct path *pp;
823         int ret = 0, i;
824         struct config *conf;
825
826         condlog(3, "%s: add path (uevent)", uev->kernel);
827         if (strstr(uev->kernel, "..") != NULL) {
828                 /*
829                  * Don't allow relative device names in the pathvec
830                  */
831                 condlog(0, "%s: path name is invalid", uev->kernel);
832                 return 1;
833         }
834
835         pthread_cleanup_push(cleanup_lock, &vecs->lock);
836         lock(&vecs->lock);
837         pthread_testcancel();
838         pp = find_path_by_dev(vecs->pathvec, uev->kernel);
839         if (pp) {
840                 int r;
841
842                 condlog(3, "%s: spurious uevent, path already in pathvec",
843                         uev->kernel);
844                 if (!pp->mpp && !strlen(pp->wwid)) {
845                         condlog(3, "%s: reinitialize path", uev->kernel);
846                         udev_device_unref(pp->udev);
847                         pp->udev = udev_device_ref(uev->udev);
848                         conf = get_multipath_config();
849                         pthread_cleanup_push(put_multipath_config, conf);
850                         r = pathinfo(pp, conf,
851                                      DI_ALL | DI_BLACKLIST);
852                         pthread_cleanup_pop(1);
853                         if (r == PATHINFO_OK)
854                                 ret = ev_add_path(pp, vecs, need_do_map);
855                         else if (r == PATHINFO_SKIPPED) {
856                                 condlog(3, "%s: remove blacklisted path",
857                                         uev->kernel);
858                                 i = find_slot(vecs->pathvec, (void *)pp);
859                                 if (i != -1)
860                                         vector_del_slot(vecs->pathvec, i);
861                                 free_path(pp);
862                         } else {
863                                 condlog(0, "%s: failed to reinitialize path",
864                                         uev->kernel);
865                                 ret = 1;
866                         }
867                 }
868         }
869         lock_cleanup_pop(vecs->lock);
870         if (pp)
871                 return ret;
872
873         /*
874          * get path vital state
875          */
876         conf = get_multipath_config();
877         pthread_cleanup_push(put_multipath_config, conf);
878         ret = alloc_path_with_pathinfo(conf, uev->udev,
879                                        uev->wwid, DI_ALL, &pp);
880         pthread_cleanup_pop(1);
881         if (!pp) {
882                 if (ret == PATHINFO_SKIPPED)
883                         return 0;
884                 condlog(3, "%s: failed to get path info", uev->kernel);
885                 return 1;
886         }
887         pthread_cleanup_push(cleanup_lock, &vecs->lock);
888         lock(&vecs->lock);
889         pthread_testcancel();
890         ret = store_path(vecs->pathvec, pp);
891         if (!ret) {
892                 conf = get_multipath_config();
893                 pp->checkint = conf->checkint;
894                 put_multipath_config(conf);
895                 ret = ev_add_path(pp, vecs, need_do_map);
896         } else {
897                 condlog(0, "%s: failed to store path info, "
898                         "dropping event",
899                         uev->kernel);
900                 free_path(pp);
901                 ret = 1;
902         }
903         lock_cleanup_pop(vecs->lock);
904         return ret;
905 }
906
907 /*
908  * returns:
909  * 0: added
910  * 1: error
911  */
912 int
913 ev_add_path (struct path * pp, struct vectors * vecs, int need_do_map)
914 {
915         struct multipath * mpp;
916         char params[PARAMS_SIZE] = {0};
917         int retries = 3;
918         int start_waiter = 0;
919         int ret;
920
921         /*
922          * need path UID to go any further
923          */
924         if (strlen(pp->wwid) == 0) {
925                 condlog(0, "%s: failed to get path uid", pp->dev);
926                 goto fail; /* leave path added to pathvec */
927         }
928         mpp = find_mp_by_wwid(vecs->mpvec, pp->wwid);
929         if (mpp && pp->size && mpp->size != pp->size) {
930                 condlog(0, "%s: failed to add new path %s, device size mismatch", mpp->alias, pp->dev);
931                 int i = find_slot(vecs->pathvec, (void *)pp);
932                 if (i != -1)
933                         vector_del_slot(vecs->pathvec, i);
934                 free_path(pp);
935                 return 1;
936         }
937         if (mpp && mpp->wait_for_udev &&
938             (pathcount(mpp, PATH_UP) > 0 ||
939              (pathcount(mpp, PATH_GHOST) > 0 && pp->tpgs != TPGS_IMPLICIT &&
940               mpp->ghost_delay_tick <= 0))) {
941                 /* if wait_for_udev is set and valid paths exist */
942                 condlog(3, "%s: delaying path addition until %s is fully initialized",
943                         pp->dev, mpp->alias);
944                 mpp->wait_for_udev = 2;
945                 orphan_path(pp, "waiting for create to complete");
946                 return 0;
947         }
948
949         pp->mpp = mpp;
950 rescan:
951         if (mpp) {
952                 condlog(4,"%s: adopting all paths for path %s",
953                         mpp->alias, pp->dev);
954                 if (adopt_paths(vecs->pathvec, mpp))
955                         goto fail; /* leave path added to pathvec */
956
957                 verify_paths(mpp, vecs);
958                 mpp->action = ACT_RELOAD;
959         } else {
960                 if (!should_multipath(pp, vecs->pathvec, vecs->mpvec)) {
961                         orphan_path(pp, "only one path");
962                         return 0;
963                 }
964                 condlog(4,"%s: creating new map", pp->dev);
965                 if ((mpp = add_map_with_path(vecs, pp, 1))) {
966                         mpp->action = ACT_CREATE;
967                         /*
968                          * We don't depend on ACT_CREATE, as domap will
969                          * set it to ACT_NOTHING when complete.
970                          */
971                         start_waiter = 1;
972                 }
973                 if (!start_waiter)
974                         goto fail; /* leave path added to pathvec */
975         }
976
977         /* persistent reservation check */
978         mpath_pr_event_handle(pp);
979
980         if (!need_do_map)
981                 return 0;
982
983         if (!dm_map_present(mpp->alias)) {
984                 mpp->action = ACT_CREATE;
985                 start_waiter = 1;
986         }
987         /*
988          * push the map to the device-mapper
989          */
990         if (setup_map(mpp, params, PARAMS_SIZE, vecs)) {
991                 condlog(0, "%s: failed to setup map for addition of new "
992                         "path %s", mpp->alias, pp->dev);
993                 goto fail_map;
994         }
995         /*
996          * reload the map for the multipath mapped device
997          */
998         ret = domap(mpp, params, 1);
999         while (ret == DOMAP_RETRY && retries-- > 0) {
1000                 condlog(0, "%s: retry domap for addition of new "
1001                         "path %s", mpp->alias, pp->dev);
1002                 sleep(1);
1003                 ret = domap(mpp, params, 1);
1004         }
1005         if (ret == DOMAP_FAIL || ret == DOMAP_RETRY) {
1006                 condlog(0, "%s: failed in domap for addition of new "
1007                         "path %s", mpp->alias, pp->dev);
1008                 /*
1009                  * deal with asynchronous uevents :((
1010                  */
1011                 if (mpp->action == ACT_RELOAD && retries-- > 0) {
1012                         condlog(0, "%s: ev_add_path sleep", mpp->alias);
1013                         sleep(1);
1014                         update_mpp_paths(mpp, vecs->pathvec);
1015                         goto rescan;
1016                 }
1017                 else if (mpp->action == ACT_RELOAD)
1018                         condlog(0, "%s: giving up reload", mpp->alias);
1019                 else
1020                         goto fail_map;
1021         }
1022         dm_lib_release();
1023
1024         if ((mpp->action == ACT_CREATE ||
1025              (mpp->action == ACT_NOTHING && start_waiter && !mpp->waiter)) &&
1026             wait_for_events(mpp, vecs))
1027                         goto fail_map;
1028
1029         /*
1030          * update our state from kernel regardless of create or reload
1031          */
1032         if (setup_multipath(vecs, mpp))
1033                 goto fail; /* if setup_multipath fails, it removes the map */
1034
1035         sync_map_state(mpp);
1036
1037         if (retries >= 0) {
1038                 condlog(2, "%s [%s]: path added to devmap %s",
1039                         pp->dev, pp->dev_t, mpp->alias);
1040                 return 0;
1041         } else
1042                 goto fail;
1043
1044 fail_map:
1045         remove_map(mpp, vecs, 1);
1046 fail:
1047         orphan_path(pp, "failed to add path");
1048         return 1;
1049 }
1050
1051 static int
1052 uev_remove_path (struct uevent *uev, struct vectors * vecs, int need_do_map)
1053 {
1054         struct path *pp;
1055         int ret;
1056
1057         condlog(3, "%s: remove path (uevent)", uev->kernel);
1058         delete_foreign(uev->udev);
1059
1060         pthread_cleanup_push(cleanup_lock, &vecs->lock);
1061         lock(&vecs->lock);
1062         pthread_testcancel();
1063         pp = find_path_by_dev(vecs->pathvec, uev->kernel);
1064         if (pp)
1065                 ret = ev_remove_path(pp, vecs, need_do_map);
1066         lock_cleanup_pop(vecs->lock);
1067         if (!pp) {
1068                 /* Not an error; path might have been purged earlier */
1069                 condlog(0, "%s: path already removed", uev->kernel);
1070                 return 0;
1071         }
1072         return ret;
1073 }
1074
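/*
 * Remove a path from its map and from the pathvec. If it was the last
 * path, the map itself is flushed (disabling queueing first if
 * flush_on_last_del is set); otherwise the map is reloaded without the
 * path. Returns nonzero if the reload failed or the map had to be
 * removed.
 */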
1075 int
1076 ev_remove_path (struct path *pp, struct vectors * vecs, int need_do_map)
1077 {
1078         struct multipath * mpp;
1079         int i, retval = 0;
1080         char params[PARAMS_SIZE] = {0};
1081
1082         /*
1083          * avoid referring to the map of an orphaned path
1084          */
1085         if ((mpp = pp->mpp)) {
1086                 /*
1087                  * transform the mp->pg vector of vectors of paths
1088                  * into a mp->params string to feed the device-mapper
1089                  */
1090                 if (update_mpp_paths(mpp, vecs->pathvec)) {
1091                         condlog(0, "%s: failed to update paths",
1092                                 mpp->alias);
1093                         goto fail;
1094                 }
1095
1096                 /*
1097                  * Make sure mpp->hwe doesn't point to freed memory
1098                  * We call extract_hwe_from_path() below to restore mpp->hwe
1099                  */
1100                 if (mpp->hwe == pp->hwe)
1101                         mpp->hwe = NULL;
1102
1103                 if ((i = find_slot(mpp->paths, (void *)pp)) != -1)
1104                         vector_del_slot(mpp->paths, i);
1105
1106                 /*
1107                  * remove the map IF removing the last path
1108                  */
1109                 if (VECTOR_SIZE(mpp->paths) == 0) {
1110                         char alias[WWID_SIZE];
1111
1112                         /*
1113                          * flush_map will fail if the device is open
1114                          */
1115                         strlcpy(alias, mpp->alias, WWID_SIZE);
1116                         if (mpp->flush_on_last_del == FLUSH_ENABLED) {
1117                                 condlog(2, "%s Last path deleted, disabling queueing", mpp->alias);
1118                                 mpp->retry_tick = 0;
1119                                 mpp->no_path_retry = NO_PATH_RETRY_FAIL;
1120                                 mpp->disable_queueing = 1;
1121                                 mpp->stat_map_failures++;
1122                                 dm_queue_if_no_path(mpp->alias, 0);
1123                         }
1124                         if (!flush_map(mpp, vecs, 1)) {
1125                                 condlog(2, "%s: removed map after"
1126                                         " removing all paths",
1127                                         alias);
1128                                 retval = 0;
1129                                 goto out;
1130                         }
1131                         /*
1132                          * Not an error, continue
1133                          */
1134                 }
1135
1136                 if (mpp->hwe == NULL)
1137                         extract_hwe_from_path(mpp);
1138
1139                 if (setup_map(mpp, params, PARAMS_SIZE, vecs)) {
1140                         condlog(0, "%s: failed to setup map for"
1141                                 " removal of path %s", mpp->alias, pp->dev);
1142                         goto fail;
1143                 }
1144
1145                 if (mpp->wait_for_udev) {
1146                         mpp->wait_for_udev = 2;
1147                         goto out;
1148                 }
1149
1150                 if (!need_do_map)
1151                         goto out;
1152                 /*
1153                  * reload the map
1154                  */
1155                 mpp->action = ACT_RELOAD;
1156                 if (domap(mpp, params, 1) == DOMAP_FAIL) {
1157                         condlog(0, "%s: failed in domap for "
1158                                 "removal of path %s",
1159                                 mpp->alias, pp->dev);
1160                         retval = 1;
1161                 } else {
1162                         /*
1163                          * update our state from kernel
1164                          */
1165                         if (setup_multipath(vecs, mpp))
1166                                 return 1;
1167                         sync_map_state(mpp);
1168
1169                         condlog(2, "%s [%s]: path removed from map %s",
1170                                 pp->dev, pp->dev_t, mpp->alias);
1171                 }
1172         }
1173
1174 out:
1175         if ((i = find_slot(vecs->pathvec, (void *)pp)) != -1)
1176                 vector_del_slot(vecs->pathvec, i);
1177
1178         free_path(pp);
1179
1180         return retval;
1181
1182 fail:
1183         remove_map_and_stop_waiter(mpp, vecs);
1184         return 1;
1185 }
1186
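/*
 * Handle a "change" uevent for a path device: re-check the WWID (failing
 * the path if it changed and disable_changed_wwids is set), refresh the
 * udev/sysfs state, and reload the map when the uevent carries a
 * read-only setting.
 */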
1187 static int
1188 uev_update_path (struct uevent *uev, struct vectors * vecs)
1189 {
1190         int ro, retval = 0, rc;
1191         struct path * pp;
1192         struct config *conf;
1193         int disable_changed_wwids;
1194         int needs_reinit = 0;
1195
1196         switch ((rc = change_foreign(uev->udev))) {
1197         case FOREIGN_OK:
1198                 /* known foreign path, ignore event */
1199                 return 0;
1200         case FOREIGN_IGNORED:
1201                 break;
1202         case FOREIGN_ERR:
1203                 condlog(3, "%s: error in change_foreign", __func__);
1204                 break;
1205         default:
1206                 condlog(1, "%s: return code %d of change_foreign is unsupported",
1207                         __func__, rc);
1208                 break;
1209         }
1210
1211         conf = get_multipath_config();
1212         disable_changed_wwids = conf->disable_changed_wwids;
1213         put_multipath_config(conf);
1214
1215         ro = uevent_get_disk_ro(uev);
1216
1217         pthread_cleanup_push(cleanup_lock, &vecs->lock);
1218         lock(&vecs->lock);
1219         pthread_testcancel();
1220
1221         pp = find_path_by_dev(vecs->pathvec, uev->kernel);
1222         if (pp) {
1223                 struct multipath *mpp = pp->mpp;
1224                 char wwid[WWID_SIZE];
1225
1226                 if (pp->initialized == INIT_REQUESTED_UDEV) {
1227                         needs_reinit = 1;
1228                         goto out;
1229                 }
1230                 /* Don't deal with other types of failed initialization
1231                  * now. check_path will handle it */
1232                 if (!strlen(pp->wwid))
1233                         goto out;
1234
1235                 strcpy(wwid, pp->wwid);
1236                 get_uid(pp, pp->state, uev->udev);
1237
1238                 if (strncmp(wwid, pp->wwid, WWID_SIZE) != 0) {
1239                         condlog(0, "%s: path wwid changed from '%s' to '%s'. %s",
1240                                 uev->kernel, wwid, pp->wwid,
1241                                 (disable_changed_wwids ? "disallowing" :
1242                                  "continuing"));
1243                         strcpy(pp->wwid, wwid);
1244                         if (disable_changed_wwids) {
1245                                 if (!pp->wwid_changed) {
1246                                         pp->wwid_changed = 1;
1247                                         pp->tick = 1;
1248                                         if (pp->mpp)
1249                                                 dm_fail_path(pp->mpp->alias, pp->dev_t);
1250                                 }
1251                                 goto out;
1252                         }
1253                 } else {
1254                         pp->wwid_changed = 0;
1255                         udev_device_unref(pp->udev);
1256                         pp->udev = udev_device_ref(uev->udev);
1257                         conf = get_multipath_config();
1258                         pthread_cleanup_push(put_multipath_config, conf);
1259                         if (pathinfo(pp, conf, DI_SYSFS|DI_NOIO) != PATHINFO_OK)
1260                                 condlog(1, "%s: pathinfo failed after change uevent",
1261                                         uev->kernel);
1262                         pthread_cleanup_pop(1);
1263                 }
1264
1265                 if (mpp && ro >= 0) {
1266                         condlog(2, "%s: update path write_protect to '%d' (uevent)", uev->kernel, ro);
1267
1268                         if (mpp->wait_for_udev)
1269                                 mpp->wait_for_udev = 2;
1270                         else {
1271                                 if (ro == 1)
1272                                         pp->mpp->force_readonly = 1;
1273                                 retval = reload_map(vecs, mpp, 0, 1);
1274                                 pp->mpp->force_readonly = 0;
1275                                 condlog(2, "%s: map %s reloaded (retval %d)",
1276                                         uev->kernel, mpp->alias, retval);
1277                         }
1278                 }
1279         }
1280 out:
1281         lock_cleanup_pop(vecs->lock);
1282         if (!pp) {
1283                 /* If the path is blacklisted, print a debug/non-default verbosity message. */
1284                 if (uev->udev) {
1285                         int flag = DI_SYSFS | DI_WWID;
1286
1287                         conf = get_multipath_config();
1288                         pthread_cleanup_push(put_multipath_config, conf);
1289                         retval = alloc_path_with_pathinfo(conf, uev->udev, uev->wwid, flag, NULL);
1290                         pthread_cleanup_pop(1);
1291
1292                         if (retval == PATHINFO_SKIPPED) {
1293                                 condlog(3, "%s: spurious uevent, path is blacklisted", uev->kernel);
1294                                 return 0;
1295                         }
1296                 }
1297
1298                 condlog(0, "%s: spurious uevent, path not found", uev->kernel);
1299         }
1300         if (needs_reinit)
1301                 retval = uev_add_path(uev, vecs, 1);
1302         return retval;
1303 }
1304
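/*
 * On a dm "PATH_FAILED" event, look up the affected path and hand it to
 * the io_err_stat code, which accounts path IO errors for marginal path
 * detection.
 */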
1305 static int
1306 uev_pathfail_check(struct uevent *uev, struct vectors *vecs)
1307 {
1308         char *action = NULL, *devt = NULL;
1309         struct path *pp;
1310         int r = 1;
1311
1312         action = uevent_get_dm_action(uev);
1313         if (!action)
1314                 return 1;
1315         if (strncmp(action, "PATH_FAILED", 11))
1316                 goto out;
1317         devt = uevent_get_dm_path(uev);
1318         if (!devt) {
1319                 condlog(3, "%s: No DM_PATH in uevent", uev->kernel);
1320                 goto out;
1321         }
1322
1323         pthread_cleanup_push(cleanup_lock, &vecs->lock);
1324         lock(&vecs->lock);
1325         pthread_testcancel();
1326         pp = find_path_by_devt(vecs->pathvec, devt);
1327         if (!pp)
1328                 goto out_lock;
1329         r = io_err_stat_handle_pathfail(pp);
1330         if (r)
1331                 condlog(3, "io_err_stat: %s: cannot handle pathfail uevent",
1332                                 pp->dev);
1333 out_lock:
1334         lock_cleanup_pop(vecs->lock);
1335         FREE(devt);
1336         FREE(action);
1337         return r;
1338 out:
1339         FREE(action);
1340         return 1;
1341 }
1342
1343 static int
1344 map_discovery (struct vectors * vecs)
1345 {
1346         struct multipath * mpp;
1347         unsigned int i;
1348
1349         if (dm_get_maps(vecs->mpvec))
1350                 return 1;
1351
1352         vector_foreach_slot (vecs->mpvec, mpp, i)
1353                 if (update_multipath_table(mpp, vecs->pathvec, 1) ||
1354                     update_multipath_status(mpp)) {
1355                         remove_map(mpp, vecs, 1);
1356                         i--;
1357                 }
1358
1359         return 0;
1360 }
1361
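/*
 * Handle a command received on the multipathd unix socket. Non-root
 * clients may only issue "list"/"show" commands; everything else is
 * rejected. The command itself is processed by parse_cmd(), and a
 * default "ok"/"fail"/"timeout" reply is generated here if needed.
 */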
1362 int
1363 uxsock_trigger (char * str, char ** reply, int * len, bool is_root,
1364                 void * trigger_data)
1365 {
1366         struct vectors * vecs;
1367         int r;
1368
1369         *reply = NULL;
1370         *len = 0;
1371         vecs = (struct vectors *)trigger_data;
1372
1373         if ((str != NULL) && (is_root == false) &&
1374             (strncmp(str, "list", strlen("list")) != 0) &&
1375             (strncmp(str, "show", strlen("show")) != 0)) {
1376                 *reply = STRDUP("permission denied: need to be root");
1377                 if (*reply)
1378                         *len = strlen(*reply) + 1;
1379                 return 1;
1380         }
1381
1382         r = parse_cmd(str, reply, len, vecs, uxsock_timeout / 1000);
1383
1384         if (r > 0) {
1385                 if (r == ETIMEDOUT)
1386                         *reply = STRDUP("timeout\n");
1387                 else
1388                         *reply = STRDUP("fail\n");
1389                 if (*reply)
1390                         *len = strlen(*reply) + 1;
1391                 r = 1;
1392         }
1393         else if (!r && *len == 0) {
1394                 *reply = STRDUP("ok\n");
1395                 if (*reply)
1396                         *len = strlen(*reply) + 1;
1397                 r = 0;
1398         }
1399         /* else if (r < 0) leave *reply alone */
1400
1401         return r;
1402 }
1403
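/*
 * Dispatch a (possibly merged) uevent once the daemon is idle or running.
 * "dm-*" events update or remove maps and feed PATH_FAILED notifications
 * to uev_pathfail_check(); other block device events add, remove or
 * update paths.
 */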
1404 int
1405 uev_trigger (struct uevent * uev, void * trigger_data)
1406 {
1407         int r = 0;
1408         struct vectors * vecs;
1409         struct uevent *merge_uev, *tmp;
1410
1411         vecs = (struct vectors *)trigger_data;
1412
1413         pthread_cleanup_push(config_cleanup, NULL);
1414         pthread_mutex_lock(&config_lock);
1415         if (running_state != DAEMON_IDLE &&
1416             running_state != DAEMON_RUNNING)
1417                 pthread_cond_wait(&config_cond, &config_lock);
1418         pthread_cleanup_pop(1);
1419
1420         if (running_state == DAEMON_SHUTDOWN)
1421                 return 0;
1422
1423         /*
1424          * device map event
1425          * Add events are ignored here as the tables
1426          * are not fully initialised then.
1427          */
1428         if (!strncmp(uev->kernel, "dm-", 3)) {
1429                 if (!uevent_is_mpath(uev)) {
1430                         if (!strncmp(uev->action, "change", 6))
1431                                 (void)add_foreign(uev->udev);
1432                         else if (!strncmp(uev->action, "remove", 6))
1433                                 (void)delete_foreign(uev->udev);
1434                         goto out;
1435                 }
1436                 if (!strncmp(uev->action, "change", 6)) {
1437                         r = uev_add_map(uev, vecs);
1438
1439                         /*
1440                          * the kernel-side dm-mpath issues a PATH_FAILED event
1441                          * when it encounters a path IO error. That event is
1442                          * a reasonable entry point for the path IO error
1443                          * accounting process.
1444                          */
1445                         uev_pathfail_check(uev, vecs);
1446                 } else if (!strncmp(uev->action, "remove", 6)) {
1447                         r = uev_remove_map(uev, vecs);
1448                 }
1449                 goto out;
1450         }
1451
1452         /*
1453          * path add/remove/change event, add/remove maybe merged
1454          */
1455         list_for_each_entry_safe(merge_uev, tmp, &uev->merge_node, node) {
1456                 if (!strncmp(merge_uev->action, "add", 3))
1457                         r += uev_add_path(merge_uev, vecs, 0);
1458                 if (!strncmp(merge_uev->action, "remove", 6))
1459                         r += uev_remove_path(merge_uev, vecs, 0);
1460         }
1461
1462         if (!strncmp(uev->action, "add", 3))
1463                 r += uev_add_path(uev, vecs, 1);
1464         if (!strncmp(uev->action, "remove", 6))
1465                 r += uev_remove_path(uev, vecs, 1);
1466         if (!strncmp(uev->action, "change", 6))
1467                 r += uev_update_path(uev, vecs);
1468
1469 out:
1470         return r;
1471 }
1472
1473 static void rcu_unregister(void *param)
1474 {
1475         rcu_unregister_thread();
1476 }
1477
1478 static void *
1479 ueventloop (void * ap)
1480 {
1481         struct udev *udev = ap;
1482
1483         pthread_cleanup_push(rcu_unregister, NULL);
1484         rcu_register_thread();
1485         if (uevent_listen(udev))
1486                 condlog(0, "error starting uevent listener");
1487         pthread_cleanup_pop(1);
1488         return NULL;
1489 }
1490
1491 static void *
1492 uevqloop (void * ap)
1493 {
1494         pthread_cleanup_push(rcu_unregister, NULL);
1495         rcu_register_thread();
1496         if (uevent_dispatch(&uev_trigger, ap))
1497                 condlog(0, "error starting uevent dispatcher");
1498         pthread_cleanup_pop(1);
1499         return NULL;
1500 }
1501 static void *
1502 uxlsnrloop (void * ap)
1503 {
1504         long ux_sock;
1505
1506         pthread_cleanup_push(rcu_unregister, NULL);
1507         rcu_register_thread();
1508
1509         ux_sock = ux_socket_listen(DEFAULT_SOCKET);
1510         if (ux_sock == -1) {
1511                 condlog(1, "could not create uxsock: %d", errno);
1512                 exit_daemon();
1513                 goto out;
1514         }
1515         pthread_cleanup_push(uxsock_cleanup, (void *)ux_sock);
1516
1517         if (cli_init()) {
1518                 condlog(1, "Failed to init uxsock listener");
1519                 exit_daemon();
1520                 goto out_sock;
1521         }
1522
1523         /* Tell main thread that thread has started */
1524         post_config_state(DAEMON_CONFIGURE);
1525
1526         set_handler_callback(LIST+PATHS, cli_list_paths);
1527         set_handler_callback(LIST+PATHS+FMT, cli_list_paths_fmt);
1528         set_handler_callback(LIST+PATHS+RAW+FMT, cli_list_paths_raw);
1529         set_handler_callback(LIST+PATH, cli_list_path);
1530         set_handler_callback(LIST+MAPS, cli_list_maps);
1531         set_handler_callback(LIST+STATUS, cli_list_status);
1532         set_unlocked_handler_callback(LIST+DAEMON, cli_list_daemon);
1533         set_handler_callback(LIST+MAPS+STATUS, cli_list_maps_status);
1534         set_handler_callback(LIST+MAPS+STATS, cli_list_maps_stats);
1535         set_handler_callback(LIST+MAPS+FMT, cli_list_maps_fmt);
1536         set_handler_callback(LIST+MAPS+RAW+FMT, cli_list_maps_raw);
1537         set_handler_callback(LIST+MAPS+TOPOLOGY, cli_list_maps_topology);
1538         set_handler_callback(LIST+TOPOLOGY, cli_list_maps_topology);
1539         set_handler_callback(LIST+MAPS+JSON, cli_list_maps_json);
1540         set_handler_callback(LIST+MAP+TOPOLOGY, cli_list_map_topology);
1541         set_handler_callback(LIST+MAP+FMT, cli_list_map_fmt);
1542         set_handler_callback(LIST+MAP+RAW+FMT, cli_list_map_fmt);
1543         set_handler_callback(LIST+MAP+JSON, cli_list_map_json);
1544         set_handler_callback(LIST+CONFIG+LOCAL, cli_list_config_local);
1545         set_handler_callback(LIST+CONFIG, cli_list_config);
1546         set_handler_callback(LIST+BLACKLIST, cli_list_blacklist);
1547         set_handler_callback(LIST+DEVICES, cli_list_devices);
1548         set_handler_callback(LIST+WILDCARDS, cli_list_wildcards);
1549         set_handler_callback(RESET+MAPS+STATS, cli_reset_maps_stats);
1550         set_handler_callback(RESET+MAP+STATS, cli_reset_map_stats);
1551         set_handler_callback(ADD+PATH, cli_add_path);
1552         set_handler_callback(DEL+PATH, cli_del_path);
1553         set_handler_callback(ADD+MAP, cli_add_map);
1554         set_handler_callback(DEL+MAP, cli_del_map);
1555         set_handler_callback(SWITCH+MAP+GROUP, cli_switch_group);
1556         set_unlocked_handler_callback(RECONFIGURE, cli_reconfigure);
1557         set_handler_callback(SUSPEND+MAP, cli_suspend);
1558         set_handler_callback(RESUME+MAP, cli_resume);
1559         set_handler_callback(RESIZE+MAP, cli_resize);
1560         set_handler_callback(RELOAD+MAP, cli_reload);
1561         set_handler_callback(RESET+MAP, cli_reassign);
1562         set_handler_callback(REINSTATE+PATH, cli_reinstate);
1563         set_handler_callback(FAIL+PATH, cli_fail);
1564         set_handler_callback(DISABLEQ+MAP, cli_disable_queueing);
1565         set_handler_callback(RESTOREQ+MAP, cli_restore_queueing);
1566         set_handler_callback(DISABLEQ+MAPS, cli_disable_all_queueing);
1567         set_handler_callback(RESTOREQ+MAPS, cli_restore_all_queueing);
1568         set_unlocked_handler_callback(QUIT, cli_quit);
1569         set_unlocked_handler_callback(SHUTDOWN, cli_shutdown);
1570         set_handler_callback(GETPRSTATUS+MAP, cli_getprstatus);
1571         set_handler_callback(SETPRSTATUS+MAP, cli_setprstatus);
1572         set_handler_callback(UNSETPRSTATUS+MAP, cli_unsetprstatus);
1573         set_handler_callback(FORCEQ+DAEMON, cli_force_no_daemon_q);
1574         set_handler_callback(RESTOREQ+DAEMON, cli_restore_no_daemon_q);
1575         set_handler_callback(GETPRKEY+MAP, cli_getprkey);
1576         set_handler_callback(SETPRKEY+MAP+KEY, cli_setprkey);
1577         set_handler_callback(UNSETPRKEY+MAP, cli_unsetprkey);
1578
1579         umask(077);
1580         uxsock_listen(&uxsock_trigger, ux_sock, ap);
1581
1582 out_sock:
1583         pthread_cleanup_pop(1); /* uxsock_cleanup */
1584 out:
1585         pthread_cleanup_pop(1); /* rcu_unregister */
1586         return NULL;
1587 }
1588
1589 void
1590 exit_daemon (void)
1591 {
1592         post_config_state(DAEMON_SHUTDOWN);
1593 }
1594
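/*
 * Fail the path in its device-mapper map. If del_active is set, also
 * account for the lost active path in the map's queueing state.
 */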
1595 static void
1596 fail_path (struct path * pp, int del_active)
1597 {
1598         if (!pp->mpp)
1599                 return;
1600
1601         condlog(2, "checker failed path %s in map %s",
1602                  pp->dev_t, pp->mpp->alias);
1603
1604         dm_fail_path(pp->mpp->alias, pp->dev_t);
1605         if (del_active)
1606                 update_queue_mode_del_path(pp->mpp);
1607 }
1608
1609 /*
1610  * the caller must have locked the path list before calling this function
1611  */
1612 static int
1613 reinstate_path (struct path * pp, int add_active)
1614 {
1615         int ret = 0;
1616
1617         if (!pp->mpp)
1618                 return 0;
1619
1620         if (dm_reinstate_path(pp->mpp->alias, pp->dev_t)) {
1621                 condlog(0, "%s: reinstate failed", pp->dev_t);
1622                 ret = 1;
1623         } else {
1624                 condlog(2, "%s: reinstated", pp->dev_t);
1625                 if (add_active)
1626                         update_queue_mode_add_path(pp->mpp);
1627         }
1628         return ret;
1629 }
1630
1631 static void
1632 enable_group(struct path * pp)
1633 {
1634         struct pathgroup * pgp;
1635
1636         /*
1637          * if the path was added through uev_add_path, pgindex can be unset.
1638          * The next update_multipath_strings() will set it, upon map reload.
1639          *
1640          * We can safely return here, because upon map reload all
1641          * PGs will be enabled.
1642          */
1643         if (!pp->mpp->pg || !pp->pgindex)
1644                 return;
1645
1646         pgp = VECTOR_SLOT(pp->mpp->pg, pp->pgindex - 1);
1647
1648         if (pgp->status == PGSTATE_DISABLED) {
1649                 condlog(2, "%s: enable group #%i", pp->mpp->alias, pp->pgindex);
1650                 dm_enablegroup(pp->mpp->alias, pp->pgindex);
1651         }
1652 }
1653
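/*
 * Drop multipath entries whose maps no longer exist in device-mapper.
 */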
1654 static void
1655 mpvec_garbage_collector (struct vectors * vecs)
1656 {
1657         struct multipath * mpp;
1658         unsigned int i;
1659
1660         if (!vecs->mpvec)
1661                 return;
1662
1663         vector_foreach_slot (vecs->mpvec, mpp, i) {
1664                 if (mpp && mpp->alias && !dm_map_present(mpp->alias)) {
1665                         condlog(2, "%s: remove dead map", mpp->alias);
1666                         remove_map_and_stop_waiter(mpp, vecs);
1667                         i--;
1668                 }
1669         }
1670 }
1671
1672 /* This is called after a path has started working again. If the multipath
1673  * device for this path uses the followover failback type, and this is the
1674  * best pathgroup, and this is the first path in the pathgroup to come back
1675  * up, then switch to this pathgroup */
1676 static int
1677 followover_should_failback(struct path * pp)
1678 {
1679         struct pathgroup * pgp;
1680         struct path *pp1;
1681         int i;
1682
1683         if (pp->mpp->pgfailback != -FAILBACK_FOLLOWOVER ||
1684             !pp->mpp->pg || !pp->pgindex ||
1685             pp->pgindex != pp->mpp->bestpg)
1686                 return 0;
1687
1688         pgp = VECTOR_SLOT(pp->mpp->pg, pp->pgindex - 1);
1689         vector_foreach_slot(pgp->paths, pp1, i) {
1690                 if (pp1 == pp)
1691                         continue;
1692                 if (pp1->chkrstate != PATH_DOWN && pp1->chkrstate != PATH_SHAKY)
1693                         return 0;
1694         }
1695         return 1;
1696 }
1697
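/*
 * Count down the grace period for maps still waiting on their creation
 * uevent. On timeout, stop waiting (allowing reloads again) and trigger
 * a delayed reconfigure if one is pending.
 */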
1698 static void
1699 missing_uev_wait_tick(struct vectors *vecs)
1700 {
1701         struct multipath * mpp;
1702         unsigned int i;
1703         int timed_out = 0, delayed_reconfig;
1704         struct config *conf;
1705
1706         vector_foreach_slot (vecs->mpvec, mpp, i) {
1707                 if (mpp->wait_for_udev && --mpp->uev_wait_tick <= 0) {
1708                         timed_out = 1;
1709                         condlog(0, "%s: timeout waiting on creation uevent. enabling reloads", mpp->alias);
1710                         if (mpp->wait_for_udev > 1 &&
1711                             update_map(mpp, vecs, 0)) {
1712                                 /* update_map removed map */
1713                                 i--;
1714                                 continue;
1715                         }
1716                         mpp->wait_for_udev = 0;
1717                 }
1718         }
1719
1720         conf = get_multipath_config();
1721         delayed_reconfig = conf->delayed_reconfig;
1722         put_multipath_config(conf);
1723         if (timed_out && delayed_reconfig &&
1724             !need_to_delay_reconfig(vecs)) {
1725                 condlog(2, "reconfigure (delayed)");
1726                 set_config_state(DAEMON_CONFIGURE);
1727         }
1728 }
1729
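/*
 * Count down the ghost_delay timer for maps that came up with only
 * ghost paths. When it expires, reload the map instead of waiting
 * any longer for an active path.
 */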
1730 static void
1731 ghost_delay_tick(struct vectors *vecs)
1732 {
1733         struct multipath * mpp;
1734         unsigned int i;
1735
1736         vector_foreach_slot (vecs->mpvec, mpp, i) {
1737                 if (mpp->ghost_delay_tick <= 0)
1738                         continue;
1739                 if (--mpp->ghost_delay_tick <= 0) {
1740                         condlog(0, "%s: timed out waiting for active path",
1741                                 mpp->alias);
1742                         mpp->force_udev_reload = 1;
1743                         if (update_map(mpp, vecs, 0) != 0) {
1744                                 /* update_map removed map */
1745                                 i--;
1746                                 continue;
1747                         }
1748                 }
1749         }
1750 }
1751
1752 static void
1753 defered_failback_tick (vector mpvec)
1754 {
1755         struct multipath * mpp;
1756         unsigned int i;
1757
1758         vector_foreach_slot (mpvec, mpp, i) {
1759                 /*
1760                  * deferred failback countdown
1761                  */
1762                 if (mpp->pgfailback > 0 && mpp->failback_tick > 0) {
1763                         mpp->failback_tick--;
1764
1765                         if (!mpp->failback_tick && need_switch_pathgroup(mpp, 1))
1766                                 switch_pathgroup(mpp);
1767                 }
1768         }
1769 }
1770
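/*
 * Count down the no_path_retry ticks for maps that are queueing without
 * an active path. When the retries are exhausted, disable queueing so
 * that I/O errors are returned to the upper layers.
 */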
1771 static void
1772 retry_count_tick(vector mpvec)
1773 {
1774         struct multipath *mpp;
1775         unsigned int i;
1776
1777         vector_foreach_slot (mpvec, mpp, i) {
1778                 if (mpp->retry_tick > 0) {
1779                         mpp->stat_total_queueing_time++;
1780                         condlog(4, "%s: Retrying.. No active path", mpp->alias);
1781                         if(--mpp->retry_tick == 0) {
1782                                 mpp->stat_map_failures++;
1783                                 dm_queue_if_no_path(mpp->alias, 0);
1784                                 condlog(2, "%s: Disable queueing", mpp->alias);
1785                         }
1786                 }
1787         }
1788 }
1789
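/*
 * Refresh the priority of this path, or of every path in the map when
 * refresh_all is set. Returns 1 if any priority changed.
 */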
1790 int update_prio(struct path *pp, int refresh_all)
1791 {
1792         int oldpriority;
1793         struct path *pp1;
1794         struct pathgroup * pgp;
1795         int i, j, changed = 0;
1796         struct config *conf;
1797
1798         if (refresh_all) {
1799                 vector_foreach_slot (pp->mpp->pg, pgp, i) {
1800                         vector_foreach_slot (pgp->paths, pp1, j) {
1801                                 oldpriority = pp1->priority;
1802                                 conf = get_multipath_config();
1803                                 pthread_cleanup_push(put_multipath_config,
1804                                                      conf);
1805                                 pathinfo(pp1, conf, DI_PRIO);
1806                                 pthread_cleanup_pop(1);
1807                                 if (pp1->priority != oldpriority)
1808                                         changed = 1;
1809                         }
1810                 }
1811                 return changed;
1812         }
1813         oldpriority = pp->priority;
1814         conf = get_multipath_config();
1815         pthread_cleanup_push(put_multipath_config, conf);
1816         if (pp->state != PATH_DOWN)
1817                 pathinfo(pp, conf, DI_PRIO);
1818         pthread_cleanup_pop(1);
1819
1820         if (pp->priority == oldpriority)
1821                 return 0;
1822         return 1;
1823 }
1824
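/*
 * Reload the map so its path groups reflect the current priorities,
 * then resynchronize the daemon's view of it. Returns 1 on failure.
 */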
1825 int update_path_groups(struct multipath *mpp, struct vectors *vecs, int refresh)
1826 {
1827         if (reload_map(vecs, mpp, refresh, 1))
1828                 return 1;
1829
1830         dm_lib_release();
1831         if (setup_multipath(vecs, mpp) != 0)
1832                 return 1;
1833         sync_map_state(mpp);
1834
1835         return 0;
1836 }
1837
1838 static int check_path_reinstate_state(struct path * pp) {
1839         struct timespec curr_time;
1840
1841         /*
1842          * This function is only called when the path state changes
1843          * from "bad" to "good". pp->state reflects the *previous* state.
1844          * If this was "bad", we know that a failure must have occurred
1845          * beforehand, and count that.
1846          * Note that we count path state _changes_ this way. If a path
1847          * remains in "bad" state, failure count is not increased.
1848          */
1849
1850         if (!((pp->mpp->san_path_err_threshold > 0) &&
1851                                 (pp->mpp->san_path_err_forget_rate > 0) &&
1852                                 (pp->mpp->san_path_err_recovery_time >0))) {
1853                 return 0;
1854         }
1855
1856         if (pp->disable_reinstate) {
1857                 /* If we don't know how much time has passed, automatically
1858                  * reinstate the path, just to be safe. Also, if there are
1859                  * no other usable paths, reinstate the path
1860                  */
1861                 if (clock_gettime(CLOCK_MONOTONIC, &curr_time) != 0 ||
1862                                 pp->mpp->nr_active == 0) {
1863                         condlog(2, "%s : reinstating path early", pp->dev);
1864                         goto reinstate_path;
1865                 }
1866                 if ((curr_time.tv_sec - pp->dis_reinstate_time ) > pp->mpp->san_path_err_recovery_time) {
1867                         condlog(2,"%s : reinstate the path after err recovery time", pp->dev);
1868                         goto reinstate_path;
1869                 }
1870                 return 1;
1871         }
1872         /* forget errors on a working path */
1873         if ((pp->state == PATH_UP || pp->state == PATH_GHOST) &&
1874                         pp->path_failures > 0) {
1875                 if (pp->san_path_err_forget_rate > 0){
1876                         pp->san_path_err_forget_rate--;
1877                 } else {
1878                         /* for every san_path_err_forget_rate successful
1879                          * path checks, decrement path_failures by 1
1880                          */
1881                         pp->path_failures--;
1882                         pp->san_path_err_forget_rate = pp->mpp->san_path_err_forget_rate;
1883                 }
1884                 return 0;
1885         }
1886
1887         /* If the path isn't recovering from a failed state, do nothing */
1888         if (pp->state != PATH_DOWN && pp->state != PATH_SHAKY &&
1889                         pp->state != PATH_TIMEOUT)
1890                 return 0;
1891
1892         if (pp->path_failures == 0)
1893                 pp->san_path_err_forget_rate = pp->mpp->san_path_err_forget_rate;
1894
1895         pp->path_failures++;
1896
1897         /* if we don't know the current time, we don't know how long to
1898          * delay the path, so there's no point in checking if we should
1899          */
1900
1901         if (clock_gettime(CLOCK_MONOTONIC, &curr_time) != 0)
1902                 return 0;
1903         /* when the number of path failures exceeds san_path_err_threshold,
1904          * place the path in the delayed state until san_path_err_recovery_time
1905          * so that the customer can rectify the issue within this time. Once
1906          * san_path_err_recovery_time has elapsed, the path is
1907          * automatically reinstated.
1908          */
1909         if (pp->path_failures > pp->mpp->san_path_err_threshold) {
1910                 condlog(2, "%s : hit error threshold. Delaying path reinstatement", pp->dev);
1911                 pp->dis_reinstate_time = curr_time.tv_sec;
1912                 pp->disable_reinstate = 1;
1913
1914                 return 1;
1915         } else {
1916                 return 0;
1917         }
1918
1919 reinstate_path:
1920         pp->path_failures = 0;
1921         pp->disable_reinstate = 0;
1922         pp->san_path_err_forget_rate = 0;
1923         return 0;
1924 }
1925
1926 /*
1927  * Returns '1' if the path has been checked, '-1' if it was blacklisted
1928  * and '0' otherwise
1929  */
1930 int
1931 check_path (struct vectors * vecs, struct path * pp, int ticks)
1932 {
1933         int newstate;
1934         int new_path_up = 0;
1935         int chkr_new_path_up = 0;
1936         int add_active;
1937         int disable_reinstate = 0;
1938         int oldchkrstate = pp->chkrstate;
1939         int retrigger_tries, checkint, max_checkint, verbosity;
1940         struct config *conf;
1941         int ret;
1942
1943         if ((pp->initialized == INIT_OK ||
1944              pp->initialized == INIT_REQUESTED_UDEV) && !pp->mpp)
1945                 return 0;
1946
1947         if (pp->tick)
1948                 pp->tick -= (pp->tick > ticks) ? ticks : pp->tick;
1949         if (pp->tick)
1950                 return 0; /* don't check this path yet */
1951
1952         conf = get_multipath_config();
1953         retrigger_tries = conf->retrigger_tries;
1954         checkint = conf->checkint;
1955         max_checkint = conf->max_checkint;
1956         verbosity = conf->verbosity;
1957         put_multipath_config(conf);
1958
1959         if (pp->checkint == CHECKINT_UNDEF) {
1960                 condlog(0, "%s: BUG: checkint is not set", pp->dev);
1961                 pp->checkint = checkint;
1962         }
1963
1964         if (!pp->mpp && pp->initialized == INIT_MISSING_UDEV) {
1965                 if (pp->retriggers < retrigger_tries) {
1966                         condlog(2, "%s: triggering change event to reinitialize",
1967                                 pp->dev);
1968                         pp->initialized = INIT_REQUESTED_UDEV;
1969                         pp->retriggers++;
1970                         sysfs_attr_set_value(pp->udev, "uevent", "change",
1971                                              strlen("change"));
1972                         return 0;
1973                 } else {
1974                         condlog(1, "%s: not initialized after %d udev retriggers",
1975                                 pp->dev, retrigger_tries);
1976                         /*
1977                          * Make sure that the "add missing path" code path
1978                          * below may reinstate the path later, if it ever
1979                          * comes up again.
1980                          * The WWID need not be cleared; if it was set, the
1981                          * state hadn't been INIT_MISSING_UDEV in the first
1982                          * place.
1983                          */
1984                         pp->initialized = INIT_FAILED;
1985                         return 0;
1986                 }
1987         }
1988
1989         /*
1990          * schedule the next check as soon as possible,
1991          * in case we exit abnormally from here
1992          */
1993         pp->tick = checkint;
1994
1995         newstate = path_offline(pp);
1996         if (newstate == PATH_UP) {
1997                 conf = get_multipath_config();
1998                 pthread_cleanup_push(put_multipath_config, conf);
1999                 newstate = get_state(pp, conf, 1, newstate);
2000                 pthread_cleanup_pop(1);
2001         } else {
2002                 checker_clear_message(&pp->checker);
2003                 condlog(3, "%s: state %s, checker not called",
2004                         pp->dev, checker_state_name(newstate));
2005         }
2006         /*
2007          * Wait for uevent for removed paths;
2008          * some LLDDs like zfcp keep paths unavailable
2009          * without sending uevents.
2010          */
2011         if (newstate == PATH_REMOVED)
2012                 newstate = PATH_DOWN;
2013
2014         if (pp->wwid_changed) {
2015                 condlog(2, "%s: path wwid has changed. Refusing to use",
2016                         pp->dev);
2017                 newstate = PATH_DOWN;
2018         }
2019
2020         if (newstate == PATH_WILD || newstate == PATH_UNCHECKED) {
2021                 condlog(2, "%s: unusable path (%s) - checker failed",
2022                         pp->dev, checker_state_name(newstate));
2023                 LOG_MSG(2, verbosity, pp);
2024                 conf = get_multipath_config();
2025                 pthread_cleanup_push(put_multipath_config, conf);
2026                 pathinfo(pp, conf, 0);
2027                 pthread_cleanup_pop(1);
2028                 return 1;
2029         }
2030         if (!pp->mpp) {
2031                 if (!strlen(pp->wwid) &&
2032                     (pp->initialized == INIT_FAILED ||
2033                      pp->initialized == INIT_NEW) &&
2034                     (newstate == PATH_UP || newstate == PATH_GHOST)) {
2035                         condlog(2, "%s: add missing path", pp->dev);
2036                         conf = get_multipath_config();
2037                         pthread_cleanup_push(put_multipath_config, conf);
2038                         ret = pathinfo(pp, conf, DI_ALL | DI_BLACKLIST);
2039                         pthread_cleanup_pop(1);
2040                         /* INIT_OK implies ret == PATHINFO_OK */
2041                         if (pp->initialized == INIT_OK) {
2042                                 ev_add_path(pp, vecs, 1);
2043                                 pp->tick = 1;
2044                         } else {
2045                                 /*
2046                                  * We failed multiple times to initialize this
2047                                  * path properly. Don't re-check too often.
2048                                  */
2049                                 pp->checkint = max_checkint;
2050                                 if (ret == PATHINFO_SKIPPED)
2051                                         return -1;
2052                         }
2053                 }
2054                 return 0;
2055         }
2056         /*
2057          * Async IO in flight. Keep the previous path state
2058          * and reschedule as soon as possible
2059          */
2060         if (newstate == PATH_PENDING) {
2061                 pp->tick = 1;
2062                 return 0;
2063         }
2064         /*
2065          * Synchronize with kernel state
2066          */
2067         if (update_multipath_strings(pp->mpp, vecs->pathvec, 1)) {
2068                 condlog(1, "%s: Could not synchronize with kernel state",
2069                         pp->dev);
2070                 pp->dmstate = PSTATE_UNDEF;
2071         }
2072         /* if update_multipath_strings orphaned the path, quit early */
2073         if (!pp->mpp)
2074                 return 0;
2075
2076         if ((newstate == PATH_UP || newstate == PATH_GHOST) &&
2077                         check_path_reinstate_state(pp)) {
2078                 pp->state = PATH_DELAYED;
2079                 return 1;
2080         }
2081
2082         if ((newstate == PATH_UP || newstate == PATH_GHOST) &&
2083             pp->io_err_disable_reinstate && need_io_err_check(pp)) {
2084                 pp->state = PATH_SHAKY;
2085                 /*
2086                  * reschedule as soon as possible, so that this path can
2087                  * be recovered in time
2088                  */
2089                 pp->tick = 1;
2090                 return 1;
2091         }
2092
2093         if ((newstate == PATH_UP || newstate == PATH_GHOST) &&
2094              pp->wait_checks > 0) {
2095                 if (pp->mpp->nr_active > 0) {
2096                         pp->state = PATH_DELAYED;
2097                         pp->wait_checks--;
2098                         return 1;
2099                 } else
2100                         pp->wait_checks = 0;
2101         }
2102
2103         /*
2104          * don't reinstate a failed path if it's in stand-by
2105          * and the target supports only implicit tpgs mode.
2106          * this prevents unnecessary i/o by dm on stand-by
2107          * paths if there are no other active paths in the map.
2108          */
2109         disable_reinstate = (newstate == PATH_GHOST &&
2110                             pp->mpp->nr_active == 0 &&
2111                             pp->tpgs == TPGS_IMPLICIT) ? 1 : 0;
2112
2113         pp->chkrstate = newstate;
2114         if (newstate != pp->state) {
2115                 int oldstate = pp->state;
2116                 pp->state = newstate;
2117
2118                 LOG_MSG(1, verbosity, pp);
2119
2120                 /*
2121                  * upon state change, reset the checkint
2122                  * to the shortest delay
2123                  */
2124                 conf = get_multipath_config();
2125                 pp->checkint = conf->checkint;
2126                 put_multipath_config(conf);
2127
2128                 if (newstate != PATH_UP && newstate != PATH_GHOST) {
2129                         /*
2130                          * proactively fail path in the DM
2131                          */
2132                         if (oldstate == PATH_UP ||
2133                             oldstate == PATH_GHOST) {
2134                                 fail_path(pp, 1);
2135                                 if (pp->mpp->delay_wait_checks > 0 &&
2136                                     pp->watch_checks > 0) {
2137                                         pp->wait_checks = pp->mpp->delay_wait_checks;
2138                                         pp->watch_checks = 0;
2139                                 }
2140                         } else {
2141                                 fail_path(pp, 0);
2142                                 if (pp->wait_checks > 0)
2143                                         pp->wait_checks =
2144                                                 pp->mpp->delay_wait_checks;
2145                         }
2146
2147                         /*
2148                          * cancel scheduled failback
2149                          */
2150                         pp->mpp->failback_tick = 0;
2151
2152                         pp->mpp->stat_path_failures++;
2153                         return 1;
2154                 }
2155
2156                 if (newstate == PATH_UP || newstate == PATH_GHOST) {
2157                         if (pp->mpp->prflag) {
2158                                 /*
2159                                  * Check Persistent Reservation.
2160                                  */
2161                                 condlog(2, "%s: checking persistent "
2162                                         "reservation registration", pp->dev);
2163                                 mpath_pr_event_handle(pp);
2164                         }
2165                 }
2166
2167                 /*
2168                  * reinstate this path
2169                  */
2170                 if (oldstate != PATH_UP &&
2171                     oldstate != PATH_GHOST) {
2172                         if (pp->mpp->delay_watch_checks > 0)
2173                                 pp->watch_checks = pp->mpp->delay_watch_checks;
2174                         add_active = 1;
2175                 } else {
2176                         if (pp->watch_checks > 0)
2177                                 pp->watch_checks--;
2178                         add_active = 0;
2179                 }
2180                 if (!disable_reinstate && reinstate_path(pp, add_active)) {
2181                         condlog(3, "%s: reload map", pp->dev);
2182                         ev_add_path(pp, vecs, 1);
2183                         pp->tick = 1;
2184                         return 0;
2185                 }
2186                 new_path_up = 1;
2187
2188                 if (oldchkrstate != PATH_UP && oldchkrstate != PATH_GHOST)
2189                         chkr_new_path_up = 1;
2190
2191                 /*
2192                  * if at least one path is up in a group, and
2193                  * the group is disabled, re-enable it
2194                  */
2195                 if (newstate == PATH_UP)
2196                         enable_group(pp);
2197         }
2198         else if (newstate == PATH_UP || newstate == PATH_GHOST) {
2199                 if ((pp->dmstate == PSTATE_FAILED ||
2200                     pp->dmstate == PSTATE_UNDEF) &&
2201                     !disable_reinstate) {
2202                         /* Clear IO errors */
2203                         if (reinstate_path(pp, 0)) {
2204                                 condlog(3, "%s: reload map", pp->dev);
2205                                 ev_add_path(pp, vecs, 1);
2206                                 pp->tick = 1;
2207                                 return 0;
2208                         }
2209                 } else {
2210                         LOG_MSG(4, verbosity, pp);
2211                         if (pp->checkint != max_checkint) {
2212                                 /*
2213                                  * double the next check delay.
2214                                  * max at conf->max_checkint
2215                                  */
2216                                 if (pp->checkint < (max_checkint / 2))
2217                                         pp->checkint = 2 * pp->checkint;
2218                                 else
2219                                         pp->checkint = max_checkint;
2220
2221                                 condlog(4, "%s: delay next check %is",
2222                                         pp->dev_t, pp->checkint);
2223                         }
2224                         if (pp->watch_checks > 0)
2225                                 pp->watch_checks--;
2226                         pp->tick = pp->checkint;
2227                 }
2228         }
2229         else if (newstate != PATH_UP && newstate != PATH_GHOST) {
2230                 if (pp->dmstate == PSTATE_ACTIVE ||
2231                     pp->dmstate == PSTATE_UNDEF)
2232                         fail_path(pp, 0);
2233                 if (newstate == PATH_DOWN) {
2234                         int log_checker_err;
2235
2236                         conf = get_multipath_config();
2237                         log_checker_err = conf->log_checker_err;
2238                         put_multipath_config(conf);
2239                         if (log_checker_err == LOG_CHKR_ERR_ONCE)
2240                                 LOG_MSG(3, verbosity, pp);
2241                         else
2242                                 LOG_MSG(2, verbosity, pp);
2243                 }
2244         }
2245
2246         pp->state = newstate;
2247
2248         if (pp->mpp->wait_for_udev)
2249                 return 1;
2250         /*
2251          * path prio refreshing
2252          */
2253         condlog(4, "path prio refresh");
2254
2255         if (update_prio(pp, new_path_up) &&
2256             (pp->mpp->pgpolicyfn == (pgpolicyfn *)group_by_prio) &&
2257              pp->mpp->pgfailback == -FAILBACK_IMMEDIATE)
2258                 update_path_groups(pp->mpp, vecs, !new_path_up);
2259         else if (need_switch_pathgroup(pp->mpp, 0)) {
2260                 if (pp->mpp->pgfailback > 0 &&
2261                     (new_path_up || pp->mpp->failback_tick <= 0))
2262                         pp->mpp->failback_tick =
2263                                 pp->mpp->pgfailback + 1;
2264                 else if (pp->mpp->pgfailback == -FAILBACK_IMMEDIATE ||
2265                          (chkr_new_path_up && followover_should_failback(pp)))
2266                         switch_pathgroup(pp->mpp);
2267         }
2268         return 1;
2269 }
2270
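/*
 * Path checker thread: periodically run check_path() on every path,
 * drive the deferred failback, retry and uevent-wait timers, and
 * garbage-collect dead maps.
 */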
2271 static void *
2272 checkerloop (void *ap)
2273 {
2274         struct vectors *vecs;
2275         struct path *pp;
2276         int count = 0;
2277         unsigned int i;
2278         struct timespec last_time;
2279         struct config *conf;
2280         int foreign_tick = 0;
2281
2282         pthread_cleanup_push(rcu_unregister, NULL);
2283         rcu_register_thread();
2284         mlockall(MCL_CURRENT | MCL_FUTURE);
2285         vecs = (struct vectors *)ap;
2286         condlog(2, "path checkers start up");
2287
2288         /* Tweak start time for initial path check */
2289         if (clock_gettime(CLOCK_MONOTONIC, &last_time) != 0)
2290                 last_time.tv_sec = 0;
2291         else
2292                 last_time.tv_sec -= 1;
2293
2294         while (1) {
2295                 struct timespec diff_time, start_time, end_time;
2296                 int num_paths = 0, ticks = 0, strict_timing, rc = 0;
2297
2298                 if (clock_gettime(CLOCK_MONOTONIC, &start_time) != 0)
2299                         start_time.tv_sec = 0;
2300                 if (start_time.tv_sec && last_time.tv_sec) {
2301                         timespecsub(&start_time, &last_time, &diff_time);
2302                         condlog(4, "tick (%lu.%06lu secs)",
2303                                 diff_time.tv_sec, diff_time.tv_nsec / 1000);
2304                         last_time = start_time;
2305                         ticks = diff_time.tv_sec;
2306                 } else {
2307                         ticks = 1;
2308                         condlog(4, "tick (%d ticks)", ticks);
2309                 }
2310 #ifdef USE_SYSTEMD
2311                 if (use_watchdog)
2312                         sd_notify(0, "WATCHDOG=1");
2313 #endif
2314                 rc = set_config_state(DAEMON_RUNNING);
2315                 if (rc == ETIMEDOUT) {
2316                         condlog(4, "timeout waiting for DAEMON_IDLE");
2317                         continue;
2318                 } else if (rc == EINVAL)
2319                         /* daemon shutdown */
2320                         break;
2321
2322                 pthread_cleanup_push(cleanup_lock, &vecs->lock);
2323                 lock(&vecs->lock);
2324                 pthread_testcancel();
2325                 vector_foreach_slot (vecs->pathvec, pp, i) {
2326                         rc = check_path(vecs, pp, ticks);
2327                         if (rc < 0) {
2328                                 vector_del_slot(vecs->pathvec, i);
2329                                 free_path(pp);
2330                                 i--;
2331                         } else
2332                                 num_paths += rc;
2333                 }
2334                 lock_cleanup_pop(vecs->lock);
2335
2336                 pthread_cleanup_push(cleanup_lock, &vecs->lock);
2337                 lock(&vecs->lock);
2338                 pthread_testcancel();
2339                 defered_failback_tick(vecs->mpvec);
2340                 retry_count_tick(vecs->mpvec);
2341                 missing_uev_wait_tick(vecs);
2342                 ghost_delay_tick(vecs);
2343                 lock_cleanup_pop(vecs->lock);
2344
2345                 if (count)
2346                         count--;
2347                 else {
2348                         pthread_cleanup_push(cleanup_lock, &vecs->lock);
2349                         lock(&vecs->lock);
2350                         pthread_testcancel();
2351                         condlog(4, "map garbage collection");
2352                         mpvec_garbage_collector(vecs);
2353                         count = MAPGCINT;
2354                         lock_cleanup_pop(vecs->lock);
2355                 }
2356
2357                 diff_time.tv_nsec = 0;
2358                 if (start_time.tv_sec &&
2359                     clock_gettime(CLOCK_MONOTONIC, &end_time) == 0) {
2360                         timespecsub(&end_time, &start_time, &diff_time);
2361                         if (num_paths) {
2362                                 unsigned int max_checkint;
2363
2364                                 condlog(4, "checked %d path%s in %lu.%06lu secs",
2365                                         num_paths, num_paths > 1 ? "s" : "",
2366                                         diff_time.tv_sec,
2367                                         diff_time.tv_nsec / 1000);
2368                                 conf = get_multipath_config();
2369                                 max_checkint = conf->max_checkint;
2370                                 put_multipath_config(conf);
2371                                 if (diff_time.tv_sec > max_checkint)
2372                                         condlog(1, "path checkers took longer "
2373                                                 "than %lu seconds, consider "
2374                                                 "increasing max_polling_interval",
2375                                                 diff_time.tv_sec);
2376                         }
2377                 }
2378
2379                 if (foreign_tick == 0) {
2380                         conf = get_multipath_config();
2381                         foreign_tick = conf->max_checkint;
2382                         put_multipath_config(conf);
2383                 }
2384                 if (--foreign_tick == 0)
2385                         check_foreign();
2386
2387                 post_config_state(DAEMON_IDLE);
2388                 conf = get_multipath_config();
2389                 strict_timing = conf->strict_timing;
2390                 put_multipath_config(conf);
2391                 if (!strict_timing)
2392                         sleep(1);
2393                 else {
2394                         if (diff_time.tv_nsec) {
2395                                 diff_time.tv_sec = 0;
2396                                 diff_time.tv_nsec =
2397                                      1000UL * 1000 * 1000 - diff_time.tv_nsec;
2398                         } else
2399                                 diff_time.tv_sec = 1;
2400
2401                         condlog(3, "waiting for %lu.%06lu secs",
2402                                 diff_time.tv_sec,
2403                                 diff_time.tv_nsec / 1000);
2404                         if (nanosleep(&diff_time, NULL) != 0) {
2405                                 condlog(3, "nanosleep failed with error %d",
2406                                         errno);
2407                                 conf = get_multipath_config();
2408                                 conf->strict_timing = 0;
2409                                 put_multipath_config(conf);
2410                                 break;
2411                         }
2412                 }
2413         }
2414         pthread_cleanup_pop(1);
2415         return NULL;
2416 }
2417
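/*
 * Build the complete set of multipath maps: discover paths and existing
 * maps, coalesce paths into maps, push the result into device-mapper
 * and start the event waiter threads for the new maps.
 */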
2418 int
2419 configure (struct vectors * vecs)
2420 {
2421         struct multipath * mpp;
2422         struct path * pp;
2423         vector mpvec;
2424         int i, ret;
2425         struct config *conf;
2426         static int force_reload = FORCE_RELOAD_WEAK;
2427
2428         if (!vecs->pathvec && !(vecs->pathvec = vector_alloc())) {
2429                 condlog(0, "couldn't allocate path vec in configure");
2430                 return 1;
2431         }
2432
2433         if (!vecs->mpvec && !(vecs->mpvec = vector_alloc())) {
2434                 condlog(0, "couldn't allocate multipath vec in configure");
2435                 return 1;
2436         }
2437
2438         if (!(mpvec = vector_alloc())) {
2439                 condlog(0, "couldn't allocate new maps vec in configure");
2440                 return 1;
2441         }
2442
2443         /*
2444          * probe for current path (from sysfs) and map (from dm) sets
2445          */
2446         ret = path_discovery(vecs->pathvec, DI_ALL);
2447         if (ret < 0) {
2448                 condlog(0, "configure failed at path discovery");
2449                 goto fail;
2450         }
2451
2452         conf = get_multipath_config();
2453         pthread_cleanup_push(put_multipath_config, conf);
2454         vector_foreach_slot (vecs->pathvec, pp, i){
2455                 if (filter_path(conf, pp) > 0){
2456                         vector_del_slot(vecs->pathvec, i);
2457                         free_path(pp);
2458                         i--;
2459                 }
2460         }
2461         pthread_cleanup_pop(1);
2462
2463         if (map_discovery(vecs)) {
2464                 condlog(0, "configure failed at map discovery");
2465                 goto fail;
2466         }
2467
2468         /*
2469          * create new set of maps & push changed ones into dm
2470          * In the first call, use FORCE_RELOAD_WEAK to avoid making
2471          * superfluous ACT_RELOAD ioctls. Later calls are done
2472          * with FORCE_RELOAD_YES.
2473          */
2474         ret = coalesce_paths(vecs, mpvec, NULL, force_reload, CMD_NONE);
2475         if (force_reload == FORCE_RELOAD_WEAK)
2476                 force_reload = FORCE_RELOAD_YES;
2477         if (ret != CP_OK) {
2478                 condlog(0, "configure failed while coalescing paths");
2479                 goto fail;
2480         }
2481
2482         /*
2483          * may need to remove some maps which are no longer relevant
2484          * e.g., due to blacklist changes in conf file
2485          */
2486         if (coalesce_maps(vecs, mpvec)) {
2487                 condlog(0, "configure failed while coalescing maps");
2488                 goto fail;
2489         }
2490
2491         dm_lib_release();
2492
2493         sync_maps_state(mpvec);
2494         vector_foreach_slot(mpvec, mpp, i){
2495                 if (remember_wwid(mpp->wwid) == 1)
2496                         trigger_paths_udev_change(mpp, true);
2497                 update_map_pr(mpp);
2498         }
2499
2500         /*
2501          * purge dm of old maps
2502          */
2503         remove_maps(vecs);
2504
2505         /*
2506          * save new set of maps formed by considering current path state
2507          */
2508         vector_free(vecs->mpvec);
2509         vecs->mpvec = mpvec;
2510
2511         /*
2512          * start dm event waiter threads for these new maps
2513          */
2514         vector_foreach_slot(vecs->mpvec, mpp, i) {
2515                 if (wait_for_events(mpp, vecs)) {
2516                         remove_map(mpp, vecs, 1);
2517                         i--;
2518                         continue;
2519                 }
2520                 if (setup_multipath(vecs, mpp))
2521                         i--;
2522         }
2523         return 0;
2524
2525 fail:
2526         vector_free(mpvec);
2527         return 1;
2528 }
2529
2530 int
2531 need_to_delay_reconfig(struct vectors * vecs)
2532 {
2533         struct multipath *mpp;
2534         int i;
2535
2536         if (!VECTOR_SIZE(vecs->mpvec))
2537                 return 0;
2538
2539         vector_foreach_slot(vecs->mpvec, mpp, i) {
2540                 if (mpp->wait_for_udev)
2541                         return 1;
2542         }
2543         return 0;
2544 }
2545
2546 void rcu_free_config(struct rcu_head *head)
2547 {
2548         struct config *conf = container_of(head, struct config, rcu);
2549
2550         free_config(conf);
2551 }
2552
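/*
 * Reload multipath.conf, drop the existing maps and paths (they were
 * built with the old settings), publish the new configuration via RCU
 * and rebuild everything with configure().
 */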
2553 int
2554 reconfigure (struct vectors * vecs)
2555 {
2556         struct config * old, *conf;
2557
2558         conf = load_config(DEFAULT_CONFIGFILE);
2559         if (!conf)
2560                 return 1;
2561
2562         /*
2563          * free old map and path vectors ... they use old conf state
2564          */
2565         if (VECTOR_SIZE(vecs->mpvec))
2566                 remove_maps_and_stop_waiters(vecs);
2567
2568         free_pathvec(vecs->pathvec, FREE_PATHS);
2569         vecs->pathvec = NULL;
2570         delete_all_foreign();
2571
2572         /* Re-read any timezone changes */
2573         tzset();
2574
2575         dm_tgt_version(conf->version, TGT_MPATH);
2576         if (verbosity)
2577                 conf->verbosity = verbosity;
2578         if (bindings_read_only)
2579                 conf->bindings_read_only = bindings_read_only;
2580         uxsock_timeout = conf->uxsock_timeout;
2581
2582         old = rcu_dereference(multipath_conf);
2583         rcu_assign_pointer(multipath_conf, conf);
2584         call_rcu(&old->rcu, rcu_free_config);
2585
2586         configure(vecs);
2587
2588
2589         return 0;
2590 }
2591
2592 static struct vectors *
2593 init_vecs (void)
2594 {
2595         struct vectors * vecs;
2596
2597         vecs = (struct vectors *)MALLOC(sizeof(struct vectors));
2598
2599         if (!vecs)
2600                 return NULL;
2601
2602         pthread_mutex_init(&vecs->lock.mutex, NULL);
2603
2604         return vecs;
2605 }
2606
2607 static void *
2608 signal_set(int signo, void (*func) (int))
2609 {
2610         int r;
2611         struct sigaction sig;
2612         struct sigaction osig;
2613
2614         sig.sa_handler = func;
2615         sigemptyset(&sig.sa_mask);
2616         sig.sa_flags = 0;
2617
2618         r = sigaction(signo, &sig, &osig);
2619
2620         if (r < 0)
2621                 return (SIG_ERR);
2622         else
2623                 return (osig.sa_handler);
2624 }
2625
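/*
 * Act on flags set by the signal handlers. Shutdown requests are always
 * honoured; reconfigure and log-reset requests only when nonfatal
 * handling is allowed.
 */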
2626 void
2627 handle_signals(bool nonfatal)
2628 {
2629         if (exit_sig) {
2630                 condlog(2, "exit (signal)");
2631                 exit_sig = 0;
2632                 exit_daemon();
2633         }
2634         if (!nonfatal)
2635                 return;
2636         if (reconfig_sig) {
2637                 condlog(2, "reconfigure (signal)");
2638                 set_config_state(DAEMON_CONFIGURE);
2639         }
2640         if (log_reset_sig) {
2641                 condlog(2, "reset log (signal)");
2642                 if (logsink == 1)
2643                         log_thread_reset();
2644         }
2645         reconfig_sig = 0;
2646         log_reset_sig = 0;
2647 }
2648
2649 static void
2650 sighup (int sig)
2651 {
2652         reconfig_sig = 1;
2653 }
2654
2655 static void
2656 sigend (int sig)
2657 {
2658         exit_sig = 1;
2659 }
2660
2661 static void
2662 sigusr1 (int sig)
2663 {
2664         log_reset_sig = 1;
2665 }
2666
2667 static void
2668 sigusr2 (int sig)
2669 {
2670         condlog(3, "SIGUSR2 received");
2671 }
2672
2673 static void
2674 signal_init(void)
2675 {
2676         sigset_t set;
2677
2678         /* block all signals */
2679         sigfillset(&set);
2680         /* SIGPIPE occurs if logging fails */
2681         sigdelset(&set, SIGPIPE);
2682         pthread_sigmask(SIG_SETMASK, &set, NULL);
2683
2684         /* Other signals will be unblocked in the uxlsnr thread */
2685         signal_set(SIGHUP, sighup);
2686         signal_set(SIGUSR1, sigusr1);
2687         signal_set(SIGUSR2, sigusr2);
2688         signal_set(SIGINT, sigend);
2689         signal_set(SIGTERM, sigend);
2690         signal_set(SIGPIPE, sigend);
2691 }
2692
2693 static void
2694 setscheduler (void)
2695 {
2696         int res;
2697         static struct sched_param sched_param = {
2698                 .sched_priority = 99
2699         };
2700
2701         res = sched_setscheduler (0, SCHED_RR, &sched_param);
2702
2703         if (res == -1)
2704                 condlog(LOG_WARNING, "Could not set SCHED_RR at priority 99");
2705         return;
2706 }
2707
2708 static void
2709 set_oom_adj (void)
2710 {
2711 #ifdef OOM_SCORE_ADJ_MIN
2712         int retry = 1;
2713         char *file = "/proc/self/oom_score_adj";
2714         int score = OOM_SCORE_ADJ_MIN;
2715 #else
2716         int retry = 0;
2717         char *file = "/proc/self/oom_adj";
2718         int score = OOM_ADJUST_MIN;
2719 #endif
2720         FILE *fp;
2721         struct stat st;
2722         char *envp;
2723
2724         envp = getenv("OOMScoreAdjust");
2725         if (envp) {
2726                 condlog(3, "Using systemd provided OOMScoreAdjust");
2727                 return;
2728         }
2729         do {
2730                 if (stat(file, &st) == 0){
2731                         fp = fopen(file, "w");
2732                         if (!fp) {
2733                                 condlog(0, "couldn't fopen %s : %s", file,
2734                                         strerror(errno));
2735                                 return;
2736                         }
2737                         fprintf(fp, "%i", score);
2738                         fclose(fp);
2739                         return;
2740                 }
2741                 if (errno != ENOENT) {
2742                         condlog(0, "couldn't stat %s : %s", file,
2743                                 strerror(errno));
2744                         return;
2745                 }
2746 #ifdef OOM_ADJUST_MIN
2747                 file = "/proc/self/oom_adj";
2748                 score = OOM_ADJUST_MIN;
2749 #else
2750                 retry = 0;
2751 #endif
2752         } while (retry--);
2753         condlog(0, "couldn't adjust oom score");
2754 }
2755
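/*
 * Daemon main body: load the configuration, start the CLI listener,
 * uevent, dmevents and checker threads, then loop servicing
 * reconfigure requests until shutdown, and finally tear everything down.
 */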
2756 static int
2757 child (void * param)
2758 {
2759         pthread_t check_thr, uevent_thr, uxlsnr_thr, uevq_thr, dmevent_thr;
2760         pthread_attr_t log_attr, misc_attr, uevent_attr;
2761         struct vectors * vecs;
2762         struct multipath * mpp;
2763         int i;
2764 #ifdef USE_SYSTEMD
2765         unsigned long checkint;
2766         int startup_done = 0;
2767 #endif
2768         int rc;
2769         int pid_fd = -1;
2770         struct config *conf;
2771         char *envp;
2772         int queue_without_daemon;
2773
2774         mlockall(MCL_CURRENT | MCL_FUTURE);
2775         signal_init();
2776         rcu_init();
2777
2778         setup_thread_attr(&misc_attr, 64 * 1024, 0);
2779         setup_thread_attr(&uevent_attr, DEFAULT_UEVENT_STACKSIZE * 1024, 0);
2780         setup_thread_attr(&waiter_attr, 32 * 1024, 1);
2781         setup_thread_attr(&io_err_stat_attr, 32 * 1024, 0);
2782
2783         if (logsink == 1) {
2784                 setup_thread_attr(&log_attr, 64 * 1024, 0);
2785                 log_thread_start(&log_attr);
2786                 pthread_attr_destroy(&log_attr);
2787         }
2788         pid_fd = pidfile_create(DEFAULT_PIDFILE, daemon_pid);
2789         if (pid_fd < 0) {
2790                 condlog(1, "failed to create pidfile");
2791                 if (logsink == 1)
2792                         log_thread_stop();
2793                 exit(1);
2794         }
2795
2796         post_config_state(DAEMON_START);
2797
2798         condlog(2, "--------start up--------");
2799         condlog(2, "read " DEFAULT_CONFIGFILE);
2800
2801         conf = load_config(DEFAULT_CONFIGFILE);
2802         if (!conf)
2803                 goto failed;
2804
2805         if (verbosity)
2806                 conf->verbosity = verbosity;
2807         if (bindings_read_only)
2808                 conf->bindings_read_only = bindings_read_only;
2809         uxsock_timeout = conf->uxsock_timeout;
2810         rcu_assign_pointer(multipath_conf, conf);
2811         if (init_checkers(conf->multipath_dir)) {
2812                 condlog(0, "failed to initialize checkers");
2813                 goto failed;
2814         }
2815         if (init_prio(conf->multipath_dir)) {
2816                 condlog(0, "failed to initialize prioritizers");
2817                 goto failed;
2818         }
2819         /* Failing this is non-fatal */
2820
2821         init_foreign(conf->multipath_dir);
2822
2823         if (poll_dmevents)
2824                 poll_dmevents = dmevent_poll_supported();
2825         setlogmask(LOG_UPTO(conf->verbosity + 3));
2826
2827         envp = getenv("LimitNOFILE");
2828
2829         if (envp)
2830                 condlog(2,"Using systemd provided open fds limit of %s", envp);
2831         else
2832                 set_max_fds(conf->max_fds);
2833
2834         vecs = gvecs = init_vecs();
2835         if (!vecs)
2836                 goto failed;
2837
2838         setscheduler();
2839         set_oom_adj();
2840
2841 #ifdef USE_SYSTEMD
2842         envp = getenv("WATCHDOG_USEC");
2843         if (envp && sscanf(envp, "%lu", &checkint) == 1) {
2844                 /* Value is in microseconds */
2845                 conf->max_checkint = checkint / 1000000;
2846                 /* Rescale checkint */
2847                 if (conf->checkint > conf->max_checkint)
2848                         conf->checkint = conf->max_checkint;
2849                 else
2850                         conf->checkint = conf->max_checkint / 4;
2851                 condlog(3, "enabling watchdog, interval %d max %d",
2852                         conf->checkint, conf->max_checkint);
2853                 use_watchdog = conf->checkint;
2854         }
2855 #endif
2856         /*
2857          * Startup done, invalidate configuration
2858          */
2859         conf = NULL;
2860
2861         pthread_cleanup_push(config_cleanup, NULL);
2862         pthread_mutex_lock(&config_lock);
2863
2864         __post_config_state(DAEMON_IDLE);
2865         rc = pthread_create(&uxlsnr_thr, &misc_attr, uxlsnrloop, vecs);
2866         if (!rc) {
2867                 /* Wait for uxlsnr startup */
2868                 while (running_state == DAEMON_IDLE)
2869                         pthread_cond_wait(&config_cond, &config_lock);
2870         }
2871         pthread_cleanup_pop(1);
2872
2873         if (rc) {
2874                 condlog(0, "failed to create cli listener: %d", rc);
2875                 goto failed;
2876         }
2877         else if (running_state != DAEMON_CONFIGURE) {
2878                 condlog(0, "cli listener failed to start");
2879                 goto failed;
2880         }
2881
2882         if (poll_dmevents) {
2883                 if (init_dmevent_waiter(vecs)) {
2884                         condlog(0, "failed to allocate dmevents waiter info");
2885                         goto failed;
2886                 }
2887                 if ((rc = pthread_create(&dmevent_thr, &misc_attr,
2888                                          wait_dmevents, NULL))) {
2889                         condlog(0, "failed to create dmevent waiter thread: %d",
2890                                 rc);
2891                         goto failed;
2892                 }
2893         }
2894
2895         /*
2896          * Start uevent listener early to catch events
2897          */
2898         if ((rc = pthread_create(&uevent_thr, &uevent_attr, ueventloop, udev))) {
2899                 condlog(0, "failed to create uevent thread: %d", rc);
2900                 goto failed;
2901         }
2902         pthread_attr_destroy(&uevent_attr);
2903
2904         /*
2905          * start threads
2906          */
2907         if ((rc = pthread_create(&check_thr, &misc_attr, checkerloop, vecs))) {
2908                 condlog(0,"failed to create checker loop thread: %d", rc);
2909                 goto failed;
2910         }
2911         if ((rc = pthread_create(&uevq_thr, &misc_attr, uevqloop, vecs))) {
2912                 condlog(0, "failed to create uevent dispatcher: %d", rc);
2913                 goto failed;
2914         }
2915         pthread_attr_destroy(&misc_attr);
2916
2917         while (running_state != DAEMON_SHUTDOWN) {
2918                 pthread_cleanup_push(config_cleanup, NULL);
2919                 pthread_mutex_lock(&config_lock);
2920                 if (running_state != DAEMON_CONFIGURE &&
2921                     running_state != DAEMON_SHUTDOWN) {
2922                         pthread_cond_wait(&config_cond, &config_lock);
2923                 }
2924                 pthread_cleanup_pop(1);
2925                 if (running_state == DAEMON_CONFIGURE) {
2926                         pthread_cleanup_push(cleanup_lock, &vecs->lock);
2927                         lock(&vecs->lock);
2928                         pthread_testcancel();
2929                         if (!need_to_delay_reconfig(vecs)) {
2930                                 reconfigure(vecs);
2931                         } else {
2932                                 conf = get_multipath_config();
2933                                 conf->delayed_reconfig = 1;
2934                                 put_multipath_config(conf);
2935                         }
2936                         lock_cleanup_pop(vecs->lock);
2937                         post_config_state(DAEMON_IDLE);
2938 #ifdef USE_SYSTEMD
2939                         if (!startup_done) {
2940                                 sd_notify(0, "READY=1");
2941                                 startup_done = 1;
2942                         }
2943 #endif
2944                 }
2945         }
2946
2947         lock(&vecs->lock);
2948         conf = get_multipath_config();
2949         queue_without_daemon = conf->queue_without_daemon;
2950         put_multipath_config(conf);
2951         if (queue_without_daemon == QUE_NO_DAEMON_OFF)
2952                 vector_foreach_slot(vecs->mpvec, mpp, i)
2953                         dm_queue_if_no_path(mpp->alias, 0);
2954         remove_maps_and_stop_waiters(vecs);
2955         unlock(&vecs->lock);
2956
2957         pthread_cancel(check_thr);
2958         pthread_cancel(uevent_thr);
2959         pthread_cancel(uxlsnr_thr);
2960         pthread_cancel(uevq_thr);
2961         if (poll_dmevents)
2962                 pthread_cancel(dmevent_thr);
2963
2964         pthread_join(check_thr, NULL);
2965         pthread_join(uevent_thr, NULL);
2966         pthread_join(uxlsnr_thr, NULL);
2967         pthread_join(uevq_thr, NULL);
2968         if (poll_dmevents)
2969                 pthread_join(dmevent_thr, NULL);
2970
2971         stop_io_err_stat_thread();
2972
2973         lock(&vecs->lock);
2974         free_pathvec(vecs->pathvec, FREE_PATHS);
2975         vecs->pathvec = NULL;
2976         unlock(&vecs->lock);
2977
2978         pthread_mutex_destroy(&vecs->lock.mutex);
2979         FREE(vecs);
2980         vecs = NULL;
2981
2982         cleanup_foreign();
2983         cleanup_checkers();
2984         cleanup_prio();
2985         if (poll_dmevents)
2986                 cleanup_dmevent_waiter();
2987
2988         dm_lib_release();
2989         dm_lib_exit();
2990
2991         /* We're done here */
2992         condlog(3, "unlink pidfile");
2993         unlink(DEFAULT_PIDFILE);
2994
2995         condlog(2, "--------shut down-------");
2996
2997         if (logsink == 1)
2998                 log_thread_stop();
2999
3000         /*
3001          * Freeing config must be done after condlog() and dm_lib_exit(),
3002          * because logging functions like dlog() and dm_write_log()
3003          * reference the config.
3004          */
3005         conf = rcu_dereference(multipath_conf);
3006         rcu_assign_pointer(multipath_conf, NULL);
3007         call_rcu(&conf->rcu, rcu_free_config);
3008         udev_unref(udev);
3009         udev = NULL;
3010         pthread_attr_destroy(&waiter_attr);
3011         pthread_attr_destroy(&io_err_stat_attr);
3012 #ifdef _DEBUG_
3013         dbg_free_final(NULL);
3014 #endif
3015
3016 #ifdef USE_SYSTEMD
3017         sd_notify(0, "ERRNO=0");
3018 #endif
3019         exit(0);
3020
3021 failed:
3022 #ifdef USE_SYSTEMD
3023         sd_notify(0, "ERRNO=1");
3024 #endif
3025         if (pid_fd >= 0)
3026                 close(pid_fd);
3027         exit(1);
3028 }
3029
3030 static int
3031 daemonize(void)
3032 {
3033         int pid;
3034         int dev_null_fd;
3035
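             /*
              * Classic double fork: the first child calls setsid() to detach
              * from the controlling terminal, then forks again so the daemon
              * can never reacquire one.
              */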
3036         if ((pid = fork()) < 0) {
3037                 fprintf(stderr, "Failed first fork : %s\n", strerror(errno));
3038                 return -1;
3039         }
3040         else if (pid != 0)
3041                 return pid;
3042
3043         setsid();
3044
3045         if ((pid = fork()) < 0)
3046                 fprintf(stderr, "Failed second fork : %s\n", strerror(errno));
3047         else if (pid != 0)
3048                 _exit(0);
3049
3050         if (chdir("/") < 0)
3051                 fprintf(stderr, "cannot chdir to '/', continuing\n");
3052
3053         dev_null_fd = open("/dev/null", O_RDWR);
3054         if (dev_null_fd < 0) {
3055                 fprintf(stderr, "cannot open /dev/null for input & output : %s\n",
3056                         strerror(errno));
3057                 _exit(0);
3058         }
3059
3060         close(STDIN_FILENO);
3061         if (dup(dev_null_fd) < 0) {
3062                 fprintf(stderr, "cannot dup /dev/null to stdin : %s\n",
3063                         strerror(errno));
3064                 _exit(0);
3065         }
3066         close(STDOUT_FILENO);
3067         if (dup(dev_null_fd) < 0) {
3068                 fprintf(stderr, "cannot dup /dev/null to stdout : %s\n",
3069                         strerror(errno));
3070                 _exit(0);
3071         }
3072         close(STDERR_FILENO);
3073         if (dup(dev_null_fd) < 0) {
3074                 fprintf(stderr, "cannot dup /dev/null to stderr : %s\n",
3075                         strerror(errno));
3076                 _exit(0);
3077         }
3078         close(dev_null_fd);
3079         daemon_pid = getpid();
3080         return 0;
3081 }
3082
3083 int
3084 main (int argc, char *argv[])
3085 {
3086         extern char *optarg;
3087         extern int optind;
3088         int arg;
3089         int err;
3090         int foreground = 0;
3091         struct config *conf;
3092
3093         ANNOTATE_BENIGN_RACE_SIZED(&multipath_conf, sizeof(multipath_conf),
3094                                    "Manipulated through RCU");
3095         ANNOTATE_BENIGN_RACE_SIZED(&running_state, sizeof(running_state),
3096                 "Suppress complaints about unprotected running_state reads");
3097         ANNOTATE_BENIGN_RACE_SIZED(&uxsock_timeout, sizeof(uxsock_timeout),
3098                 "Suppress complaints about this scalar variable");
3099
3100         logsink = 1;
3101
3102         if (getuid() != 0) {
3103                 fprintf(stderr, "need to be root\n");
3104                 exit(1);
3105         }
3106
3107         /* make sure we don't lock any path */
3108         if (chdir("/") < 0)
3109                 fprintf(stderr, "can't chdir to root directory : %s\n",
3110                         strerror(errno));
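             /* OR 022 into the inherited umask so group/other never gain write access */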
3111         umask(umask(077) | 022);
3112
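             /* config_cond signals running_state changes; timed waits use the monotonic clock */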
3113         pthread_cond_init_mono(&config_cond);
3114
3115         udev = udev_new();
3116         libmp_udev_set_sync_support(0);
3117
3118         while ((arg = getopt(argc, argv, ":dsv:k::Bniw")) != EOF ) {
3119                 switch(arg) {
3120                 case 'd':
3121                         foreground = 1;
3122                         if (logsink > 0)
3123                                 logsink = 0;
3124                         //debug=1; /* ### comment me out ### */
3125                         break;
3126                 case 'v':
3127                         /* the verbosity argument must start with a digit */
3128                         if (!isdigit(optarg[0]))
3129                                 exit(1);
3130
3131                         verbosity = atoi(optarg);
3132                         break;
3133                 case 's':
3134                         logsink = -1;
3135                         break;
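                     /*
                      * -k: run as a client and pass the interactive command
                      * to the daemon through its unix domain socket.
                      */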
3136                 case 'k':
3137                         logsink = 0;
3138                         conf = load_config(DEFAULT_CONFIGFILE);
3139                         if (!conf)
3140                                 exit(1);
3141                         if (verbosity)
3142                                 conf->verbosity = verbosity;
3143                         uxsock_timeout = conf->uxsock_timeout;
3144                         err = uxclnt(optarg, uxsock_timeout + 100);
3145                         free_config(conf);
3146                         return err;
3147                 case 'B':
3148                         bindings_read_only = 1;
3149                         break;
3150                 case 'n':
3151                         condlog(0, "WARNING: ignoring deprecated option -n, use 'ignore_wwids = no' instead");
3152                         break;
3153                 case 'w':
3154                         poll_dmevents = 0;
3155                         break;
3156                 default:
3157                         fprintf(stderr, "Invalid argument '-%c'\n",
3158                                 optopt);
3159                         exit(1);
3160                 }
3161         }
3162         if (optind < argc) {
3163                 char cmd[CMDSIZE];
3164                 char * s = cmd;
3165                 char * c = s;
3166
3167                 logsink = 0;
3168                 conf = load_config(DEFAULT_CONFIGFILE);
3169                 if (!conf)
3170                         exit(1);
3171                 if (verbosity)
3172                         conf->verbosity = verbosity;
3173                 uxsock_timeout = conf->uxsock_timeout;
3174                 memset(cmd, 0x0, CMDSIZE);
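                     /*
                      * Join the remaining arguments into one daemon command,
                      * quoting arguments that contain spaces; stop early if
                      * the CMDSIZE buffer fills up.
                      */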
3175                 while (optind < argc && c < s + CMDSIZE) {
3176                         if (strchr(argv[optind], ' '))
3177                                 c += snprintf(c, s + CMDSIZE - c, "\"%s\" ", argv[optind]);
3178                         else
3179                                 c += snprintf(c, s + CMDSIZE - c, "%s ", argv[optind]);
3180                         optind++;
3181                 }
3182                 if (c < s + CMDSIZE) snprintf(c, s + CMDSIZE - c, "\n");
3183                 err = uxclnt(s, uxsock_timeout + 100);
3184                 free_config(conf);
3185                 return err;
3186         }
3187
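             /* no client command given: run as the multipath daemon itself */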
3188         if (foreground) {
3189                 if (!isatty(fileno(stdout)))
3190                         setbuf(stdout, NULL);
3191                 err = 0;
3192                 daemon_pid = getpid();
3193         } else
3194                 err = daemonize();
3195
3196         if (err < 0)
3197                 /* error */
3198                 exit(1);
3199         else if (err > 0)
3200                 /* parent dies */
3201                 exit(0);
3202         else
3203                 /* child lives */
3204                 return (child(NULL));
3205 }
3206
3207 void *  mpath_pr_event_handler_fn (void * pathp )
3208 {
3209         struct multipath * mpp;
3210         int i, ret, isFound;