libmultipath: fix (max_)polling_interval setting logic
[multipath-tools/.git] / multipathd / main.c
1 /*
2  * Copyright (c) 2004, 2005 Christophe Varoqui
3  * Copyright (c) 2005 Kiyoshi Ueda, NEC
4  * Copyright (c) 2005 Benjamin Marzinski, Redhat
5  * Copyright (c) 2005 Edward Goggin, EMC
6  */
7 #include <unistd.h>
8 #include <sys/stat.h>
9 #include <libdevmapper.h>
10 #include <sys/wait.h>
11 #include <sys/mman.h>
12 #include <sys/types.h>
13 #include <fcntl.h>
14 #include <errno.h>
15 #include <limits.h>
16 #include <linux/oom.h>
17 #include <libudev.h>
18 #include <urcu.h>
19 #ifdef USE_SYSTEMD
20 #include <systemd/sd-daemon.h>
21 #endif
22 #include <semaphore.h>
23 #include <time.h>
24 #include <stdbool.h>
25
26 /*
27  * libmultipath
28  */
29 #include "time-util.h"
30
31 /*
32  * libcheckers
33  */
34 #include "checkers.h"
35
36 /*
37  * libmultipath
38  */
39 #include "parser.h"
40 #include "vector.h"
41 #include "memory.h"
42 #include "config.h"
43 #include "util.h"
44 #include "hwtable.h"
45 #include "defaults.h"
46 #include "structs.h"
47 #include "blacklist.h"
48 #include "structs_vec.h"
49 #include "dmparser.h"
50 #include "devmapper.h"
51 #include "sysfs.h"
52 #include "dict.h"
53 #include "discovery.h"
54 #include "debug.h"
55 #include "propsel.h"
56 #include "uevent.h"
57 #include "switchgroup.h"
58 #include "print.h"
59 #include "configure.h"
60 #include "prio.h"
61 #include "wwids.h"
62 #include "pgpolicies.h"
63 #include "uevent.h"
64 #include "log.h"
65 #include "uxsock.h"
66
67 #include "mpath_cmd.h"
68 #include "mpath_persist.h"
69
70 #include "prioritizers/alua_rtpg.h"
71
72 #include "main.h"
73 #include "pidfile.h"
74 #include "uxlsnr.h"
75 #include "uxclnt.h"
76 #include "cli.h"
77 #include "cli_handlers.h"
78 #include "lock.h"
79 #include "waiter.h"
80 #include "dmevents.h"
81 #include "io_err_stat.h"
82 #include "wwids.h"
83 #include "foreign.h"
84 #include "../third-party/valgrind/drd.h"
85
86 #define FILE_NAME_SIZE 256
87 #define CMDSIZE 160
88
/*
 * LOG_MSG(lvl, verb, pp): log the checker result for path @pp at level
 * @lvl, but only when @lvl does not exceed the verbosity @verb, a
 * checker has been selected, and the path belongs to a map.  Offline
 * paths get a fixed "path offline" message; otherwise the checker's
 * own message is logged, if it produced a non-empty one.
 */
#define LOG_MSG(lvl, verb, pp)                                  \
do {                                                            \
        if (pp->mpp && checker_selected(&pp->checker) &&        \
            lvl <= verb) {                                      \
                if (pp->offline)                                \
                        condlog(lvl, "%s: %s - path offline",   \
                                pp->mpp->alias, pp->dev);       \
                else  {                                         \
                        const char *__m =                       \
                                checker_message(&pp->checker);  \
                                                                \
                        if (strlen(__m))                              \
                                condlog(lvl, "%s: %s - %s checker%s", \
                                        pp->mpp->alias,               \
                                        pp->dev,                      \
                                        checker_name(&pp->checker),   \
                                        __m);                         \
                }                                                     \
        }                                                             \
} while(0)
109
/* Argument bundle used to hand a map event to a handler. */
struct mpath_event_param
{
        char * devname;         /* device name the event refers to -- presumably the kernel dm name; TODO confirm at use site */
        struct multipath *mpp;  /* map associated with the event */
};
115
int logsink;                    /* log destination selector used by condlog -- semantics in log code */
int uxsock_timeout;             /* timeout for unix-socket clients -- TODO confirm unit (ms?) */
int verbosity;                  /* global log verbosity level */
int bindings_read_only;         /* nonzero: don't write to the bindings file */
int ignore_new_devs;            /* nonzero: don't create maps for new devices */
#ifdef NO_DMEVENTS_POLL
int poll_dmevents = 0;
#else
/* nonzero: use the central dm-events poller instead of per-map waiter threads */
int poll_dmevents = 1;
#endif
/* Don't access this variable without holding config_lock */
enum daemon_status running_state = DAEMON_INIT;
pid_t daemon_pid;
pthread_mutex_t config_lock = PTHREAD_MUTEX_INITIALIZER;
/* broadcast on every running_state change; initialized elsewhere -- TODO confirm clock attribute */
pthread_cond_t config_cond;
131
132 static inline enum daemon_status get_running_state(void)
133 {
134         enum daemon_status st;
135
136         pthread_mutex_lock(&config_lock);
137         st = running_state;
138         pthread_mutex_unlock(&config_lock);
139         return st;
140 }
141
142 /*
143  * global copy of vecs for use in sig handlers
144  */
145 struct vectors * gvecs;
146
147 struct udev * udev;
148
149 struct config *multipath_conf;
150
151 /* Local variables */
152 static volatile sig_atomic_t exit_sig;
153 static volatile sig_atomic_t reconfig_sig;
154 static volatile sig_atomic_t log_reset_sig;
155
156 const char *
157 daemon_status(void)
158 {
159         switch (get_running_state()) {
160         case DAEMON_INIT:
161                 return "init";
162         case DAEMON_START:
163                 return "startup";
164         case DAEMON_CONFIGURE:
165                 return "configure";
166         case DAEMON_IDLE:
167                 return "idle";
168         case DAEMON_RUNNING:
169                 return "running";
170         case DAEMON_SHUTDOWN:
171                 return "shutdown";
172         }
173         return NULL;
174 }
175
176 /*
177  * I love you too, systemd ...
178  */
179 static const char *
180 sd_notify_status(enum daemon_status state)
181 {
182         switch (state) {
183         case DAEMON_INIT:
184                 return "STATUS=init";
185         case DAEMON_START:
186                 return "STATUS=startup";
187         case DAEMON_CONFIGURE:
188                 return "STATUS=configure";
189         case DAEMON_IDLE:
190         case DAEMON_RUNNING:
191                 return "STATUS=up";
192         case DAEMON_SHUTDOWN:
193                 return "STATUS=shutdown";
194         }
195         return NULL;
196 }
197
198 #ifdef USE_SYSTEMD
/* Report a daemon state transition to systemd via sd_notify(3). */
static void do_sd_notify(enum daemon_status old_state,
                         enum daemon_status new_state)
{
        /*
         * Checkerloop switches back and forth between idle and running state.
         * No need to tell systemd each time.
         * These notifications cause a lot of overhead on dbus.
         */
        if ((new_state == DAEMON_IDLE || new_state == DAEMON_RUNNING) &&
            (old_state == DAEMON_IDLE || old_state == DAEMON_RUNNING))
                return;
        sd_notify(0, sd_notify_status(new_state));
}
212 #endif
213
/* pthread cleanup handler: release config_lock on cancellation/exit. */
static void config_cleanup(__attribute__((unused)) void *arg)
{
        pthread_mutex_unlock(&config_lock);
}
218
219 /*
220  * If the current status is @oldstate, wait for at most @ms milliseconds
221  * for the state to change, and return the new state, which may still be
222  * @oldstate.
223  */
enum daemon_status wait_for_state_change_if(enum daemon_status oldstate,
                                            unsigned long ms)
{
        enum daemon_status st;
        struct timespec tmo;

        /* shutdown is final; nothing to wait for */
        if (oldstate == DAEMON_SHUTDOWN)
                return DAEMON_SHUTDOWN;

        pthread_mutex_lock(&config_lock);
        pthread_cleanup_push(config_cleanup, NULL);
        st = running_state;
        if (st == oldstate && clock_gettime(CLOCK_MONOTONIC, &tmo) == 0) {
                /* absolute timeout = now + @ms milliseconds */
                tmo.tv_nsec += ms * 1000 * 1000;
                normalize_timespec(&tmo);
                /*
                 * NOTE(review): a CLOCK_MONOTONIC timestamp is only correct
                 * here if config_cond was created with a monotonic clock
                 * attribute -- confirm at its initialization site.
                 */
                (void)pthread_cond_timedwait(&config_cond, &config_lock, &tmo);
                st = running_state;
        }
        pthread_cleanup_pop(1);
        return st;
}
245
/* must be called with config_lock held */
static void __post_config_state(enum daemon_status state)
{
        /* DAEMON_SHUTDOWN is final: never transition out of it */
        if (state != running_state && running_state != DAEMON_SHUTDOWN) {
                enum daemon_status old_state = running_state;

                running_state = state;
                /* wake every thread waiting for a state change */
                pthread_cond_broadcast(&config_cond);
#ifdef USE_SYSTEMD
                do_sd_notify(old_state, state);
#endif
        }
}
259
/*
 * Set the daemon state to @state (unless shutting down) and wake all
 * waiters.  Takes config_lock; cancellation-safe via cleanup handler.
 */
void post_config_state(enum daemon_status state)
{
        pthread_mutex_lock(&config_lock);
        pthread_cleanup_push(config_cleanup, NULL);
        __post_config_state(state);
        pthread_cleanup_pop(1);
}
267
/*
 * Try to move the daemon into @state.  If it is busy (neither IDLE nor
 * already in @state), wait up to one second for it to settle first.
 * Returns 0 on success, EINVAL when shutting down, or the
 * pthread_cond_timedwait() error code (e.g. ETIMEDOUT).
 */
int set_config_state(enum daemon_status state)
{
        int rc = 0;

        pthread_cleanup_push(config_cleanup, NULL);
        pthread_mutex_lock(&config_lock);
        if (running_state != state) {
                enum daemon_status old_state = running_state;

                if (running_state == DAEMON_SHUTDOWN)
                        rc = EINVAL;
                else if (running_state != DAEMON_IDLE) {
                        struct timespec ts;

                        /* give the daemon at most 1s to become idle */
                        get_monotonic_time(&ts);
                        ts.tv_sec += 1;
                        rc = pthread_cond_timedwait(&config_cond,
                                                    &config_lock, &ts);
                }
                /* re-check: state may have changed while we waited */
                if (!rc && (running_state != DAEMON_SHUTDOWN)) {
                        running_state = state;
                        pthread_cond_broadcast(&config_cond);
#ifdef USE_SYSTEMD
                        do_sd_notify(old_state, state);
#endif
                }
        }
        pthread_cleanup_pop(1);
        return rc;
}
298
/*
 * Return the current configuration under an RCU read lock.  The caller
 * must release it with put_multipath_config() when done.
 */
struct config *get_multipath_config(void)
{
        rcu_read_lock();
        return rcu_dereference(multipath_conf);
}
304
/*
 * Release the RCU read lock taken by get_multipath_config().  The
 * unused argument allows use as a pthread cleanup handler.
 */
void put_multipath_config(__attribute__((unused)) void *arg)
{
        rcu_read_unlock();
}
309
/*
 * Decide whether @mpp should switch to a different path group.  With
 * @refresh set, re-read the priority of every path first.  Returns 1
 * when the best path group differs from the kernel's next group
 * (mpp->nextpg), 0 otherwise -- including for manual-failback maps.
 */
static int
need_switch_pathgroup (struct multipath * mpp, int refresh)
{
        struct pathgroup * pgp;
        struct path * pp;
        unsigned int i, j;
        struct config *conf;
        int bestpg;

        if (!mpp)
                return 0;

        /*
         * Refresh path priority values
         */
        if (refresh) {
                vector_foreach_slot (mpp->pg, pgp, i) {
                        vector_foreach_slot (pgp->paths, pp, j) {
                                conf = get_multipath_config();
                                pthread_cleanup_push(put_multipath_config,
                                                     conf);
                                pathinfo(pp, conf, DI_PRIO);
                                pthread_cleanup_pop(1);
                        }
                }
        }

        if (!mpp->pg || VECTOR_SIZE(mpp->paths) == 0)
                return 0;

        /*
         * NOTE(review): select_path_group() runs even for manual-failback
         * maps, before the early return below -- presumably wanted for its
         * side effects; confirm before reordering.
         */
        bestpg = select_path_group(mpp);
        if (mpp->pgfailback == -FAILBACK_MANUAL)
                return 0;

        mpp->bestpg = bestpg;
        if (mpp->bestpg != mpp->nextpg)
                return 1;

        return 0;
}
350
/*
 * Tell the kernel to switch @mpp to its best path group (mpp->bestpg,
 * as computed by select_path_group()), and count the switch.
 */
static void
switch_pathgroup (struct multipath * mpp)
{
        mpp->stat_switchgroup++;
        dm_switchgroup(mpp->alias, mpp->bestpg);
        condlog(2, "%s: switch to path group #%i",
                 mpp->alias, mpp->bestpg);
}
359
360 static int
361 wait_for_events(struct multipath *mpp, struct vectors *vecs)
362 {
363         if (poll_dmevents)
364                 return watch_dmevents(mpp->alias);
365         else
366                 return start_waiter_thread(mpp, vecs);
367 }
368
/*
 * Drop @mpp from multipathd's internal tables, stopping its waiter
 * thread when per-map waiters are in use.  The dm device itself is
 * left untouched.
 */
static void
remove_map_and_stop_waiter(struct multipath *mpp, struct vectors *vecs)
{
        /* devices are automatically removed by the dmevent polling code,
         * so they don't need to be manually removed here */
        condlog(3, "%s: removing map from internal tables", mpp->alias);
        if (!poll_dmevents)
                stop_waiter_thread(mpp);
        remove_map(mpp, vecs, PURGE_VEC);
}
379
380 static void
381 remove_maps_and_stop_waiters(struct vectors *vecs)
382 {
383         int i;
384         struct multipath * mpp;
385
386         if (!vecs)
387                 return;
388
389         if (!poll_dmevents) {
390                 vector_foreach_slot(vecs->mpvec, mpp, i)
391                         stop_waiter_thread(mpp);
392         }
393         else
394                 unwatch_all_dmevents();
395
396         remove_maps(vecs);
397 }
398
399 static void
400 set_multipath_wwid (struct multipath * mpp)
401 {
402         if (strlen(mpp->wwid))
403                 return;
404
405         dm_get_uuid(mpp->alias, mpp->wwid, WWID_SIZE);
406 }
407
/*
 * Refresh @mpp from the kernel: re-read its dm info and rebuild the
 * multipath strings.  With @reset, also re-evaluate no_path_retry and
 * cancel a pending deferred remove when paths remain.
 * Returns 0 on success; on failure the map is dropped from the
 * internal tables (and freed) and 1 is returned.
 */
int __setup_multipath(struct vectors *vecs, struct multipath *mpp,
                      int reset)
{
        if (dm_get_info(mpp->alias, &mpp->dmi)) {
                /* Error accessing table */
                condlog(3, "%s: cannot access table", mpp->alias);
                goto out;
        }

        if (update_multipath_strings(mpp, vecs->pathvec, 1)) {
                condlog(0, "%s: failed to setup multipath", mpp->alias);
                goto out;
        }

        if (reset) {
                set_no_path_retry(mpp);
                if (VECTOR_SIZE(mpp->paths) != 0)
                        dm_cancel_deferred_remove(mpp);
        }

        return 0;
out:
        remove_map_and_stop_waiter(mpp, vecs);
        return 1;
}
433
/*
 * Sync the checker state of map @mapname's paths with the kernel's dm
 * path states: any path the kernel marked failed is set to PATH_DOWN
 * here and its next check is scheduled sooner.
 * Returns 0 on success, 1 if the map had to be removed, 2 if unknown.
 */
int update_multipath (struct vectors *vecs, char *mapname, int reset)
{
        struct multipath *mpp;
        struct pathgroup  *pgp;
        struct path *pp;
        int i, j;

        mpp = find_mp_by_alias(vecs->mpvec, mapname);

        if (!mpp) {
                condlog(3, "%s: multipath map not found", mapname);
                return 2;
        }

        if (__setup_multipath(vecs, mpp, reset))
                return 1; /* mpp freed in setup_multipath */

        /*
         * compare checkers states with DM states
         */
        vector_foreach_slot (mpp->pg, pgp, i) {
                vector_foreach_slot (pgp->paths, pp, j) {
                        if (pp->dmstate != PSTATE_FAILED)
                                continue;

                        if (pp->state != PATH_DOWN) {
                                struct config *conf;
                                int oldstate = pp->state;
                                unsigned int checkint;

                                conf = get_multipath_config();
                                checkint = conf->checkint;
                                put_multipath_config(conf);
                                condlog(2, "%s: mark as failed", pp->dev);
                                mpp->stat_path_failures++;
                                pp->state = PATH_DOWN;
                                /* leaving a usable state: update queueing mode */
                                if (oldstate == PATH_UP ||
                                    oldstate == PATH_GHOST)
                                        update_queue_mode_del_path(mpp);

                                /*
                                 * if opportune,
                                 * schedule the next check earlier
                                 */
                                if (pp->tick > checkint)
                                        pp->tick = checkint;
                        }
                }
        }
        return 0;
}
485
486 static int
487 update_map (struct multipath *mpp, struct vectors *vecs, int new_map)
488 {
489         int retries = 3;
490         char params[PARAMS_SIZE] = {0};
491
492 retry:
493         condlog(4, "%s: updating new map", mpp->alias);
494         if (adopt_paths(vecs->pathvec, mpp)) {
495                 condlog(0, "%s: failed to adopt paths for new map update",
496                         mpp->alias);
497                 retries = -1;
498                 goto fail;
499         }
500         verify_paths(mpp, vecs);
501         mpp->action = ACT_RELOAD;
502
503         if (setup_map(mpp, params, PARAMS_SIZE, vecs)) {
504                 condlog(0, "%s: failed to setup new map in update", mpp->alias);
505                 retries = -1;
506                 goto fail;
507         }
508         if (domap(mpp, params, 1) == DOMAP_FAIL && retries-- > 0) {
509                 condlog(0, "%s: map_udate sleep", mpp->alias);
510                 sleep(1);
511                 goto retry;
512         }
513         dm_lib_release();
514
515 fail:
516         if (new_map && (retries < 0 || wait_for_events(mpp, vecs))) {
517                 condlog(0, "%s: failed to create new map", mpp->alias);
518                 remove_map(mpp, vecs, 1);
519                 return 1;
520         }
521
522         if (setup_multipath(vecs, mpp))
523                 return 1;
524
525         sync_map_state(mpp);
526
527         if (retries < 0)
528                 condlog(0, "%s: failed reload in new map update", mpp->alias);
529         return 0;
530 }
531
532 static struct multipath *
533 add_map_without_path (struct vectors *vecs, const char *alias)
534 {
535         struct multipath * mpp = alloc_multipath();
536         struct config *conf;
537
538         if (!mpp)
539                 return NULL;
540         if (!alias) {
541                 FREE(mpp);
542                 return NULL;
543         }
544
545         mpp->alias = STRDUP(alias);
546
547         if (dm_get_info(mpp->alias, &mpp->dmi)) {
548                 condlog(3, "%s: cannot access table", mpp->alias);
549                 goto out;
550         }
551         set_multipath_wwid(mpp);
552         conf = get_multipath_config();
553         mpp->mpe = find_mpe(conf->mptable, mpp->wwid);
554         put_multipath_config(conf);
555
556         if (update_multipath_table(mpp, vecs->pathvec, 1))
557                 goto out;
558         if (update_multipath_status(mpp))
559                 goto out;
560
561         if (!vector_alloc_slot(vecs->mpvec))
562                 goto out;
563
564         vector_set_slot(vecs->mpvec, mpp);
565
566         if (update_map(mpp, vecs, 1) != 0) /* map removed */
567                 return NULL;
568
569         return mpp;
570 out:
571         remove_map(mpp, vecs, PURGE_VEC);
572         return NULL;
573 }
574
/*
 * Reconcile the currently-known maps (vecs->mpvec) with the newly
 * configured set @nmpv: flush maps no longer allowed by the
 * configuration, keep maps that cannot be flushed (e.g. still open),
 * and optionally reassign device-mapper tables for retained maps.
 * Returns 0 on success, 1 on allocation failure.
 */
static int
coalesce_maps(struct vectors *vecs, vector nmpv)
{
        struct multipath * ompp;
        vector ompv = vecs->mpvec;
        unsigned int i, reassign_maps;
        struct config *conf;

        conf = get_multipath_config();
        reassign_maps = conf->reassign_maps;
        put_multipath_config(conf);
        vector_foreach_slot (ompv, ompp, i) {
                condlog(3, "%s: coalesce map", ompp->alias);
                if (!find_mp_by_wwid(nmpv, ompp->wwid)) {
                        /*
                         * remove all current maps not allowed by the
                         * current configuration
                         */
                        if (dm_flush_map(ompp->alias)) {
                                condlog(0, "%s: unable to flush devmap",
                                        ompp->alias);
                                /*
                                 * may be just because the device is open
                                 */
                                if (setup_multipath(vecs, ompp) != 0) {
                                        /* map was removed from ompv: step back */
                                        i--;
                                        continue;
                                }
                                if (!vector_alloc_slot(nmpv))
                                        return 1;

                                /* keep the un-flushable map in the new set */
                                vector_set_slot(nmpv, ompp);

                                vector_del_slot(ompv, i);
                                /* re-visit the slot that shifted into place */
                                i--;
                        }
                        else {
                                dm_lib_release();
                                condlog(2, "%s devmap removed", ompp->alias);
                        }
                } else if (reassign_maps) {
                        condlog(3, "%s: Reassign existing device-mapper"
                                " devices", ompp->alias);
                        dm_reassign(ompp->alias);
                }
        }
        return 0;
}
623
624 static void
625 sync_maps_state(vector mpvec)
626 {
627         unsigned int i;
628         struct multipath *mpp;
629
630         vector_foreach_slot (mpvec, mpp, i)
631                 sync_map_state(mpp);
632 }
633
/*
 * Flush map @mpp from the kernel (with @nopaths, only when it has no
 * usable paths, possibly via deferred remove) and, on success, drop it
 * from multipathd's tables and orphan its paths.
 * Returns 0 when flushed, the dm_flush_map* status otherwise.
 */
static int
flush_map(struct multipath * mpp, struct vectors * vecs, int nopaths)
{
        int r;

        if (nopaths)
                r = dm_flush_map_nopaths(mpp->alias, mpp->deferred_remove);
        else
                r = dm_flush_map(mpp->alias);
        /*
         * NOTE(review): an earlier comment here claimed references are
         * cleared "before flushing ... below", but the flush already
         * happened above; internal references are dropped below in
         * remove_map_and_stop_waiter() after a successful flush.
         */
        if (r) {
                /*
                 * May not really be an error -- if the map was already flushed
                 * from the device mapper by dmsetup(8) for instance.
                 */
                if (r == 1)
                        condlog(0, "%s: can't flush", mpp->alias);
                else {
                        condlog(2, "%s: devmap deferred remove", mpp->alias);
                        mpp->deferred_remove = DEFERRED_REMOVE_IN_PROGRESS;
                }
                return r;
        }
        else {
                dm_lib_release();
                condlog(2, "%s: map flushed", mpp->alias);
        }

        orphan_paths(vecs->pathvec, mpp, "map flushed");
        remove_map_and_stop_waiter(mpp, vecs);

        return 0;
}
670
/*
 * Handle a dm "add" uevent for a map: resolve the map name from the
 * uevent (falling back to a major:minor lookup) and run ev_add_map()
 * under vecs->lock.  Returns ev_add_map()'s status, or 1 when the
 * map name cannot be determined.
 */
static int
uev_add_map (struct uevent * uev, struct vectors * vecs)
{
        char *alias;
        int major = -1, minor = -1, rc;

        condlog(3, "%s: add map (uevent)", uev->kernel);
        alias = uevent_get_dm_name(uev);
        if (!alias) {
                condlog(3, "%s: No DM_NAME in uevent", uev->kernel);
                major = uevent_get_major(uev);
                minor = uevent_get_minor(uev);
                alias = dm_mapname(major, minor);
                if (!alias) {
                        condlog(2, "%s: mapname not found for %d:%d",
                                uev->kernel, major, minor);
                        return 1;
                }
        }
        pthread_cleanup_push(cleanup_lock, &vecs->lock);
        lock(&vecs->lock);
        pthread_testcancel();
        rc = ev_add_map(uev->kernel, alias, vecs);
        lock_cleanup_pop(vecs->lock);
        FREE(alias);
        return rc;
}
698
699 /*
700  * ev_add_map expects that the multipath device already exists in kernel
701  * before it is called. It just adds a device to multipathd or updates an
702  * existing device.
703  */
int
ev_add_map (char * dev, const char * alias, struct vectors * vecs)
{
        struct multipath * mpp;
        int delayed_reconfig, reassign_maps;
        struct config *conf;

        /* ignore dm devices that are not multipath maps */
        if (dm_is_mpath(alias) != 1) {
                condlog(4, "%s: not a multipath map", alias);
                return 0;
        }

        mpp = find_mp_by_alias(vecs->mpvec, alias);

        if (mpp) {
                /* known map: run actions that were deferred waiting on udev */
                if (mpp->wait_for_udev > 1) {
                        condlog(2, "%s: performing delayed actions",
                                mpp->alias);
                        if (update_map(mpp, vecs, 0))
                                /* setup multipathd removed the map */
                                return 1;
                }
                conf = get_multipath_config();
                delayed_reconfig = conf->delayed_reconfig;
                reassign_maps = conf->reassign_maps;
                put_multipath_config(conf);
                if (mpp->wait_for_udev) {
                        mpp->wait_for_udev = 0;
                        if (delayed_reconfig &&
                            !need_to_delay_reconfig(vecs)) {
                                condlog(2, "reconfigure (delayed)");
                                set_config_state(DAEMON_CONFIGURE);
                                return 0;
                        }
                }
                /*
                 * Not really an error -- we generate our own uevent
                 * if we create a multipath mapped device as a result
                 * of uev_add_path
                 */
                if (reassign_maps) {
                        condlog(3, "%s: Reassign existing device-mapper devices",
                                alias);
                        dm_reassign(alias);
                }
                return 0;
        }
        condlog(2, "%s: adding map", alias);

        /*
         * now we can register the map
         */
        if ((mpp = add_map_without_path(vecs, alias))) {
                sync_map_state(mpp);
                condlog(2, "%s: devmap %s registered", alias, dev);
                return 0;
        } else {
                condlog(2, "%s: ev_add_map failed", dev);
                return 1;
        }
}
765
/*
 * Handle a dm "remove" uevent: look the map up by its minor number,
 * verify the alias matches, and drop it from the internal tables.
 * Always returns 0 -- a missing or mismatched map is not treated as
 * an error here.
 */
static int
uev_remove_map (struct uevent * uev, struct vectors * vecs)
{
        char *alias;
        int minor;
        struct multipath *mpp;

        condlog(3, "%s: remove map (uevent)", uev->kernel);
        alias = uevent_get_dm_name(uev);
        if (!alias) {
                condlog(3, "%s: No DM_NAME in uevent, ignoring", uev->kernel);
                return 0;
        }
        minor = uevent_get_minor(uev);

        pthread_cleanup_push(cleanup_lock, &vecs->lock);
        lock(&vecs->lock);
        pthread_testcancel();
        mpp = find_mp_by_minor(vecs->mpvec, minor);

        if (!mpp) {
                condlog(2, "%s: devmap not registered, can't remove",
                        uev->kernel);
                goto out;
        }
        if (strcmp(mpp->alias, alias)) {
                /* NOTE(review): stray ')' at the end of this message */
                condlog(2, "%s: map alias mismatch: have \"%s\", got \"%s\")",
                        uev->kernel, mpp->alias, alias);
                goto out;
        }

        remove_map_and_stop_waiter(mpp, vecs);
out:
        lock_cleanup_pop(vecs->lock);
        FREE(alias);
        return 0;
}
803
804 /* Called from CLI handler */
805 int
806 ev_remove_map (char * devname, char * alias, int minor, struct vectors * vecs)
807 {
808         struct multipath * mpp;
809
810         mpp = find_mp_by_minor(vecs->mpvec, minor);
811
812         if (!mpp) {
813                 condlog(2, "%s: devmap not registered, can't remove",
814                         devname);
815                 return 1;
816         }
817         if (strcmp(mpp->alias, alias)) {
818                 condlog(2, "%s: minor number mismatch (map %d, event %d)",
819                         mpp->alias, mpp->dmi->minor, minor);
820                 return 1;
821         }
822         return flush_map(mpp, vecs, 0);
823 }
824
/*
 * Handle an "add" uevent for a path device: validate the kernel name,
 * then either reinitialize an already-known path or allocate a new one
 * and hand it to ev_add_path().  The core runs under vecs->lock.
 * Returns 0 on success or benign skip (e.g. blacklisted), 1 on error.
 */
static int
uev_add_path (struct uevent *uev, struct vectors * vecs, int need_do_map)
{
        struct path *pp;
        int ret = 0, i;
        struct config *conf;

        condlog(3, "%s: add path (uevent)", uev->kernel);
        if (strstr(uev->kernel, "..") != NULL) {
                /*
                 * Don't allow relative device names in the pathvec
                 */
                condlog(0, "%s: path name is invalid", uev->kernel);
                return 1;
        }

        pthread_cleanup_push(cleanup_lock, &vecs->lock);
        lock(&vecs->lock);
        pthread_testcancel();
        pp = find_path_by_dev(vecs->pathvec, uev->kernel);
        if (pp) {
                int r;

                condlog(3, "%s: spurious uevent, path already in pathvec",
                        uev->kernel);
                /* only re-run discovery for paths not yet tied to a map */
                if (!pp->mpp && !strlen(pp->wwid)) {
                        condlog(3, "%s: reinitialize path", uev->kernel);
                        /* swap in the udev device from the new event */
                        udev_device_unref(pp->udev);
                        pp->udev = udev_device_ref(uev->udev);
                        conf = get_multipath_config();
                        pthread_cleanup_push(put_multipath_config, conf);
                        r = pathinfo(pp, conf,
                                     DI_ALL | DI_BLACKLIST);
                        pthread_cleanup_pop(1);
                        if (r == PATHINFO_OK)
                                ret = ev_add_path(pp, vecs, need_do_map);
                        else if (r == PATHINFO_SKIPPED) {
                                condlog(3, "%s: remove blacklisted path",
                                        uev->kernel);
                                i = find_slot(vecs->pathvec, (void *)pp);
                                if (i != -1)
                                        vector_del_slot(vecs->pathvec, i);
                                free_path(pp);
                        } else {
                                condlog(0, "%s: failed to reinitialize path",
                                        uev->kernel);
                                ret = 1;
                        }
                }
        }
        if (pp)
                goto out;

        /*
         * get path vital state
         */
        conf = get_multipath_config();
        pthread_cleanup_push(put_multipath_config, conf);
        ret = alloc_path_with_pathinfo(conf, uev->udev,
                                       uev->wwid, DI_ALL, &pp);
        pthread_cleanup_pop(1);
        if (!pp) {
                /* blacklisted paths are skipped, not errors */
                if (ret == PATHINFO_SKIPPED)
                        ret = 0;
                else {
                        condlog(3, "%s: failed to get path info", uev->kernel);
                        ret = 1;
                }
                goto out;
        }
        ret = store_path(vecs->pathvec, pp);
        if (!ret) {
                conf = get_multipath_config();
                pp->checkint = conf->checkint;
                put_multipath_config(conf);
                ret = ev_add_path(pp, vecs, need_do_map);
        } else {
                condlog(0, "%s: failed to store path info, "
                        "dropping event",
                        uev->kernel);
                free_path(pp);
                ret = 1;
        }
out:
        lock_cleanup_pop(vecs->lock);
        return ret;
}
912
913 /*
914  * returns:
915  * 0: added
916  * 1: error
917  */
/*
 * Add a path to its multipath map (creating the map if needed) and push
 * the result to the device-mapper.
 *
 * pp must already be stored in vecs->pathvec; on most failure paths it is
 * left there (orphaned) so check_path can retry later.  need_do_map == 0
 * defers the actual kernel map reload to a later merged event.
 */
int
ev_add_path (struct path * pp, struct vectors * vecs, int need_do_map)
{
	struct multipath * mpp;
	char params[PARAMS_SIZE] = {0};
	int retries = 3;
	int start_waiter = 0;
	int ret;

	/*
	 * need path UID to go any further
	 */
	if (strlen(pp->wwid) == 0) {
		condlog(0, "%s: failed to get path uid", pp->dev);
		goto fail; /* leave path added to pathvec */
	}
	mpp = find_mp_by_wwid(vecs->mpvec, pp->wwid);
	if (mpp && pp->size && mpp->size != pp->size) {
		/* same WWID but different size: refuse and drop the path */
		condlog(0, "%s: failed to add new path %s, device size mismatch", mpp->alias, pp->dev);
		int i = find_slot(vecs->pathvec, (void *)pp);
		if (i != -1)
			vector_del_slot(vecs->pathvec, i);
		free_path(pp);
		return 1;
	}
	/*
	 * While the map is still waiting for its creation uevent, don't
	 * reload it if it already has usable (UP, or non-implicit-ALUA
	 * GHOST past ghost_delay) paths; just orphan this path for now.
	 */
	if (mpp && mpp->wait_for_udev &&
	    (pathcount(mpp, PATH_UP) > 0 ||
	     (pathcount(mpp, PATH_GHOST) > 0 &&
	      path_get_tpgs(pp) != TPGS_IMPLICIT &&
	      mpp->ghost_delay_tick <= 0))) {
		/* if wait_for_udev is set and valid paths exist */
		condlog(3, "%s: delaying path addition until %s is fully initialized",
			pp->dev, mpp->alias);
		mpp->wait_for_udev = 2;
		orphan_path(pp, "waiting for create to complete");
		return 0;
	}

	pp->mpp = mpp;
rescan:
	if (mpp) {
		/* existing map: pull in this (and any other) matching paths */
		condlog(4,"%s: adopting all paths for path %s",
			mpp->alias, pp->dev);
		if (adopt_paths(vecs->pathvec, mpp))
			goto fail; /* leave path added to pathvec */

		verify_paths(mpp, vecs);
		mpp->action = ACT_RELOAD;
	} else {
		if (!should_multipath(pp, vecs->pathvec, vecs->mpvec)) {
			orphan_path(pp, "only one path");
			return 0;
		}
		condlog(4,"%s: creating new map", pp->dev);
		if ((mpp = add_map_with_path(vecs, pp, 1))) {
			mpp->action = ACT_CREATE;
			/*
			 * We don't depend on ACT_CREATE, as domap will
			 * set it to ACT_NOTHING when complete.
			 */
			start_waiter = 1;
		}
		if (!start_waiter)
			goto fail; /* leave path added to pathvec */
	}

	/* persistent reservation check*/
	mpath_pr_event_handle(pp);

	if (!need_do_map)
		return 0;

	/* map vanished since we looked it up: recreate it */
	if (!dm_map_present(mpp->alias)) {
		mpp->action = ACT_CREATE;
		start_waiter = 1;
	}
	/*
	 * push the map to the device-mapper
	 */
	if (setup_map(mpp, params, PARAMS_SIZE, vecs)) {
		condlog(0, "%s: failed to setup map for addition of new "
			"path %s", mpp->alias, pp->dev);
		goto fail_map;
	}
	/*
	 * reload the map for the multipath mapped device
	 */
	ret = domap(mpp, params, 1);
	while (ret == DOMAP_RETRY && retries-- > 0) {
		condlog(0, "%s: retry domap for addition of new "
			"path %s", mpp->alias, pp->dev);
		sleep(1);
		ret = domap(mpp, params, 1);
	}
	if (ret == DOMAP_FAIL || ret == DOMAP_RETRY) {
		condlog(0, "%s: failed in domap for addition of new "
			"path %s", mpp->alias, pp->dev);
		/*
		 * deal with asynchronous uevents :((
		 */
		if (mpp->action == ACT_RELOAD && retries-- > 0) {
			/* re-read the path list and try the reload again */
			condlog(0, "%s: ev_add_path sleep", mpp->alias);
			sleep(1);
			update_mpp_paths(mpp, vecs->pathvec);
			goto rescan;
		}
		else if (mpp->action == ACT_RELOAD)
			condlog(0, "%s: giving up reload", mpp->alias);
		else
			goto fail_map;
	}
	dm_lib_release();

	/* start the event waiter for newly created maps */
	if ((mpp->action == ACT_CREATE ||
	     (mpp->action == ACT_NOTHING && start_waiter && !mpp->waiter)) &&
	    wait_for_events(mpp, vecs))
			goto fail_map;

	/*
	 * update our state from kernel regardless of create or reload
	 */
	if (setup_multipath(vecs, mpp))
		goto fail; /* if setup_multipath fails, it removes the map */

	sync_map_state(mpp);

	/* retries < 0 means we exhausted every retry above: report failure */
	if (retries >= 0) {
		condlog(2, "%s [%s]: path added to devmap %s",
			pp->dev, pp->dev_t, mpp->alias);
		return 0;
	} else
		goto fail;

fail_map:
	remove_map(mpp, vecs, 1);
fail:
	orphan_path(pp, "failed to add path");
	return 1;
}
1057
1058 static int
1059 uev_remove_path (struct uevent *uev, struct vectors * vecs, int need_do_map)
1060 {
1061         struct path *pp;
1062         int ret;
1063
1064         condlog(3, "%s: remove path (uevent)", uev->kernel);
1065         delete_foreign(uev->udev);
1066
1067         pthread_cleanup_push(cleanup_lock, &vecs->lock);
1068         lock(&vecs->lock);
1069         pthread_testcancel();
1070         pp = find_path_by_dev(vecs->pathvec, uev->kernel);
1071         if (pp)
1072                 ret = ev_remove_path(pp, vecs, need_do_map);
1073         lock_cleanup_pop(vecs->lock);
1074         if (!pp) {
1075                 /* Not an error; path might have been purged earlier */
1076                 condlog(0, "%s: path already removed", uev->kernel);
1077                 return 0;
1078         }
1079         return ret;
1080 }
1081
/*
 * Remove a path from its map and from vecs->pathvec, reloading (or
 * flushing) the device-mapper map as needed.
 *
 * On success pp is deleted from pathvec and freed; the caller must not
 * touch it afterwards.  need_do_map == 0 defers the kernel reload.
 * Returns 0 on success, 1 on failure (map is removed on hard failure).
 */
int
ev_remove_path (struct path *pp, struct vectors * vecs, int need_do_map)
{
	struct multipath * mpp;
	int i, retval = 0;
	char params[PARAMS_SIZE] = {0};

	/*
	 * avoid referring to the map of an orphaned path
	 */
	if ((mpp = pp->mpp)) {
		/*
		 * transform the mp->pg vector of vectors of paths
		 * into a mp->params string to feed the device-mapper
		 */
		if (update_mpp_paths(mpp, vecs->pathvec)) {
			condlog(0, "%s: failed to update paths",
				mpp->alias);
			goto fail;
		}

		/*
		 * Make sure mpp->hwe doesn't point to freed memory
		 * We call extract_hwe_from_path() below to restore mpp->hwe
		 */
		if (mpp->hwe == pp->hwe)
			mpp->hwe = NULL;

		if ((i = find_slot(mpp->paths, (void *)pp)) != -1)
			vector_del_slot(mpp->paths, i);

		/*
		 * remove the map IF removing the last path
		 */
		if (VECTOR_SIZE(mpp->paths) == 0) {
			char alias[WWID_SIZE];

			/*
			 * flush_map will fail if the device is open
			 */
			/* copy the alias: flush_map frees mpp on success */
			strlcpy(alias, mpp->alias, WWID_SIZE);
			if (mpp->flush_on_last_del == FLUSH_ENABLED) {
				condlog(2, "%s Last path deleted, disabling queueing", mpp->alias);
				mpp->retry_tick = 0;
				mpp->no_path_retry = NO_PATH_RETRY_FAIL;
				mpp->disable_queueing = 1;
				mpp->stat_map_failures++;
				dm_queue_if_no_path(mpp->alias, 0);
			}
			if (!flush_map(mpp, vecs, 1)) {
				condlog(2, "%s: removed map after"
					" removing all paths",
					alias);
				retval = 0;
				goto out;
			}
			/*
			 * Not an error, continue
			 */
		}

		/* re-derive hardware entry if we cleared it above */
		if (mpp->hwe == NULL)
			extract_hwe_from_path(mpp);

		if (setup_map(mpp, params, PARAMS_SIZE, vecs)) {
			condlog(0, "%s: failed to setup map for"
				" removal of path %s", mpp->alias, pp->dev);
			goto fail;
		}

		/* map still waiting for its creation uevent: flag and defer */
		if (mpp->wait_for_udev) {
			mpp->wait_for_udev = 2;
			goto out;
		}

		if (!need_do_map)
			goto out;
		/*
		 * reload the map
		 */
		mpp->action = ACT_RELOAD;
		if (domap(mpp, params, 1) == DOMAP_FAIL) {
			condlog(0, "%s: failed in domap for "
				"removal of path %s",
				mpp->alias, pp->dev);
			retval = 1;
		} else {
			/*
			 * update our state from kernel
			 */
			/* NOTE: returns without deleting pp from pathvec;
			 * setup_multipath removed the map on failure */
			if (setup_multipath(vecs, mpp))
				return 1;
			sync_map_state(mpp);

			condlog(2, "%s [%s]: path removed from map %s",
				pp->dev, pp->dev_t, mpp->alias);
		}
	}

out:
	if ((i = find_slot(vecs->pathvec, (void *)pp)) != -1)
		vector_del_slot(vecs->pathvec, i);

	free_path(pp);

	return retval;

fail:
	remove_map_and_stop_waiter(mpp, vecs);
	return 1;
}
1193
/*
 * Handle a "change" uevent for a path device.
 *
 * Re-reads the path's WWID; if it changed, the path is removed and
 * re-added (needs_reinit).  Otherwise sysfs info is refreshed and a
 * read-only state change triggers a map reload.  Foreign paths are
 * delegated to the foreign library.  Returns 0 on success.
 */
static int
uev_update_path (struct uevent *uev, struct vectors * vecs)
{
	int ro, retval = 0, rc;
	struct path * pp;
	struct config *conf;
	int needs_reinit = 0;

	switch ((rc = change_foreign(uev->udev))) {
	case FOREIGN_OK:
		/* known foreign path, ignore event */
		return 0;
	case FOREIGN_IGNORED:
		break;
	case FOREIGN_ERR:
		condlog(3, "%s: error in change_foreign", __func__);
		break;
	default:
		condlog(1, "%s: return code %d of change_forein is unsupported",
			__func__, rc);
		break;
	}

	pthread_cleanup_push(cleanup_lock, &vecs->lock);
	lock(&vecs->lock);
	pthread_testcancel();

	pp = find_path_by_dev(vecs->pathvec, uev->kernel);
	if (pp) {
		struct multipath *mpp = pp->mpp;
		char wwid[WWID_SIZE];

		/* path was waiting for udev initialization: re-add it below */
		if (pp->initialized == INIT_REQUESTED_UDEV) {
			needs_reinit = 1;
			goto out;
		}
		/* Don't deal with other types of failed initialization
		 * now. check_path will handle it */
		if (!strlen(pp->wwid))
			goto out;

		/* save the old WWID so we can detect (and undo) a change */
		strcpy(wwid, pp->wwid);
		rc = get_uid(pp, pp->state, uev->udev, 0);

		if (rc != 0)
			strcpy(pp->wwid, wwid);
		else if (strncmp(wwid, pp->wwid, WWID_SIZE) != 0) {
			/* WWID changed: drop the path and re-add from scratch */
			condlog(0, "%s: path wwid changed from '%s' to '%s'",
				uev->kernel, wwid, pp->wwid);
			ev_remove_path(pp, vecs, 1);
			needs_reinit = 1;
			goto out;
		} else {
			/* same WWID: adopt the new udev device and refresh sysfs info */
			udev_device_unref(pp->udev);
			pp->udev = udev_device_ref(uev->udev);
			conf = get_multipath_config();
			pthread_cleanup_push(put_multipath_config, conf);
			if (pathinfo(pp, conf, DI_SYSFS|DI_NOIO) != PATHINFO_OK)
				condlog(1, "%s: pathinfo failed after change uevent",
					uev->kernel);
			pthread_cleanup_pop(1);
		}

		/* propagate a read-only/read-write transition to the map */
		ro = uevent_get_disk_ro(uev);
		if (mpp && ro >= 0) {
			condlog(2, "%s: update path write_protect to '%d' (uevent)", uev->kernel, ro);

			if (mpp->wait_for_udev)
				mpp->wait_for_udev = 2;
			else {
				if (ro == 1)
					pp->mpp->force_readonly = 1;
				retval = update_path_groups(mpp, vecs, 0);
				if (retval == 2)
					condlog(2, "%s: map removed during reload", pp->dev);
				else {
					pp->mpp->force_readonly = 0;
					condlog(2, "%s: map %s reloaded (retval %d)", uev->kernel, mpp->alias, retval);
				}
			}
		}
	}
out:
	lock_cleanup_pop(vecs->lock);
	if (!pp) {
		/* If the path is blacklisted, print a debug/non-default verbosity message. */
		if (uev->udev) {
			int flag = DI_SYSFS | DI_WWID;

			conf = get_multipath_config();
			pthread_cleanup_push(put_multipath_config, conf);
			retval = alloc_path_with_pathinfo(conf, uev->udev, uev->wwid, flag, NULL);
			pthread_cleanup_pop(1);

			if (retval == PATHINFO_SKIPPED) {
				condlog(3, "%s: spurious uevent, path is blacklisted", uev->kernel);
				return 0;
			}
		}

		condlog(0, "%s: spurious uevent, path not found", uev->kernel);
	}
	/* WWID changed or udev init finished: run the add-path flow */
	if (needs_reinit)
		retval = uev_add_path(uev, vecs, 1);
	return retval;
}
1300
/*
 * Inspect a dm "change" uevent for a kernel-generated PATH_FAILED
 * notification and feed the failed path into the io_err_stat
 * (marginal path detection) machinery.
 *
 * Returns 0 when the event was a PATH_FAILED for a known path and was
 * handled; 1 otherwise (not our event, unknown path, or handler error).
 */
static int
uev_pathfail_check(struct uevent *uev, struct vectors *vecs)
{
	char *action = NULL, *devt = NULL;
	struct path *pp;
	int r = 1;

	action = uevent_get_dm_action(uev);
	if (!action)
		return 1;
	if (strncmp(action, "PATH_FAILED", 11))
		goto out;
	devt = uevent_get_dm_path(uev);
	if (!devt) {
		condlog(3, "%s: No DM_PATH in uevent", uev->kernel);
		goto out;
	}

	pthread_cleanup_push(cleanup_lock, &vecs->lock);
	lock(&vecs->lock);
	pthread_testcancel();
	pp = find_path_by_devt(vecs->pathvec, devt);
	if (!pp)
		goto out_lock;
	r = io_err_stat_handle_pathfail(pp);
	if (r)
		condlog(3, "io_err_stat: %s: cannot handle pathfail uevent",
				pp->dev);
out_lock:
	/* releases vecs->lock pushed above; devt/action freed on all paths */
	lock_cleanup_pop(vecs->lock);
	FREE(devt);
	FREE(action);
	return r;
out:
	FREE(action);
	return 1;
}
1338
1339 static int
1340 map_discovery (struct vectors * vecs)
1341 {
1342         struct multipath * mpp;
1343         unsigned int i;
1344
1345         if (dm_get_maps(vecs->mpvec))
1346                 return 1;
1347
1348         vector_foreach_slot (vecs->mpvec, mpp, i)
1349                 if (update_multipath_table(mpp, vecs->pathvec, 1) ||
1350                     update_multipath_status(mpp)) {
1351                         remove_map(mpp, vecs, 1);
1352                         i--;
1353                 }
1354
1355         return 0;
1356 }
1357
1358 int
1359 uxsock_trigger (char * str, char ** reply, int * len, bool is_root,
1360                 void * trigger_data)
1361 {
1362         struct vectors * vecs;
1363         int r;
1364
1365         *reply = NULL;
1366         *len = 0;
1367         vecs = (struct vectors *)trigger_data;
1368
1369         if ((str != NULL) && (is_root == false) &&
1370             (strncmp(str, "list", strlen("list")) != 0) &&
1371             (strncmp(str, "show", strlen("show")) != 0)) {
1372                 *reply = STRDUP("permission deny: need to be root");
1373                 if (*reply)
1374                         *len = strlen(*reply) + 1;
1375                 return 1;
1376         }
1377
1378         r = parse_cmd(str, reply, len, vecs, uxsock_timeout / 1000);
1379
1380         if (r > 0) {
1381                 if (r == ETIMEDOUT)
1382                         *reply = STRDUP("timeout\n");
1383                 else
1384                         *reply = STRDUP("fail\n");
1385                 if (*reply)
1386                         *len = strlen(*reply) + 1;
1387                 r = 1;
1388         }
1389         else if (!r && *len == 0) {
1390                 *reply = STRDUP("ok\n");
1391                 if (*reply)
1392                         *len = strlen(*reply) + 1;
1393                 r = 0;
1394         }
1395         /* else if (r < 0) leave *reply alone */
1396
1397         return r;
1398 }
1399
/*
 * Main uevent dispatcher, called by the uevent queue thread for each
 * (possibly merged) event.  Waits until the daemon is in a state that
 * can process events, then routes dm-* events to the map handlers and
 * block-device events to the path handlers.  Returns an accumulated
 * non-zero value if any handler failed.
 */
int
uev_trigger (struct uevent * uev, void * trigger_data)
{
	int r = 0;
	struct vectors * vecs;
	struct uevent *merge_uev, *tmp;
	enum daemon_status state;

	vecs = (struct vectors *)trigger_data;

	/* block until the daemon is idle, running, or shutting down */
	pthread_cleanup_push(config_cleanup, NULL);
	pthread_mutex_lock(&config_lock);
	while (running_state != DAEMON_IDLE &&
	       running_state != DAEMON_RUNNING &&
	       running_state != DAEMON_SHUTDOWN)
		pthread_cond_wait(&config_cond, &config_lock);
	state = running_state;
	pthread_cleanup_pop(1);

	if (state == DAEMON_SHUTDOWN)
		return 0;

	/*
	 * device map event
	 * Add events are ignored here as the tables
	 * are not fully initialised then.
	 */
	if (!strncmp(uev->kernel, "dm-", 3)) {
		/* non-multipath dm devices are handed to the foreign library */
		if (!uevent_is_mpath(uev)) {
			if (!strncmp(uev->action, "change", 6))
				(void)add_foreign(uev->udev);
			else if (!strncmp(uev->action, "remove", 6))
				(void)delete_foreign(uev->udev);
			goto out;
		}
		if (!strncmp(uev->action, "change", 6)) {
			r = uev_add_map(uev, vecs);

			/*
			 * the kernel-side dm-mpath issues a PATH_FAILED event
			 * when it encounters a path IO error. It is reason-
			 * able be the entry of path IO error accounting pro-
			 * cess.
			 */
			uev_pathfail_check(uev, vecs);
		} else if (!strncmp(uev->action, "remove", 6)) {
			r = uev_remove_map(uev, vecs);
		}
		goto out;
	}

	/*
	 * path add/remove/change event, add/remove maybe merged
	 */
	/* merged events are processed first, without triggering a map reload */
	list_for_each_entry_safe(merge_uev, tmp, &uev->merge_node, node) {
		if (!strncmp(merge_uev->action, "add", 3))
			r += uev_add_path(merge_uev, vecs, 0);
		if (!strncmp(merge_uev->action, "remove", 6))
			r += uev_remove_path(merge_uev, vecs, 0);
	}

	/* the final event of the batch does reload the map (need_do_map=1) */
	if (!strncmp(uev->action, "add", 3))
		r += uev_add_path(uev, vecs, 1);
	if (!strncmp(uev->action, "remove", 6))
		r += uev_remove_path(uev, vecs, 1);
	if (!strncmp(uev->action, "change", 6))
		r += uev_update_path(uev, vecs);

out:
	return r;
}
1471
/* pthread cleanup handler: detach the exiting thread from liburcu. */
static void rcu_unregister(__attribute__((unused)) void *param)
{
	rcu_unregister_thread();
}
1476
1477 static void *
1478 ueventloop (void * ap)
1479 {
1480         struct udev *udev = ap;
1481
1482         pthread_cleanup_push(rcu_unregister, NULL);
1483         rcu_register_thread();
1484         if (uevent_listen(udev))
1485                 condlog(0, "error starting uevent listener");
1486         pthread_cleanup_pop(1);
1487         return NULL;
1488 }
1489
1490 static void *
1491 uevqloop (void * ap)
1492 {
1493         pthread_cleanup_push(rcu_unregister, NULL);
1494         rcu_register_thread();
1495         if (uevent_dispatch(&uev_trigger, ap))
1496                 condlog(0, "error starting uevent dispatcher");
1497         pthread_cleanup_pop(1);
1498         return NULL;
1499 }
/*
 * Thread entry point: create the unix-domain CLI socket, register every
 * CLI command handler, then serve clients until shutdown.  On fatal
 * setup errors the whole daemon is asked to exit.
 */
static void *
uxlsnrloop (void * ap)
{
	long ux_sock;

	pthread_cleanup_push(rcu_unregister, NULL);
	rcu_register_thread();

	ux_sock = ux_socket_listen(DEFAULT_SOCKET);
	if (ux_sock == -1) {
		condlog(1, "could not create uxsock: %d", errno);
		exit_daemon();
		goto out;
	}
	pthread_cleanup_push(uxsock_cleanup, (void *)ux_sock);

	if (cli_init()) {
		condlog(1, "Failed to init uxsock listener");
		exit_daemon();
		goto out_sock;
	}

	/* Tell main thread that thread has started */
	post_config_state(DAEMON_CONFIGURE);

	/* read-only commands (also allowed for non-root clients) */
	set_handler_callback(LIST+PATHS, cli_list_paths);
	set_handler_callback(LIST+PATHS+FMT, cli_list_paths_fmt);
	set_handler_callback(LIST+PATHS+RAW+FMT, cli_list_paths_raw);
	set_handler_callback(LIST+PATH, cli_list_path);
	set_handler_callback(LIST+MAPS, cli_list_maps);
	set_handler_callback(LIST+STATUS, cli_list_status);
	set_unlocked_handler_callback(LIST+DAEMON, cli_list_daemon);
	set_handler_callback(LIST+MAPS+STATUS, cli_list_maps_status);
	set_handler_callback(LIST+MAPS+STATS, cli_list_maps_stats);
	set_handler_callback(LIST+MAPS+FMT, cli_list_maps_fmt);
	set_handler_callback(LIST+MAPS+RAW+FMT, cli_list_maps_raw);
	set_handler_callback(LIST+MAPS+TOPOLOGY, cli_list_maps_topology);
	set_handler_callback(LIST+TOPOLOGY, cli_list_maps_topology);
	set_handler_callback(LIST+MAPS+JSON, cli_list_maps_json);
	set_handler_callback(LIST+MAP+TOPOLOGY, cli_list_map_topology);
	set_handler_callback(LIST+MAP+FMT, cli_list_map_fmt);
	set_handler_callback(LIST+MAP+RAW+FMT, cli_list_map_fmt);
	set_handler_callback(LIST+MAP+JSON, cli_list_map_json);
	set_handler_callback(LIST+CONFIG+LOCAL, cli_list_config_local);
	set_handler_callback(LIST+CONFIG, cli_list_config);
	set_handler_callback(LIST+BLACKLIST, cli_list_blacklist);
	set_handler_callback(LIST+DEVICES, cli_list_devices);
	set_handler_callback(LIST+WILDCARDS, cli_list_wildcards);
	/* state-changing commands (root only) */
	set_handler_callback(RESET+MAPS+STATS, cli_reset_maps_stats);
	set_handler_callback(RESET+MAP+STATS, cli_reset_map_stats);
	set_handler_callback(ADD+PATH, cli_add_path);
	set_handler_callback(DEL+PATH, cli_del_path);
	set_handler_callback(ADD+MAP, cli_add_map);
	set_handler_callback(DEL+MAP, cli_del_map);
	set_handler_callback(SWITCH+MAP+GROUP, cli_switch_group);
	set_unlocked_handler_callback(RECONFIGURE, cli_reconfigure);
	set_handler_callback(SUSPEND+MAP, cli_suspend);
	set_handler_callback(RESUME+MAP, cli_resume);
	set_handler_callback(RESIZE+MAP, cli_resize);
	set_handler_callback(RELOAD+MAP, cli_reload);
	set_handler_callback(RESET+MAP, cli_reassign);
	set_handler_callback(REINSTATE+PATH, cli_reinstate);
	set_handler_callback(FAIL+PATH, cli_fail);
	set_handler_callback(DISABLEQ+MAP, cli_disable_queueing);
	set_handler_callback(RESTOREQ+MAP, cli_restore_queueing);
	set_handler_callback(DISABLEQ+MAPS, cli_disable_all_queueing);
	set_handler_callback(RESTOREQ+MAPS, cli_restore_all_queueing);
	set_unlocked_handler_callback(QUIT, cli_quit);
	set_unlocked_handler_callback(SHUTDOWN, cli_shutdown);
	/* persistent reservation and marginal-path commands */
	set_handler_callback(GETPRSTATUS+MAP, cli_getprstatus);
	set_handler_callback(SETPRSTATUS+MAP, cli_setprstatus);
	set_handler_callback(UNSETPRSTATUS+MAP, cli_unsetprstatus);
	set_handler_callback(FORCEQ+DAEMON, cli_force_no_daemon_q);
	set_handler_callback(RESTOREQ+DAEMON, cli_restore_no_daemon_q);
	set_handler_callback(GETPRKEY+MAP, cli_getprkey);
	set_handler_callback(SETPRKEY+MAP+KEY, cli_setprkey);
	set_handler_callback(UNSETPRKEY+MAP, cli_unsetprkey);
	set_handler_callback(SETMARGINAL+PATH, cli_set_marginal);
	set_handler_callback(UNSETMARGINAL+PATH, cli_unset_marginal);
	set_handler_callback(UNSETMARGINAL+MAP, cli_unset_all_marginal);

	umask(077);
	/* serve clients; does not return until shutdown */
	uxsock_listen(&uxsock_trigger, ux_sock, ap);

out_sock:
	pthread_cleanup_pop(1); /* uxsock_cleanup */
out:
	pthread_cleanup_pop(1); /* rcu_unregister */
	return NULL;
}
1590
/* Request an orderly daemon shutdown (callable from any thread). */
void
exit_daemon (void)
{
	post_config_state(DAEMON_SHUTDOWN);
}
1596
1597 static void
1598 fail_path (struct path * pp, int del_active)
1599 {
1600         if (!pp->mpp)
1601                 return;
1602
1603         condlog(2, "checker failed path %s in map %s",
1604                  pp->dev_t, pp->mpp->alias);
1605
1606         dm_fail_path(pp->mpp->alias, pp->dev_t);
1607         if (del_active)
1608                 update_queue_mode_del_path(pp->mpp);
1609 }
1610
1611 /*
1612  * caller must have locked the path list before calling that function
1613  */
1614 static int
1615 reinstate_path (struct path * pp)
1616 {
1617         int ret = 0;
1618
1619         if (!pp->mpp)
1620                 return 0;
1621
1622         if (dm_reinstate_path(pp->mpp->alias, pp->dev_t)) {
1623                 condlog(0, "%s: reinstate failed", pp->dev_t);
1624                 ret = 1;
1625         } else {
1626                 condlog(2, "%s: reinstated", pp->dev_t);
1627                 update_queue_mode_add_path(pp->mpp);
1628         }
1629         return ret;
1630 }
1631
1632 static void
1633 enable_group(struct path * pp)
1634 {
1635         struct pathgroup * pgp;
1636
1637         /*
1638          * if path is added through uev_add_path, pgindex can be unset.
1639          * next update_strings() will set it, upon map reload event.
1640          *
1641          * we can safely return here, because upon map reload, all
1642          * PG will be enabled.
1643          */
1644         if (!pp->mpp->pg || !pp->pgindex)
1645                 return;
1646
1647         pgp = VECTOR_SLOT(pp->mpp->pg, pp->pgindex - 1);
1648
1649         if (pgp->status == PGSTATE_DISABLED) {
1650                 condlog(2, "%s: enable group #%i", pp->mpp->alias, pp->pgindex);
1651                 dm_enablegroup(pp->mpp->alias, pp->pgindex);
1652         }
1653 }
1654
1655 static void
1656 mpvec_garbage_collector (struct vectors * vecs)
1657 {
1658         struct multipath * mpp;
1659         unsigned int i;
1660
1661         if (!vecs->mpvec)
1662                 return;
1663
1664         vector_foreach_slot (vecs->mpvec, mpp, i) {
1665                 if (mpp && mpp->alias && !dm_map_present(mpp->alias)) {
1666                         condlog(2, "%s: remove dead map", mpp->alias);
1667                         remove_map_and_stop_waiter(mpp, vecs);
1668                         i--;
1669                 }
1670         }
1671 }
1672
1673 /* This is called after a path has started working again. It the multipath
1674  * device for this path uses the followover failback type, and this is the
1675  * best pathgroup, and this is the first path in the pathgroup to come back
1676  * up, then switch to this pathgroup */
1677 static int
1678 followover_should_failback(struct path * pp)
1679 {
1680         struct pathgroup * pgp;
1681         struct path *pp1;
1682         int i;
1683
1684         if (pp->mpp->pgfailback != -FAILBACK_FOLLOWOVER ||
1685             !pp->mpp->pg || !pp->pgindex ||
1686             pp->pgindex != pp->mpp->bestpg)
1687                 return 0;
1688
1689         pgp = VECTOR_SLOT(pp->mpp->pg, pp->pgindex - 1);
1690         vector_foreach_slot(pgp->paths, pp1, i) {
1691                 if (pp1 == pp)
1692                         continue;
1693                 if (pp1->chkrstate != PATH_DOWN && pp1->chkrstate != PATH_SHAKY)
1694                         return 0;
1695         }
1696         return 1;
1697 }
1698
/*
 * Per-second tick: time out maps still waiting for their creation
 * uevent.  When a timeout fires, re-enable reloads (and reload the map
 * if one was deferred), then trigger a delayed reconfigure if the
 * configuration asks for it.
 */
static void
missing_uev_wait_tick(struct vectors *vecs)
{
	struct multipath * mpp;
	unsigned int i;
	int timed_out = 0, delayed_reconfig;
	struct config *conf;

	vector_foreach_slot (vecs->mpvec, mpp, i) {
		if (mpp->wait_for_udev && --mpp->uev_wait_tick <= 0) {
			timed_out = 1;
			condlog(0, "%s: timeout waiting on creation uevent. enabling reloads", mpp->alias);
			/* wait_for_udev > 1 means a reload was deferred: do it now */
			if (mpp->wait_for_udev > 1 &&
			    update_map(mpp, vecs, 0)) {
				/* update_map removed map */
				i--;
				continue;
			}
			mpp->wait_for_udev = 0;
		}
	}

	conf = get_multipath_config();
	delayed_reconfig = conf->delayed_reconfig;
	put_multipath_config(conf);
	if (timed_out && delayed_reconfig &&
	    !need_to_delay_reconfig(vecs)) {
		condlog(2, "reconfigure (delayed)");
		set_config_state(DAEMON_CONFIGURE);
	}
}
1730
1731 static void
1732 ghost_delay_tick(struct vectors *vecs)
1733 {
1734         struct multipath * mpp;
1735         unsigned int i;
1736
1737         vector_foreach_slot (vecs->mpvec, mpp, i) {
1738                 if (mpp->ghost_delay_tick <= 0)
1739                         continue;
1740                 if (--mpp->ghost_delay_tick <= 0) {
1741                         condlog(0, "%s: timed out waiting for active path",
1742                                 mpp->alias);
1743                         mpp->force_udev_reload = 1;
1744                         if (update_map(mpp, vecs, 0) != 0) {
1745                                 /* update_map removed map */
1746                                 i--;
1747                                 continue;
1748                         }
1749                 }
1750         }
1751 }
1752
1753 static void
1754 defered_failback_tick (vector mpvec)
1755 {
1756         struct multipath * mpp;
1757         unsigned int i;
1758
1759         vector_foreach_slot (mpvec, mpp, i) {
1760                 /*
1761                  * deferred failback getting sooner
1762                  */
1763                 if (mpp->pgfailback > 0 && mpp->failback_tick > 0) {
1764                         mpp->failback_tick--;
1765
1766                         if (!mpp->failback_tick && need_switch_pathgroup(mpp, 1))
1767                                 switch_pathgroup(mpp);
1768                 }
1769         }
1770 }
1771
1772 static void
1773 retry_count_tick(vector mpvec)
1774 {
1775         struct multipath *mpp;
1776         unsigned int i;
1777
1778         vector_foreach_slot (mpvec, mpp, i) {
1779                 if (mpp->retry_tick > 0) {
1780                         mpp->stat_total_queueing_time++;
1781                         condlog(4, "%s: Retrying.. No active path", mpp->alias);
1782                         if(--mpp->retry_tick == 0) {
1783                                 mpp->stat_map_failures++;
1784                                 dm_queue_if_no_path(mpp->alias, 0);
1785                                 condlog(2, "%s: Disable queueing", mpp->alias);
1786                         }
1787                 }
1788         }
1789 }
1790
1791 int update_prio(struct path *pp, int refresh_all)
1792 {
1793         int oldpriority;
1794         struct path *pp1;
1795         struct pathgroup * pgp;
1796         int i, j, changed = 0;
1797         struct config *conf;
1798
1799         if (refresh_all) {
1800                 vector_foreach_slot (pp->mpp->pg, pgp, i) {
1801                         vector_foreach_slot (pgp->paths, pp1, j) {
1802                                 oldpriority = pp1->priority;
1803                                 conf = get_multipath_config();
1804                                 pthread_cleanup_push(put_multipath_config,
1805                                                      conf);
1806                                 pathinfo(pp1, conf, DI_PRIO);
1807                                 pthread_cleanup_pop(1);
1808                                 if (pp1->priority != oldpriority)
1809                                         changed = 1;
1810                         }
1811                 }
1812                 return changed;
1813         }
1814         oldpriority = pp->priority;
1815         conf = get_multipath_config();
1816         pthread_cleanup_push(put_multipath_config, conf);
1817         if (pp->state != PATH_DOWN)
1818                 pathinfo(pp, conf, DI_PRIO);
1819         pthread_cleanup_pop(1);
1820
1821         if (pp->priority == oldpriority)
1822                 return 0;
1823         return 1;
1824 }
1825
/*
 * Reload the map in the kernel and re-sync the daemon's view of it.
 * Returns 0 on success, 1 if the reload failed, 2 if the subsequent
 * setup_multipath() failed.
 */
int update_path_groups(struct multipath *mpp, struct vectors *vecs, int refresh)
{
	int rc = reload_map(vecs, mpp, refresh, 1);

	if (rc)
		return 1;

	dm_lib_release();
	if (setup_multipath(vecs, mpp) != 0)
		return 2;
	sync_map_state(mpp);
	return 0;
}
1838
/*
 * Implementation of the san_path_err_* "shaky path" detection: decide
 * whether a path that just checked "good" should nevertheless be kept
 * in delayed state because it has been flapping.
 *
 * Returns 1 if path reinstatement should be delayed, 0 otherwise.
 */
static int check_path_reinstate_state(struct path * pp) {
	struct timespec curr_time;

	/*
	 * This function is only called when the path state changes
	 * from "bad" to "good". pp->state reflects the *previous* state.
	 * If this was "bad", we know that a failure must have occurred
	 * beforehand, and count that.
	 * Note that we count path state _changes_ this way. If a path
	 * remains in "bad" state, failure count is not increased.
	 */

	/* the feature is only active if all three settings are configured */
	if (!((pp->mpp->san_path_err_threshold > 0) &&
				(pp->mpp->san_path_err_forget_rate > 0) &&
				(pp->mpp->san_path_err_recovery_time >0))) {
		return 0;
	}

	if (pp->disable_reinstate) {
		/* If there are no other usable paths, reinstate the path */
		if (count_active_paths(pp->mpp) == 0) {
			condlog(2, "%s : reinstating path early", pp->dev);
			goto reinstate_path;
		}
		get_monotonic_time(&curr_time);

		/* If the path failed again or is still failed, reset
		 * san_path_err_forget_rate and dis_reinstate_time to
		 * start a new stability check.
		 */
		if ((pp->state != PATH_UP) && (pp->state != PATH_GHOST) &&
			(pp->state != PATH_DELAYED)) {
			pp->san_path_err_forget_rate =
				pp->mpp->san_path_err_forget_rate;
			pp->dis_reinstate_time = curr_time.tv_sec;
		}

		/* recovery time has elapsed without further failures */
		if ((curr_time.tv_sec - pp->dis_reinstate_time ) > pp->mpp->san_path_err_recovery_time) {
			condlog(2,"%s : reinstate the path after err recovery time", pp->dev);
			goto reinstate_path;
		}
		return 1;
	}
	/* forget errors on a working path */
	if ((pp->state == PATH_UP || pp->state == PATH_GHOST) &&
			pp->path_failures > 0) {
		if (pp->san_path_err_forget_rate > 0){
			pp->san_path_err_forget_rate--;
		} else {
			/* for every san_path_err_forget_rate number of
			 * successful path checks decrement path_failures by 1
			 */
			pp->path_failures--;
			pp->san_path_err_forget_rate = pp->mpp->san_path_err_forget_rate;
		}
		return 0;
	}

	/* If the path isn't recovering from a failed state, do nothing */
	if (pp->state != PATH_DOWN && pp->state != PATH_SHAKY &&
			pp->state != PATH_TIMEOUT)
		return 0;

	/* first failure: (re)arm the forget-rate counter */
	if (pp->path_failures == 0)
		pp->san_path_err_forget_rate = pp->mpp->san_path_err_forget_rate;

	pp->path_failures++;

	/* if we don't know the current time, we don't know how long to
	 * delay the path, so there's no point in checking if we should
	 */

	get_monotonic_time(&curr_time);
	/* when path failures has exceeded the san_path_err_threshold
	 * place the path in delayed state till san_path_err_recovery_time
	 * so that the customer can rectify the issue within this time. After
	 * the completion of san_path_err_recovery_time it should
	 * automatically reinstate the path
	 * (note: we know that san_path_err_threshold > 0 here).
	 */
	if (pp->path_failures > (unsigned int)pp->mpp->san_path_err_threshold) {
		condlog(2, "%s : hit error threshold. Delaying path reinstatement", pp->dev);
		pp->dis_reinstate_time = curr_time.tv_sec;
		pp->disable_reinstate = 1;

		return 1;
	} else {
		return 0;
	}

reinstate_path:
	/* reset all shaky-path accounting and allow reinstatement */
	pp->path_failures = 0;
	pp->disable_reinstate = 0;
	pp->san_path_err_forget_rate = 0;
	return 0;
}
1935
1936 static int
1937 should_skip_path(struct path *pp){
1938         if (marginal_path_check_enabled(pp->mpp)) {
1939                 if (pp->io_err_disable_reinstate && need_io_err_check(pp))
1940                         return 1;
1941         } else if (san_path_check_enabled(pp->mpp)) {
1942                 if (check_path_reinstate_state(pp))
1943                         return 1;
1944         }
1945         return 0;
1946 }
1947
/*
 * Check a single path: run the path checker, synchronize the daemon's
 * view with the kernel state, fail/reinstate the path in device-mapper
 * as needed, and schedule failback or pathgroup switches.
 *
 * Returns '1' if the path has been checked, '-1' if it was blacklisted
 * and '0' otherwise
 */
int
check_path (struct vectors * vecs, struct path * pp, unsigned int ticks)
{
	int newstate;
	int new_path_up = 0;
	int chkr_new_path_up = 0;
	int disable_reinstate = 0;
	int oldchkrstate = pp->chkrstate;
	int retrigger_tries, verbosity;
	unsigned int checkint, max_checkint;
	struct config *conf;
	int marginal_pathgroups, marginal_changed = 0;
	int ret;

	if ((pp->initialized == INIT_OK ||
	     pp->initialized == INIT_REQUESTED_UDEV) && !pp->mpp)
		return 0;

	/* consume elapsed ticks; only check when the countdown reaches 0 */
	if (pp->tick)
		pp->tick -= (pp->tick > ticks) ? ticks : pp->tick;
	if (pp->tick)
		return 0; /* don't check this path yet */

	/* snapshot the config values used below */
	conf = get_multipath_config();
	retrigger_tries = conf->retrigger_tries;
	checkint = conf->checkint;
	max_checkint = conf->max_checkint;
	verbosity = conf->verbosity;
	marginal_pathgroups = conf->marginal_pathgroups;
	put_multipath_config(conf);

	if (pp->checkint == CHECKINT_UNDEF) {
		condlog(0, "%s: BUG: checkint is not set", pp->dev);
		pp->checkint = checkint;
	};

	/* paths missing their udev "add" event: poke udev by writing a
	 * "change" uevent to sysfs, a limited number of times */
	if (!pp->mpp && pp->initialized == INIT_MISSING_UDEV) {
		if (pp->retriggers < retrigger_tries) {
			condlog(2, "%s: triggering change event to reinitialize",
				pp->dev);
			pp->initialized = INIT_REQUESTED_UDEV;
			pp->retriggers++;
			sysfs_attr_set_value(pp->udev, "uevent", "change",
					     strlen("change"));
			return 0;
		} else {
			condlog(1, "%s: not initialized after %d udev retriggers",
				pp->dev, retrigger_tries);
			/*
			 * Make sure that the "add missing path" code path
			 * below may reinstate the path later, if it ever
			 * comes up again.
			 * The WWID needs not be cleared; if it was set, the
			 * state hadn't been INIT_MISSING_UDEV in the first
			 * place.
			 */
			pp->initialized = INIT_FAILED;
			return 0;
		}
	}

	/*
	 * provision a next check soonest,
	 * in case we exit abnormally from here
	 */
	pp->tick = checkint;

	newstate = path_offline(pp);
	if (newstate == PATH_UP) {
		/* device looks online in sysfs: run the actual checker */
		conf = get_multipath_config();
		pthread_cleanup_push(put_multipath_config, conf);
		newstate = get_state(pp, conf, 1, newstate);
		pthread_cleanup_pop(1);
	} else {
		checker_clear_message(&pp->checker);
		condlog(3, "%s: state %s, checker not called",
			pp->dev, checker_state_name(newstate));
	}
	/*
	 * Wait for uevent for removed paths;
	 * some LLDDs like zfcp keep paths unavailable
	 * without sending uevents.
	 */
	if (newstate == PATH_REMOVED)
		newstate = PATH_DOWN;

	if (newstate == PATH_WILD || newstate == PATH_UNCHECKED) {
		condlog(2, "%s: unusable path (%s) - checker failed",
			pp->dev, checker_state_name(newstate));
		LOG_MSG(2, verbosity, pp);
		conf = get_multipath_config();
		pthread_cleanup_push(put_multipath_config, conf);
		pathinfo(pp, conf, 0);
		pthread_cleanup_pop(1);
		return 1;
	} else if ((newstate != PATH_UP && newstate != PATH_GHOST) &&
			(pp->state == PATH_DELAYED)) {
		/* If the path state becomes failed again, cancel the
		 * path delay state */
		pp->state = newstate;
		return 1;
	}
	if (!pp->mpp) {
		/* orphan path that came up with no WWID yet: try a full
		 * pathinfo and add it to a map if it initializes */
		if (!strlen(pp->wwid) &&
		    (pp->initialized == INIT_FAILED ||
		     pp->initialized == INIT_NEW) &&
		    (newstate == PATH_UP || newstate == PATH_GHOST)) {
			condlog(2, "%s: add missing path", pp->dev);
			conf = get_multipath_config();
			pthread_cleanup_push(put_multipath_config, conf);
			ret = pathinfo(pp, conf, DI_ALL | DI_BLACKLIST);
			pthread_cleanup_pop(1);
			/* INIT_OK implies ret == PATHINFO_OK */
			if (pp->initialized == INIT_OK) {
				ev_add_path(pp, vecs, 1);
				pp->tick = 1;
			} else {
				/*
				 * We failed multiple times to initialize this
				 * path properly. Don't re-check too often.
				 */
				pp->checkint = max_checkint;
				if (ret == PATHINFO_SKIPPED)
					return -1;
			}
		}
		return 0;
	}
	/*
	 * Async IO in flight. Keep the previous path state
	 * and reschedule as soon as possible
	 */
	if (newstate == PATH_PENDING) {
		pp->tick = 1;
		return 0;
	}
	/*
	 * Synchronize with kernel state
	 */
	if (update_multipath_strings(pp->mpp, vecs->pathvec, 1)) {
		condlog(1, "%s: Could not synchronize with kernel state",
			pp->dev);
		pp->dmstate = PSTATE_UNDEF;
	}
	/* if update_multipath_strings orphaned the path, quit early */
	if (!pp->mpp)
		return 0;
	set_no_path_retry(pp->mpp);

	/* shaky-path handling: either delay the path (classic mode) or
	 * move it between normal and marginal pathgroups */
	if ((newstate == PATH_UP || newstate == PATH_GHOST) &&
	    (san_path_check_enabled(pp->mpp) ||
	     marginal_path_check_enabled(pp->mpp))) {
		int was_marginal = pp->marginal;
		if (should_skip_path(pp)) {
			if (!marginal_pathgroups) {
				if (marginal_path_check_enabled(pp->mpp))
					/* to reschedule as soon as possible,
					 * so that this path can be recovered
					 * in time */
					pp->tick = 1;
				pp->state = PATH_DELAYED;
				return 1;
			}
			if (!was_marginal) {
				pp->marginal = 1;
				marginal_changed = 1;
			}
		} else if (marginal_pathgroups && was_marginal) {
			pp->marginal = 0;
			marginal_changed = 1;
		}
	}

	/*
	 * don't reinstate failed path, if its in stand-by
	 * and if target supports only implicit tpgs mode.
	 * this will prevent unnecessary i/o by dm on stand-by
	 * paths if there are no other active paths in map.
	 */
	disable_reinstate = (newstate == PATH_GHOST &&
			     count_active_paths(pp->mpp) == 0 &&
			     path_get_tpgs(pp) == TPGS_IMPLICIT) ? 1 : 0;

	pp->chkrstate = newstate;
	if (newstate != pp->state) {
		int oldstate = pp->state;
		pp->state = newstate;

		LOG_MSG(1, verbosity, pp);

		/*
		 * upon state change, reset the checkint
		 * to the shortest delay
		 */
		conf = get_multipath_config();
		pp->checkint = conf->checkint;
		put_multipath_config(conf);

		if (newstate != PATH_UP && newstate != PATH_GHOST) {
			/*
			 * proactively fail path in the DM
			 */
			if (oldstate == PATH_UP ||
			    oldstate == PATH_GHOST)
				fail_path(pp, 1);
			else
				fail_path(pp, 0);

			/*
			 * cancel scheduled failback
			 */
			pp->mpp->failback_tick = 0;

			pp->mpp->stat_path_failures++;
			return 1;
		}

		if (newstate == PATH_UP || newstate == PATH_GHOST) {
			if (pp->mpp->prflag) {
				/*
				 * Check Persistent Reservation.
				 */
				condlog(2, "%s: checking persistent "
					"reservation registration", pp->dev);
				mpath_pr_event_handle(pp);
			}
		}

		/*
		 * reinstate this path
		 */
		if (!disable_reinstate && reinstate_path(pp)) {
			condlog(3, "%s: reload map", pp->dev);
			ev_add_path(pp, vecs, 1);
			pp->tick = 1;
			return 0;
		}
		new_path_up = 1;

		if (oldchkrstate != PATH_UP && oldchkrstate != PATH_GHOST)
			chkr_new_path_up = 1;

		/*
		 * if at least one path is up in a group, and
		 * the group is disabled, re-enable it
		 */
		if (newstate == PATH_UP)
			enable_group(pp);
	}
	else if (newstate == PATH_UP || newstate == PATH_GHOST) {
		/* state unchanged and "good" */
		if ((pp->dmstate == PSTATE_FAILED ||
		    pp->dmstate == PSTATE_UNDEF) &&
		    !disable_reinstate) {
			/* Clear IO errors */
			if (reinstate_path(pp)) {
				condlog(3, "%s: reload map", pp->dev);
				ev_add_path(pp, vecs, 1);
				pp->tick = 1;
				return 0;
			}
		} else {
			LOG_MSG(4, verbosity, pp);
			if (pp->checkint != max_checkint) {
				/*
				 * double the next check delay.
				 * max at conf->max_checkint
				 */
				if (pp->checkint < (max_checkint / 2))
					pp->checkint = 2 * pp->checkint;
				else
					pp->checkint = max_checkint;

				condlog(4, "%s: delay next check %is",
					pp->dev_t, pp->checkint);
			}
			pp->tick = pp->checkint;
		}
	}
	else if (newstate != PATH_UP && newstate != PATH_GHOST) {
		/* state unchanged and "bad": make sure DM agrees */
		if (pp->dmstate == PSTATE_ACTIVE ||
		    pp->dmstate == PSTATE_UNDEF)
			fail_path(pp, 0);
		if (newstate == PATH_DOWN) {
			int log_checker_err;

			conf = get_multipath_config();
			log_checker_err = conf->log_checker_err;
			put_multipath_config(conf);
			if (log_checker_err == LOG_CHKR_ERR_ONCE)
				LOG_MSG(3, verbosity, pp);
			else
				LOG_MSG(2, verbosity, pp);
		}
	}

	pp->state = newstate;

	if (pp->mpp->wait_for_udev)
		return 1;
	/*
	 * path prio refreshing
	 */
	condlog(4, "path prio refresh");

	if (marginal_changed)
		update_path_groups(pp->mpp, vecs, 1);
	else if (update_prio(pp, new_path_up) &&
	    (pp->mpp->pgpolicyfn == (pgpolicyfn *)group_by_prio) &&
	     pp->mpp->pgfailback == -FAILBACK_IMMEDIATE)
		update_path_groups(pp->mpp, vecs, !new_path_up);
	else if (need_switch_pathgroup(pp->mpp, 0)) {
		/* deferred failback: arm the countdown; immediate or
		 * followover failback: switch right away */
		if (pp->mpp->pgfailback > 0 &&
		    (new_path_up || pp->mpp->failback_tick <= 0))
			pp->mpp->failback_tick =
				pp->mpp->pgfailback + 1;
		else if (pp->mpp->pgfailback == -FAILBACK_IMMEDIATE ||
			 (chkr_new_path_up && followover_should_failback(pp)))
			switch_pathgroup(pp->mpp);
	}
	return 1;
}
2272
/*
 * Main loop of the path checker thread: once per tick, check all paths,
 * run the per-map countdown timers, collect stale maps, and sleep until
 * the next tick (optionally with strict timing).
 */
static void *
checkerloop (void *ap)
{
	struct vectors *vecs;
	struct path *pp;
	int count = 0;
	unsigned int i;
	struct timespec last_time;
	struct config *conf;
	int foreign_tick = 0;
	bool use_watchdog;

	pthread_cleanup_push(rcu_unregister, NULL);
	rcu_register_thread();
	mlockall(MCL_CURRENT | MCL_FUTURE);
	vecs = (struct vectors *)ap;
	condlog(2, "path checkers start up");

	/* Tweak start time for initial path check */
	get_monotonic_time(&last_time);
	last_time.tv_sec -= 1;

	/* use_watchdog is set from process environment and never changes */
	conf = get_multipath_config();
	use_watchdog = conf->use_watchdog;
	put_multipath_config(conf);

	while (1) {
		struct timespec diff_time, start_time, end_time;
		int num_paths = 0, strict_timing, rc = 0;
		unsigned int ticks = 0;

		/* ticks = whole seconds elapsed since the last iteration */
		get_monotonic_time(&start_time);
		if (start_time.tv_sec && last_time.tv_sec) {
			timespecsub(&start_time, &last_time, &diff_time);
			condlog(4, "tick (%lu.%06lu secs)",
				diff_time.tv_sec, diff_time.tv_nsec / 1000);
			last_time = start_time;
			ticks = diff_time.tv_sec;
		} else {
			ticks = 1;
			condlog(4, "tick (%d ticks)", ticks);
		}
#ifdef USE_SYSTEMD
		if (use_watchdog)
			sd_notify(0, "WATCHDOG=1");
#endif
		rc = set_config_state(DAEMON_RUNNING);
		if (rc == ETIMEDOUT) {
			condlog(4, "timeout waiting for DAEMON_IDLE");
			continue;
		} else if (rc == EINVAL)
			/* daemon shutdown */
			break;

		/* check all paths, under the vecs lock */
		pthread_cleanup_push(cleanup_lock, &vecs->lock);
		lock(&vecs->lock);
		pthread_testcancel();
		vector_foreach_slot (vecs->pathvec, pp, i) {
			rc = check_path(vecs, pp, ticks);
			if (rc < 0) {
				/* path was blacklisted: drop it */
				vector_del_slot(vecs->pathvec, i);
				free_path(pp);
				i--;
			} else
				num_paths += rc;
		}
		lock_cleanup_pop(vecs->lock);

		/* run the per-map countdown timers */
		pthread_cleanup_push(cleanup_lock, &vecs->lock);
		lock(&vecs->lock);
		pthread_testcancel();
		defered_failback_tick(vecs->mpvec);
		retry_count_tick(vecs->mpvec);
		missing_uev_wait_tick(vecs);
		ghost_delay_tick(vecs);
		lock_cleanup_pop(vecs->lock);

		/* garbage-collect maps every MAPGCINT iterations */
		if (count)
			count--;
		else {
			pthread_cleanup_push(cleanup_lock, &vecs->lock);
			lock(&vecs->lock);
			pthread_testcancel();
			condlog(4, "map garbage collection");
			mpvec_garbage_collector(vecs);
			count = MAPGCINT;
			lock_cleanup_pop(vecs->lock);
		}

		diff_time.tv_nsec = 0;
		if (start_time.tv_sec) {
			get_monotonic_time(&end_time);
			timespecsub(&end_time, &start_time, &diff_time);
			if (num_paths) {
				unsigned int max_checkint;

				condlog(4, "checked %d path%s in %lu.%06lu secs",
					num_paths, num_paths > 1 ? "s" : "",
					diff_time.tv_sec,
					diff_time.tv_nsec / 1000);
				conf = get_multipath_config();
				max_checkint = conf->max_checkint;
				put_multipath_config(conf);
				if (diff_time.tv_sec > max_checkint)
					condlog(1, "path checkers took longer "
						"than %lu seconds, consider "
						"increasing max_polling_interval",
						diff_time.tv_sec);
			}
		}

		/* run the foreign-library checkers every max_checkint ticks */
		if (foreign_tick == 0) {
			conf = get_multipath_config();
			foreign_tick = conf->max_checkint;
			put_multipath_config(conf);
		}
		if (--foreign_tick == 0)
			check_foreign();

		post_config_state(DAEMON_IDLE);
		conf = get_multipath_config();
		strict_timing = conf->strict_timing;
		put_multipath_config(conf);
		if (!strict_timing)
			sleep(1);
		else {
			/* sleep the remainder of the current second, so
			 * iterations stay aligned to 1s boundaries */
			if (diff_time.tv_nsec) {
				diff_time.tv_sec = 0;
				diff_time.tv_nsec =
				     1000UL * 1000 * 1000 - diff_time.tv_nsec;
			} else
				diff_time.tv_sec = 1;

			condlog(3, "waiting for %lu.%06lu secs",
				diff_time.tv_sec,
				diff_time.tv_nsec / 1000);
			if (nanosleep(&diff_time, NULL) != 0) {
				condlog(3, "nanosleep failed with error %d",
					errno);
				conf = get_multipath_config();
				conf->strict_timing = 0;
				put_multipath_config(conf);
				break;
			}
		}
	}
	pthread_cleanup_pop(1);
	return NULL;
}
2423
/*
 * (Re)build the full set of multipath maps from the currently visible
 * paths (sysfs) and maps (device-mapper), push changed maps into dm,
 * and start event waiter threads for the resulting maps.
 * Returns 0 on success, 1 on failure.
 */
int
configure (struct vectors * vecs)
{
	struct multipath * mpp;
	struct path * pp;
	vector mpvec;
	int i, ret;
	struct config *conf;
	/* static: only the very first call uses a weak reload (see below) */
	static int force_reload = FORCE_RELOAD_WEAK;

	if (!vecs->pathvec && !(vecs->pathvec = vector_alloc())) {
		condlog(0, "couldn't allocate path vec in configure");
		return 1;
	}

	if (!vecs->mpvec && !(vecs->mpvec = vector_alloc())) {
		condlog(0, "couldn't allocate multipath vec in configure");
		return 1;
	}

	/* new maps are collected here and swapped into vecs->mpvec at the end */
	if (!(mpvec = vector_alloc())) {
		condlog(0, "couldn't allocate new maps vec in configure");
		return 1;
	}

	/*
	 * probe for current path (from sysfs) and map (from dm) sets
	 */
	ret = path_discovery(vecs->pathvec, DI_ALL);
	if (ret < 0) {
		condlog(0, "configure failed at path discovery");
		goto fail;
	}

	/* drop blacklisted paths; cleanup handler releases conf on cancel */
	conf = get_multipath_config();
	pthread_cleanup_push(put_multipath_config, conf);
	vector_foreach_slot (vecs->pathvec, pp, i){
		if (filter_path(conf, pp) > 0){
			vector_del_slot(vecs->pathvec, i);
			free_path(pp);
			i--;
		}
	}
	pthread_cleanup_pop(1);

	if (map_discovery(vecs)) {
		condlog(0, "configure failed at map discovery");
		goto fail;
	}

	/*
	 * create new set of maps & push changed ones into dm
	 * In the first call, use FORCE_RELOAD_WEAK to avoid making
	 * superfluous ACT_RELOAD ioctls. Later calls are done
	 * with FORCE_RELOAD_YES.
	 */
	ret = coalesce_paths(vecs, mpvec, NULL, force_reload, CMD_NONE);
	if (force_reload == FORCE_RELOAD_WEAK)
		force_reload = FORCE_RELOAD_YES;
	if (ret != CP_OK) {
		condlog(0, "configure failed while coalescing paths");
		goto fail;
	}

	/*
	 * may need to remove some maps which are no longer relevant
	 * e.g., due to blacklist changes in conf file
	 */
	if (coalesce_maps(vecs, mpvec)) {
		condlog(0, "configure failed while coalescing maps");
		goto fail;
	}

	dm_lib_release();

	sync_maps_state(mpvec);
	vector_foreach_slot(mpvec, mpp, i){
		/* newly seen wwid: persist it and re-trigger udev for its paths */
		if (remember_wwid(mpp->wwid) == 1)
			trigger_paths_udev_change(mpp, true);
		update_map_pr(mpp);
	}

	/*
	 * purge dm of old maps
	 */
	remove_maps(vecs);

	/*
	 * save new set of maps formed by considering current path state
	 */
	vector_free(vecs->mpvec);
	vecs->mpvec = mpvec;

	/*
	 * start dm event waiter threads for these new maps
	 */
	vector_foreach_slot(vecs->mpvec, mpp, i) {
		if (wait_for_events(mpp, vecs)) {
			/* can't monitor it: drop the map and stay at slot i */
			remove_map(mpp, vecs, 1);
			i--;
			continue;
		}
		if (setup_multipath(vecs, mpp))
			/* setup_multipath removed the map on failure */
			i--;
	}
	return 0;

fail:
	/* NOTE(review): frees only the vector, not maps it may already
	 * hold after a partial coalesce — possible leak; verify. */
	vector_free(mpvec);
	return 1;
}
2535
2536 int
2537 need_to_delay_reconfig(struct vectors * vecs)
2538 {
2539         struct multipath *mpp;
2540         int i;
2541
2542         if (!VECTOR_SIZE(vecs->mpvec))
2543                 return 0;
2544
2545         vector_foreach_slot(vecs->mpvec, mpp, i) {
2546                 if (mpp->wait_for_udev)
2547                         return 1;
2548         }
2549         return 0;
2550 }
2551
2552 void rcu_free_config(struct rcu_head *head)
2553 {
2554         struct config *conf = container_of(head, struct config, rcu);
2555
2556         free_config(conf);
2557 }
2558
2559 int
2560 reconfigure (struct vectors * vecs)
2561 {
2562         struct config * old, *conf;
2563
2564         conf = load_config(DEFAULT_CONFIGFILE);
2565         if (!conf)
2566                 return 1;
2567
2568         /*
2569          * free old map and path vectors ... they use old conf state
2570          */
2571         if (VECTOR_SIZE(vecs->mpvec))
2572                 remove_maps_and_stop_waiters(vecs);
2573
2574         free_pathvec(vecs->pathvec, FREE_PATHS);
2575         vecs->pathvec = NULL;
2576         delete_all_foreign();
2577
2578         reset_checker_classes();
2579         /* Re-read any timezone changes */
2580         tzset();
2581
2582         dm_tgt_version(conf->version, TGT_MPATH);
2583         if (verbosity)
2584                 conf->verbosity = verbosity;
2585         if (bindings_read_only)
2586                 conf->bindings_read_only = bindings_read_only;
2587         uxsock_timeout = conf->uxsock_timeout;
2588
2589         old = rcu_dereference(multipath_conf);
2590         conf->sequence_nr = old->sequence_nr + 1;
2591         rcu_assign_pointer(multipath_conf, conf);
2592         call_rcu(&old->rcu, rcu_free_config);
2593
2594         configure(vecs);
2595
2596
2597         return 0;
2598 }
2599
2600 static struct vectors *
2601 init_vecs (void)
2602 {
2603         struct vectors * vecs;
2604
2605         vecs = (struct vectors *)MALLOC(sizeof(struct vectors));
2606
2607         if (!vecs)
2608                 return NULL;
2609
2610         pthread_mutex_init(&vecs->lock.mutex, NULL);
2611
2612         return vecs;
2613 }
2614
2615 static void *
2616 signal_set(int signo, void (*func) (int))
2617 {
2618         int r;
2619         struct sigaction sig;
2620         struct sigaction osig;
2621
2622         sig.sa_handler = func;
2623         sigemptyset(&sig.sa_mask);
2624         sig.sa_flags = 0;
2625
2626         r = sigaction(signo, &sig, &osig);
2627
2628         if (r < 0)
2629                 return (SIG_ERR);
2630         else
2631                 return (osig.sa_handler);
2632 }
2633
/*
 * Act on signal flags set by the async handlers below.  With
 * nonfatal == false only the exit request is serviced; otherwise
 * reconfigure and log-reset requests are handled too.  Called from
 * normal thread context, not from a signal handler.
 */
void
handle_signals(bool nonfatal)
{
	if (exit_sig) {
		condlog(2, "exit (signal)");
		exit_sig = 0;
		exit_daemon();
	}
	if (!nonfatal)
		return;
	if (reconfig_sig) {
		condlog(2, "reconfigure (signal)");
		set_config_state(DAEMON_CONFIGURE);
	}
	if (log_reset_sig) {
		condlog(2, "reset log (signal)");
		/* only meaningful when the syslog thread is in use */
		if (logsink == 1)
			log_thread_reset();
	}
	/* clear both flags unconditionally once serviced (or ignored) */
	reconfig_sig = 0;
	log_reset_sig = 0;
}
2656
/* SIGHUP handler: flag a reconfigure request, serviced in handle_signals() */
static void
sighup(__attribute__((unused)) int sig)
{
	reconfig_sig = 1;
}
2662
/* SIGINT/SIGTERM/SIGPIPE handler: flag daemon exit, serviced in handle_signals() */
static void
sigend(__attribute__((unused)) int sig)
{
	exit_sig = 1;
}
2668
/* SIGUSR1 handler: flag a log reset request, serviced in handle_signals() */
static void
sigusr1(__attribute__((unused)) int sig)
{
	log_reset_sig = 1;
}
2674
/* SIGUSR2 handler: no action beyond logging its arrival */
static void
sigusr2(__attribute__((unused)) int sig)
{
	condlog(3, "SIGUSR2 received");
}
2680
2681 static void
2682 signal_init(void)
2683 {
2684         sigset_t set;
2685
2686         /* block all signals */
2687         sigfillset(&set);
2688         /* SIGPIPE occurs if logging fails */
2689         sigdelset(&set, SIGPIPE);
2690         pthread_sigmask(SIG_SETMASK, &set, NULL);
2691
2692         /* Other signals will be unblocked in the uxlsnr thread */
2693         signal_set(SIGHUP, sighup);
2694         signal_set(SIGUSR1, sigusr1);
2695         signal_set(SIGUSR2, sigusr2);
2696         signal_set(SIGINT, sigend);
2697         signal_set(SIGTERM, sigend);
2698         signal_set(SIGPIPE, sigend);
2699 }
2700
/*
 * Give the daemon real-time round-robin scheduling at the highest
 * priority, so path checking keeps running under load.  Failure is
 * only logged; the daemon works (degraded) without it.
 */
static void
setscheduler (void)
{
	static struct sched_param sched_param = {
		.sched_priority = 99
	};

	if (sched_setscheduler(0, SCHED_RR, &sched_param) == -1)
		condlog(LOG_WARNING, "Could not set SCHED_RR at priority 99");
}
2715
/*
 * Exempt the daemon from the kernel OOM killer by writing the minimum
 * score to /proc/self/oom_score_adj (falling back to the legacy
 * /proc/self/oom_adj where available).  Skipped entirely when systemd
 * already applied an OOMScoreAdjust for us.
 */
static void
set_oom_adj (void)
{
#ifdef OOM_SCORE_ADJ_MIN
	int retry = 1;
	char *file = "/proc/self/oom_score_adj";
	int score = OOM_SCORE_ADJ_MIN;
#else
	int retry = 0;
	char *file = "/proc/self/oom_adj";
	int score = OOM_ADJUST_MIN;
#endif
	FILE *f;
	struct stat st;

	if (getenv("OOMScoreAdjust") != NULL) {
		condlog(3, "Using systemd provided OOMScoreAdjust");
		return;
	}

	for (;;) {
		if (stat(file, &st) == 0) {
			f = fopen(file, "w");
			if (!f) {
				condlog(0, "couldn't fopen %s : %s", file,
					strerror(errno));
				return;
			}
			fprintf(f, "%i", score);
			fclose(f);
			return;
		}
		if (errno != ENOENT) {
			condlog(0, "couldn't stat %s : %s", file,
				strerror(errno));
			return;
		}
#ifdef OOM_ADJUST_MIN
		/* modern interface missing: retry with the legacy one */
		file = "/proc/self/oom_adj";
		score = OOM_ADJUST_MIN;
#else
		retry = 0;
#endif
		if (retry-- == 0)
			break;
	}
	condlog(0, "couldn't adjust oom score");
}
2763
/*
 * The daemon body, run after daemonize(): initialize logging, config,
 * and helper threads; then loop servicing DAEMON_CONFIGURE requests
 * until DAEMON_SHUTDOWN, tear everything down and exit().
 * Never returns normally.
 */
static int
child (__attribute__((unused)) void *param)
{
	pthread_t check_thr, uevent_thr, uxlsnr_thr, uevq_thr, dmevent_thr;
	pthread_attr_t log_attr, misc_attr, uevent_attr;
	struct vectors * vecs;
	struct multipath * mpp;
	int i;
#ifdef USE_SYSTEMD
	int startup_done = 0;
#endif
	int rc;
	int pid_fd = -1;
	struct config *conf;
	char *envp;
	int queue_without_daemon;
	enum daemon_status state;

	/* keep the daemon resident: it must run under memory pressure */
	mlockall(MCL_CURRENT | MCL_FUTURE);
	signal_init();
	rcu_init();

	setup_thread_attr(&misc_attr, 64 * 1024, 0);
	setup_thread_attr(&uevent_attr, DEFAULT_UEVENT_STACKSIZE * 1024, 0);
	setup_thread_attr(&waiter_attr, 32 * 1024, 1);
	setup_thread_attr(&io_err_stat_attr, 32 * 1024, 0);

	/* logsink == 1 means: log through the dedicated syslog thread */
	if (logsink == 1) {
		setup_thread_attr(&log_attr, 64 * 1024, 0);
		log_thread_start(&log_attr);
		pthread_attr_destroy(&log_attr);
	}
	pid_fd = pidfile_create(DEFAULT_PIDFILE, daemon_pid);
	if (pid_fd < 0) {
		condlog(1, "failed to create pidfile");
		if (logsink == 1)
			log_thread_stop();
		exit(1);
	}

	post_config_state(DAEMON_START);

	condlog(2, "--------start up--------");
	condlog(2, "read " DEFAULT_CONFIGFILE);

	conf = load_config(DEFAULT_CONFIGFILE);
	if (!conf)
		goto failed;

	/* command-line settings override the config file */
	if (verbosity)
		conf->verbosity = verbosity;
	if (bindings_read_only)
		conf->bindings_read_only = bindings_read_only;
	uxsock_timeout = conf->uxsock_timeout;
	rcu_assign_pointer(multipath_conf, conf);
	if (init_checkers(conf->multipath_dir)) {
		condlog(0, "failed to initialize checkers");
		goto failed;
	}
	if (init_prio(conf->multipath_dir)) {
		condlog(0, "failed to initialize prioritizers");
		goto failed;
	}
	/* Failing this is non-fatal */

	init_foreign(conf->multipath_dir, conf->enable_foreign);

	/* fall back to waiter threads if the kernel lacks dm event polling */
	if (poll_dmevents)
		poll_dmevents = dmevent_poll_supported();
	setlogmask(LOG_UPTO(conf->verbosity + 3));

	envp = getenv("LimitNOFILE");

	if (envp)
		condlog(2,"Using systemd provided open fds limit of %s", envp);
	else
		set_max_fds(conf->max_fds);

	vecs = gvecs = init_vecs();
	if (!vecs)
		goto failed;

	setscheduler();
	set_oom_adj();

	/*
	 * Startup done, invalidate configuration
	 */
	conf = NULL;

	/* cleanup handler unlocks config_lock if this thread is cancelled */
	pthread_cleanup_push(config_cleanup, NULL);
	pthread_mutex_lock(&config_lock);

	__post_config_state(DAEMON_IDLE);
	rc = pthread_create(&uxlsnr_thr, &misc_attr, uxlsnrloop, vecs);
	if (!rc) {
		/* Wait for uxlsnr startup */
		while (running_state == DAEMON_IDLE)
			pthread_cond_wait(&config_cond, &config_lock);
		state = running_state;
	}
	pthread_cleanup_pop(1);

	if (rc) {
		condlog(0, "failed to create cli listener: %d", rc);
		goto failed;
	}
	else if (state != DAEMON_CONFIGURE) {
		condlog(0, "cli listener failed to start");
		goto failed;
	}

	if (poll_dmevents) {
		if (init_dmevent_waiter(vecs)) {
			condlog(0, "failed to allocate dmevents waiter info");
			goto failed;
		}
		if ((rc = pthread_create(&dmevent_thr, &misc_attr,
					 wait_dmevents, NULL))) {
			condlog(0, "failed to create dmevent waiter thread: %d",
				rc);
			goto failed;
		}
	}

	/*
	 * Start uevent listener early to catch events
	 */
	if ((rc = pthread_create(&uevent_thr, &uevent_attr, ueventloop, udev))) {
		condlog(0, "failed to create uevent thread: %d", rc);
		goto failed;
	}
	pthread_attr_destroy(&uevent_attr);

	/*
	 * start threads
	 */
	if ((rc = pthread_create(&check_thr, &misc_attr, checkerloop, vecs))) {
		condlog(0,"failed to create checker loop thread: %d", rc);
		goto failed;
	}
	if ((rc = pthread_create(&uevq_thr, &misc_attr, uevqloop, vecs))) {
		condlog(0, "failed to create uevent dispatcher: %d", rc);
		goto failed;
	}
	pthread_attr_destroy(&misc_attr);

	/*
	 * Main loop: wait for CONFIGURE or SHUTDOWN state transitions;
	 * CONFIGURE triggers (or defers) a reconfigure, SHUTDOWN breaks out.
	 */
	while (1) {
		pthread_cleanup_push(config_cleanup, NULL);
		pthread_mutex_lock(&config_lock);
		while (running_state != DAEMON_CONFIGURE &&
		       running_state != DAEMON_SHUTDOWN)
			pthread_cond_wait(&config_cond, &config_lock);
		state = running_state;
		pthread_cleanup_pop(1);
		if (state == DAEMON_SHUTDOWN)
			break;
		if (state == DAEMON_CONFIGURE) {
			pthread_cleanup_push(cleanup_lock, &vecs->lock);
			lock(&vecs->lock);
			pthread_testcancel();
			if (!need_to_delay_reconfig(vecs)) {
				reconfigure(vecs);
			} else {
				/* some map still waits for udev; retry later */
				conf = get_multipath_config();
				conf->delayed_reconfig = 1;
				put_multipath_config(conf);
			}
			lock_cleanup_pop(vecs->lock);
			post_config_state(DAEMON_IDLE);
#ifdef USE_SYSTEMD
			/* tell systemd we're up after the first configure */
			if (!startup_done) {
				sd_notify(0, "READY=1");
				startup_done = 1;
			}
#endif
		}
	}

	/* shutdown: optionally stop queueing, then drop maps and waiters */
	lock(&vecs->lock);
	conf = get_multipath_config();
	queue_without_daemon = conf->queue_without_daemon;
	put_multipath_config(conf);
	if (queue_without_daemon == QUE_NO_DAEMON_OFF)
		vector_foreach_slot(vecs->mpvec, mpp, i)
			dm_queue_if_no_path(mpp->alias, 0);
	remove_maps_and_stop_waiters(vecs);
	unlock(&vecs->lock);

	pthread_cancel(check_thr);
	pthread_cancel(uevent_thr);
	pthread_cancel(uxlsnr_thr);
	pthread_cancel(uevq_thr);
	if (poll_dmevents)
		pthread_cancel(dmevent_thr);

	pthread_join(check_thr, NULL);
	pthread_join(uevent_thr, NULL);
	pthread_join(uxlsnr_thr, NULL);
	pthread_join(uevq_thr, NULL);
	if (poll_dmevents)
		pthread_join(dmevent_thr, NULL);

	stop_io_err_stat_thread();

	lock(&vecs->lock);
	free_pathvec(vecs->pathvec, FREE_PATHS);
	vecs->pathvec = NULL;
	unlock(&vecs->lock);

	pthread_mutex_destroy(&vecs->lock.mutex);
	FREE(vecs);
	vecs = NULL;

	cleanup_foreign();
	cleanup_checkers();
	cleanup_prio();
	if (poll_dmevents)
		cleanup_dmevent_waiter();

	dm_lib_release();
	dm_lib_exit();

	/* We're done here */
	condlog(3, "unlink pidfile");
	unlink(DEFAULT_PIDFILE);

	condlog(2, "--------shut down-------");

	if (logsink == 1)
		log_thread_stop();

	/*
	 * Freeing config must be done after condlog() and dm_lib_exit(),
	 * because logging functions like dlog() and dm_write_log()
	 * reference the config.
	 */
	conf = rcu_dereference(multipath_conf);
	rcu_assign_pointer(multipath_conf, NULL);
	call_rcu(&conf->rcu, rcu_free_config);
	udev_unref(udev);
	udev = NULL;
	pthread_attr_destroy(&waiter_attr);
	pthread_attr_destroy(&io_err_stat_attr);
#ifdef _DEBUG_
	dbg_free_final(NULL);
#endif

#ifdef USE_SYSTEMD
	sd_notify(0, "ERRNO=0");
#endif
	exit(0);

failed:
#ifdef USE_SYSTEMD
	sd_notify(0, "ERRNO=1");
#endif
	if (pid_fd >= 0)
		close(pid_fd);
	exit(1);
}
3025
/*
 * Classic double-fork daemonization.  Returns 0 in the daemon
 * (grand)child, the first child's pid in the original parent, and -1
 * if the first fork fails.  The intermediate child exits immediately.
 */
static int
daemonize(void)
{
	int pid;
	int dev_null_fd;

	if( (pid = fork()) < 0){
		fprintf(stderr, "Failed first fork : %s\n", strerror(errno));
		return -1;
	}
	else if (pid != 0)
		/* parent: hand the child's pid back to main() */
		return pid;

	/* first child: new session, then fork again so the daemon can
	 * never reacquire a controlling terminal */
	setsid();

	if ( (pid = fork()) < 0)
		fprintf(stderr, "Failed second fork : %s\n", strerror(errno));
	else if (pid != 0)
		_exit(0);

	if (chdir("/") < 0)
		fprintf(stderr, "cannot chdir to '/', continuing\n");

	/* detach stdio: point stdin/stdout/stderr at /dev/null */
	dev_null_fd = open("/dev/null", O_RDWR);
	if (dev_null_fd < 0){
		fprintf(stderr, "cannot open /dev/null for input & output : %s\n",
			strerror(errno));
		_exit(0);
	}

	close(STDIN_FILENO);
	if (dup(dev_null_fd) < 0) {
		fprintf(stderr, "cannot dup /dev/null to stdin : %s\n",
			strerror(errno));
		_exit(0);
	}
	close(STDOUT_FILENO);
	if (dup(dev_null_fd) < 0) {
		fprintf(stderr, "cannot dup /dev/null to stdout : %s\n",
			strerror(errno));
		_exit(0);
	}
	close(STDERR_FILENO);
	if (dup(dev_null_fd) < 0) {
		fprintf(stderr, "cannot dup /dev/null to stderr : %s\n",
			strerror(errno));
		_exit(0);
	}
	close(dev_null_fd);
	daemon_pid = getpid();
	return 0;
}
3078
3079 int
3080 main (int argc, char *argv[])
3081 {
3082         extern char *optarg;
3083         extern int optind;
3084         int arg;
3085         int err;
3086         int foreground = 0;
3087         struct config *conf;
3088
3089         ANNOTATE_BENIGN_RACE_SIZED(&multipath_conf, sizeof(multipath_conf),
3090                                    "Manipulated through RCU");
3091         ANNOTATE_BENIGN_RACE_SIZED(&uxsock_timeout, sizeof(uxsock_timeout),
3092                 "Suppress complaints about this scalar variable");
3093
3094         logsink = 1;
3095
3096         if (getuid() != 0) {
3097                 fprintf(stderr, "need to be root\n");
3098                 exit(1);
3099         }
3100
3101         /* make sure we don't lock any path */
3102         if (chdir("/") < 0)
3103                 fprintf(stderr, "can't chdir to root directory : %s\n",
3104                         strerror(errno));
3105         umask(umask(077) | 022);
3106
3107         pthread_cond_init_mono(&config_cond);
3108
3109         udev = udev_new();
3110         libmp_udev_set_sync_support(0);
3111
3112         while ((arg = getopt(argc, argv, ":dsv:k::Bniw")) != EOF ) {
3113                 switch(arg) {
3114                 case 'd':
3115                         foreground = 1;
3116                         if (logsink > 0)
3117                                 logsink = 0;
3118                         //debug=1; /* ### comment me out ### */
3119                         break;
3120                 case 'v':
3121                         if (sizeof(optarg) > sizeof(char *) ||
3122                             !isdigit(optarg[0]))
3123                                 exit(1);
3124
3125                         verbosity = atoi(optarg);
3126                         break;
3127                 case 's':
3128                         logsink = -1;
3129                         break;
3130                 case 'k':
3131                         logsink = 0;
3132                         conf = load_config(DEFAULT_CONFIGFILE);
3133                         if (!conf)
3134                                 exit(1);
3135                         if (verbosity)
3136                                 conf->verbosity = verbosity;
3137                         uxsock_timeout = conf->uxsock_timeout;
3138                         err = uxclnt(optarg, uxsock_timeout + 100);
3139                         free_config(conf);
3140                         return err;
3141                 case 'B':
3142                         bindings_read_only = 1;
3143                         break;
3144                 case 'n':
3145                         condlog(0, "WARNING: ignoring deprecated option -n, use 'ignore_wwids = no' instead");
3146                         break;
3147                 case 'w':
3148                         poll_dmevents = 0;
3149                         break;
3150                 default:
3151                         fprintf(stderr, "Invalid argument '-%c'\n",
3152                                 optopt);
3153                         exit(1);
3154                 }
3155         }
3156         if (optind < argc) {
3157                 char cmd[CMDSIZE];
3158                 char * s = cmd;
3159                 char * c = s;
3160
3161                 logsink = 0;
3162                 conf = load_config(DEFAULT_CONFIGFILE);
3163                 if (!conf)
3164                         exit(1);
3165                 if (verbosity)
3166                         conf->verbosity = verbosity;
3167                 uxsock_timeout = conf->uxsock_timeout;
3168                 memset(cmd, 0x0, CMDSIZE);
3169                 while (optind < argc) {
3170                         if (strchr(argv[optind], ' '))
3171                                 c += snprintf(c, s + CMDSIZE - c, "\"%s\" ", argv[optind]);
3172                         else
3173                                 c += snprintf(c, s + CMDSIZE - c, "%s ", argv[optind]);
3174                         optind++;
3175                 }
3176                 c += snprintf(c, s + CMDSIZE - c, "\n");
3177                 err = uxclnt(s, uxsock_timeout + 100);
3178                 free_config(conf);
3179                 return err;
3180         }
3181
3182         if (foreground) {
3183                 if (!isatty(fileno(stdout)))
3184                         setbuf(stdout, NULL);
3185                 err = 0;
3186                 daemon_pid = getpid();
3187         } else
3188                 err = daemonize();
3189
3190         if (err < 0)
3191                 /* error */
3192                 exit(1);
3193         else if (err > 0)
3194                 /* parent dies */
3195                 exit(0);
3196         else
3197                 /* child lives */
3198                 return (child(NULL));
3199 }
3200
3201 void *  mpath_pr_event_handler_fn (void * pathp )
3202 {
3203         struct multipath * mpp;
3204         unsigned int i;
3205         int ret, isFound;
3206         struct path * pp = (struct path *)pathp;
3207         struct prout_param_descriptor *param;
3208         struct prin_resp *resp;
3209
3210         rcu_register_thread();
3211         mpp = pp->mpp;
3212
3213         resp = mpath_alloc_prin_response(MPATH_PRIN_RKEY_SA);
3214         if (!resp){
3215                 condlog(0,"%s Alloc failed for prin response", pp->dev);
3216                 goto out;
3217         }