multipathd: Do not perform POSIX mutex operations inside a signal handler
[multipath-tools/.git] / multipathd / main.c
1 /*
2  * Copyright (c) 2004, 2005 Christophe Varoqui
3  * Copyright (c) 2005 Kiyoshi Ueda, NEC
4  * Copyright (c) 2005 Benjamin Marzinski, Redhat
5  * Copyright (c) 2005 Edward Goggin, EMC
6  */
7 #include <unistd.h>
8 #include <sys/stat.h>
9 #include <libdevmapper.h>
10 #include <sys/wait.h>
11 #include <sys/mman.h>
12 #include <sys/types.h>
13 #include <fcntl.h>
14 #include <errno.h>
15 #include <sys/time.h>
16 #include <sys/resource.h>
17 #include <limits.h>
18 #include <linux/oom.h>
19 #include <libudev.h>
20 #include <urcu.h>
21 #ifdef USE_SYSTEMD
22 #include <systemd/sd-daemon.h>
23 #endif
24 #include <semaphore.h>
25 #include <time.h>
26
27 /*
28  * libcheckers
29  */
30 #include "checkers.h"
31
32 #ifdef USE_SYSTEMD
33 static int use_watchdog;
34 #endif
35
36 int uxsock_timeout;
37
38 /*
39  * libmultipath
40  */
41 #include "parser.h"
42 #include "vector.h"
43 #include "memory.h"
44 #include "config.h"
45 #include "util.h"
46 #include "hwtable.h"
47 #include "defaults.h"
48 #include "structs.h"
49 #include "blacklist.h"
50 #include "structs_vec.h"
51 #include "dmparser.h"
52 #include "devmapper.h"
53 #include "sysfs.h"
54 #include "dict.h"
55 #include "discovery.h"
56 #include "debug.h"
57 #include "propsel.h"
58 #include "uevent.h"
59 #include "switchgroup.h"
60 #include "print.h"
61 #include "configure.h"
62 #include "prio.h"
63 #include "wwids.h"
64 #include "pgpolicies.h"
65 #include "uevent.h"
66 #include "log.h"
67
68 #include "mpath_cmd.h"
69 #include "mpath_persist.h"
70
71 #include "prioritizers/alua_rtpg.h"
72
73 #include "main.h"
74 #include "pidfile.h"
75 #include "uxlsnr.h"
76 #include "uxclnt.h"
77 #include "cli.h"
78 #include "cli_handlers.h"
79 #include "lock.h"
80 #include "waiter.h"
81 #include "wwids.h"
82
83 #define FILE_NAME_SIZE 256
84 #define CMDSIZE 160
85
/*
 * Log a message about the path `pp` that must be in scope where the macro
 * is expanded. `a` is the condlog() priority. An offline path is reported
 * with a fixed text; otherwise `b` is logged when it is a non-empty string.
 * Nothing is logged for an online path with an empty `b`.
 */
#define LOG_MSG(a, b)							\
do {									\
	if (pp->offline) {						\
		condlog(a, "%s: %s - path offline",			\
			pp->mpp->alias, pp->dev);			\
	} else if (strlen(b)) {						\
		condlog(a, "%s: %s - %s",				\
			pp->mpp->alias, pp->dev, b);			\
	}								\
} while(0)
93
/* Event parameter bundle: a device name string and a multipath map pointer. */
struct mpath_event_param {
	char *devname;		/* device name string */
	struct multipath *mpp;	/* map the parameter refers to */
};
99
100 unsigned int mpath_mx_alloc_len;
101
102 int logsink;
103 int verbosity;
104 int bindings_read_only;
105 int ignore_new_devs;
/*
 * Current daemon state. post_config_state() and set_config_state() read
 * and write it with config_lock held; daemon_status() and
 * sd_notify_status() translate it to text.
 */
106 enum daemon_status running_state = DAEMON_INIT;
107 pid_t daemon_pid;
/*
 * config_lock protects running_state; config_cond is broadcast whenever
 * running_state is changed.
 */
108 pthread_mutex_t config_lock = PTHREAD_MUTEX_INITIALIZER;
109 pthread_cond_t config_cond = PTHREAD_COND_INITIALIZER;
110
111 /*
112  * global copy of vecs for use in sig handlers
113  */
114 struct vectors * gvecs;
115
116 struct udev * udev;
117
/*
 * Active configuration. Readers fetch it with rcu_dereference() inside an
 * RCU read-side section, see get_multipath_config().
 */
118 struct config *multipath_conf;
119
120 /* Local variables */
/*
 * NOTE(review): sig_atomic_t flags, presumably set by signal handlers that
 * are outside this excerpt -- confirm.
 */
121 static volatile sig_atomic_t exit_sig;
122 static volatile sig_atomic_t reconfig_sig;
123 static volatile sig_atomic_t log_reset_sig;
124
125 const char *
126 daemon_status(void)
127 {
128         switch (running_state) {
129         case DAEMON_INIT:
130                 return "init";
131         case DAEMON_START:
132                 return "startup";
133         case DAEMON_CONFIGURE:
134                 return "configure";
135         case DAEMON_IDLE:
136                 return "idle";
137         case DAEMON_RUNNING:
138                 return "running";
139         case DAEMON_SHUTDOWN:
140                 return "shutdown";
141         }
142         return NULL;
143 }
144
145 /*
146  * I love you too, systemd ...
147  */
148 const char *
149 sd_notify_status(void)
150 {
151         switch (running_state) {
152         case DAEMON_INIT:
153                 return "STATUS=init";
154         case DAEMON_START:
155                 return "STATUS=startup";
156         case DAEMON_CONFIGURE:
157                 return "STATUS=configure";
158         case DAEMON_IDLE:
159                 return "STATUS=idle";
160         case DAEMON_RUNNING:
161                 return "STATUS=running";
162         case DAEMON_SHUTDOWN:
163                 return "STATUS=shutdown";
164         }
165         return NULL;
166 }
167
168 static void config_cleanup(void *arg)
169 {
170         pthread_mutex_unlock(&config_lock);
171 }
172
173 void post_config_state(enum daemon_status state)
174 {
175         pthread_mutex_lock(&config_lock);
176         if (state != running_state) {
177                 running_state = state;
178                 pthread_cond_broadcast(&config_cond);
179 #ifdef USE_SYSTEMD
180                 sd_notify(0, sd_notify_status());
181 #endif
182         }
183         pthread_mutex_unlock(&config_lock);
184 }
185
186 int set_config_state(enum daemon_status state)
187 {
188         int rc = 0;
189
190         pthread_cleanup_push(config_cleanup, NULL);
191         pthread_mutex_lock(&config_lock);
192         if (running_state != state) {
193                 if (running_state != DAEMON_IDLE) {
194                         struct timespec ts;
195
196                         clock_gettime(CLOCK_REALTIME, &ts);
197                         ts.tv_sec += 1;
198                         rc = pthread_cond_timedwait(&config_cond,
199                                                     &config_lock, &ts);
200                 }
201                 if (!rc) {
202                         running_state = state;
203                         pthread_cond_broadcast(&config_cond);
204 #ifdef USE_SYSTEMD
205                         sd_notify(0, sd_notify_status());
206 #endif
207                 }
208         }
209         pthread_cleanup_pop(1);
210         return rc;
211 }
212
213 struct config *get_multipath_config(void)
214 {
215         rcu_read_lock();
216         return rcu_dereference(multipath_conf);
217 }
218
/*
 * Leave the RCU read-side section entered by get_multipath_config().
 * The `conf` argument itself is not used.
 */
void put_multipath_config(struct config *conf)
{
	(void)conf;
	rcu_read_unlock();
}
223
224 static int
225 need_switch_pathgroup (struct multipath * mpp, int refresh)
226 {
227         struct pathgroup * pgp;
228         struct path * pp;
229         unsigned int i, j;
230         struct config *conf;
231
232         if (!mpp || mpp->pgfailback == -FAILBACK_MANUAL)
233                 return 0;
234
235         /*
236          * Refresh path priority values
237          */
238         if (refresh) {
239                 vector_foreach_slot (mpp->pg, pgp, i) {
240                         vector_foreach_slot (pgp->paths, pp, j) {
241                                 conf = get_multipath_config();
242                                 pathinfo(pp, conf, DI_PRIO);
243                                 put_multipath_config(conf);
244                         }
245                 }
246         }
247
248         if (!mpp->pg || VECTOR_SIZE(mpp->paths) == 0)
249                 return 0;
250
251         mpp->bestpg = select_path_group(mpp);
252
253         if (mpp->bestpg != mpp->nextpg)
254                 return 1;
255
256         return 0;
257 }
258
259 static void
260 switch_pathgroup (struct multipath * mpp)
261 {
262         mpp->stat_switchgroup++;
263         dm_switchgroup(mpp->alias, mpp->bestpg);
264         condlog(2, "%s: switch to path group #%i",
265                  mpp->alias, mpp->bestpg);
266 }
267
268 static int
269 coalesce_maps(struct vectors *vecs, vector nmpv)
270 {
271         struct multipath * ompp;
272         vector ompv = vecs->mpvec;
273         unsigned int i, reassign_maps;
274         struct config *conf;
275
276         conf = get_multipath_config();
277         reassign_maps = conf->reassign_maps;
278         put_multipath_config(conf);
279         vector_foreach_slot (ompv, ompp, i) {
280                 condlog(3, "%s: coalesce map", ompp->alias);
281                 if (!find_mp_by_wwid(nmpv, ompp->wwid)) {
282                         /*
283                          * remove all current maps not allowed by the
284                          * current configuration
285                          */
286                         if (dm_flush_map(ompp->alias)) {
287                                 condlog(0, "%s: unable to flush devmap",
288                                         ompp->alias);
289                                 /*
290                                  * may be just because the device is open
291                                  */
292                                 if (setup_multipath(vecs, ompp) != 0) {
293                                         i--;
294                                         continue;
295                                 }
296                                 if (!vector_alloc_slot(nmpv))
297                                         return 1;
298
299                                 vector_set_slot(nmpv, ompp);
300
301                                 vector_del_slot(ompv, i);
302                                 i--;
303                         }
304                         else {
305                                 dm_lib_release();
306                                 condlog(2, "%s devmap removed", ompp->alias);
307                         }
308                 } else if (reassign_maps) {
309                         condlog(3, "%s: Reassign existing device-mapper"
310                                 " devices", ompp->alias);
311                         dm_reassign(ompp->alias);
312                 }
313         }
314         return 0;
315 }
316
317 void
318 sync_map_state(struct multipath *mpp)
319 {
320         struct pathgroup *pgp;
321         struct path *pp;
322         unsigned int i, j;
323
324         if (!mpp->pg)
325                 return;
326
327         vector_foreach_slot (mpp->pg, pgp, i){
328                 vector_foreach_slot (pgp->paths, pp, j){
329                         if (pp->state == PATH_UNCHECKED ||
330                             pp->state == PATH_WILD ||
331                             pp->state == PATH_DELAYED)
332                                 continue;
333                         if ((pp->dmstate == PSTATE_FAILED ||
334                              pp->dmstate == PSTATE_UNDEF) &&
335                             (pp->state == PATH_UP || pp->state == PATH_GHOST))
336                                 dm_reinstate_path(mpp->alias, pp->dev_t);
337                         else if ((pp->dmstate == PSTATE_ACTIVE ||
338                                   pp->dmstate == PSTATE_UNDEF) &&
339                                  (pp->state == PATH_DOWN ||
340                                   pp->state == PATH_SHAKY))
341                                 dm_fail_path(mpp->alias, pp->dev_t);
342                 }
343         }
344 }
345
346 static void
347 sync_maps_state(vector mpvec)
348 {
349         unsigned int i;
350         struct multipath *mpp;
351
352         vector_foreach_slot (mpvec, mpp, i)
353                 sync_map_state(mpp);
354 }
355
356 static int
357 flush_map(struct multipath * mpp, struct vectors * vecs, int nopaths)
358 {
359         int r;
360
361         if (nopaths)
362                 r = dm_flush_map_nopaths(mpp->alias, mpp->deferred_remove);
363         else
364                 r = dm_flush_map(mpp->alias);
365         /*
366          * clear references to this map before flushing so we can ignore
367          * the spurious uevent we may generate with the dm_flush_map call below
368          */
369         if (r) {
370                 /*
371                  * May not really be an error -- if the map was already flushed
372                  * from the device mapper by dmsetup(8) for instance.
373                  */
374                 if (r == 1)
375                         condlog(0, "%s: can't flush", mpp->alias);
376                 else {
377                         condlog(2, "%s: devmap deferred remove", mpp->alias);
378                         mpp->deferred_remove = DEFERRED_REMOVE_IN_PROGRESS;
379                 }
380                 return r;
381         }
382         else {
383                 dm_lib_release();
384                 condlog(2, "%s: map flushed", mpp->alias);
385         }
386
387         orphan_paths(vecs->pathvec, mpp);
388         remove_map_and_stop_waiter(mpp, vecs, 1);
389
390         return 0;
391 }
392
393 int
394 update_map (struct multipath *mpp, struct vectors *vecs)
395 {
396         int retries = 3;
397         char params[PARAMS_SIZE] = {0};
398
399 retry:
400         condlog(4, "%s: updating new map", mpp->alias);
401         if (adopt_paths(vecs->pathvec, mpp)) {
402                 condlog(0, "%s: failed to adopt paths for new map update",
403                         mpp->alias);
404                 retries = -1;
405                 goto fail;
406         }
407         verify_paths(mpp, vecs);
408         mpp->flush_on_last_del = FLUSH_UNDEF;
409         mpp->action = ACT_RELOAD;
410
411         if (setup_map(mpp, params, PARAMS_SIZE)) {
412                 condlog(0, "%s: failed to setup new map in update", mpp->alias);
413                 retries = -1;
414                 goto fail;
415         }
416         if (domap(mpp, params, 1) <= 0 && retries-- > 0) {
417                 condlog(0, "%s: map_udate sleep", mpp->alias);
418                 sleep(1);
419                 goto retry;
420         }
421         dm_lib_release();
422
423 fail:
424         if (setup_multipath(vecs, mpp))
425                 return 1;
426
427         sync_map_state(mpp);
428
429         if (retries < 0)
430                 condlog(0, "%s: failed reload in new map update", mpp->alias);
431         return 0;
432 }
433
434 static int
435 uev_add_map (struct uevent * uev, struct vectors * vecs)
436 {
437         char *alias;
438         int major = -1, minor = -1, rc;
439
440         condlog(3, "%s: add map (uevent)", uev->kernel);
441         alias = uevent_get_dm_name(uev);
442         if (!alias) {
443                 condlog(3, "%s: No DM_NAME in uevent", uev->kernel);
444                 major = uevent_get_major(uev);
445                 minor = uevent_get_minor(uev);
446                 alias = dm_mapname(major, minor);
447                 if (!alias) {
448                         condlog(2, "%s: mapname not found for %d:%d",
449                                 uev->kernel, major, minor);
450                         return 1;
451                 }
452         }
453         pthread_cleanup_push(cleanup_lock, &vecs->lock);
454         lock(vecs->lock);
455         pthread_testcancel();
456         rc = ev_add_map(uev->kernel, alias, vecs);
457         lock_cleanup_pop(vecs->lock);
458         FREE(alias);
459         return rc;
460 }
461
462 int
463 ev_add_map (char * dev, char * alias, struct vectors * vecs)
464 {
465         char * refwwid;
466         struct multipath * mpp;
467         int map_present;
468         int r = 1, delayed_reconfig, reassign_maps;
469         struct config *conf;
470
471         map_present = dm_map_present(alias);
472
473         if (map_present && !dm_is_mpath(alias)) {
474                 condlog(4, "%s: not a multipath map", alias);
475                 return 0;
476         }
477
478         mpp = find_mp_by_alias(vecs->mpvec, alias);
479
480         if (mpp) {
481                 if (mpp->wait_for_udev > 1) {
482                         if (update_map(mpp, vecs))
483                                 /* setup multipathd removed the map */
484                                 return 1;
485                 }
486                 conf = get_multipath_config();
487                 delayed_reconfig = conf->delayed_reconfig;
488                 reassign_maps = conf->reassign_maps;
489                 put_multipath_config(conf);
490                 if (mpp->wait_for_udev) {
491                         mpp->wait_for_udev = 0;
492                         if (delayed_reconfig &&
493                             !need_to_delay_reconfig(vecs)) {
494                                 condlog(2, "reconfigure (delayed)");
495                                 set_config_state(DAEMON_CONFIGURE);
496                                 return 0;
497                         }
498                 }
499                 /*
500                  * Not really an error -- we generate our own uevent
501                  * if we create a multipath mapped device as a result
502                  * of uev_add_path
503                  */
504                 if (reassign_maps) {
505                         condlog(3, "%s: Reassign existing device-mapper devices",
506                                 alias);
507                         dm_reassign(alias);
508                 }
509                 return 0;
510         }
511         condlog(2, "%s: adding map", alias);
512
513         /*
514          * now we can register the map
515          */
516         if (map_present) {
517                 if ((mpp = add_map_without_path(vecs, alias))) {
518                         sync_map_state(mpp);
519                         condlog(2, "%s: devmap %s registered", alias, dev);
520                         return 0;
521                 } else {
522                         condlog(2, "%s: uev_add_map failed", dev);
523                         return 1;
524                 }
525         }
526         r = get_refwwid(CMD_NONE, dev, DEV_DEVMAP, vecs->pathvec, &refwwid);
527
528         if (refwwid) {
529                 r = coalesce_paths(vecs, NULL, refwwid, 0, CMD_NONE);
530                 dm_lib_release();
531         }
532
533         if (!r)
534                 condlog(2, "%s: devmap %s added", alias, dev);
535         else if (r == 2)
536                 condlog(2, "%s: uev_add_map %s blacklisted", alias, dev);
537         else
538                 condlog(0, "%s: uev_add_map %s failed", alias, dev);
539
540         FREE(refwwid);
541         return r;
542 }
543
544 static int
545 uev_remove_map (struct uevent * uev, struct vectors * vecs)
546 {
547         char *alias;
548         int minor;
549         struct multipath *mpp;
550
551         condlog(2, "%s: remove map (uevent)", uev->kernel);
552         alias = uevent_get_dm_name(uev);
553         if (!alias) {
554                 condlog(3, "%s: No DM_NAME in uevent, ignoring", uev->kernel);
555                 return 0;
556         }
557         minor = uevent_get_minor(uev);
558
559         pthread_cleanup_push(cleanup_lock, &vecs->lock);
560         lock(vecs->lock);
561         pthread_testcancel();
562         mpp = find_mp_by_minor(vecs->mpvec, minor);
563
564         if (!mpp) {
565                 condlog(2, "%s: devmap not registered, can't remove",
566                         uev->kernel);
567                 goto out;
568         }
569         if (strcmp(mpp->alias, alias)) {
570                 condlog(2, "%s: minor number mismatch (map %d, event %d)",
571                         mpp->alias, mpp->dmi->minor, minor);
572                 goto out;
573         }
574
575         orphan_paths(vecs->pathvec, mpp);
576         remove_map_and_stop_waiter(mpp, vecs, 1);
577 out:
578         lock_cleanup_pop(vecs->lock);
579         FREE(alias);
580         return 0;
581 }
582
583 /* Called from CLI handler */
584 int
585 ev_remove_map (char * devname, char * alias, int minor, struct vectors * vecs)
586 {
587         struct multipath * mpp;
588
589         mpp = find_mp_by_minor(vecs->mpvec, minor);
590
591         if (!mpp) {
592                 condlog(2, "%s: devmap not registered, can't remove",
593                         devname);
594                 return 1;
595         }
596         if (strcmp(mpp->alias, alias)) {
597                 condlog(2, "%s: minor number mismatch (map %d, event %d)",
598                         mpp->alias, mpp->dmi->minor, minor);
599                 return 1;
600         }
601         return flush_map(mpp, vecs, 0);
602 }
603
604 static int
605 uev_add_path (struct uevent *uev, struct vectors * vecs)
606 {
607         struct path *pp;
608         int ret = 0, i;
609         struct config *conf;
610
611         condlog(2, "%s: add path (uevent)", uev->kernel);
612         if (strstr(uev->kernel, "..") != NULL) {
613                 /*
614                  * Don't allow relative device names in the pathvec
615                  */
616                 condlog(0, "%s: path name is invalid", uev->kernel);
617                 return 1;
618         }
619
620         pthread_cleanup_push(cleanup_lock, &vecs->lock);
621         lock(vecs->lock);
622         pthread_testcancel();
623         pp = find_path_by_dev(vecs->pathvec, uev->kernel);
624         if (pp) {
625                 int r;
626
627                 condlog(0, "%s: spurious uevent, path already in pathvec",
628                         uev->kernel);
629                 if (!pp->mpp && !strlen(pp->wwid)) {
630                         condlog(3, "%s: reinitialize path", uev->kernel);
631                         udev_device_unref(pp->udev);
632                         pp->udev = udev_device_ref(uev->udev);
633                         conf = get_multipath_config();
634                         r = pathinfo(pp, conf,
635                                      DI_ALL | DI_BLACKLIST);
636                         put_multipath_config(conf);
637                         if (r == PATHINFO_OK)
638                                 ret = ev_add_path(pp, vecs);
639                         else if (r == PATHINFO_SKIPPED) {
640                                 condlog(3, "%s: remove blacklisted path",
641                                         uev->kernel);
642                                 i = find_slot(vecs->pathvec, (void *)pp);
643                                 if (i != -1)
644                                         vector_del_slot(vecs->pathvec, i);
645                                 free_path(pp);
646                         } else {
647                                 condlog(0, "%s: failed to reinitialize path",
648                                         uev->kernel);
649                                 ret = 1;
650                         }
651                 }
652         }
653         lock_cleanup_pop(vecs->lock);
654         if (pp)
655                 return ret;
656
657         /*
658          * get path vital state
659          */
660         conf = get_multipath_config();
661         ret = alloc_path_with_pathinfo(conf, uev->udev,
662                                        DI_ALL, &pp);
663         put_multipath_config(conf);
664         if (!pp) {
665                 if (ret == PATHINFO_SKIPPED)
666                         return 0;
667                 condlog(3, "%s: failed to get path info", uev->kernel);
668                 return 1;
669         }
670         pthread_cleanup_push(cleanup_lock, &vecs->lock);
671         lock(vecs->lock);
672         pthread_testcancel();
673         ret = store_path(vecs->pathvec, pp);
674         if (!ret) {
675                 conf = get_multipath_config();
676                 pp->checkint = conf->checkint;
677                 put_multipath_config(conf);
678                 ret = ev_add_path(pp, vecs);
679         } else {
680                 condlog(0, "%s: failed to store path info, "
681                         "dropping event",
682                         uev->kernel);
683                 free_path(pp);
684                 ret = 1;
685         }
686         lock_cleanup_pop(vecs->lock);
687         return ret;
688 }
689
690 /*
 * Add path `pp` (already stored in vecs->pathvec) to the map with the
 * same wwid, creating that map when none is registered yet, and push the
 * resulting table to device-mapper. uev_add_path() calls this with
 * vecs->lock held.
 *
 * A path whose map is still waiting for udev (wait_for_udev set) is
 * orphaned and the map is flagged with wait_for_udev = 2; this counts as
 * success. A path for which should_multipath() declines is orphaned and
 * also counts as success.
 *
 * On a size mismatch with the existing map the path is removed from the
 * pathvec and freed. On the other failures it is orphaned but left in the
 * pathvec.
 *
691  * returns:
692  * 0: added
693  * 1: error
694  */
695 int
696 ev_add_path (struct path * pp, struct vectors * vecs)
697 {
698         struct multipath * mpp;
699         char params[PARAMS_SIZE] = {0};
        /* shared budget for both the "retry" and the "rescan" loops below */
700         int retries = 3;
        /* set when this call created the map, so a waiter thread is needed */
701         int start_waiter = 0;
702         int ret;
703
704         /*
705          * need path UID to go any further
706          */
707         if (strlen(pp->wwid) == 0) {
708                 condlog(0, "%s: failed to get path uid", pp->dev);
709                 goto fail; /* leave path added to pathvec */
710         }
711         mpp = find_mp_by_wwid(vecs->mpvec, pp->wwid);
712         if (mpp && mpp->wait_for_udev) {
713                 mpp->wait_for_udev = 2;
714                 orphan_path(pp, "waiting for create to complete");
715                 return 0;
716         }
717
718         pp->mpp = mpp;
719 rescan:
720         if (mpp) {
                /* existing map: the new path must match the map's size */
721                 if (pp->size && mpp->size != pp->size) {
722                         condlog(0, "%s: failed to add new path %s, "
723                                 "device size mismatch",
724                                 mpp->alias, pp->dev);
725                         int i = find_slot(vecs->pathvec, (void *)pp);
726                         if (i != -1)
727                                 vector_del_slot(vecs->pathvec, i);
728                         free_path(pp);
729                         return 1;
730                 }
731
732                 condlog(4,"%s: adopting all paths for path %s",
733                         mpp->alias, pp->dev);
734                 if (adopt_paths(vecs->pathvec, mpp))
735                         goto fail; /* leave path added to pathvec */
736
737                 verify_paths(mpp, vecs);
738                 mpp->flush_on_last_del = FLUSH_UNDEF;
739                 mpp->action = ACT_RELOAD;
740         } else {
                /* no map for this wwid yet: create one if allowed */
741                 if (!should_multipath(pp, vecs->pathvec)) {
742                         orphan_path(pp, "only one path");
743                         return 0;
744                 }
745                 condlog(4,"%s: creating new map", pp->dev);
746                 if ((mpp = add_map_with_path(vecs, pp, 1))) {
747                         mpp->action = ACT_CREATE;
748                         /*
749                          * We don't depend on ACT_CREATE, as domap will
750                          * set it to ACT_NOTHING when complete.
751                          */
752                         start_waiter = 1;
753                 }
754                 if (!start_waiter)
755                         goto fail; /* leave path added to pathvec */
756         }
757
758         /* persistent reservation check*/
759         mpath_pr_event_handle(pp);
760
761         /*
762          * push the map to the device-mapper
763          */
764         if (setup_map(mpp, params, PARAMS_SIZE)) {
765                 condlog(0, "%s: failed to setup map for addition of new "
766                         "path %s", mpp->alias, pp->dev);
767                 goto fail_map;
768         }
769         /*
770          * reload the map for the multipath mapped device
771          */
772 retry:
773         ret = domap(mpp, params, 1);
774         if (ret <= 0) {
                /* a negative result is retried with the same table */
775                 if (ret < 0 && retries-- > 0) {
776                         condlog(0, "%s: retry domap for addition of new "
777                                 "path %s", mpp->alias, pp->dev);
778                         sleep(1);
779                         goto retry;
780                 }
781                 condlog(0, "%s: failed in domap for addition of new "
782                         "path %s", mpp->alias, pp->dev);
783                 /*
784                  * deal with asynchronous uevents :((
785                  */
786                 if (mpp->action == ACT_RELOAD && retries-- > 0) {
787                         condlog(0, "%s: ev_add_path sleep", mpp->alias);
788                         sleep(1);
789                         update_mpp_paths(mpp, vecs->pathvec);
790                         goto rescan;
791                 }
792                 else if (mpp->action == ACT_RELOAD)
793                         condlog(0, "%s: giving up reload", mpp->alias);
794                 else
795                         goto fail_map;
796         }
797         dm_lib_release();
798
799         /*
800          * update our state from kernel regardless of create or reload
801          */
802         if (setup_multipath(vecs, mpp))
803                 goto fail; /* if setup_multipath fails, it removes the map */
804
805         sync_map_state(mpp);
806
807         if ((mpp->action == ACT_CREATE ||
808              (mpp->action == ACT_NOTHING && start_waiter && !mpp->waiter)) &&
809             start_waiter_thread(mpp, vecs))
810                         goto fail_map;
811
        /* a negative counter means the retries above were used up */
812         if (retries >= 0) {
813                 condlog(2, "%s [%s]: path added to devmap %s",
814                         pp->dev, pp->dev_t, mpp->alias);
815                 return 0;
816         } else
817                 goto fail;
818
819 fail_map:
820         remove_map(mpp, vecs, 1);
821 fail:
822         orphan_path(pp, "failed to add path");
823         return 1;
824 }
825
826 static int
827 uev_remove_path (struct uevent *uev, struct vectors * vecs)
828 {
829         struct path *pp;
830         int ret;
831
832         condlog(2, "%s: remove path (uevent)", uev->kernel);
833         pthread_cleanup_push(cleanup_lock, &vecs->lock);
834         lock(vecs->lock);
835         pthread_testcancel();
836         pp = find_path_by_dev(vecs->pathvec, uev->kernel);
837         if (pp)
838                 ret = ev_remove_path(pp, vecs);
839         lock_cleanup_pop(vecs->lock);
840         if (!pp) {
841                 /* Not an error; path might have been purged earlier */
842                 condlog(0, "%s: path already removed", uev->kernel);
843                 return 0;
844         }
845         return ret;
846 }
847
848 int
849 ev_remove_path (struct path *pp, struct vectors * vecs)
850 {
851         struct multipath * mpp;
852         int i, retval = 0;
853         char params[PARAMS_SIZE] = {0};
854
855         /*
856          * avoid referring to the map of an orphaned path
857          */
858         if ((mpp = pp->mpp)) {
859                 /*
860                  * transform the mp->pg vector of vectors of paths
861                  * into a mp->params string to feed the device-mapper
862                  */
863                 if (update_mpp_paths(mpp, vecs->pathvec)) {
864                         condlog(0, "%s: failed to update paths",
865                                 mpp->alias);
866                         goto fail;
867                 }
868                 if ((i = find_slot(mpp->paths, (void *)pp)) != -1)
869                         vector_del_slot(mpp->paths, i);
870
871                 /*
872                  * remove the map IFF removing the last path
873                  */
874                 if (VECTOR_SIZE(mpp->paths) == 0) {
875                         char alias[WWID_SIZE];
876
877                         /*
878                          * flush_map will fail if the device is open
879                          */
880                         strncpy(alias, mpp->alias, WWID_SIZE);
881                         if (mpp->flush_on_last_del == FLUSH_ENABLED) {
882                                 condlog(2, "%s Last path deleted, disabling queueing", mpp->alias);
883                                 mpp->retry_tick = 0;
884                                 mpp->no_path_retry = NO_PATH_RETRY_FAIL;
885                                 mpp->flush_on_last_del = FLUSH_IN_PROGRESS;
886                                 dm_queue_if_no_path(mpp->alias, 0);
887                         }
888                         if (!flush_map(mpp, vecs, 1)) {
889                                 condlog(2, "%s: removed map after"
890                                         " removing all paths",
891                                         alias);
892                                 retval = 0;
893                                 goto out;
894                         }
895                         /*
896                          * Not an error, continue
897                          */
898                 }
899
900                 if (setup_map(mpp, params, PARAMS_SIZE)) {
901                         condlog(0, "%s: failed to setup map for"
902                                 " removal of path %s", mpp->alias, pp->dev);
903                         goto fail;
904                 }
905
906                 if (mpp->wait_for_udev) {
907                         mpp->wait_for_udev = 2;
908                         goto out;
909                 }
910
911                 /*
912                  * reload the map
913                  */
914                 mpp->action = ACT_RELOAD;
915                 if (domap(mpp, params, 1) <= 0) {
916                         condlog(0, "%s: failed in domap for "
917                                 "removal of path %s",
918                                 mpp->alias, pp->dev);
919                         retval = 1;
920                 } else {
921                         /*
922                          * update our state from kernel
923                          */
924                         if (setup_multipath(vecs, mpp))
925                                 return 1;
926                         sync_map_state(mpp);
927
928                         condlog(2, "%s [%s]: path removed from map %s",
929                                 pp->dev, pp->dev_t, mpp->alias);
930                 }
931         }
932
933 out:
934         if ((i = find_slot(vecs->pathvec, (void *)pp)) != -1)
935                 vector_del_slot(vecs->pathvec, i);
936
937         free_path(pp);
938
939         return retval;
940
941 fail:
942         remove_map_and_stop_waiter(mpp, vecs, 1);
943         return 1;
944 }
945
946 static int
947 uev_update_path (struct uevent *uev, struct vectors * vecs)
948 {
949         int ro, retval = 0;
950
951         ro = uevent_get_disk_ro(uev);
952
953         if (ro >= 0) {
954                 struct path * pp;
955                 struct multipath *mpp = NULL;
956
957                 condlog(2, "%s: update path write_protect to '%d' (uevent)",
958                         uev->kernel, ro);
959                 pthread_cleanup_push(cleanup_lock, &vecs->lock);
960                 lock(vecs->lock);
961                 pthread_testcancel();
962                 /*
963                  * pthread_mutex_lock() and pthread_mutex_unlock()
964                  * need to be at the same indentation level, hence
965                  * this slightly convoluted codepath.
966                  */
967                 pp = find_path_by_dev(vecs->pathvec, uev->kernel);
968                 if (pp) {
969                         if (pp->initialized == INIT_REQUESTED_UDEV) {
970                                 retval = 2;
971                         } else {
972                                 mpp = pp->mpp;
973                                 if (mpp && mpp->wait_for_udev) {
974                                         mpp->wait_for_udev = 2;
975                                         mpp = NULL;
976                                         retval = 0;
977                                 }
978                         }
979                         if (mpp) {
980                                 retval = reload_map(vecs, mpp, 0, 1);
981
982                                 condlog(2, "%s: map %s reloaded (retval %d)",
983                                         uev->kernel, mpp->alias, retval);
984                         }
985                 }
986                 lock_cleanup_pop(vecs->lock);
987                 if (!pp) {
988                         condlog(0, "%s: spurious uevent, path not found",
989                                 uev->kernel);
990                         return 1;
991                 }
992                 if (retval == 2)
993                         return uev_add_path(uev, vecs);
994         }
995
996         return retval;
997 }
998
999 static int
1000 map_discovery (struct vectors * vecs)
1001 {
1002         struct multipath * mpp;
1003         unsigned int i;
1004
1005         if (dm_get_maps(vecs->mpvec))
1006                 return 1;
1007
1008         vector_foreach_slot (vecs->mpvec, mpp, i)
1009                 if (setup_multipath(vecs, mpp))
1010                         return 1;
1011
1012         return 0;
1013 }
1014
1015 int
1016 uxsock_trigger (char * str, char ** reply, int * len, void * trigger_data)
1017 {
1018         struct vectors * vecs;
1019         int r;
1020
1021         *reply = NULL;
1022         *len = 0;
1023         vecs = (struct vectors *)trigger_data;
1024
1025         r = parse_cmd(str, reply, len, vecs, uxsock_timeout / 1000);
1026
1027         if (r > 0) {
1028                 if (r == ETIMEDOUT)
1029                         *reply = STRDUP("timeout\n");
1030                 else
1031                         *reply = STRDUP("fail\n");
1032                 *len = strlen(*reply) + 1;
1033                 r = 1;
1034         }
1035         else if (!r && *len == 0) {
1036                 *reply = STRDUP("ok\n");
1037                 *len = strlen(*reply) + 1;
1038                 r = 0;
1039         }
1040         /* else if (r < 0) leave *reply alone */
1041
1042         return r;
1043 }
1044
/*
 * Decide whether a uevent can be ignored based on its devpath.
 *
 * Only whole block devices are of interest: the devpath must contain
 * "/block/<name>", and anything of the form "/block/<name>/<more>"
 * (e.g. a partition) is discarded.
 *
 * The device name is parsed without a length limit, so partitions of
 * devices whose name is longer than 10 characters are discarded as well
 * (a fixed "%10[^/]" scan stops matching on such names). For shorter
 * names the outcome is the same as with the sscanf()-based check,
 * including the treatment of blanks, which sscanf()'s %s skips.
 *
 * Returns 1 if the event must be discarded, 0 if it must be processed.
 */
static int
uev_discard(char * devpath)
{
	static const char blockdir[] = "/block/";
	static const char blanks[] = " \t\n\v\f\r";
	const char *name;
	const char *rest;

	/*
	 * keep only block devices, discard partitions
	 */
	name = strstr(devpath, blockdir);
	if (name == NULL) {
		condlog(4, "no /block/ in '%s'", devpath);
		return 1;
	}
	name += sizeof(blockdir) - 1;

	/* nothing (or only blanks) after "/block/": no device name */
	if (name[strspn(name, blanks)] == '\0')
		goto discard;

	/* "<name>/<something>": a child of a block device */
	rest = strchr(name, '/');
	if (rest != NULL && rest != name) {
		rest++;
		if (rest[strspn(rest, blanks)] != '\0')
			goto discard;
	}
	return 0;

discard:
	condlog(4, "discard event on %s", devpath);
	return 1;
}
1066
1067 int
1068 uev_trigger (struct uevent * uev, void * trigger_data)
1069 {
1070         int r = 0;
1071         struct vectors * vecs;
1072         struct config *conf;
1073
1074         vecs = (struct vectors *)trigger_data;
1075
1076         if (uev_discard(uev->devpath))
1077                 return 0;
1078
1079         pthread_cleanup_push(config_cleanup, NULL);
1080         pthread_mutex_lock(&config_lock);
1081         if (running_state != DAEMON_IDLE &&
1082             running_state != DAEMON_RUNNING)
1083                 pthread_cond_wait(&config_cond, &config_lock);
1084         pthread_cleanup_pop(1);
1085
1086         if (running_state == DAEMON_SHUTDOWN)
1087                 return 0;
1088
1089         /*
1090          * device map event
1091          * Add events are ignored here as the tables
1092          * are not fully initialised then.
1093          */
1094         if (!strncmp(uev->kernel, "dm-", 3)) {
1095                 if (!strncmp(uev->action, "change", 6)) {
1096                         r = uev_add_map(uev, vecs);
1097                         goto out;
1098                 }
1099                 if (!strncmp(uev->action, "remove", 6)) {
1100                         r = uev_remove_map(uev, vecs);
1101                         goto out;
1102                 }
1103                 goto out;
1104         }
1105
1106         /*
1107          * path add/remove event
1108          */
1109         conf = get_multipath_config();
1110         if (filter_devnode(conf->blist_devnode, conf->elist_devnode,
1111                            uev->kernel) > 0) {
1112                 put_multipath_config(conf);
1113                 goto out;
1114         }
1115         put_multipath_config(conf);
1116
1117         if (!strncmp(uev->action, "add", 3)) {
1118                 r = uev_add_path(uev, vecs);
1119                 goto out;
1120         }
1121         if (!strncmp(uev->action, "remove", 6)) {
1122                 r = uev_remove_path(uev, vecs);
1123                 goto out;
1124         }
1125         if (!strncmp(uev->action, "change", 6)) {
1126                 r = uev_update_path(uev, vecs);
1127                 goto out;
1128         }
1129
1130 out:
1131         return r;
1132 }
1133
/*
 * Thread cleanup handler (installed with pthread_cleanup_push() by the
 * thread entry points in this file): unregister the calling thread
 * from RCU. The argument is not used.
 */
static void rcu_unregister(void *param)
{
	(void)param;

	rcu_unregister_thread();
}
1138
1139 static void *
1140 ueventloop (void * ap)
1141 {
1142         struct udev *udev = ap;
1143
1144         pthread_cleanup_push(rcu_unregister, NULL);
1145         rcu_register_thread();
1146         if (uevent_listen(udev))
1147                 condlog(0, "error starting uevent listener");
1148         pthread_cleanup_pop(1);
1149         return NULL;
1150 }
1151
1152 static void *
1153 uevqloop (void * ap)
1154 {
1155         pthread_cleanup_push(rcu_unregister, NULL);
1156         rcu_register_thread();
1157         if (uevent_dispatch(&uev_trigger, ap))
1158                 condlog(0, "error starting uevent dispatcher");
1159         pthread_cleanup_pop(1);
1160         return NULL;
1161 }
1162 static void *
1163 uxlsnrloop (void * ap)
1164 {
1165         if (cli_init()) {
1166                 condlog(1, "Failed to init uxsock listener");
1167                 return NULL;
1168         }
1169         pthread_cleanup_push(rcu_unregister, NULL);
1170         rcu_register_thread();
1171         set_handler_callback(LIST+PATHS, cli_list_paths);
1172         set_handler_callback(LIST+PATHS+FMT, cli_list_paths_fmt);
1173         set_handler_callback(LIST+PATHS+RAW+FMT, cli_list_paths_raw);
1174         set_handler_callback(LIST+PATH, cli_list_path);
1175         set_handler_callback(LIST+MAPS, cli_list_maps);
1176         set_unlocked_handler_callback(LIST+STATUS, cli_list_status);
1177         set_unlocked_handler_callback(LIST+DAEMON, cli_list_daemon);
1178         set_handler_callback(LIST+MAPS+STATUS, cli_list_maps_status);
1179         set_handler_callback(LIST+MAPS+STATS, cli_list_maps_stats);
1180         set_handler_callback(LIST+MAPS+FMT, cli_list_maps_fmt);
1181         set_handler_callback(LIST+MAPS+RAW+FMT, cli_list_maps_raw);
1182         set_handler_callback(LIST+MAPS+TOPOLOGY, cli_list_maps_topology);
1183         set_handler_callback(LIST+TOPOLOGY, cli_list_maps_topology);
1184         set_handler_callback(LIST+MAPS+JSON, cli_list_maps_json);
1185         set_handler_callback(LIST+MAP+TOPOLOGY, cli_list_map_topology);
1186         set_handler_callback(LIST+MAP+FMT, cli_list_map_fmt);
1187         set_handler_callback(LIST+MAP+RAW+FMT, cli_list_map_fmt);
1188         set_handler_callback(LIST+MAP+JSON, cli_list_map_json);
1189         set_handler_callback(LIST+CONFIG, cli_list_config);
1190         set_handler_callback(LIST+BLACKLIST, cli_list_blacklist);
1191         set_handler_callback(LIST+DEVICES, cli_list_devices);
1192         set_handler_callback(LIST+WILDCARDS, cli_list_wildcards);
1193         set_handler_callback(ADD+PATH, cli_add_path);
1194         set_handler_callback(DEL+PATH, cli_del_path);
1195         set_handler_callback(ADD+MAP, cli_add_map);
1196         set_handler_callback(DEL+MAP, cli_del_map);
1197         set_handler_callback(SWITCH+MAP+GROUP, cli_switch_group);
1198         set_unlocked_handler_callback(RECONFIGURE, cli_reconfigure);
1199         set_handler_callback(SUSPEND+MAP, cli_suspend);
1200         set_handler_callback(RESUME+MAP, cli_resume);
1201         set_handler_callback(RESIZE+MAP, cli_resize);
1202         set_handler_callback(RELOAD+MAP, cli_reload);
1203         set_handler_callback(RESET+MAP, cli_reassign);
1204         set_handler_callback(REINSTATE+PATH, cli_reinstate);
1205         set_handler_callback(FAIL+PATH, cli_fail);
1206         set_handler_callback(DISABLEQ+MAP, cli_disable_queueing);
1207         set_handler_callback(RESTOREQ+MAP, cli_restore_queueing);
1208         set_handler_callback(DISABLEQ+MAPS, cli_disable_all_queueing);
1209         set_handler_callback(RESTOREQ+MAPS, cli_restore_all_queueing);
1210         set_unlocked_handler_callback(QUIT, cli_quit);
1211         set_unlocked_handler_callback(SHUTDOWN, cli_shutdown);
1212         set_handler_callback(GETPRSTATUS+MAP, cli_getprstatus);
1213         set_handler_callback(SETPRSTATUS+MAP, cli_setprstatus);
1214         set_handler_callback(UNSETPRSTATUS+MAP, cli_unsetprstatus);
1215         set_handler_callback(FORCEQ+DAEMON, cli_force_no_daemon_q);
1216         set_handler_callback(RESTOREQ+DAEMON, cli_restore_no_daemon_q);
1217
1218         umask(077);
1219         uxsock_listen(&uxsock_trigger, ap);
1220         pthread_cleanup_pop(1);
1221         return NULL;
1222 }
1223
1224 void
1225 exit_daemon (void)
1226 {
1227         post_config_state(DAEMON_SHUTDOWN);
1228 }
1229
1230 static void
1231 fail_path (struct path * pp, int del_active)
1232 {
1233         if (!pp->mpp)
1234                 return;
1235
1236         condlog(2, "checker failed path %s in map %s",
1237                  pp->dev_t, pp->mpp->alias);
1238
1239         dm_fail_path(pp->mpp->alias, pp->dev_t);
1240         if (del_active)
1241                 update_queue_mode_del_path(pp->mpp);
1242 }
1243
1244 /*
1245  * caller must have locked the path list before calling that function
1246  */
1247 static int
1248 reinstate_path (struct path * pp, int add_active)
1249 {
1250         int ret = 0;
1251
1252         if (!pp->mpp)
1253                 return 0;
1254
1255         if (dm_reinstate_path(pp->mpp->alias, pp->dev_t)) {
1256                 condlog(0, "%s: reinstate failed", pp->dev_t);
1257                 ret = 1;
1258         } else {
1259                 condlog(2, "%s: reinstated", pp->dev_t);
1260                 if (add_active)
1261                         update_queue_mode_add_path(pp->mpp);
1262         }
1263         return ret;
1264 }
1265
1266 static void
1267 enable_group(struct path * pp)
1268 {
1269         struct pathgroup * pgp;
1270
1271         /*
1272          * if path is added through uev_add_path, pgindex can be unset.
1273          * next update_strings() will set it, upon map reload event.
1274          *
1275          * we can safely return here, because upon map reload, all
1276          * PG will be enabled.
1277          */
1278         if (!pp->mpp->pg || !pp->pgindex)
1279                 return;
1280
1281         pgp = VECTOR_SLOT(pp->mpp->pg, pp->pgindex - 1);
1282
1283         if (pgp->status == PGSTATE_DISABLED) {
1284                 condlog(2, "%s: enable group #%i", pp->mpp->alias, pp->pgindex);
1285                 dm_enablegroup(pp->mpp->alias, pp->pgindex);
1286         }
1287 }
1288
1289 static void
1290 mpvec_garbage_collector (struct vectors * vecs)
1291 {
1292         struct multipath * mpp;
1293         unsigned int i;
1294
1295         if (!vecs->mpvec)
1296                 return;
1297
1298         vector_foreach_slot (vecs->mpvec, mpp, i) {
1299                 if (mpp && mpp->alias && !dm_map_present(mpp->alias)) {
1300                         condlog(2, "%s: remove dead map", mpp->alias);
1301                         remove_map_and_stop_waiter(mpp, vecs, 1);
1302                         i--;
1303                 }
1304         }
1305 }
1306
1307 /* This is called after a path has started working again. It the multipath
1308  * device for this path uses the followover failback type, and this is the
1309  * best pathgroup, and this is the first path in the pathgroup to come back
1310  * up, then switch to this pathgroup */
1311 static int
1312 followover_should_failback(struct path * pp)
1313 {
1314         struct pathgroup * pgp;
1315         struct path *pp1;
1316         int i;
1317
1318         if (pp->mpp->pgfailback != -FAILBACK_FOLLOWOVER ||
1319             !pp->mpp->pg || !pp->pgindex ||
1320             pp->pgindex != pp->mpp->bestpg)
1321                 return 0;
1322
1323         pgp = VECTOR_SLOT(pp->mpp->pg, pp->pgindex - 1);
1324         vector_foreach_slot(pgp->paths, pp1, i) {
1325                 if (pp1 == pp)
1326                         continue;
1327                 if (pp1->chkrstate != PATH_DOWN && pp1->chkrstate != PATH_SHAKY)
1328                         return 0;
1329         }
1330         return 1;
1331 }
1332
1333 static void
1334 missing_uev_wait_tick(struct vectors *vecs)
1335 {
1336         struct multipath * mpp;
1337         unsigned int i;
1338         int timed_out = 0, delayed_reconfig;
1339         struct config *conf;
1340
1341         vector_foreach_slot (vecs->mpvec, mpp, i) {
1342                 if (mpp->wait_for_udev && --mpp->uev_wait_tick <= 0) {
1343                         timed_out = 1;
1344                         condlog(0, "%s: timeout waiting on creation uevent. enabling reloads", mpp->alias);
1345                         if (mpp->wait_for_udev > 1 && update_map(mpp, vecs)) {
1346                                 /* update_map removed map */
1347                                 i--;
1348                                 continue;
1349                         }
1350                         mpp->wait_for_udev = 0;
1351                 }
1352         }
1353
1354         conf = get_multipath_config();
1355         delayed_reconfig = conf->delayed_reconfig;
1356         put_multipath_config(conf);
1357         if (timed_out && delayed_reconfig &&
1358             !need_to_delay_reconfig(vecs)) {
1359                 condlog(2, "reconfigure (delayed)");
1360                 set_config_state(DAEMON_CONFIGURE);
1361         }
1362 }
1363
1364 static void
1365 defered_failback_tick (vector mpvec)
1366 {
1367         struct multipath * mpp;
1368         unsigned int i;
1369
1370         vector_foreach_slot (mpvec, mpp, i) {
1371                 /*
1372                  * defered failback getting sooner
1373                  */
1374                 if (mpp->pgfailback > 0 && mpp->failback_tick > 0) {
1375                         mpp->failback_tick--;
1376
1377                         if (!mpp->failback_tick && need_switch_pathgroup(mpp, 1))
1378                                 switch_pathgroup(mpp);
1379                 }
1380         }
1381 }
1382
1383 static void
1384 retry_count_tick(vector mpvec)
1385 {
1386         struct multipath *mpp;
1387         unsigned int i;
1388
1389         vector_foreach_slot (mpvec, mpp, i) {
1390                 if (mpp->retry_tick > 0) {
1391                         mpp->stat_total_queueing_time++;
1392                         condlog(4, "%s: Retrying.. No active path", mpp->alias);
1393                         if(--mpp->retry_tick == 0) {
1394                                 dm_queue_if_no_path(mpp->alias, 0);
1395                                 condlog(2, "%s: Disable queueing", mpp->alias);
1396                         }
1397                 }
1398         }
1399 }
1400
1401 int update_prio(struct path *pp, int refresh_all)
1402 {
1403         int oldpriority;
1404         struct path *pp1;
1405         struct pathgroup * pgp;
1406         int i, j, changed = 0;
1407         struct config *conf;
1408
1409         if (refresh_all) {
1410                 vector_foreach_slot (pp->mpp->pg, pgp, i) {
1411                         vector_foreach_slot (pgp->paths, pp1, j) {
1412                                 oldpriority = pp1->priority;
1413                                 conf = get_multipath_config();
1414                                 pathinfo(pp1, conf, DI_PRIO);
1415                                 put_multipath_config(conf);
1416                                 if (pp1->priority != oldpriority)
1417                                         changed = 1;
1418                         }
1419                 }
1420                 return changed;
1421         }
1422         oldpriority = pp->priority;
1423         conf = get_multipath_config();
1424         pathinfo(pp, conf, DI_PRIO);
1425         put_multipath_config(conf);
1426
1427         if (pp->priority == oldpriority)
1428                 return 0;
1429         return 1;
1430 }
1431
/*
 * Reload a map and resynchronise the daemon's view of it.
 *
 * The map is reloaded with reload_map() (refresh is passed through),
 * then set up again with setup_multipath() and finally synchronised
 * with sync_map_state().
 *
 * Returns 0 on success, 1 if the reload or the setup failed.
 */
int update_path_groups(struct multipath *mpp, struct vectors *vecs, int refresh)
{
	int failed;

	failed = reload_map(vecs, mpp, refresh, 1);
	if (failed)
		return 1;

	dm_lib_release();

	failed = setup_multipath(vecs, mpp);
	if (failed != 0)
		return 1;

	sync_map_state(mpp);
	return 0;
}
1444
1445 /*
1446  * Returns '1' if the path has been checked, '-1' if it was blacklisted
1447  * and '0' otherwise
1448  */
1449 int
1450 check_path (struct vectors * vecs, struct path * pp, int ticks)
1451 {
1452         int newstate;
1453         int new_path_up = 0;
1454         int chkr_new_path_up = 0;
1455         int add_active;
1456         int disable_reinstate = 0;
1457         int oldchkrstate = pp->chkrstate;
1458         int retrigger_tries, checkint;
1459         struct config *conf;
1460         int ret;
1461
1462         if ((pp->initialized == INIT_OK ||
1463              pp->initialized == INIT_REQUESTED_UDEV) && !pp->mpp)
1464                 return 0;
1465
1466         if (pp->tick)
1467                 pp->tick -= (pp->tick > ticks) ? ticks : pp->tick;
1468         if (pp->tick)
1469                 return 0; /* don't check this path yet */
1470
1471         conf = get_multipath_config();
1472         retrigger_tries = conf->retrigger_tries;
1473         checkint = conf->checkint;
1474         put_multipath_config(conf);
1475         if (!pp->mpp && pp->initialized == INIT_MISSING_UDEV &&
1476             pp->retriggers < retrigger_tries) {
1477                 condlog(2, "%s: triggering change event to reinitialize",
1478                         pp->dev);
1479                 pp->initialized = INIT_REQUESTED_UDEV;
1480                 pp->retriggers++;
1481                 sysfs_attr_set_value(pp->udev, "uevent", "change",
1482                                      strlen("change"));
1483                 return 0;
1484         }
1485
1486         /*
1487          * provision a next check soonest,
1488          * in case we exit abnormaly from here
1489          */
1490         pp->tick = checkint;
1491
1492         newstate = path_offline(pp);
1493         /*
1494          * Wait for uevent for removed paths;
1495          * some LLDDs like zfcp keep paths unavailable
1496          * without sending uevents.
1497          */
1498         if (newstate == PATH_REMOVED)
1499                 newstate = PATH_DOWN;
1500
1501         if (newstate == PATH_UP) {
1502                 conf = get_multipath_config();
1503                 newstate = get_state(pp, conf, 1);
1504                 put_multipath_config(conf);
1505         } else
1506                 checker_clear_message(&pp->checker);
1507
1508         if (newstate == PATH_WILD || newstate == PATH_UNCHECKED) {
1509                 condlog(2, "%s: unusable path", pp->dev);
1510                 conf = get_multipath_config();
1511                 pathinfo(pp, conf, 0);
1512                 put_multipath_config(conf);
1513                 return 1;
1514         }
1515         if (!pp->mpp) {
1516                 if (!strlen(pp->wwid) && pp->initialized != INIT_MISSING_UDEV &&
1517                     (newstate == PATH_UP || newstate == PATH_GHOST)) {
1518                         condlog(2, "%s: add missing path", pp->dev);
1519                         conf = get_multipath_config();
1520                         ret = pathinfo(pp, conf, DI_ALL | DI_BLACKLIST);
1521                         if (ret == PATHINFO_OK) {
1522                                 ev_add_path(pp, vecs);
1523                                 pp->tick = 1;
1524                         } else if (ret == PATHINFO_SKIPPED) {
1525                                 put_multipath_config(conf);
1526                                 return -1;
1527                         }
1528                         put_multipath_config(conf);
1529                 }
1530                 return 0;
1531         }
1532         /*
1533          * Async IO in flight. Keep the previous path state
1534          * and reschedule as soon as possible
1535          */
1536         if (newstate == PATH_PENDING) {
1537                 pp->tick = 1;
1538                 return 0;
1539         }
1540         /*
1541          * Synchronize with kernel state
1542          */
1543         if (update_multipath_strings(pp->mpp, vecs->pathvec, 1)) {
1544                 condlog(1, "%s: Could not synchronize with kernel state",
1545                         pp->dev);
1546                 pp->dmstate = PSTATE_UNDEF;
1547         }
1548         /* if update_multipath_strings orphaned the path, quit early */
1549         if (!pp->mpp)
1550                 return 0;
1551
1552         if ((newstate == PATH_UP || newstate == PATH_GHOST) &&
1553              pp->wait_checks > 0) {
1554                 if (pp->mpp && pp->mpp->nr_active > 0) {
1555                         pp->state = PATH_DELAYED;
1556                         pp->wait_checks--;
1557                         return 1;
1558                 } else
1559                         pp->wait_checks = 0;
1560         }
1561
1562         /*
1563          * don't reinstate failed path, if its in stand-by
1564          * and if target supports only implicit tpgs mode.
1565          * this will prevent unnecessary i/o by dm on stand-by
1566          * paths if there are no other active paths in map.
1567          */
1568         disable_reinstate = (newstate == PATH_GHOST &&
1569                             pp->mpp->nr_active == 0 &&
1570                             pp->tpgs == TPGS_IMPLICIT) ? 1 : 0;
1571
1572         pp->chkrstate = newstate;
1573         if (newstate != pp->state) {
1574                 int oldstate = pp->state;
1575                 pp->state = newstate;
1576
1577                 if (strlen(checker_message(&pp->checker)))
1578                         LOG_MSG(1, checker_message(&pp->checker));
1579
1580                 /*
1581                  * upon state change, reset the checkint
1582                  * to the shortest delay
1583                  */
1584                 conf = get_multipath_config();
1585                 pp->checkint = conf->checkint;
1586                 put_multipath_config(conf);
1587
1588                 if (newstate == PATH_DOWN || newstate == PATH_SHAKY) {
1589                         /*
1590                          * proactively fail path in the DM
1591                          */
1592                         if (oldstate == PATH_UP ||
1593                             oldstate == PATH_GHOST) {
1594                                 fail_path(pp, 1);
1595                                 if (pp->mpp->delay_wait_checks > 0 &&
1596                                     pp->watch_checks > 0) {
1597                                         pp->wait_checks = pp->mpp->delay_wait_checks;
1598                                         pp->watch_checks = 0;
1599                                 }
1600                         }else
1601                                 fail_path(pp, 0);
1602
1603                         /*
1604                          * cancel scheduled failback
1605                          */
1606                         pp->mpp->failback_tick = 0;
1607
1608                         pp->mpp->stat_path_failures++;
1609                         return 1;
1610                 }
1611
1612                 if(newstate == PATH_UP || newstate == PATH_GHOST){
1613                         if ( pp->mpp && pp->mpp->prflag ){
1614                                 /*
1615                                  * Check Persistent Reservation.
1616                                  */
1617                         condlog(2, "%s: checking persistent reservation "
1618                                 "registration", pp->dev);
1619                         mpath_pr_event_handle(pp);
1620                         }
1621                 }
1622
1623                 /*
1624                  * reinstate this path
1625                  */
1626                 if (oldstate != PATH_UP &&
1627                     oldstate != PATH_GHOST) {
1628                         if (pp->mpp->delay_watch_checks > 0)
1629                                 pp->watch_checks = pp->mpp->delay_watch_checks;
1630                         add_active = 1;
1631                 } else {
1632                         if (pp->watch_checks > 0)
1633                                 pp->watch_checks--;
1634                         add_active = 0;
1635                 }
1636                 if (!disable_reinstate && reinstate_path(pp, add_active)) {
1637                         condlog(3, "%s: reload map", pp->dev);
1638                         ev_add_path(pp, vecs);
1639                         pp->tick = 1;
1640                         return 0;
1641                 }
1642                 new_path_up = 1;
1643
1644                 if (oldchkrstate != PATH_UP && oldchkrstate != PATH_GHOST)
1645                         chkr_new_path_up = 1;
1646
1647                 /*
1648                  * if at least one path is up in a group, and
1649                  * the group is disabled, re-enable it
1650                  */
1651                 if (newstate == PATH_UP)
1652                         enable_group(pp);
1653         }
1654         else if (newstate == PATH_UP || newstate == PATH_GHOST) {
1655                 if ((pp->dmstate == PSTATE_FAILED ||
1656                     pp->dmstate == PSTATE_UNDEF) &&
1657                     !disable_reinstate) {
1658                         /* Clear IO errors */
1659                         if (reinstate_path(pp, 0)) {
1660                                 condlog(3, "%s: reload map", pp->dev);
1661                                 ev_add_path(pp, vecs);
1662                                 pp->tick = 1;
1663                                 return 0;
1664                         }
1665                 } else {
1666                         unsigned int max_checkint;
1667                         LOG_MSG(4, checker_message(&pp->checker));
1668                         conf = get_multipath_config();
1669                         max_checkint = conf->max_checkint;
1670                         put_multipath_config(conf);
1671                         if (pp->checkint != max_checkint) {
1672                                 /*
1673                                  * double the next check delay.
1674                                  * max at conf->max_checkint
1675                                  */
1676                                 if (pp->checkint < (max_checkint / 2))
1677                                         pp->checkint = 2 * pp->checkint;
1678                                 else
1679                                         pp->checkint = max_checkint;
1680
1681                                 condlog(4, "%s: delay next check %is",
1682                                         pp->dev_t, pp->checkint);
1683                         }
1684                         if (pp->watch_checks > 0)
1685                                 pp->watch_checks--;
1686                         pp->tick = pp->checkint;
1687                 }
1688         }
1689         else if (newstate == PATH_DOWN &&
1690                  strlen(checker_message(&pp->checker))) {
1691                 int log_checker_err;
1692
1693                 conf = get_multipath_config();
1694                 log_checker_err = conf->log_checker_err;
1695                 put_multipath_config(conf);
1696                 if (log_checker_err == LOG_CHKR_ERR_ONCE)
1697                         LOG_MSG(3, checker_message(&pp->checker));
1698                 else
1699                         LOG_MSG(2, checker_message(&pp->checker));
1700         }
1701
1702         pp->state = newstate;
1703
1704
1705         if (pp->mpp->wait_for_udev)
1706                 return 1;
1707         /*
1708          * path prio refreshing
1709          */
1710         condlog(4, "path prio refresh");
1711
1712         if (update_prio(pp, new_path_up) &&
1713             (pp->mpp->pgpolicyfn == (pgpolicyfn *)group_by_prio) &&
1714              pp->mpp->pgfailback == -FAILBACK_IMMEDIATE)
1715                 update_path_groups(pp->mpp, vecs, !new_path_up);
1716         else if (need_switch_pathgroup(pp->mpp, 0)) {
1717                 if (pp->mpp->pgfailback > 0 &&
1718                     (new_path_up || pp->mpp->failback_tick <= 0))
1719                         pp->mpp->failback_tick =
1720                                 pp->mpp->pgfailback + 1;
1721                 else if (pp->mpp->pgfailback == -FAILBACK_IMMEDIATE ||
1722                          (chkr_new_path_up && followover_should_failback(pp)))
1723                         switch_pathgroup(pp->mpp);
1724         }
1725         return 1;
1726 }
1727
1728 static void *
1729 checkerloop (void *ap)
1730 {
1731         struct vectors *vecs;
1732         struct path *pp;
1733         int count = 0;
1734         unsigned int i;
1735         struct itimerval timer_tick_it;
1736         struct timeval last_time;
1737         struct config *conf;
1738
1739         pthread_cleanup_push(rcu_unregister, NULL);
1740         rcu_register_thread();
1741         mlockall(MCL_CURRENT | MCL_FUTURE);
1742         vecs = (struct vectors *)ap;
1743         condlog(2, "path checkers start up");
1744
1745         /*
1746          * init the path check interval
1747          */
1748         vector_foreach_slot (vecs->pathvec, pp, i) {
1749                 conf = get_multipath_config();
1750                 pp->checkint = conf->checkint;
1751                 put_multipath_config(conf);
1752         }
1753
1754         /* Tweak start time for initial path check */
1755         if (gettimeofday(&last_time, NULL) != 0)
1756                 last_time.tv_sec = 0;
1757         else
1758                 last_time.tv_sec -= 1;
1759
1760         while (1) {
1761                 struct timeval diff_time, start_time, end_time;
1762                 int num_paths = 0, ticks = 0, signo, strict_timing, rc = 0;
1763                 sigset_t mask;
1764
1765                 if (gettimeofday(&start_time, NULL) != 0)
1766                         start_time.tv_sec = 0;
1767                 if (start_time.tv_sec && last_time.tv_sec) {
1768                         timersub(&start_time, &last_time, &diff_time);
1769                         condlog(4, "tick (%lu.%06lu secs)",
1770                                 diff_time.tv_sec, diff_time.tv_usec);
1771                         last_time.tv_sec = start_time.tv_sec;
1772                         last_time.tv_usec = start_time.tv_usec;
1773                         ticks = diff_time.tv_sec;
1774                 } else {
1775                         ticks = 1;
1776                         condlog(4, "tick (%d ticks)", ticks);
1777                 }
1778 #ifdef USE_SYSTEMD
1779                 if (use_watchdog)
1780                         sd_notify(0, "WATCHDOG=1");
1781 #endif
1782                 rc = set_config_state(DAEMON_RUNNING);
1783                 if (rc == ETIMEDOUT) {
1784                         condlog(4, "timeout waiting for DAEMON_IDLE");
1785                         continue;
1786                 }
1787                 if (vecs->pathvec) {
1788                         pthread_cleanup_push(cleanup_lock, &vecs->lock);
1789                         lock(vecs->lock);
1790                         pthread_testcancel();
1791                         vector_foreach_slot (vecs->pathvec, pp, i) {
1792                                 rc = check_path(vecs, pp, ticks);
1793                                 if (rc < 0) {
1794                                         vector_del_slot(vecs->pathvec, i);
1795                                         free_path(pp);
1796                                         i--;
1797                                 } else
1798                                         num_paths += rc;
1799                         }
1800                         lock_cleanup_pop(vecs->lock);
1801                 }
1802                 if (vecs->mpvec) {
1803                         pthread_cleanup_push(cleanup_lock, &vecs->lock);
1804                         lock(vecs->lock);
1805                         pthread_testcancel();
1806                         defered_failback_tick(vecs->mpvec);
1807                         retry_count_tick(vecs->mpvec);
1808                         missing_uev_wait_tick(vecs);
1809                         lock_cleanup_pop(vecs->lock);
1810                 }
1811                 if (count)
1812                         count--;
1813                 else {
1814                         pthread_cleanup_push(cleanup_lock, &vecs->lock);
1815                         lock(vecs->lock);
1816                         pthread_testcancel();
1817                         condlog(4, "map garbage collection");
1818                         mpvec_garbage_collector(vecs);
1819                         count = MAPGCINT;
1820                         lock_cleanup_pop(vecs->lock);
1821                 }
1822
1823                 diff_time.tv_usec = 0;
1824                 if (start_time.tv_sec &&
1825                     gettimeofday(&end_time, NULL) == 0) {
1826                         timersub(&end_time, &start_time, &diff_time);
1827                         if (num_paths) {
1828                                 unsigned int max_checkint;
1829
1830                                 condlog(3, "checked %d path%s in %lu.%06lu secs",
1831                                         num_paths, num_paths > 1 ? "s" : "",
1832                                         diff_time.tv_sec, diff_time.tv_usec);
1833                                 conf = get_multipath_config();
1834                                 max_checkint = conf->max_checkint;
1835                                 put_multipath_config(conf);
1836                                 if (diff_time.tv_sec > max_checkint)
1837                                         condlog(1, "path checkers took longer "
1838                                                 "than %lu seconds, consider "
1839                                                 "increasing max_polling_interval",
1840                                                 diff_time.tv_sec);
1841                         }
1842                 }
1843
1844                 post_config_state(DAEMON_IDLE);
1845                 conf = get_multipath_config();
1846                 strict_timing = conf->strict_timing;
1847                 put_multipath_config(conf);
1848                 if (!strict_timing)
1849                         sleep(1);
1850                 else {
1851                         timer_tick_it.it_interval.tv_sec = 0;
1852                         timer_tick_it.it_interval.tv_usec = 0;
1853                         if (diff_time.tv_usec) {
1854                                 timer_tick_it.it_value.tv_sec = 0;
1855                                 timer_tick_it.it_value.tv_usec =
1856                                         (unsigned long)1000000 - diff_time.tv_usec;
1857                         } else {
1858                                 timer_tick_it.it_value.tv_sec = 1;
1859                                 timer_tick_it.it_value.tv_usec = 0;
1860                         }
1861                         setitimer(ITIMER_REAL, &timer_tick_it, NULL);
1862
1863                         sigemptyset(&mask);
1864                         sigaddset(&mask, SIGALRM);
1865                         condlog(3, "waiting for %lu.%06lu secs",
1866                                 timer_tick_it.it_value.tv_sec,
1867                                 timer_tick_it.it_value.tv_usec);
1868                         if (sigwait(&mask, &signo) != 0) {
1869                                 condlog(3, "sigwait failed with error %d",
1870                                         errno);
1871                                 conf = get_multipath_config();
1872                                 conf->strict_timing = 0;
1873                                 put_multipath_config(conf);
1874                                 break;
1875                         }
1876                 }
1877         }
1878         pthread_cleanup_pop(1);
1879         return NULL;
1880 }
1881
1882 int
1883 configure (struct vectors * vecs, int start_waiters)
1884 {
1885         struct multipath * mpp;
1886         struct path * pp;
1887         vector mpvec;
1888         int i, ret;
1889         struct config *conf;
1890
1891         if (!vecs->pathvec && !(vecs->pathvec = vector_alloc()))
1892                 return 1;
1893
1894         if (!vecs->mpvec && !(vecs->mpvec = vector_alloc()))
1895                 return 1;
1896
1897         if (!(mpvec = vector_alloc()))
1898                 return 1;
1899
1900         /*
1901          * probe for current path (from sysfs) and map (from dm) sets
1902          */
1903         ret = path_discovery(vecs->pathvec, DI_ALL);
1904         if (ret < 0)
1905                 return 1;
1906
1907         vector_foreach_slot (vecs->pathvec, pp, i){
1908                 conf = get_multipath_config();
1909                 if (filter_path(conf, pp) > 0){
1910                         vector_del_slot(vecs->pathvec, i);
1911                         free_path(pp);
1912                         i--;
1913                 }
1914                 else
1915                         pp->checkint = conf->checkint;
1916                 put_multipath_config(conf);
1917         }
1918         if (map_discovery(vecs))
1919                 return 1;
1920
1921         /*
1922          * create new set of maps & push changed ones into dm
1923          */
1924         if (coalesce_paths(vecs, mpvec, NULL, 1, CMD_NONE))
1925                 return 1;
1926
1927         /*
1928          * may need to remove some maps which are no longer relevant
1929          * e.g., due to blacklist changes in conf file
1930          */
1931         if (coalesce_maps(vecs, mpvec))
1932                 return 1;
1933
1934         dm_lib_release();
1935
1936         sync_maps_state(mpvec);
1937         vector_foreach_slot(mpvec, mpp, i){
1938                 remember_wwid(mpp->wwid);
1939                 update_map_pr(mpp);
1940         }
1941
1942         /*
1943          * purge dm of old maps
1944          */
1945         remove_maps(vecs);
1946
1947         /*
1948          * save new set of maps formed by considering current path state
1949          */
1950         vector_free(vecs->mpvec);
1951         vecs->mpvec = mpvec;
1952
1953         /*
1954          * start dm event waiter threads for these new maps
1955          */
1956         vector_foreach_slot(vecs->mpvec, mpp, i) {
1957                 if (setup_multipath(vecs, mpp))
1958                         return 1;
1959                 if (start_waiters)
1960                         if (start_waiter_thread(mpp, vecs))
1961                                 return 1;
1962         }
1963         return 0;
1964 }
1965
1966 int
1967 need_to_delay_reconfig(struct vectors * vecs)
1968 {
1969         struct multipath *mpp;
1970         int i;
1971
1972         if (!VECTOR_SIZE(vecs->mpvec))
1973                 return 0;
1974
1975         vector_foreach_slot(vecs->mpvec, mpp, i) {
1976                 if (mpp->wait_for_udev)
1977                         return 1;
1978         }
1979         return 0;
1980 }
1981
1982 void rcu_free_config(struct rcu_head *head)
1983 {
1984         struct config *conf = container_of(head, struct config, rcu);
1985
1986         free_config(conf);
1987 }
1988
1989 int
1990 reconfigure (struct vectors * vecs)
1991 {
1992         struct config * old, *conf;
1993
1994         conf = load_config(DEFAULT_CONFIGFILE);
1995         if (!conf)
1996                 return 1;
1997
1998         /*
1999          * free old map and path vectors ... they use old conf state
2000          */
2001         if (VECTOR_SIZE(vecs->mpvec))
2002                 remove_maps_and_stop_waiters(vecs);
2003
2004         free_pathvec(vecs->pathvec, FREE_PATHS);
2005         vecs->pathvec = NULL;
2006
2007         /* Re-read any timezone changes */
2008         tzset();
2009
2010         dm_drv_version(conf->version, TGT_MPATH);
2011         if (verbosity)
2012                 conf->verbosity = verbosity;
2013         if (bindings_read_only)
2014                 conf->bindings_read_only = bindings_read_only;
2015         if (ignore_new_devs)
2016                 conf->ignore_new_devs = ignore_new_devs;
2017         uxsock_timeout = conf->uxsock_timeout;
2018
2019         old = rcu_dereference(multipath_conf);
2020         rcu_assign_pointer(multipath_conf, conf);
2021         call_rcu(&old->rcu, rcu_free_config);
2022
2023         configure(vecs, 1);
2024
2025
2026         return 0;
2027 }
2028
2029 static struct vectors *
2030 init_vecs (void)
2031 {
2032         struct vectors * vecs;
2033
2034         vecs = (struct vectors *)MALLOC(sizeof(struct vectors));
2035
2036         if (!vecs)
2037                 return NULL;
2038
2039         vecs->lock.mutex =
2040                 (pthread_mutex_t *)MALLOC(sizeof(pthread_mutex_t));
2041
2042         if (!vecs->lock.mutex)
2043                 goto out;
2044
2045         pthread_mutex_init(vecs->lock.mutex, NULL);
2046         vecs->lock.depth = 0;
2047
2048         return vecs;
2049
2050 out:
2051         FREE(vecs);
2052         condlog(0, "failed to init paths");
2053         return NULL;
2054 }
2055
/*
 * Install @func as the handler for @signo with an empty handler mask
 * and no flags.
 *
 * Returns the previously installed handler, or SIG_ERR if sigaction()
 * fails.
 */
static void *
signal_set(int signo, void (*func) (int))
{
	struct sigaction new_act;
	struct sigaction old_act;

	new_act.sa_flags = 0;
	new_act.sa_handler = func;
	sigemptyset(&new_act.sa_mask);

	if (sigaction(signo, &new_act, &old_act) < 0)
		return (SIG_ERR);

	return (old_act.sa_handler);
}
2074
2075 void
2076 handle_signals(void)
2077 {
2078         if (exit_sig) {
2079                 condlog(2, "exit (signal)");
2080                 exit_daemon();
2081         }
2082         if (reconfig_sig) {
2083                 condlog(2, "reconfigure (signal)");
2084                 set_config_state(DAEMON_CONFIGURE);
2085         }
2086         if (log_reset_sig) {
2087                 condlog(2, "reset log (signal)");
2088                 pthread_mutex_lock(&logq_lock);
2089                 log_reset("multipathd");
2090                 pthread_mutex_unlock(&logq_lock);
2091         }
2092         exit_sig = 0;
2093         reconfig_sig = 0;
2094         log_reset_sig = 0;
2095 }
2096
2097 static void
2098 sighup (int sig)
2099 {
2100         reconfig_sig = 1;
2101 }
2102
2103 static void
2104 sigend (int sig)
2105 {
2106         exit_sig = 1;
2107 }
2108
2109 static void
2110 sigusr1 (int sig)
2111 {
2112         log_reset_sig = 1;
2113 }
2114
/*
 * SIGUSR2 handler: intentionally does nothing.
 *
 * Formatted logging is not async-signal-safe, so no message is
 * emitted from signal context.
 * NOTE(review): presumably the handler is installed only so that
 * delivery of SIGUSR2 interrupts a blocking call instead of being
 * ignored - confirm against the senders of SIGUSR2.
 */
static void
sigusr2 (int sig)
{
	(void)sig;
}
2120
2121 static void
2122 signal_init(void)
2123 {
2124         sigset_t set;
2125
2126         sigemptyset(&set);
2127         sigaddset(&set, SIGHUP);
2128         sigaddset(&set, SIGUSR1);
2129         sigaddset(&set, SIGUSR2);
2130         sigaddset(&set, SIGALRM);
2131         pthread_sigmask(SIG_BLOCK, &set, NULL);
2132
2133         signal_set(SIGHUP, sighup);
2134         signal_set(SIGUSR1, sigusr1);
2135         signal_set(SIGUSR2, sigusr2);
2136         signal_set(SIGINT, sigend);
2137         signal_set(SIGTERM, sigend);
2138         signal(SIGPIPE, SIG_IGN);
2139 }
2140
/*
 * Request SCHED_RR at priority 99 for the calling process; a failure
 * is logged as a warning and otherwise ignored.
 */
static void
setscheduler (void)
{
	static struct sched_param rt_param = {
		.sched_priority = 99
	};

	if (sched_setscheduler(0, SCHED_RR, &rt_param) == -1)
		condlog(LOG_WARNING, "Could not set SCHED_RR at priority 99");
}
2155
2156 static void
2157 set_oom_adj (void)
2158 {
2159 #ifdef OOM_SCORE_ADJ_MIN
2160         int retry = 1;
2161         char *file = "/proc/self/oom_score_adj";
2162         int score = OOM_SCORE_ADJ_MIN;
2163 #else
2164         int retry = 0;
2165         char *file = "/proc/self/oom_adj";
2166         int score = OOM_ADJUST_MIN;
2167 #endif
2168         FILE *fp;
2169         struct stat st;
2170         char *envp;
2171
2172         envp = getenv("OOMScoreAdjust");
2173         if (envp) {
2174                 condlog(3, "Using systemd provided OOMScoreAdjust");
2175                 return;
2176         }
2177         do {
2178                 if (stat(file, &st) == 0){
2179                         fp = fopen(file, "w");
2180                         if (!fp) {
2181                                 condlog(0, "couldn't fopen %s : %s", file,
2182                                         strerror(errno));
2183                                 return;
2184                         }
2185                         fprintf(fp, "%i", score);
2186                         fclose(fp);
2187                         return;
2188                 }
2189                 if (errno != ENOENT) {
2190                         condlog(0, "couldn't stat %s : %s", file,
2191                                 strerror(errno));
2192                         return;
2193                 }
2194 #ifdef OOM_ADJUST_MIN
2195                 file = "/proc/self/oom_adj";
2196                 score = OOM_ADJUST_MIN;
2197 #else
2198                 retry = 0;
2199 #endif
2200         } while (retry--);
2201         condlog(0, "couldn't adjust oom score");
2202 }
2203
2204 static int
2205 child (void * param)
2206 {
2207         pthread_t check_thr, uevent_thr, uxlsnr_thr, uevq_thr;
2208         pthread_attr_t log_attr, misc_attr, uevent_attr;
2209         struct vectors * vecs;
2210         struct multipath * mpp;
2211         int i;
2212 #ifdef USE_SYSTEMD
2213         unsigned long checkint;
2214 #endif
2215         int rc;
2216         int pid_fd = -1;
2217         struct config *conf;
2218         char *envp;
2219
2220         mlockall(MCL_CURRENT | MCL_FUTURE);
2221         signal_init();
2222         rcu_init();
2223
2224         setup_thread_attr(&misc_attr, 64 * 1024, 1);
2225         setup_thread_attr(&uevent_attr, DEFAULT_UEVENT_STACKSIZE * 1024, 1);
2226         setup_thread_attr(&waiter_attr, 32 * 1024, 1);
2227
2228         if (logsink == 1) {
2229                 setup_thread_attr(&log_attr, 64 * 1024, 0);
2230                 log_thread_start(&log_attr);
2231                 pthread_attr_destroy(&log_attr);
2232         }
2233         pid_fd = pidfile_create(DEFAULT_PIDFILE, daemon_pid);
2234         if (pid_fd < 0) {
2235                 condlog(1, "failed to create pidfile");
2236                 if (logsink == 1)
2237                         log_thread_stop();
2238                 exit(1);
2239         }
2240
2241         post_config_state(DAEMON_START);
2242
2243         condlog(2, "--------start up--------");
2244         condlog(2, "read " DEFAULT_CONFIGFILE);
2245
2246         conf = load_config(DEFAULT_CONFIGFILE);
2247         if (!conf)
2248                 goto failed;
2249
2250         if (verbosity)
2251                 conf->verbosity = verbosity;
2252         if (bindings_read_only)
2253                 conf->bindings_read_only = bindings_read_only;
2254         if (ignore_new_devs)
2255                 conf->ignore_new_devs = ignore_new_devs;
2256         uxsock_timeout = conf->uxsock_timeout;
2257         rcu_assign_pointer(multipath_conf, conf);
2258         dm_init(conf->verbosity);
2259         dm_drv_version(conf->version, TGT_MPATH);
2260         if (init_checkers(conf->multipath_dir)) {
2261                 condlog(0, "failed to initialize checkers");
2262                 goto failed;
2263         }
2264         if (init_prio(conf->multipath_dir)) {
2265                 condlog(0, "failed to initialize prioritizers");
2266                 goto failed;
2267         }
2268
2269         setlogmask(LOG_UPTO(conf->verbosity + 3));
2270
2271         envp = getenv("LimitNOFILE");
2272
2273         if (envp) {
2274                 condlog(2,"Using systemd provided open fds limit of %s", envp);
2275         } else if (conf->max_fds) {
2276                 struct rlimit fd_limit;
2277
2278                 if (getrlimit(RLIMIT_NOFILE, &fd_limit) < 0) {
2279                         condlog(0, "can't get open fds limit: %s",
2280                                 strerror(errno));
2281                         fd_limit.rlim_cur = 0;
2282                         fd_limit.rlim_max = 0;
2283                 }
2284                 if (fd_limit.rlim_cur < conf->max_fds) {
2285                         fd_limit.rlim_cur = conf->max_fds;
2286                         if (fd_limit.rlim_max < conf->max_fds)
2287                                 fd_limit.rlim_max = conf->max_fds;
2288                         if (setrlimit(RLIMIT_NOFILE, &fd_limit) < 0) {
2289                                 condlog(0, "can't set open fds limit to "
2290                                         "%lu/%lu : %s",
2291                                         fd_limit.rlim_cur, fd_limit.rlim_max,
2292                                         strerror(errno));
2293                         } else {
2294                                 condlog(3, "set open fds limit to %lu/%lu",
2295                                         fd_limit.rlim_cur, fd_limit.rlim_max);
2296                         }
2297                 }
2298
2299         }
2300
2301         vecs = gvecs = init_vecs();
2302         if (!vecs)
2303                 goto failed;
2304
2305         setscheduler();
2306         set_oom_adj();
2307
2308         dm_udev_set_sync_support(0);
2309 #ifdef USE_SYSTEMD
2310         envp = getenv("WATCHDOG_USEC");
2311         if (envp && sscanf(envp, "%lu", &checkint) == 1) {
2312                 /* Value is in microseconds */
2313                 conf->max_checkint = checkint / 1000000;
2314                 /* Rescale checkint */
2315                 if (conf->checkint > conf->max_checkint)
2316                         conf->checkint = conf->max_checkint;
2317                 else
2318                         conf->checkint = conf->max_checkint / 4;
2319                 condlog(3, "enabling watchdog, interval %d max %d",
2320                         conf->checkint, conf->max_checkint);
2321                 use_watchdog = conf->checkint;
2322         }
2323 #endif
2324         /*
2325          * Startup done, invalidate configuration
2326          */
2327         conf = NULL;
2328
2329         /*
2330          * Signal start of configuration
2331          */
2332         post_config_state(DAEMON_CONFIGURE);
2333
2334         /*
2335          * Start uevent listener early to catch events
2336          */
2337         if ((rc = pthread_create(&uevent_thr, &uevent_attr, ueventloop, udev))) {
2338                 condlog(0, "failed to create uevent thread: %d", rc);
2339                 goto failed;
2340         }
2341         pthread_attr_destroy(&uevent_attr);
2342         if ((rc = pthread_create(&uxlsnr_thr, &misc_attr, uxlsnrloop, vecs))) {
2343                 condlog(0, "failed to create cli listener: %d", rc);
2344                 goto failed;
2345         }
2346
2347         /*
2348          * start threads
2349          */
2350         if ((rc = pthread_create(&check_thr, &misc_attr, checkerloop, vecs))) {
2351                 condlog(0,"failed to create checker loop thread: %d", rc);
2352                 goto failed;
2353         }
2354         if ((rc = pthread_create(&uevq_thr, &misc_attr, uevqloop, vecs))) {
2355                 condlog(0, "failed to create uevent dispatcher: %d", rc);
2356                 goto failed;
2357         }
2358         pthread_attr_destroy(&misc_attr);
2359
2360 #ifdef USE_SYSTEMD
2361         sd_notify(0, "READY=1");
2362 #endif
2363
2364         while (running_state != DAEMON_SHUTDOWN) {
2365                 pthread_cleanup_push(config_cleanup, NULL);
2366                 pthread_mutex_lock(&config_lock);
2367                 if (running_state != DAEMON_CONFIGURE &&
2368                     running_state != DAEMON_SHUTDOWN) {
2369                         pthread_cond_wait(&config_cond, &config_lock);
2370                 }
2371                 pthread_cleanup_pop(1);
2372                 if (running_state == DAEMON_CONFIGURE) {
2373                         pthread_cleanup_push(cleanup_lock, &vecs->lock);
2374                         lock(vecs->lock);
2375                         pthread_testcancel();
2376                         if (!need_to_delay_reconfig(vecs)) {
2377                                 reconfigure(vecs);
2378                         } else {
2379                                 conf = get_multipath_config();
2380                                 conf->delayed_reconfig = 1;
2381                                 put_multipath_config(conf);
2382                         }
2383                         lock_cleanup_pop(vecs->lock);
2384                         post_config_state(DAEMON_IDLE);
2385                 }
2386         }
2387
2388         lock(vecs->lock);
2389         conf = get_multipath_config();
2390         if (conf->queue_without_daemon == QUE_NO_DAEMON_OFF)
2391                 vector_foreach_slot(vecs->mpvec, mpp, i)
2392                         dm_queue_if_no_path(mpp->alias, 0);
2393         put_multipath_config(conf);
2394         remove_maps_and_stop_waiters(vecs);
2395         unlock(vecs->lock);
2396
2397         pthread_cancel(check_thr);
2398         pthread_cancel(uevent_thr);
2399         pthread_cancel(uxlsnr_thr);
2400         pthread_cancel(uevq_thr);
2401
2402         lock(vecs->lock);
2403         free_pathvec(vecs->pathvec, FREE_PATHS);
2404         vecs->pathvec = NULL;
2405         unlock(vecs->lock);
2406         /* Now all the waitevent threads will start rushing in. */
2407         while (vecs->lock.depth > 0) {
2408                 sleep (1); /* This is weak. */
2409                 condlog(3, "Have %d wait event checkers threads to de-alloc,"
2410                         " waiting...", vecs->lock.depth);
2411         }
2412         pthread_mutex_destroy(vecs->lock.mutex);
2413         FREE(vecs->lock.mutex);
2414         vecs->lock.depth = 0;
2415         vecs->lock.mutex = NULL;
2416         FREE(vecs);
2417         vecs = NULL;
2418
2419         cleanup_checkers();
2420         cleanup_prio();
2421
2422         dm_lib_release();
2423         dm_lib_exit();
2424
2425         /* We're done here */
2426         condlog(3, "unlink pidfile");
2427         unlink(DEFAULT_PIDFILE);
2428
2429         condlog(2, "--------shut down-------");
2430
2431         if (logsink == 1)
2432                 log_thread_stop();
2433
2434         /*
2435          * Freeing config must be done after condlog() and dm_lib_exit(),
2436          * because logging functions like dlog() and dm_write_log()
2437          * reference the config.
2438          */
2439         conf = rcu_dereference(multipath_conf);
2440         rcu_assign_pointer(multipath_conf, NULL);
2441         call_rcu(&conf->rcu, rcu_free_config);
2442         udev_unref(udev);
2443         udev = NULL;
2444         pthread_attr_destroy(&waiter_attr);
2445 #ifdef _DEBUG_
2446         dbg_free_final(NULL);
2447 #endif
2448
2449 #ifdef USE_SYSTEMD
2450         sd_notify(0, "ERRNO=0");
2451 #endif
2452         exit(0);
2453
2454 failed:
2455 #ifdef USE_SYSTEMD
2456         sd_notify(0, "ERRNO=1");
2457 #endif
2458         if (pid_fd >= 0)
2459                 close(pid_fd);
2460         exit(1);
2461 }
2462
2463 static int
2464 daemonize(void)
2465 {
2466         int pid;
2467         int dev_null_fd;
2468
2469         if( (pid = fork()) < 0){
2470                 fprintf(stderr, "Failed first fork : %s\n", strerror(errno));
2471                 return -1;
2472         }
2473         else if (pid != 0)
2474                 return pid;
2475
2476         setsid();
2477
2478         if ( (pid = fork()) < 0)
2479                 fprintf(stderr, "Failed second fork : %s\n", strerror(errno));
2480         else if (pid != 0)
2481                 _exit(0);
2482
2483         if (chdir("/") < 0)
2484                 fprintf(stderr, "cannot chdir to '/', continuing\n");
2485
2486         dev_null_fd = open("/dev/null", O_RDWR);
2487         if (dev_null_fd < 0){
2488                 fprintf(stderr, "cannot open /dev/null for input & output : %s\n",
2489                         strerror(errno));
2490                 _exit(0);
2491         }
2492
2493         close(STDIN_FILENO);
2494         if (dup(dev_null_fd) < 0) {
2495                 fprintf(stderr, "cannot dup /dev/null to stdin : %s\n",
2496                         strerror(errno));
2497                 _exit(0);
2498         }
2499         close(STDOUT_FILENO);
2500         if (dup(dev_null_fd) < 0) {
2501                 fprintf(stderr, "cannot dup /dev/null to stdout : %s\n",
2502                         strerror(errno));
2503                 _exit(0);
2504         }
2505         close(STDERR_FILENO);
2506         if (dup(dev_null_fd) < 0) {
2507                 fprintf(stderr, "cannot dup /dev/null to stderr : %s\n",
2508                         strerror(errno));
2509                 _exit(0);
2510         }
2511         close(dev_null_fd);
2512         daemon_pid = getpid();
2513         return 0;
2514 }
2515
2516 int
2517 main (int argc, char *argv[])
2518 {
2519         extern char *optarg;
2520         extern int optind;
2521         int arg;
2522         int err;
2523         int foreground = 0;
2524         struct config *conf;
2525
2526         logsink = 1;
2527
2528         if (getuid() != 0) {
2529                 fprintf(stderr, "need to be root\n");
2530                 exit(1);
2531         }
2532
2533         /* make sure we don't lock any path */
2534         if (chdir("/") < 0)
2535                 fprintf(stderr, "can't chdir to root directory : %s\n",
2536                         strerror(errno));
2537         umask(umask(077) | 022);
2538
2539         udev = udev_new();
2540
2541         while ((arg = getopt(argc, argv, ":dsv:k::Bn")) != EOF ) {
2542         switch(arg) {
2543                 case 'd':
2544                         foreground = 1;
2545                         if (logsink > 0)
2546                                 logsink = 0;
2547                         //debug=1; /* ### comment me out ### */
2548                         break;
2549                 case 'v':
2550                         if (sizeof(optarg) > sizeof(char *) ||
2551                             !isdigit(optarg[0]))
2552                                 exit(1);
2553
2554                         verbosity = atoi(optarg);
2555                         break;
2556                 case 's':
2557                         logsink = -1;
2558                         break;
2559                 case 'k':
2560                         conf = load_config(DEFAULT_CONFIGFILE);
2561                         if (!conf)
2562                                 exit(1);
2563                         if (verbosity)
2564                                 conf->verbosity = verbosity;
2565                         uxclnt(optarg, uxsock_timeout + 100);
2566                         exit(0);
2567                 case 'B':
2568                         bindings_read_only = 1;
2569                         break;
2570                 case 'n':
2571                         ignore_new_devs = 1;
2572                         break;
2573                 default:
2574                         fprintf(stderr, "Invalid argument '-%c'\n",
2575                                 optopt);
2576                         exit(1);
2577                 }
2578         }
2579         if (optind < argc) {
2580                 char cmd[CMDSIZE];
2581                 char * s = cmd;
2582                 char * c = s;
2583
2584                 conf = load_config(DEFAULT_CONFIGFILE);
2585                 if (!conf)
2586                         exit(1);
2587                 if (verbosity)
2588                         conf->verbosity = verbosity;
2589                 memset(cmd, 0x0, CMDSIZE);
2590                 while (optind < argc) {
2591                         if (strchr(argv[optind], ' '))
2592                                 c += snprintf(c, s + CMDSIZE - c, "\"%s\" ", argv[optind]);
2593                         else
2594                                 c += snprintf(c, s + CMDSIZE - c, "%s ", argv[optind]);
2595                         optind++;
2596                 }
2597                 c += snprintf(c, s + CMDSIZE - c, "\n");
2598                 uxclnt(s, uxsock_timeout + 100);
2599                 exit(0);
2600         }
2601
2602         if (foreground) {
2603                 if (!isatty(fileno(stdout)))
2604                         setbuf(stdout, NULL);
2605                 err = 0;
2606                 daemon_pid = getpid();
2607         } else
2608                 err = daemonize();
2609
2610         if (err < 0)
2611                 /* error */
2612                 exit(1);
2613         else if (err > 0)
2614                 /* parent dies */
2615                 exit(0);
2616         else
2617                 /* child lives */
2618                 return (child(NULL));
2619 }
2620
2621 void *  mpath_pr_event_handler_fn (void * pathp )
2622 {
2623         struct multipath * mpp;
2624         int i,j, ret, isFound;
2625         struct path * pp = (struct path *)pathp;
2626         unsigned char *keyp;
2627         uint64_t prkey;
2628         struct prout_param_descriptor *param;
2629         struct prin_resp *resp;
2630
2631         mpp = pp->mpp;
2632
2633         resp = mpath_alloc_prin_response(MPATH_PRIN_RKEY_SA);
2634         if (!resp){
2635                 condlog(0,"%s Alloc failed for prin response", pp->dev);
2636                 return NULL;
2637         }
2638
2639         ret = prin_do_scsi_ioctl(pp->dev, MPATH_PRIN_RKEY_SA, resp, 0);
2640         if (ret != MPATH_PR_SUCCESS )
2641         {
2642                 condlog(0,"%s : pr in read keys service action failed. Error=%d", pp->dev, ret);
2643                 goto out;
2644         }
2645
2646         condlog(3, " event pr=%d addlen=%d",resp->prin_descriptor.prin_readkeys.prgeneration,
2647                         resp->prin_descriptor.prin_readkeys.additional_length );
2648
2649         if (resp->prin_descriptor.prin_readkeys.additional_length == 0 )
2650         {
2651                 condlog(1, "%s: No key found. Device may not be registered.", pp->dev);
2652                 ret = MPATH_PR_SUCCESS;
2653                 goto out;
2654         }
2655         prkey = 0;
2656         keyp = (unsigned char *)mpp->reservation_key;
2657         for (j = 0; j < 8; ++j) {
2658                 if (j > 0)
2659                         prkey <<= 8;
2660                 prkey |= *keyp;
2661                 ++keyp;
2662         }
2663         condlog(2, "Multipath  reservation_key: 0x%" PRIx64 " ", prkey);
2664
2665         isFound =0;
2666         for (i = 0; i < resp->prin_descriptor.prin_readkeys.additional_length/8; i++ )
2667         {
2668                 condlog(2, "PR IN READKEYS[%d]  reservation key:",i);
2669                 dumpHex((char *)&resp->prin_descriptor.prin_readkeys.key_list[i*8], 8 , -1);
2670                 if (!memcmp(mpp->reservation_key, &resp->prin_descriptor.prin_readkeys.key_list[i*8], 8))
2671                 {
2672                         condlog(2, "%s: pr key found in prin readkeys response", mpp->alias);
2673                         isFound =1;
2674                         break;
2675                 }
2676         }
2677         if (!isFound)
2678         {
2679                 condlog(0, "%s: Either device not registered or ", pp->dev);
2680                 condlog(0, "host is not authorised for registration. Skip path");
2681                 ret = MPATH_PR_OTHER;
2682                 goto out;
2683         }
2684
2685         param= malloc(sizeof(struct prout_param_descriptor));
2686         memset(param, 0 , sizeof(struct prout_param_descriptor));
2687
2688         for (j = 7; j >= 0; --j) {
2689                 param->sa_key[j] = (prkey & 0xff);
2690                 prkey >>= 8;
2691         }
2692         param->num_transportid = 0;
2693
2694         condlog(3, "device %s:%s", pp->dev, pp->mpp->wwid);
2695
2696         ret = prout_do_scsi_ioctl(pp->dev, MPATH_PROUT_REG_IGN_SA, 0, 0, param, 0);
2697         if (ret != MPATH_PR_SUCCESS )
2698         {
2699                 condlog(0,"%s: Reservation registration failed. Error: %d", pp->dev, ret);
2700         }
2701         mpp->prflag = 1;
2702
2703         free(param);
2704 out:
2705         free(resp);
2706         return NULL;
2707 }
2708
2709 int mpath_pr_event_handle(struct path *pp)
2710 {
2711         pthread_t thread;
2712         int rc;
2713         pthread_attr_t attr;
2714         struct multipath * mpp;
2715
2716         mpp = pp->mpp;
2717
2718         if (!mpp->reservation_key)
2719                 return -1;
2720
2721         pthread_attr_init(&attr);
2722         pthread_attr_setdetachstate(&attr, PTHREAD_CREATE_JOINABLE);
2723
2724         rc = pthread_create(&thread, NULL , mpath_pr_event_handler_fn, pp);
2725         if (rc) {
2726                 condlog(0, "%s: ERROR; return code from pthread_create() is %d", pp->dev, rc);
2727                 return -1;
2728         }
2729         pthread_attr_destroy(&attr);
2730         rc = pthread_join(thread, NULL);
2731         return 0;
2732 }