2 * Copyright (c) 2004, 2005 Christophe Varoqui
3 * Copyright (c) 2005 Kiyoshi Ueda, NEC
4 * Copyright (c) 2005 Benjamin Marzinski, Redhat
5 * Copyright (c) 2005 Edward Goggin, EMC
9 #include <libdevmapper.h>
12 #include <sys/types.h>
16 #include <linux/oom.h>
20 #include <systemd/sd-daemon.h>
22 #include <semaphore.h>
29 #include "time-util.h"
37 static int use_watchdog;
51 #include "blacklist.h"
52 #include "structs_vec.h"
54 #include "devmapper.h"
57 #include "discovery.h"
61 #include "switchgroup.h"
63 #include "configure.h"
66 #include "pgpolicies.h"
71 #include "mpath_cmd.h"
72 #include "mpath_persist.h"
74 #include "prioritizers/alua_rtpg.h"
81 #include "cli_handlers.h"
85 #include "io_err_stat.h"
88 #include "../third-party/valgrind/drd.h"
/* Fixed buffer size for file/path name strings used across the daemon. */
90 #define FILE_NAME_SIZE 256

/*
 * LOG_MSG(lvl, verb, pp): log the checker status of path @pp at verbosity
 * @lvl via condlog(); only acts when the path belongs to a map and a checker
 * is selected. NOTE(review): this excerpt omits parts of the macro body
 * (braces/else branches not visible) — no comments may be inserted between
 * the backslash-continued lines below.
 */
93 #define LOG_MSG(lvl, verb, pp) \
95 if (pp->mpp && checker_selected(&pp->checker) && \
98 condlog(lvl, "%s: %s - path offline", \
99 pp->mpp->alias, pp->dev); \
102 checker_message(&pp->checker); \
105 condlog(lvl, "%s: %s - %s checker%s", \
108 checker_name(&pp->checker), \
/*
 * Parameter bundle handed to multipath event handling code; carries the
 * map the event refers to. (Excerpt: only part of the struct is visible.)
 */
114 struct mpath_event_param
117 struct multipath *mpp;
/* Non-zero when the WWID bindings file must not be modified. */
123 int bindings_read_only;
/* Select dm-event polling vs. per-map waiter threads at build time. */
125 #ifdef NO_DMEVENTS_POLL
126 int poll_dmevents = 0;
128 int poll_dmevents = 1;
130 /* Don't access this variable without holding config_lock */
131 enum daemon_status running_state = DAEMON_INIT;
133 pthread_mutex_t config_lock = PTHREAD_MUTEX_INITIALIZER;
/* Broadcast whenever running_state changes; waiters hold config_lock. */
134 pthread_cond_t config_cond;
/*
 * Return the daemon's current state, reading running_state under
 * config_lock. (Excerpt: the assignment to st and the return statement
 * are not visible here.)
 */
136 static inline enum daemon_status get_running_state(void)
138 enum daemon_status st;
140 pthread_mutex_lock(&config_lock);
142 pthread_mutex_unlock(&config_lock);
147  * global copy of vecs for use in sig handlers
149 struct vectors * gvecs;
/* Pointer to the active configuration; accessed via get/put helpers below. */
153 struct config *multipath_conf;
155 /* Local variables */
/* Flags raised by async signal handlers, consumed outside signal context. */
156 static volatile sig_atomic_t exit_sig;
157 static volatile sig_atomic_t reconfig_sig;
158 static volatile sig_atomic_t log_reset_sig;
/*
 * NOTE(review): fragment of a helper that dispatches on the current daemon
 * state — the enclosing function header is not visible in this excerpt;
 * confirm against the full source before relying on these case labels.
 */
163 switch (get_running_state()) {
168 case DAEMON_CONFIGURE:
174 case DAEMON_SHUTDOWN:
181  * I love you too, systemd ...
/*
 * Map a daemon_status value to the "STATUS=..." string reported to
 * systemd via sd_notify(3).
 */
184 sd_notify_status(enum daemon_status state)
188 return "STATUS=init";
190 return "STATUS=startup";
191 case DAEMON_CONFIGURE:
192 return "STATUS=configure";
196 case DAEMON_SHUTDOWN:
197 return "STATUS=shutdown";
/*
 * Notify systemd of a state transition, except for idle<->running flips,
 * which are frequent and would flood dbus.
 */
203 static void do_sd_notify(enum daemon_status old_state,
204 enum daemon_status new_state)
207  * Checkerloop switches back and forth between idle and running state.
208  * No need to tell systemd each time.
209  * These notifications cause a lot of overhead on dbus.
211 if ((new_state == DAEMON_IDLE || new_state == DAEMON_RUNNING) &&
212 (old_state == DAEMON_IDLE || old_state == DAEMON_RUNNING))
214 sd_notify(0, sd_notify_status(new_state));
/* pthread cleanup handler: release config_lock if the holder is cancelled. */
218 static void config_cleanup(__attribute__((unused)) void *arg)
220 pthread_mutex_unlock(&config_lock);
224  * If the current status is @oldstate, wait for at most @ms milliseconds
225  * for the state to change, and return the new state, which may still be
/* @oldstate if the timeout expired. DAEMON_SHUTDOWN is returned immediately. */
228 enum daemon_status wait_for_state_change_if(enum daemon_status oldstate,
231 enum daemon_status st;
234 if (oldstate == DAEMON_SHUTDOWN)
235 return DAEMON_SHUTDOWN;
237 pthread_mutex_lock(&config_lock);
238 pthread_cleanup_push(config_cleanup, NULL);
240 if (st == oldstate && clock_gettime(CLOCK_MONOTONIC, &tmo) == 0) {
/* tv_nsec may now exceed 1e9; normalize_timespec() folds it into tv_sec. */
241 tmo.tv_nsec += ms * 1000 * 1000;
242 normalize_timespec(&tmo);
243 (void)pthread_cond_timedwait(&config_cond, &config_lock, &tmo);
246 pthread_cleanup_pop(1);
250 /* must be called with config_lock held */
/*
 * Set running_state to @state and wake all waiters. Once the daemon has
 * entered DAEMON_SHUTDOWN the state is never changed again.
 */
251 static void __post_config_state(enum daemon_status state)
253 if (state != running_state && running_state != DAEMON_SHUTDOWN) {
254 enum daemon_status old_state = running_state;
256 running_state = state;
257 pthread_cond_broadcast(&config_cond);
259 do_sd_notify(old_state, state);
/* Locked wrapper around __post_config_state(); safe to call from any thread. */
264 void post_config_state(enum daemon_status state)
266 pthread_mutex_lock(&config_lock);
267 pthread_cleanup_push(config_cleanup, NULL);
268 __post_config_state(state);
269 pthread_cleanup_pop(1);
/*
 * Try to move the daemon into @state. If the daemon is busy (neither idle
 * nor shutting down), wait on config_cond for it to become idle first.
 * Returns 0 on success; non-zero when the transition could not be made
 * (excerpt: the shutdown return value and the timedwait timeout argument
 * are not visible here).
 */
272 int set_config_state(enum daemon_status state)
276 pthread_cleanup_push(config_cleanup, NULL);
277 pthread_mutex_lock(&config_lock);
278 if (running_state != state) {
279 enum daemon_status old_state = running_state;
281 if (running_state == DAEMON_SHUTDOWN)
283 else if (running_state != DAEMON_IDLE) {
286 get_monotonic_time(&ts);
288 rc = pthread_cond_timedwait(&config_cond,
/* Only commit the transition if the wait succeeded and no shutdown began. */
291 if (!rc && (running_state != DAEMON_SHUTDOWN)) {
292 running_state = state;
293 pthread_cond_broadcast(&config_cond);
295 do_sd_notify(old_state, state);
299 pthread_cleanup_pop(1);
/* Acquire an RCU-protected reference to the current configuration. */
303 struct config *get_multipath_config(void)
306 return rcu_dereference(multipath_conf);
309 void put_multipath_config(__attribute__((unused)) void *arg)
/*
 * Decide whether map @mpp should switch to a different path group.
 * When @refresh is set, re-read the priority of every path first.
 * Records the best group in mpp->bestpg and compares it with mpp->nextpg
 * (excerpt: return statements are not visible here).
 */
315 need_switch_pathgroup (struct multipath * mpp, int refresh)
317 struct pathgroup * pgp;
327  * Refresh path priority values
330 vector_foreach_slot (mpp->pg, pgp, i) {
331 vector_foreach_slot (pgp->paths, pp, j) {
332 conf = get_multipath_config();
/* Cleanup handler guarantees the config ref is dropped even on cancel. */
333 pthread_cleanup_push(put_multipath_config,
335 pathinfo(pp, conf, DI_PRIO);
336 pthread_cleanup_pop(1);
341 if (!mpp->pg || VECTOR_SIZE(mpp->paths) == 0)
344 bestpg = select_path_group(mpp);
345 if (mpp->pgfailback == -FAILBACK_MANUAL)
348 mpp->bestpg = bestpg;
349 if (mpp->bestpg != mpp->nextpg)
/* Switch map @mpp to its best path group and account the event. */
356 switch_pathgroup (struct multipath * mpp)
358 mpp->stat_switchgroup++;
359 dm_switchgroup(mpp->alias, mpp->bestpg);
360 condlog(2, "%s: switch to path group #%i",
361 mpp->alias, mpp->bestpg);
/*
 * Start event monitoring for @mpp: dm-event polling when poll_dmevents
 * is set, otherwise a dedicated waiter thread.
 */
365 wait_for_events(struct multipath *mpp, struct vectors *vecs)
368 return watch_dmevents(mpp->alias);
370 return start_waiter_thread(mpp, vecs);
/* Drop @mpp from the internal tables, stopping its waiter thread first. */
374 remove_map_and_stop_waiter(struct multipath *mpp, struct vectors *vecs)
376 /* devices are automatically removed by the dmevent polling code,
377  * so they don't need to be manually removed here */
378 condlog(3, "%s: removing map from internal tables", mpp->alias);
380 stop_waiter_thread(mpp);
381 remove_map(mpp, vecs, PURGE_VEC);
/*
 * Tear down all maps: stop per-map waiter threads (or unwatch all
 * dm events when polling) before removing the maps themselves.
 */
385 remove_maps_and_stop_waiters(struct vectors *vecs)
388 struct multipath * mpp;
393 if (!poll_dmevents) {
394 vector_foreach_slot(vecs->mpvec, mpp, i)
395 stop_waiter_thread(mpp);
398 unwatch_all_dmevents();
/* Fill mpp->wwid from the device-mapper UUID, unless it is already set. */
404 set_multipath_wwid (struct multipath * mpp)
406 if (strlen(mpp->wwid))
409 dm_get_uuid(mpp->alias, mpp->wwid, WWID_SIZE);
/*
 * Refresh @mpp's device-mapper info and internal strings. On failure the
 * map is removed from the internal tables (see the out path at the end).
 * Cancels any deferred remove while usable paths remain.
 */
412 int __setup_multipath(struct vectors *vecs, struct multipath *mpp,
415 if (dm_get_info(mpp->alias, &mpp->dmi)) {
416 /* Error accessing table */
417 condlog(3, "%s: cannot access table", mpp->alias);
421 if (update_multipath_strings(mpp, vecs->pathvec, 1)) {
422 condlog(0, "%s: failed to setup multipath", mpp->alias);
427 set_no_path_retry(mpp);
428 if (VECTOR_SIZE(mpp->paths) != 0)
429 dm_cancel_deferred_remove(mpp);
/* Error path: the map is purged, so callers must not use mpp afterwards. */
434 remove_map_and_stop_waiter(mpp, vecs);
/*
 * Re-sync the map named @mapname with the kernel, then reconcile checker
 * path states with the device-mapper states: any path the kernel marks
 * failed but the checker still sees up is forced to PATH_DOWN and its
 * next check is scheduled earlier. Returns 1 when the map is gone.
 */
438 int update_multipath (struct vectors *vecs, char *mapname, int reset)
440 struct multipath *mpp;
441 struct pathgroup *pgp;
445 mpp = find_mp_by_alias(vecs->mpvec, mapname);
448 condlog(3, "%s: multipath map not found", mapname);
452 if (__setup_multipath(vecs, mpp, reset))
453 return 1; /* mpp freed in setup_multipath */
456  * compare checkers states with DM states
458 vector_foreach_slot (mpp->pg, pgp, i) {
459 vector_foreach_slot (pgp->paths, pp, j) {
460 if (pp->dmstate != PSTATE_FAILED)
463 if (pp->state != PATH_DOWN) {
465 int oldstate = pp->state;
466 unsigned int checkint;
468 conf = get_multipath_config();
469 checkint = conf->checkint;
470 put_multipath_config(conf);
471 condlog(2, "%s: mark as failed", pp->dev);
472 mpp->stat_path_failures++;
473 pp->state = PATH_DOWN;
/* Leaving PATH_UP/PATH_GHOST means one fewer active path for queueing. */
474 if (oldstate == PATH_UP ||
475 oldstate == PATH_GHOST)
476 update_queue_mode_del_path(mpp);
480  * schedule the next check earlier
482 if (pp->tick > checkint)
/*
 * Rebuild and reload map @mpp: adopt paths, regenerate the device-mapper
 * table parameters and push them to the kernel, retrying on DOMAP_FAIL.
 * When @new_map is set and creation ultimately fails, the map is removed.
 */
491 update_map (struct multipath *mpp, struct vectors *vecs, int new_map)
494 char params[PARAMS_SIZE] = {0};
497 condlog(4, "%s: updating new map", mpp->alias);
498 if (adopt_paths(vecs->pathvec, mpp)) {
499 condlog(0, "%s: failed to adopt paths for new map update",
504 verify_paths(mpp, vecs);
505 mpp->action = ACT_RELOAD;
507 if (setup_map(mpp, params, PARAMS_SIZE, vecs)) {
508 condlog(0, "%s: failed to setup new map in update", mpp->alias);
512 if (domap(mpp, params, 1) == DOMAP_FAIL && retries-- > 0) {
513 condlog(0, "%s: map_udate sleep", mpp->alias);
520 if (new_map && (retries < 0 || wait_for_events(mpp, vecs))) {
521 condlog(0, "%s: failed to create new map", mpp->alias);
522 remove_map(mpp, vecs, 1);
526 if (setup_multipath(vecs, mpp))
532 condlog(0, "%s: failed reload in new map update", mpp->alias);
/*
 * Register an existing kernel map named @alias that has no known paths
 * yet: allocate the multipath, read its DM info, WWID and config entry,
 * store it in vecs->mpvec and run a full update. On any failure the map
 * is purged and NULL is the result (excerpt: returns not visible).
 */
536 static struct multipath *
537 add_map_without_path (struct vectors *vecs, const char *alias)
539 struct multipath * mpp = alloc_multipath();
549 mpp->alias = STRDUP(alias);
551 if (dm_get_info(mpp->alias, &mpp->dmi)) {
552 condlog(3, "%s: cannot access table", mpp->alias);
555 set_multipath_wwid(mpp);
556 conf = get_multipath_config();
557 mpp->mpe = find_mpe(conf->mptable, mpp->wwid);
558 put_multipath_config(conf);
560 if (update_multipath_table(mpp, vecs->pathvec, 1))
562 if (update_multipath_status(mpp))
565 if (!vector_alloc_slot(vecs->mpvec))
568 vector_set_slot(vecs->mpvec, mpp);
570 if (update_map(mpp, vecs, 1) != 0) /* map removed */
/* Error path: drop the map from the vector and free it. */
575 remove_map(mpp, vecs, PURGE_VEC);
/*
 * Reconcile the old map vector (vecs->mpvec) with the newly discovered
 * one @nmpv: maps no longer allowed by the configuration are flushed and
 * deleted; surviving maps may get their DM devices reassigned when the
 * reassign_maps option is enabled.
 */
580 coalesce_maps(struct vectors *vecs, vector nmpv)
582 struct multipath * ompp;
583 vector ompv = vecs->mpvec;
584 unsigned int i, reassign_maps;
587 conf = get_multipath_config();
588 reassign_maps = conf->reassign_maps;
589 put_multipath_config(conf);
590 vector_foreach_slot (ompv, ompp, i) {
591 condlog(3, "%s: coalesce map", ompp->alias);
592 if (!find_mp_by_wwid(nmpv, ompp->wwid)) {
594  * remove all current maps not allowed by the
595  * current configuration
597 if (dm_flush_map(ompp->alias)) {
598 condlog(0, "%s: unable to flush devmap",
601  * may be just because the device is open
/* Flush failed (device may be open): keep the map in the new vector. */
603 if (setup_multipath(vecs, ompp) != 0) {
607 if (!vector_alloc_slot(nmpv))
610 vector_set_slot(nmpv, ompp);
612 vector_del_slot(ompv, i);
617 condlog(2, "%s devmap removed", ompp->alias);
619 } else if (reassign_maps) {
620 condlog(3, "%s: Reassign existing device-mapper"
621 " devices", ompp->alias);
622 dm_reassign(ompp->alias);
/* Sync kernel state for every map in @mpvec (excerpt: loop body elided). */
629 sync_maps_state(vector mpvec)
632 struct multipath *mpp;
634 vector_foreach_slot (mpvec, mpp, i)
/*
 * Remove map @mpp from the kernel. With @nopaths set, use the
 * deferred-remove variant. On success the map is also removed from the
 * internal tables and its paths are orphaned.
 */
639 flush_map(struct multipath * mpp, struct vectors * vecs, int nopaths)
644 r = dm_flush_map_nopaths(mpp->alias, mpp->deferred_remove);
646 r = dm_flush_map(mpp->alias);
648  * clear references to this map before flushing so we can ignore
649  * the spurious uevent we may generate with the dm_flush_map call below
653  * May not really be an error -- if the map was already flushed
654  * from the device mapper by dmsetup(8) for instance.
657 condlog(0, "%s: can't flush", mpp->alias);
659 condlog(2, "%s: devmap deferred remove", mpp->alias);
660 mpp->deferred_remove = DEFERRED_REMOVE_IN_PROGRESS;
666 condlog(2, "%s: map flushed", mpp->alias);
669 orphan_paths(vecs->pathvec, mpp, "map flushed");
670 remove_map_and_stop_waiter(mpp, vecs);
/*
 * uevent handler for map addition: resolve the map name from DM_NAME or,
 * failing that, from the major:minor pair, then call ev_add_map() under
 * the vecs lock.
 */
676 uev_add_map (struct uevent * uev, struct vectors * vecs)
679 int major = -1, minor = -1, rc;
681 condlog(3, "%s: add map (uevent)", uev->kernel);
682 alias = uevent_get_dm_name(uev);
684 condlog(3, "%s: No DM_NAME in uevent", uev->kernel);
685 major = uevent_get_major(uev);
686 minor = uevent_get_minor(uev);
687 alias = dm_mapname(major, minor);
689 condlog(2, "%s: mapname not found for %d:%d",
690 uev->kernel, major, minor);
694 pthread_cleanup_push(cleanup_lock, &vecs->lock);
696 pthread_testcancel();
697 rc = ev_add_map(uev->kernel, alias, vecs);
698 lock_cleanup_pop(vecs->lock);
704  * ev_add_map expects that the multipath device already exists in kernel
705  * before it is called. It just adds a device to multipathd or updates an
/* existing one; non-multipath maps are ignored. */
709 ev_add_map (char * dev, const char * alias, struct vectors * vecs)
711 struct multipath * mpp;
712 int delayed_reconfig, reassign_maps;
715 if (dm_is_mpath(alias) != 1) {
716 condlog(4, "%s: not a multipath map", alias);
720 mpp = find_mp_by_alias(vecs->mpvec, alias);
/* Known map: run any actions postponed while waiting for udev. */
723 if (mpp->wait_for_udev > 1) {
724 condlog(2, "%s: performing delayed actions",
726 if (update_map(mpp, vecs, 0))
727 /* setup multipathd removed the map */
730 conf = get_multipath_config();
731 delayed_reconfig = conf->delayed_reconfig;
732 reassign_maps = conf->reassign_maps;
733 put_multipath_config(conf);
734 if (mpp->wait_for_udev) {
735 mpp->wait_for_udev = 0;
736 if (delayed_reconfig &&
737 !need_to_delay_reconfig(vecs)) {
738 condlog(2, "reconfigure (delayed)");
739 set_config_state(DAEMON_CONFIGURE);
744  * Not really an error -- we generate our own uevent
745  * if we create a multipath mapped device as a result
749 condlog(3, "%s: Reassign existing device-mapper devices",
/* Unknown map: register it without paths. */
755 condlog(2, "%s: adding map", alias);
758  * now we can register the map
760 if ((mpp = add_map_without_path(vecs, alias))) {
762 condlog(2, "%s: devmap %s registered", alias, dev);
765 condlog(2, "%s: ev_add_map failed", dev);
/*
 * uevent handler for map removal: locate the map by minor number, verify
 * the alias matches the uevent's DM_NAME, then remove it from the
 * internal tables.
 */
771 uev_remove_map (struct uevent * uev, struct vectors * vecs)
775 struct multipath *mpp;
777 condlog(3, "%s: remove map (uevent)", uev->kernel);
778 alias = uevent_get_dm_name(uev);
780 condlog(3, "%s: No DM_NAME in uevent, ignoring", uev->kernel);
783 minor = uevent_get_minor(uev);
785 pthread_cleanup_push(cleanup_lock, &vecs->lock);
787 pthread_testcancel();
788 mpp = find_mp_by_minor(vecs->mpvec, minor);
791 condlog(2, "%s: devmap not registered, can't remove",
/* Guard against stale events: alias must match the registered map. */
795 if (strcmp(mpp->alias, alias)) {
796 condlog(2, "%s: map alias mismatch: have \"%s\", got \"%s\")",
797 uev->kernel, mpp->alias, alias);
801 remove_map_and_stop_waiter(mpp, vecs);
803 lock_cleanup_pop(vecs->lock);
808 /* Called from CLI handler */
/*
 * Remove the map identified by @minor, verifying @alias matches, then
 * flush it from the kernel via flush_map().
 */
810 ev_remove_map (char * devname, char * alias, int minor, struct vectors * vecs)
812 struct multipath * mpp;
814 mpp = find_mp_by_minor(vecs->mpvec, minor);
817 condlog(2, "%s: devmap not registered, can't remove",
821 if (strcmp(mpp->alias, alias)) {
822 condlog(2, "%s: minor number mismatch (map %d, event %d)",
823 mpp->alias, mpp->dmi->minor, minor);
826 return flush_map(mpp, vecs, 0);
/*
 * uevent handler for path addition. If the path is already known, either
 * ignore the spurious event or re-initialize it (dropping it when it is
 * now blacklisted). Otherwise gather pathinfo, store the new path, and
 * hand it to ev_add_path(). @need_do_map is forwarded unchanged.
 */
830 uev_add_path (struct uevent *uev, struct vectors * vecs, int need_do_map)
836 condlog(3, "%s: add path (uevent)", uev->kernel);
837 if (strstr(uev->kernel, "..") != NULL) {
839  * Don't allow relative device names in the pathvec
841 condlog(0, "%s: path name is invalid", uev->kernel);
845 pthread_cleanup_push(cleanup_lock, &vecs->lock);
847 pthread_testcancel();
848 pp = find_path_by_dev(vecs->pathvec, uev->kernel);
852 condlog(3, "%s: spurious uevent, path already in pathvec",
/* Path known but never bound to a map/WWID: redo discovery from scratch. */
854 if (!pp->mpp && !strlen(pp->wwid)) {
855 condlog(3, "%s: reinitialize path", uev->kernel);
856 udev_device_unref(pp->udev);
857 pp->udev = udev_device_ref(uev->udev);
858 conf = get_multipath_config();
859 pthread_cleanup_push(put_multipath_config, conf);
860 r = pathinfo(pp, conf,
861 DI_ALL | DI_BLACKLIST);
862 pthread_cleanup_pop(1);
863 if (r == PATHINFO_OK)
864 ret = ev_add_path(pp, vecs, need_do_map);
865 else if (r == PATHINFO_SKIPPED) {
866 condlog(3, "%s: remove blacklisted path",
868 i = find_slot(vecs->pathvec, (void *)pp);
870 vector_del_slot(vecs->pathvec, i);
873 condlog(0, "%s: failed to reinitialize path",
883  * get path vital state
885 conf = get_multipath_config();
886 pthread_cleanup_push(put_multipath_config, conf);
887 ret = alloc_path_with_pathinfo(conf, uev->udev,
888 uev->wwid, DI_ALL, &pp);
889 pthread_cleanup_pop(1);
891 if (ret == PATHINFO_SKIPPED)
894 condlog(3, "%s: failed to get path info", uev->kernel);
899 ret = store_path(vecs->pathvec, pp);
901 conf = get_multipath_config();
902 pp->checkint = conf->checkint;
903 put_multipath_config(conf);
904 ret = ev_add_path(pp, vecs, need_do_map);
906 condlog(0, "%s: failed to store path info, "
913 lock_cleanup_pop(vecs->lock);
/*
 * Core path-addition logic: attach path @pp to an existing map matching
 * its WWID, or create a new map for it, then (re)load the device-mapper
 * table and re-sync internal state. On failure the path is orphaned and,
 * for a newly created map, the map is removed again.
 */
923 ev_add_path (struct path * pp, struct vectors * vecs, int need_do_map)
925 struct multipath * mpp;
926 char params[PARAMS_SIZE] = {0};
928 int start_waiter = 0;
932  * need path UID to go any further
934 if (strlen(pp->wwid) == 0) {
935 condlog(0, "%s: failed to get path uid", pp->dev);
936 goto fail; /* leave path added to pathvec */
938 mpp = find_mp_by_wwid(vecs->mpvec, pp->wwid);
/* Reject paths whose size disagrees with the map they would join. */
939 if (mpp && pp->size && mpp->size != pp->size) {
940 condlog(0, "%s: failed to add new path %s, device size mismatch", mpp->alias, pp->dev);
941 int i = find_slot(vecs->pathvec, (void *)pp);
943 vector_del_slot(vecs->pathvec, i);
/* If the map is still settling after creation, postpone this addition. */
947 if (mpp && mpp->wait_for_udev &&
948 (pathcount(mpp, PATH_UP) > 0 ||
949 (pathcount(mpp, PATH_GHOST) > 0 &&
950 path_get_tpgs(pp) != TPGS_IMPLICIT &&
951 mpp->ghost_delay_tick <= 0))) {
952 /* if wait_for_udev is set and valid paths exist */
953 condlog(3, "%s: delaying path addition until %s is fully initialized",
954 pp->dev, mpp->alias);
955 mpp->wait_for_udev = 2;
956 orphan_path(pp, "waiting for create to complete");
963 condlog(4,"%s: adopting all paths for path %s",
964 mpp->alias, pp->dev);
965 if (adopt_paths(vecs->pathvec, mpp))
966 goto fail; /* leave path added to pathvec */
968 verify_paths(mpp, vecs);
969 mpp->action = ACT_RELOAD;
/* No matching map: create one, unless policy says this path stays alone. */
971 if (!should_multipath(pp, vecs->pathvec, vecs->mpvec)) {
972 orphan_path(pp, "only one path");
975 condlog(4,"%s: creating new map", pp->dev);
976 if ((mpp = add_map_with_path(vecs, pp, 1))) {
977 mpp->action = ACT_CREATE;
979  * We don't depend on ACT_CREATE, as domap will
980  * set it to ACT_NOTHING when complete.
985 goto fail; /* leave path added to pathvec */
988 /* persistent reservation check*/
989 mpath_pr_event_handle(pp);
994 if (!dm_map_present(mpp->alias)) {
995 mpp->action = ACT_CREATE;
999  * push the map to the device-mapper
1001 if (setup_map(mpp, params, PARAMS_SIZE, vecs)) {
1002 condlog(0, "%s: failed to setup map for addition of new "
1003 "path %s", mpp->alias, pp->dev);
1007  * reload the map for the multipath mapped device
1009 ret = domap(mpp, params, 1);
1010 while (ret == DOMAP_RETRY && retries-- > 0) {
1011 condlog(0, "%s: retry domap for addition of new "
1012 "path %s", mpp->alias, pp->dev);
1014 ret = domap(mpp, params, 1);
1016 if (ret == DOMAP_FAIL || ret == DOMAP_RETRY) {
1017 condlog(0, "%s: failed in domap for addition of new "
1018 "path %s", mpp->alias, pp->dev);
1020  * deal with asynchronous uevents :((
1022 if (mpp->action == ACT_RELOAD && retries-- > 0) {
1023 condlog(0, "%s: ev_add_path sleep", mpp->alias);
1025 update_mpp_paths(mpp, vecs->pathvec);
1028 else if (mpp->action == ACT_RELOAD)
1029 condlog(0, "%s: giving up reload", mpp->alias);
/* Newly created (or waiter-less) maps need event monitoring started. */
1035 if ((mpp->action == ACT_CREATE ||
1036 (mpp->action == ACT_NOTHING && start_waiter && !mpp->waiter)) &&
1037 wait_for_events(mpp, vecs))
1041  * update our state from kernel regardless of create or reload
1043 if (setup_multipath(vecs, mpp))
1044 goto fail; /* if setup_multipath fails, it removes the map */
1046 sync_map_state(mpp);
1049 condlog(2, "%s [%s]: path added to devmap %s",
1050 pp->dev, pp->dev_t, mpp->alias);
1056 remove_map(mpp, vecs, 1);
1058 orphan_path(pp, "failed to add path");
/*
 * uevent handler for path removal: drop any foreign bookkeeping for the
 * device, then remove the path via ev_remove_path() under the vecs lock.
 * A path that is no longer in pathvec is not an error.
 */
1063 uev_remove_path (struct uevent *uev, struct vectors * vecs, int need_do_map)
1068 condlog(3, "%s: remove path (uevent)", uev->kernel);
1069 delete_foreign(uev->udev);
1071 pthread_cleanup_push(cleanup_lock, &vecs->lock);
1073 pthread_testcancel();
1074 pp = find_path_by_dev(vecs->pathvec, uev->kernel);
1076 ret = ev_remove_path(pp, vecs, need_do_map);
1077 lock_cleanup_pop(vecs->lock);
1079 /* Not an error; path might have been purged earlier */
1080 condlog(0, "%s: path already removed", uev->kernel);
/*
 * Core path-removal logic: detach @pp from its map, and either flush the
 * map entirely (last path gone, honoring flush_on_last_del) or reload it
 * without the path. Finally drop @pp from pathvec. On reload failure the
 * whole map is removed.
 */
1087 ev_remove_path (struct path *pp, struct vectors * vecs, int need_do_map)
1089 struct multipath * mpp;
1091 char params[PARAMS_SIZE] = {0};
1094  * avoid referring to the map of an orphaned path
1096 if ((mpp = pp->mpp)) {
1098  * transform the mp->pg vector of vectors of paths
1099  * into a mp->params string to feed the device-mapper
1101 if (update_mpp_paths(mpp, vecs->pathvec)) {
1102 condlog(0, "%s: failed to update paths",
1108  * Make sure mpp->hwe doesn't point to freed memory
1109  * We call extract_hwe_from_path() below to restore mpp->hwe
1111 if (mpp->hwe == pp->hwe)
1114 if ((i = find_slot(mpp->paths, (void *)pp)) != -1)
1115 vector_del_slot(mpp->paths, i);
1118  * remove the map IF removing the last path
1120 if (VECTOR_SIZE(mpp->paths) == 0) {
1121 char alias[WWID_SIZE];
1124  * flush_map will fail if the device is open
/* Copy the alias first: flush_map frees mpp on success. */
1126 strlcpy(alias, mpp->alias, WWID_SIZE);
1127 if (mpp->flush_on_last_del == FLUSH_ENABLED) {
1128 condlog(2, "%s Last path deleted, disabling queueing", mpp->alias);
1129 mpp->retry_tick = 0;
1130 mpp->no_path_retry = NO_PATH_RETRY_FAIL;
1131 mpp->disable_queueing = 1;
1132 mpp->stat_map_failures++;
1133 dm_queue_if_no_path(mpp->alias, 0);
1135 if (!flush_map(mpp, vecs, 1)) {
1136 condlog(2, "%s: removed map after"
1137 " removing all paths",
1143  * Not an error, continue
1147 if (mpp->hwe == NULL)
1148 extract_hwe_from_path(mpp);
1150 if (setup_map(mpp, params, PARAMS_SIZE, vecs)) {
1151 condlog(0, "%s: failed to setup map for"
1152 " removal of path %s", mpp->alias, pp->dev);
1156 if (mpp->wait_for_udev) {
1157 mpp->wait_for_udev = 2;
1166 mpp->action = ACT_RELOAD;
1167 if (domap(mpp, params, 1) == DOMAP_FAIL) {
1168 condlog(0, "%s: failed in domap for "
1169 "removal of path %s",
1170 mpp->alias, pp->dev);
1174  * update our state from kernel
1176 if (setup_multipath(vecs, mpp))
1178 sync_map_state(mpp);
1180 condlog(2, "%s [%s]: path removed from map %s",
1181 pp->dev, pp->dev_t, mpp->alias);
1186 if ((i = find_slot(vecs->pathvec, (void *)pp)) != -1)
1187 vector_del_slot(vecs->pathvec, i);
/* Error path: reload failed, so the map itself is taken down. */
1194 remove_map_and_stop_waiter(mpp, vecs);
/*
 * uevent handler for path "change" events: handle foreign paths, detect
 * WWID changes (remove + re-add the path), refresh sysfs pathinfo, and
 * propagate read-only state changes to the map via a reload. Unknown
 * paths are either blacklisted (ignored) or added as new.
 */
1199 uev_update_path (struct uevent *uev, struct vectors * vecs)
1201 int ro, retval = 0, rc;
1203 struct config *conf;
1204 int needs_reinit = 0;
1206 switch ((rc = change_foreign(uev->udev))) {
1208 /* known foreign path, ignore event */
1210 case FOREIGN_IGNORED:
1213 condlog(3, "%s: error in change_foreign", __func__);
1216 condlog(1, "%s: return code %d of change_forein is unsupported",
1221 pthread_cleanup_push(cleanup_lock, &vecs->lock);
1223 pthread_testcancel();
1225 pp = find_path_by_dev(vecs->pathvec, uev->kernel);
1227 struct multipath *mpp = pp->mpp;
1228 char wwid[WWID_SIZE];
1230 if (pp->initialized == INIT_REQUESTED_UDEV) {
1234 /* Don't deal with other types of failed initialization
1235  * now. check_path will handle it */
1236 if (!strlen(pp->wwid))
/* Re-read the UID and compare with the stored WWID to spot remaps. */
1239 strcpy(wwid, pp->wwid);
1240 rc = get_uid(pp, pp->state, uev->udev, 0);
1243 strcpy(pp->wwid, wwid);
1244 else if (strncmp(wwid, pp->wwid, WWID_SIZE) != 0) {
1245 condlog(0, "%s: path wwid changed from '%s' to '%s'",
1246 uev->kernel, wwid, pp->wwid);
1247 ev_remove_path(pp, vecs, 1);
1251 udev_device_unref(pp->udev);
1252 pp->udev = udev_device_ref(uev->udev);
1253 conf = get_multipath_config();
1254 pthread_cleanup_push(put_multipath_config, conf);
1255 if (pathinfo(pp, conf, DI_SYSFS|DI_NOIO) != PATHINFO_OK)
1256 condlog(1, "%s: pathinfo failed after change uevent",
1258 pthread_cleanup_pop(1);
/* Propagate DISK_RO changes from the uevent into the map. */
1261 ro = uevent_get_disk_ro(uev);
1262 if (mpp && ro >= 0) {
1263 condlog(2, "%s: update path write_protect to '%d' (uevent)", uev->kernel, ro);
1265 if (mpp->wait_for_udev)
1266 mpp->wait_for_udev = 2;
1269 pp->mpp->force_readonly = 1;
1270 retval = update_path_groups(mpp, vecs, 0);
1272 condlog(2, "%s: map removed during reload", pp->dev);
1274 pp->mpp->force_readonly = 0;
1275 condlog(2, "%s: map %s reloaded (retval %d)", uev->kernel, mpp->alias, retval);
1281 lock_cleanup_pop(vecs->lock);
1283 /* If the path is blacklisted, print a debug/non-default verbosity message. */
1285 int flag = DI_SYSFS | DI_WWID;
1287 conf = get_multipath_config();
1288 pthread_cleanup_push(put_multipath_config, conf);
1289 retval = alloc_path_with_pathinfo(conf, uev->udev, uev->wwid, flag, NULL);
1290 pthread_cleanup_pop(1);
1292 if (retval == PATHINFO_SKIPPED) {
1293 condlog(3, "%s: spurious uevent, path is blacklisted", uev->kernel);
1298 condlog(0, "%s: spurious uevent, path not found", uev->kernel);
/* Unknown and not blacklisted: treat the change event as an add. */
1301 retval = uev_add_path(uev, vecs, 1);
/*
 * On a dm PATH_FAILED uevent, look up the failed path by its DM_PATH
 * devt and feed it into the io_err_stat marginal-path accounting.
 */
1306 uev_pathfail_check(struct uevent *uev, struct vectors *vecs)
1308 char *action = NULL, *devt = NULL;
1312 action = uevent_get_dm_action(uev);
1315 if (strncmp(action, "PATH_FAILED", 11))
1317 devt = uevent_get_dm_path(uev);
1319 condlog(3, "%s: No DM_PATH in uevent", uev->kernel);
1323 pthread_cleanup_push(cleanup_lock, &vecs->lock);
1325 pthread_testcancel();
1326 pp = find_path_by_devt(vecs->pathvec, devt);
1329 r = io_err_stat_handle_pathfail(pp);
1331 condlog(3, "io_err_stat: %s: cannot handle pathfail uevent",
1334 lock_cleanup_pop(vecs->lock);
/*
 * Populate vecs->mpvec from the kernel's device-mapper tables; maps whose
 * table or status cannot be read are removed again.
 */
1344 map_discovery (struct vectors * vecs)
1346 struct multipath * mpp;
1349 if (dm_get_maps(vecs->mpvec))
1352 vector_foreach_slot (vecs->mpvec, mpp, i)
1353 if (update_multipath_table(mpp, vecs->pathvec, 1) ||
1354 update_multipath_status(mpp)) {
1355 remove_map(mpp, vecs, 1);
/*
 * Unix-socket command dispatcher: non-root clients may only run "list"
 * and "show" commands; everything else is parsed by parse_cmd(). The
 * reply string and its length are returned via @reply/@len.
 */
1363 uxsock_trigger (char * str, char ** reply, int * len, bool is_root,
1364 void * trigger_data)
1366 struct vectors * vecs;
1371 vecs = (struct vectors *)trigger_data;
1373 if ((str != NULL) && (is_root == false) &&
1374 (strncmp(str, "list", strlen("list")) != 0) &&
1375 (strncmp(str, "show", strlen("show")) != 0)) {
1376 *reply = STRDUP("permission deny: need to be root");
1378 *len = strlen(*reply) + 1;
1382 r = parse_cmd(str, reply, len, vecs, uxsock_timeout / 1000);
/* Map parse_cmd() results onto canonical reply strings. */
1386 *reply = STRDUP("timeout\n");
1388 *reply = STRDUP("fail\n");
1390 *len = strlen(*reply) + 1;
1393 else if (!r && *len == 0) {
1394 *reply = STRDUP("ok\n");
1396 *len = strlen(*reply) + 1;
1399 /* else if (r < 0) leave *reply alone */
/*
 * Main uevent dispatcher. Blocks until the daemon is idle, running, or
 * shutting down; then routes dm-* events to the map handlers and
 * everything else (including merged events) to the path handlers.
 */
1405 uev_trigger (struct uevent * uev, void * trigger_data)
1408 struct vectors * vecs;
1409 struct uevent *merge_uev, *tmp;
1410 enum daemon_status state;
1412 vecs = (struct vectors *)trigger_data;
/* Wait under config_lock until the daemon reaches a processable state. */
1414 pthread_cleanup_push(config_cleanup, NULL);
1415 pthread_mutex_lock(&config_lock);
1416 while (running_state != DAEMON_IDLE &&
1417 running_state != DAEMON_RUNNING &&
1418 running_state != DAEMON_SHUTDOWN)
1419 pthread_cond_wait(&config_cond, &config_lock);
1420 state = running_state;
1421 pthread_cleanup_pop(1);
1423 if (state == DAEMON_SHUTDOWN)
1428  * Add events are ignored here as the tables
1429  * are not fully initialised then.
1431 if (!strncmp(uev->kernel, "dm-", 3)) {
1432 if (!uevent_is_mpath(uev)) {
1433 if (!strncmp(uev->action, "change", 6))
1434 (void)add_foreign(uev->udev);
1435 else if (!strncmp(uev->action, "remove", 6))
1436 (void)delete_foreign(uev->udev);
1439 if (!strncmp(uev->action, "change", 6)) {
1440 r = uev_add_map(uev, vecs);
1443  * the kernel-side dm-mpath issues a PATH_FAILED event
1444  * when it encounters a path IO error. It is reason-
1445  * able be the entry of path IO error accounting pro-
1448 uev_pathfail_check(uev, vecs);
1449 } else if (!strncmp(uev->action, "remove", 6)) {
1450 r = uev_remove_map(uev, vecs);
1456  * path add/remove/change event, add/remove maybe merged
1458 list_for_each_entry_safe(merge_uev, tmp, &uev->merge_node, node) {
1459 if (!strncmp(merge_uev->action, "add", 3))
1460 r += uev_add_path(merge_uev, vecs, 0);
1461 if (!strncmp(merge_uev->action, "remove", 6))
1462 r += uev_remove_path(merge_uev, vecs, 0);
/* The final (unmerged) event triggers the actual map reload (arg 1). */
1465 if (!strncmp(uev->action, "add", 3))
1466 r += uev_add_path(uev, vecs, 1);
1467 if (!strncmp(uev->action, "remove", 6))
1468 r += uev_remove_path(uev, vecs, 1);
1469 if (!strncmp(uev->action, "change", 6))
1470 r += uev_update_path(uev, vecs);
/* pthread cleanup handler: unregister this thread from liburcu. */
1476 static void rcu_unregister(__attribute__((unused)) void *param)
1478 rcu_unregister_thread();
/* Thread entry: listen for kernel uevents via udev until cancelled. */
1482 ueventloop (void * ap)
1484 struct udev *udev = ap;
1486 pthread_cleanup_push(rcu_unregister, NULL);
1487 rcu_register_thread();
1488 if (uevent_listen(udev))
1489 condlog(0, "error starting uevent listener");
1490 pthread_cleanup_pop(1);
/* Thread entry: drain the queued uevents through uev_trigger(). */
1495 uevqloop (void * ap)
1497 pthread_cleanup_push(rcu_unregister, NULL);
1498 rcu_register_thread();
1499 if (uevent_dispatch(&uev_trigger, ap))
1500 condlog(0, "error starting uevent dispatcher");
1501 pthread_cleanup_pop(1);
/*
 * Thread entry for the CLI listener: create the unix socket, register
 * every CLI command handler, signal the main thread that startup can
 * proceed (DAEMON_CONFIGURE), then serve requests until cancelled.
 */
1505 uxlsnrloop (void * ap)
1509 pthread_cleanup_push(rcu_unregister, NULL);
1510 rcu_register_thread();
1512 ux_sock = ux_socket_listen(DEFAULT_SOCKET);
1513 if (ux_sock == -1) {
1514 condlog(1, "could not create uxsock: %d", errno);
1518 pthread_cleanup_push(uxsock_cleanup, (void *)ux_sock);
1521 condlog(1, "Failed to init uxsock listener");
1526 /* Tell main thread that thread has started */
1527 post_config_state(DAEMON_CONFIGURE);
/* Read-only "list/show" style commands. */
1529 set_handler_callback(LIST+PATHS, cli_list_paths);
1530 set_handler_callback(LIST+PATHS+FMT, cli_list_paths_fmt);
1531 set_handler_callback(LIST+PATHS+RAW+FMT, cli_list_paths_raw);
1532 set_handler_callback(LIST+PATH, cli_list_path);
1533 set_handler_callback(LIST+MAPS, cli_list_maps);
1534 set_handler_callback(LIST+STATUS, cli_list_status);
1535 set_unlocked_handler_callback(LIST+DAEMON, cli_list_daemon);
1536 set_handler_callback(LIST+MAPS+STATUS, cli_list_maps_status);
1537 set_handler_callback(LIST+MAPS+STATS, cli_list_maps_stats);
1538 set_handler_callback(LIST+MAPS+FMT, cli_list_maps_fmt);
1539 set_handler_callback(LIST+MAPS+RAW+FMT, cli_list_maps_raw);
1540 set_handler_callback(LIST+MAPS+TOPOLOGY, cli_list_maps_topology);
1541 set_handler_callback(LIST+TOPOLOGY, cli_list_maps_topology);
1542 set_handler_callback(LIST+MAPS+JSON, cli_list_maps_json);
1543 set_handler_callback(LIST+MAP+TOPOLOGY, cli_list_map_topology);
1544 set_handler_callback(LIST+MAP+FMT, cli_list_map_fmt);
1545 set_handler_callback(LIST+MAP+RAW+FMT, cli_list_map_fmt);
1546 set_handler_callback(LIST+MAP+JSON, cli_list_map_json);
1547 set_handler_callback(LIST+CONFIG+LOCAL, cli_list_config_local);
1548 set_handler_callback(LIST+CONFIG, cli_list_config);
1549 set_handler_callback(LIST+BLACKLIST, cli_list_blacklist);
1550 set_handler_callback(LIST+DEVICES, cli_list_devices);
1551 set_handler_callback(LIST+WILDCARDS, cli_list_wildcards);
/* Mutating commands: add/remove/reconfigure/queueing/PR-key handling. */
1552 set_handler_callback(RESET+MAPS+STATS, cli_reset_maps_stats);
1553 set_handler_callback(RESET+MAP+STATS, cli_reset_map_stats);
1554 set_handler_callback(ADD+PATH, cli_add_path);
1555 set_handler_callback(DEL+PATH, cli_del_path);
1556 set_handler_callback(ADD+MAP, cli_add_map);
1557 set_handler_callback(DEL+MAP, cli_del_map);
1558 set_handler_callback(SWITCH+MAP+GROUP, cli_switch_group);
1559 set_unlocked_handler_callback(RECONFIGURE, cli_reconfigure);
1560 set_handler_callback(SUSPEND+MAP, cli_suspend);
1561 set_handler_callback(RESUME+MAP, cli_resume);
1562 set_handler_callback(RESIZE+MAP, cli_resize);
1563 set_handler_callback(RELOAD+MAP, cli_reload);
1564 set_handler_callback(RESET+MAP, cli_reassign);
1565 set_handler_callback(REINSTATE+PATH, cli_reinstate);
1566 set_handler_callback(FAIL+PATH, cli_fail);
1567 set_handler_callback(DISABLEQ+MAP, cli_disable_queueing);
1568 set_handler_callback(RESTOREQ+MAP, cli_restore_queueing);
1569 set_handler_callback(DISABLEQ+MAPS, cli_disable_all_queueing);
1570 set_handler_callback(RESTOREQ+MAPS, cli_restore_all_queueing);
1571 set_unlocked_handler_callback(QUIT, cli_quit);
1572 set_unlocked_handler_callback(SHUTDOWN, cli_shutdown);
1573 set_handler_callback(GETPRSTATUS+MAP, cli_getprstatus);
1574 set_handler_callback(SETPRSTATUS+MAP, cli_setprstatus);
1575 set_handler_callback(UNSETPRSTATUS+MAP, cli_unsetprstatus);
1576 set_handler_callback(FORCEQ+DAEMON, cli_force_no_daemon_q);
1577 set_handler_callback(RESTOREQ+DAEMON, cli_restore_no_daemon_q);
1578 set_handler_callback(GETPRKEY+MAP, cli_getprkey);
1579 set_handler_callback(SETPRKEY+MAP+KEY, cli_setprkey);
1580 set_handler_callback(UNSETPRKEY+MAP, cli_unsetprkey);
1581 set_handler_callback(SETMARGINAL+PATH, cli_set_marginal);
1582 set_handler_callback(UNSETMARGINAL+PATH, cli_unset_marginal);
1583 set_handler_callback(UNSETMARGINAL+MAP, cli_unset_all_marginal);
1586 uxsock_listen(&uxsock_trigger, ux_sock, ap);
1589 pthread_cleanup_pop(1); /* uxsock_cleanup */
1591 pthread_cleanup_pop(1); /* rcu_unregister */
1598 post_config_state(DAEMON_SHUTDOWN);
/*
 * Mark path @pp failed in the kernel map and update queue-mode
 * accounting when @del_active is set.
 */
1602 fail_path (struct path * pp, int del_active)
1607 condlog(2, "checker failed path %s in map %s",
1608 pp->dev_t, pp->mpp->alias);
1610 dm_fail_path(pp->mpp->alias, pp->dev_t);
1612 update_queue_mode_del_path(pp->mpp);
/*
 * Reinstate a path in device-mapper after it came back.
 * Logs failure/success; update_queue_mode_add_path() is then called
 * (presumably gated on add_active -- guarding line elided).
 */
1616 * caller must have locked the path list before calling that function
1619 reinstate_path (struct path * pp, int add_active)
1626 if (dm_reinstate_path(pp->mpp->alias, pp->dev_t)) {
1627 condlog(0, "%s: reinstate failed", pp->dev_t);
1630 condlog(2, "%s: reinstated", pp->dev_t);
1632 update_queue_mode_add_path(pp->mpp);
/*
 * Re-enable the path group containing pp if device-mapper currently
 * has it disabled. pp->pgindex is 1-based; 0 means "not yet set".
 */
1638 enable_group(struct path * pp)
1640 struct pathgroup * pgp;
1643 * if path is added through uev_add_path, pgindex can be unset.
1644 * next update_strings() will set it, upon map reload event.
1646 * we can safely return here, because upon map reload, all
1647 * PG will be enabled.
1649 if (!pp->mpp->pg || !pp->pgindex)
/* pgindex is 1-based, vector slots are 0-based */
1652 pgp = VECTOR_SLOT(pp->mpp->pg, pp->pgindex - 1);
1654 if (pgp->status == PGSTATE_DISABLED) {
1655 condlog(2, "%s: enable group #%i", pp->mpp->alias, pp->pgindex);
1656 dm_enablegroup(pp->mpp->alias, pp->pgindex);
/*
 * Drop multipath entries whose device-mapper map no longer exists:
 * stop their waiter thread and remove them from vecs->mpvec.
 */
1661 mpvec_garbage_collector (struct vectors * vecs)
1663 struct multipath * mpp;
1669 vector_foreach_slot (vecs->mpvec, mpp, i) {
1670 if (mpp && mpp->alias && !dm_map_present(mpp->alias)) {
1671 condlog(2, "%s: remove dead map", mpp->alias);
1672 remove_map_and_stop_waiter(mpp, vecs);
1678 /* This is called after a path has started working again. If the multipath
1679 * device for this path uses the followover failback type, and this is the
1680 * best pathgroup, and this is the first path in the pathgroup to come back
1681 * up, then switch to this pathgroup */
1683 followover_should_failback(struct path * pp)
1685 struct pathgroup * pgp;
/* only applies to followover failback on the best pathgroup */
1689 if (pp->mpp->pgfailback != -FAILBACK_FOLLOWOVER ||
1690 !pp->mpp->pg || !pp->pgindex ||
1691 pp->pgindex != pp->mpp->bestpg)
1694 pgp = VECTOR_SLOT(pp->mpp->pg, pp->pgindex - 1);
/* another path in the group is already usable -> not the first one back */
1695 vector_foreach_slot(pgp->paths, pp1, i) {
1698 if (pp1->chkrstate != PATH_DOWN && pp1->chkrstate != PATH_SHAKY)
/*
 * Per-tick countdown for maps waiting on a creation uevent. On
 * timeout, re-enable reloads (wait_for_udev = 0) and reload the map if
 * a reload had been deferred (wait_for_udev > 1). If any map timed out
 * and a reconfigure was delayed, trigger the reconfigure now.
 */
1705 missing_uev_wait_tick(struct vectors *vecs)
1707 struct multipath * mpp;
1709 int timed_out = 0, delayed_reconfig;
1710 struct config *conf;
1712 vector_foreach_slot (vecs->mpvec, mpp, i) {
1713 if (mpp->wait_for_udev && --mpp->uev_wait_tick <= 0) {
1715 condlog(0, "%s: timeout waiting on creation uevent. enabling reloads", mpp->alias);
1716 if (mpp->wait_for_udev > 1 &&
1717 update_map(mpp, vecs, 0)) {
1718 /* update_map removed map */
1722 mpp->wait_for_udev = 0;
1726 conf = get_multipath_config();
1727 delayed_reconfig = conf->delayed_reconfig;
1728 put_multipath_config(conf);
1729 if (timed_out && delayed_reconfig &&
1730 !need_to_delay_reconfig(vecs)) {
1731 condlog(2, "reconfigure (delayed)");
1732 set_config_state(DAEMON_CONFIGURE);
/*
 * Per-tick countdown for maps waiting for a fully active (non-ghost)
 * path. When the delay expires, force a udev reload of the map.
 */
1737 ghost_delay_tick(struct vectors *vecs)
1739 struct multipath * mpp;
1742 vector_foreach_slot (vecs->mpvec, mpp, i) {
1743 if (mpp->ghost_delay_tick <= 0)
1745 if (--mpp->ghost_delay_tick <= 0) {
1746 condlog(0, "%s: timed out waiting for active path",
1748 mpp->force_udev_reload = 1;
1749 if (update_map(mpp, vecs, 0) != 0) {
1750 /* update_map removed map */
/*
 * Count down the per-map deferred-failback timer; when it reaches
 * zero and a better pathgroup is available, switch to it.
 */
1759 defered_failback_tick (vector mpvec)
1761 struct multipath * mpp;
1764 vector_foreach_slot (mpvec, mpp, i) {
1766 * deferred failback getting sooner
1768 if (mpp->pgfailback > 0 && mpp->failback_tick > 0) {
1769 mpp->failback_tick--;
1771 if (!mpp->failback_tick && need_switch_pathgroup(mpp, 1))
1772 switch_pathgroup(mpp);
/*
 * Count down the no_path_retry timer for maps that are queueing
 * without an active path; when it expires, disable queueing in
 * device-mapper and account the map failure.
 */
1778 retry_count_tick(vector mpvec)
1780 struct multipath *mpp;
1783 vector_foreach_slot (mpvec, mpp, i) {
1784 if (mpp->retry_tick > 0) {
1785 mpp->stat_total_queueing_time++;
1786 condlog(4, "%s: Retrying.. No active path", mpp->alias);
1787 if(--mpp->retry_tick == 0) {
1788 mpp->stat_map_failures++;
1789 dm_queue_if_no_path(mpp->alias, 0);
1790 condlog(2, "%s: Disable queueing", mpp->alias);
/*
 * Refresh path priorities via pathinfo(DI_PRIO).
 * With refresh_all set, re-prioritize every path in every pathgroup of
 * pp's map and report whether any priority changed; otherwise refresh
 * only pp itself (skipping it when it is PATH_DOWN) and report whether
 * its priority changed.
 */
1796 int update_prio(struct path *pp, int refresh_all)
1800 struct pathgroup * pgp;
1801 int i, j, changed = 0;
1802 struct config *conf;
1805 vector_foreach_slot (pp->mpp->pg, pgp, i) {
1806 vector_foreach_slot (pgp->paths, pp1, j) {
1807 oldpriority = pp1->priority;
1808 conf = get_multipath_config();
/* cleanup handler releases the config even if pathinfo is cancelled */
1809 pthread_cleanup_push(put_multipath_config,
1811 pathinfo(pp1, conf, DI_PRIO);
1812 pthread_cleanup_pop(1);
1813 if (pp1->priority != oldpriority)
1819 oldpriority = pp->priority;
1820 conf = get_multipath_config();
1821 pthread_cleanup_push(put_multipath_config, conf);
1822 if (pp->state != PATH_DOWN)
1823 pathinfo(pp, conf, DI_PRIO);
1824 pthread_cleanup_pop(1);
1826 if (pp->priority == oldpriority)
/*
 * Reload the map's pathgroup layout in device-mapper, re-read the
 * resulting kernel state, and sync path states with it.
 */
1831 int update_path_groups(struct multipath *mpp, struct vectors *vecs, int refresh)
1833 if (reload_map(vecs, mpp, refresh, 1))
1837 if (setup_multipath(vecs, mpp) != 0)
1839 sync_map_state(mpp);
/*
 * san_path_err_* logic: decide whether a recovering path should be
 * kept delayed instead of reinstated. Counts "bad"->"good" state
 * changes as failures, forgets them over time on working paths, and
 * once path_failures exceeds san_path_err_threshold, delays
 * reinstatement for san_path_err_recovery_time seconds.
 * Returns nonzero (presumably) when the path must stay delayed.
 */
1844 static int check_path_reinstate_state(struct path * pp) {
1845 struct timespec curr_time;
1848 * This function is only called when the path state changes
1849 * from "bad" to "good". pp->state reflects the *previous* state.
1850 * If this was "bad", we know that a failure must have occurred
1851 * beforehand, and count that.
1852 * Note that we count path state _changes_ this way. If a path
1853 * remains in "bad" state, failure count is not increased.
/* feature is active only if all three tunables are configured */
1856 if (!((pp->mpp->san_path_err_threshold > 0) &&
1857 (pp->mpp->san_path_err_forget_rate > 0) &&
1858 (pp->mpp->san_path_err_recovery_time >0))) {
1862 if (pp->disable_reinstate) {
1863 /* If there are no other usable paths, reinstate the path */
1864 if (pp->mpp->nr_active == 0) {
1865 condlog(2, "%s : reinstating path early", pp->dev);
1866 goto reinstate_path;
1868 get_monotonic_time(&curr_time);
1870 /* If path became failed again or continue failed, should reset
1871 * path san_path_err_forget_rate and path dis_reinstate_time to
1872 * start a new stable check.
1874 if ((pp->state != PATH_UP) && (pp->state != PATH_GHOST) &&
1875 (pp->state != PATH_DELAYED)) {
1876 pp->san_path_err_forget_rate =
1877 pp->mpp->san_path_err_forget_rate;
1878 pp->dis_reinstate_time = curr_time.tv_sec;
1881 if ((curr_time.tv_sec - pp->dis_reinstate_time ) > pp->mpp->san_path_err_recovery_time) {
1882 condlog(2,"%s : reinstate the path after err recovery time", pp->dev);
1883 goto reinstate_path;
1887 /* forget errors on a working path */
1888 if ((pp->state == PATH_UP || pp->state == PATH_GHOST) &&
1889 pp->path_failures > 0) {
1890 if (pp->san_path_err_forget_rate > 0){
1891 pp->san_path_err_forget_rate--;
1893 /* for every san_path_err_forget_rate number of
1894 * successful path checks decrement path_failures by 1
1896 pp->path_failures--;
1897 pp->san_path_err_forget_rate = pp->mpp->san_path_err_forget_rate;
1902 /* If the path isn't recovering from a failed state, do nothing */
1903 if (pp->state != PATH_DOWN && pp->state != PATH_SHAKY &&
1904 pp->state != PATH_TIMEOUT)
1907 if (pp->path_failures == 0)
1908 pp->san_path_err_forget_rate = pp->mpp->san_path_err_forget_rate;
1910 pp->path_failures++;
1912 /* if we don't know the current time, we don't know how long to
1913 * delay the path, so there's no point in checking if we should
1916 get_monotonic_time(&curr_time);
1917 /* when path failures has exceeded the san_path_err_threshold
1918 * place the path in delayed state till san_path_err_recovery_time
1919 * so that the customer can rectify the issue within this time. After
1920 * the completion of san_path_err_recovery_time it should
1921 * automatically reinstate the path
1922 * (note: we know that san_path_err_threshold > 0 here).
1924 if (pp->path_failures > (unsigned int)pp->mpp->san_path_err_threshold) {
1925 condlog(2, "%s : hit error threshold. Delaying path reinstatement", pp->dev);
1926 pp->dis_reinstate_time = curr_time.tv_sec;
1927 pp->disable_reinstate = 1;
/* reinstate_path label: clear all error-tracking state */
1935 pp->path_failures = 0;
1936 pp->disable_reinstate = 0;
1937 pp->san_path_err_forget_rate = 0;
/*
 * Decide whether check_path() should treat this path as unusable:
 * defer either to the marginal-path (io_err_stat) check or to the
 * san_path_err reinstate-state check, whichever feature is enabled.
 */
1942 should_skip_path(struct path *pp){
1943 if (marginal_path_check_enabled(pp->mpp)) {
1944 if (pp->io_err_disable_reinstate && need_io_err_check(pp))
1946 } else if (san_path_check_enabled(pp->mpp)) {
1947 if (check_path_reinstate_state(pp))
/*
 * Core per-path checker, run from checkerloop for each path whose
 * tick has expired: determine the new path state, synchronize with
 * kernel/device-mapper state, fail or reinstate the path as needed,
 * handle marginal/delayed paths, refresh priorities and possibly
 * switch pathgroups.
 * NOTE(review): excerpt is elided; some original lines (braces,
 * returns, else-branches) are not shown.
 */
1954 * Returns '1' if the path has been checked, '-1' if it was blacklisted
1958 check_path (struct vectors * vecs, struct path * pp, unsigned int ticks)
1961 int new_path_up = 0;
1962 int chkr_new_path_up = 0;
1964 int disable_reinstate = 0;
1965 int oldchkrstate = pp->chkrstate;
1966 int retrigger_tries, verbosity;
1967 unsigned int checkint, max_checkint;
1968 struct config *conf;
1969 int marginal_pathgroups, marginal_changed = 0;
1972 if ((pp->initialized == INIT_OK ||
1973 pp->initialized == INIT_REQUESTED_UDEV) && !pp->mpp)
/* decrement the path's tick budget; don't check before it reaches 0 */
1977 pp->tick -= (pp->tick > ticks) ? ticks : pp->tick;
1979 return 0; /* don't check this path yet */
/* snapshot config values once, then release the config reference */
1981 conf = get_multipath_config();
1982 retrigger_tries = conf->retrigger_tries;
1983 checkint = conf->checkint;
1984 max_checkint = conf->max_checkint;
1985 verbosity = conf->verbosity;
1986 marginal_pathgroups = conf->marginal_pathgroups;
1987 put_multipath_config(conf);
1989 if (pp->checkint == CHECKINT_UNDEF) {
1990 condlog(0, "%s: BUG: checkint is not set", pp->dev);
1991 pp->checkint = checkint;
/* paths udev never initialized: retrigger a change uevent, give up
 * after retrigger_tries attempts */
1994 if (!pp->mpp && pp->initialized == INIT_MISSING_UDEV) {
1995 if (pp->retriggers < retrigger_tries) {
1996 condlog(2, "%s: triggering change event to reinitialize",
1998 pp->initialized = INIT_REQUESTED_UDEV;
2000 sysfs_attr_set_value(pp->udev, "uevent", "change",
2004 condlog(1, "%s: not initialized after %d udev retriggers",
2005 pp->dev, retrigger_tries);
2007 * Make sure that the "add missing path" code path
2008 * below may reinstate the path later, if it ever
2010 * The WWID needs not be cleared; if it was set, the
2011 * state hadn't been INIT_MISSING_UDEV in the first
2014 pp->initialized = INIT_FAILED;
2020 * provision a next check soonest,
2021 * in case we exit abnormally from here
2023 pp->tick = checkint;
2025 newstate = path_offline(pp);
2026 if (newstate == PATH_UP) {
2027 conf = get_multipath_config();
2028 pthread_cleanup_push(put_multipath_config, conf);
2029 newstate = get_state(pp, conf, 1, newstate);
2030 pthread_cleanup_pop(1);
2032 checker_clear_message(&pp->checker);
2033 condlog(3, "%s: state %s, checker not called",
2034 pp->dev, checker_state_name(newstate));
2037 * Wait for uevent for removed paths;
2038 * some LLDDs like zfcp keep paths unavailable
2039 * without sending uevents.
2041 if (newstate == PATH_REMOVED)
2042 newstate = PATH_DOWN;
2044 if (newstate == PATH_WILD || newstate == PATH_UNCHECKED) {
2045 condlog(2, "%s: unusable path (%s) - checker failed",
2046 pp->dev, checker_state_name(newstate));
2047 LOG_MSG(2, verbosity, pp);
2048 conf = get_multipath_config();
2049 pthread_cleanup_push(put_multipath_config, conf);
2050 pathinfo(pp, conf, 0);
2051 pthread_cleanup_pop(1);
2053 } else if ((newstate != PATH_UP && newstate != PATH_GHOST) &&
2054 (pp->state == PATH_DELAYED)) {
2055 /* If path state become failed again cancel path delay state */
2056 pp->state = newstate;
/* path with no WWID that just came up: run full discovery and add it */
2060 if (!strlen(pp->wwid) &&
2061 (pp->initialized == INIT_FAILED ||
2062 pp->initialized == INIT_NEW) &&
2063 (newstate == PATH_UP || newstate == PATH_GHOST)) {
2064 condlog(2, "%s: add missing path", pp->dev);
2065 conf = get_multipath_config();
2066 pthread_cleanup_push(put_multipath_config, conf);
2067 ret = pathinfo(pp, conf, DI_ALL | DI_BLACKLIST);
2068 pthread_cleanup_pop(1);
2069 /* INIT_OK implies ret == PATHINFO_OK */
2070 if (pp->initialized == INIT_OK) {
2071 ev_add_path(pp, vecs, 1);
2075 * We failed multiple times to initialize this
2076 * path properly. Don't re-check too often.
2078 pp->checkint = max_checkint;
2079 if (ret == PATHINFO_SKIPPED)
2086 * Async IO in flight. Keep the previous path state
2087 * and reschedule as soon as possible
2089 if (newstate == PATH_PENDING) {
2094 * Synchronize with kernel state
2096 if (update_multipath_strings(pp->mpp, vecs->pathvec, 1)) {
2097 condlog(1, "%s: Could not synchronize with kernel state",
2099 pp->dmstate = PSTATE_UNDEF;
2101 /* if update_multipath_strings orphaned the path, quit early */
2104 set_no_path_retry(pp->mpp);
/* marginal / san_path_err handling for paths that look usable */
2106 if ((newstate == PATH_UP || newstate == PATH_GHOST) &&
2107 (san_path_check_enabled(pp->mpp) ||
2108 marginal_path_check_enabled(pp->mpp))) {
2109 int was_marginal = pp->marginal;
2110 if (should_skip_path(pp)) {
2111 if (!marginal_pathgroups) {
2112 if (marginal_path_check_enabled(pp->mpp))
2113 /* to reschedule as soon as possible,
2114 * so that this path can be recovered
2117 pp->state = PATH_DELAYED;
2120 if (!was_marginal) {
2122 marginal_changed = 1;
2124 } else if (marginal_pathgroups && was_marginal) {
2126 marginal_changed = 1;
2131 * don't reinstate failed path, if it's in stand-by
2132 * and if target supports only implicit tpgs mode.
2133 * this will prevent unnecessary i/o by dm on stand-by
2134 * paths if there are no other active paths in map.
2136 disable_reinstate = (newstate == PATH_GHOST &&
2137 pp->mpp->nr_active == 0 &&
2138 path_get_tpgs(pp) == TPGS_IMPLICIT) ? 1 : 0;
2140 pp->chkrstate = newstate;
2141 if (newstate != pp->state) {
2142 int oldstate = pp->state;
2143 pp->state = newstate;
2145 LOG_MSG(1, verbosity, pp);
2148 * upon state change, reset the checkint
2149 * to the shortest delay
2151 conf = get_multipath_config();
2152 pp->checkint = conf->checkint;
2153 put_multipath_config(conf);
2155 if (newstate != PATH_UP && newstate != PATH_GHOST) {
2157 * proactively fail path in the DM
2159 if (oldstate == PATH_UP ||
2160 oldstate == PATH_GHOST)
2166 * cancel scheduled failback
2168 pp->mpp->failback_tick = 0;
2170 pp->mpp->stat_path_failures++;
2174 if (newstate == PATH_UP || newstate == PATH_GHOST) {
2175 if (pp->mpp->prflag) {
2177 * Check Persistent Reservation.
2179 condlog(2, "%s: checking persistent "
2180 "reservation registration", pp->dev);
2181 mpath_pr_event_handle(pp);
2186 * reinstate this path
2188 if (oldstate != PATH_UP &&
2189 oldstate != PATH_GHOST)
2193 if (!disable_reinstate && reinstate_path(pp, add_active)) {
2194 condlog(3, "%s: reload map", pp->dev);
2195 ev_add_path(pp, vecs, 1);
2201 if (oldchkrstate != PATH_UP && oldchkrstate != PATH_GHOST)
2202 chkr_new_path_up = 1;
2205 * if at least one path is up in a group, and
2206 * the group is disabled, re-enable it
2208 if (newstate == PATH_UP)
2211 else if (newstate == PATH_UP || newstate == PATH_GHOST) {
2212 if ((pp->dmstate == PSTATE_FAILED ||
2213 pp->dmstate == PSTATE_UNDEF) &&
2214 !disable_reinstate) {
2215 /* Clear IO errors */
2216 if (reinstate_path(pp, 0)) {
2217 condlog(3, "%s: reload map", pp->dev);
2218 ev_add_path(pp, vecs, 1);
2223 LOG_MSG(4, verbosity, pp);
/* stable path: back off exponentially up to max_checkint */
2224 if (pp->checkint != max_checkint) {
2226 * double the next check delay.
2227 * max at conf->max_checkint
2229 if (pp->checkint < (max_checkint / 2))
2230 pp->checkint = 2 * pp->checkint;
2232 pp->checkint = max_checkint;
2234 condlog(4, "%s: delay next check %is",
2235 pp->dev_t, pp->checkint);
2237 pp->tick = pp->checkint;
2240 else if (newstate != PATH_UP && newstate != PATH_GHOST) {
2241 if (pp->dmstate == PSTATE_ACTIVE ||
2242 pp->dmstate == PSTATE_UNDEF)
2244 if (newstate == PATH_DOWN) {
2245 int log_checker_err;
2247 conf = get_multipath_config();
2248 log_checker_err = conf->log_checker_err;
2249 put_multipath_config(conf);
2250 if (log_checker_err == LOG_CHKR_ERR_ONCE)
2251 LOG_MSG(3, verbosity, pp);
2253 LOG_MSG(2, verbosity, pp);
2257 pp->state = newstate;
2259 if (pp->mpp->wait_for_udev)
2262 * path prio refreshing
2264 condlog(4, "path prio refresh");
2266 if (marginal_changed)
2267 update_path_groups(pp->mpp, vecs, 1);
2268 else if (update_prio(pp, new_path_up) &&
2269 (pp->mpp->pgpolicyfn == (pgpolicyfn *)group_by_prio) &&
2270 pp->mpp->pgfailback == -FAILBACK_IMMEDIATE)
2271 update_path_groups(pp->mpp, vecs, !new_path_up);
2272 else if (need_switch_pathgroup(pp->mpp, 0)) {
2273 if (pp->mpp->pgfailback > 0 &&
2274 (new_path_up || pp->mpp->failback_tick <= 0))
2275 pp->mpp->failback_tick =
2276 pp->mpp->pgfailback + 1;
2277 else if (pp->mpp->pgfailback == -FAILBACK_IMMEDIATE ||
2278 (chkr_new_path_up && followover_should_failback(pp)))
2279 switch_pathgroup(pp->mpp);
/*
 * Path-checker thread main loop: once per tick, pet the systemd
 * watchdog, check every path, run the per-map tick handlers
 * (failback/retry/uev-wait/ghost-delay), garbage-collect dead maps,
 * and sleep out the remainder of the interval (exactly, when
 * strict_timing is enabled).
 * NOTE(review): excerpt is elided; loop braces and some lines are
 * not shown.
 */
2285 checkerloop (void *ap)
2287 struct vectors *vecs;
2291 struct timespec last_time;
2292 struct config *conf;
2293 int foreign_tick = 0;
2295 pthread_cleanup_push(rcu_unregister, NULL);
2296 rcu_register_thread();
2297 mlockall(MCL_CURRENT | MCL_FUTURE);
2298 vecs = (struct vectors *)ap;
2299 condlog(2, "path checkers start up");
2301 /* Tweak start time for initial path check */
2302 get_monotonic_time(&last_time);
2303 last_time.tv_sec -= 1;
2306 struct timespec diff_time, start_time, end_time;
2307 int num_paths = 0, strict_timing, rc = 0;
2308 unsigned int ticks = 0;
/* elapsed whole seconds since the previous iteration become "ticks" */
2310 get_monotonic_time(&start_time);
2311 if (start_time.tv_sec && last_time.tv_sec) {
2312 timespecsub(&start_time, &last_time, &diff_time);
2313 condlog(4, "tick (%lu.%06lu secs)",
2314 diff_time.tv_sec, diff_time.tv_nsec / 1000);
2315 last_time = start_time;
2316 ticks = diff_time.tv_sec;
2319 condlog(4, "tick (%d ticks)", ticks);
2323 sd_notify(0, "WATCHDOG=1");
2325 rc = set_config_state(DAEMON_RUNNING);
2326 if (rc == ETIMEDOUT) {
2327 condlog(4, "timeout waiting for DAEMON_IDLE");
2329 } else if (rc == EINVAL)
2330 /* daemon shutdown */
/* pass 1: check all paths under the vecs lock */
2333 pthread_cleanup_push(cleanup_lock, &vecs->lock);
2335 pthread_testcancel();
2336 vector_foreach_slot (vecs->pathvec, pp, i) {
2337 rc = check_path(vecs, pp, ticks);
2339 vector_del_slot(vecs->pathvec, i);
2345 lock_cleanup_pop(vecs->lock);
/* pass 2: per-map tick handlers */
2347 pthread_cleanup_push(cleanup_lock, &vecs->lock);
2349 pthread_testcancel();
2350 defered_failback_tick(vecs->mpvec);
2351 retry_count_tick(vecs->mpvec);
2352 missing_uev_wait_tick(vecs);
2353 ghost_delay_tick(vecs);
2354 lock_cleanup_pop(vecs->lock);
/* pass 3: drop maps that disappeared from device-mapper */
2359 pthread_cleanup_push(cleanup_lock, &vecs->lock);
2361 pthread_testcancel();
2362 condlog(4, "map garbage collection");
2363 mpvec_garbage_collector(vecs);
2365 lock_cleanup_pop(vecs->lock);
2368 diff_time.tv_nsec = 0;
2369 if (start_time.tv_sec) {
2370 get_monotonic_time(&end_time);
2371 timespecsub(&end_time, &start_time, &diff_time);
2373 unsigned int max_checkint;
2375 condlog(4, "checked %d path%s in %lu.%06lu secs",
2376 num_paths, num_paths > 1 ? "s" : "",
2378 diff_time.tv_nsec / 1000);
2379 conf = get_multipath_config();
2380 max_checkint = conf->max_checkint;
2381 put_multipath_config(conf);
2382 if (diff_time.tv_sec > max_checkint)
2383 condlog(1, "path checkers took longer "
2384 "than %lu seconds, consider "
2385 "increasing max_polling_interval",
/* foreign library tick, once every max_checkint iterations */
2390 if (foreign_tick == 0) {
2391 conf = get_multipath_config();
2392 foreign_tick = conf->max_checkint;
2393 put_multipath_config(conf);
2395 if (--foreign_tick == 0)
2398 post_config_state(DAEMON_IDLE);
2399 conf = get_multipath_config();
2400 strict_timing = conf->strict_timing;
2401 put_multipath_config(conf);
/* strict_timing: sleep the exact remainder of the 1s interval;
 * on nanosleep failure, fall back to non-strict timing */
2405 if (diff_time.tv_nsec) {
2406 diff_time.tv_sec = 0;
2408 1000UL * 1000 * 1000 - diff_time.tv_nsec;
2410 diff_time.tv_sec = 1;
2412 condlog(3, "waiting for %lu.%06lu secs",
2414 diff_time.tv_nsec / 1000);
2415 if (nanosleep(&diff_time, NULL) != 0) {
2416 condlog(3, "nanosleep failed with error %d",
2418 conf = get_multipath_config();
2419 conf->strict_timing = 0;
2420 put_multipath_config(conf);
2425 pthread_cleanup_pop(1);
/*
 * (Re)build the daemon's view of the world: discover paths from sysfs
 * and maps from device-mapper, filter blacklisted paths, coalesce
 * paths into maps and push them into dm, drop obsolete maps, then
 * install the new map vector and start event waiters for each map.
 */
2430 configure (struct vectors * vecs)
2432 struct multipath * mpp;
2436 struct config *conf;
/* first call uses WEAK reload to avoid superfluous ioctls (see below) */
2437 static int force_reload = FORCE_RELOAD_WEAK;
2439 if (!vecs->pathvec && !(vecs->pathvec = vector_alloc())) {
2440 condlog(0, "couldn't allocate path vec in configure");
2444 if (!vecs->mpvec && !(vecs->mpvec = vector_alloc())) {
2445 condlog(0, "couldn't allocate multipath vec in configure");
2449 if (!(mpvec = vector_alloc())) {
2450 condlog(0, "couldn't allocate new maps vec in configure");
2455 * probe for current path (from sysfs) and map (from dm) sets
2457 ret = path_discovery(vecs->pathvec, DI_ALL);
2459 condlog(0, "configure failed at path discovery");
/* drop blacklisted paths before building maps */
2463 conf = get_multipath_config();
2464 pthread_cleanup_push(put_multipath_config, conf);
2465 vector_foreach_slot (vecs->pathvec, pp, i){
2466 if (filter_path(conf, pp) > 0){
2467 vector_del_slot(vecs->pathvec, i);
2472 pthread_cleanup_pop(1);
2474 if (map_discovery(vecs)) {
2475 condlog(0, "configure failed at map discovery");
2480 * create new set of maps & push changed ones into dm
2481 * In the first call, use FORCE_RELOAD_WEAK to avoid making
2482 * superfluous ACT_RELOAD ioctls. Later calls are done
2483 * with FORCE_RELOAD_YES.
2485 ret = coalesce_paths(vecs, mpvec, NULL, force_reload, CMD_NONE);
2486 if (force_reload == FORCE_RELOAD_WEAK)
2487 force_reload = FORCE_RELOAD_YES;
2489 condlog(0, "configure failed while coalescing paths");
2494 * may need to remove some maps which are no longer relevant
2495 * e.g., due to blacklist changes in conf file
2497 if (coalesce_maps(vecs, mpvec)) {
2498 condlog(0, "configure failed while coalescing maps");
2504 sync_maps_state(mpvec);
2505 vector_foreach_slot(mpvec, mpp, i){
2506 if (remember_wwid(mpp->wwid) == 1)
2507 trigger_paths_udev_change(mpp, true);
2512 * purge dm of old maps
2517 * save new set of maps formed by considering current path state
2519 vector_free(vecs->mpvec);
2520 vecs->mpvec = mpvec;
2523 * start dm event waiter threads for these new maps
2525 vector_foreach_slot(vecs->mpvec, mpp, i) {
2526 if (wait_for_events(mpp, vecs)) {
2527 remove_map(mpp, vecs, 1);
2531 if (setup_multipath(vecs, mpp))
/*
 * A reconfigure must be postponed while any existing map is still
 * waiting for its creation uevent (wait_for_udev set). An empty map
 * vector never delays.
 */
2542 need_to_delay_reconfig(struct vectors * vecs)
2544 struct multipath *mpp;
2547 if (!VECTOR_SIZE(vecs->mpvec))
2550 vector_foreach_slot(vecs->mpvec, mpp, i) {
2551 if (mpp->wait_for_udev)
/* RCU callback: free an old struct config once no reader can hold it. */
2557 void rcu_free_config(struct rcu_head *head)
2559 struct config *conf = container_of(head, struct config, rcu);
/*
 * Full reconfiguration: load a fresh config from DEFAULT_CONFIGFILE,
 * tear down all maps/paths/foreign state built under the old config,
 * re-apply command-line overrides (verbosity, bindings_read_only),
 * then publish the new config via RCU and free the old one after a
 * grace period.
 */
2565 reconfigure (struct vectors * vecs)
2567 struct config * old, *conf;
2569 conf = load_config(DEFAULT_CONFIGFILE);
2574 * free old map and path vectors ... they use old conf state
2576 if (VECTOR_SIZE(vecs->mpvec))
2577 remove_maps_and_stop_waiters(vecs);
2579 free_pathvec(vecs->pathvec, FREE_PATHS);
2580 vecs->pathvec = NULL;
2581 delete_all_foreign();
2583 reset_checker_classes();
2584 /* Re-read any timezone changes */
2587 dm_tgt_version(conf->version, TGT_MPATH);
/* command-line options override the freshly loaded config file */
2589 conf->verbosity = verbosity;
2590 if (bindings_read_only)
2591 conf->bindings_read_only = bindings_read_only;
2592 uxsock_timeout = conf->uxsock_timeout;
/* swap in the new config; readers free the old one via RCU */
2594 old = rcu_dereference(multipath_conf);
2595 conf->sequence_nr = old->sequence_nr + 1;
2596 rcu_assign_pointer(multipath_conf, conf);
2597 call_rcu(&old->rcu, rcu_free_config);
/*
 * Allocate and initialize the global vectors structure (path/map
 * vectors plus its mutex). NOTE(review): function name line is
 * elided here; this is presumably init_vecs().
 */
2605 static struct vectors *
2608 struct vectors * vecs;
2610 vecs = (struct vectors *)MALLOC(sizeof(struct vectors));
2615 pthread_mutex_init(&vecs->lock.mutex, NULL);
/*
 * Install func as the handler for signo via sigaction() and return
 * the previously installed handler (signal(2)-style wrapper).
 */
2621 signal_set(int signo, void (*func) (int))
2624 struct sigaction sig;
2625 struct sigaction osig;
2627 sig.sa_handler = func;
2628 sigemptyset(&sig.sa_mask);
2631 r = sigaction(signo, &sig, &osig);
2636 return (osig.sa_handler);
/*
 * Act on flags set asynchronously by the signal handlers:
 * exit_sig -> shutdown, reconfig_sig -> DAEMON_CONFIGURE,
 * log_reset_sig -> reset logging. The nonfatal argument presumably
 * gates which of these are acted upon.
 */
2640 handle_signals(bool nonfatal)
2643 condlog(2, "exit (signal)");
2650 condlog(2, "reconfigure (signal)");
2651 set_config_state(DAEMON_CONFIGURE);
2653 if (log_reset_sig) {
2654 condlog(2, "reset log (signal)");
/*
 * Async-signal handlers: each only sets a sig_atomic_t flag (or logs,
 * for SIGUSR2) that handle_signals() processes later.
 * NOTE(review): the handler bodies are elided in this excerpt.
 */
2663 sighup(__attribute__((unused)) int sig)
2669 sigend(__attribute__((unused)) int sig)
2675 sigusr1(__attribute__((unused)) int sig)
2681 sigusr2(__attribute__((unused)) int sig)
2683 condlog(3, "SIGUSR2 received");
/*
 * Signal setup (function signature elided above): block everything
 * except SIGPIPE in this thread, then register the daemon's handlers;
 * fatal signals (INT/TERM/PIPE) all route through sigend.
 */
2691 /* block all signals */
2693 /* SIGPIPE occurs if logging fails */
2694 sigdelset(&set, SIGPIPE);
2695 pthread_sigmask(SIG_SETMASK, &set, NULL);
2697 /* Other signals will be unblocked in the uxlsnr thread */
2698 signal_set(SIGHUP, sighup);
2699 signal_set(SIGUSR1, sigusr1);
2700 signal_set(SIGUSR2, sigusr2);
2701 signal_set(SIGINT, sigend);
2702 signal_set(SIGTERM, sigend);
2703 signal_set(SIGPIPE, sigend);
/*
 * Request real-time round-robin scheduling (SCHED_RR, priority 99)
 * for the daemon; failure is logged but not fatal.
 */
2710 static struct sched_param sched_param = {
2711 .sched_priority = 99
2714 res = sched_setscheduler (0, SCHED_RR, &sched_param);
2717 condlog(LOG_WARNING, "Could not set SCHED_RR at priority 99");
/*
 * Exempt the daemon from the OOM killer by writing the minimum score
 * to /proc/self/oom_score_adj (falling back to the legacy
 * /proc/self/oom_adj on older kernels). Skipped when systemd already
 * provides OOMScoreAdjust in the environment.
 */
2724 #ifdef OOM_SCORE_ADJ_MIN
2726 char *file = "/proc/self/oom_score_adj";
2727 int score = OOM_SCORE_ADJ_MIN;
2730 char *file = "/proc/self/oom_adj";
2731 int score = OOM_ADJUST_MIN;
2737 envp = getenv("OOMScoreAdjust");
2739 condlog(3, "Using systemd provided OOMScoreAdjust");
2743 if (stat(file, &st) == 0){
2744 fp = fopen(file, "w");
2746 condlog(0, "couldn't fopen %s : %s", file,
2750 fprintf(fp, "%i", score);
2754 if (errno != ENOENT) {
2755 condlog(0, "couldn't stat %s : %s", file,
/* new interface missing: retry with the legacy oom_adj file */
2759 #ifdef OOM_ADJUST_MIN
2760 file = "/proc/self/oom_adj";
2761 score = OOM_ADJUST_MIN;
2766 condlog(0, "couldn't adjust oom score");
/*
 * Daemon main body (runs after optional daemonize()): initialize
 * logging, pidfile, config, checkers/prioritizers/foreign libs,
 * spawn the worker threads (uxlsnr, dmevents, uevents, checker,
 * uevent dispatcher), then loop on the IDLE/CONFIGURE/SHUTDOWN state
 * machine until shutdown, and finally tear everything down.
 * NOTE(review): excerpt is elided; error-handling branches, labels
 * and braces are not all shown.
 */
2770 child (__attribute__((unused)) void *param)
2772 pthread_t check_thr, uevent_thr, uxlsnr_thr, uevq_thr, dmevent_thr;
2773 pthread_attr_t log_attr, misc_attr, uevent_attr;
2774 struct vectors * vecs;
2775 struct multipath * mpp;
2778 unsigned long checkint;
2779 int startup_done = 0;
2783 struct config *conf;
2785 int queue_without_daemon;
2786 enum daemon_status state;
2788 mlockall(MCL_CURRENT | MCL_FUTURE);
2792 setup_thread_attr(&misc_attr, 64 * 1024, 0);
2793 setup_thread_attr(&uevent_attr, DEFAULT_UEVENT_STACKSIZE * 1024, 0);
2794 setup_thread_attr(&waiter_attr, 32 * 1024, 1);
2795 setup_thread_attr(&io_err_stat_attr, 32 * 1024, 0);
2798 setup_thread_attr(&log_attr, 64 * 1024, 0);
2799 log_thread_start(&log_attr);
2800 pthread_attr_destroy(&log_attr);
2802 pid_fd = pidfile_create(DEFAULT_PIDFILE, daemon_pid);
2804 condlog(1, "failed to create pidfile");
2810 post_config_state(DAEMON_START);
2812 condlog(2, "--------start up--------");
2813 condlog(2, "read " DEFAULT_CONFIGFILE);
2815 conf = load_config(DEFAULT_CONFIGFILE);
/* command-line options override the config file */
2820 conf->verbosity = verbosity;
2821 if (bindings_read_only)
2822 conf->bindings_read_only = bindings_read_only;
2823 uxsock_timeout = conf->uxsock_timeout;
2824 rcu_assign_pointer(multipath_conf, conf);
2825 if (init_checkers(conf->multipath_dir)) {
2826 condlog(0, "failed to initialize checkers");
2829 if (init_prio(conf->multipath_dir)) {
2830 condlog(0, "failed to initialize prioritizers");
2833 /* Failing this is non-fatal */
2835 init_foreign(conf->multipath_dir, conf->enable_foreign);
2838 poll_dmevents = dmevent_poll_supported();
2839 setlogmask(LOG_UPTO(conf->verbosity + 3));
2841 envp = getenv("LimitNOFILE");
2844 condlog(2,"Using systemd provided open fds limit of %s", envp);
2846 set_max_fds(conf->max_fds);
2848 vecs = gvecs = init_vecs();
/* scale check intervals from the systemd watchdog period */
2856 envp = getenv("WATCHDOG_USEC");
2857 if (envp && sscanf(envp, "%lu", &checkint) == 1) {
2858 /* Value is in microseconds */
2859 conf->max_checkint = checkint / 1000000;
2860 /* Rescale checkint */
2861 if (conf->checkint > conf->max_checkint)
2862 conf->checkint = conf->max_checkint;
2864 conf->checkint = conf->max_checkint / 4;
2865 condlog(3, "enabling watchdog, interval %d max %d",
2866 conf->checkint, conf->max_checkint);
2867 use_watchdog = conf->checkint;
2871 * Startup done, invalidate configuration
/* start uxlsnr and wait (under config_lock) until it leaves IDLE */
2875 pthread_cleanup_push(config_cleanup, NULL);
2876 pthread_mutex_lock(&config_lock);
2878 __post_config_state(DAEMON_IDLE);
2879 rc = pthread_create(&uxlsnr_thr, &misc_attr, uxlsnrloop, vecs);
2881 /* Wait for uxlsnr startup */
2882 while (running_state == DAEMON_IDLE)
2883 pthread_cond_wait(&config_cond, &config_lock);
2884 state = running_state;
2886 pthread_cleanup_pop(1);
2889 condlog(0, "failed to create cli listener: %d", rc);
2892 else if (state != DAEMON_CONFIGURE) {
2893 condlog(0, "cli listener failed to start");
2897 if (poll_dmevents) {
2898 if (init_dmevent_waiter(vecs)) {
2899 condlog(0, "failed to allocate dmevents waiter info");
2902 if ((rc = pthread_create(&dmevent_thr, &misc_attr,
2903 wait_dmevents, NULL))) {
2904 condlog(0, "failed to create dmevent waiter thread: %d",
2911 * Start uevent listener early to catch events
2913 if ((rc = pthread_create(&uevent_thr, &uevent_attr, ueventloop, udev))) {
2914 condlog(0, "failed to create uevent thread: %d", rc);
2917 pthread_attr_destroy(&uevent_attr);
2922 if ((rc = pthread_create(&check_thr, &misc_attr, checkerloop, vecs))) {
2923 condlog(0,"failed to create checker loop thread: %d", rc);
2926 if ((rc = pthread_create(&uevq_thr, &misc_attr, uevqloop, vecs))) {
2927 condlog(0, "failed to create uevent dispatcher: %d", rc);
2930 pthread_attr_destroy(&misc_attr);
/* main state-machine loop: sleep until CONFIGURE or SHUTDOWN */
2933 pthread_cleanup_push(config_cleanup, NULL);
2934 pthread_mutex_lock(&config_lock);
2935 while (running_state != DAEMON_CONFIGURE &&
2936 running_state != DAEMON_SHUTDOWN)
2937 pthread_cond_wait(&config_cond, &config_lock);
2938 state = running_state;
2939 pthread_cleanup_pop(1);
2940 if (state == DAEMON_SHUTDOWN)
2942 if (state == DAEMON_CONFIGURE) {
2943 pthread_cleanup_push(cleanup_lock, &vecs->lock);
2945 pthread_testcancel();
2946 if (!need_to_delay_reconfig(vecs)) {
2949 conf = get_multipath_config();
2950 conf->delayed_reconfig = 1;
2951 put_multipath_config(conf);
2953 lock_cleanup_pop(vecs->lock);
2954 post_config_state(DAEMON_IDLE);
2956 if (!startup_done) {
2957 sd_notify(0, "READY=1");
/* shutdown path: optionally stop queueing, stop waiters/threads */
2965 conf = get_multipath_config();
2966 queue_without_daemon = conf->queue_without_daemon;
2967 put_multipath_config(conf);
2968 if (queue_without_daemon == QUE_NO_DAEMON_OFF)
2969 vector_foreach_slot(vecs->mpvec, mpp, i)
2970 dm_queue_if_no_path(mpp->alias, 0);
2971 remove_maps_and_stop_waiters(vecs);
2972 unlock(&vecs->lock);
2974 pthread_cancel(check_thr);
2975 pthread_cancel(uevent_thr);
2976 pthread_cancel(uxlsnr_thr);
2977 pthread_cancel(uevq_thr);
2979 pthread_cancel(dmevent_thr);
2981 pthread_join(check_thr, NULL);
2982 pthread_join(uevent_thr, NULL);
2983 pthread_join(uxlsnr_thr, NULL);
2984 pthread_join(uevq_thr, NULL);
2986 pthread_join(dmevent_thr, NULL);
2988 stop_io_err_stat_thread();
2991 free_pathvec(vecs->pathvec, FREE_PATHS);
2992 vecs->pathvec = NULL;
2993 unlock(&vecs->lock);
2995 pthread_mutex_destroy(&vecs->lock.mutex);
3003 cleanup_dmevent_waiter();
3008 /* We're done here */
3009 condlog(3, "unlink pidfile");
3010 unlink(DEFAULT_PIDFILE);
3012 condlog(2, "--------shut down-------");
3018 * Freeing config must be done after condlog() and dm_lib_exit(),
3019 * because logging functions like dlog() and dm_write_log()
3020 * reference the config.
3022 conf = rcu_dereference(multipath_conf);
3023 rcu_assign_pointer(multipath_conf, NULL);
3024 call_rcu(&conf->rcu, rcu_free_config);
3027 pthread_attr_destroy(&waiter_attr);
3028 pthread_attr_destroy(&io_err_stat_attr);
3030 dbg_free_final(NULL);
3034 sd_notify(0, "ERRNO=0");
3040 sd_notify(0, "ERRNO=1");
/*
 * Daemonize (function signature elided above): classic double-fork,
 * chdir to "/", and redirect stdin/stdout/stderr to /dev/null so the
 * daemon detaches from its controlling terminal.
 */
3053 if( (pid = fork()) < 0){
3054 fprintf(stderr, "Failed first fork : %s\n", strerror(errno));
3062 if ( (pid = fork()) < 0)
3063 fprintf(stderr, "Failed second fork : %s\n", strerror(errno));
3068 fprintf(stderr, "cannot chdir to '/', continuing\n");
3070 dev_null_fd = open("/dev/null", O_RDWR);
3071 if (dev_null_fd < 0){
3072 fprintf(stderr, "cannot open /dev/null for input & output : %s\n",
/* dup() reuses the lowest free fd, i.e. the one just closed */
3077 close(STDIN_FILENO);
3078 if (dup(dev_null_fd) < 0) {
3079 fprintf(stderr, "cannot dup /dev/null to stdin : %s\n",
3083 close(STDOUT_FILENO);
3084 if (dup(dev_null_fd) < 0) {
3085 fprintf(stderr, "cannot dup /dev/null to stdout : %s\n",
3089 close(STDERR_FILENO);
3090 if (dup(dev_null_fd) < 0) {
3091 fprintf(stderr, "cannot dup /dev/null to stderr : %s\n",
3096 daemon_pid = getpid();
/*
 * Entry point: parse options; either act as a CLI client forwarding a
 * command to a running daemon over the unix socket (-k or trailing
 * arguments), or become the daemon itself via child().
 * NOTE(review): excerpt is elided; several option cases and exits are
 * not shown.
 */
3101 main (int argc, char *argv[])
3103 extern char *optarg;
3108 struct config *conf;
3110 ANNOTATE_BENIGN_RACE_SIZED(&multipath_conf, sizeof(multipath_conf),
3111 "Manipulated through RCU");
3112 ANNOTATE_BENIGN_RACE_SIZED(&uxsock_timeout, sizeof(uxsock_timeout),
3113 "Suppress complaints about this scalar variable");
3117 if (getuid() != 0) {
3118 fprintf(stderr, "need to be root\n");
3122 /* make sure we don't lock any path */
3124 fprintf(stderr, "can't chdir to root directory : %s\n",
3126 umask(umask(077) | 022);
3128 pthread_cond_init_mono(&config_cond);
3131 libmp_udev_set_sync_support(0);
3133 while ((arg = getopt(argc, argv, ":dsv:k::Bniw")) != EOF ) {
3139 //debug=1; /* ### comment me out ### */
3142 if (sizeof(optarg) > sizeof(char *) ||
3143 !isdigit(optarg[0]))
3146 verbosity = atoi(optarg);
/* -k: interactive client mode, send command to the daemon socket */
3153 conf = load_config(DEFAULT_CONFIGFILE);
3157 conf->verbosity = verbosity;
3158 uxsock_timeout = conf->uxsock_timeout;
3159 err = uxclnt(optarg, uxsock_timeout + 100);
3163 bindings_read_only = 1;
3166 condlog(0, "WARNING: ignoring deprecated option -n, use 'ignore_wwids = no' instead");
3172 fprintf(stderr, "Invalid argument '-%c'\n",
/* trailing args: join them into one command and send to the daemon */
3177 if (optind < argc) {
3183 conf = load_config(DEFAULT_CONFIGFILE);
3187 conf->verbosity = verbosity;
3188 uxsock_timeout = conf->uxsock_timeout;
3189 memset(cmd, 0x0, CMDSIZE);
3190 while (optind < argc) {
3191 if (strchr(argv[optind], ' '))
3192 c += snprintf(c, s + CMDSIZE - c, "\"%s\" ", argv[optind]);
3194 c += snprintf(c, s + CMDSIZE - c, "%s ", argv[optind]);
3197 c += snprintf(c, s + CMDSIZE - c, "\n");
3198 err = uxclnt(s, uxsock_timeout + 100);
3204 if (!isatty(fileno(stdout)))
3205 setbuf(stdout, NULL);
3207 daemon_pid = getpid();
3219 return (child(NULL));
/*
 * Thread entry point: re-register the multipath persistent reservation
 * key on a newly usable path.  Reads the current registrations with
 * PR IN (READ KEYS); if the map's reservation key is already present,
 * registers it on this path with PR OUT REGISTER AND IGNORE EXISTING
 * KEY.  (Fragment: the mpp assignment, goto labels and the cleanup /
 * free path are elided from this excerpt.)
 */
3222 void * mpath_pr_event_handler_fn (void * pathp )
3224 struct multipath * mpp;
3227 struct path * pp = (struct path *)pathp;
3228 struct prout_param_descriptor *param;
3229 struct prin_resp *resp;
/* This thread dereferences RCU-protected state; register it with RCU. */
3231 rcu_register_thread();
3234 resp = mpath_alloc_prin_response(MPATH_PRIN_RKEY_SA);
3236 condlog(0,"%s Alloc failed for prin response", pp->dev);
/* PR IN / READ KEYS against this specific path device. */
3240 ret = prin_do_scsi_ioctl(pp->dev, MPATH_PRIN_RKEY_SA, resp, 0);
3241 if (ret != MPATH_PR_SUCCESS )
3243 condlog(0,"%s : pr in read keys service action failed. Error=%d", pp->dev, ret);
3247 condlog(3, " event pr=%d addlen=%d",resp->prin_descriptor.prin_readkeys.prgeneration,
3248 resp->prin_descriptor.prin_readkeys.additional_length );
/* No keys registered at all: nothing to re-register on this path. */
3250 if (resp->prin_descriptor.prin_readkeys.additional_length == 0 )
3252 condlog(1, "%s: No key found. Device may not be registered.", pp->dev);
3253 ret = MPATH_PR_SUCCESS;
3256 condlog(2, "Multipath reservation_key: 0x%" PRIx64 " ",
3257 get_be64(mpp->reservation_key));
/* Each returned key is 8 bytes; scan for the map's own key. */
3260 for (i = 0; i < resp->prin_descriptor.prin_readkeys.additional_length/8; i++ )
3262 condlog(2, "PR IN READKEYS[%d] reservation key:",i);
3263 dumpHex((char *)&resp->prin_descriptor.prin_readkeys.key_list[i*8], 8 , -1);
3264 if (!memcmp(&mpp->reservation_key, &resp->prin_descriptor.prin_readkeys.key_list[i*8], 8))
3266 condlog(2, "%s: pr key found in prin readkeys response", mpp->alias);
/* Key not among the registrations: skip this path rather than register blindly. */
3273 condlog(0, "%s: Either device not registered or ", pp->dev);
3274 condlog(0, "host is not authorised for registration. Skip path");
3275 ret = MPATH_PR_OTHER;
/*
 * NOTE(review): malloc() result is passed straight to memset() without
 * a NULL check — a failed allocation here dereferences NULL.  TODO add
 * the check when this function can be edited in full.
 */
3279 param= malloc(sizeof(struct prout_param_descriptor));
3280 memset(param, 0 , sizeof(struct prout_param_descriptor));
3281 param->sa_flags = mpp->sa_flags;
3282 memcpy(param->sa_key, &mpp->reservation_key, 8);
3283 param->num_transportid = 0;
3285 condlog(3, "device %s:%s", pp->dev, pp->mpp->wwid);
/* PR OUT / REGISTER AND IGNORE EXISTING KEY on this path. */
3287 ret = prout_do_scsi_ioctl(pp->dev, MPATH_PROUT_REG_IGN_SA, 0, 0, param, 0);
3288 if (ret != MPATH_PR_SUCCESS )
3290 condlog(0,"%s: Reservation registration failed. Error: %d", pp->dev, ret);
/* Common exit: drop RCU registration before the thread returns. */
3298 rcu_unregister_thread();
/*
 * Run the persistent-reservation re-registration for a path in a
 * worker thread.  Only SCSI paths on maps that actually have a
 * reservation key are handled.  (Fragment: the mpp assignment, early
 * returns and the function tail after pthread_join are elided.)
 */
3302 int mpath_pr_event_handle(struct path *pp)
3306 pthread_attr_t attr;
3307 struct multipath * mpp;
/* Persistent reservations only apply to SCSI devices. */
3309 if (pp->bus != SYSFS_BUS_SCSI)
/* No reservation key configured on this map: nothing to do. */
3314 if (!get_be64(mpp->reservation_key))
/*
 * NOTE(review): attr is initialized and set joinable, but NULL is
 * passed to pthread_create below, so attr is dead weight (joinable is
 * also the pthread default).  And create-immediately-join makes the
 * "thread" fully synchronous — presumably intentional to bound the
 * ioctl's stack usage, but worth confirming upstream.
 */
3317 pthread_attr_init(&attr);
3318 pthread_attr_setdetachstate(&attr, PTHREAD_CREATE_JOINABLE);
3320 rc = pthread_create(&thread, NULL , mpath_pr_event_handler_fn, pp);
3322 condlog(0, "%s: ERROR; return code from pthread_create() is %d", pp->dev, rc);
3325 pthread_attr_destroy(&attr);
/* Wait for the handler thread; the caller sees its completion synchronously. */
3326 rc = pthread_join(thread, NULL);