2 * Copyright (c) 2004, 2005 Christophe Varoqui
3 * Copyright (c) 2005 Kiyoshi Ueda, NEC
4 * Copyright (c) 2005 Benjamin Marzinski, Redhat
5 * Copyright (c) 2005 Edward Goggin, EMC
9 #include <libdevmapper.h>
12 #include <sys/types.h>
16 #include <sys/resource.h>
18 #include <linux/oom.h>
22 #include <systemd/sd-daemon.h>
24 #include <semaphore.h>
31 #include "time-util.h"
39 static int use_watchdog;
53 #include "blacklist.h"
54 #include "structs_vec.h"
56 #include "devmapper.h"
59 #include "discovery.h"
63 #include "switchgroup.h"
65 #include "configure.h"
68 #include "pgpolicies.h"
72 #include "mpath_cmd.h"
73 #include "mpath_persist.h"
75 #include "prioritizers/alua_rtpg.h"
82 #include "cli_handlers.h"
85 #include "io_err_stat.h"
87 #include "../third-party/valgrind/drd.h"
89 #define FILE_NAME_SIZE 256
92 #define LOG_MSG(a, b) \
95 condlog(a, "%s: %s - path offline", pp->mpp->alias, pp->dev); \
97 condlog(a, "%s: %s - %s", pp->mpp->alias, pp->dev, b); \
100 struct mpath_event_param
103 struct multipath *mpp;
109 int bindings_read_only;
111 enum daemon_status running_state = DAEMON_INIT;
113 pthread_mutex_t config_lock = PTHREAD_MUTEX_INITIALIZER;
114 pthread_cond_t config_cond;
117 * global copy of vecs for use in sig handlers
119 struct vectors * gvecs;
123 struct config *multipath_conf;
125 /* Local variables */
126 static volatile sig_atomic_t exit_sig;
127 static volatile sig_atomic_t reconfig_sig;
128 static volatile sig_atomic_t log_reset_sig;
133 switch (running_state) {
138 case DAEMON_CONFIGURE:
144 case DAEMON_SHUTDOWN:
151 * I love you too, systemd ...
154 sd_notify_status(void)
156 switch (running_state) {
158 return "STATUS=init";
160 return "STATUS=startup";
161 case DAEMON_CONFIGURE:
162 return "STATUS=configure";
166 case DAEMON_SHUTDOWN:
167 return "STATUS=shutdown";
173 static void do_sd_notify(enum daemon_status old_state)
176 * Checkerloop switches back and forth between idle and running state.
177 * No need to tell systemd each time.
178 * These notifications cause a lot of overhead on dbus.
180 if ((running_state == DAEMON_IDLE || running_state == DAEMON_RUNNING) &&
181 (old_state == DAEMON_IDLE || old_state == DAEMON_RUNNING))
183 sd_notify(0, sd_notify_status());
187 static void config_cleanup(void *arg)
189 pthread_mutex_unlock(&config_lock);
192 void post_config_state(enum daemon_status state)
194 pthread_mutex_lock(&config_lock);
195 if (state != running_state) {
196 enum daemon_status old_state = running_state;
198 running_state = state;
199 pthread_cond_broadcast(&config_cond);
201 do_sd_notify(old_state);
204 pthread_mutex_unlock(&config_lock);
207 int set_config_state(enum daemon_status state)
211 pthread_cleanup_push(config_cleanup, NULL);
212 pthread_mutex_lock(&config_lock);
213 if (running_state != state) {
214 enum daemon_status old_state = running_state;
216 if (running_state != DAEMON_IDLE) {
219 clock_gettime(CLOCK_MONOTONIC, &ts);
221 rc = pthread_cond_timedwait(&config_cond,
225 running_state = state;
226 pthread_cond_broadcast(&config_cond);
228 do_sd_notify(old_state);
232 pthread_cleanup_pop(1);
236 struct config *get_multipath_config(void)
239 return rcu_dereference(multipath_conf);
/*
 * End the RCU read-side critical section opened by
 * get_multipath_config().  @conf is intentionally unused; it is part
 * of the signature so callers express which reference they release.
 */
void put_multipath_config(struct config *conf)
{
	rcu_read_unlock();
}
248 need_switch_pathgroup (struct multipath * mpp, int refresh)
250 struct pathgroup * pgp;
255 if (!mpp || mpp->pgfailback == -FAILBACK_MANUAL)
259 * Refresh path priority values
262 vector_foreach_slot (mpp->pg, pgp, i) {
263 vector_foreach_slot (pgp->paths, pp, j) {
264 conf = get_multipath_config();
265 pathinfo(pp, conf, DI_PRIO);
266 put_multipath_config(conf);
271 if (!mpp->pg || VECTOR_SIZE(mpp->paths) == 0)
274 mpp->bestpg = select_path_group(mpp);
276 if (mpp->bestpg != mpp->nextpg)
283 switch_pathgroup (struct multipath * mpp)
285 mpp->stat_switchgroup++;
286 dm_switchgroup(mpp->alias, mpp->bestpg);
287 condlog(2, "%s: switch to path group #%i",
288 mpp->alias, mpp->bestpg);
292 coalesce_maps(struct vectors *vecs, vector nmpv)
294 struct multipath * ompp;
295 vector ompv = vecs->mpvec;
296 unsigned int i, reassign_maps;
299 conf = get_multipath_config();
300 reassign_maps = conf->reassign_maps;
301 put_multipath_config(conf);
302 vector_foreach_slot (ompv, ompp, i) {
303 condlog(3, "%s: coalesce map", ompp->alias);
304 if (!find_mp_by_wwid(nmpv, ompp->wwid)) {
306 * remove all current maps not allowed by the
307 * current configuration
309 if (dm_flush_map(ompp->alias)) {
310 condlog(0, "%s: unable to flush devmap",
313 * may be just because the device is open
315 if (setup_multipath(vecs, ompp) != 0) {
319 if (!vector_alloc_slot(nmpv))
322 vector_set_slot(nmpv, ompp);
324 vector_del_slot(ompv, i);
329 condlog(2, "%s devmap removed", ompp->alias);
331 } else if (reassign_maps) {
332 condlog(3, "%s: Reassign existing device-mapper"
333 " devices", ompp->alias);
334 dm_reassign(ompp->alias);
341 sync_maps_state(vector mpvec)
344 struct multipath *mpp;
346 vector_foreach_slot (mpvec, mpp, i)
351 flush_map(struct multipath * mpp, struct vectors * vecs, int nopaths)
356 r = dm_flush_map_nopaths(mpp->alias, mpp->deferred_remove);
358 r = dm_flush_map(mpp->alias);
360 * clear references to this map before flushing so we can ignore
361 * the spurious uevent we may generate with the dm_flush_map call below
365 * May not really be an error -- if the map was already flushed
366 * from the device mapper by dmsetup(8) for instance.
369 condlog(0, "%s: can't flush", mpp->alias);
371 condlog(2, "%s: devmap deferred remove", mpp->alias);
372 mpp->deferred_remove = DEFERRED_REMOVE_IN_PROGRESS;
378 condlog(2, "%s: map flushed", mpp->alias);
381 orphan_paths(vecs->pathvec, mpp);
382 remove_map_and_stop_waiter(mpp, vecs, 1);
388 uev_add_map (struct uevent * uev, struct vectors * vecs)
391 int major = -1, minor = -1, rc;
393 condlog(3, "%s: add map (uevent)", uev->kernel);
394 alias = uevent_get_dm_name(uev);
396 condlog(3, "%s: No DM_NAME in uevent", uev->kernel);
397 major = uevent_get_major(uev);
398 minor = uevent_get_minor(uev);
399 alias = dm_mapname(major, minor);
401 condlog(2, "%s: mapname not found for %d:%d",
402 uev->kernel, major, minor);
406 pthread_cleanup_push(cleanup_lock, &vecs->lock);
408 pthread_testcancel();
409 rc = ev_add_map(uev->kernel, alias, vecs);
410 lock_cleanup_pop(vecs->lock);
416 ev_add_map (char * dev, const char * alias, struct vectors * vecs)
419 struct multipath * mpp;
421 int r = 1, delayed_reconfig, reassign_maps;
424 map_present = dm_map_present(alias);
426 if (map_present && !dm_is_mpath(alias)) {
427 condlog(4, "%s: not a multipath map", alias);
431 mpp = find_mp_by_alias(vecs->mpvec, alias);
434 if (mpp->wait_for_udev > 1) {
435 condlog(2, "%s: performing delayed actions",
437 if (update_map(mpp, vecs))
438 /* setup multipathd removed the map */
441 conf = get_multipath_config();
442 delayed_reconfig = conf->delayed_reconfig;
443 reassign_maps = conf->reassign_maps;
444 put_multipath_config(conf);
445 if (mpp->wait_for_udev) {
446 mpp->wait_for_udev = 0;
447 if (delayed_reconfig &&
448 !need_to_delay_reconfig(vecs)) {
449 condlog(2, "reconfigure (delayed)");
450 set_config_state(DAEMON_CONFIGURE);
455 * Not really an error -- we generate our own uevent
456 * if we create a multipath mapped device as a result
460 condlog(3, "%s: Reassign existing device-mapper devices",
466 condlog(2, "%s: adding map", alias);
469 * now we can register the map
472 if ((mpp = add_map_without_path(vecs, alias))) {
474 condlog(2, "%s: devmap %s registered", alias, dev);
477 condlog(2, "%s: uev_add_map failed", dev);
481 r = get_refwwid(CMD_NONE, dev, DEV_DEVMAP, vecs->pathvec, &refwwid);
484 r = coalesce_paths(vecs, NULL, refwwid, FORCE_RELOAD_NONE,
490 condlog(2, "%s: devmap %s added", alias, dev);
492 condlog(2, "%s: uev_add_map %s blacklisted", alias, dev);
494 condlog(0, "%s: uev_add_map %s failed", alias, dev);
501 uev_remove_map (struct uevent * uev, struct vectors * vecs)
505 struct multipath *mpp;
507 condlog(2, "%s: remove map (uevent)", uev->kernel);
508 alias = uevent_get_dm_name(uev);
510 condlog(3, "%s: No DM_NAME in uevent, ignoring", uev->kernel);
513 minor = uevent_get_minor(uev);
515 pthread_cleanup_push(cleanup_lock, &vecs->lock);
517 pthread_testcancel();
518 mpp = find_mp_by_minor(vecs->mpvec, minor);
521 condlog(2, "%s: devmap not registered, can't remove",
525 if (strcmp(mpp->alias, alias)) {
526 condlog(2, "%s: minor number mismatch (map %d, event %d)",
527 mpp->alias, mpp->dmi->minor, minor);
531 orphan_paths(vecs->pathvec, mpp);
532 remove_map_and_stop_waiter(mpp, vecs, 1);
534 lock_cleanup_pop(vecs->lock);
539 /* Called from CLI handler */
541 ev_remove_map (char * devname, char * alias, int minor, struct vectors * vecs)
543 struct multipath * mpp;
545 mpp = find_mp_by_minor(vecs->mpvec, minor);
548 condlog(2, "%s: devmap not registered, can't remove",
552 if (strcmp(mpp->alias, alias)) {
553 condlog(2, "%s: minor number mismatch (map %d, event %d)",
554 mpp->alias, mpp->dmi->minor, minor);
557 return flush_map(mpp, vecs, 0);
561 uev_add_path (struct uevent *uev, struct vectors * vecs, int need_do_map)
567 condlog(2, "%s: add path (uevent)", uev->kernel);
568 if (strstr(uev->kernel, "..") != NULL) {
570 * Don't allow relative device names in the pathvec
572 condlog(0, "%s: path name is invalid", uev->kernel);
576 pthread_cleanup_push(cleanup_lock, &vecs->lock);
578 pthread_testcancel();
579 pp = find_path_by_dev(vecs->pathvec, uev->kernel);
583 condlog(0, "%s: spurious uevent, path already in pathvec",
585 if (!pp->mpp && !strlen(pp->wwid)) {
586 condlog(3, "%s: reinitialize path", uev->kernel);
587 udev_device_unref(pp->udev);
588 pp->udev = udev_device_ref(uev->udev);
589 conf = get_multipath_config();
590 r = pathinfo(pp, conf,
591 DI_ALL | DI_BLACKLIST);
592 put_multipath_config(conf);
593 if (r == PATHINFO_OK)
594 ret = ev_add_path(pp, vecs, need_do_map);
595 else if (r == PATHINFO_SKIPPED) {
596 condlog(3, "%s: remove blacklisted path",
598 i = find_slot(vecs->pathvec, (void *)pp);
600 vector_del_slot(vecs->pathvec, i);
603 condlog(0, "%s: failed to reinitialize path",
609 lock_cleanup_pop(vecs->lock);
614 * get path vital state
616 conf = get_multipath_config();
617 ret = alloc_path_with_pathinfo(conf, uev->udev,
618 uev->wwid, DI_ALL, &pp);
619 put_multipath_config(conf);
621 if (ret == PATHINFO_SKIPPED)
623 condlog(3, "%s: failed to get path info", uev->kernel);
626 pthread_cleanup_push(cleanup_lock, &vecs->lock);
628 pthread_testcancel();
629 ret = store_path(vecs->pathvec, pp);
631 conf = get_multipath_config();
632 pp->checkint = conf->checkint;
633 put_multipath_config(conf);
634 ret = ev_add_path(pp, vecs, need_do_map);
636 condlog(0, "%s: failed to store path info, "
642 lock_cleanup_pop(vecs->lock);
652 ev_add_path (struct path * pp, struct vectors * vecs, int need_do_map)
654 struct multipath * mpp;
655 char params[PARAMS_SIZE] = {0};
657 int start_waiter = 0;
661 * need path UID to go any further
663 if (strlen(pp->wwid) == 0) {
664 condlog(0, "%s: failed to get path uid", pp->dev);
665 goto fail; /* leave path added to pathvec */
667 mpp = find_mp_by_wwid(vecs->mpvec, pp->wwid);
668 if (mpp && mpp->wait_for_udev &&
669 (pathcount(mpp, PATH_UP) > 0 ||
670 (pathcount(mpp, PATH_GHOST) > 0 && pp->tpgs != TPGS_IMPLICIT &&
671 mpp->ghost_delay_tick <= 0))) {
672 /* if wait_for_udev is set and valid paths exist */
673 condlog(2, "%s: delaying path addition until %s is fully initialized", pp->dev, mpp->alias);
674 mpp->wait_for_udev = 2;
675 orphan_path(pp, "waiting for create to complete");
682 if (pp->size && mpp->size != pp->size) {
683 condlog(0, "%s: failed to add new path %s, "
684 "device size mismatch",
685 mpp->alias, pp->dev);
686 int i = find_slot(vecs->pathvec, (void *)pp);
688 vector_del_slot(vecs->pathvec, i);
693 condlog(4,"%s: adopting all paths for path %s",
694 mpp->alias, pp->dev);
695 if (adopt_paths(vecs->pathvec, mpp))
696 goto fail; /* leave path added to pathvec */
698 verify_paths(mpp, vecs);
699 mpp->action = ACT_RELOAD;
700 extract_hwe_from_path(mpp);
702 if (!should_multipath(pp, vecs->pathvec)) {
703 orphan_path(pp, "only one path");
706 condlog(4,"%s: creating new map", pp->dev);
707 if ((mpp = add_map_with_path(vecs, pp, 1))) {
708 mpp->action = ACT_CREATE;
710 * We don't depend on ACT_CREATE, as domap will
711 * set it to ACT_NOTHING when complete.
716 goto fail; /* leave path added to pathvec */
719 /* persistent reservation check*/
720 mpath_pr_event_handle(pp);
725 if (!dm_map_present(mpp->alias)) {
726 mpp->action = ACT_CREATE;
730 * push the map to the device-mapper
732 if (setup_map(mpp, params, PARAMS_SIZE)) {
733 condlog(0, "%s: failed to setup map for addition of new "
734 "path %s", mpp->alias, pp->dev);
738 * reload the map for the multipath mapped device
741 ret = domap(mpp, params, 1);
743 if (ret < 0 && retries-- > 0) {
744 condlog(0, "%s: retry domap for addition of new "
745 "path %s", mpp->alias, pp->dev);
749 condlog(0, "%s: failed in domap for addition of new "
750 "path %s", mpp->alias, pp->dev);
752 * deal with asynchronous uevents :((
754 if (mpp->action == ACT_RELOAD && retries-- > 0) {
755 condlog(0, "%s: ev_add_path sleep", mpp->alias);
757 update_mpp_paths(mpp, vecs->pathvec);
760 else if (mpp->action == ACT_RELOAD)
761 condlog(0, "%s: giving up reload", mpp->alias);
768 * update our state from kernel regardless of create or reload
770 if (setup_multipath(vecs, mpp))
771 goto fail; /* if setup_multipath fails, it removes the map */
775 if ((mpp->action == ACT_CREATE ||
776 (mpp->action == ACT_NOTHING && start_waiter && !mpp->waiter)) &&
777 start_waiter_thread(mpp, vecs))
781 condlog(2, "%s [%s]: path added to devmap %s",
782 pp->dev, pp->dev_t, mpp->alias);
788 remove_map(mpp, vecs, 1);
790 orphan_path(pp, "failed to add path");
795 uev_remove_path (struct uevent *uev, struct vectors * vecs, int need_do_map)
800 condlog(2, "%s: remove path (uevent)", uev->kernel);
801 pthread_cleanup_push(cleanup_lock, &vecs->lock);
803 pthread_testcancel();
804 pp = find_path_by_dev(vecs->pathvec, uev->kernel);
806 ret = ev_remove_path(pp, vecs, need_do_map);
807 lock_cleanup_pop(vecs->lock);
809 /* Not an error; path might have been purged earlier */
810 condlog(0, "%s: path already removed", uev->kernel);
817 ev_remove_path (struct path *pp, struct vectors * vecs, int need_do_map)
819 struct multipath * mpp;
821 char params[PARAMS_SIZE] = {0};
824 * avoid referring to the map of an orphaned path
826 if ((mpp = pp->mpp)) {
828 * transform the mp->pg vector of vectors of paths
829 * into a mp->params string to feed the device-mapper
831 if (update_mpp_paths(mpp, vecs->pathvec)) {
832 condlog(0, "%s: failed to update paths",
836 if ((i = find_slot(mpp->paths, (void *)pp)) != -1)
837 vector_del_slot(mpp->paths, i);
840 * remove the map IFF removing the last path
842 if (VECTOR_SIZE(mpp->paths) == 0) {
843 char alias[WWID_SIZE];
846 * flush_map will fail if the device is open
848 strncpy(alias, mpp->alias, WWID_SIZE);
849 if (mpp->flush_on_last_del == FLUSH_ENABLED) {
850 condlog(2, "%s Last path deleted, disabling queueing", mpp->alias);
852 mpp->no_path_retry = NO_PATH_RETRY_FAIL;
853 mpp->disable_queueing = 1;
854 mpp->stat_map_failures++;
855 dm_queue_if_no_path(mpp->alias, 0);
857 if (!flush_map(mpp, vecs, 1)) {
858 condlog(2, "%s: removed map after"
859 " removing all paths",
865 * Not an error, continue
869 if (setup_map(mpp, params, PARAMS_SIZE)) {
870 condlog(0, "%s: failed to setup map for"
871 " removal of path %s", mpp->alias, pp->dev);
875 if (mpp->wait_for_udev) {
876 mpp->wait_for_udev = 2;
885 mpp->action = ACT_RELOAD;
886 if (domap(mpp, params, 1) <= 0) {
887 condlog(0, "%s: failed in domap for "
888 "removal of path %s",
889 mpp->alias, pp->dev);
893 * update our state from kernel
895 if (setup_multipath(vecs, mpp))
899 condlog(2, "%s [%s]: path removed from map %s",
900 pp->dev, pp->dev_t, mpp->alias);
905 if ((i = find_slot(vecs->pathvec, (void *)pp)) != -1)
906 vector_del_slot(vecs->pathvec, i);
913 remove_map_and_stop_waiter(mpp, vecs, 1);
918 uev_update_path (struct uevent *uev, struct vectors * vecs)
923 int disable_changed_wwids;
924 int needs_reinit = 0;
926 conf = get_multipath_config();
927 disable_changed_wwids = conf->disable_changed_wwids;
928 put_multipath_config(conf);
930 ro = uevent_get_disk_ro(uev);
932 pthread_cleanup_push(cleanup_lock, &vecs->lock);
934 pthread_testcancel();
936 pp = find_path_by_dev(vecs->pathvec, uev->kernel);
938 struct multipath *mpp = pp->mpp;
940 if (disable_changed_wwids &&
941 (strlen(pp->wwid) || pp->wwid_changed)) {
942 char wwid[WWID_SIZE];
944 strcpy(wwid, pp->wwid);
945 get_uid(pp, pp->state, uev->udev);
946 if (strcmp(wwid, pp->wwid) != 0) {
947 condlog(0, "%s: path wwid changed from '%s' to '%s'. disallowing", uev->kernel, wwid, pp->wwid);
948 strcpy(pp->wwid, wwid);
949 if (!pp->wwid_changed) {
950 pp->wwid_changed = 1;
953 dm_fail_path(pp->mpp->alias, pp->dev_t);
957 pp->wwid_changed = 0;
960 if (pp->initialized == INIT_REQUESTED_UDEV)
962 else if (mpp && ro >= 0) {
963 condlog(2, "%s: update path write_protect to '%d' (uevent)", uev->kernel, ro);
965 if (mpp->wait_for_udev)
966 mpp->wait_for_udev = 2;
969 pp->mpp->force_readonly = 1;
970 retval = reload_map(vecs, mpp, 0, 1);
971 pp->mpp->force_readonly = 0;
972 condlog(2, "%s: map %s reloaded (retval %d)",
973 uev->kernel, mpp->alias, retval);
978 lock_cleanup_pop(vecs->lock);
980 /* If the path is blacklisted, print a debug/non-default verbosity message. */
982 int flag = DI_SYSFS | DI_WWID;
984 conf = get_multipath_config();
985 retval = alloc_path_with_pathinfo(conf, uev->udev, uev->wwid, flag, NULL);
986 put_multipath_config(conf);
988 if (retval == PATHINFO_SKIPPED) {
989 condlog(3, "%s: spurious uevent, path is blacklisted", uev->kernel);
994 condlog(0, "%s: spurious uevent, path not found", uev->kernel);
997 retval = uev_add_path(uev, vecs, 1);
1002 uev_pathfail_check(struct uevent *uev, struct vectors *vecs)
1004 const char *action = NULL, *devt = NULL;
1008 action = uevent_get_dm_action(uev);
1011 if (strncmp(action, "PATH_FAILED", 11))
1013 devt = uevent_get_dm_path(uev);
1015 condlog(3, "%s: No DM_PATH in uevent", uev->kernel);
1019 pthread_cleanup_push(cleanup_lock, &vecs->lock);
1021 pthread_testcancel();
1022 pp = find_path_by_devt(vecs->pathvec, devt);
1025 r = io_err_stat_handle_pathfail(pp);
1027 condlog(3, "io_err_stat: %s: cannot handle pathfail uevent",
1030 lock_cleanup_pop(vecs->lock);
1040 map_discovery (struct vectors * vecs)
1042 struct multipath * mpp;
1045 if (dm_get_maps(vecs->mpvec))
1048 vector_foreach_slot (vecs->mpvec, mpp, i)
1049 if (update_multipath_table(mpp, vecs->pathvec, 1) ||
1050 update_multipath_status(mpp)) {
1051 remove_map(mpp, vecs, 1);
1059 uxsock_trigger (char * str, char ** reply, int * len, bool is_root,
1060 void * trigger_data)
1062 struct vectors * vecs;
1067 vecs = (struct vectors *)trigger_data;
1069 if ((str != NULL) && (is_root == false) &&
1070 (strncmp(str, "list", strlen("list")) != 0) &&
1071 (strncmp(str, "show", strlen("show")) != 0)) {
1072 *reply = STRDUP("permission deny: need to be root");
1074 *len = strlen(*reply) + 1;
1078 r = parse_cmd(str, reply, len, vecs, uxsock_timeout / 1000);
1082 *reply = STRDUP("timeout\n");
1084 *reply = STRDUP("fail\n");
1086 *len = strlen(*reply) + 1;
1089 else if (!r && *len == 0) {
1090 *reply = STRDUP("ok\n");
1092 *len = strlen(*reply) + 1;
1095 /* else if (r < 0) leave *reply alone */
1101 uev_trigger (struct uevent * uev, void * trigger_data)
1104 struct vectors * vecs;
1105 struct uevent *merge_uev, *tmp;
1107 vecs = (struct vectors *)trigger_data;
1109 pthread_cleanup_push(config_cleanup, NULL);
1110 pthread_mutex_lock(&config_lock);
1111 if (running_state != DAEMON_IDLE &&
1112 running_state != DAEMON_RUNNING)
1113 pthread_cond_wait(&config_cond, &config_lock);
1114 pthread_cleanup_pop(1);
1116 if (running_state == DAEMON_SHUTDOWN)
1121 * Add events are ignored here as the tables
1122 * are not fully initialised then.
1124 if (!strncmp(uev->kernel, "dm-", 3)) {
1125 if (!uevent_is_mpath(uev))
1127 if (!strncmp(uev->action, "change", 6)) {
1128 r = uev_add_map(uev, vecs);
1131 * the kernel-side dm-mpath issues a PATH_FAILED event
1132 * when it encounters a path IO error. It is reason-
1133 * able be the entry of path IO error accounting pro-
1136 uev_pathfail_check(uev, vecs);
1137 } else if (!strncmp(uev->action, "remove", 6)) {
1138 r = uev_remove_map(uev, vecs);
1144 * path add/remove/change event, add/remove maybe merged
1146 list_for_each_entry_safe(merge_uev, tmp, &uev->merge_node, node) {
1147 if (!strncmp(merge_uev->action, "add", 3))
1148 r += uev_add_path(merge_uev, vecs, 0);
1149 if (!strncmp(merge_uev->action, "remove", 6))
1150 r += uev_remove_path(merge_uev, vecs, 0);
1153 if (!strncmp(uev->action, "add", 3))
1154 r += uev_add_path(uev, vecs, 1);
1155 if (!strncmp(uev->action, "remove", 6))
1156 r += uev_remove_path(uev, vecs, 1);
1157 if (!strncmp(uev->action, "change", 6))
1158 r += uev_update_path(uev, vecs);
/* pthread cleanup handler: detach the exiting thread from liburcu. */
static void rcu_unregister(void *param)
{
	rcu_unregister_thread();
}
1170 ueventloop (void * ap)
1172 struct udev *udev = ap;
1174 pthread_cleanup_push(rcu_unregister, NULL);
1175 rcu_register_thread();
1176 if (uevent_listen(udev))
1177 condlog(0, "error starting uevent listener");
1178 pthread_cleanup_pop(1);
1183 uevqloop (void * ap)
1185 pthread_cleanup_push(rcu_unregister, NULL);
1186 rcu_register_thread();
1187 if (uevent_dispatch(&uev_trigger, ap))
1188 condlog(0, "error starting uevent dispatcher");
1189 pthread_cleanup_pop(1);
1193 uxlsnrloop (void * ap)
1196 condlog(1, "Failed to init uxsock listener");
1199 pthread_cleanup_push(rcu_unregister, NULL);
1200 rcu_register_thread();
1201 set_handler_callback(LIST+PATHS, cli_list_paths);
1202 set_handler_callback(LIST+PATHS+FMT, cli_list_paths_fmt);
1203 set_handler_callback(LIST+PATHS+RAW+FMT, cli_list_paths_raw);
1204 set_handler_callback(LIST+PATH, cli_list_path);
1205 set_handler_callback(LIST+MAPS, cli_list_maps);
1206 set_unlocked_handler_callback(LIST+STATUS, cli_list_status);
1207 set_unlocked_handler_callback(LIST+DAEMON, cli_list_daemon);
1208 set_handler_callback(LIST+MAPS+STATUS, cli_list_maps_status);
1209 set_handler_callback(LIST+MAPS+STATS, cli_list_maps_stats);
1210 set_handler_callback(LIST+MAPS+FMT, cli_list_maps_fmt);
1211 set_handler_callback(LIST+MAPS+RAW+FMT, cli_list_maps_raw);
1212 set_handler_callback(LIST+MAPS+TOPOLOGY, cli_list_maps_topology);
1213 set_handler_callback(LIST+TOPOLOGY, cli_list_maps_topology);
1214 set_handler_callback(LIST+MAPS+JSON, cli_list_maps_json);
1215 set_handler_callback(LIST+MAP+TOPOLOGY, cli_list_map_topology);
1216 set_handler_callback(LIST+MAP+FMT, cli_list_map_fmt);
1217 set_handler_callback(LIST+MAP+RAW+FMT, cli_list_map_fmt);
1218 set_handler_callback(LIST+MAP+JSON, cli_list_map_json);
1219 set_handler_callback(LIST+CONFIG, cli_list_config);
1220 set_handler_callback(LIST+BLACKLIST, cli_list_blacklist);
1221 set_handler_callback(LIST+DEVICES, cli_list_devices);
1222 set_handler_callback(LIST+WILDCARDS, cli_list_wildcards);
1223 set_handler_callback(RESET+MAPS+STATS, cli_reset_maps_stats);
1224 set_handler_callback(RESET+MAP+STATS, cli_reset_map_stats);
1225 set_handler_callback(ADD+PATH, cli_add_path);
1226 set_handler_callback(DEL+PATH, cli_del_path);
1227 set_handler_callback(ADD+MAP, cli_add_map);
1228 set_handler_callback(DEL+MAP, cli_del_map);
1229 set_handler_callback(SWITCH+MAP+GROUP, cli_switch_group);
1230 set_unlocked_handler_callback(RECONFIGURE, cli_reconfigure);
1231 set_handler_callback(SUSPEND+MAP, cli_suspend);
1232 set_handler_callback(RESUME+MAP, cli_resume);
1233 set_handler_callback(RESIZE+MAP, cli_resize);
1234 set_handler_callback(RELOAD+MAP, cli_reload);
1235 set_handler_callback(RESET+MAP, cli_reassign);
1236 set_handler_callback(REINSTATE+PATH, cli_reinstate);
1237 set_handler_callback(FAIL+PATH, cli_fail);
1238 set_handler_callback(DISABLEQ+MAP, cli_disable_queueing);
1239 set_handler_callback(RESTOREQ+MAP, cli_restore_queueing);
1240 set_handler_callback(DISABLEQ+MAPS, cli_disable_all_queueing);
1241 set_handler_callback(RESTOREQ+MAPS, cli_restore_all_queueing);
1242 set_unlocked_handler_callback(QUIT, cli_quit);
1243 set_unlocked_handler_callback(SHUTDOWN, cli_shutdown);
1244 set_handler_callback(GETPRSTATUS+MAP, cli_getprstatus);
1245 set_handler_callback(SETPRSTATUS+MAP, cli_setprstatus);
1246 set_handler_callback(UNSETPRSTATUS+MAP, cli_unsetprstatus);
1247 set_handler_callback(FORCEQ+DAEMON, cli_force_no_daemon_q);
1248 set_handler_callback(RESTOREQ+DAEMON, cli_restore_no_daemon_q);
1249 set_handler_callback(GETPRKEY+MAP, cli_getprkey);
1250 set_handler_callback(SETPRKEY+MAP+KEY, cli_setprkey);
1251 set_handler_callback(UNSETPRKEY+MAP, cli_unsetprkey);
1254 uxsock_listen(&uxsock_trigger, ap);
1255 pthread_cleanup_pop(1);
1262 post_config_state(DAEMON_SHUTDOWN);
1266 fail_path (struct path * pp, int del_active)
1271 condlog(2, "checker failed path %s in map %s",
1272 pp->dev_t, pp->mpp->alias);
1274 dm_fail_path(pp->mpp->alias, pp->dev_t);
1276 update_queue_mode_del_path(pp->mpp);
1280 * caller must have locked the path list before calling that function
1283 reinstate_path (struct path * pp, int add_active)
1290 if (dm_reinstate_path(pp->mpp->alias, pp->dev_t)) {
1291 condlog(0, "%s: reinstate failed", pp->dev_t);
1294 condlog(2, "%s: reinstated", pp->dev_t);
1296 update_queue_mode_add_path(pp->mpp);
1302 enable_group(struct path * pp)
1304 struct pathgroup * pgp;
1307 * if path is added through uev_add_path, pgindex can be unset.
1308 * next update_strings() will set it, upon map reload event.
1310 * we can safely return here, because upon map reload, all
1311 * PG will be enabled.
1313 if (!pp->mpp->pg || !pp->pgindex)
1316 pgp = VECTOR_SLOT(pp->mpp->pg, pp->pgindex - 1);
1318 if (pgp->status == PGSTATE_DISABLED) {
1319 condlog(2, "%s: enable group #%i", pp->mpp->alias, pp->pgindex);
1320 dm_enablegroup(pp->mpp->alias, pp->pgindex);
1325 mpvec_garbage_collector (struct vectors * vecs)
1327 struct multipath * mpp;
1333 vector_foreach_slot (vecs->mpvec, mpp, i) {
1334 if (mpp && mpp->alias && !dm_map_present(mpp->alias)) {
1335 condlog(2, "%s: remove dead map", mpp->alias);
1336 remove_map_and_stop_waiter(mpp, vecs, 1);
1342 /* This is called after a path has started working again. It the multipath
1343 * device for this path uses the followover failback type, and this is the
1344 * best pathgroup, and this is the first path in the pathgroup to come back
1345 * up, then switch to this pathgroup */
1347 followover_should_failback(struct path * pp)
1349 struct pathgroup * pgp;
1353 if (pp->mpp->pgfailback != -FAILBACK_FOLLOWOVER ||
1354 !pp->mpp->pg || !pp->pgindex ||
1355 pp->pgindex != pp->mpp->bestpg)
1358 pgp = VECTOR_SLOT(pp->mpp->pg, pp->pgindex - 1);
1359 vector_foreach_slot(pgp->paths, pp1, i) {
1362 if (pp1->chkrstate != PATH_DOWN && pp1->chkrstate != PATH_SHAKY)
1369 missing_uev_wait_tick(struct vectors *vecs)
1371 struct multipath * mpp;
1373 int timed_out = 0, delayed_reconfig;
1374 struct config *conf;
1376 vector_foreach_slot (vecs->mpvec, mpp, i) {
1377 if (mpp->wait_for_udev && --mpp->uev_wait_tick <= 0) {
1379 condlog(0, "%s: timeout waiting on creation uevent. enabling reloads", mpp->alias);
1380 if (mpp->wait_for_udev > 1 && update_map(mpp, vecs)) {
1381 /* update_map removed map */
1385 mpp->wait_for_udev = 0;
1389 conf = get_multipath_config();
1390 delayed_reconfig = conf->delayed_reconfig;
1391 put_multipath_config(conf);
1392 if (timed_out && delayed_reconfig &&
1393 !need_to_delay_reconfig(vecs)) {
1394 condlog(2, "reconfigure (delayed)");
1395 set_config_state(DAEMON_CONFIGURE);
1400 ghost_delay_tick(struct vectors *vecs)
1402 struct multipath * mpp;
1405 vector_foreach_slot (vecs->mpvec, mpp, i) {
1406 if (mpp->ghost_delay_tick <= 0)
1408 if (--mpp->ghost_delay_tick <= 0) {
1409 condlog(0, "%s: timed out waiting for active path",
1411 mpp->force_udev_reload = 1;
1412 if (update_map(mpp, vecs) != 0) {
1413 /* update_map removed map */
1422 defered_failback_tick (vector mpvec)
1424 struct multipath * mpp;
1427 vector_foreach_slot (mpvec, mpp, i) {
1429 * defered failback getting sooner
1431 if (mpp->pgfailback > 0 && mpp->failback_tick > 0) {
1432 mpp->failback_tick--;
1434 if (!mpp->failback_tick && need_switch_pathgroup(mpp, 1))
1435 switch_pathgroup(mpp);
1441 retry_count_tick(vector mpvec)
1443 struct multipath *mpp;
1446 vector_foreach_slot (mpvec, mpp, i) {
1447 if (mpp->retry_tick > 0) {
1448 mpp->stat_total_queueing_time++;
1449 condlog(4, "%s: Retrying.. No active path", mpp->alias);
1450 if(--mpp->retry_tick == 0) {
1451 mpp->stat_map_failures++;
1452 dm_queue_if_no_path(mpp->alias, 0);
1453 condlog(2, "%s: Disable queueing", mpp->alias);
1459 int update_prio(struct path *pp, int refresh_all)
1463 struct pathgroup * pgp;
1464 int i, j, changed = 0;
1465 struct config *conf;
1468 vector_foreach_slot (pp->mpp->pg, pgp, i) {
1469 vector_foreach_slot (pgp->paths, pp1, j) {
1470 oldpriority = pp1->priority;
1471 conf = get_multipath_config();
1472 pathinfo(pp1, conf, DI_PRIO);
1473 put_multipath_config(conf);
1474 if (pp1->priority != oldpriority)
1480 oldpriority = pp->priority;
1481 conf = get_multipath_config();
1482 if (pp->state != PATH_DOWN)
1483 pathinfo(pp, conf, DI_PRIO);
1484 put_multipath_config(conf);
1486 if (pp->priority == oldpriority)
/*
 * Reload @mpp's device-mapper table (optionally refreshing path info
 * first), resync the daemon's view from the kernel, and push the path
 * states back down.  Returns 0 on success, 1 on failure (on
 * setup_multipath() failure the map has been removed).
 */
int update_path_groups(struct multipath *mpp, struct vectors *vecs, int refresh)
{
	if (reload_map(vecs, mpp, refresh, 1))
		return 1;

	if (setup_multipath(vecs, mpp) != 0)
		return 1;
	sync_map_state(mpp);

	return 0;
}
1504 void repair_path(struct path * pp)
1506 if (pp->state != PATH_DOWN)
1509 checker_repair(&pp->checker);
1510 LOG_MSG(1, checker_message(&pp->checker));
1514 * Returns '1' if the path has been checked, '-1' if it was blacklisted
1518 check_path (struct vectors * vecs, struct path * pp, int ticks)
1521 int new_path_up = 0;
1522 int chkr_new_path_up = 0;
1524 int disable_reinstate = 0;
1525 int oldchkrstate = pp->chkrstate;
1526 int retrigger_tries, checkint;
1527 struct config *conf;
1530 if ((pp->initialized == INIT_OK ||
1531 pp->initialized == INIT_REQUESTED_UDEV) && !pp->mpp)
1535 pp->tick -= (pp->tick > ticks) ? ticks : pp->tick;
1537 return 0; /* don't check this path yet */
1539 conf = get_multipath_config();
1540 retrigger_tries = conf->retrigger_tries;
1541 checkint = conf->checkint;
1542 put_multipath_config(conf);
1543 if (!pp->mpp && pp->initialized == INIT_MISSING_UDEV &&
1544 pp->retriggers < retrigger_tries) {
1545 condlog(2, "%s: triggering change event to reinitialize",
1547 pp->initialized = INIT_REQUESTED_UDEV;
1549 sysfs_attr_set_value(pp->udev, "uevent", "change",
1555 * provision a next check soonest,
1556 * in case we exit abnormaly from here
1558 pp->tick = checkint;
1560 newstate = path_offline(pp);
1562 * Wait for uevent for removed paths;
1563 * some LLDDs like zfcp keep paths unavailable
1564 * without sending uevents.
1566 if (newstate == PATH_REMOVED)
1567 newstate = PATH_DOWN;
1569 if (newstate == PATH_UP) {
1570 conf = get_multipath_config();
1571 newstate = get_state(pp, conf, 1, newstate);
1572 put_multipath_config(conf);
1574 checker_clear_message(&pp->checker);
1576 if (pp->wwid_changed) {
1577 condlog(2, "%s: path wwid has changed. Refusing to use",
1579 newstate = PATH_DOWN;
1582 if (newstate == PATH_WILD || newstate == PATH_UNCHECKED) {
1583 condlog(2, "%s: unusable path", pp->dev);
1584 conf = get_multipath_config();
1585 pathinfo(pp, conf, 0);
1586 put_multipath_config(conf);
1590 if (!strlen(pp->wwid) && pp->initialized != INIT_MISSING_UDEV &&
1591 (newstate == PATH_UP || newstate == PATH_GHOST)) {
1592 condlog(2, "%s: add missing path", pp->dev);
1593 conf = get_multipath_config();
1594 ret = pathinfo(pp, conf, DI_ALL | DI_BLACKLIST);
1595 if (ret == PATHINFO_OK) {
1596 ev_add_path(pp, vecs, 1);
1598 } else if (ret == PATHINFO_SKIPPED) {
1599 put_multipath_config(conf);
1602 put_multipath_config(conf);
1607 * Async IO in flight. Keep the previous path state
1608 * and reschedule as soon as possible
1610 if (newstate == PATH_PENDING) {
1615 * Synchronize with kernel state
1617 if (update_multipath_strings(pp->mpp, vecs->pathvec, 1)) {
1618 condlog(1, "%s: Could not synchronize with kernel state",
1620 pp->dmstate = PSTATE_UNDEF;
1622 /* if update_multipath_strings orphaned the path, quit early */
1626 if (pp->io_err_disable_reinstate && hit_io_err_recheck_time(pp)) {
1627 pp->state = PATH_SHAKY;
1629 * to reschedule as soon as possible,so that this path can
1630 * be recoverd in time
1636 if ((newstate == PATH_UP || newstate == PATH_GHOST) &&
1637 pp->wait_checks > 0) {
1638 if (pp->mpp->nr_active > 0) {
1639 pp->state = PATH_DELAYED;
1643 pp->wait_checks = 0;
1647 * don't reinstate failed path, if its in stand-by
1648 * and if target supports only implicit tpgs mode.
1649 * this will prevent unnecessary i/o by dm on stand-by
1650 * paths if there are no other active paths in map.
1652 disable_reinstate = (newstate == PATH_GHOST &&
1653 pp->mpp->nr_active == 0 &&
1654 pp->tpgs == TPGS_IMPLICIT) ? 1 : 0;
1656 pp->chkrstate = newstate;
1657 if (newstate != pp->state) {
1658 int oldstate = pp->state;
1659 pp->state = newstate;
1661 LOG_MSG(1, checker_message(&pp->checker));
1664 * upon state change, reset the checkint
1665 * to the shortest delay
1667 conf = get_multipath_config();
1668 pp->checkint = conf->checkint;
1669 put_multipath_config(conf);
1671 if (newstate != PATH_UP && newstate != PATH_GHOST) {
1673 * proactively fail path in the DM
1675 if (oldstate == PATH_UP ||
1676 oldstate == PATH_GHOST) {
1678 if (pp->mpp->delay_wait_checks > 0 &&
1679 pp->watch_checks > 0) {
1680 pp->wait_checks = pp->mpp->delay_wait_checks;
1681 pp->watch_checks = 0;
1687 * cancel scheduled failback
1689 pp->mpp->failback_tick = 0;
1691 pp->mpp->stat_path_failures++;
1696 if(newstate == PATH_UP || newstate == PATH_GHOST){
1697 if ( pp->mpp && pp->mpp->prflag ){
1699 * Check Persistent Reservation.
1701 condlog(2, "%s: checking persistent reservation "
1702 "registration", pp->dev);
1703 mpath_pr_event_handle(pp);
1708 * reinstate this path
1710 if (oldstate != PATH_UP &&
1711 oldstate != PATH_GHOST) {
1712 if (pp->mpp->delay_watch_checks > 0)
1713 pp->watch_checks = pp->mpp->delay_watch_checks;
1716 if (pp->watch_checks > 0)
1720 if (!disable_reinstate && reinstate_path(pp, add_active)) {
1721 condlog(3, "%s: reload map", pp->dev);
1722 ev_add_path(pp, vecs, 1);
1728 if (oldchkrstate != PATH_UP && oldchkrstate != PATH_GHOST)
1729 chkr_new_path_up = 1;
1732 * if at least one path is up in a group, and
1733 * the group is disabled, re-enable it
1735 if (newstate == PATH_UP)
1738 else if (newstate == PATH_UP || newstate == PATH_GHOST) {
1739 if ((pp->dmstate == PSTATE_FAILED ||
1740 pp->dmstate == PSTATE_UNDEF) &&
1741 !disable_reinstate) {
1742 /* Clear IO errors */
1743 if (reinstate_path(pp, 0)) {
1744 condlog(3, "%s: reload map", pp->dev);
1745 ev_add_path(pp, vecs, 1);
1750 unsigned int max_checkint;
1751 LOG_MSG(4, checker_message(&pp->checker));
1752 conf = get_multipath_config();
1753 max_checkint = conf->max_checkint;
1754 put_multipath_config(conf);
1755 if (pp->checkint != max_checkint) {
1757 * double the next check delay.
1758 * max at conf->max_checkint
1760 if (pp->checkint < (max_checkint / 2))
1761 pp->checkint = 2 * pp->checkint;
1763 pp->checkint = max_checkint;
1765 condlog(4, "%s: delay next check %is",
1766 pp->dev_t, pp->checkint);
1768 if (pp->watch_checks > 0)
1770 pp->tick = pp->checkint;
1773 else if (newstate != PATH_UP && newstate != PATH_GHOST) {
1774 if (pp->dmstate == PSTATE_ACTIVE ||
1775 pp->dmstate == PSTATE_UNDEF)
1777 if (newstate == PATH_DOWN) {
1778 int log_checker_err;
1780 conf = get_multipath_config();
1781 log_checker_err = conf->log_checker_err;
1782 put_multipath_config(conf);
1783 if (log_checker_err == LOG_CHKR_ERR_ONCE)
1784 LOG_MSG(3, checker_message(&pp->checker));
1786 LOG_MSG(2, checker_message(&pp->checker));
1790 pp->state = newstate;
1793 if (pp->mpp->wait_for_udev)
1796 * path prio refreshing
1798 condlog(4, "path prio refresh");
1800 if (update_prio(pp, new_path_up) &&
1801 (pp->mpp->pgpolicyfn == (pgpolicyfn *)group_by_prio) &&
1802 pp->mpp->pgfailback == -FAILBACK_IMMEDIATE)
1803 update_path_groups(pp->mpp, vecs, !new_path_up);
1804 else if (need_switch_pathgroup(pp->mpp, 0)) {
1805 if (pp->mpp->pgfailback > 0 &&
1806 (new_path_up || pp->mpp->failback_tick <= 0))
1807 pp->mpp->failback_tick =
1808 pp->mpp->pgfailback + 1;
1809 else if (pp->mpp->pgfailback == -FAILBACK_IMMEDIATE ||
1810 (chkr_new_path_up && followover_should_failback(pp)))
1811 switch_pathgroup(pp->mpp);
/*
 * Seed every discovered path's checker polling interval (pp->checkint)
 * from the currently active configuration's "checkint" value.
 * NOTE: listing is elided here — the loop-variable declarations and the
 * closing braces of this function are not shown.
 */
1816 static void init_path_check_interval(struct vectors *vecs)
1818 	struct config *conf;
1822 	vector_foreach_slot (vecs->pathvec, pp, i) {
		/* take/release a config reference per path so a concurrent
		 * reconfigure is never blocked for the whole loop */
1823 		conf = get_multipath_config();
1824 		pp->checkint = conf->checkint;
1825 		put_multipath_config(conf);
/*
 * Path-checker thread body (started from child() with the global
 * vectors as argument).  Per iteration of the (elided) main loop it:
 *   - computes elapsed "ticks" (whole seconds) since the last pass,
 *   - pings the systemd watchdog,
 *   - checks every path via check_path() under vecs->lock,
 *   - services deferred-failback / retry / missing-uevent / ghost-delay
 *     timers, then garbage-collects orphaned maps,
 *   - goes back to DAEMON_IDLE and, when strict_timing is set, arms an
 *     ITIMER_REAL timer and sigwait()s for SIGALRM to keep 1s cadence.
 * NOTE: listing is elided — the return type line, several declarations
 * (pp, i, mask), the main "while (1)" line, and various closing braces
 * are not shown.
 */
1830 checkerloop (void *ap)
1832 	struct vectors *vecs;
1836 	struct itimerval timer_tick_it;
1837 	struct timespec last_time;
1838 	struct config *conf;
	/* make sure the RCU registration below is undone on cancellation */
1840 	pthread_cleanup_push(rcu_unregister, NULL);
1841 	rcu_register_thread();
1842 	mlockall(MCL_CURRENT | MCL_FUTURE);
1843 	vecs = (struct vectors *)ap;
1844 	condlog(2, "path checkers start up");
1846 	/* Tweak start time for initial path check */
1847 	if (clock_gettime(CLOCK_MONOTONIC, &last_time) != 0)
1848 		last_time.tv_sec = 0;
	/* pretend the last pass happened one second ago so the first
	 * iteration immediately counts one tick */
1850 	last_time.tv_sec -= 1;
1853 		struct timespec diff_time, start_time, end_time;
1854 		int num_paths = 0, ticks = 0, signo, strict_timing, rc = 0;
1857 		if (clock_gettime(CLOCK_MONOTONIC, &start_time) != 0)
1858 			start_time.tv_sec = 0;
1859 		if (start_time.tv_sec && last_time.tv_sec) {
1860 			timespecsub(&start_time, &last_time, &diff_time);
1861 			condlog(4, "tick (%lu.%06lu secs)",
1862 				diff_time.tv_sec, diff_time.tv_nsec / 1000);
1863 			last_time = start_time;
1864 			ticks = diff_time.tv_sec;
1867 		condlog(4, "tick (%d ticks)", ticks);
	/* keep systemd's watchdog happy once per pass */
1871 			sd_notify(0, "WATCHDOG=1");
1873 		rc = set_config_state(DAEMON_RUNNING);
1874 		if (rc == ETIMEDOUT) {
1875 			condlog(4, "timeout waiting for DAEMON_IDLE");
	/* --- pass 1: check all paths (holds vecs->lock) --- */
1879 		pthread_cleanup_push(cleanup_lock, &vecs->lock);
1881 		pthread_testcancel();
1882 		vector_foreach_slot (vecs->pathvec, pp, i) {
1883 			rc = check_path(vecs, pp, ticks);
			/* (elided) negative rc: path is gone, drop its slot */
1885 				vector_del_slot(vecs->pathvec, i);
1891 		lock_cleanup_pop(vecs->lock);
	/* --- pass 2: per-map timers (holds vecs->lock) --- */
1893 		pthread_cleanup_push(cleanup_lock, &vecs->lock);
1895 		pthread_testcancel();
1896 		defered_failback_tick(vecs->mpvec);
1897 		retry_count_tick(vecs->mpvec);
1898 		missing_uev_wait_tick(vecs);
1899 		ghost_delay_tick(vecs);
1900 		lock_cleanup_pop(vecs->lock);
	/* --- pass 3: reap maps with no remaining paths --- */
1905 		pthread_cleanup_push(cleanup_lock, &vecs->lock);
1907 			pthread_testcancel();
1908 			condlog(4, "map garbage collection");
1909 			mpvec_garbage_collector(vecs);
1911 		lock_cleanup_pop(vecs->lock);
1914 		diff_time.tv_nsec = 0;
1915 		if (start_time.tv_sec &&
1916 		    clock_gettime(CLOCK_MONOTONIC, &end_time) == 0) {
1917 			timespecsub(&end_time, &start_time, &diff_time);
1919 				unsigned int max_checkint;
1921 				condlog(3, "checked %d path%s in %lu.%06lu secs",
1922 					num_paths, num_paths > 1 ? "s" : "",
1924 					diff_time.tv_nsec / 1000);
1925 				conf = get_multipath_config();
1926 				max_checkint = conf->max_checkint;
1927 				put_multipath_config(conf);
				/* a pass longer than max_checkint means we can
				 * no longer honor the configured cadence */
1928 				if (diff_time.tv_sec > max_checkint)
1929 					condlog(1, "path checkers took longer "
1930 						"than %lu seconds, consider "
1931 						"increasing max_polling_interval",
1936 		post_config_state(DAEMON_IDLE);
1937 		conf = get_multipath_config();
1938 		strict_timing = conf->strict_timing;
1939 		put_multipath_config(conf);
	/* strict timing: sleep out the remainder of the 1-second slot */
1943 			timer_tick_it.it_interval.tv_sec = 0;
1944 			timer_tick_it.it_interval.tv_usec = 0;
1945 			if (diff_time.tv_nsec) {
1946 				timer_tick_it.it_value.tv_sec = 0;
1947 				timer_tick_it.it_value.tv_usec =
	/* NOTE(review): tv_usec is microseconds but this computes
	 * 1e9 - tv_nsec (nanoseconds) — looks like a unit mismatch;
	 * confirm against upstream before relying on strict_timing. */
1948 				     1000UL * 1000 * 1000 - diff_time.tv_nsec;
1950 				timer_tick_it.it_value.tv_sec = 1;
1951 				timer_tick_it.it_value.tv_usec = 0;
1953 			setitimer(ITIMER_REAL, &timer_tick_it, NULL);
1956 			sigaddset(&mask, SIGALRM);
1957 			condlog(3, "waiting for %lu.%06lu secs",
1958 				timer_tick_it.it_value.tv_sec,
1959 				timer_tick_it.it_value.tv_usec);
1960 			if (sigwait(&mask, &signo) != 0) {
1961 				condlog(3, "sigwait failed with error %d",
				/* sigwait failure: fall back to relaxed
				 * timing by clearing strict_timing */
1963 				conf = get_multipath_config();
1964 				conf->strict_timing = 0;
1965 				put_multipath_config(conf);
	/* runs the rcu_unregister cleanup pushed at the top */
1970 	pthread_cleanup_pop(1);
/*
 * (Re)build the daemon's view of the world: discover paths from sysfs
 * and maps from device-mapper, filter blacklisted paths, coalesce paths
 * into multipath maps (pushing changed ones into the kernel), drop maps
 * that are no longer wanted, and optionally start a dm-event waiter
 * thread per map.  Called with vecs->lock held (per upstream
 * convention — confirm; the lock is not visible in this listing).
 * NOTE: listing is elided — the return type, some declarations
 * (mpvec, pp, i, ret), error-return lines and closing braces are not
 * shown.
 */
1975 configure (struct vectors * vecs, int start_waiters)
1977 	struct multipath * mpp;
1981 	struct config *conf;
	/* weak reload only on the very first configure() call, see below */
1982 	static int force_reload = FORCE_RELOAD_WEAK;
1984 	if (!vecs->pathvec && !(vecs->pathvec = vector_alloc())) {
1985 		condlog(0, "couldn't allocate path vec in configure");
1989 	if (!vecs->mpvec && !(vecs->mpvec = vector_alloc())) {
1990 		condlog(0, "couldn't allocate multipath vec in configure");
1994 	if (!(mpvec = vector_alloc())) {
1995 		condlog(0, "couldn't allocate new maps vec in configure");
2000 	 * probe for current path (from sysfs) and map (from dm) sets
2002 	ret = path_discovery(vecs->pathvec, DI_ALL);
2004 		condlog(0, "configure failed at path discovery");
	/* drop blacklisted paths, seed checkint on the survivors */
2008 	vector_foreach_slot (vecs->pathvec, pp, i){
2009 		conf = get_multipath_config();
2010 		if (filter_path(conf, pp) > 0){
2011 			vector_del_slot(vecs->pathvec, i);
2016 			pp->checkint = conf->checkint;
2017 		put_multipath_config(conf);
2019 	if (map_discovery(vecs)) {
2020 		condlog(0, "configure failed at map discovery");
2025 	 * create new set of maps & push changed ones into dm
2026 	 * In the first call, use FORCE_RELOAD_WEAK to avoid making
2027 	 * superfluous ACT_RELOAD ioctls. Later calls are done
2028 	 * with FORCE_RELOAD_YES.
2030 	ret = coalesce_paths(vecs, mpvec, NULL, force_reload, CMD_NONE);
2031 	if (force_reload == FORCE_RELOAD_WEAK)
2032 		force_reload = FORCE_RELOAD_YES;
2034 		condlog(0, "configure failed while coalescing paths");
2039 	 * may need to remove some maps which are no longer relevant
2040 	 * e.g., due to blacklist changes in conf file
2042 	if (coalesce_maps(vecs, mpvec)) {
2043 		condlog(0, "configure failed while coalescing maps");
2049 	sync_maps_state(mpvec);
	/* record the wwids of the surviving maps persistently */
2050 	vector_foreach_slot(mpvec, mpp, i){
2051 		remember_wwid(mpp->wwid);
2056 	 * purge dm of old maps
2061 	 * save new set of maps formed by considering current path state
2063 	vector_free(vecs->mpvec);
2064 	vecs->mpvec = mpvec;
2067 	 * start dm event waiter threads for these new maps
2069 	vector_foreach_slot(vecs->mpvec, mpp, i) {
2070 		if (setup_multipath(vecs, mpp)) {
2074 		if (start_waiters) {
2075 			if (start_waiter_thread(mpp, vecs)) {
				/* waiter failed: drop the map entirely */
2076 				remove_map(mpp, vecs, 1);
/*
 * Return whether a reconfigure must be postponed: true when at least
 * one existing map is still waiting for a udev event
 * (mpp->wait_for_udev), false when there are no maps at all.
 * NOTE: listing is elided — return type, declaration of i, the return
 * statements and closing braces are not shown.
 */
2085 need_to_delay_reconfig(struct vectors * vecs)
2087 	struct multipath *mpp;
	/* nothing to wait for if no maps exist yet */
2090 	if (!VECTOR_SIZE(vecs->mpvec))
2093 	vector_foreach_slot(vecs->mpvec, mpp, i) {
2094 		if (mpp->wait_for_udev)
/*
 * RCU callback: frees a struct config after the grace period following
 * call_rcu(&conf->rcu, rcu_free_config).  The actual free call is in
 * the elided remainder of this function.
 */
2100 void rcu_free_config(struct rcu_head *head)
2102 	struct config *conf = container_of(head, struct config, rcu);
/*
 * Reload DEFAULT_CONFIGFILE and atomically swap it in as the active
 * configuration: tear down maps/waiters and the path vector built under
 * the old config, re-apply the command-line overrides (verbosity, -B,
 * -n), publish the new config via RCU and schedule the old one for
 * freeing after the grace period.
 * NOTE: listing is elided — the return type, error handling after
 * load_config, tzset()/calls around "Re-read any timezone changes",
 * the find_multipaths else-branch and closing braces are not shown.
 */
2108 reconfigure (struct vectors * vecs)
2110 	struct config * old, *conf;
2112 	conf = load_config(DEFAULT_CONFIGFILE);
2117 	 * free old map and path vectors ... they use old conf state
2119 	if (VECTOR_SIZE(vecs->mpvec))
2120 		remove_maps_and_stop_waiters(vecs);
2122 	free_pathvec(vecs->pathvec, FREE_PATHS);
2123 	vecs->pathvec = NULL;
2125 	/* Re-read any timezone changes */
2128 	dm_drv_version(conf->version, TGT_MPATH);
	/* command-line options override the freshly loaded file */
2130 	conf->verbosity = verbosity;
2131 	if (bindings_read_only)
2132 		conf->bindings_read_only = bindings_read_only;
2133 	if (conf->find_multipaths) {
2134 		condlog(2, "find_multipaths is set: -n is implied");
2135 		ignore_new_devs = 1;
2137 	if (ignore_new_devs)
2138 		conf->ignore_new_devs = ignore_new_devs;
2139 	uxsock_timeout = conf->uxsock_timeout;
	/* publish new config; old one is freed only after all readers
	 * (get_multipath_config holders) are done */
2141 	old = rcu_dereference(multipath_conf);
2142 	rcu_assign_pointer(multipath_conf, conf);
2143 	call_rcu(&old->rcu, rcu_free_config);
/*
 * Allocate and initialize the global vectors container (pathvec/mpvec
 * plus its mutex).  The function's name line is elided from this
 * listing; presumably init_vecs — it is assigned to gvecs in child()
 * via "vecs = gvecs = init_vecs();" — TODO confirm.  Allocation-failure
 * handling and the remaining initialization are also elided.
 */
static struct vectors *
2154 	struct vectors * vecs;
2156 	vecs = (struct vectors *)MALLOC(sizeof(struct vectors));
2161 	pthread_mutex_init(&vecs->lock.mutex, NULL);
/*
 * sigaction()-based replacement for signal(): install func as the
 * handler for signo with an empty blocked-signal mask and return the
 * previously installed handler (the sigaction error branch is elided;
 * presumably it returns SIG_ERR — TODO confirm).
 */
2167 signal_set(int signo, void (*func) (int))
2170 	struct sigaction sig;
2171 	struct sigaction osig;
2173 	sig.sa_handler = func;
2174 	sigemptyset(&sig.sa_mask);
2177 	r = sigaction(signo, &sig, &osig);
	/* hand back the old disposition so callers can restore it */
2182 		return (osig.sa_handler);
/*
 * Act on the volatile sig_atomic_t flags set by the async signal
 * handlers (exit_sig, reconfig_sig, log_reset_sig): request shutdown,
 * trigger a reconfigure via DAEMON_CONFIGURE, or reset the log queue.
 * The flag tests and flag-clearing lines are elided from this listing;
 * the nonfatal parameter's use is likewise not visible here.
 */
2186 handle_signals(bool nonfatal)
2189 		condlog(2, "exit (signal)");
2196 		condlog(2, "reconfigure (signal)");
2197 		set_config_state(DAEMON_CONFIGURE);
2199 	if (log_reset_sig) {
2200 		condlog(2, "reset log (signal)");
		/* serialize against the logger thread's queue access */
2201 		pthread_mutex_lock(&logq_lock);
2202 		log_reset("multipathd");
2203 		pthread_mutex_unlock(&logq_lock);
/*
 * Fragment of two elided functions: the tail of the SIGUSR2 handler
 * (logs receipt; presumably sigusr2 — its signature is not shown) and
 * the body of the signal-setup routine that blocks SIGUSR2 in the
 * thread mask and installs the daemon's handlers (HUP=reconfigure,
 * USR1/USR2, INT/TERM/PIPE=end).  sigemptyset/other mask setup lines
 * are elided.
 */
	condlog(3, "SIGUSR2 received");
2239 	sigaddset(&set, SIGUSR2);
2240 	pthread_sigmask(SIG_SETMASK, &set, NULL);
2242 	signal_set(SIGHUP, sighup);
2243 	signal_set(SIGUSR1, sigusr1);
2244 	signal_set(SIGUSR2, sigusr2);
2245 	signal_set(SIGINT, sigend);
2246 	signal_set(SIGTERM, sigend);
2247 	signal_set(SIGPIPE, sigend);
/*
 * Fragment of the elided scheduler-setup routine: try to put the daemon
 * in the SCHED_RR real-time class at priority 99 and warn (but carry
 * on) if that fails.
 */
	static struct sched_param sched_param = {
		.sched_priority = 99
	res = sched_setscheduler (0, SCHED_RR, &sched_param);
		condlog(LOG_WARNING, "Could not set SCHED_RR at priority 99");
/*
 * Fragment of the elided OOM-adjustment routine: make the daemon
 * maximally unattractive to the kernel OOM killer by writing
 * OOM_SCORE_ADJ_MIN to /proc/self/oom_score_adj (falling back to the
 * legacy /proc/self/oom_adj with OOM_ADJUST_MIN on older kernels).
 * Skipped entirely when systemd already set OOMScoreAdjust.  The retry
 * loop, fclose and several error paths are elided from this listing.
 */
#ifdef OOM_SCORE_ADJ_MIN
	char *file = "/proc/self/oom_score_adj";
	int score = OOM_SCORE_ADJ_MIN;
	char *file = "/proc/self/oom_adj";
	int score = OOM_ADJUST_MIN;
	/* systemd unit file already handles this for us */
	envp = getenv("OOMScoreAdjust");
		condlog(3, "Using systemd provided OOMScoreAdjust");
	if (stat(file, &st) == 0){
		fp = fopen(file, "w");
			condlog(0, "couldn't fopen %s : %s", file,
		fprintf(fp, "%i", score);
	if (errno != ENOENT) {
		condlog(0, "couldn't stat %s : %s", file,
#ifdef OOM_ADJUST_MIN
	file = "/proc/self/oom_adj";
	score = OOM_ADJUST_MIN;
	condlog(0, "couldn't adjust oom score");
/*
 * Daemon main body (run in the child after daemonize, or directly in
 * foreground mode).  Responsibilities, in order:
 *   - lock memory, set up per-thread stack attributes, start the logger
 *     thread and create the pidfile;
 *   - load the initial config, apply command-line overrides, publish it
 *     via RCU, and initialize checker and prioritizer plugins;
 *   - raise RLIMIT_NOFILE (unless systemd's LimitNOFILE is in charge);
 *   - allocate the global vectors and scale checkint to the systemd
 *     watchdog interval (WATCHDOG_USEC) if one is set;
 *   - spawn the uevent listener, CLI listener, io_err_stat, checker
 *     and uevent-dispatcher threads;
 *   - loop on config_cond until DAEMON_SHUTDOWN, running configure()
 *     on each DAEMON_CONFIGURE transition (deferring while maps wait
 *     for udev) and telling systemd READY=1 after the first pass;
 *   - on shutdown: optionally disable queue_if_no_path on all maps,
 *     tear down maps/waiters/paths, cancel and join all threads, free
 *     the config after the last log call, and notify systemd.
 * NOTE: listing is heavily elided — gotos to the (unshown) failure
 * labels, lock()/unlock() pairs, the exit paths and many braces are
 * missing.
 */
2314 child (void * param)
2316 	pthread_t check_thr, uevent_thr, uxlsnr_thr, uevq_thr;
2317 	pthread_attr_t log_attr, misc_attr, uevent_attr;
2318 	struct vectors * vecs;
2319 	struct multipath * mpp;
2322 	unsigned long checkint;
2323 	int startup_done = 0;
2327 	struct config *conf;
2330 	mlockall(MCL_CURRENT | MCL_FUTURE);
2334 	setup_thread_attr(&misc_attr, 64 * 1024, 0);
2335 	setup_thread_attr(&uevent_attr, DEFAULT_UEVENT_STACKSIZE * 1024, 0);
2336 	setup_thread_attr(&waiter_attr, 32 * 1024, 1);
2337 	setup_thread_attr(&io_err_stat_attr, 32 * 1024, 0);
2340 		setup_thread_attr(&log_attr, 64 * 1024, 0);
2341 		log_thread_start(&log_attr);
2342 		pthread_attr_destroy(&log_attr);
2344 	pid_fd = pidfile_create(DEFAULT_PIDFILE, daemon_pid);
2346 		condlog(1, "failed to create pidfile");
2352 	post_config_state(DAEMON_START);
2354 	condlog(2, "--------start up--------");
2355 	condlog(2, "read " DEFAULT_CONFIGFILE);
2357 	conf = load_config(DEFAULT_CONFIGFILE);
	/* command-line switches win over the config file */
2362 	conf->verbosity = verbosity;
2363 	if (bindings_read_only)
2364 		conf->bindings_read_only = bindings_read_only;
2365 	if (ignore_new_devs)
2366 		conf->ignore_new_devs = ignore_new_devs;
2367 	uxsock_timeout = conf->uxsock_timeout;
	/* make the config visible to all threads via RCU */
2368 	rcu_assign_pointer(multipath_conf, conf);
2369 	if (init_checkers(conf->multipath_dir)) {
2370 		condlog(0, "failed to initialize checkers");
2373 	if (init_prio(conf->multipath_dir)) {
2374 		condlog(0, "failed to initialize prioritizers");
2378 	setlogmask(LOG_UPTO(conf->verbosity + 3));
2380 	envp = getenv("LimitNOFILE");
2383 		condlog(2,"Using systemd provided open fds limit of %s", envp);
2384 	} else if (conf->max_fds) {
2385 		struct rlimit fd_limit;
2387 		if (getrlimit(RLIMIT_NOFILE, &fd_limit) < 0) {
2388 			condlog(0, "can't get open fds limit: %s",
2390 			fd_limit.rlim_cur = 0;
2391 			fd_limit.rlim_max = 0;
2393 		if (fd_limit.rlim_cur < conf->max_fds) {
2394 			fd_limit.rlim_cur = conf->max_fds;
2395 			if (fd_limit.rlim_max < conf->max_fds)
2396 				fd_limit.rlim_max = conf->max_fds;
2397 			if (setrlimit(RLIMIT_NOFILE, &fd_limit) < 0) {
2398 				condlog(0, "can't set open fds limit to "
2400 					fd_limit.rlim_cur, fd_limit.rlim_max,
2403 				condlog(3, "set open fds limit to %lu/%lu",
2404 					fd_limit.rlim_cur, fd_limit.rlim_max);
	/* gvecs is the sig-handler-visible alias of vecs */
2410 	vecs = gvecs = init_vecs();
2418 	envp = getenv("WATCHDOG_USEC");
2419 	if (envp && sscanf(envp, "%lu", &checkint) == 1) {
2420 		/* Value is in microseconds */
2421 		conf->max_checkint = checkint / 1000000;
2422 		/* Rescale checkint */
2423 		if (conf->checkint > conf->max_checkint)
2424 			conf->checkint = conf->max_checkint;
2426 			conf->checkint = conf->max_checkint / 4;
2427 		condlog(3, "enabling watchdog, interval %d max %d",
2428 			conf->checkint, conf->max_checkint);
2429 		use_watchdog = conf->checkint;
2433 	 * Startup done, invalidate configuration
2438 	 * Signal start of configuration
2440 	post_config_state(DAEMON_CONFIGURE);
2442 	init_path_check_interval(vecs);
2445 	 * Start uevent listener early to catch events
2447 	if ((rc = pthread_create(&uevent_thr, &uevent_attr, ueventloop, udev))) {
2448 		condlog(0, "failed to create uevent thread: %d", rc);
2451 	pthread_attr_destroy(&uevent_attr);
2452 	if ((rc = pthread_create(&uxlsnr_thr, &misc_attr, uxlsnrloop, vecs))) {
2453 		condlog(0, "failed to create cli listener: %d", rc);
2460 	rc = start_io_err_stat_thread(vecs);
2464 	if ((rc = pthread_create(&check_thr, &misc_attr, checkerloop, vecs))) {
2465 		condlog(0,"failed to create checker loop thread: %d", rc);
2468 	if ((rc = pthread_create(&uevq_thr, &misc_attr, uevqloop, vecs))) {
2469 		condlog(0, "failed to create uevent dispatcher: %d", rc);
2472 	pthread_attr_destroy(&misc_attr);
	/* main state-machine loop: wait for CONFIGURE or SHUTDOWN */
2474 	while (running_state != DAEMON_SHUTDOWN) {
2475 		pthread_cleanup_push(config_cleanup, NULL);
2476 		pthread_mutex_lock(&config_lock);
2477 		if (running_state != DAEMON_CONFIGURE &&
2478 		    running_state != DAEMON_SHUTDOWN) {
2479 			pthread_cond_wait(&config_cond, &config_lock);
2481 		pthread_cleanup_pop(1);
2482 		if (running_state == DAEMON_CONFIGURE) {
2483 			pthread_cleanup_push(cleanup_lock, &vecs->lock);
2485 			pthread_testcancel();
2486 			if (!need_to_delay_reconfig(vecs)) {
				/* maps still waiting for udev: mark the
				 * reconfigure as pending instead */
2489 				conf = get_multipath_config();
2490 				conf->delayed_reconfig = 1;
2491 				put_multipath_config(conf);
2493 			lock_cleanup_pop(vecs->lock);
2494 			post_config_state(DAEMON_IDLE);
2496 			if (!startup_done) {
2497 				sd_notify(0, "READY=1");
	/* ---- shutdown path ---- */
2505 	conf = get_multipath_config();
2506 	if (conf->queue_without_daemon == QUE_NO_DAEMON_OFF)
2507 		vector_foreach_slot(vecs->mpvec, mpp, i)
2508 			dm_queue_if_no_path(mpp->alias, 0);
2509 	put_multipath_config(conf);
2510 	remove_maps_and_stop_waiters(vecs);
2511 	unlock(&vecs->lock);
2513 	pthread_cancel(check_thr);
2514 	pthread_cancel(uevent_thr);
2515 	pthread_cancel(uxlsnr_thr);
2516 	pthread_cancel(uevq_thr);
2518 	pthread_join(check_thr, NULL);
2519 	pthread_join(uevent_thr, NULL);
2520 	pthread_join(uxlsnr_thr, NULL);
2521 	pthread_join(uevq_thr, NULL);
2523 	stop_io_err_stat_thread();
2526 	free_pathvec(vecs->pathvec, FREE_PATHS);
2527 	vecs->pathvec = NULL;
2528 	unlock(&vecs->lock);
2530 	pthread_mutex_destroy(&vecs->lock.mutex);
2540 	/* We're done here */
2541 	condlog(3, "unlink pidfile");
2542 	unlink(DEFAULT_PIDFILE);
2544 	condlog(2, "--------shut down-------");
2550 	 * Freeing config must be done after condlog() and dm_lib_exit(),
2551 	 * because logging functions like dlog() and dm_write_log()
2552 	 * reference the config.
2554 	conf = rcu_dereference(multipath_conf);
2555 	rcu_assign_pointer(multipath_conf, NULL);
2556 	call_rcu(&conf->rcu, rcu_free_config);
2559 	pthread_attr_destroy(&waiter_attr);
2560 	pthread_attr_destroy(&io_err_stat_attr);
2562 	dbg_free_final(NULL);
	/* success exit notification; the failure label below reports 1 */
2566 	sd_notify(0, "ERRNO=0");
2572 	sd_notify(0, "ERRNO=1");
/*
 * Fragment of the elided daemonize routine: classic double-fork (the
 * setsid() between the forks is elided), chdir to "/", then redirect
 * stdin/stdout/stderr to /dev/null so the daemon holds no terminal fds.
 * The parent/child branch logic between the forks is not visible here.
 */
	if( (pid = fork()) < 0){
		fprintf(stderr, "Failed first fork : %s\n", strerror(errno));
	if ( (pid = fork()) < 0)
		fprintf(stderr, "Failed second fork : %s\n", strerror(errno));
		fprintf(stderr, "cannot chdir to '/', continuing\n");
	dev_null_fd = open("/dev/null", O_RDWR);
	if (dev_null_fd < 0){
		fprintf(stderr, "cannot open /dev/null for input & output : %s\n",
	/* dup() takes the lowest free fd, i.e. the one just closed */
	close(STDIN_FILENO);
	if (dup(dev_null_fd) < 0) {
		fprintf(stderr, "cannot dup /dev/null to stdin : %s\n",
	close(STDOUT_FILENO);
	if (dup(dev_null_fd) < 0) {
		fprintf(stderr, "cannot dup /dev/null to stdout : %s\n",
	close(STDERR_FILENO);
	if (dup(dev_null_fd) < 0) {
		fprintf(stderr, "cannot dup /dev/null to stderr : %s\n",
	daemon_pid = getpid();
/*
 * Entry point.  Requires root; parses options (getopt string
 * ":dsv:k::Bn" — the per-option case labels are elided from this
 * listing): -v verbosity, -k runs an interactive/one-shot CLI client
 * via uxclnt() and exits, -B bindings read-only, -n ignore new devs.
 * Any remaining argv words are joined into a single command string and
 * sent to the daemon's unix socket.  Otherwise it (optionally, -d
 * presumably selects foreground — confirm) daemonizes and runs child().
 * NOTE: valgrind DRD race annotations below are no-ops outside
 * instrumented builds.
 */
2633 main (int argc, char *argv[])
2635 	extern char *optarg;
2640 	struct config *conf;
2642 	ANNOTATE_BENIGN_RACE_SIZED(&multipath_conf, sizeof(multipath_conf),
2643 				   "Manipulated through RCU");
2644 	ANNOTATE_BENIGN_RACE_SIZED(&running_state, sizeof(running_state),
2645 		"Suppress complaints about unprotected running_state reads");
2646 	ANNOTATE_BENIGN_RACE_SIZED(&uxsock_timeout, sizeof(uxsock_timeout),
2647 		"Suppress complaints about this scalar variable");
2651 	if (getuid() != 0) {
2652 		fprintf(stderr, "need to be root\n");
2656 	/* make sure we don't lock any path */
2658 		fprintf(stderr, "can't chdir to root directory : %s\n",
2660 	umask(umask(077) | 022);
	/* config_cond must use the monotonic clock for timed waits */
2662 	pthread_cond_init_mono(&config_cond);
2665 	libmp_udev_set_sync_support(0);
2667 	while ((arg = getopt(argc, argv, ":dsv:k::Bn")) != EOF ) {
2673 			//debug=1; /* ### comment me out ### */
2676 			if (sizeof(optarg) > sizeof(char *) ||
2677 			    !isdigit(optarg[0]))
2680 			verbosity = atoi(optarg);
		/* -k: act as a CLI client to a running daemon */
2686 			conf = load_config(DEFAULT_CONFIGFILE);
2690 			conf->verbosity = verbosity;
2691 			uxsock_timeout = conf->uxsock_timeout;
2692 			uxclnt(optarg, uxsock_timeout + 100);
2696 			bindings_read_only = 1;
2699 			ignore_new_devs = 1;
2702 			fprintf(stderr, "Invalid argument '-%c'\n",
	/* trailing args: build one command line and send it to the daemon */
2707 	if (optind < argc) {
2712 		conf = load_config(DEFAULT_CONFIGFILE);
2716 		conf->verbosity = verbosity;
2717 		uxsock_timeout = conf->uxsock_timeout;
2718 		memset(cmd, 0x0, CMDSIZE);
2719 		while (optind < argc) {
			/* quote words containing spaces so the daemon
			 * parses them as a single argument */
2720 			if (strchr(argv[optind], ' '))
2721 				c += snprintf(c, s + CMDSIZE - c, "\"%s\" ", argv[optind]);
2723 				c += snprintf(c, s + CMDSIZE - c, "%s ", argv[optind]);
2726 		c += snprintf(c, s + CMDSIZE - c, "\n");
2727 		uxclnt(s, uxsock_timeout + 100);
	/* unbuffered stdout when logging to a pipe/journal */
2733 	if (!isatty(fileno(stdout)))
2734 		setbuf(stdout, NULL);
2736 	daemon_pid = getpid();
2748 	return (child(NULL));
/*
 * Thread body started from mpath_pr_event_handle() when a path on a map
 * with a persistent reservation key comes up: issue PR IN (READ KEYS)
 * on the path, look for this map's reservation_key among the returned
 * keys, and if it is absent (but other keys exist) register it on the
 * path with PR OUT REGISTER AND IGNORE EXISTING KEY so the new path
 * joins the reservation.
 * NOTE: listing is elided — mpp assignment from pp->mpp, goto-out error
 * paths, the isFound bookkeeping, frees of resp/param and the return
 * statement are not shown.
 */
void * mpath_pr_event_handler_fn (void * pathp )
2753 	struct multipath * mpp;
2754 	int i, ret, isFound;
2755 	struct path * pp = (struct path *)pathp;
2756 	struct prout_param_descriptor *param;
2757 	struct prin_resp *resp;
2761 	resp = mpath_alloc_prin_response(MPATH_PRIN_RKEY_SA);
2763 		condlog(0,"%s Alloc failed for prin response", pp->dev);
2767 	ret = prin_do_scsi_ioctl(pp->dev, MPATH_PRIN_RKEY_SA, resp, 0);
2768 	if (ret != MPATH_PR_SUCCESS )
2770 		condlog(0,"%s : pr in read keys service action failed. Error=%d", pp->dev, ret);
2774 	condlog(3, " event pr=%d addlen=%d",resp->prin_descriptor.prin_readkeys.prgeneration,
2775 			resp->prin_descriptor.prin_readkeys.additional_length );
	/* no keys registered on the device at all */
2777 	if (resp->prin_descriptor.prin_readkeys.additional_length == 0 )
2779 		condlog(1, "%s: No key found. Device may not be registered.", pp->dev);
2780 		ret = MPATH_PR_SUCCESS;
2783 	condlog(2, "Multipath  reservation_key: 0x%" PRIx64 " ",
2784 		get_be64(mpp->reservation_key));
	/* each returned key is 8 bytes; scan for our reservation key */
2787 	for (i = 0; i < resp->prin_descriptor.prin_readkeys.additional_length/8; i++ )
2789 		condlog(2, "PR IN READKEYS[%d]  reservation key:",i);
2790 		dumpHex((char *)&resp->prin_descriptor.prin_readkeys.key_list[i*8], 8 , -1);
2791 		if (!memcmp(&mpp->reservation_key, &resp->prin_descriptor.prin_readkeys.key_list[i*8], 8))
2793 			condlog(2, "%s: pr key found in prin readkeys response", mpp->alias);
2800 		condlog(0, "%s: Either device not registered or ", pp->dev);
2801 		condlog(0, "host is not authorised for registration. Skip path");
2802 		ret = MPATH_PR_OTHER;
	/* NOTE(review): malloc result is used unchecked by memset below
	 * in the visible lines — confirm a NULL check exists upstream */
2806 	param= malloc(sizeof(struct prout_param_descriptor));
2807 	memset(param, 0 , sizeof(struct prout_param_descriptor));
2808 	memcpy(param->sa_key, &mpp->reservation_key, 8);
2809 	param->num_transportid = 0;
2811 	condlog(3, "device %s:%s", pp->dev, pp->mpp->wwid);
2813 	ret = prout_do_scsi_ioctl(pp->dev, MPATH_PROUT_REG_IGN_SA, 0, 0, param, 0);
2814 	if (ret != MPATH_PR_SUCCESS )
2816 		condlog(0,"%s: Reservation registration failed. Error: %d", pp->dev, ret);
2826 int mpath_pr_event_handle(struct path *pp)
2830 pthread_attr_t attr;
2831 struct multipath * mpp;
2835 if (get_be64(mpp->reservation_key))
2838 pthread_attr_init(&attr);
2839 pthread_attr_setdetachstate(&attr, PTHREAD_CREATE_JOINABLE);
2841 rc = pthread_create(&thread, NULL , mpath_pr_event_handler_fn, pp);
2843 condlog(0, "%s: ERROR; return code from pthread_create() is %d", pp->dev, rc);
2846 pthread_attr_destroy(&attr);
2847 rc = pthread_join(thread, NULL);