2 * Copyright (c) 2004, 2005 Christophe Varoqui
3 * Copyright (c) 2005 Kiyoshi Ueda, NEC
4 * Copyright (c) 2005 Benjamin Marzinski, Redhat
5 * Copyright (c) 2005 Edward Goggin, EMC
9 #include <libdevmapper.h>
12 #include <sys/types.h>
16 #include <sys/resource.h>
18 #include <linux/oom.h>
22 #include <systemd/sd-daemon.h>
24 #include <semaphore.h>
30 #include "time-util.h"
/* Non-zero when the systemd watchdog should be kicked.
 * NOTE(review): where this is set is not visible in this excerpt — confirm. */
38 static int use_watchdog;
54 #include "blacklist.h"
55 #include "structs_vec.h"
57 #include "devmapper.h"
60 #include "discovery.h"
64 #include "switchgroup.h"
66 #include "configure.h"
69 #include "pgpolicies.h"
73 #include "mpath_cmd.h"
74 #include "mpath_persist.h"
76 #include "prioritizers/alua_rtpg.h"
83 #include "cli_handlers.h"
87 #include "../third-party/valgrind/drd.h"
89 #define FILE_NAME_SIZE 256
/*
 * Log a message for path pp at level a: "path offline" when the path is
 * gone, otherwise the checker-supplied message b.  Requires pp->mpp set.
 * NOTE(review): continuation lines of this macro are missing from this
 * listing; the condition selecting between the two condlog calls is not
 * visible here.
 */
92 #define LOG_MSG(a, b) \
95 condlog(a, "%s: %s - path offline", pp->mpp->alias, pp->dev); \
97 condlog(a, "%s: %s - %s", pp->mpp->alias, pp->dev, b); \
/* Argument bundle handed to the persistent-reservation event handler:
 * the map plus a maximum allocation length for PR data transfers. */
100 struct mpath_event_param
103 struct multipath *mpp;
106 unsigned int mpath_mx_alloc_len;
110 int bindings_read_only;
/* Daemon state machine value; reads/writes are serialized by config_lock,
 * and transitions are announced on config_cond (see post_config_state). */
112 enum daemon_status running_state = DAEMON_INIT;
114 pthread_mutex_t config_lock = PTHREAD_MUTEX_INITIALIZER;
115 pthread_cond_t config_cond;
118 * global copy of vecs for use in sig handlers
120 struct vectors * gvecs;
/* RCU-managed pointer to the live configuration; see get_multipath_config(). */
124 struct config *multipath_conf;
126 /* Local variables */
/* Flags raised from signal handlers; sig_atomic_t keeps stores async-signal-safe. */
127 static volatile sig_atomic_t exit_sig;
128 static volatile sig_atomic_t reconfig_sig;
129 static volatile sig_atomic_t log_reset_sig;
134 switch (running_state) {
139 case DAEMON_CONFIGURE:
145 case DAEMON_SHUTDOWN:
152 * I love you too, systemd ...
/* Map the current running_state to a "STATUS=..." string suitable for
 * sd_notify(3), so systemd can display the daemon's phase. */
155 sd_notify_status(void)
157 switch (running_state) {
159 return "STATUS=init";
161 return "STATUS=startup";
162 case DAEMON_CONFIGURE:
163 return "STATUS=configure";
165 return "STATUS=idle";
167 return "STATUS=running";
168 case DAEMON_SHUTDOWN:
169 return "STATUS=shutdown";
/* pthread cleanup handler: releases config_lock if the owning thread is
 * cancelled while waiting on config_cond (pushed in set_config_state etc.). */
174 static void config_cleanup(void *arg)
176 pthread_mutex_unlock(&config_lock);
/* Publish a new daemon state: under config_lock, update running_state,
 * wake all waiters on config_cond, and report the new phase to systemd. */
179 void post_config_state(enum daemon_status state)
181 pthread_mutex_lock(&config_lock);
182 if (state != running_state) {
183 running_state = state;
184 pthread_cond_broadcast(&config_cond);
186 sd_notify(0, sd_notify_status());
189 pthread_mutex_unlock(&config_lock);
/* Like post_config_state(), but if the daemon is busy (not DAEMON_IDLE)
 * wait — with a CLOCK_MONOTONIC-based timeout via pthread_cond_timedwait —
 * for it to become idle before switching state.  The cleanup handler
 * guarantees config_lock is dropped on thread cancellation.
 * NOTE(review): the timeout value and return-code handling are on lines
 * missing from this excerpt. */
192 int set_config_state(enum daemon_status state)
196 pthread_cleanup_push(config_cleanup, NULL);
197 pthread_mutex_lock(&config_lock);
198 if (running_state != state) {
199 if (running_state != DAEMON_IDLE) {
202 clock_gettime(CLOCK_MONOTONIC, &ts);
204 rc = pthread_cond_timedwait(&config_cond,
208 running_state = state;
209 pthread_cond_broadcast(&config_cond);
211 sd_notify(0, sd_notify_status());
215 pthread_cleanup_pop(1);
/* RCU accessors for the live configuration.  Callers must pair every
 * get_multipath_config() with a put_multipath_config() on the same
 * pointer, keeping the read-side critical section as short as possible. */
219 struct config *get_multipath_config(void)
222 return rcu_dereference(multipath_conf);
225 void put_multipath_config(struct config *conf)
/* Decide whether mpp should fail back to a different path group.
 * Returns early for manual failback; when refresh is requested, re-reads
 * every path's priority (DI_PRIO), then recomputes the best group and
 * compares it with the kernel's next group. */
231 need_switch_pathgroup (struct multipath * mpp, int refresh)
233 struct pathgroup * pgp;
238 if (!mpp || mpp->pgfailback == -FAILBACK_MANUAL)
242 * Refresh path priority values
245 vector_foreach_slot (mpp->pg, pgp, i) {
246 vector_foreach_slot (pgp->paths, pp, j) {
247 conf = get_multipath_config();
248 pathinfo(pp, conf, DI_PRIO);
249 put_multipath_config(conf);
254 if (!mpp->pg || VECTOR_SIZE(mpp->paths) == 0)
257 mpp->bestpg = select_path_group(mpp);
259 if (mpp->bestpg != mpp->nextpg)
/* Tell the kernel (via device-mapper) to switch mpp to its best path
 * group, bumping the switch-group statistics counter. */
266 switch_pathgroup (struct multipath * mpp)
268 mpp->stat_switchgroup++;
269 dm_switchgroup(mpp->alias, mpp->bestpg);
270 condlog(2, "%s: switch to path group #%i",
271 mpp->alias, mpp->bestpg);
/* Reconcile the old map vector (vecs->mpvec) with the newly discovered
 * one (nmpv): maps absent from nmpv are flushed from device-mapper and
 * dropped (a flush failure may just mean the device is open, in which
 * case the map is re-set-up and kept in nmpv); surviving maps are
 * optionally re-assigned when reassign_maps is configured. */
275 coalesce_maps(struct vectors *vecs, vector nmpv)
277 struct multipath * ompp;
278 vector ompv = vecs->mpvec;
279 unsigned int i, reassign_maps;
282 conf = get_multipath_config();
283 reassign_maps = conf->reassign_maps;
284 put_multipath_config(conf);
285 vector_foreach_slot (ompv, ompp, i) {
286 condlog(3, "%s: coalesce map", ompp->alias);
287 if (!find_mp_by_wwid(nmpv, ompp->wwid)) {
289 * remove all current maps not allowed by the
290 * current configuration
292 if (dm_flush_map(ompp->alias)) {
293 condlog(0, "%s: unable to flush devmap",
296 * may be just because the device is open
298 if (setup_multipath(vecs, ompp) != 0) {
302 if (!vector_alloc_slot(nmpv))
305 vector_set_slot(nmpv, ompp);
/* taken over by nmpv: remove from the old vector without freeing */
307 vector_del_slot(ompv, i);
312 condlog(2, "%s devmap removed", ompp->alias);
314 } else if (reassign_maps) {
315 condlog(3, "%s: Reassign existing device-mapper"
316 " devices", ompp->alias);
317 dm_reassign(ompp->alias);
/* Push the checker's view of each path in mpp into the kernel map:
 * reinstate paths the checker sees as usable (UP/GHOST) but DM has
 * failed, and fail paths the checker sees as DOWN/SHAKY but DM still
 * has active.  Paths whose checker state is indeterminate
 * (UNCHECKED/WILD/DELAYED) are left alone. */
324 sync_map_state(struct multipath *mpp)
326 struct pathgroup *pgp;
333 vector_foreach_slot (mpp->pg, pgp, i){
334 vector_foreach_slot (pgp->paths, pp, j){
335 if (pp->state == PATH_UNCHECKED ||
336 pp->state == PATH_WILD ||
337 pp->state == PATH_DELAYED)
339 if ((pp->dmstate == PSTATE_FAILED ||
340 pp->dmstate == PSTATE_UNDEF) &&
341 (pp->state == PATH_UP || pp->state == PATH_GHOST))
342 dm_reinstate_path(mpp->alias, pp->dev_t);
343 else if ((pp->dmstate == PSTATE_ACTIVE ||
344 pp->dmstate == PSTATE_UNDEF) &&
345 (pp->state == PATH_DOWN ||
346 pp->state == PATH_SHAKY))
347 dm_fail_path(mpp->alias, pp->dev_t);
/* Apply sync_map_state() to every map in mpvec. */
353 sync_maps_state(vector mpvec)
356 struct multipath *mpp;
358 vector_foreach_slot (mpvec, mpp, i)
/* Remove a map from device-mapper and from the daemon's tables.
 * With nopaths set, uses dm_flush_map_nopaths() (honouring any pending
 * deferred remove); otherwise dm_flush_map().  On success, orphans the
 * map's paths and stops its event-waiter thread.  A flush failure is
 * tolerated for the deferred-remove case. */
363 flush_map(struct multipath * mpp, struct vectors * vecs, int nopaths)
368 r = dm_flush_map_nopaths(mpp->alias, mpp->deferred_remove);
370 r = dm_flush_map(mpp->alias);
372 * clear references to this map before flushing so we can ignore
373 * the spurious uevent we may generate with the dm_flush_map call below
377 * May not really be an error -- if the map was already flushed
378 * from the device mapper by dmsetup(8) for instance.
381 condlog(0, "%s: can't flush", mpp->alias);
383 condlog(2, "%s: devmap deferred remove", mpp->alias);
384 mpp->deferred_remove = DEFERRED_REMOVE_IN_PROGRESS;
390 condlog(2, "%s: map flushed", mpp->alias);
393 orphan_paths(vecs->pathvec, mpp);
394 remove_map_and_stop_waiter(mpp, vecs, 1);
/* Rebuild and reload an existing map: re-adopt its paths, regenerate the
 * device-mapper parameter string with setup_map(), push it with
 * domap(ACT_RELOAD) — retrying on failure — and finally resynchronize
 * daemon state with setup_multipath(). */
400 update_map (struct multipath *mpp, struct vectors *vecs)
403 char params[PARAMS_SIZE] = {0};
406 condlog(4, "%s: updating new map", mpp->alias);
407 if (adopt_paths(vecs->pathvec, mpp)) {
408 condlog(0, "%s: failed to adopt paths for new map update",
413 verify_paths(mpp, vecs);
414 mpp->flush_on_last_del = FLUSH_UNDEF;
415 mpp->action = ACT_RELOAD;
417 if (setup_map(mpp, params, PARAMS_SIZE)) {
418 condlog(0, "%s: failed to setup new map in update", mpp->alias);
422 if (domap(mpp, params, 1) <= 0 && retries-- > 0) {
/* NOTE(review): "map_udate" below is a typo for "map_update" in a runtime
 * log string; left untouched here since this edit changes comments only. */
423 condlog(0, "%s: map_udate sleep", mpp->alias);
430 if (setup_multipath(vecs, mpp))
436 condlog(0, "%s: failed reload in new map update", mpp->alias);
/* uevent handler for map addition: resolve the map's alias from the
 * uevent's DM_NAME, falling back to a major:minor lookup via
 * dm_mapname(), then call ev_add_map() under vecs->lock (taken with a
 * cancellation-safe cleanup push). */
441 uev_add_map (struct uevent * uev, struct vectors * vecs)
444 int major = -1, minor = -1, rc;
446 condlog(3, "%s: add map (uevent)", uev->kernel);
447 alias = uevent_get_dm_name(uev);
449 condlog(3, "%s: No DM_NAME in uevent", uev->kernel);
450 major = uevent_get_major(uev);
451 minor = uevent_get_minor(uev);
452 alias = dm_mapname(major, minor);
454 condlog(2, "%s: mapname not found for %d:%d",
455 uev->kernel, major, minor);
459 pthread_cleanup_push(cleanup_lock, &vecs->lock);
461 pthread_testcancel();
462 rc = ev_add_map(uev->kernel, alias, vecs);
463 lock_cleanup_pop(vecs->lock);
/* Core map-add logic (also reachable from the CLI).  Ignores non-multipath
 * DM devices.  If the map is already known: either finish a pending
 * creation (wait_for_udev > 1 → update_map), trigger a delayed
 * reconfigure if one was deferred, and/or reassign DM devices.  If the
 * map is new: register it via add_map_without_path(), or fall back to
 * rebuilding it from its reference WWID with coalesce_paths().
 * Returns non-zero (r starts at 1) on failure paths. */
469 ev_add_map (char * dev, char * alias, struct vectors * vecs)
472 struct multipath * mpp;
474 int r = 1, delayed_reconfig, reassign_maps;
477 map_present = dm_map_present(alias);
479 if (map_present && !dm_is_mpath(alias)) {
480 condlog(4, "%s: not a multipath map", alias);
484 mpp = find_mp_by_alias(vecs->mpvec, alias);
487 if (mpp->wait_for_udev > 1) {
488 if (update_map(mpp, vecs))
489 /* setup multipathd removed the map */
492 conf = get_multipath_config();
493 delayed_reconfig = conf->delayed_reconfig;
494 reassign_maps = conf->reassign_maps;
495 put_multipath_config(conf);
496 if (mpp->wait_for_udev) {
497 mpp->wait_for_udev = 0;
498 if (delayed_reconfig &&
499 !need_to_delay_reconfig(vecs)) {
500 condlog(2, "reconfigure (delayed)");
501 set_config_state(DAEMON_CONFIGURE);
506 * Not really an error -- we generate our own uevent
507 * if we create a multipath mapped device as a result
511 condlog(3, "%s: Reassign existing device-mapper devices",
517 condlog(2, "%s: adding map", alias);
520 * now we can register the map
523 if ((mpp = add_map_without_path(vecs, alias))) {
525 condlog(2, "%s: devmap %s registered", alias, dev);
528 condlog(2, "%s: uev_add_map failed", dev);
532 r = get_refwwid(CMD_NONE, dev, DEV_DEVMAP, vecs->pathvec, &refwwid);
535 r = coalesce_paths(vecs, NULL, refwwid, 0, CMD_NONE);
540 condlog(2, "%s: devmap %s added", alias, dev);
542 condlog(2, "%s: uev_add_map %s blacklisted", alias, dev);
544 condlog(0, "%s: uev_add_map %s failed", alias, dev);
/* uevent handler for map removal: look the map up by DM minor under
 * vecs->lock, verify the alias from DM_NAME matches (guards against a
 * reused minor), then orphan its paths and tear it down. */
551 uev_remove_map (struct uevent * uev, struct vectors * vecs)
555 struct multipath *mpp;
557 condlog(2, "%s: remove map (uevent)", uev->kernel);
558 alias = uevent_get_dm_name(uev);
560 condlog(3, "%s: No DM_NAME in uevent, ignoring", uev->kernel);
563 minor = uevent_get_minor(uev);
565 pthread_cleanup_push(cleanup_lock, &vecs->lock);
567 pthread_testcancel();
568 mpp = find_mp_by_minor(vecs->mpvec, minor);
571 condlog(2, "%s: devmap not registered, can't remove",
/* alias/minor mismatch: event refers to a different (reused) device */
575 if (strcmp(mpp->alias, alias)) {
576 condlog(2, "%s: minor number mismatch (map %d, event %d)",
577 mpp->alias, mpp->dmi->minor, minor);
581 orphan_paths(vecs->pathvec, mpp);
582 remove_map_and_stop_waiter(mpp, vecs, 1);
584 lock_cleanup_pop(vecs->lock);
589 /* Called from CLI handler */
/* CLI-facing map removal: same minor lookup + alias cross-check as
 * uev_remove_map(), but the actual teardown goes through flush_map()
 * (locking is the caller's responsibility here). */
591 ev_remove_map (char * devname, char * alias, int minor, struct vectors * vecs)
593 struct multipath * mpp;
595 mpp = find_mp_by_minor(vecs->mpvec, minor);
598 condlog(2, "%s: devmap not registered, can't remove",
602 if (strcmp(mpp->alias, alias)) {
603 condlog(2, "%s: minor number mismatch (map %d, event %d)",
604 mpp->alias, mpp->dmi->minor, minor);
607 return flush_map(mpp, vecs, 0);
/* uevent handler for path addition.  Rejects relative device names
 * (".." would escape the pathvec namespace).  If the path is already
 * known, treats the event as spurious unless the path was never fully
 * initialized (no mpp, empty WWID), in which case it is re-probed with
 * pathinfo(DI_ALL|DI_BLACKLIST) and either re-added, removed as
 * blacklisted, or left for retry.  Unknown paths are allocated with
 * alloc_path_with_pathinfo() and stored + added under vecs->lock. */
611 uev_add_path (struct uevent *uev, struct vectors * vecs)
617 condlog(2, "%s: add path (uevent)", uev->kernel);
618 if (strstr(uev->kernel, "..") != NULL) {
620 * Don't allow relative device names in the pathvec
622 condlog(0, "%s: path name is invalid", uev->kernel);
626 pthread_cleanup_push(cleanup_lock, &vecs->lock);
628 pthread_testcancel();
629 pp = find_path_by_dev(vecs->pathvec, uev->kernel);
633 condlog(0, "%s: spurious uevent, path already in pathvec",
635 if (!pp->mpp && !strlen(pp->wwid)) {
636 condlog(3, "%s: reinitialize path", uev->kernel);
/* swap in the udev device from this event before re-probing */
637 udev_device_unref(pp->udev);
638 pp->udev = udev_device_ref(uev->udev);
639 conf = get_multipath_config();
640 r = pathinfo(pp, conf,
641 DI_ALL | DI_BLACKLIST);
642 put_multipath_config(conf);
643 if (r == PATHINFO_OK)
644 ret = ev_add_path(pp, vecs);
645 else if (r == PATHINFO_SKIPPED) {
646 condlog(3, "%s: remove blacklisted path",
648 i = find_slot(vecs->pathvec, (void *)pp);
650 vector_del_slot(vecs->pathvec, i);
653 condlog(0, "%s: failed to reinitialize path",
659 lock_cleanup_pop(vecs->lock);
664 * get path vital state
666 conf = get_multipath_config();
667 ret = alloc_path_with_pathinfo(conf, uev->udev,
669 put_multipath_config(conf);
671 if (ret == PATHINFO_SKIPPED)
673 condlog(3, "%s: failed to get path info", uev->kernel);
676 pthread_cleanup_push(cleanup_lock, &vecs->lock);
678 pthread_testcancel();
679 ret = store_path(vecs->pathvec, pp);
681 conf = get_multipath_config();
682 pp->checkint = conf->checkint;
683 put_multipath_config(conf);
684 ret = ev_add_path(pp, vecs);
686 condlog(0, "%s: failed to store path info, "
692 lock_cleanup_pop(vecs->lock);
/* Core path-add logic.  Caller must hold vecs->lock.
 * Requires pp->wwid to be known.  If a map for the WWID exists:
 *  - while the map still waits for its creation uevent and already has
 *    usable paths, park the path (wait_for_udev = 2, orphan);
 *  - reject paths whose size disagrees with the map (dropped from
 *    pathvec);
 *  - otherwise adopt all paths and reload the map (ACT_RELOAD).
 * If no map exists and the path should be multipathed, create a new map
 * with add_map_with_path() (ACT_CREATE) and run the PR check.
 * In both cases the DM table is (re)built via setup_map() + domap(),
 * with a bounded retry on reload races caused by async uevents, then
 * daemon state is resynced with setup_multipath() and a waiter thread is
 * started for newly created maps.  On failure the map is removed and the
 * path orphaned (but left in pathvec where noted). */
702 ev_add_path (struct path * pp, struct vectors * vecs)
704 struct multipath * mpp;
705 char params[PARAMS_SIZE] = {0};
707 int start_waiter = 0;
711 * need path UID to go any further
713 if (strlen(pp->wwid) == 0) {
714 condlog(0, "%s: failed to get path uid", pp->dev);
715 goto fail; /* leave path added to pathvec */
717 mpp = find_mp_by_wwid(vecs->mpvec, pp->wwid);
718 if (mpp && mpp->wait_for_udev &&
719 (pathcount(mpp, PATH_UP) > 0 ||
720 (pathcount(mpp, PATH_GHOST) > 0 && pp->tpgs != TPGS_IMPLICIT))) {
721 /* if wait_for_udev is set and valid paths exist */
722 mpp->wait_for_udev = 2;
723 orphan_path(pp, "waiting for create to complete");
730 if (pp->size && mpp->size != pp->size) {
731 condlog(0, "%s: failed to add new path %s, "
732 "device size mismatch",
733 mpp->alias, pp->dev);
734 int i = find_slot(vecs->pathvec, (void *)pp);
736 vector_del_slot(vecs->pathvec, i);
741 condlog(4,"%s: adopting all paths for path %s",
742 mpp->alias, pp->dev);
743 if (adopt_paths(vecs->pathvec, mpp))
744 goto fail; /* leave path added to pathvec */
746 verify_paths(mpp, vecs);
747 mpp->flush_on_last_del = FLUSH_UNDEF;
748 mpp->action = ACT_RELOAD;
750 if (!should_multipath(pp, vecs->pathvec)) {
751 orphan_path(pp, "only one path");
754 condlog(4,"%s: creating new map", pp->dev);
755 if ((mpp = add_map_with_path(vecs, pp, 1))) {
756 mpp->action = ACT_CREATE;
758 * We don't depend on ACT_CREATE, as domap will
759 * set it to ACT_NOTHING when complete.
764 goto fail; /* leave path added to pathvec */
767 /* persistent reservation check*/
768 mpath_pr_event_handle(pp);
771 * push the map to the device-mapper
773 if (setup_map(mpp, params, PARAMS_SIZE)) {
774 condlog(0, "%s: failed to setup map for addition of new "
775 "path %s", mpp->alias, pp->dev);
779 * reload the map for the multipath mapped device
782 ret = domap(mpp, params, 1);
784 if (ret < 0 && retries-- > 0) {
785 condlog(0, "%s: retry domap for addition of new "
786 "path %s", mpp->alias, pp->dev);
790 condlog(0, "%s: failed in domap for addition of new "
791 "path %s", mpp->alias, pp->dev);
793 * deal with asynchronous uevents :((
795 if (mpp->action == ACT_RELOAD && retries-- > 0) {
796 condlog(0, "%s: ev_add_path sleep", mpp->alias);
798 update_mpp_paths(mpp, vecs->pathvec);
801 else if (mpp->action == ACT_RELOAD)
802 condlog(0, "%s: giving up reload", mpp->alias);
809 * update our state from kernel regardless of create or reload
811 if (setup_multipath(vecs, mpp))
812 goto fail; /* if setup_multipath fails, it removes the map */
816 if ((mpp->action == ACT_CREATE ||
817 (mpp->action == ACT_NOTHING && start_waiter && !mpp->waiter)) &&
818 start_waiter_thread(mpp, vecs))
822 condlog(2, "%s [%s]: path added to devmap %s",
823 pp->dev, pp->dev_t, mpp->alias);
829 remove_map(mpp, vecs, 1);
831 orphan_path(pp, "failed to add path");
/* uevent handler for path removal: look the path up by kernel name under
 * vecs->lock and delegate to ev_remove_path().  A missing path is not an
 * error — it may already have been purged. */
836 uev_remove_path (struct uevent *uev, struct vectors * vecs)
841 condlog(2, "%s: remove path (uevent)", uev->kernel);
842 pthread_cleanup_push(cleanup_lock, &vecs->lock);
844 pthread_testcancel();
845 pp = find_path_by_dev(vecs->pathvec, uev->kernel);
847 ret = ev_remove_path(pp, vecs);
848 lock_cleanup_pop(vecs->lock);
850 /* Not an error; path might have been purged earlier */
851 condlog(0, "%s: path already removed", uev->kernel);
/* Core path-removal logic.  Caller must hold vecs->lock.
 * If the path belongs to a map: drop it from mpp->paths; when it was the
 * last path, optionally disable queueing (flush_on_last_del) and try to
 * flush the whole map — a failure there (device open) is tolerated and
 * the map is instead reloaded without the path via setup_map() + domap().
 * Maps still waiting on their creation uevent are only marked
 * (wait_for_udev = 2) instead of being reloaded.  Finally the path is
 * dropped from pathvec; on reload failure the map is torn down. */
858 ev_remove_path (struct path *pp, struct vectors * vecs)
860 struct multipath * mpp;
862 char params[PARAMS_SIZE] = {0};
865 * avoid referring to the map of an orphaned path
867 if ((mpp = pp->mpp)) {
869 * transform the mp->pg vector of vectors of paths
870 * into a mp->params string to feed the device-mapper
872 if (update_mpp_paths(mpp, vecs->pathvec)) {
873 condlog(0, "%s: failed to update paths",
877 if ((i = find_slot(mpp->paths, (void *)pp)) != -1)
878 vector_del_slot(mpp->paths, i);
881 * remove the map IFF removing the last path
883 if (VECTOR_SIZE(mpp->paths) == 0) {
884 char alias[WWID_SIZE];
887 * flush_map will fail if the device is open
/* NOTE(review): strncpy does not guarantee NUL-termination if
 * mpp->alias is >= WWID_SIZE chars — verify alias lengths are
 * bounded upstream. */
889 strncpy(alias, mpp->alias, WWID_SIZE);
890 if (mpp->flush_on_last_del == FLUSH_ENABLED) {
891 condlog(2, "%s Last path deleted, disabling queueing", mpp->alias);
893 mpp->no_path_retry = NO_PATH_RETRY_FAIL;
894 mpp->flush_on_last_del = FLUSH_IN_PROGRESS;
895 mpp->stat_map_failures++;
896 dm_queue_if_no_path(mpp->alias, 0);
898 if (!flush_map(mpp, vecs, 1)) {
899 condlog(2, "%s: removed map after"
900 " removing all paths",
906 * Not an error, continue
910 if (setup_map(mpp, params, PARAMS_SIZE)) {
911 condlog(0, "%s: failed to setup map for"
912 " removal of path %s", mpp->alias, pp->dev);
916 if (mpp->wait_for_udev) {
917 mpp->wait_for_udev = 2;
924 mpp->action = ACT_RELOAD;
925 if (domap(mpp, params, 1) <= 0) {
926 condlog(0, "%s: failed in domap for "
927 "removal of path %s",
928 mpp->alias, pp->dev);
932 * update our state from kernel
934 if (setup_multipath(vecs, mpp))
938 condlog(2, "%s [%s]: path removed from map %s",
939 pp->dev, pp->dev_t, mpp->alias);
944 if ((i = find_slot(vecs->pathvec, (void *)pp)) != -1)
945 vector_del_slot(vecs->pathvec, i);
952 remove_map_and_stop_waiter(mpp, vecs, 1);
/* uevent "change" handler for a path.  Under vecs->lock:
 *  - if disable_changed_wwids is set, re-read the path's UID and, on a
 *    WWID change, mark the path wwid_changed and fail it in DM (the old
 *    WWID is restored so the map's identity is preserved); a change back
 *    clears the flag;
 *  - a path still waiting for its udev init (INIT_REQUESTED_UDEV) is
 *    routed through uev_add_path() instead;
 *  - a read-only/read-write flip (uevent DISK_RO) triggers a map reload. */
957 uev_update_path (struct uevent *uev, struct vectors * vecs)
962 int disable_changed_wwids;
964 conf = get_multipath_config();
965 disable_changed_wwids = conf->disable_changed_wwids;
966 put_multipath_config(conf);
968 ro = uevent_get_disk_ro(uev);
970 pthread_cleanup_push(cleanup_lock, &vecs->lock);
972 pthread_testcancel();
974 pp = find_path_by_dev(vecs->pathvec, uev->kernel);
976 struct multipath *mpp = pp->mpp;
978 if (disable_changed_wwids &&
979 (strlen(pp->wwid) || pp->wwid_changed)) {
980 char wwid[WWID_SIZE];
/* snapshot current WWID, re-probe, compare */
982 strcpy(wwid, pp->wwid);
983 get_uid(pp, pp->state, uev->udev);
984 if (strcmp(wwid, pp->wwid) != 0) {
985 condlog(0, "%s: path wwid changed from '%s' to '%s'. disallowing", uev->kernel, wwid, pp->wwid);
986 strcpy(pp->wwid, wwid);
987 if (!pp->wwid_changed) {
988 pp->wwid_changed = 1;
990 dm_fail_path(pp->mpp->alias, pp->dev_t);
994 pp->wwid_changed = 0;
997 if (pp->initialized == INIT_REQUESTED_UDEV)
998 retval = uev_add_path(uev, vecs);
999 else if (mpp && ro >= 0) {
1000 condlog(2, "%s: update path write_protect to '%d' (uevent)", uev->kernel, ro);
1002 if (mpp->wait_for_udev)
1003 mpp->wait_for_udev = 2;
1005 retval = reload_map(vecs, mpp, 0, 1);
1006 condlog(2, "%s: map %s reloaded (retval %d)",
1007 uev->kernel, mpp->alias, retval);
1012 lock_cleanup_pop(vecs->lock);
1014 condlog(0, "%s: spurious uevent, path not found", uev->kernel);
/* Populate vecs->mpvec with the maps currently known to device-mapper
 * (dm_get_maps) and set up daemon state for each of them. */
1020 map_discovery (struct vectors * vecs)
1022 struct multipath * mpp;
1025 if (dm_get_maps(vecs->mpvec))
1028 vector_foreach_slot (vecs->mpvec, mpp, i)
1029 if (setup_multipath(vecs, mpp))
/* Unix-socket command dispatcher: run parse_cmd() on the received string
 * and normalize the reply — "timeout\n"/"fail\n" on error, "ok\n" when
 * the handler succeeded without producing output; a handler-provided
 * reply (r < 0 case) is passed through untouched.  *len includes the
 * terminating NUL. */
1036 uxsock_trigger (char * str, char ** reply, int * len, void * trigger_data)
1038 struct vectors * vecs;
1043 vecs = (struct vectors *)trigger_data;
1045 r = parse_cmd(str, reply, len, vecs, uxsock_timeout / 1000);
1049 *reply = STRDUP("timeout\n");
1051 *reply = STRDUP("fail\n");
1052 *len = strlen(*reply) + 1;
1055 else if (!r && *len == 0) {
1056 *reply = STRDUP("ok\n");
1057 *len = strlen(*reply) + 1;
1060 /* else if (r < 0) leave *reply alone */
/* Filter uninteresting uevents by devpath: keep only whole block devices
 * ("/block/<dev>"); discard anything without a /block/ component and
 * partitions ("/block/<dev>/<part>", detected by the second sscanf). */
1066 uev_discard(char * devpath)
1072 * keep only block devices, discard partitions
1074 tmp = strstr(devpath, "/block/");
1076 condlog(4, "no /block/ in '%s'", devpath);
1079 if (sscanf(tmp, "/block/%10s", a) != 1 ||
1080 sscanf(tmp, "/block/%10[^/]/%10s", a, b) == 2) {
1081 condlog(4, "discard event on %s", devpath);
/* Main uevent dispatcher.  Discards irrelevant events, then blocks on
 * config_cond until the daemon is IDLE or RUNNING (bailing out on
 * SHUTDOWN).  "dm-*" device events are handled as map change/remove
 * (map "add" events are deliberately ignored — tables aren't ready yet);
 * all other devices are checked against the devnode blacklist and then
 * routed to the path add/remove/change handlers. */
1088 uev_trigger (struct uevent * uev, void * trigger_data)
1091 struct vectors * vecs;
1092 struct config *conf;
1094 vecs = (struct vectors *)trigger_data;
1096 if (uev_discard(uev->devpath))
1099 pthread_cleanup_push(config_cleanup, NULL);
1100 pthread_mutex_lock(&config_lock);
1101 if (running_state != DAEMON_IDLE &&
1102 running_state != DAEMON_RUNNING)
1103 pthread_cond_wait(&config_cond, &config_lock);
1104 pthread_cleanup_pop(1);
1106 if (running_state == DAEMON_SHUTDOWN)
1111 * Add events are ignored here as the tables
1112 * are not fully initialised then.
1114 if (!strncmp(uev->kernel, "dm-", 3)) {
1115 if (!strncmp(uev->action, "change", 6)) {
1116 r = uev_add_map(uev, vecs);
1119 if (!strncmp(uev->action, "remove", 6)) {
1120 r = uev_remove_map(uev, vecs);
1127 * path add/remove event
1129 conf = get_multipath_config();
1130 if (filter_devnode(conf->blist_devnode, conf->elist_devnode,
1132 put_multipath_config(conf);
1135 put_multipath_config(conf);
1137 if (!strncmp(uev->action, "add", 3)) {
1138 r = uev_add_path(uev, vecs);
1141 if (!strncmp(uev->action, "remove", 6)) {
1142 r = uev_remove_path(uev, vecs);
1145 if (!strncmp(uev->action, "change", 6)) {
1146 r = uev_update_path(uev, vecs);
/* pthread cleanup handler: detach this thread from liburcu on exit. */
1154 static void rcu_unregister(void *param)
1156 rcu_unregister_thread();
/* Thread entry point: register with RCU and run the udev uevent
 * listener; rcu_unregister is pushed so the thread detaches cleanly
 * even on cancellation. */
1160 ueventloop (void * ap)
1162 struct udev *udev = ap;
1164 pthread_cleanup_push(rcu_unregister, NULL);
1165 rcu_register_thread();
1166 if (uevent_listen(udev))
1167 condlog(0, "error starting uevent listener");
1168 pthread_cleanup_pop(1);
/* Thread entry point: drain the queued uevents through uev_trigger();
 * ap is the struct vectors * passed on to the trigger. */
1173 uevqloop (void * ap)
1175 pthread_cleanup_push(rcu_unregister, NULL);
1176 rcu_register_thread();
1177 if (uevent_dispatch(&uev_trigger, ap))
1178 condlog(0, "error starting uevent dispatcher");
1179 pthread_cleanup_pop(1);
/* Thread entry point for the CLI unix-socket listener: register every
 * CLI command handler (locked handlers take vecs->lock; the
 * set_unlocked_* ones — status/daemon/reconfigure/quit/shutdown — run
 * without it), then serve requests via uxsock_listen().  On listener
 * exit the daemon is moved to DAEMON_SHUTDOWN. */
1183 uxlsnrloop (void * ap)
1186 condlog(1, "Failed to init uxsock listener");
1189 pthread_cleanup_push(rcu_unregister, NULL);
1190 rcu_register_thread();
1191 set_handler_callback(LIST+PATHS, cli_list_paths);
1192 set_handler_callback(LIST+PATHS+FMT, cli_list_paths_fmt);
1193 set_handler_callback(LIST+PATHS+RAW+FMT, cli_list_paths_raw);
1194 set_handler_callback(LIST+PATH, cli_list_path);
1195 set_handler_callback(LIST+MAPS, cli_list_maps);
1196 set_unlocked_handler_callback(LIST+STATUS, cli_list_status);
1197 set_unlocked_handler_callback(LIST+DAEMON, cli_list_daemon);
1198 set_handler_callback(LIST+MAPS+STATUS, cli_list_maps_status);
1199 set_handler_callback(LIST+MAPS+STATS, cli_list_maps_stats);
1200 set_handler_callback(LIST+MAPS+FMT, cli_list_maps_fmt);
1201 set_handler_callback(LIST+MAPS+RAW+FMT, cli_list_maps_raw);
1202 set_handler_callback(LIST+MAPS+TOPOLOGY, cli_list_maps_topology);
1203 set_handler_callback(LIST+TOPOLOGY, cli_list_maps_topology);
1204 set_handler_callback(LIST+MAPS+JSON, cli_list_maps_json);
1205 set_handler_callback(LIST+MAP+TOPOLOGY, cli_list_map_topology);
1206 set_handler_callback(LIST+MAP+FMT, cli_list_map_fmt);
1207 set_handler_callback(LIST+MAP+RAW+FMT, cli_list_map_fmt);
1208 set_handler_callback(LIST+MAP+JSON, cli_list_map_json);
1209 set_handler_callback(LIST+CONFIG, cli_list_config);
1210 set_handler_callback(LIST+BLACKLIST, cli_list_blacklist);
1211 set_handler_callback(LIST+DEVICES, cli_list_devices);
1212 set_handler_callback(LIST+WILDCARDS, cli_list_wildcards);
1213 set_handler_callback(ADD+PATH, cli_add_path);
1214 set_handler_callback(DEL+PATH, cli_del_path);
1215 set_handler_callback(ADD+MAP, cli_add_map);
1216 set_handler_callback(DEL+MAP, cli_del_map);
1217 set_handler_callback(SWITCH+MAP+GROUP, cli_switch_group);
1218 set_unlocked_handler_callback(RECONFIGURE, cli_reconfigure);
1219 set_handler_callback(SUSPEND+MAP, cli_suspend);
1220 set_handler_callback(RESUME+MAP, cli_resume);
1221 set_handler_callback(RESIZE+MAP, cli_resize);
1222 set_handler_callback(RELOAD+MAP, cli_reload);
1223 set_handler_callback(RESET+MAP, cli_reassign);
1224 set_handler_callback(REINSTATE+PATH, cli_reinstate);
1225 set_handler_callback(FAIL+PATH, cli_fail);
1226 set_handler_callback(DISABLEQ+MAP, cli_disable_queueing);
1227 set_handler_callback(RESTOREQ+MAP, cli_restore_queueing);
1228 set_handler_callback(DISABLEQ+MAPS, cli_disable_all_queueing);
1229 set_handler_callback(RESTOREQ+MAPS, cli_restore_all_queueing);
1230 set_unlocked_handler_callback(QUIT, cli_quit);
1231 set_unlocked_handler_callback(SHUTDOWN, cli_shutdown);
1232 set_handler_callback(GETPRSTATUS+MAP, cli_getprstatus);
1233 set_handler_callback(SETPRSTATUS+MAP, cli_setprstatus);
1234 set_handler_callback(UNSETPRSTATUS+MAP, cli_unsetprstatus);
1235 set_handler_callback(FORCEQ+DAEMON, cli_force_no_daemon_q);
1236 set_handler_callback(RESTOREQ+DAEMON, cli_restore_no_daemon_q);
1239 uxsock_listen(&uxsock_trigger, ap);
1240 pthread_cleanup_pop(1);
1247 post_config_state(DAEMON_SHUTDOWN);
/* Proactively fail a path in the DM table after the checker declared it
 * down; del_active additionally updates the map's queueing bookkeeping
 * via update_queue_mode_del_path(). */
1251 fail_path (struct path * pp, int del_active)
1256 condlog(2, "checker failed path %s in map %s",
1257 pp->dev_t, pp->mpp->alias);
1259 dm_fail_path(pp->mpp->alias, pp->dev_t);
1261 update_queue_mode_del_path(pp->mpp);
1265 * caller must have locked the path list before calling that function
/* Reinstate a recovered path in the DM table; add_active additionally
 * updates the map's queueing bookkeeping via update_queue_mode_add_path(). */
1268 reinstate_path (struct path * pp, int add_active)
1275 if (dm_reinstate_path(pp->mpp->alias, pp->dev_t)) {
1276 condlog(0, "%s: reinstate failed", pp->dev_t);
1279 condlog(2, "%s: reinstated", pp->dev_t);
1281 update_queue_mode_add_path(pp->mpp);
/* Re-enable pp's path group in DM if it is currently disabled.  Safe to
 * return early when pgindex is still unset (path added via uev_add_path
 * before the next map reload assigns it — all groups get enabled on
 * reload anyway).  pgindex is 1-based; VECTOR_SLOT is 0-based. */
1287 enable_group(struct path * pp)
1289 struct pathgroup * pgp;
1292 * if path is added through uev_add_path, pgindex can be unset.
1293 * next update_strings() will set it, upon map reload event.
1295 * we can safely return here, because upon map reload, all
1296 * PG will be enabled.
1298 if (!pp->mpp->pg || !pp->pgindex)
1301 pgp = VECTOR_SLOT(pp->mpp->pg, pp->pgindex - 1);
1303 if (pgp->status == PGSTATE_DISABLED) {
1304 condlog(2, "%s: enable group #%i", pp->mpp->alias, pp->pgindex);
1305 dm_enablegroup(pp->mpp->alias, pp->pgindex);
/* Drop maps from vecs->mpvec whose DM device no longer exists
 * (e.g. removed behind the daemon's back). */
1310 mpvec_garbage_collector (struct vectors * vecs)
1312 struct multipath * mpp;
1318 vector_foreach_slot (vecs->mpvec, mpp, i) {
1319 if (mpp && mpp->alias && !dm_map_present(mpp->alias)) {
1320 condlog(2, "%s: remove dead map", mpp->alias);
1321 remove_map_and_stop_waiter(mpp, vecs, 1);
1327 /* This is called after a path has started working again. It the multipath
1328 * device for this path uses the followover failback type, and this is the
1329 * best pathgroup, and this is the first path in the pathgroup to come back
1330 * up, then switch to this pathgroup */
1332 followover_should_failback(struct path * pp)
1334 struct pathgroup * pgp;
1338 if (pp->mpp->pgfailback != -FAILBACK_FOLLOWOVER ||
1339 !pp->mpp->pg || !pp->pgindex ||
1340 pp->pgindex != pp->mpp->bestpg)
/* scan siblings: another already-usable path means pp wasn't first back */
1343 pgp = VECTOR_SLOT(pp->mpp->pg, pp->pgindex - 1);
1344 vector_foreach_slot(pgp->paths, pp1, i) {
1347 if (pp1->chkrstate != PATH_DOWN && pp1->chkrstate != PATH_SHAKY)
/* Per-tick timeout for maps still waiting on their creation uevent:
 * when a map's uev_wait_tick expires, re-enable reloads (update_map if
 * one was deferred with wait_for_udev > 1) and clear wait_for_udev.
 * If any map timed out and delayed_reconfig is set, kick a deferred
 * reconfigure once nothing else requires delay. */
1354 missing_uev_wait_tick(struct vectors *vecs)
1356 struct multipath * mpp;
1358 int timed_out = 0, delayed_reconfig;
1359 struct config *conf;
1361 vector_foreach_slot (vecs->mpvec, mpp, i) {
1362 if (mpp->wait_for_udev && --mpp->uev_wait_tick <= 0) {
1364 condlog(0, "%s: timeout waiting on creation uevent. enabling reloads", mpp->alias);
1365 if (mpp->wait_for_udev > 1 && update_map(mpp, vecs)) {
1366 /* update_map removed map */
1370 mpp->wait_for_udev = 0;
1374 conf = get_multipath_config();
1375 delayed_reconfig = conf->delayed_reconfig;
1376 put_multipath_config(conf);
1377 if (timed_out && delayed_reconfig &&
1378 !need_to_delay_reconfig(vecs)) {
1379 condlog(2, "reconfigure (delayed)");
1380 set_config_state(DAEMON_CONFIGURE);
/* Count down each map's deferred-failback timer; when it reaches zero
 * and a better path group is available, switch to it. */
1385 defered_failback_tick (vector mpvec)
1387 struct multipath * mpp;
1390 vector_foreach_slot (mpvec, mpp, i) {
1392 * deferred failback getting sooner
1394 if (mpp->pgfailback > 0 && mpp->failback_tick > 0) {
1395 mpp->failback_tick--;
1397 if (!mpp->failback_tick && need_switch_pathgroup(mpp, 1))
1398 switch_pathgroup(mpp);
/* Count down each queueing map's no-path retry timer; when it expires,
 * record a map failure and disable queue_if_no_path so pending I/O
 * errors out instead of queueing forever. */
1404 retry_count_tick(vector mpvec)
1406 struct multipath *mpp;
1409 vector_foreach_slot (mpvec, mpp, i) {
1410 if (mpp->retry_tick > 0) {
1411 mpp->stat_total_queueing_time++;
1412 condlog(4, "%s: Retrying.. No active path", mpp->alias);
1413 if(--mpp->retry_tick == 0) {
1414 mpp->stat_map_failures++;
1415 dm_queue_if_no_path(mpp->alias, 0);
1416 condlog(2, "%s: Disable queueing", mpp->alias);
/* Re-read path priorities and report whether anything changed.
 * With refresh_all, every path of pp's map is re-probed (DI_PRIO) and
 * `changed` tracks differences; otherwise only pp itself is re-probed
 * and compared against its previous priority. */
1422 int update_prio(struct path *pp, int refresh_all)
1426 struct pathgroup * pgp;
1427 int i, j, changed = 0;
1428 struct config *conf;
1431 vector_foreach_slot (pp->mpp->pg, pgp, i) {
1432 vector_foreach_slot (pgp->paths, pp1, j) {
1433 oldpriority = pp1->priority;
1434 conf = get_multipath_config();
1435 pathinfo(pp1, conf, DI_PRIO);
1436 put_multipath_config(conf);
1437 if (pp1->priority != oldpriority)
1443 oldpriority = pp->priority;
1444 conf = get_multipath_config();
1445 pathinfo(pp, conf, DI_PRIO);
1446 put_multipath_config(conf);
1448 if (pp->priority == oldpriority)
/* Reload the map's DM table (optionally refreshing path info first),
 * resync daemon state with setup_multipath(), then push checker path
 * states into the kernel via sync_map_state(). */
1453 int update_path_groups(struct multipath *mpp, struct vectors *vecs, int refresh)
1455 if (reload_map(vecs, mpp, refresh, 1))
1459 if (setup_multipath(vecs, mpp) != 0)
1461 sync_map_state(mpp);
/* Invoke the checker's repair hook for a path that is PATH_DOWN and log
 * the resulting checker message; no-op for paths in any other state. */
1466 void repair_path(struct path * pp)
1468 if (pp->state != PATH_DOWN)
1471 checker_repair(&pp->checker);
1472 LOG_MSG(1, checker_message(&pp->checker));
1476 * Returns '1' if the path has been checked, '-1' if it was blacklisted
1480 check_path (struct vectors * vecs, struct path * pp, int ticks)
1483 int new_path_up = 0;
1484 int chkr_new_path_up = 0;
1486 int disable_reinstate = 0;
1487 int oldchkrstate = pp->chkrstate;
1488 int retrigger_tries, checkint;
1489 struct config *conf;
1492 if ((pp->initialized == INIT_OK ||
1493 pp->initialized == INIT_REQUESTED_UDEV) && !pp->mpp)
1497 pp->tick -= (pp->tick > ticks) ? ticks : pp->tick;
1499 return 0; /* don't check this path yet */
1501 conf = get_multipath_config();
1502 retrigger_tries = conf->retrigger_tries;
1503 checkint = conf->checkint;
1504 put_multipath_config(conf);
1505 if (!pp->mpp && pp->initialized == INIT_MISSING_UDEV &&
1506 pp->retriggers < retrigger_tries) {
1507 condlog(2, "%s: triggering change event to reinitialize",
1509 pp->initialized = INIT_REQUESTED_UDEV;
1511 sysfs_attr_set_value(pp->udev, "uevent", "change",
1517 * provision a next check soonest,
1518 * in case we exit abnormaly from here
1520 pp->tick = checkint;
1522 newstate = path_offline(pp);
1524 * Wait for uevent for removed paths;
1525 * some LLDDs like zfcp keep paths unavailable
1526 * without sending uevents.
1528 if (newstate == PATH_REMOVED)
1529 newstate = PATH_DOWN;
1531 if (newstate == PATH_UP) {
1532 conf = get_multipath_config();
1533 newstate = get_state(pp, conf, 1);
1534 put_multipath_config(conf);
1536 checker_clear_message(&pp->checker);
1538 if (pp->wwid_changed) {
1539 condlog(2, "%s: path wwid has changed. Refusing to use",
1541 newstate = PATH_DOWN;
1544 if (newstate == PATH_WILD || newstate == PATH_UNCHECKED) {
1545 condlog(2, "%s: unusable path", pp->dev);
1546 conf = get_multipath_config();
1547 pathinfo(pp, conf, 0);
1548 put_multipath_config(conf);
1552 if (!strlen(pp->wwid) && pp->initialized != INIT_MISSING_UDEV &&
1553 (newstate == PATH_UP || newstate == PATH_GHOST)) {
1554 condlog(2, "%s: add missing path", pp->dev);
1555 conf = get_multipath_config();
1556 ret = pathinfo(pp, conf, DI_ALL | DI_BLACKLIST);
1557 if (ret == PATHINFO_OK) {
1558 ev_add_path(pp, vecs);
1560 } else if (ret == PATHINFO_SKIPPED) {
1561 put_multipath_config(conf);
1564 put_multipath_config(conf);
1569 * Async IO in flight. Keep the previous path state
1570 * and reschedule as soon as possible
1572 if (newstate == PATH_PENDING) {
1577 * Synchronize with kernel state
1579 if (update_multipath_strings(pp->mpp, vecs->pathvec, 1)) {
1580 condlog(1, "%s: Could not synchronize with kernel state",
1582 pp->dmstate = PSTATE_UNDEF;
1584 /* if update_multipath_strings orphaned the path, quit early */
1588 if ((newstate == PATH_UP || newstate == PATH_GHOST) &&
1589 pp->wait_checks > 0) {
1590 if (pp->mpp && pp->mpp->nr_active > 0) {
1591 pp->state = PATH_DELAYED;
1595 pp->wait_checks = 0;
1599 * don't reinstate failed path, if its in stand-by
1600 * and if target supports only implicit tpgs mode.
1601 * this will prevent unnecessary i/o by dm on stand-by
1602 * paths if there are no other active paths in map.
1604 disable_reinstate = (newstate == PATH_GHOST &&
1605 pp->mpp->nr_active == 0 &&
1606 pp->tpgs == TPGS_IMPLICIT) ? 1 : 0;
1608 pp->chkrstate = newstate;
1609 if (newstate != pp->state) {
1610 int oldstate = pp->state;
1611 pp->state = newstate;
1613 LOG_MSG(1, checker_message(&pp->checker));
1616 * upon state change, reset the checkint
1617 * to the shortest delay
1619 conf = get_multipath_config();
1620 pp->checkint = conf->checkint;
1621 put_multipath_config(conf);
1623 if (newstate == PATH_DOWN || newstate == PATH_SHAKY || newstate == PATH_TIMEOUT) {
1625 * proactively fail path in the DM
1627 if (oldstate == PATH_UP ||
1628 oldstate == PATH_GHOST) {
1630 if (pp->mpp->delay_wait_checks > 0 &&
1631 pp->watch_checks > 0) {
1632 pp->wait_checks = pp->mpp->delay_wait_checks;
1633 pp->watch_checks = 0;
1639 * cancel scheduled failback
1641 pp->mpp->failback_tick = 0;
1643 pp->mpp->stat_path_failures++;
1648 if(newstate == PATH_UP || newstate == PATH_GHOST){
1649 if ( pp->mpp && pp->mpp->prflag ){
1651 * Check Persistent Reservation.
1653 condlog(2, "%s: checking persistent reservation "
1654 "registration", pp->dev);
1655 mpath_pr_event_handle(pp);
1660 * reinstate this path
1662 if (oldstate != PATH_UP &&
1663 oldstate != PATH_GHOST) {
1664 if (pp->mpp->delay_watch_checks > 0)
1665 pp->watch_checks = pp->mpp->delay_watch_checks;
1668 if (pp->watch_checks > 0)
1672 if (!disable_reinstate && reinstate_path(pp, add_active)) {
1673 condlog(3, "%s: reload map", pp->dev);
1674 ev_add_path(pp, vecs);
1680 if (oldchkrstate != PATH_UP && oldchkrstate != PATH_GHOST)
1681 chkr_new_path_up = 1;
1684 * if at least one path is up in a group, and
1685 * the group is disabled, re-enable it
1687 if (newstate == PATH_UP)
1690 else if (newstate == PATH_UP || newstate == PATH_GHOST) {
1691 if ((pp->dmstate == PSTATE_FAILED ||
1692 pp->dmstate == PSTATE_UNDEF) &&
1693 !disable_reinstate) {
1694 /* Clear IO errors */
1695 if (reinstate_path(pp, 0)) {
1696 condlog(3, "%s: reload map", pp->dev);
1697 ev_add_path(pp, vecs);
1702 unsigned int max_checkint;
1703 LOG_MSG(4, checker_message(&pp->checker));
1704 conf = get_multipath_config();
1705 max_checkint = conf->max_checkint;
1706 put_multipath_config(conf);
1707 if (pp->checkint != max_checkint) {
1709 * double the next check delay.
1710 * max at conf->max_checkint
1712 if (pp->checkint < (max_checkint / 2))
1713 pp->checkint = 2 * pp->checkint;
1715 pp->checkint = max_checkint;
1717 condlog(4, "%s: delay next check %is",
1718 pp->dev_t, pp->checkint);
1720 if (pp->watch_checks > 0)
1722 pp->tick = pp->checkint;
1725 else if (newstate == PATH_DOWN) {
1726 int log_checker_err;
1728 conf = get_multipath_config();
1729 log_checker_err = conf->log_checker_err;
1730 put_multipath_config(conf);
1731 if (log_checker_err == LOG_CHKR_ERR_ONCE)
1732 LOG_MSG(3, checker_message(&pp->checker));
1734 LOG_MSG(2, checker_message(&pp->checker));
1737 pp->state = newstate;
1740 if (pp->mpp->wait_for_udev)
1743 * path prio refreshing
1745 condlog(4, "path prio refresh");
1747 if (update_prio(pp, new_path_up) &&
1748 (pp->mpp->pgpolicyfn == (pgpolicyfn *)group_by_prio) &&
1749 pp->mpp->pgfailback == -FAILBACK_IMMEDIATE)
1750 update_path_groups(pp->mpp, vecs, !new_path_up);
1751 else if (need_switch_pathgroup(pp->mpp, 0)) {
1752 if (pp->mpp->pgfailback > 0 &&
1753 (new_path_up || pp->mpp->failback_tick <= 0))
1754 pp->mpp->failback_tick =
1755 pp->mpp->pgfailback + 1;
1756 else if (pp->mpp->pgfailback == -FAILBACK_IMMEDIATE ||
1757 (chkr_new_path_up && followover_should_failback(pp)))
1758 switch_pathgroup(pp->mpp);
/*
 * Seed every known path's checker polling interval with the
 * configured default (conf->checkint).
 * NOTE(review): excerpt is fragmentary — the pp/i loop-variable
 * declarations and closing braces are not visible here.
 */
1763 static void init_path_check_interval(struct vectors *vecs)
1765 struct config *conf;
1769 vector_foreach_slot (vecs->pathvec, pp, i) {
/* Take and release a config reference per path (RCU-protected config). */
1770 conf = get_multipath_config();
1771 pp->checkint = conf->checkint;
1772 put_multipath_config(conf);
/*
 * Path-checker thread body. Runs for the daemon's lifetime:
 * each iteration computes elapsed "ticks" from CLOCK_MONOTONIC,
 * pets the systemd watchdog, checks every path under vecs->lock,
 * runs the deferred-failback / retry / missing-uevent timers and
 * the map garbage collector, then sleeps — either a plain tick or,
 * in strict_timing mode, via setitimer(ITIMER_REAL) + sigwait(SIGALRM)
 * to align iterations to 1-second boundaries.
 * NOTE(review): fragmentary excerpt — the outer while loop, sigset
 * initialization and several branches are not visible here.
 */
1777 checkerloop (void *ap)
1779 struct vectors *vecs;
1783 struct itimerval timer_tick_it;
1784 struct timespec last_time;
1785 struct config *conf;
/* Unregister from RCU on cancellation; this thread is RCU-registered. */
1787 pthread_cleanup_push(rcu_unregister, NULL);
1788 rcu_register_thread();
1789 mlockall(MCL_CURRENT | MCL_FUTURE);
1790 vecs = (struct vectors *)ap;
1791 condlog(2, "path checkers start up");
1793 /* Tweak start time for initial path check */
1794 if (clock_gettime(CLOCK_MONOTONIC, &last_time) != 0)
1795 last_time.tv_sec = 0;
/* Back-date by 1s so the first iteration sees a nonzero tick. */
1797 last_time.tv_sec -= 1;
1800 struct timespec diff_time, start_time, end_time;
1801 int num_paths = 0, ticks = 0, signo, strict_timing, rc = 0;
1804 if (clock_gettime(CLOCK_MONOTONIC, &start_time) != 0)
1805 start_time.tv_sec = 0;
/* ticks = whole seconds elapsed since the previous iteration. */
1806 if (start_time.tv_sec && last_time.tv_sec) {
1807 timespecsub(&start_time, &last_time, &diff_time);
1808 condlog(4, "tick (%lu.%06lu secs)",
1809 diff_time.tv_sec, diff_time.tv_nsec / 1000);
1810 last_time = start_time;
1811 ticks = diff_time.tv_sec;
1814 condlog(4, "tick (%d ticks)", ticks);
/* Keep systemd's watchdog happy once per loop. */
1818 sd_notify(0, "WATCHDOG=1");
1820 rc = set_config_state(DAEMON_RUNNING);
1821 if (rc == ETIMEDOUT) {
1822 condlog(4, "timeout waiting for DAEMON_IDLE");
/* Check every path while holding vecs->lock (cancellation-safe). */
1826 pthread_cleanup_push(cleanup_lock, &vecs->lock);
1828 pthread_testcancel();
1829 vector_foreach_slot (vecs->pathvec, pp, i) {
1830 rc = check_path(vecs, pp, ticks);
/* Negative rc presumably means the path must be dropped — TODO confirm. */
1832 vector_del_slot(vecs->pathvec, i);
1838 lock_cleanup_pop(vecs->lock);
/* Per-map timer work, again under the vector lock. */
1840 pthread_cleanup_push(cleanup_lock, &vecs->lock);
1842 pthread_testcancel();
1843 defered_failback_tick(vecs->mpvec);
1844 retry_count_tick(vecs->mpvec);
1845 missing_uev_wait_tick(vecs);
1846 lock_cleanup_pop(vecs->lock);
/* Reap maps that lost all their paths. */
1851 pthread_cleanup_push(cleanup_lock, &vecs->lock);
1853 pthread_testcancel();
1854 condlog(4, "map garbage collection");
1855 mpvec_garbage_collector(vecs);
1857 lock_cleanup_pop(vecs->lock);
/* Measure how long this checker pass took and warn if it exceeded
 * max_checkint. */
1860 diff_time.tv_nsec = 0;
1861 if (start_time.tv_sec &&
1862 clock_gettime(CLOCK_MONOTONIC, &end_time) == 0) {
1863 timespecsub(&end_time, &start_time, &diff_time);
1865 unsigned int max_checkint;
1867 condlog(3, "checked %d path%s in %lu.%06lu secs",
1868 num_paths, num_paths > 1 ? "s" : "",
1870 diff_time.tv_nsec / 1000);
1871 conf = get_multipath_config();
1872 max_checkint = conf->max_checkint;
1873 put_multipath_config(conf);
1874 if (diff_time.tv_sec > max_checkint)
1875 condlog(1, "path checkers took longer "
1876 "than %lu seconds, consider "
1877 "increasing max_polling_interval",
1882 post_config_state(DAEMON_IDLE);
1883 conf = get_multipath_config();
1884 strict_timing = conf->strict_timing;
1885 put_multipath_config(conf);
/* strict_timing: arm a one-shot timer for the remainder of the
 * current second, then block in sigwait() for SIGALRM. */
1889 timer_tick_it.it_interval.tv_sec = 0;
1890 timer_tick_it.it_interval.tv_usec = 0;
1891 if (diff_time.tv_nsec) {
1892 timer_tick_it.it_value.tv_sec = 0;
/* NOTE(review): this stores a nanosecond-scale remainder
 * (1e9 - tv_nsec) into it_value.tv_usec, which setitimer()
 * interprets as microseconds — looks like a unit bug
 * (missing "/ 1000"); verify against upstream multipath-tools. */
1893 timer_tick_it.it_value.tv_usec =
1894 1000UL * 1000 * 1000 - diff_time.tv_nsec;
1896 timer_tick_it.it_value.tv_sec = 1;
1897 timer_tick_it.it_value.tv_usec = 0;
1899 setitimer(ITIMER_REAL, &timer_tick_it, NULL);
1902 sigaddset(&mask, SIGALRM);
1903 condlog(3, "waiting for %lu.%06lu secs",
1904 timer_tick_it.it_value.tv_sec,
1905 timer_tick_it.it_value.tv_usec);
/* sigwait failure: disable strict timing so we fall back to the
 * plain sleep path on later iterations. */
1906 if (sigwait(&mask, &signo) != 0) {
1907 condlog(3, "sigwait failed with error %d",
1909 conf = get_multipath_config();
1910 conf->strict_timing = 0;
1911 put_multipath_config(conf);
/* Pop the rcu_unregister cleanup handler (executes it). */
1916 pthread_cleanup_pop(1);
/*
 * (Re)build the daemon's view of paths and maps from scratch:
 * discover paths from sysfs and maps from device-mapper, filter
 * blacklisted paths, coalesce paths into a fresh map vector, push
 * changed maps into dm, then adopt the new map set and (optionally)
 * start a dm-event waiter thread per map.
 * NOTE(review): fragmentary excerpt — error-return paths and several
 * declarations (mpvec, pp, i, ret) are not visible here.
 */
1921 configure (struct vectors * vecs, int start_waiters)
1923 struct multipath * mpp;
1927 struct config *conf;
/* Lazily allocate the global path and map vectors on first call. */
1929 if (!vecs->pathvec && !(vecs->pathvec = vector_alloc())) {
1930 condlog(0, "couldn't allocate path vec in configure");
1934 if (!vecs->mpvec && !(vecs->mpvec = vector_alloc())) {
1935 condlog(0, "couldn't allocate multipath vec in configure");
/* mpvec holds the candidate new map set until it is adopted below. */
1939 if (!(mpvec = vector_alloc())) {
1940 condlog(0, "couldn't allocate new maps vec in configure");
1945 * probe for current path (from sysfs) and map (from dm) sets
1947 ret = path_discovery(vecs->pathvec, DI_ALL);
1949 condlog(0, "configure failed at path discovery");
/* Drop blacklisted paths; surviving paths get the default checkint. */
1953 vector_foreach_slot (vecs->pathvec, pp, i){
1954 conf = get_multipath_config();
1955 if (filter_path(conf, pp) > 0){
1956 vector_del_slot(vecs->pathvec, i);
1961 pp->checkint = conf->checkint;
1962 put_multipath_config(conf);
1964 if (map_discovery(vecs)) {
1965 condlog(0, "configure failed at map discovery");
1970 * create new set of maps & push changed ones into dm
1972 if (coalesce_paths(vecs, mpvec, NULL, 1, CMD_NONE)) {
1973 condlog(0, "configure failed while coalescing paths");
1978 * may need to remove some maps which are no longer relevant
1979 * e.g., due to blacklist changes in conf file
1981 if (coalesce_maps(vecs, mpvec)) {
1982 condlog(0, "configure failed while coalescing maps");
/* Sync kernel state and persist each map's WWID. */
1988 sync_maps_state(mpvec);
1989 vector_foreach_slot(mpvec, mpp, i){
1990 remember_wwid(mpp->wwid);
1995 * purge dm of old maps
2000 * save new set of maps formed by considering current path state
2002 vector_free(vecs->mpvec);
2003 vecs->mpvec = mpvec;
2006 * start dm event waiter threads for these new maps
2008 vector_foreach_slot(vecs->mpvec, mpp, i) {
2009 if (setup_multipath(vecs, mpp)) {
2013 if (start_waiters) {
2014 if (start_waiter_thread(mpp, vecs)) {
/* Waiter thread failed: drop the map entirely (flush it). */
2015 remove_map(mpp, vecs, 1);
/*
 * Return whether a reconfigure must be postponed: true when at least
 * one existing map is still waiting for udev to settle
 * (mpp->wait_for_udev), false when there are no maps at all.
 * NOTE(review): fragmentary excerpt — return statements not visible.
 */
2024 need_to_delay_reconfig(struct vectors * vecs)
2026 struct multipath *mpp;
2029 if (!VECTOR_SIZE(vecs->mpvec))
2032 vector_foreach_slot(vecs->mpvec, mpp, i) {
2033 if (mpp->wait_for_udev)
/*
 * RCU callback: recover the struct config embedding this rcu_head
 * and free it once all readers have left the grace period.
 * Scheduled via call_rcu(&conf->rcu, rcu_free_config).
 */
2039 void rcu_free_config(struct rcu_head *head)
2041 struct config *conf = container_of(head, struct config, rcu);
/*
 * Reload the configuration file and swap it in under RCU.
 * Tears down all current maps/waiters and the path vector first
 * (they reference the old config), re-applies command-line
 * overrides (verbosity, -B, -n), publishes the new config with
 * rcu_assign_pointer(), and frees the old one after a grace period.
 * NOTE(review): fragmentary excerpt — error handling for
 * load_config() failure is not visible here.
 */
2047 reconfigure (struct vectors * vecs)
2049 struct config * old, *conf;
2051 conf = load_config(DEFAULT_CONFIGFILE);
2056 * free old map and path vectors ... they use old conf state
2058 if (VECTOR_SIZE(vecs->mpvec))
2059 remove_maps_and_stop_waiters(vecs);
2061 free_pathvec(vecs->pathvec, FREE_PATHS);
2062 vecs->pathvec = NULL;
2064 /* Re-read any timezone changes */
2067 dm_drv_version(conf->version, TGT_MPATH);
/* Command-line flags override the freshly loaded file settings. */
2069 conf->verbosity = verbosity;
2070 if (bindings_read_only)
2071 conf->bindings_read_only = bindings_read_only;
2072 if (ignore_new_devs)
2073 conf->ignore_new_devs = ignore_new_devs;
2074 uxsock_timeout = conf->uxsock_timeout;
/* Publish the new config; defer freeing the old one until all
 * RCU readers are done with it. */
2076 old = rcu_dereference(multipath_conf);
2077 rcu_assign_pointer(multipath_conf, conf);
2078 call_rcu(&old->rcu, rcu_free_config);
/*
 * Allocate and initialize the global vectors container: zeroed
 * struct plus its lock mutex. NOTE(review): fragmentary excerpt —
 * the NULL-check on MALLOC and the return are not visible here.
 */
2086 static struct vectors *
2089 struct vectors * vecs;
2091 vecs = (struct vectors *)MALLOC(sizeof(struct vectors));
2096 pthread_mutex_init(&vecs->lock.mutex, NULL);
2102 signal_set(int signo, void (*func) (int))
2105 struct sigaction sig;
2106 struct sigaction osig;
2108 sig.sa_handler = func;
2109 sigemptyset(&sig.sa_mask);
2112 r = sigaction(signo, &sig, &osig);
2117 return (osig.sa_handler);
/*
 * Process the sig_atomic_t flags set by the async signal handlers,
 * outside signal context: exit_sig -> shutdown, reconfig_sig ->
 * request DAEMON_CONFIGURE, log_reset_sig -> reset the log queue
 * under logq_lock. NOTE(review): fragmentary excerpt — the flag
 * tests and flag-clearing lines are partially missing.
 */
2121 handle_signals(void)
2124 condlog(2, "exit (signal)");
2128 condlog(2, "reconfigure (signal)");
2129 set_config_state(DAEMON_CONFIGURE);
2131 if (log_reset_sig) {
2132 condlog(2, "reset log (signal)");
/* Serialize against the logger thread while resetting. */
2133 pthread_mutex_lock(&logq_lock);
2134 log_reset("multipathd");
2135 pthread_mutex_unlock(&logq_lock);
/* Tail of sigusr2(): just log receipt; SIGUSR2 carries no action here. */
2163 condlog(3, "SIGUSR2 received");
/*
 * signal_init() body: register the daemon's handlers — SIGHUP
 * (reconfigure), SIGUSR1/SIGUSR2, and SIGINT/SIGTERM/SIGPIPE all
 * routed to sigend. NOTE(review): fragmentary excerpt — the
 * function header and any sigmask setup are not visible here.
 */
2169 signal_set(SIGHUP, sighup);
2170 signal_set(SIGUSR1, sigusr1);
2171 signal_set(SIGUSR2, sigusr2);
2172 signal_set(SIGINT, sigend);
2173 signal_set(SIGTERM, sigend);
2174 signal_set(SIGPIPE, sigend);
/*
 * Request real-time round-robin scheduling (SCHED_RR, priority 99)
 * for the daemon; only warn on failure — the daemon still runs
 * with default scheduling.
 */
2181 static struct sched_param sched_param = {
2182 .sched_priority = 99
2185 res = sched_setscheduler (0, SCHED_RR, &sched_param);
2188 condlog(LOG_WARNING, "Could not set SCHED_RR at priority 99");
/*
 * Make the daemon immune to the OOM killer by writing the minimum
 * score to /proc/self/oom_score_adj (modern kernels), falling back
 * to the legacy /proc/self/oom_adj. Skipped entirely when systemd
 * already set OOMScoreAdjust. Logs, but does not fail, if neither
 * interface is writable.
 */
2195 #ifdef OOM_SCORE_ADJ_MIN
2197 char *file = "/proc/self/oom_score_adj";
2198 int score = OOM_SCORE_ADJ_MIN;
2201 char *file = "/proc/self/oom_adj";
2202 int score = OOM_ADJUST_MIN;
/* systemd exports OOMScoreAdjust when it manages the setting itself. */
2208 envp = getenv("OOMScoreAdjust");
2210 condlog(3, "Using systemd provided OOMScoreAdjust");
2214 if (stat(file, &st) == 0){
2215 fp = fopen(file, "w");
2217 condlog(0, "couldn't fopen %s : %s", file,
2221 fprintf(fp, "%i", score);
/* ENOENT just means this interface doesn't exist on this kernel. */
2225 if (errno != ENOENT) {
2226 condlog(0, "couldn't stat %s : %s", file,
/* Retry with the legacy interface before giving up. */
2230 #ifdef OOM_ADJUST_MIN
2231 file = "/proc/self/oom_adj";
2232 score = OOM_ADJUST_MIN;
2237 condlog(0, "couldn't adjust oom score");
/*
 * Daemon main body (runs after daemonize()): initializes logging,
 * pidfile, configuration, checkers/prioritizers, fd limits and the
 * global vectors; honors the systemd watchdog interval; spawns the
 * uevent listener, CLI listener, checker loop and uevent dispatcher
 * threads; then sits in the state-machine loop serving
 * DAEMON_CONFIGURE requests until DAEMON_SHUTDOWN, and finally
 * tears everything down in reverse order.
 * NOTE(review): fragmentary excerpt — goto/error paths, several
 * declarations (pid_fd, envp, rc, i, waiter_attr) and the final
 * return are not visible here.
 */
2241 child (void * param)
2243 pthread_t check_thr, uevent_thr, uxlsnr_thr, uevq_thr;
2244 pthread_attr_t log_attr, misc_attr, uevent_attr;
2245 struct vectors * vecs;
2246 struct multipath * mpp;
2249 unsigned long checkint;
2253 struct config *conf;
/* Lock all current and future memory to avoid paging in I/O paths. */
2256 mlockall(MCL_CURRENT | MCL_FUTURE);
2260 setup_thread_attr(&misc_attr, 64 * 1024, 0);
2261 setup_thread_attr(&uevent_attr, DEFAULT_UEVENT_STACKSIZE * 1024, 0);
2262 setup_thread_attr(&waiter_attr, 32 * 1024, 1);
2265 setup_thread_attr(&log_attr, 64 * 1024, 0);
2266 log_thread_start(&log_attr);
2267 pthread_attr_destroy(&log_attr);
2269 pid_fd = pidfile_create(DEFAULT_PIDFILE, daemon_pid);
2271 condlog(1, "failed to create pidfile");
2277 post_config_state(DAEMON_START);
2279 condlog(2, "--------start up--------");
2280 condlog(2, "read " DEFAULT_CONFIGFILE);
2282 conf = load_config(DEFAULT_CONFIGFILE);
/* Command-line flags override file settings (same as reconfigure()). */
2287 conf->verbosity = verbosity;
2288 if (bindings_read_only)
2289 conf->bindings_read_only = bindings_read_only;
2290 if (ignore_new_devs)
2291 conf->ignore_new_devs = ignore_new_devs;
2292 uxsock_timeout = conf->uxsock_timeout;
/* Publish the initial config for RCU readers. */
2293 rcu_assign_pointer(multipath_conf, conf);
2294 dm_init(conf->verbosity);
2295 dm_drv_version(conf->version, TGT_MPATH);
2296 if (init_checkers(conf->multipath_dir)) {
2297 condlog(0, "failed to initialize checkers");
2300 if (init_prio(conf->multipath_dir)) {
2301 condlog(0, "failed to initialize prioritizers");
2305 setlogmask(LOG_UPTO(conf->verbosity + 3));
/* systemd's LimitNOFILE wins over conf->max_fds when present. */
2307 envp = getenv("LimitNOFILE");
2310 condlog(2,"Using systemd provided open fds limit of %s", envp);
2311 } else if (conf->max_fds) {
2312 struct rlimit fd_limit;
2314 if (getrlimit(RLIMIT_NOFILE, &fd_limit) < 0) {
2315 condlog(0, "can't get open fds limit: %s",
2317 fd_limit.rlim_cur = 0;
2318 fd_limit.rlim_max = 0;
2320 if (fd_limit.rlim_cur < conf->max_fds) {
2321 fd_limit.rlim_cur = conf->max_fds;
2322 if (fd_limit.rlim_max < conf->max_fds)
2323 fd_limit.rlim_max = conf->max_fds;
2324 if (setrlimit(RLIMIT_NOFILE, &fd_limit) < 0) {
2325 condlog(0, "can't set open fds limit to "
2327 fd_limit.rlim_cur, fd_limit.rlim_max,
2330 condlog(3, "set open fds limit to %lu/%lu",
2331 fd_limit.rlim_cur, fd_limit.rlim_max);
/* gvecs aliases vecs so signal handlers can reach the vectors. */
2337 vecs = gvecs = init_vecs();
2344 dm_udev_set_sync_support(0);
/* Derive checker intervals from the systemd watchdog period so a
 * stalled checker loop trips the watchdog. */
2346 envp = getenv("WATCHDOG_USEC");
2347 if (envp && sscanf(envp, "%lu", &checkint) == 1) {
2348 /* Value is in microseconds */
2349 conf->max_checkint = checkint / 1000000;
2350 /* Rescale checkint */
2351 if (conf->checkint > conf->max_checkint)
2352 conf->checkint = conf->max_checkint;
2354 conf->checkint = conf->max_checkint / 4;
2355 condlog(3, "enabling watchdog, interval %d max %d",
2356 conf->checkint, conf->max_checkint);
2357 use_watchdog = conf->checkint;
2361 * Startup done, invalidate configuration
2366 * Signal start of configuration
2368 post_config_state(DAEMON_CONFIGURE);
2370 init_path_check_interval(vecs);
2373 * Start uevent listener early to catch events
2375 if ((rc = pthread_create(&uevent_thr, &uevent_attr, ueventloop, udev))) {
2376 condlog(0, "failed to create uevent thread: %d", rc);
2379 pthread_attr_destroy(&uevent_attr);
2380 if ((rc = pthread_create(&uxlsnr_thr, &misc_attr, uxlsnrloop, vecs))) {
2381 condlog(0, "failed to create cli listener: %d", rc);
2388 if ((rc = pthread_create(&check_thr, &misc_attr, checkerloop, vecs))) {
2389 condlog(0,"failed to create checker loop thread: %d", rc);
2392 if ((rc = pthread_create(&uevq_thr, &misc_attr, uevqloop, vecs))) {
2393 condlog(0, "failed to create uevent dispatcher: %d", rc);
2396 pthread_attr_destroy(&misc_attr);
2399 sd_notify(0, "READY=1");
/*
 * Main state loop: sleep on config_cond until someone posts
 * DAEMON_CONFIGURE or DAEMON_SHUTDOWN, then act on it.
 */
2402 while (running_state != DAEMON_SHUTDOWN) {
2403 pthread_cleanup_push(config_cleanup, NULL);
2404 pthread_mutex_lock(&config_lock);
2405 if (running_state != DAEMON_CONFIGURE &&
2406 running_state != DAEMON_SHUTDOWN) {
2407 pthread_cond_wait(&config_cond, &config_lock);
2409 pthread_cleanup_pop(1);
2410 if (running_state == DAEMON_CONFIGURE) {
2411 pthread_cleanup_push(cleanup_lock, &vecs->lock);
2413 pthread_testcancel();
/* If udev is still settling, mark the reconfig as delayed
 * instead of running it now. */
2414 if (!need_to_delay_reconfig(vecs)) {
2417 conf = get_multipath_config();
2418 conf->delayed_reconfig = 1;
2419 put_multipath_config(conf);
2421 lock_cleanup_pop(vecs->lock);
2422 post_config_state(DAEMON_IDLE);
/*
 * Shutdown path: optionally disable queue_if_no_path on all maps,
 * stop waiters, cancel and join all worker threads, free vectors.
 */
2427 conf = get_multipath_config();
2428 if (conf->queue_without_daemon == QUE_NO_DAEMON_OFF)
2429 vector_foreach_slot(vecs->mpvec, mpp, i)
2430 dm_queue_if_no_path(mpp->alias, 0);
2431 put_multipath_config(conf);
2432 remove_maps_and_stop_waiters(vecs);
2433 unlock(&vecs->lock);
2435 pthread_cancel(check_thr);
2436 pthread_cancel(uevent_thr);
2437 pthread_cancel(uxlsnr_thr);
2438 pthread_cancel(uevq_thr);
2440 pthread_join(check_thr, NULL);
2441 pthread_join(uevent_thr, NULL);
2442 pthread_join(uxlsnr_thr, NULL);
2443 pthread_join(uevq_thr, NULL);
2446 free_pathvec(vecs->pathvec, FREE_PATHS);
2447 vecs->pathvec = NULL;
2448 unlock(&vecs->lock);
2450 pthread_mutex_destroy(&vecs->lock.mutex);
2460 /* We're done here */
2461 condlog(3, "unlink pidfile");
2462 unlink(DEFAULT_PIDFILE);
2464 condlog(2, "--------shut down-------");
2470 * Freeing config must be done after condlog() and dm_lib_exit(),
2471 * because logging functions like dlog() and dm_write_log()
2472 * reference the config.
2474 conf = rcu_dereference(multipath_conf);
2475 rcu_assign_pointer(multipath_conf, NULL);
2476 call_rcu(&conf->rcu, rcu_free_config);
2479 pthread_attr_destroy(&waiter_attr);
2481 dbg_free_final(NULL);
/* Report clean vs. failed shutdown to systemd. */
2485 sd_notify(0, "ERRNO=0");
2491 sd_notify(0, "ERRNO=1");
/*
 * daemonize() body: classic double-fork to detach from the
 * controlling terminal, chdir to '/', and redirect stdin/stdout/
 * stderr to /dev/null. NOTE(review): fragmentary excerpt — the
 * function header, setsid() call and exit paths between the forks
 * are not visible here.
 */
2504 if( (pid = fork()) < 0){
2505 fprintf(stderr, "Failed first fork : %s\n", strerror(errno));
2513 if ( (pid = fork()) < 0)
2514 fprintf(stderr, "Failed second fork : %s\n", strerror(errno));
2519 fprintf(stderr, "cannot chdir to '/', continuing\n");
2521 dev_null_fd = open("/dev/null", O_RDWR);
2522 if (dev_null_fd < 0){
2523 fprintf(stderr, "cannot open /dev/null for input & output : %s\n",
/* Re-point each standard stream at /dev/null via close+dup. */
2528 close(STDIN_FILENO);
2529 if (dup(dev_null_fd) < 0) {
2530 fprintf(stderr, "cannot dup /dev/null to stdin : %s\n",
2534 close(STDOUT_FILENO);
2535 if (dup(dev_null_fd) < 0) {
2536 fprintf(stderr, "cannot dup /dev/null to stdout : %s\n",
2540 close(STDERR_FILENO);
2541 if (dup(dev_null_fd) < 0) {
2542 fprintf(stderr, "cannot dup /dev/null to stderr : %s\n",
2547 daemon_pid = getpid();
2552 main (int argc, char *argv[])
2554 extern char *optarg;
2559 struct config *conf;
2561 ANNOTATE_BENIGN_RACE_SIZED(&multipath_conf, sizeof(multipath_conf),
2562 "Manipulated through RCU");
2563 ANNOTATE_BENIGN_RACE_SIZED(&running_state, sizeof(running_state),
2564 "Suppress complaints about unprotected running_state reads");
2565 ANNOTATE_BENIGN_RACE_SIZED(&uxsock_timeout, sizeof(uxsock_timeout),
2566 "Suppress complaints about this scalar variable");
2570 if (getuid() != 0) {
2571 fprintf(stderr, "need to be root\n");
2575 /* make sure we don't lock any path */
2577 fprintf(stderr, "can't chdir to root directory : %s\n",
2579 umask(umask(077) | 022);
2581 pthread_cond_init_mono(&config_cond);
2585 while ((arg = getopt(argc, argv, ":dsv:k::Bn")) != EOF ) {
2591 //debug=1; /* ### comment me out ### */
2594 if (sizeof(optarg) > sizeof(char *) ||
2595 !isdigit(optarg[0]))
2598 verbosity = atoi(optarg);
2604 conf = load_config(DEFAULT_CONFIGFILE);
2608 conf->verbosity = verbosity;
2609 uxsock_timeout = conf->uxsock_timeout;
2610 uxclnt(optarg, uxsock_timeout + 100);
2613 bindings_read_only = 1;
2616 ignore_new_devs = 1;
2619 fprintf(stderr, "Invalid argument '-%c'\n",
2624 if (optind < argc) {
2629 conf = load_config(DEFAULT_CONFIGFILE);
2633 conf->verbosity = verbosity;
2634 uxsock_timeout = conf->uxsock_timeout;
2635 memset(cmd, 0x0, CMDSIZE);
2636 while (optind < argc) {
2637 if (strchr(argv[optind], ' '))
2638 c += snprintf(c, s + CMDSIZE - c, "\"%s\" ", argv[optind]);
2640 c += snprintf(c, s + CMDSIZE - c, "%s ", argv[optind]);
2643 c += snprintf(c, s + CMDSIZE - c, "\n");
2644 uxclnt(s, uxsock_timeout + 100);
2649 if (!isatty(fileno(stdout)))
2650 setbuf(stdout, NULL);
2652 daemon_pid = getpid();
2664 return (child(NULL));
/*
 * Thread body for persistent-reservation housekeeping on a path.
 * Issues PERSISTENT RESERVE IN (READ KEYS) on the path, compares
 * the returned keys against the map's reservation_key, and if the
 * key is registered on the device, re-registers it on this path
 * with PERSISTENT RESERVE OUT (REGISTER AND IGNORE EXISTING KEY)
 * so the new path shares the reservation.
 * NOTE(review): fragmentary excerpt — mpp assignment, prkey
 * accumulation, isFound updates, out labels and frees are not
 * visible here.
 */
2667 void * mpath_pr_event_handler_fn (void * pathp )
2669 struct multipath * mpp;
2670 int i,j, ret, isFound;
2671 struct path * pp = (struct path *)pathp;
2672 unsigned char *keyp;
2674 struct prout_param_descriptor *param;
2675 struct prin_resp *resp;
2679 resp = mpath_alloc_prin_response(MPATH_PRIN_RKEY_SA);
2681 condlog(0,"%s Alloc failed for prin response", pp->dev);
/* PR IN / READ KEYS: fetch all registered keys from the device. */
2685 ret = prin_do_scsi_ioctl(pp->dev, MPATH_PRIN_RKEY_SA, resp, 0);
2686 if (ret != MPATH_PR_SUCCESS )
2688 condlog(0,"%s : pr in read keys service action failed. Error=%d", pp->dev, ret);
2692 condlog(3, " event pr=%d addlen=%d",resp->prin_descriptor.prin_readkeys.prgeneration,
2693 resp->prin_descriptor.prin_readkeys.additional_length );
/* additional_length == 0 means no keys registered at all. */
2695 if (resp->prin_descriptor.prin_readkeys.additional_length == 0 )
2697 condlog(1, "%s: No key found. Device may not be registered.", pp->dev);
2698 ret = MPATH_PR_SUCCESS;
/* Fold the map's 8-byte reservation_key into prkey for logging/compare. */
2702 keyp = (unsigned char *)mpp->reservation_key;
2703 for (j = 0; j < 8; ++j) {
2709 condlog(2, "Multipath reservation_key: 0x%" PRIx64 " ", prkey);
/* Scan the returned key list (8 bytes per key) for our key. */
2712 for (i = 0; i < resp->prin_descriptor.prin_readkeys.additional_length/8; i++ )
2714 condlog(2, "PR IN READKEYS[%d] reservation key:",i);
2715 dumpHex((char *)&resp->prin_descriptor.prin_readkeys.key_list[i*8], 8 , -1);
2716 if (!memcmp(mpp->reservation_key, &resp->prin_descriptor.prin_readkeys.key_list[i*8], 8))
2718 condlog(2, "%s: pr key found in prin readkeys response", mpp->alias);
2725 condlog(0, "%s: Either device not registered or ", pp->dev);
2726 condlog(0, "host is not authorised for registration. Skip path");
2727 ret = MPATH_PR_OTHER;
/* NOTE(review): malloc return is not checked before the memset /
 * later use — NULL deref on allocation failure; verify upstream. */
2731 param= malloc(sizeof(struct prout_param_descriptor));
2732 memset(param, 0 , sizeof(struct prout_param_descriptor));
/* Serialize prkey big-endian into the service-action key field. */
2734 for (j = 7; j >= 0; --j) {
2735 param->sa_key[j] = (prkey & 0xff);
2738 param->num_transportid = 0;
2740 condlog(3, "device %s:%s", pp->dev, pp->mpp->wwid);
/* PR OUT / REGISTER AND IGNORE EXISTING KEY on this path. */
2742 ret = prout_do_scsi_ioctl(pp->dev, MPATH_PROUT_REG_IGN_SA, 0, 0, param, 0);
2743 if (ret != MPATH_PR_SUCCESS )
2745 condlog(0,"%s: Reservation registration failed. Error: %d", pp->dev, ret);
2755 int mpath_pr_event_handle(struct path *pp)
2759 pthread_attr_t attr;
2760 struct multipath * mpp;
2764 if (!mpp->reservation_key)
2767 pthread_attr_init(&attr);
2768 pthread_attr_setdetachstate(&attr, PTHREAD_CREATE_JOINABLE);
2770 rc = pthread_create(&thread, NULL , mpath_pr_event_handler_fn, pp);
2772 condlog(0, "%s: ERROR; return code from pthread_create() is %d", pp->dev, rc);
2775 pthread_attr_destroy(&attr);
2776 rc = pthread_join(thread, NULL);