multipath: add fast_io_fail and dev_loss_tmo config parameters
authorBenjamin Marzinski <bmarzins@redhat.com>
Tue, 23 Mar 2010 02:44:39 +0000 (21:44 -0500)
committerChristophe Varoqui <christophe.varoqui@free.fr>
Thu, 25 Mar 2010 19:14:11 +0000 (20:14 +0100)
This patch adds two new configuration parameters to multipath.conf,
fast_io_fail_tmo and dev_loss_tmo which set

/sys/class/fc_remote_ports/rport-<host>:<channel>-<rport_id>/fast_io_fail_tmo and
/sys/class/fc_remote_ports/rport-<host>:<channel>-<rport_id>/dev_loss_tmo

for all the capable paths in a multipath device.

Signed-off-by: Benjamin Marzinski <bmarzins@redhat.com>
12 files changed:
libmultipath/config.h
libmultipath/configure.c
libmultipath/dict.c
libmultipath/discovery.c
libmultipath/discovery.h
libmultipath/propsel.c
libmultipath/propsel.h
libmultipath/structs.h
libmultipath/sysfs.c
libmultipath/sysfs.h
multipath.conf.annotated
multipath/multipath.conf.5

index 50a728c..1d66cca 100644 (file)
@@ -31,6 +31,8 @@ struct hwentry {
        int minio;
        int pg_timeout;
        int flush_on_last_del;
+       int fast_io_fail;
+       unsigned int dev_loss;
        char * bl_product;
 };
 
@@ -75,6 +77,8 @@ struct config {
        int daemon;
        int flush_on_last_del;
        int attribute_flags;
+       int fast_io_fail;
+       unsigned int dev_loss;
        uid_t uid;
        gid_t gid;
        mode_t mode;
index 9c549d0..c69190c 100644 (file)
@@ -70,7 +70,10 @@ setup_map (struct multipath * mpp)
        select_mode(mpp);
        select_uid(mpp);
        select_gid(mpp);
+       select_fast_io_fail(mpp);
+       select_dev_loss(mpp);
 
+       sysfs_set_scsi_tmo(mpp);
        /*
         * assign paths to path groups -- start with no groups and all paths
         * in mpp->paths
index 45423c6..9782599 100644 (file)
@@ -36,6 +36,35 @@ polling_interval_handler(vector strvec)
        return 0;
 }
 
+static int
+def_fast_io_fail_handler(vector strvec)
+{
+       char * buff;
+
+       buff = set_value(strvec);
+       if (strlen(buff) == 3 && !strcmp(buff, "off"))
+               conf->fast_io_fail = -1;
+       else if (sscanf(buff, "%d", &conf->fast_io_fail) != 1 ||
+                conf->fast_io_fail < -1)
+               conf->fast_io_fail = 0;
+
+       FREE(buff);
+       return 0;
+}
+
+static int
+def_dev_loss_handler(vector strvec)
+{
+       char * buff;
+
+       buff = set_value(strvec);
+       if (sscanf(buff, "%u", &conf->dev_loss) != 1)
+               conf->dev_loss = 0;
+
+       FREE(buff);
+       return 0;
+}
+
 static int
 verbosity_handler(vector strvec)
 {
@@ -627,6 +656,37 @@ bl_product_handler(vector strvec)
        return 0;
 }
 
+static int
+hw_fast_io_fail_handler(vector strvec)
+{
+       char * buff;
+       struct hwentry * hwe = VECTOR_LAST_SLOT(conf->hwtable);
+
+       buff = set_value(strvec);
+       if (strlen(buff) == 3 && !strcmp(buff, "off"))
+               hwe->fast_io_fail = -1;
+       else if (sscanf(buff, "%d", &hwe->fast_io_fail) != 1 ||
+                hwe->fast_io_fail < -1)
+               hwe->fast_io_fail = 0;
+
+       FREE(buff);
+       return 0;
+}
+
+static int
+hw_dev_loss_handler(vector strvec)
+{
+       char * buff;
+       struct hwentry * hwe = VECTOR_LAST_SLOT(conf->hwtable);
+
+       buff = set_value(strvec);
+       if (sscanf(buff, "%u", &hwe->dev_loss) != 1)
+               hwe->dev_loss = 0;
+
+       FREE(buff);
+       return 0;
+}
+
 static int
 hw_pgpolicy_handler(vector strvec)
 {
@@ -1389,6 +1449,26 @@ snprint_mp_flush_on_last_del (char * buff, int len, void * data)
        return 0;
 }
 
+static int
+snprint_hw_fast_io_fail(char * buff, int len, void * data)
+{
+       struct hwentry * hwe = (struct hwentry *)data;
+       if (!hwe->fast_io_fail)
+               return 0;
+       if (hwe->fast_io_fail == -1)
+               return snprintf(buff, len, "off");
+       return snprintf(buff, len, "%d", hwe->fast_io_fail);
+}
+
+static int
+snprint_hw_dev_loss(char * buff, int len, void * data)
+{
+       struct hwentry * hwe = (struct hwentry *)data;
+       if (!hwe->dev_loss)
+               return 0;
+       return snprintf(buff, len, "%u", hwe->dev_loss);
+}
+
 static int
 snprint_hw_vendor (char * buff, int len, void * data)
 {
@@ -1639,6 +1719,24 @@ snprint_def_polling_interval (char * buff, int len, void * data)
        return snprintf(buff, len, "%i", conf->checkint);
 }
 
+static int
+snprint_def_fast_io_fail(char * buff, int len, void * data)
+{
+       if (!conf->fast_io_fail)
+               return 0;
+       if (conf->fast_io_fail == -1)
+               return snprintf(buff, len, "off");
+       return snprintf(buff, len, "%d", conf->fast_io_fail);
+}
+
+static int
+snprint_def_dev_loss(char * buff, int len, void * data)
+{
+       if (!conf->dev_loss)
+               return 0;
+       return snprintf(buff, len, "%u", conf->dev_loss);
+}
+
 static int
 snprint_def_verbosity (char * buff, int len, void * data)
 {
@@ -1937,6 +2035,8 @@ init_keywords(void)
        install_keyword("mode", &def_mode_handler, &snprint_def_mode);
        install_keyword("uid", &def_uid_handler, &snprint_def_uid);
        install_keyword("gid", &def_gid_handler, &snprint_def_gid);
+       install_keyword("fast_io_fail_tmo", &def_fast_io_fail_handler, &snprint_def_fast_io_fail);
+       install_keyword("dev_loss_tmo", &def_dev_loss_handler, &snprint_def_dev_loss);
        __deprecated install_keyword("default_selector", &def_selector_handler, NULL);
        __deprecated install_keyword("default_path_grouping_policy", &def_pgpolicy_handler, NULL);
        __deprecated install_keyword("default_getuid_callout", &def_getuid_callout_handler, NULL);
@@ -1991,6 +2091,8 @@ init_keywords(void)
        install_keyword("rr_min_io", &hw_minio_handler, &snprint_hw_rr_min_io);
        install_keyword("pg_timeout", &hw_pg_timeout_handler, &snprint_hw_pg_timeout);
        install_keyword("flush_on_last_del", &hw_flush_on_last_del_handler, &snprint_hw_flush_on_last_del);
+       install_keyword("fast_io_fail_tmo", &hw_fast_io_fail_handler, &snprint_hw_fast_io_fail);
+       install_keyword("dev_loss_tmo", &hw_dev_loss_handler, &snprint_hw_dev_loss);
        install_sublevel_end();
 
        install_keyword_root("multipaths", &multipaths_handler);
index 6b99d07..70495b0 100644 (file)
@@ -204,6 +204,43 @@ sysfs_get_fc_nodename (struct sysfs_device * dev, char * node,
        return 1;
 }
 
+int
+sysfs_set_scsi_tmo (struct multipath *mpp)
+{
+       char attr_path[SYSFS_PATH_SIZE];
+       struct path *pp;
+       int i;
+       char value[11];
+
+       if (!mpp->dev_loss && !mpp->fast_io_fail)
+               return 0;
+       vector_foreach_slot(mpp->paths, pp, i) {
+               if (safe_snprintf(attr_path, SYSFS_PATH_SIZE,
+                                 "/class/fc_remote_ports/rport-%d:%d-%d",
+                                 pp->sg_id.host_no, pp->sg_id.channel,
+                                 pp->sg_id.scsi_id)) {
+                       condlog(0, "attr_path '/class/fc_remote_ports/rport-%d:%d-%d' too large", pp->sg_id.host_no, pp->sg_id.channel, pp->sg_id.scsi_id);
+                       return 1;
+               }
+               if (mpp->dev_loss){
+                       snprintf(value, 11, "%u", mpp->dev_loss);
+                       if (sysfs_attr_set_value(attr_path, "dev_loss_tmo",
+                                                value))
+                               return 1;
+               }
+               if (mpp->fast_io_fail){
+                       if (mpp->fast_io_fail == -1)
+                               sprintf(value, "off");
+                       else
+                               snprintf(value, 11, "%u", mpp->fast_io_fail);
+                       if (sysfs_attr_set_value(attr_path, "fast_io_fail",
+                                                value))
+                               return 1;
+               }
+       }
+       return 0;
+}
+
 static int
 opennode (char * dev, int mode)
 {
index 7283f36..dd9671e 100644 (file)
@@ -33,6 +33,7 @@ int path_offline (struct path *);
 int pathinfo (struct path *, vector hwtable, int mask);
 struct path * store_pathinfo (vector pathvec, vector hwtable,
                              char * devname, int flag);
+int sysfs_set_scsi_tmo (struct multipath *mpp);
 
 /*
  * discovery bitmask
index efaa31b..baab020 100644 (file)
@@ -445,6 +445,48 @@ select_pg_timeout(struct multipath *mp)
        return 0;
 }
 
+extern int
+select_fast_io_fail(struct multipath *mp)
+{
+       if (mp->hwe && mp->hwe->fast_io_fail) {
+               mp->fast_io_fail = mp->hwe->fast_io_fail;
+               if (mp->fast_io_fail == -1)
+                       condlog(3, "%s: fast_io_fail_tmo = off (controller default)", mp->alias);
+               else
+                       condlog(3, "%s: fast_io_fail_tmo = %d (controller default)", mp->alias, mp->fast_io_fail);
+               return 0;
+       }
+       if (conf->fast_io_fail) {
+               mp->fast_io_fail = conf->fast_io_fail;
+               if (mp->fast_io_fail == -1)
+                       condlog(3, "%s: fast_io_fail_tmo = off (config file default)", mp->alias);
+               else
+                       condlog(3, "%s: fast_io_fail_tmo = %d (config file default)", mp->alias, mp->fast_io_fail);
+               return 0;
+       }
+       mp->fast_io_fail = 0;
+       return 0;
+}
+
+extern int
+select_dev_loss(struct multipath *mp)
+{
+       if (mp->hwe && mp->hwe->dev_loss) {
+               mp->dev_loss = mp->hwe->dev_loss;
+               condlog(3, "%s: dev_loss_tmo = %u (controller default)",
+                       mp->alias, mp->dev_loss);
+               return 0;
+       }
+       if (conf->dev_loss) {
+               mp->dev_loss = conf->dev_loss;
+               condlog(3, "%s: dev_loss_tmo = %u (config file default)",
+                       mp->alias, mp->dev_loss);
+               return 0;
+       }
+       mp->dev_loss = 0;
+       return 0;
+}
+
 extern int
 select_flush_on_last_del(struct multipath *mp)
 {
index f098d28..57bea5b 100644 (file)
@@ -15,3 +15,5 @@ int select_minio(struct multipath *mp);
 int select_mode(struct multipath *mp);
 int select_uid(struct multipath *mp);
 int select_gid(struct multipath *mp);
+int select_fast_io_fail(struct multipath *mp);
+int select_dev_loss(struct multipath *mp);
index afd1246..dfd0556 100644 (file)
@@ -166,6 +166,8 @@ struct multipath {
        int pg_timeout;
        int flush_on_last_del;
        int attribute_flags;
+       int fast_io_fail;
+       unsigned int dev_loss;
        uid_t uid;
        gid_t gid;
        mode_t mode;
index e00a101..b05a274 100644 (file)
@@ -356,6 +356,62 @@ void sysfs_device_put(struct sysfs_device *dev)
        return;
 }
 
+int
+sysfs_attr_set_value(const char *devpath, const char *attr_name,
+                    const char *value)
+{
+       char path_full[PATH_SIZE];
+       int sysfs_len;
+       struct stat statbuf;
+       int fd, value_len, ret = -1;
+
+       dbg("open '%s'/'%s'", devpath, attr_name);
+       sysfs_len = snprintf(path_full, PATH_SIZE, "%s%s/%s", sysfs_path,
+                            devpath, attr_name);
+       if (sysfs_len >= PATH_SIZE || sysfs_len < 0) {
+               if (sysfs_len < 0)
+                       dbg("cannot copy sysfs path %s%s/%s : %s", sysfs_path,
+                           devpath, attr_name, strerror(errno));
+               else
+                       dbg("sysfs_path %s%s/%s too large", sysfs_path,
+                           devpath, attr_name);
+               goto out;
+       }
+
+       if (stat(path_full, &statbuf) != 0) {
+               dbg("stat '%s' failed: %s" path_full, strerror(errno));
+               goto out;
+       }
+
+       /* skip directories */
+        if (S_ISDIR(statbuf.st_mode))
+                goto out;
+
+       if ((statbuf.st_mode & S_IWUSR) == 0)
+               goto out;
+
+       fd = open(path_full, O_WRONLY);
+       if (fd < 0) {
+               dbg("attribute '%s' can not be opened: %s",
+                   path_full, strerror(errno));
+               goto out;
+       }
+       value_len = strlen(value) + 1;
+       ret = write(fd, value, value_len);
+       if (ret == value_len)
+               ret = 0;
+       else if (ret < 0)
+               dbg("write to %s failed: %s", path_full, strerror(errno));
+       else {
+               dbg("tried to write %d to %s. Wrote %d\n", value_len,
+                   path_full, ret);
+               ret = -1;
+       }
+out:
+       return ret;
+}
+
+
 char *sysfs_attr_get_value(const char *devpath, const char *attr_name)
 {
        char path_full[PATH_SIZE];
index 620962c..57d4cb1 100644 (file)
@@ -22,5 +22,6 @@ void sysfs_device_put(struct sysfs_device *dev);
 char *sysfs_attr_get_value(const char *devpath, const char *attr_name);
 int sysfs_resolve_link(char *path, size_t size);
 int sysfs_get_size (struct sysfs_device * dev, unsigned long long * size);
-
+int sysfs_attr_set_value(const char *devpath, const char *attr_name,
+                        const char *value);
 #endif
index c222da4..bd04e70 100644 (file)
 #      # default : determined by the process
 #      gid disk
 #
+#      #
+#      # name    : fast_io_fail_tmo
+#      # scope   : multipath & multipathd
+#      # desc    : The number of seconds the scsi layer will wait after a
+#      #           problem has been detected on a FC remote port before failing
+#      #           IO to devices on that remote port.
+#      # values  : off | n >= 0 (smaller than dev_loss_tmo)
+#      # default : determined by the OS
+#      fast_io_fail_tmo 5
+#
+#      #
+#      # name    : dev_loss_tmo
+#      # scope   : multipath & multipathd
+#      # desc    : The number of seconds the scsi layer will wait after a
+#      #           problem has been detected on a FC remote port before
+#      #           removing it from the system.
+#      # values  : n > 0
+#      # default : determined by the OS
+#      dev_loss_tmo 600
 #}
 #      
 ##
 #              # desc    : If set to "yes", multipathd will disable queueing
 #              #           when the last path to a device has been deleted.
 #              # values  : yes|no
-#              # default : no
 #              #
 #              flush_on_last_del       yes
 #
 #              # desc    : product strings to blacklist for this vendor
 #              #
 #              product_blacklist       LUN_Z
+#
+#              #
+#              # name    : fast_io_fail_tmo
+#              # scope   : multipath & multipathd
+#              # desc    : The number of seconds the scsi layer will wait after
+#              #           a problem has been detected on a FC remote port
+#              #           before failing IO to devices on that remote port.
+#              # values  : off | n >= 0 (smaller than dev_loss_tmo)
+#              fast_io_fail_tmo 5
+#
+#              #
+#              # name    : dev_loss_tmo
+#              # scope   : multipath & multipathd
+#              # desc    : The number of seconds the scsi layer will wait after
+#              #           a problem has been detected on a FC remote port
+#              #           before removing it from the system.
+#              # values  : n > 0
+#              dev_loss_tmo 600
 #      }
 #      device {
 #              vendor                  "COMPAQ  "
index 2ceb9be..ed9b2d8 100644 (file)
@@ -240,6 +240,17 @@ this to the system limit from /proc/sys/fs/nr_open. If this is not set, the
 maximum number of open fds is taken from the calling process. It is usually
 1024. To be safe, this should be set to the maximum number of paths plus 32,
 if that number is greated than 1024.
+.TP
+.B fast_io_fail_tmo
+Specify the number of seconds the scsi layer will wait after a problem has been
+detected on a FC remote port before failing IO to devices on that remote port.
+This should be smaller than dev_loss_tmo. Setting this to
+.I off
+will disable the timeout.
+.TP
+.B dev_loss_tmo
+Specify the number of seconds the scsi layer will wait after a problem has
+been detected on a FC remote port before removing it from the system.
 .
 .SH "blacklist section"
 The
@@ -384,6 +395,10 @@ section:
 .B no_path_retry
 .TP
 .B rr_min_io
+.TP
+.B fast_io_fail_tmo
+.TP
+.B dev_loss_tmo
 .RE
 .PD
 .LP