multipath-tools: use internal drd.h file
[multipath-tools/.git] / multipathd / main.c
1 /*
2  * Copyright (c) 2004, 2005 Christophe Varoqui
3  * Copyright (c) 2005 Kiyoshi Ueda, NEC
4  * Copyright (c) 2005 Benjamin Marzinski, Redhat
5  * Copyright (c) 2005 Edward Goggin, EMC
6  */
7 #include <unistd.h>
8 #include <sys/stat.h>
9 #include <libdevmapper.h>
10 #include <sys/wait.h>
11 #include <sys/mman.h>
12 #include <sys/types.h>
13 #include <fcntl.h>
14 #include <errno.h>
15 #include <sys/time.h>
16 #include <sys/resource.h>
17 #include <limits.h>
18 #include <linux/oom.h>
19 #include <libudev.h>
20 #include <urcu.h>
21 #ifdef USE_SYSTEMD
22 #include <systemd/sd-daemon.h>
23 #endif
24 #include <semaphore.h>
25 #include <time.h>
26
27 /*
28  * libmultipath
29  */
30 #include "time-util.h"
31
32 /*
33  * libcheckers
34  */
35 #include "checkers.h"
36
#ifdef USE_SYSTEMD
/*
 * File-local flag, only compiled in when USE_SYSTEMD is defined.
 * NOTE(review): presumably non-zero when the systemd watchdog is
 * in use -- confirm against the code that sets it.
 */
static int use_watchdog;
#endif

/*
 * NOTE(review): presumably the timeout applied to the unix socket
 * (uxlsnr/uxclnt); units are not visible here -- confirm.
 */
int uxsock_timeout;
42
43 /*
44  * libmultipath
45  */
46 #include "parser.h"
47 #include "vector.h"
48 #include "memory.h"
49 #include "config.h"
50 #include "util.h"
51 #include "hwtable.h"
52 #include "defaults.h"
53 #include "structs.h"
54 #include "blacklist.h"
55 #include "structs_vec.h"
56 #include "dmparser.h"
57 #include "devmapper.h"
58 #include "sysfs.h"
59 #include "dict.h"
60 #include "discovery.h"
61 #include "debug.h"
62 #include "propsel.h"
63 #include "uevent.h"
64 #include "switchgroup.h"
65 #include "print.h"
66 #include "configure.h"
67 #include "prio.h"
68 #include "wwids.h"
69 #include "pgpolicies.h"
70 #include "uevent.h"
71 #include "log.h"
72
73 #include "mpath_cmd.h"
74 #include "mpath_persist.h"
75
76 #include "prioritizers/alua_rtpg.h"
77
78 #include "main.h"
79 #include "pidfile.h"
80 #include "uxlsnr.h"
81 #include "uxclnt.h"
82 #include "cli.h"
83 #include "cli_handlers.h"
84 #include "lock.h"
85 #include "waiter.h"
86 #include "wwids.h"
87 #include "../third-party/valgrind/drd.h"
88
/* Buffer size constants; their users are outside this part of the file. */
#define FILE_NAME_SIZE 256
#define CMDSIZE 160

/*
 * Log a message about a path at verbosity level 'a'.
 *
 * The macro does not take the path as an argument: it expects a
 * 'struct path *pp' with a valid pp->mpp to be in scope at the call site.
 * If pp->offline is set, a fixed "path offline" message is logged;
 * otherwise message 'b' is logged, but only if it is a non-empty string.
 */
#define LOG_MSG(a, b) \
do { \
        if (pp->offline) \
                condlog(a, "%s: %s - path offline", pp->mpp->alias, pp->dev); \
        else if (strlen(b)) \
                condlog(a, "%s: %s - %s", pp->mpp->alias, pp->dev, b); \
} while(0)

/*
 * Pairs a device name with a multipath map.
 * NOTE(review): no user of this struct is visible in this part of the
 * file -- confirm it is still needed.
 */
struct mpath_event_param
{
        char * devname;
        struct multipath *mpp;
};
105
unsigned int mpath_mx_alloc_len;

int logsink;
int verbosity;
int bindings_read_only;
int ignore_new_devs;
/*
 * Current daemon state. post_config_state() and set_config_state()
 * modify it while holding config_lock and broadcast config_cond
 * whenever it changes.
 */
enum daemon_status running_state = DAEMON_INIT;
pid_t daemon_pid;
pthread_mutex_t config_lock = PTHREAD_MUTEX_INITIALIZER;
/*
 * Not statically initialized.
 * NOTE(review): presumably initialized during daemon startup before
 * first use -- confirm.
 */
pthread_cond_t config_cond;

/*
 * global copy of vecs for use in sig handlers
 */
struct vectors * gvecs;

struct udev * udev;

/* Active configuration; read through get_multipath_config(). */
struct config *multipath_conf;

/* Local variables */
/* NOTE(review): presumably set from signal handlers -- confirm. */
static volatile sig_atomic_t exit_sig;
static volatile sig_atomic_t reconfig_sig;
static volatile sig_atomic_t log_reset_sig;
130
131 const char *
132 daemon_status(void)
133 {
134         switch (running_state) {
135         case DAEMON_INIT:
136                 return "init";
137         case DAEMON_START:
138                 return "startup";
139         case DAEMON_CONFIGURE:
140                 return "configure";
141         case DAEMON_IDLE:
142                 return "idle";
143         case DAEMON_RUNNING:
144                 return "running";
145         case DAEMON_SHUTDOWN:
146                 return "shutdown";
147         }
148         return NULL;
149 }
150
151 /*
152  * I love you too, systemd ...
153  */
154 const char *
155 sd_notify_status(void)
156 {
157         switch (running_state) {
158         case DAEMON_INIT:
159                 return "STATUS=init";
160         case DAEMON_START:
161                 return "STATUS=startup";
162         case DAEMON_CONFIGURE:
163                 return "STATUS=configure";
164         case DAEMON_IDLE:
165                 return "STATUS=idle";
166         case DAEMON_RUNNING:
167                 return "STATUS=running";
168         case DAEMON_SHUTDOWN:
169                 return "STATUS=shutdown";
170         }
171         return NULL;
172 }
173
174 static void config_cleanup(void *arg)
175 {
176         pthread_mutex_unlock(&config_lock);
177 }
178
179 void post_config_state(enum daemon_status state)
180 {
181         pthread_mutex_lock(&config_lock);
182         if (state != running_state) {
183                 running_state = state;
184                 pthread_cond_broadcast(&config_cond);
185 #ifdef USE_SYSTEMD
186                 sd_notify(0, sd_notify_status());
187 #endif
188         }
189         pthread_mutex_unlock(&config_lock);
190 }
191
192 int set_config_state(enum daemon_status state)
193 {
194         int rc = 0;
195
196         pthread_cleanup_push(config_cleanup, NULL);
197         pthread_mutex_lock(&config_lock);
198         if (running_state != state) {
199                 if (running_state != DAEMON_IDLE) {
200                         struct timespec ts;
201
202                         clock_gettime(CLOCK_MONOTONIC, &ts);
203                         ts.tv_sec += 1;
204                         rc = pthread_cond_timedwait(&config_cond,
205                                                     &config_lock, &ts);
206                 }
207                 if (!rc) {
208                         running_state = state;
209                         pthread_cond_broadcast(&config_cond);
210 #ifdef USE_SYSTEMD
211                         sd_notify(0, sd_notify_status());
212 #endif
213                 }
214         }
215         pthread_cleanup_pop(1);
216         return rc;
217 }
218
219 struct config *get_multipath_config(void)
220 {
221         rcu_read_lock();
222         return rcu_dereference(multipath_conf);
223 }
224
/*
 * Leave the RCU read-side section entered by get_multipath_config().
 * @conf is accepted for symmetry with the getter and is not used.
 */
void put_multipath_config(struct config *conf)
{
        (void)conf;
        rcu_read_unlock();
}
229
230 static int
231 need_switch_pathgroup (struct multipath * mpp, int refresh)
232 {
233         struct pathgroup * pgp;
234         struct path * pp;
235         unsigned int i, j;
236         struct config *conf;
237
238         if (!mpp || mpp->pgfailback == -FAILBACK_MANUAL)
239                 return 0;
240
241         /*
242          * Refresh path priority values
243          */
244         if (refresh) {
245                 vector_foreach_slot (mpp->pg, pgp, i) {
246                         vector_foreach_slot (pgp->paths, pp, j) {
247                                 conf = get_multipath_config();
248                                 pathinfo(pp, conf, DI_PRIO);
249                                 put_multipath_config(conf);
250                         }
251                 }
252         }
253
254         if (!mpp->pg || VECTOR_SIZE(mpp->paths) == 0)
255                 return 0;
256
257         mpp->bestpg = select_path_group(mpp);
258
259         if (mpp->bestpg != mpp->nextpg)
260                 return 1;
261
262         return 0;
263 }
264
265 static void
266 switch_pathgroup (struct multipath * mpp)
267 {
268         mpp->stat_switchgroup++;
269         dm_switchgroup(mpp->alias, mpp->bestpg);
270         condlog(2, "%s: switch to path group #%i",
271                  mpp->alias, mpp->bestpg);
272 }
273
274 static int
275 coalesce_maps(struct vectors *vecs, vector nmpv)
276 {
277         struct multipath * ompp;
278         vector ompv = vecs->mpvec;
279         unsigned int i, reassign_maps;
280         struct config *conf;
281
282         conf = get_multipath_config();
283         reassign_maps = conf->reassign_maps;
284         put_multipath_config(conf);
285         vector_foreach_slot (ompv, ompp, i) {
286                 condlog(3, "%s: coalesce map", ompp->alias);
287                 if (!find_mp_by_wwid(nmpv, ompp->wwid)) {
288                         /*
289                          * remove all current maps not allowed by the
290                          * current configuration
291                          */
292                         if (dm_flush_map(ompp->alias)) {
293                                 condlog(0, "%s: unable to flush devmap",
294                                         ompp->alias);
295                                 /*
296                                  * may be just because the device is open
297                                  */
298                                 if (setup_multipath(vecs, ompp) != 0) {
299                                         i--;
300                                         continue;
301                                 }
302                                 if (!vector_alloc_slot(nmpv))
303                                         return 1;
304
305                                 vector_set_slot(nmpv, ompp);
306
307                                 vector_del_slot(ompv, i);
308                                 i--;
309                         }
310                         else {
311                                 dm_lib_release();
312                                 condlog(2, "%s devmap removed", ompp->alias);
313                         }
314                 } else if (reassign_maps) {
315                         condlog(3, "%s: Reassign existing device-mapper"
316                                 " devices", ompp->alias);
317                         dm_reassign(ompp->alias);
318                 }
319         }
320         return 0;
321 }
322
323 void
324 sync_map_state(struct multipath *mpp)
325 {
326         struct pathgroup *pgp;
327         struct path *pp;
328         unsigned int i, j;
329
330         if (!mpp->pg)
331                 return;
332
333         vector_foreach_slot (mpp->pg, pgp, i){
334                 vector_foreach_slot (pgp->paths, pp, j){
335                         if (pp->state == PATH_UNCHECKED ||
336                             pp->state == PATH_WILD ||
337                             pp->state == PATH_DELAYED)
338                                 continue;
339                         if ((pp->dmstate == PSTATE_FAILED ||
340                              pp->dmstate == PSTATE_UNDEF) &&
341                             (pp->state == PATH_UP || pp->state == PATH_GHOST))
342                                 dm_reinstate_path(mpp->alias, pp->dev_t);
343                         else if ((pp->dmstate == PSTATE_ACTIVE ||
344                                   pp->dmstate == PSTATE_UNDEF) &&
345                                  (pp->state == PATH_DOWN ||
346                                   pp->state == PATH_SHAKY))
347                                 dm_fail_path(mpp->alias, pp->dev_t);
348                 }
349         }
350 }
351
352 static void
353 sync_maps_state(vector mpvec)
354 {
355         unsigned int i;
356         struct multipath *mpp;
357
358         vector_foreach_slot (mpvec, mpp, i)
359                 sync_map_state(mpp);
360 }
361
362 static int
363 flush_map(struct multipath * mpp, struct vectors * vecs, int nopaths)
364 {
365         int r;
366
367         if (nopaths)
368                 r = dm_flush_map_nopaths(mpp->alias, mpp->deferred_remove);
369         else
370                 r = dm_flush_map(mpp->alias);
371         /*
372          * clear references to this map before flushing so we can ignore
373          * the spurious uevent we may generate with the dm_flush_map call below
374          */
375         if (r) {
376                 /*
377                  * May not really be an error -- if the map was already flushed
378                  * from the device mapper by dmsetup(8) for instance.
379                  */
380                 if (r == 1)
381                         condlog(0, "%s: can't flush", mpp->alias);
382                 else {
383                         condlog(2, "%s: devmap deferred remove", mpp->alias);
384                         mpp->deferred_remove = DEFERRED_REMOVE_IN_PROGRESS;
385                 }
386                 return r;
387         }
388         else {
389                 dm_lib_release();
390                 condlog(2, "%s: map flushed", mpp->alias);
391         }
392
393         orphan_paths(vecs->pathvec, mpp);
394         remove_map_and_stop_waiter(mpp, vecs, 1);
395
396         return 0;
397 }
398
399 int
400 update_map (struct multipath *mpp, struct vectors *vecs)
401 {
402         int retries = 3;
403         char params[PARAMS_SIZE] = {0};
404
405 retry:
406         condlog(4, "%s: updating new map", mpp->alias);
407         if (adopt_paths(vecs->pathvec, mpp)) {
408                 condlog(0, "%s: failed to adopt paths for new map update",
409                         mpp->alias);
410                 retries = -1;
411                 goto fail;
412         }
413         verify_paths(mpp, vecs);
414         mpp->flush_on_last_del = FLUSH_UNDEF;
415         mpp->action = ACT_RELOAD;
416
417         if (setup_map(mpp, params, PARAMS_SIZE)) {
418                 condlog(0, "%s: failed to setup new map in update", mpp->alias);
419                 retries = -1;
420                 goto fail;
421         }
422         if (domap(mpp, params, 1) <= 0 && retries-- > 0) {
423                 condlog(0, "%s: map_udate sleep", mpp->alias);
424                 sleep(1);
425                 goto retry;
426         }
427         dm_lib_release();
428
429 fail:
430         if (setup_multipath(vecs, mpp))
431                 return 1;
432
433         sync_map_state(mpp);
434
435         if (retries < 0)
436                 condlog(0, "%s: failed reload in new map update", mpp->alias);
437         return 0;
438 }
439
440 static int
441 uev_add_map (struct uevent * uev, struct vectors * vecs)
442 {
443         char *alias;
444         int major = -1, minor = -1, rc;
445
446         condlog(3, "%s: add map (uevent)", uev->kernel);
447         alias = uevent_get_dm_name(uev);
448         if (!alias) {
449                 condlog(3, "%s: No DM_NAME in uevent", uev->kernel);
450                 major = uevent_get_major(uev);
451                 minor = uevent_get_minor(uev);
452                 alias = dm_mapname(major, minor);
453                 if (!alias) {
454                         condlog(2, "%s: mapname not found for %d:%d",
455                                 uev->kernel, major, minor);
456                         return 1;
457                 }
458         }
459         pthread_cleanup_push(cleanup_lock, &vecs->lock);
460         lock(&vecs->lock);
461         pthread_testcancel();
462         rc = ev_add_map(uev->kernel, alias, vecs);
463         lock_cleanup_pop(vecs->lock);
464         FREE(alias);
465         return rc;
466 }
467
468 int
469 ev_add_map (char * dev, char * alias, struct vectors * vecs)
470 {
471         char * refwwid;
472         struct multipath * mpp;
473         int map_present;
474         int r = 1, delayed_reconfig, reassign_maps;
475         struct config *conf;
476
477         map_present = dm_map_present(alias);
478
479         if (map_present && !dm_is_mpath(alias)) {
480                 condlog(4, "%s: not a multipath map", alias);
481                 return 0;
482         }
483
484         mpp = find_mp_by_alias(vecs->mpvec, alias);
485
486         if (mpp) {
487                 if (mpp->wait_for_udev > 1) {
488                         if (update_map(mpp, vecs))
489                                 /* setup multipathd removed the map */
490                                 return 1;
491                 }
492                 conf = get_multipath_config();
493                 delayed_reconfig = conf->delayed_reconfig;
494                 reassign_maps = conf->reassign_maps;
495                 put_multipath_config(conf);
496                 if (mpp->wait_for_udev) {
497                         mpp->wait_for_udev = 0;
498                         if (delayed_reconfig &&
499                             !need_to_delay_reconfig(vecs)) {
500                                 condlog(2, "reconfigure (delayed)");
501                                 set_config_state(DAEMON_CONFIGURE);
502                                 return 0;
503                         }
504                 }
505                 /*
506                  * Not really an error -- we generate our own uevent
507                  * if we create a multipath mapped device as a result
508                  * of uev_add_path
509                  */
510                 if (reassign_maps) {
511                         condlog(3, "%s: Reassign existing device-mapper devices",
512                                 alias);
513                         dm_reassign(alias);
514                 }
515                 return 0;
516         }
517         condlog(2, "%s: adding map", alias);
518
519         /*
520          * now we can register the map
521          */
522         if (map_present) {
523                 if ((mpp = add_map_without_path(vecs, alias))) {
524                         sync_map_state(mpp);
525                         condlog(2, "%s: devmap %s registered", alias, dev);
526                         return 0;
527                 } else {
528                         condlog(2, "%s: uev_add_map failed", dev);
529                         return 1;
530                 }
531         }
532         r = get_refwwid(CMD_NONE, dev, DEV_DEVMAP, vecs->pathvec, &refwwid);
533
534         if (refwwid) {
535                 r = coalesce_paths(vecs, NULL, refwwid, 0, CMD_NONE);
536                 dm_lib_release();
537         }
538
539         if (!r)
540                 condlog(2, "%s: devmap %s added", alias, dev);
541         else if (r == 2)
542                 condlog(2, "%s: uev_add_map %s blacklisted", alias, dev);
543         else
544                 condlog(0, "%s: uev_add_map %s failed", alias, dev);
545
546         FREE(refwwid);
547         return r;
548 }
549
550 static int
551 uev_remove_map (struct uevent * uev, struct vectors * vecs)
552 {
553         char *alias;
554         int minor;
555         struct multipath *mpp;
556
557         condlog(2, "%s: remove map (uevent)", uev->kernel);
558         alias = uevent_get_dm_name(uev);
559         if (!alias) {
560                 condlog(3, "%s: No DM_NAME in uevent, ignoring", uev->kernel);
561                 return 0;
562         }
563         minor = uevent_get_minor(uev);
564
565         pthread_cleanup_push(cleanup_lock, &vecs->lock);
566         lock(&vecs->lock);
567         pthread_testcancel();
568         mpp = find_mp_by_minor(vecs->mpvec, minor);
569
570         if (!mpp) {
571                 condlog(2, "%s: devmap not registered, can't remove",
572                         uev->kernel);
573                 goto out;
574         }
575         if (strcmp(mpp->alias, alias)) {
576                 condlog(2, "%s: minor number mismatch (map %d, event %d)",
577                         mpp->alias, mpp->dmi->minor, minor);
578                 goto out;
579         }
580
581         orphan_paths(vecs->pathvec, mpp);
582         remove_map_and_stop_waiter(mpp, vecs, 1);
583 out:
584         lock_cleanup_pop(vecs->lock);
585         FREE(alias);
586         return 0;
587 }
588
589 /* Called from CLI handler */
590 int
591 ev_remove_map (char * devname, char * alias, int minor, struct vectors * vecs)
592 {
593         struct multipath * mpp;
594
595         mpp = find_mp_by_minor(vecs->mpvec, minor);
596
597         if (!mpp) {
598                 condlog(2, "%s: devmap not registered, can't remove",
599                         devname);
600                 return 1;
601         }
602         if (strcmp(mpp->alias, alias)) {
603                 condlog(2, "%s: minor number mismatch (map %d, event %d)",
604                         mpp->alias, mpp->dmi->minor, minor);
605                 return 1;
606         }
607         return flush_map(mpp, vecs, 0);
608 }
609
610 static int
611 uev_add_path (struct uevent *uev, struct vectors * vecs)
612 {
613         struct path *pp;
614         int ret = 0, i;
615         struct config *conf;
616
617         condlog(2, "%s: add path (uevent)", uev->kernel);
618         if (strstr(uev->kernel, "..") != NULL) {
619                 /*
620                  * Don't allow relative device names in the pathvec
621                  */
622                 condlog(0, "%s: path name is invalid", uev->kernel);
623                 return 1;
624         }
625
626         pthread_cleanup_push(cleanup_lock, &vecs->lock);
627         lock(&vecs->lock);
628         pthread_testcancel();
629         pp = find_path_by_dev(vecs->pathvec, uev->kernel);
630         if (pp) {
631                 int r;
632
633                 condlog(0, "%s: spurious uevent, path already in pathvec",
634                         uev->kernel);
635                 if (!pp->mpp && !strlen(pp->wwid)) {
636                         condlog(3, "%s: reinitialize path", uev->kernel);
637                         udev_device_unref(pp->udev);
638                         pp->udev = udev_device_ref(uev->udev);
639                         conf = get_multipath_config();
640                         r = pathinfo(pp, conf,
641                                      DI_ALL | DI_BLACKLIST);
642                         put_multipath_config(conf);
643                         if (r == PATHINFO_OK)
644                                 ret = ev_add_path(pp, vecs);
645                         else if (r == PATHINFO_SKIPPED) {
646                                 condlog(3, "%s: remove blacklisted path",
647                                         uev->kernel);
648                                 i = find_slot(vecs->pathvec, (void *)pp);
649                                 if (i != -1)
650                                         vector_del_slot(vecs->pathvec, i);
651                                 free_path(pp);
652                         } else {
653                                 condlog(0, "%s: failed to reinitialize path",
654                                         uev->kernel);
655                                 ret = 1;
656                         }
657                 }
658         }
659         lock_cleanup_pop(vecs->lock);
660         if (pp)
661                 return ret;
662
663         /*
664          * get path vital state
665          */
666         conf = get_multipath_config();
667         ret = alloc_path_with_pathinfo(conf, uev->udev,
668                                        DI_ALL, &pp);
669         put_multipath_config(conf);
670         if (!pp) {
671                 if (ret == PATHINFO_SKIPPED)
672                         return 0;
673                 condlog(3, "%s: failed to get path info", uev->kernel);
674                 return 1;
675         }
676         pthread_cleanup_push(cleanup_lock, &vecs->lock);
677         lock(&vecs->lock);
678         pthread_testcancel();
679         ret = store_path(vecs->pathvec, pp);
680         if (!ret) {
681                 conf = get_multipath_config();
682                 pp->checkint = conf->checkint;
683                 put_multipath_config(conf);
684                 ret = ev_add_path(pp, vecs);
685         } else {
686                 condlog(0, "%s: failed to store path info, "
687                         "dropping event",
688                         uev->kernel);
689                 free_path(pp);
690                 ret = 1;
691         }
692         lock_cleanup_pop(vecs->lock);
693         return ret;
694 }
695
696 /*
697  * returns:
698  * 0: added
699  * 1: error
700  */
701 int
702 ev_add_path (struct path * pp, struct vectors * vecs)
703 {
704         struct multipath * mpp;
705         char params[PARAMS_SIZE] = {0};
706         int retries = 3;
707         int start_waiter = 0;
708         int ret;
709
710         /*
711          * need path UID to go any further
712          */
713         if (strlen(pp->wwid) == 0) {
714                 condlog(0, "%s: failed to get path uid", pp->dev);
715                 goto fail; /* leave path added to pathvec */
716         }
717         mpp = find_mp_by_wwid(vecs->mpvec, pp->wwid);
718         if (mpp && mpp->wait_for_udev) {
719                 mpp->wait_for_udev = 2;
720                 orphan_path(pp, "waiting for create to complete");
721                 return 0;
722         }
723
724         pp->mpp = mpp;
725 rescan:
726         if (mpp) {
727                 if (pp->size && mpp->size != pp->size) {
728                         condlog(0, "%s: failed to add new path %s, "
729                                 "device size mismatch",
730                                 mpp->alias, pp->dev);
731                         int i = find_slot(vecs->pathvec, (void *)pp);
732                         if (i != -1)
733                                 vector_del_slot(vecs->pathvec, i);
734                         free_path(pp);
735                         return 1;
736                 }
737
738                 condlog(4,"%s: adopting all paths for path %s",
739                         mpp->alias, pp->dev);
740                 if (adopt_paths(vecs->pathvec, mpp))
741                         goto fail; /* leave path added to pathvec */
742
743                 verify_paths(mpp, vecs);
744                 mpp->flush_on_last_del = FLUSH_UNDEF;
745                 mpp->action = ACT_RELOAD;
746         } else {
747                 if (!should_multipath(pp, vecs->pathvec)) {
748                         orphan_path(pp, "only one path");
749                         return 0;
750                 }
751                 condlog(4,"%s: creating new map", pp->dev);
752                 if ((mpp = add_map_with_path(vecs, pp, 1))) {
753                         mpp->action = ACT_CREATE;
754                         /*
755                          * We don't depend on ACT_CREATE, as domap will
756                          * set it to ACT_NOTHING when complete.
757                          */
758                         start_waiter = 1;
759                 }
760                 if (!start_waiter)
761                         goto fail; /* leave path added to pathvec */
762         }
763
764         /* persistent reservation check*/
765         mpath_pr_event_handle(pp);
766
767         /*
768          * push the map to the device-mapper
769          */
770         if (setup_map(mpp, params, PARAMS_SIZE)) {
771                 condlog(0, "%s: failed to setup map for addition of new "
772                         "path %s", mpp->alias, pp->dev);
773                 goto fail_map;
774         }
775         /*
776          * reload the map for the multipath mapped device
777          */
778 retry:
779         ret = domap(mpp, params, 1);
780         if (ret <= 0) {
781                 if (ret < 0 && retries-- > 0) {
782                         condlog(0, "%s: retry domap for addition of new "
783                                 "path %s", mpp->alias, pp->dev);
784                         sleep(1);
785                         goto retry;
786                 }
787                 condlog(0, "%s: failed in domap for addition of new "
788                         "path %s", mpp->alias, pp->dev);
789                 /*
790                  * deal with asynchronous uevents :((
791                  */
792                 if (mpp->action == ACT_RELOAD && retries-- > 0) {
793                         condlog(0, "%s: ev_add_path sleep", mpp->alias);
794                         sleep(1);
795                         update_mpp_paths(mpp, vecs->pathvec);
796                         goto rescan;
797                 }
798                 else if (mpp->action == ACT_RELOAD)
799                         condlog(0, "%s: giving up reload", mpp->alias);
800                 else
801                         goto fail_map;
802         }
803         dm_lib_release();
804
805         /*
806          * update our state from kernel regardless of create or reload
807          */
808         if (setup_multipath(vecs, mpp))
809                 goto fail; /* if setup_multipath fails, it removes the map */
810
811         sync_map_state(mpp);
812
813         if ((mpp->action == ACT_CREATE ||
814              (mpp->action == ACT_NOTHING && start_waiter && !mpp->waiter)) &&
815             start_waiter_thread(mpp, vecs))
816                         goto fail_map;
817
818         if (retries >= 0) {
819                 condlog(2, "%s [%s]: path added to devmap %s",
820                         pp->dev, pp->dev_t, mpp->alias);
821                 return 0;
822         } else
823                 goto fail;
824
825 fail_map:
826         remove_map(mpp, vecs, 1);
827 fail:
828         orphan_path(pp, "failed to add path");
829         return 1;
830 }
831
832 static int
833 uev_remove_path (struct uevent *uev, struct vectors * vecs)
834 {
835         struct path *pp;
836         int ret;
837
838         condlog(2, "%s: remove path (uevent)", uev->kernel);
839         pthread_cleanup_push(cleanup_lock, &vecs->lock);
840         lock(&vecs->lock);
841         pthread_testcancel();
842         pp = find_path_by_dev(vecs->pathvec, uev->kernel);
843         if (pp)
844                 ret = ev_remove_path(pp, vecs);
845         lock_cleanup_pop(vecs->lock);
846         if (!pp) {
847                 /* Not an error; path might have been purged earlier */
848                 condlog(0, "%s: path already removed", uev->kernel);
849                 return 0;
850         }
851         return ret;
852 }
853
854 int
855 ev_remove_path (struct path *pp, struct vectors * vecs)
856 {
857         struct multipath * mpp;
858         int i, retval = 0;
859         char params[PARAMS_SIZE] = {0};
860
861         /*
862          * avoid referring to the map of an orphaned path
863          */
864         if ((mpp = pp->mpp)) {
865                 /*
866                  * transform the mp->pg vector of vectors of paths
867                  * into a mp->params string to feed the device-mapper
868                  */
869                 if (update_mpp_paths(mpp, vecs->pathvec)) {
870                         condlog(0, "%s: failed to update paths",
871                                 mpp->alias);
872                         goto fail;
873                 }
874                 if ((i = find_slot(mpp->paths, (void *)pp)) != -1)
875                         vector_del_slot(mpp->paths, i);
876
877                 /*
878                  * remove the map IFF removing the last path
879                  */
880                 if (VECTOR_SIZE(mpp->paths) == 0) {
881                         char alias[WWID_SIZE];
882
883                         /*
884                          * flush_map will fail if the device is open
885                          */
886                         strncpy(alias, mpp->alias, WWID_SIZE);
887                         if (mpp->flush_on_last_del == FLUSH_ENABLED) {
888                                 condlog(2, "%s Last path deleted, disabling queueing", mpp->alias);
889                                 mpp->retry_tick = 0;
890                                 mpp->no_path_retry = NO_PATH_RETRY_FAIL;
891                                 mpp->flush_on_last_del = FLUSH_IN_PROGRESS;
892                                 dm_queue_if_no_path(mpp->alias, 0);
893                         }
894                         if (!flush_map(mpp, vecs, 1)) {
895                                 condlog(2, "%s: removed map after"
896                                         " removing all paths",
897                                         alias);
898                                 retval = 0;
899                                 goto out;
900                         }
901                         /*
902                          * Not an error, continue
903                          */
904                 }
905
906                 if (setup_map(mpp, params, PARAMS_SIZE)) {
907                         condlog(0, "%s: failed to setup map for"
908                                 " removal of path %s", mpp->alias, pp->dev);
909                         goto fail;
910                 }
911
912                 if (mpp->wait_for_udev) {
913                         mpp->wait_for_udev = 2;
914                         goto out;
915                 }
916
917                 /*
918                  * reload the map
919                  */
920                 mpp->action = ACT_RELOAD;
921                 if (domap(mpp, params, 1) <= 0) {
922                         condlog(0, "%s: failed in domap for "
923                                 "removal of path %s",
924                                 mpp->alias, pp->dev);
925                         retval = 1;
926                 } else {
927                         /*
928                          * update our state from kernel
929                          */
930                         if (setup_multipath(vecs, mpp))
931                                 return 1;
932                         sync_map_state(mpp);
933
934                         condlog(2, "%s [%s]: path removed from map %s",
935                                 pp->dev, pp->dev_t, mpp->alias);
936                 }
937         }
938
939 out:
940         if ((i = find_slot(vecs->pathvec, (void *)pp)) != -1)
941                 vector_del_slot(vecs->pathvec, i);
942
943         free_path(pp);
944
945         return retval;
946
947 fail:
948         remove_map_and_stop_waiter(mpp, vecs, 1);
949         return 1;
950 }
951
952 static int
953 uev_update_path (struct uevent *uev, struct vectors * vecs)
954 {
955         int ro, retval = 0;
956
957         ro = uevent_get_disk_ro(uev);
958
959         if (ro >= 0) {
960                 struct path * pp;
961                 struct multipath *mpp = NULL;
962
963                 condlog(2, "%s: update path write_protect to '%d' (uevent)",
964                         uev->kernel, ro);
965                 pthread_cleanup_push(cleanup_lock, &vecs->lock);
966                 lock(&vecs->lock);
967                 pthread_testcancel();
968                 /*
969                  * pthread_mutex_lock() and pthread_mutex_unlock()
970                  * need to be at the same indentation level, hence
971                  * this slightly convoluted codepath.
972                  */
973                 pp = find_path_by_dev(vecs->pathvec, uev->kernel);
974                 if (pp) {
975                         if (pp->initialized == INIT_REQUESTED_UDEV) {
976                                 retval = 2;
977                         } else {
978                                 mpp = pp->mpp;
979                                 if (mpp && mpp->wait_for_udev) {
980                                         mpp->wait_for_udev = 2;
981                                         mpp = NULL;
982                                         retval = 0;
983                                 }
984                         }
985                         if (mpp) {
986                                 retval = reload_map(vecs, mpp, 0, 1);
987
988                                 condlog(2, "%s: map %s reloaded (retval %d)",
989                                         uev->kernel, mpp->alias, retval);
990                         }
991                 }
992                 lock_cleanup_pop(vecs->lock);
993                 if (!pp) {
994                         condlog(0, "%s: spurious uevent, path not found",
995                                 uev->kernel);
996                         return 1;
997                 }
998                 if (retval == 2)
999                         return uev_add_path(uev, vecs);
1000         }
1001
1002         return retval;
1003 }
1004
1005 static int
1006 map_discovery (struct vectors * vecs)
1007 {
1008         struct multipath * mpp;
1009         unsigned int i;
1010
1011         if (dm_get_maps(vecs->mpvec))
1012                 return 1;
1013
1014         vector_foreach_slot (vecs->mpvec, mpp, i)
1015                 if (setup_multipath(vecs, mpp))
1016                         return 1;
1017
1018         return 0;
1019 }
1020
1021 int
1022 uxsock_trigger (char * str, char ** reply, int * len, void * trigger_data)
1023 {
1024         struct vectors * vecs;
1025         int r;
1026
1027         *reply = NULL;
1028         *len = 0;
1029         vecs = (struct vectors *)trigger_data;
1030
1031         r = parse_cmd(str, reply, len, vecs, uxsock_timeout / 1000);
1032
1033         if (r > 0) {
1034                 if (r == ETIMEDOUT)
1035                         *reply = STRDUP("timeout\n");
1036                 else
1037                         *reply = STRDUP("fail\n");
1038                 *len = strlen(*reply) + 1;
1039                 r = 1;
1040         }
1041         else if (!r && *len == 0) {
1042                 *reply = STRDUP("ok\n");
1043                 *len = strlen(*reply) + 1;
1044                 r = 0;
1045         }
1046         /* else if (r < 0) leave *reply alone */
1047
1048         return r;
1049 }
1050
/*
 * Decide whether a uevent's devpath should be ignored.
 *
 * The devpath must contain "/block/" followed by a device name.  If a
 * second path component follows the device name (as is the case for
 * partitions), the event is discarded as well.
 *
 * Returns 1 to discard the event, 0 to keep it.
 */
static int
uev_discard(char * devpath)
{
	char disk[11], part[11];
	char *blk = strstr(devpath, "/block/");
	int no_name, has_child;

	if (!blk) {
		condlog(4, "no /block/ in '%s'", devpath);
		return 1;
	}

	/*
	 * keep only block devices, discard partitions
	 */
	no_name = (sscanf(blk, "/block/%10s", disk) != 1);
	has_child = !no_name &&
		    (sscanf(blk, "/block/%10[^/]/%10s", disk, part) == 2);

	if (no_name || has_child) {
		condlog(4, "discard event on %s", devpath);
		return 1;
	}

	return 0;
}
1072
1073 int
1074 uev_trigger (struct uevent * uev, void * trigger_data)
1075 {
1076         int r = 0;
1077         struct vectors * vecs;
1078         struct config *conf;
1079
1080         vecs = (struct vectors *)trigger_data;
1081
1082         if (uev_discard(uev->devpath))
1083                 return 0;
1084
1085         pthread_cleanup_push(config_cleanup, NULL);
1086         pthread_mutex_lock(&config_lock);
1087         if (running_state != DAEMON_IDLE &&
1088             running_state != DAEMON_RUNNING)
1089                 pthread_cond_wait(&config_cond, &config_lock);
1090         pthread_cleanup_pop(1);
1091
1092         if (running_state == DAEMON_SHUTDOWN)
1093                 return 0;
1094
1095         /*
1096          * device map event
1097          * Add events are ignored here as the tables
1098          * are not fully initialised then.
1099          */
1100         if (!strncmp(uev->kernel, "dm-", 3)) {
1101                 if (!strncmp(uev->action, "change", 6)) {
1102                         r = uev_add_map(uev, vecs);
1103                         goto out;
1104                 }
1105                 if (!strncmp(uev->action, "remove", 6)) {
1106                         r = uev_remove_map(uev, vecs);
1107                         goto out;
1108                 }
1109                 goto out;
1110         }
1111
1112         /*
1113          * path add/remove event
1114          */
1115         conf = get_multipath_config();
1116         if (filter_devnode(conf->blist_devnode, conf->elist_devnode,
1117                            uev->kernel) > 0) {
1118                 put_multipath_config(conf);
1119                 goto out;
1120         }
1121         put_multipath_config(conf);
1122
1123         if (!strncmp(uev->action, "add", 3)) {
1124                 r = uev_add_path(uev, vecs);
1125                 goto out;
1126         }
1127         if (!strncmp(uev->action, "remove", 6)) {
1128                 r = uev_remove_path(uev, vecs);
1129                 goto out;
1130         }
1131         if (!strncmp(uev->action, "change", 6)) {
1132                 r = uev_update_path(uev, vecs);
1133                 goto out;
1134         }
1135
1136 out:
1137         return r;
1138 }
1139
/*
 * Thread cleanup handler: unregister the calling thread from RCU.
 * The argument exists only to match the cleanup handler signature.
 */
static void rcu_unregister(void *param)
{
	(void)param;
	rcu_unregister_thread();
}
1144
1145 static void *
1146 ueventloop (void * ap)
1147 {
1148         struct udev *udev = ap;
1149
1150         pthread_cleanup_push(rcu_unregister, NULL);
1151         rcu_register_thread();
1152         if (uevent_listen(udev))
1153                 condlog(0, "error starting uevent listener");
1154         pthread_cleanup_pop(1);
1155         return NULL;
1156 }
1157
1158 static void *
1159 uevqloop (void * ap)
1160 {
1161         pthread_cleanup_push(rcu_unregister, NULL);
1162         rcu_register_thread();
1163         if (uevent_dispatch(&uev_trigger, ap))
1164                 condlog(0, "error starting uevent dispatcher");
1165         pthread_cleanup_pop(1);
1166         return NULL;
1167 }
1168 static void *
1169 uxlsnrloop (void * ap)
1170 {
1171         if (cli_init()) {
1172                 condlog(1, "Failed to init uxsock listener");
1173                 return NULL;
1174         }
1175         pthread_cleanup_push(rcu_unregister, NULL);
1176         rcu_register_thread();
1177         set_handler_callback(LIST+PATHS, cli_list_paths);
1178         set_handler_callback(LIST+PATHS+FMT, cli_list_paths_fmt);
1179         set_handler_callback(LIST+PATHS+RAW+FMT, cli_list_paths_raw);
1180         set_handler_callback(LIST+PATH, cli_list_path);
1181         set_handler_callback(LIST+MAPS, cli_list_maps);
1182         set_unlocked_handler_callback(LIST+STATUS, cli_list_status);
1183         set_unlocked_handler_callback(LIST+DAEMON, cli_list_daemon);
1184         set_handler_callback(LIST+MAPS+STATUS, cli_list_maps_status);
1185         set_handler_callback(LIST+MAPS+STATS, cli_list_maps_stats);
1186         set_handler_callback(LIST+MAPS+FMT, cli_list_maps_fmt);
1187         set_handler_callback(LIST+MAPS+RAW+FMT, cli_list_maps_raw);
1188         set_handler_callback(LIST+MAPS+TOPOLOGY, cli_list_maps_topology);
1189         set_handler_callback(LIST+TOPOLOGY, cli_list_maps_topology);
1190         set_handler_callback(LIST+MAPS+JSON, cli_list_maps_json);
1191         set_handler_callback(LIST+MAP+TOPOLOGY, cli_list_map_topology);
1192         set_handler_callback(LIST+MAP+FMT, cli_list_map_fmt);
1193         set_handler_callback(LIST+MAP+RAW+FMT, cli_list_map_fmt);
1194         set_handler_callback(LIST+MAP+JSON, cli_list_map_json);
1195         set_handler_callback(LIST+CONFIG, cli_list_config);
1196         set_handler_callback(LIST+BLACKLIST, cli_list_blacklist);
1197         set_handler_callback(LIST+DEVICES, cli_list_devices);
1198         set_handler_callback(LIST+WILDCARDS, cli_list_wildcards);
1199         set_handler_callback(ADD+PATH, cli_add_path);
1200         set_handler_callback(DEL+PATH, cli_del_path);
1201         set_handler_callback(ADD+MAP, cli_add_map);
1202         set_handler_callback(DEL+MAP, cli_del_map);
1203         set_handler_callback(SWITCH+MAP+GROUP, cli_switch_group);
1204         set_unlocked_handler_callback(RECONFIGURE, cli_reconfigure);
1205         set_handler_callback(SUSPEND+MAP, cli_suspend);
1206         set_handler_callback(RESUME+MAP, cli_resume);
1207         set_handler_callback(RESIZE+MAP, cli_resize);
1208         set_handler_callback(RELOAD+MAP, cli_reload);
1209         set_handler_callback(RESET+MAP, cli_reassign);
1210         set_handler_callback(REINSTATE+PATH, cli_reinstate);
1211         set_handler_callback(FAIL+PATH, cli_fail);
1212         set_handler_callback(DISABLEQ+MAP, cli_disable_queueing);
1213         set_handler_callback(RESTOREQ+MAP, cli_restore_queueing);
1214         set_handler_callback(DISABLEQ+MAPS, cli_disable_all_queueing);
1215         set_handler_callback(RESTOREQ+MAPS, cli_restore_all_queueing);
1216         set_unlocked_handler_callback(QUIT, cli_quit);
1217         set_unlocked_handler_callback(SHUTDOWN, cli_shutdown);
1218         set_handler_callback(GETPRSTATUS+MAP, cli_getprstatus);
1219         set_handler_callback(SETPRSTATUS+MAP, cli_setprstatus);
1220         set_handler_callback(UNSETPRSTATUS+MAP, cli_unsetprstatus);
1221         set_handler_callback(FORCEQ+DAEMON, cli_force_no_daemon_q);
1222         set_handler_callback(RESTOREQ+DAEMON, cli_restore_no_daemon_q);
1223
1224         umask(077);
1225         uxsock_listen(&uxsock_trigger, ap);
1226         pthread_cleanup_pop(1);
1227         return NULL;
1228 }
1229
1230 void
1231 exit_daemon (void)
1232 {
1233         post_config_state(DAEMON_SHUTDOWN);
1234 }
1235
1236 static void
1237 fail_path (struct path * pp, int del_active)
1238 {
1239         if (!pp->mpp)
1240                 return;
1241
1242         condlog(2, "checker failed path %s in map %s",
1243                  pp->dev_t, pp->mpp->alias);
1244
1245         dm_fail_path(pp->mpp->alias, pp->dev_t);
1246         if (del_active)
1247                 update_queue_mode_del_path(pp->mpp);
1248 }
1249
1250 /*
1251  * caller must have locked the path list before calling that function
1252  */
1253 static int
1254 reinstate_path (struct path * pp, int add_active)
1255 {
1256         int ret = 0;
1257
1258         if (!pp->mpp)
1259                 return 0;
1260
1261         if (dm_reinstate_path(pp->mpp->alias, pp->dev_t)) {
1262                 condlog(0, "%s: reinstate failed", pp->dev_t);
1263                 ret = 1;
1264         } else {
1265                 condlog(2, "%s: reinstated", pp->dev_t);
1266                 if (add_active)
1267                         update_queue_mode_add_path(pp->mpp);
1268         }
1269         return ret;
1270 }
1271
1272 static void
1273 enable_group(struct path * pp)
1274 {
1275         struct pathgroup * pgp;
1276
1277         /*
1278          * if path is added through uev_add_path, pgindex can be unset.
1279          * next update_strings() will set it, upon map reload event.
1280          *
1281          * we can safely return here, because upon map reload, all
1282          * PG will be enabled.
1283          */
1284         if (!pp->mpp->pg || !pp->pgindex)
1285                 return;
1286
1287         pgp = VECTOR_SLOT(pp->mpp->pg, pp->pgindex - 1);
1288
1289         if (pgp->status == PGSTATE_DISABLED) {
1290                 condlog(2, "%s: enable group #%i", pp->mpp->alias, pp->pgindex);
1291                 dm_enablegroup(pp->mpp->alias, pp->pgindex);
1292         }
1293 }
1294
1295 static void
1296 mpvec_garbage_collector (struct vectors * vecs)
1297 {
1298         struct multipath * mpp;
1299         unsigned int i;
1300
1301         if (!vecs->mpvec)
1302                 return;
1303
1304         vector_foreach_slot (vecs->mpvec, mpp, i) {
1305                 if (mpp && mpp->alias && !dm_map_present(mpp->alias)) {
1306                         condlog(2, "%s: remove dead map", mpp->alias);
1307                         remove_map_and_stop_waiter(mpp, vecs, 1);
1308                         i--;
1309                 }
1310         }
1311 }
1312
1313 /* This is called after a path has started working again. It the multipath
1314  * device for this path uses the followover failback type, and this is the
1315  * best pathgroup, and this is the first path in the pathgroup to come back
1316  * up, then switch to this pathgroup */
1317 static int
1318 followover_should_failback(struct path * pp)
1319 {
1320         struct pathgroup * pgp;
1321         struct path *pp1;
1322         int i;
1323
1324         if (pp->mpp->pgfailback != -FAILBACK_FOLLOWOVER ||
1325             !pp->mpp->pg || !pp->pgindex ||
1326             pp->pgindex != pp->mpp->bestpg)
1327                 return 0;
1328
1329         pgp = VECTOR_SLOT(pp->mpp->pg, pp->pgindex - 1);
1330         vector_foreach_slot(pgp->paths, pp1, i) {
1331                 if (pp1 == pp)
1332                         continue;
1333                 if (pp1->chkrstate != PATH_DOWN && pp1->chkrstate != PATH_SHAKY)
1334                         return 0;
1335         }
1336         return 1;
1337 }
1338
1339 static void
1340 missing_uev_wait_tick(struct vectors *vecs)
1341 {
1342         struct multipath * mpp;
1343         unsigned int i;
1344         int timed_out = 0, delayed_reconfig;
1345         struct config *conf;
1346
1347         vector_foreach_slot (vecs->mpvec, mpp, i) {
1348                 if (mpp->wait_for_udev && --mpp->uev_wait_tick <= 0) {
1349                         timed_out = 1;
1350                         condlog(0, "%s: timeout waiting on creation uevent. enabling reloads", mpp->alias);
1351                         if (mpp->wait_for_udev > 1 && update_map(mpp, vecs)) {
1352                                 /* update_map removed map */
1353                                 i--;
1354                                 continue;
1355                         }
1356                         mpp->wait_for_udev = 0;
1357                 }
1358         }
1359
1360         conf = get_multipath_config();
1361         delayed_reconfig = conf->delayed_reconfig;
1362         put_multipath_config(conf);
1363         if (timed_out && delayed_reconfig &&
1364             !need_to_delay_reconfig(vecs)) {
1365                 condlog(2, "reconfigure (delayed)");
1366                 set_config_state(DAEMON_CONFIGURE);
1367         }
1368 }
1369
1370 static void
1371 defered_failback_tick (vector mpvec)
1372 {
1373         struct multipath * mpp;
1374         unsigned int i;
1375
1376         vector_foreach_slot (mpvec, mpp, i) {
1377                 /*
1378                  * defered failback getting sooner
1379                  */
1380                 if (mpp->pgfailback > 0 && mpp->failback_tick > 0) {
1381                         mpp->failback_tick--;
1382
1383                         if (!mpp->failback_tick && need_switch_pathgroup(mpp, 1))
1384                                 switch_pathgroup(mpp);
1385                 }
1386         }
1387 }
1388
1389 static void
1390 retry_count_tick(vector mpvec)
1391 {
1392         struct multipath *mpp;
1393         unsigned int i;
1394
1395         vector_foreach_slot (mpvec, mpp, i) {
1396                 if (mpp->retry_tick > 0) {
1397                         mpp->stat_total_queueing_time++;
1398                         condlog(4, "%s: Retrying.. No active path", mpp->alias);
1399                         if(--mpp->retry_tick == 0) {
1400                                 dm_queue_if_no_path(mpp->alias, 0);
1401                                 condlog(2, "%s: Disable queueing", mpp->alias);
1402                         }
1403                 }
1404         }
1405 }
1406
1407 int update_prio(struct path *pp, int refresh_all)
1408 {
1409         int oldpriority;
1410         struct path *pp1;
1411         struct pathgroup * pgp;
1412         int i, j, changed = 0;
1413         struct config *conf;
1414
1415         if (refresh_all) {
1416                 vector_foreach_slot (pp->mpp->pg, pgp, i) {
1417                         vector_foreach_slot (pgp->paths, pp1, j) {
1418                                 oldpriority = pp1->priority;
1419                                 conf = get_multipath_config();
1420                                 pathinfo(pp1, conf, DI_PRIO);
1421                                 put_multipath_config(conf);
1422                                 if (pp1->priority != oldpriority)
1423                                         changed = 1;
1424                         }
1425                 }
1426                 return changed;
1427         }
1428         oldpriority = pp->priority;
1429         conf = get_multipath_config();
1430         pathinfo(pp, conf, DI_PRIO);
1431         put_multipath_config(conf);
1432
1433         if (pp->priority == oldpriority)
1434                 return 0;
1435         return 1;
1436 }
1437
/*
 * Reload a map and resynchronise the daemon's view of it.
 *
 * refresh is passed on to reload_map().  After a successful reload the
 * map is set up again with setup_multipath() and its state is synced
 * through sync_map_state().
 *
 * Returns 0 on success, 1 if the reload or the setup failed.
 */
int update_path_groups(struct multipath *mpp, struct vectors *vecs, int refresh)
{
	if (reload_map(vecs, mpp, refresh, 1) != 0) {
		return 1;
	}

	dm_lib_release();

	if (setup_multipath(vecs, mpp) != 0) {
		return 1;
	}

	sync_map_state(mpp);
	return 0;
}
1450
1451 void repair_path(struct path * pp)
1452 {
1453         if (pp->state != PATH_DOWN)
1454                 return;
1455
1456         checker_repair(&pp->checker);
1457         if (strlen(checker_message(&pp->checker)))
1458                 LOG_MSG(1, checker_message(&pp->checker));
1459 }
1460
1461 /*
1462  * Returns '1' if the path has been checked, '-1' if it was blacklisted
1463  * and '0' otherwise
1464  */
1465 int
1466 check_path (struct vectors * vecs, struct path * pp, int ticks)
1467 {
1468         int newstate;
1469         int new_path_up = 0;
1470         int chkr_new_path_up = 0;
1471         int add_active;
1472         int disable_reinstate = 0;
1473         int oldchkrstate = pp->chkrstate;
1474         int retrigger_tries, checkint;
1475         struct config *conf;
1476         int ret;
1477
1478         if ((pp->initialized == INIT_OK ||
1479              pp->initialized == INIT_REQUESTED_UDEV) && !pp->mpp)
1480                 return 0;
1481
1482         if (pp->tick)
1483                 pp->tick -= (pp->tick > ticks) ? ticks : pp->tick;
1484         if (pp->tick)
1485                 return 0; /* don't check this path yet */
1486
1487         conf = get_multipath_config();
1488         retrigger_tries = conf->retrigger_tries;
1489         checkint = conf->checkint;
1490         put_multipath_config(conf);
1491         if (!pp->mpp && pp->initialized == INIT_MISSING_UDEV &&
1492             pp->retriggers < retrigger_tries) {
1493                 condlog(2, "%s: triggering change event to reinitialize",
1494                         pp->dev);
1495                 pp->initialized = INIT_REQUESTED_UDEV;
1496                 pp->retriggers++;
1497                 sysfs_attr_set_value(pp->udev, "uevent", "change",
1498                                      strlen("change"));
1499                 return 0;
1500         }
1501
1502         /*
1503          * provision a next check soonest,
1504          * in case we exit abnormaly from here
1505          */
1506         pp->tick = checkint;
1507
1508         newstate = path_offline(pp);
1509         /*
1510          * Wait for uevent for removed paths;
1511          * some LLDDs like zfcp keep paths unavailable
1512          * without sending uevents.
1513          */
1514         if (newstate == PATH_REMOVED)
1515                 newstate = PATH_DOWN;
1516
1517         if (newstate == PATH_UP) {
1518                 conf = get_multipath_config();
1519                 newstate = get_state(pp, conf, 1);
1520                 put_multipath_config(conf);
1521         } else
1522                 checker_clear_message(&pp->checker);
1523
1524         if (newstate == PATH_WILD || newstate == PATH_UNCHECKED) {
1525                 condlog(2, "%s: unusable path", pp->dev);
1526                 conf = get_multipath_config();
1527                 pathinfo(pp, conf, 0);
1528                 put_multipath_config(conf);
1529                 return 1;
1530         }
1531         if (!pp->mpp) {
1532                 if (!strlen(pp->wwid) && pp->initialized != INIT_MISSING_UDEV &&
1533                     (newstate == PATH_UP || newstate == PATH_GHOST)) {
1534                         condlog(2, "%s: add missing path", pp->dev);
1535                         conf = get_multipath_config();
1536                         ret = pathinfo(pp, conf, DI_ALL | DI_BLACKLIST);
1537                         if (ret == PATHINFO_OK) {
1538                                 ev_add_path(pp, vecs);
1539                                 pp->tick = 1;
1540                         } else if (ret == PATHINFO_SKIPPED) {
1541                                 put_multipath_config(conf);
1542                                 return -1;
1543                         }
1544                         put_multipath_config(conf);
1545                 }
1546                 return 0;
1547         }
1548         /*
1549          * Async IO in flight. Keep the previous path state
1550          * and reschedule as soon as possible
1551          */
1552         if (newstate == PATH_PENDING) {
1553                 pp->tick = 1;
1554                 return 0;
1555         }
1556         /*
1557          * Synchronize with kernel state
1558          */
1559         if (update_multipath_strings(pp->mpp, vecs->pathvec, 1)) {
1560                 condlog(1, "%s: Could not synchronize with kernel state",
1561                         pp->dev);
1562                 pp->dmstate = PSTATE_UNDEF;
1563         }
1564         /* if update_multipath_strings orphaned the path, quit early */
1565         if (!pp->mpp)
1566                 return 0;
1567
1568         if ((newstate == PATH_UP || newstate == PATH_GHOST) &&
1569              pp->wait_checks > 0) {
1570                 if (pp->mpp && pp->mpp->nr_active > 0) {
1571                         pp->state = PATH_DELAYED;
1572                         pp->wait_checks--;
1573                         return 1;
1574                 } else
1575                         pp->wait_checks = 0;
1576         }
1577
1578         /*
1579          * don't reinstate failed path, if its in stand-by
1580          * and if target supports only implicit tpgs mode.
1581          * this will prevent unnecessary i/o by dm on stand-by
1582          * paths if there are no other active paths in map.
1583          */
1584         disable_reinstate = (newstate == PATH_GHOST &&
1585                             pp->mpp->nr_active == 0 &&
1586                             pp->tpgs == TPGS_IMPLICIT) ? 1 : 0;
1587
1588         pp->chkrstate = newstate;
1589         if (newstate != pp->state) {
1590                 int oldstate = pp->state;
1591                 pp->state = newstate;
1592
1593                 if (strlen(checker_message(&pp->checker)))
1594                         LOG_MSG(1, checker_message(&pp->checker));
1595
1596                 /*
1597                  * upon state change, reset the checkint
1598                  * to the shortest delay
1599                  */
1600                 conf = get_multipath_config();
1601                 pp->checkint = conf->checkint;
1602                 put_multipath_config(conf);
1603
1604                 if (newstate == PATH_DOWN || newstate == PATH_SHAKY || newstate == PATH_TIMEOUT) {
1605                         /*
1606                          * proactively fail path in the DM
1607                          */
1608                         if (oldstate == PATH_UP ||
1609                             oldstate == PATH_GHOST) {
1610                                 fail_path(pp, 1);
1611                                 if (pp->mpp->delay_wait_checks > 0 &&
1612                                     pp->watch_checks > 0) {
1613                                         pp->wait_checks = pp->mpp->delay_wait_checks;
1614                                         pp->watch_checks = 0;
1615                                 }
1616                         }else
1617                                 fail_path(pp, 0);
1618
1619                         /*
1620                          * cancel scheduled failback
1621                          */
1622                         pp->mpp->failback_tick = 0;
1623
1624                         pp->mpp->stat_path_failures++;
1625                         repair_path(pp);
1626                         return 1;
1627                 }
1628
1629                 if(newstate == PATH_UP || newstate == PATH_GHOST){
1630                         if ( pp->mpp && pp->mpp->prflag ){
1631                                 /*
1632                                  * Check Persistent Reservation.
1633                                  */
1634                         condlog(2, "%s: checking persistent reservation "
1635                                 "registration", pp->dev);
1636                         mpath_pr_event_handle(pp);
1637                         }
1638                 }
1639
1640                 /*
1641                  * reinstate this path
1642                  */
1643                 if (oldstate != PATH_UP &&
1644                     oldstate != PATH_GHOST) {
1645                         if (pp->mpp->delay_watch_checks > 0)
1646                                 pp->watch_checks = pp->mpp->delay_watch_checks;
1647                         add_active = 1;
1648                 } else {
1649                         if (pp->watch_checks > 0)
1650                                 pp->watch_checks--;
1651                         add_active = 0;
1652                 }
1653                 if (!disable_reinstate && reinstate_path(pp, add_active)) {
1654                         condlog(3, "%s: reload map", pp->dev);
1655                         ev_add_path(pp, vecs);
1656                         pp->tick = 1;
1657                         return 0;
1658                 }
1659                 new_path_up = 1;
1660
1661                 if (oldchkrstate != PATH_UP && oldchkrstate != PATH_GHOST)
1662                         chkr_new_path_up = 1;
1663
1664                 /*
1665                  * if at least one path is up in a group, and
1666                  * the group is disabled, re-enable it
1667                  */
1668                 if (newstate == PATH_UP)
1669                         enable_group(pp);
1670         }
1671         else if (newstate == PATH_UP || newstate == PATH_GHOST) {
1672                 if ((pp->dmstate == PSTATE_FAILED ||
1673                     pp->dmstate == PSTATE_UNDEF) &&
1674                     !disable_reinstate) {
1675                         /* Clear IO errors */
1676                         if (reinstate_path(pp, 0)) {
1677                                 condlog(3, "%s: reload map", pp->dev);
1678                                 ev_add_path(pp, vecs);
1679                                 pp->tick = 1;
1680                                 return 0;
1681                         }
1682                 } else {
1683                         unsigned int max_checkint;
1684                         LOG_MSG(4, checker_message(&pp->checker));
1685                         conf = get_multipath_config();
1686                         max_checkint = conf->max_checkint;
1687                         put_multipath_config(conf);
1688                         if (pp->checkint != max_checkint) {
1689                                 /*
1690                                  * double the next check delay.
1691                                  * max at conf->max_checkint
1692                                  */
1693                                 if (pp->checkint < (max_checkint / 2))
1694                                         pp->checkint = 2 * pp->checkint;
1695                                 else
1696                                         pp->checkint = max_checkint;
1697
1698                                 condlog(4, "%s: delay next check %is",
1699                                         pp->dev_t, pp->checkint);
1700                         }
1701                         if (pp->watch_checks > 0)
1702                                 pp->watch_checks--;
1703                         pp->tick = pp->checkint;
1704                 }
1705         }
1706         else if (newstate == PATH_DOWN &&
1707                  strlen(checker_message(&pp->checker))) {
1708                 int log_checker_err;
1709
1710                 conf = get_multipath_config();
1711                 log_checker_err = conf->log_checker_err;
1712                 put_multipath_config(conf);
1713                 if (log_checker_err == LOG_CHKR_ERR_ONCE)
1714                         LOG_MSG(3, checker_message(&pp->checker));
1715                 else
1716                         LOG_MSG(2, checker_message(&pp->checker));
1717         }
1718
1719         pp->state = newstate;
1720         repair_path(pp);
1721
1722         if (pp->mpp->wait_for_udev)
1723                 return 1;
1724         /*
1725          * path prio refreshing
1726          */
1727         condlog(4, "path prio refresh");
1728
1729         if (update_prio(pp, new_path_up) &&
1730             (pp->mpp->pgpolicyfn == (pgpolicyfn *)group_by_prio) &&
1731              pp->mpp->pgfailback == -FAILBACK_IMMEDIATE)
1732                 update_path_groups(pp->mpp, vecs, !new_path_up);
1733         else if (need_switch_pathgroup(pp->mpp, 0)) {
1734                 if (pp->mpp->pgfailback > 0 &&
1735                     (new_path_up || pp->mpp->failback_tick <= 0))
1736                         pp->mpp->failback_tick =
1737                                 pp->mpp->pgfailback + 1;
1738                 else if (pp->mpp->pgfailback == -FAILBACK_IMMEDIATE ||
1739                          (chkr_new_path_up && followover_should_failback(pp)))
1740                         switch_pathgroup(pp->mpp);
1741         }
1742         return 1;
1743 }
1744
1745 static void init_path_check_interval(struct vectors *vecs)
1746 {
1747         struct config *conf;
1748         struct path *pp;
1749         unsigned int i;
1750
1751         vector_foreach_slot (vecs->pathvec, pp, i) {
1752                 conf = get_multipath_config();
1753                 pp->checkint = conf->checkint;
1754                 put_multipath_config(conf);
1755         }
1756 }
1757
1758 static void *
1759 checkerloop (void *ap)
1760 {
1761         struct vectors *vecs;
1762         struct path *pp;
1763         int count = 0;
1764         unsigned int i;
1765         struct itimerval timer_tick_it;
1766         struct timespec last_time;
1767         struct config *conf;
1768
1769         pthread_cleanup_push(rcu_unregister, NULL);
1770         rcu_register_thread();
1771         mlockall(MCL_CURRENT | MCL_FUTURE);
1772         vecs = (struct vectors *)ap;
1773         condlog(2, "path checkers start up");
1774
1775         /* Tweak start time for initial path check */
1776         if (clock_gettime(CLOCK_MONOTONIC, &last_time) != 0)
1777                 last_time.tv_sec = 0;
1778         else
1779                 last_time.tv_sec -= 1;
1780
1781         while (1) {
1782                 struct timespec diff_time, start_time, end_time;
1783                 int num_paths = 0, ticks = 0, signo, strict_timing, rc = 0;
1784                 sigset_t mask;
1785
1786                 if (clock_gettime(CLOCK_MONOTONIC, &start_time) != 0)
1787                         start_time.tv_sec = 0;
1788                 if (start_time.tv_sec && last_time.tv_sec) {
1789                         timespecsub(&start_time, &last_time, &diff_time);
1790                         condlog(4, "tick (%lu.%06lu secs)",
1791                                 diff_time.tv_sec, diff_time.tv_nsec / 1000);
1792                         last_time = start_time;
1793                         ticks = diff_time.tv_sec;
1794                 } else {
1795                         ticks = 1;
1796                         condlog(4, "tick (%d ticks)", ticks);
1797                 }
1798 #ifdef USE_SYSTEMD
1799                 if (use_watchdog)
1800                         sd_notify(0, "WATCHDOG=1");
1801 #endif
1802                 rc = set_config_state(DAEMON_RUNNING);
1803                 if (rc == ETIMEDOUT) {
1804                         condlog(4, "timeout waiting for DAEMON_IDLE");
1805                         continue;
1806                 }
1807                 if (vecs->pathvec) {
1808                         pthread_cleanup_push(cleanup_lock, &vecs->lock);
1809                         lock(&vecs->lock);
1810                         pthread_testcancel();
1811                         vector_foreach_slot (vecs->pathvec, pp, i) {
1812                                 rc = check_path(vecs, pp, ticks);
1813                                 if (rc < 0) {
1814                                         vector_del_slot(vecs->pathvec, i);
1815                                         free_path(pp);
1816                                         i--;
1817                                 } else
1818                                         num_paths += rc;
1819                         }
1820                         lock_cleanup_pop(vecs->lock);
1821                 }
1822                 if (vecs->mpvec) {
1823                         pthread_cleanup_push(cleanup_lock, &vecs->lock);
1824                         lock(&vecs->lock);
1825                         pthread_testcancel();
1826                         defered_failback_tick(vecs->mpvec);
1827                         retry_count_tick(vecs->mpvec);
1828                         missing_uev_wait_tick(vecs);
1829                         lock_cleanup_pop(vecs->lock);
1830                 }
1831                 if (count)
1832                         count--;
1833                 else {
1834                         pthread_cleanup_push(cleanup_lock, &vecs->lock);
1835                         lock(&vecs->lock);
1836                         pthread_testcancel();
1837                         condlog(4, "map garbage collection");
1838                         mpvec_garbage_collector(vecs);
1839                         count = MAPGCINT;
1840                         lock_cleanup_pop(vecs->lock);
1841                 }
1842
1843                 diff_time.tv_nsec = 0;
1844                 if (start_time.tv_sec &&
1845                     clock_gettime(CLOCK_MONOTONIC, &end_time) == 0) {
1846                         timespecsub(&end_time, &start_time, &diff_time);
1847                         if (num_paths) {
1848                                 unsigned int max_checkint;
1849
1850                                 condlog(3, "checked %d path%s in %lu.%06lu secs",
1851                                         num_paths, num_paths > 1 ? "s" : "",
1852                                         diff_time.tv_sec,
1853                                         diff_time.tv_nsec / 1000);
1854                                 conf = get_multipath_config();
1855                                 max_checkint = conf->max_checkint;
1856                                 put_multipath_config(conf);
1857                                 if (diff_time.tv_sec > max_checkint)
1858                                         condlog(1, "path checkers took longer "
1859                                                 "than %lu seconds, consider "
1860                                                 "increasing max_polling_interval",
1861                                                 diff_time.tv_sec);
1862                         }
1863                 }
1864
1865                 post_config_state(DAEMON_IDLE);
1866                 conf = get_multipath_config();
1867                 strict_timing = conf->strict_timing;
1868                 put_multipath_config(conf);
1869                 if (!strict_timing)
1870                         sleep(1);
1871                 else {
1872                         timer_tick_it.it_interval.tv_sec = 0;
1873                         timer_tick_it.it_interval.tv_usec = 0;
1874                         if (diff_time.tv_nsec) {
1875                                 timer_tick_it.it_value.tv_sec = 0;
1876                                 timer_tick_it.it_value.tv_usec =
1877                                      1000UL * 1000 * 1000 - diff_time.tv_nsec;
1878                         } else {
1879                                 timer_tick_it.it_value.tv_sec = 1;
1880                                 timer_tick_it.it_value.tv_usec = 0;
1881                         }
1882                         setitimer(ITIMER_REAL, &timer_tick_it, NULL);
1883
1884                         sigemptyset(&mask);
1885                         sigaddset(&mask, SIGALRM);
1886                         condlog(3, "waiting for %lu.%06lu secs",
1887                                 timer_tick_it.it_value.tv_sec,
1888                                 timer_tick_it.it_value.tv_usec);
1889                         if (sigwait(&mask, &signo) != 0) {
1890                                 condlog(3, "sigwait failed with error %d",
1891                                         errno);
1892                                 conf = get_multipath_config();
1893                                 conf->strict_timing = 0;
1894                                 put_multipath_config(conf);
1895                                 break;
1896                         }
1897                 }
1898         }
1899         pthread_cleanup_pop(1);
1900         return NULL;
1901 }
1902
1903 int
1904 configure (struct vectors * vecs, int start_waiters)
1905 {
1906         struct multipath * mpp;
1907         struct path * pp;
1908         vector mpvec;
1909         int i, ret;
1910         struct config *conf;
1911
1912         if (!vecs->pathvec && !(vecs->pathvec = vector_alloc()))
1913                 return 1;
1914
1915         if (!vecs->mpvec && !(vecs->mpvec = vector_alloc()))
1916                 return 1;
1917
1918         if (!(mpvec = vector_alloc()))
1919                 return 1;
1920
1921         /*
1922          * probe for current path (from sysfs) and map (from dm) sets
1923          */
1924         ret = path_discovery(vecs->pathvec, DI_ALL);
1925         if (ret < 0)
1926                 return 1;
1927
1928         vector_foreach_slot (vecs->pathvec, pp, i){
1929                 conf = get_multipath_config();
1930                 if (filter_path(conf, pp) > 0){
1931                         vector_del_slot(vecs->pathvec, i);
1932                         free_path(pp);
1933                         i--;
1934                 }
1935                 else
1936                         pp->checkint = conf->checkint;
1937                 put_multipath_config(conf);
1938         }
1939         if (map_discovery(vecs))
1940                 return 1;
1941
1942         /*
1943          * create new set of maps & push changed ones into dm
1944          */
1945         if (coalesce_paths(vecs, mpvec, NULL, 1, CMD_NONE))
1946                 return 1;
1947
1948         /*
1949          * may need to remove some maps which are no longer relevant
1950          * e.g., due to blacklist changes in conf file
1951          */
1952         if (coalesce_maps(vecs, mpvec))
1953                 return 1;
1954
1955         dm_lib_release();
1956
1957         sync_maps_state(mpvec);
1958         vector_foreach_slot(mpvec, mpp, i){
1959                 remember_wwid(mpp->wwid);
1960                 update_map_pr(mpp);
1961         }
1962
1963         /*
1964          * purge dm of old maps
1965          */
1966         remove_maps(vecs);
1967
1968         /*
1969          * save new set of maps formed by considering current path state
1970          */
1971         vector_free(vecs->mpvec);
1972         vecs->mpvec = mpvec;
1973
1974         /*
1975          * start dm event waiter threads for these new maps
1976          */
1977         vector_foreach_slot(vecs->mpvec, mpp, i) {
1978                 if (setup_multipath(vecs, mpp))
1979                         return 1;
1980                 if (start_waiters)
1981                         if (start_waiter_thread(mpp, vecs))
1982                                 return 1;
1983         }
1984         return 0;
1985 }
1986
1987 int
1988 need_to_delay_reconfig(struct vectors * vecs)
1989 {
1990         struct multipath *mpp;
1991         int i;
1992
1993         if (!VECTOR_SIZE(vecs->mpvec))
1994                 return 0;
1995
1996         vector_foreach_slot(vecs->mpvec, mpp, i) {
1997                 if (mpp->wait_for_udev)
1998                         return 1;
1999         }
2000         return 0;
2001 }
2002
2003 void rcu_free_config(struct rcu_head *head)
2004 {
2005         struct config *conf = container_of(head, struct config, rcu);
2006
2007         free_config(conf);
2008 }
2009
2010 int
2011 reconfigure (struct vectors * vecs)
2012 {
2013         struct config * old, *conf;
2014
2015         conf = load_config(DEFAULT_CONFIGFILE);
2016         if (!conf)
2017                 return 1;
2018
2019         /*
2020          * free old map and path vectors ... they use old conf state
2021          */
2022         if (VECTOR_SIZE(vecs->mpvec))
2023                 remove_maps_and_stop_waiters(vecs);
2024
2025         free_pathvec(vecs->pathvec, FREE_PATHS);
2026         vecs->pathvec = NULL;
2027
2028         /* Re-read any timezone changes */
2029         tzset();
2030
2031         dm_drv_version(conf->version, TGT_MPATH);
2032         if (verbosity)
2033                 conf->verbosity = verbosity;
2034         if (bindings_read_only)
2035                 conf->bindings_read_only = bindings_read_only;
2036         if (ignore_new_devs)
2037                 conf->ignore_new_devs = ignore_new_devs;
2038         uxsock_timeout = conf->uxsock_timeout;
2039
2040         old = rcu_dereference(multipath_conf);
2041         rcu_assign_pointer(multipath_conf, conf);
2042         call_rcu(&old->rcu, rcu_free_config);
2043
2044         configure(vecs, 1);
2045
2046
2047         return 0;
2048 }
2049
2050 static struct vectors *
2051 init_vecs (void)
2052 {
2053         struct vectors * vecs;
2054
2055         vecs = (struct vectors *)MALLOC(sizeof(struct vectors));
2056
2057         if (!vecs)
2058                 return NULL;
2059
2060         pthread_mutex_init(&vecs->lock.mutex, NULL);
2061
2062         return vecs;
2063 }
2064
/*
 * Install @func as the handler for signal @signo, with an empty handler
 * mask and no flags.
 *
 * Returns the previously installed handler, or SIG_ERR if sigaction()
 * failed.
 */
static void *
signal_set(int signo, void (*func) (int))
{
	struct sigaction wanted;
	struct sigaction previous;

	wanted.sa_handler = func;
	wanted.sa_flags = 0;
	sigemptyset(&wanted.sa_mask);

	if (sigaction(signo, &wanted, &previous) < 0)
		return (SIG_ERR);

	return (previous.sa_handler);
}
2083
2084 void
2085 handle_signals(void)
2086 {
2087         if (exit_sig) {
2088                 condlog(2, "exit (signal)");
2089                 exit_daemon();
2090         }
2091         if (reconfig_sig) {
2092                 condlog(2, "reconfigure (signal)");
2093                 set_config_state(DAEMON_CONFIGURE);
2094         }
2095         if (log_reset_sig) {
2096                 condlog(2, "reset log (signal)");
2097                 pthread_mutex_lock(&logq_lock);
2098                 log_reset("multipathd");
2099                 pthread_mutex_unlock(&logq_lock);
2100         }
2101         exit_sig = 0;
2102         reconfig_sig = 0;
2103         log_reset_sig = 0;
2104 }
2105
2106 static void
2107 sighup (int sig)
2108 {
2109         reconfig_sig = 1;
2110 }
2111
2112 static void
2113 sigend (int sig)
2114 {
2115         exit_sig = 1;
2116 }
2117
2118 static void
2119 sigusr1 (int sig)
2120 {
2121         log_reset_sig = 1;
2122 }
2123
/*
 * SIGUSR2 handler: sets no flag, it only logs that the signal arrived.
 * NOTE(review): condlog() is called from signal context here; confirm
 * that the logging path is async-signal-safe.
 */
static void
sigusr2 (int sig)
{
	(void)sig;	/* imposed by the handler signature, not needed */

	condlog(3, "SIGUSR2 received");
}
2129
2130 static void
2131 signal_init(void)
2132 {
2133         signal_set(SIGHUP, sighup);
2134         signal_set(SIGUSR1, sigusr1);
2135         signal_set(SIGUSR2, sigusr2);
2136         signal_set(SIGINT, sigend);
2137         signal_set(SIGTERM, sigend);
2138         signal_set(SIGPIPE, sigend);
2139 }
2140
/*
 * Switch the calling process to the SCHED_RR policy at priority 99.
 * A failure is only logged; the daemon keeps running with its current
 * scheduling policy.
 */
static void
setscheduler (void)
{
	static struct sched_param rr_param = {
		.sched_priority = 99
	};

	if (sched_setscheduler(0, SCHED_RR, &rr_param) == -1)
		condlog(LOG_WARNING, "Could not set SCHED_RR at priority 99");
}
2155
/*
 * Write the minimum OOM score for this process to /proc.
 *
 * Nothing is done when the OOMScoreAdjust environment variable is set.
 * Otherwise /proc/self/oom_score_adj is tried first when
 * OOM_SCORE_ADJ_MIN is available, falling back once to
 * /proc/self/oom_adj if that file does not exist and OOM_ADJUST_MIN is
 * defined.
 *
 * Every failure is only logged.  That now includes a failing write: the
 * buffered data reaches the /proc file no later than fclose(), so both
 * the fprintf() and the fclose() results are checked.
 */
static void
set_oom_adj (void)
{
#ifdef OOM_SCORE_ADJ_MIN
	int retry = 1;
	char *file = "/proc/self/oom_score_adj";
	int score = OOM_SCORE_ADJ_MIN;
#else
	int retry = 0;
	char *file = "/proc/self/oom_adj";
	int score = OOM_ADJUST_MIN;
#endif
	FILE *fp;
	struct stat st;
	char *envp;

	envp = getenv("OOMScoreAdjust");
	if (envp) {
		condlog(3, "Using systemd provided OOMScoreAdjust");
		return;
	}
	do {
		if (stat(file, &st) == 0){
			int write_errno = 0;

			fp = fopen(file, "w");
			if (!fp) {
				condlog(0, "couldn't fopen %s : %s", file,
					strerror(errno));
				return;
			}
			if (fprintf(fp, "%i", score) < 0)
				write_errno = errno;
			/* keep the first error, but always close the stream */
			if (fclose(fp) != 0 && !write_errno)
				write_errno = errno;
			if (write_errno)
				condlog(0, "couldn't write %s : %s", file,
					strerror(write_errno));
			return;
		}
		if (errno != ENOENT) {
			condlog(0, "couldn't stat %s : %s", file,
				strerror(errno));
			return;
		}
		/* file is missing: try the older interface, if there is one */
#ifdef OOM_ADJUST_MIN
		file = "/proc/self/oom_adj";
		score = OOM_ADJUST_MIN;
#else
		retry = 0;
#endif
	} while (retry--);
	condlog(0, "couldn't adjust oom score");
}
2203
2204 static int
2205 child (void * param)
2206 {
2207         pthread_t check_thr, uevent_thr, uxlsnr_thr, uevq_thr;
2208         pthread_attr_t log_attr, misc_attr, uevent_attr;
2209         struct vectors * vecs;
2210         struct multipath * mpp;
2211         int i;
2212 #ifdef USE_SYSTEMD
2213         unsigned long checkint;
2214 #endif
2215         int rc;
2216         int pid_fd = -1;
2217         struct config *conf;
2218         char *envp;
2219
2220         mlockall(MCL_CURRENT | MCL_FUTURE);
2221         signal_init();
2222         rcu_init();
2223
2224         setup_thread_attr(&misc_attr, 64 * 1024, 0);
2225         setup_thread_attr(&uevent_attr, DEFAULT_UEVENT_STACKSIZE * 1024, 0);
2226         setup_thread_attr(&waiter_attr, 32 * 1024, 1);
2227
2228         if (logsink == 1) {
2229                 setup_thread_attr(&log_attr, 64 * 1024, 0);
2230                 log_thread_start(&log_attr);
2231                 pthread_attr_destroy(&log_attr);
2232         }
2233         pid_fd = pidfile_create(DEFAULT_PIDFILE, daemon_pid);
2234         if (pid_fd < 0) {
2235                 condlog(1, "failed to create pidfile");
2236                 if (logsink == 1)
2237                         log_thread_stop();
2238                 exit(1);
2239         }
2240
2241         post_config_state(DAEMON_START);
2242
2243         condlog(2, "--------start up--------");
2244         condlog(2, "read " DEFAULT_CONFIGFILE);
2245
2246         conf = load_config(DEFAULT_CONFIGFILE);
2247         if (!conf)
2248                 goto failed;
2249
2250         if (verbosity)
2251                 conf->verbosity = verbosity;
2252         if (bindings_read_only)
2253                 conf->bindings_read_only = bindings_read_only;
2254         if (ignore_new_devs)
2255                 conf->ignore_new_devs = ignore_new_devs;
2256         uxsock_timeout = conf->uxsock_timeout;
2257         rcu_assign_pointer(multipath_conf, conf);
2258         dm_init(conf->verbosity);
2259         dm_drv_version(conf->version, TGT_MPATH);
2260         if (init_checkers(conf->multipath_dir)) {
2261                 condlog(0, "failed to initialize checkers");
2262                 goto failed;
2263         }
2264         if (init_prio(conf->multipath_dir)) {
2265                 condlog(0, "failed to initialize prioritizers");
2266                 goto failed;
2267         }
2268
2269         setlogmask(LOG_UPTO(conf->verbosity + 3));
2270
2271         envp = getenv("LimitNOFILE");
2272
2273         if (envp) {
2274                 condlog(2,"Using systemd provided open fds limit of %s", envp);
2275         } else if (conf->max_fds) {
2276                 struct rlimit fd_limit;
2277
2278                 if (getrlimit(RLIMIT_NOFILE, &fd_limit) < 0) {
2279                         condlog(0, "can't get open fds limit: %s",
2280                                 strerror(errno));
2281                         fd_limit.rlim_cur = 0;
2282                         fd_limit.rlim_max = 0;
2283                 }
2284                 if (fd_limit.rlim_cur < conf->max_fds) {
2285                         fd_limit.rlim_cur = conf->max_fds;
2286                         if (fd_limit.rlim_max < conf->max_fds)
2287                                 fd_limit.rlim_max = conf->max_fds;
2288                         if (setrlimit(RLIMIT_NOFILE, &fd_limit) < 0) {
2289                                 condlog(0, "can't set open fds limit to "
2290                                         "%lu/%lu : %s",
2291                                         fd_limit.rlim_cur, fd_limit.rlim_max,
2292                                         strerror(errno));
2293                         } else {
2294                                 condlog(3, "set open fds limit to %lu/%lu",
2295                                         fd_limit.rlim_cur, fd_limit.rlim_max);
2296                         }
2297                 }
2298
2299         }
2300
2301         vecs = gvecs = init_vecs();
2302         if (!vecs)
2303                 goto failed;
2304
2305         setscheduler();
2306         set_oom_adj();
2307
2308         dm_udev_set_sync_support(0);
2309 #ifdef USE_SYSTEMD
2310         envp = getenv("WATCHDOG_USEC");
2311         if (envp && sscanf(envp, "%lu", &checkint) == 1) {
2312                 /* Value is in microseconds */
2313                 conf->max_checkint = checkint / 1000000;
2314                 /* Rescale checkint */
2315                 if (conf->checkint > conf->max_checkint)
2316                         conf->checkint = conf->max_checkint;
2317                 else
2318                         conf->checkint = conf->max_checkint / 4;
2319                 condlog(3, "enabling watchdog, interval %d max %d",
2320                         conf->checkint, conf->max_checkint);
2321                 use_watchdog = conf->checkint;
2322         }
2323 #endif
2324         /*
2325          * Startup done, invalidate configuration
2326          */
2327         conf = NULL;
2328
2329         /*
2330          * Signal start of configuration
2331          */
2332         post_config_state(DAEMON_CONFIGURE);
2333
2334         init_path_check_interval(vecs);
2335
2336         /*
2337          * Start uevent listener early to catch events
2338          */
2339         if ((rc = pthread_create(&uevent_thr, &uevent_attr, ueventloop, udev))) {
2340                 condlog(0, "failed to create uevent thread: %d", rc);
2341                 goto failed;
2342         }
2343         pthread_attr_destroy(&uevent_attr);
2344         if ((rc = pthread_create(&uxlsnr_thr, &misc_attr, uxlsnrloop, vecs))) {
2345                 condlog(0, "failed to create cli listener: %d", rc);
2346                 goto failed;
2347         }
2348
2349         /*
2350          * start threads
2351          */
2352         if ((rc = pthread_create(&check_thr, &misc_attr, checkerloop, vecs))) {
2353                 condlog(0,"failed to create checker loop thread: %d", rc);
2354                 goto failed;
2355         }
2356         if ((rc = pthread_create(&uevq_thr, &misc_attr, uevqloop, vecs))) {
2357                 condlog(0, "failed to create uevent dispatcher: %d", rc);
2358                 goto failed;
2359         }
2360         pthread_attr_destroy(&misc_attr);
2361
2362 #ifdef USE_SYSTEMD
2363         sd_notify(0, "READY=1");
2364 #endif
2365
2366         while (running_state != DAEMON_SHUTDOWN) {
2367                 pthread_cleanup_push(config_cleanup, NULL);
2368                 pthread_mutex_lock(&config_lock);
2369                 if (running_state != DAEMON_CONFIGURE &&
2370                     running_state != DAEMON_SHUTDOWN) {
2371                         pthread_cond_wait(&config_cond, &config_lock);
2372                 }
2373                 pthread_cleanup_pop(1);
2374                 if (running_state == DAEMON_CONFIGURE) {
2375                         pthread_cleanup_push(cleanup_lock, &vecs->lock);
2376                         lock(&vecs->lock);
2377                         pthread_testcancel();
2378                         if (!need_to_delay_reconfig(vecs)) {
2379                                 reconfigure(vecs);
2380                         } else {
2381                                 conf = get_multipath_config();
2382                                 conf->delayed_reconfig = 1;
2383                                 put_multipath_config(conf);
2384                         }
2385                         lock_cleanup_pop(vecs->lock);
2386                         post_config_state(DAEMON_IDLE);
2387                 }
2388         }
2389
2390         lock(&vecs->lock);
2391         conf = get_multipath_config();
2392         if (conf->queue_without_daemon == QUE_NO_DAEMON_OFF)
2393                 vector_foreach_slot(vecs->mpvec, mpp, i)
2394                         dm_queue_if_no_path(mpp->alias, 0);
2395         put_multipath_config(conf);
2396         remove_maps_and_stop_waiters(vecs);
2397         unlock(&vecs->lock);
2398
2399         pthread_cancel(check_thr);
2400         pthread_cancel(uevent_thr);
2401         pthread_cancel(uxlsnr_thr);
2402         pthread_cancel(uevq_thr);
2403
2404         pthread_join(check_thr, NULL);
2405         pthread_join(uevent_thr, NULL);
2406         pthread_join(uxlsnr_thr, NULL);
2407         pthread_join(uevq_thr, NULL);
2408
2409         lock(&vecs->lock);
2410         free_pathvec(vecs->pathvec, FREE_PATHS);
2411         vecs->pathvec = NULL;
2412         unlock(&vecs->lock);
2413
2414         pthread_mutex_destroy(&vecs->lock.mutex);
2415         FREE(vecs);
2416         vecs = NULL;
2417
2418         cleanup_checkers();
2419         cleanup_prio();
2420
2421         dm_lib_release();
2422         dm_lib_exit();
2423
2424         /* We're done here */
2425         condlog(3, "unlink pidfile");
2426         unlink(DEFAULT_PIDFILE);
2427
2428         condlog(2, "--------shut down-------");
2429
2430         if (logsink == 1)
2431                 log_thread_stop();
2432
2433         /*
2434          * Freeing config must be done after condlog() and dm_lib_exit(),
2435          * because logging functions like dlog() and dm_write_log()
2436          * reference the config.
2437          */
2438         conf = rcu_dereference(multipath_conf);
2439         rcu_assign_pointer(multipath_conf, NULL);
2440         call_rcu(&conf->rcu, rcu_free_config);
2441         udev_unref(udev);
2442         udev = NULL;
2443         pthread_attr_destroy(&waiter_attr);
2444 #ifdef _DEBUG_
2445         dbg_free_final(NULL);
2446 #endif
2447
2448 #ifdef USE_SYSTEMD
2449         sd_notify(0, "ERRNO=0");
2450 #endif
2451         exit(0);
2452
2453 failed:
2454 #ifdef USE_SYSTEMD
2455         sd_notify(0, "ERRNO=1");
2456 #endif
2457         if (pid_fd >= 0)
2458                 close(pid_fd);
2459         exit(1);
2460 }
2461
2462 static int
2463 daemonize(void)
2464 {
2465         int pid;
2466         int dev_null_fd;
2467
2468         if( (pid = fork()) < 0){
2469                 fprintf(stderr, "Failed first fork : %s\n", strerror(errno));
2470                 return -1;
2471         }
2472         else if (pid != 0)
2473                 return pid;
2474
2475         setsid();
2476
2477         if ( (pid = fork()) < 0)
2478                 fprintf(stderr, "Failed second fork : %s\n", strerror(errno));
2479         else if (pid != 0)
2480                 _exit(0);
2481
2482         if (chdir("/") < 0)
2483                 fprintf(stderr, "cannot chdir to '/', continuing\n");
2484
2485         dev_null_fd = open("/dev/null", O_RDWR);
2486         if (dev_null_fd < 0){
2487                 fprintf(stderr, "cannot open /dev/null for input & output : %s\n",
2488                         strerror(errno));
2489                 _exit(0);
2490         }
2491
2492         close(STDIN_FILENO);
2493         if (dup(dev_null_fd) < 0) {
2494                 fprintf(stderr, "cannot dup /dev/null to stdin : %s\n",
2495                         strerror(errno));
2496                 _exit(0);
2497         }
2498         close(STDOUT_FILENO);
2499         if (dup(dev_null_fd) < 0) {
2500                 fprintf(stderr, "cannot dup /dev/null to stdout : %s\n",
2501                         strerror(errno));
2502                 _exit(0);
2503         }
2504         close(STDERR_FILENO);
2505         if (dup(dev_null_fd) < 0) {
2506                 fprintf(stderr, "cannot dup /dev/null to stderr : %s\n",
2507                         strerror(errno));
2508                 _exit(0);
2509         }
2510         close(dev_null_fd);
2511         daemon_pid = getpid();
2512         return 0;
2513 }
2514
2515 int
2516 main (int argc, char *argv[])
2517 {
2518         extern char *optarg;
2519         extern int optind;
2520         int arg;
2521         int err;
2522         int foreground = 0;
2523         struct config *conf;
2524
2525         ANNOTATE_BENIGN_RACE_SIZED(&multipath_conf, sizeof(multipath_conf),
2526                                    "Manipulated through RCU");
2527         ANNOTATE_BENIGN_RACE_SIZED(&running_state, sizeof(running_state),
2528                 "Suppress complaints about unprotected running_state reads");
2529         ANNOTATE_BENIGN_RACE_SIZED(&uxsock_timeout, sizeof(uxsock_timeout),
2530                 "Suppress complaints about this scalar variable");
2531
2532         logsink = 1;
2533
2534         if (getuid() != 0) {
2535                 fprintf(stderr, "need to be root\n");
2536                 exit(1);
2537         }
2538
2539         /* make sure we don't lock any path */
2540         if (chdir("/") < 0)
2541                 fprintf(stderr, "can't chdir to root directory : %s\n",
2542                         strerror(errno));
2543         umask(umask(077) | 022);
2544
2545         pthread_cond_init_mono(&config_cond);
2546         
2547         udev = udev_new();
2548
2549         while ((arg = getopt(argc, argv, ":dsv:k::Bn")) != EOF ) {
2550                 switch(arg) {
2551                 case 'd':
2552                         foreground = 1;
2553                         if (logsink > 0)
2554                                 logsink = 0;
2555                         //debug=1; /* ### comment me out ### */
2556                         break;
2557                 case 'v':
2558                         if (sizeof(optarg) > sizeof(char *) ||
2559                             !isdigit(optarg[0]))
2560                                 exit(1);
2561
2562                         verbosity = atoi(optarg);
2563                         break;
2564                 case 's':
2565                         logsink = -1;
2566                         break;
2567                 case 'k':
2568                         conf = load_config(DEFAULT_CONFIGFILE);
2569                         if (!conf)
2570                                 exit(1);
2571                         if (verbosity)
2572                                 conf->verbosity = verbosity;
2573                         uxsock_timeout = conf->uxsock_timeout;
2574                         uxclnt(optarg, uxsock_timeout + 100);
2575                         exit(0);
2576                 case 'B':
2577                         bindings_read_only = 1;
2578                         break;
2579                 case 'n':
2580                         ignore_new_devs = 1;
2581                         break;
2582                 default:
2583                         fprintf(stderr, "Invalid argument '-%c'\n",
2584                                 optopt);
2585                         exit(1);
2586                 }
2587         }
2588         if (optind < argc) {
2589                 char cmd[CMDSIZE];
2590                 char * s = cmd;
2591                 char * c = s;
2592
2593                 conf = load_config(DEFAULT_CONFIGFILE);
2594                 if (!conf)
2595                         exit(1);
2596                 if (verbosity)
2597                         conf->verbosity = verbosity;
2598                 uxsock_timeout = conf->uxsock_timeout;
2599                 memset(cmd, 0x0, CMDSIZE);
2600                 while (optind < argc) {
2601                         if (strchr(argv[optind], ' '))
2602                                 c += snprintf(c, s + CMDSIZE - c, "\"%s\" ", argv[optind]);
2603                         else
2604                                 c += snprintf(c, s + CMDSIZE - c, "%s ", argv[optind]);
2605                         optind++;
2606                 }
2607                 c += snprintf(c, s + CMDSIZE - c, "\n");
2608                 uxclnt(s, uxsock_timeout + 100);
2609                 exit(0);
2610         }
2611
2612         if (foreground) {
2613                 if (!isatty(fileno(stdout)))
2614                         setbuf(stdout, NULL);
2615                 err = 0;
2616                 daemon_pid = getpid();
2617         } else
2618                 err = daemonize();
2619
2620         if (err < 0)
2621                 /* error */
2622                 exit(1);
2623         else if (err > 0)
2624                 /* parent dies */
2625                 exit(0);
2626         else
2627                 /* child lives */
2628                 return (child(NULL));
2629 }
2630
2631 void *  mpath_pr_event_handler_fn (void * pathp )
2632 {
2633         struct multipath * mpp;
2634         int i,j, ret, isFound;
2635         struct path * pp = (struct path *)pathp;
2636         unsigned char *keyp;
2637         uint64_t prkey;
2638         struct prout_param_descriptor *param;
2639         struct prin_resp *resp;
2640
2641         mpp = pp->mpp;
2642
2643         resp = mpath_alloc_prin_response(MPATH_PRIN_RKEY_SA);
2644         if (!resp){
2645                 condlog(0,"%s Alloc failed for prin response", pp->dev);
2646                 return NULL;
2647         }
2648
2649         ret = prin_do_scsi_ioctl(pp->dev, MPATH_PRIN_RKEY_SA, resp, 0);
2650         if (ret != MPATH_PR_SUCCESS )
2651         {
2652                 condlog(0,"%s : pr in read keys service action failed. Error=%d", pp->dev, ret);
2653                 goto out;
2654         }
2655
2656         condlog(3, " event pr=%d addlen=%d",resp->prin_descriptor.prin_readkeys.prgeneration,
2657                         resp->prin_descriptor.prin_readkeys.additional_length );
2658
2659         if (resp->prin_descriptor.prin_readkeys.additional_length == 0 )
2660         {
2661                 condlog(1, "%s: No key found. Device may not be registered.", pp->dev);
2662                 ret = MPATH_PR_SUCCESS;
2663                 goto out;
2664         }
2665         prkey = 0;
2666         keyp = (unsigned char *)mpp->reservation_key;
2667         for (j = 0; j < 8; ++j) {
2668                 if (j > 0)
2669                         prkey <<= 8;
2670                 prkey |= *keyp;
2671                 ++keyp;
2672         }
2673         condlog(2, "Multipath  reservation_key: 0x%" PRIx64 " ", prkey);
2674
2675         isFound =0;
2676         for (i = 0; i < resp->prin_descriptor.prin_readkeys.additional_length/8; i++ )
2677         {
2678                 condlog(2, "PR IN READKEYS[%d]  reservation key:",i);
2679                 dumpHex((char *)&resp->prin_descriptor.prin_readkeys.key_list[i*8], 8 , -1);
2680                 if (!memcmp(mpp->reservation_key, &resp->prin_descriptor.prin_readkeys.key_list[i*8], 8))
2681                 {
2682                         condlog(2, "%s: pr key found in prin readkeys response", mpp->alias);
2683                         isFound =1;
2684                         break;
2685                 }
2686         }
2687         if (!isFound)
2688         {
2689                 condlog(0, "%s: Either device not registered or ", pp->dev);
2690                 condlog(0, "host is not authorised for registration. Skip path");
2691                 ret = MPATH_PR_OTHER;
2692                 goto out;
2693         }
2694
2695         param= malloc(sizeof(struct prout_param_descriptor));
2696         memset(param, 0 , sizeof(struct prout_param_descriptor));
2697
2698         for (j = 7; j >= 0; --j) {
2699                 param->sa_key[j] = (prkey & 0xff);
2700                 prkey >>= 8;
2701         }
2702         param->num_transportid = 0;
2703
2704         condlog(3, "device %s:%s", pp->dev, pp->mpp->wwid);
2705
2706         ret = prout_do_scsi_ioctl(pp->dev, MPATH_PROUT_REG_IGN_SA, 0, 0, param, 0);
2707         if (ret != MPATH_PR_SUCCESS )
2708         {
2709                 condlog(0,"%s: Reservation registration failed. Error: %d", pp->dev, ret);
2710         }
2711         mpp->prflag = 1;
2712
2713         free(param);
2714 out:
2715         free(resp);
2716         return NULL;
2717 }
2718
2719 int mpath_pr_event_handle(struct path *pp)
2720 {
2721         pthread_t thread;
2722         int rc;
2723         pthread_attr_t attr;
2724         struct multipath * mpp;
2725
2726         mpp = pp->mpp;
2727
2728         if (!mpp->reservation_key)
2729                 return -1;
2730
2731         pthread_attr_init(&attr);
2732         pthread_attr_setdetachstate(&attr, PTHREAD_CREATE_JOINABLE);
2733
2734         rc = pthread_create(&thread, NULL , mpath_pr_event_handler_fn, pp);
2735         if (rc) {
2736                 condlog(0, "%s: ERROR; return code from pthread_create() is %d", pp->dev, rc);
2737                 return -1;
2738         }
2739         pthread_attr_destroy(&attr);
2740         rc = pthread_join(thread, NULL);
2741         return 0;
2742 }