multipathd: pid fd resource leak
multipath-tools: multipathd/main.c
1 /*
2  * Copyright (c) 2004, 2005 Christophe Varoqui
3  * Copyright (c) 2005 Kiyoshi Ueda, NEC
4  * Copyright (c) 2005 Benjamin Marzinski, Redhat
5  * Copyright (c) 2005 Edward Goggin, EMC
6  */
7 #include <unistd.h>
8 #include <sys/stat.h>
9 #include <libdevmapper.h>
10 #include <wait.h>
11 #include <sys/mman.h>
12 #include <sys/types.h>
13 #include <fcntl.h>
14 #include <errno.h>
15 #include <sys/time.h>
16 #include <sys/resource.h>
17 #include <limits.h>
18 #include <linux/oom.h>
19 #include <libudev.h>
20 #ifdef USE_SYSTEMD
21 #include <systemd/sd-daemon.h>
22 #endif
23 #include <semaphore.h>
24 #include <mpath_cmd.h>
25 #include <mpath_persist.h>
26 #include <time.h>
27
28 /*
29  * libcheckers
30  */
31 #include <checkers.h>
32
33 #ifdef USE_SYSTEMD
34 static int use_watchdog;
35 #endif
36
37 int uxsock_timeout;
38
39 /*
40  * libmultipath
41  */
42 #include <parser.h>
43 #include <vector.h>
44 #include <memory.h>
45 #include <config.h>
46 #include <util.h>
47 #include <hwtable.h>
48 #include <defaults.h>
49 #include <structs.h>
50 #include <blacklist.h>
51 #include <structs_vec.h>
52 #include <dmparser.h>
53 #include <devmapper.h>
54 #include <sysfs.h>
55 #include <dict.h>
56 #include <discovery.h>
57 #include <debug.h>
58 #include <propsel.h>
59 #include <uevent.h>
60 #include <switchgroup.h>
61 #include <print.h>
62 #include <configure.h>
63 #include <prio.h>
64 #include <wwids.h>
65 #include <pgpolicies.h>
66 #include <uevent.h>
67 #include <log.h>
68 #include "prioritizers/alua_rtpg.h"
69
70 #include "main.h"
71 #include "pidfile.h"
72 #include "uxlsnr.h"
73 #include "uxclnt.h"
74 #include "cli.h"
75 #include "cli_handlers.h"
76 #include "lock.h"
77 #include "waiter.h"
78 #include "wwids.h"
79
80 #define FILE_NAME_SIZE 256
81 #define CMDSIZE 160
82
83 #define LOG_MSG(a, b) \
84 do { \
85         if (pp->offline) \
86                 condlog(a, "%s: %s - path offline", pp->mpp->alias, pp->dev); \
87         else if (strlen(b)) \
88                 condlog(a, "%s: %s - %s", pp->mpp->alias, pp->dev, b); \
89 } while(0)
90
91 struct mpath_event_param
92 {
93         char * devname;
94         struct multipath *mpp;
95 };
96
97 unsigned int mpath_mx_alloc_len;
98
99 int logsink;
100 enum daemon_status running_state = DAEMON_INIT;
101 pid_t daemon_pid;
102 pthread_mutex_t config_lock = PTHREAD_MUTEX_INITIALIZER;
103 pthread_cond_t config_cond = PTHREAD_COND_INITIALIZER;
104
105 /*
106  * global copy of vecs for use in sig handlers
107  */
108 struct vectors * gvecs;
109
110 struct udev * udev;
111
112 const char *
113 daemon_status(void)
114 {
115         switch (running_state) {
116         case DAEMON_INIT:
117                 return "init";
118         case DAEMON_START:
119                 return "startup";
120         case DAEMON_CONFIGURE:
121                 return "configure";
122         case DAEMON_IDLE:
123                 return "idle";
124         case DAEMON_RUNNING:
125                 return "running";
126         case DAEMON_SHUTDOWN:
127                 return "shutdown";
128         }
129         return NULL;
130 }
131
132 /*
133  * I love you too, systemd ...
134  */
135 const char *
136 sd_notify_status(void)
137 {
138         switch (running_state) {
139         case DAEMON_INIT:
140                 return "STATUS=init";
141         case DAEMON_START:
142                 return "STATUS=startup";
143         case DAEMON_CONFIGURE:
144                 return "STATUS=configure";
145         case DAEMON_IDLE:
146                 return "STATUS=idle";
147         case DAEMON_RUNNING:
148                 return "STATUS=running";
149         case DAEMON_SHUTDOWN:
150                 return "STATUS=shutdown";
151         }
152         return NULL;
153 }
154
155 static void config_cleanup(void *arg)
156 {
157         pthread_mutex_unlock(&config_lock);
158 }
159
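/*
 * Set the daemon state unconditionally and wake up any thread
 * waiting on config_cond.
 */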
160 void post_config_state(enum daemon_status state)
161 {
162         pthread_mutex_lock(&config_lock);
163         if (state != running_state) {
164                 running_state = state;
165                 pthread_cond_broadcast(&config_cond);
166 #ifdef USE_SYSTEMD
167                 sd_notify(0, sd_notify_status());
168 #endif
169         }
170         pthread_mutex_unlock(&config_lock);
171 }
172
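/*
 * Request a daemon state change. If the daemon is not idle, wait up
 * to one second for the current state to settle before applying the
 * new one. Returns 0 on success, or the pthread_cond_timedwait()
 * error code if the wait timed out.
 */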
173 int set_config_state(enum daemon_status state)
174 {
175         int rc = 0;
176
177         pthread_cleanup_push(config_cleanup, NULL);
178         pthread_mutex_lock(&config_lock);
179         if (running_state != state) {
180                 if (running_state != DAEMON_IDLE) {
181                         struct timespec ts;
182
183                         clock_gettime(CLOCK_REALTIME, &ts);
184                         ts.tv_sec += 1;
185                         rc = pthread_cond_timedwait(&config_cond,
186                                                     &config_lock, &ts);
187                 }
188                 if (!rc) {
189                         running_state = state;
190                         pthread_cond_broadcast(&config_cond);
191 #ifdef USE_SYSTEMD
192                         sd_notify(0, sd_notify_status());
193 #endif
194                 }
195         }
196         pthread_cleanup_pop(1);
197         return rc;
198 }
199
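/*
 * Return 1 if the map should switch to a different path group, i.e.
 * if the best path group differs from the one the kernel will use
 * next. Optionally refresh the path priorities first.
 */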
200 static int
201 need_switch_pathgroup (struct multipath * mpp, int refresh)
202 {
203         struct pathgroup * pgp;
204         struct path * pp;
205         unsigned int i, j;
206
207         if (!mpp || mpp->pgfailback == -FAILBACK_MANUAL)
208                 return 0;
209
210         /*
211          * Refresh path priority values
212          */
213         if (refresh)
214                 vector_foreach_slot (mpp->pg, pgp, i)
215                         vector_foreach_slot (pgp->paths, pp, j)
216                                 pathinfo(pp, conf->hwtable, DI_PRIO);
217
218         if (!mpp->pg || VECTOR_SIZE(mpp->paths) == 0)
219                 return 0;
220
221         mpp->bestpg = select_path_group(mpp);
222
223         if (mpp->bestpg != mpp->nextpg)
224                 return 1;
225
226         return 0;
227 }
228
229 static void
230 switch_pathgroup (struct multipath * mpp)
231 {
232         mpp->stat_switchgroup++;
233         dm_switchgroup(mpp->alias, mpp->bestpg);
234         condlog(2, "%s: switch to path group #%i",
235                  mpp->alias, mpp->bestpg);
236 }
237
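/*
 * Flush all current maps that are not allowed by the new
 * configuration. Maps that cannot be flushed (e.g. because the
 * device is open) are kept and moved over to the new map vector.
 */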
238 static int
239 coalesce_maps(struct vectors *vecs, vector nmpv)
240 {
241         struct multipath * ompp;
242         vector ompv = vecs->mpvec;
243         unsigned int i;
244
245         vector_foreach_slot (ompv, ompp, i) {
246                 condlog(3, "%s: coalesce map", ompp->alias);
247                 if (!find_mp_by_wwid(nmpv, ompp->wwid)) {
248                         /*
249                          * remove all current maps not allowed by the
250                          * current configuration
251                          */
252                         if (dm_flush_map(ompp->alias)) {
253                                 condlog(0, "%s: unable to flush devmap",
254                                         ompp->alias);
255                                 /*
256                                  * may be just because the device is open
257                                  */
258                                 if (setup_multipath(vecs, ompp) != 0) {
259                                         i--;
260                                         continue;
261                                 }
262                                 if (!vector_alloc_slot(nmpv))
263                                         return 1;
264
265                                 vector_set_slot(nmpv, ompp);
266
267                                 vector_del_slot(ompv, i);
268                                 i--;
269                         }
270                         else {
271                                 dm_lib_release();
272                                 condlog(2, "%s devmap removed", ompp->alias);
273                         }
274                 } else if (conf->reassign_maps) {
275                         condlog(3, "%s: Reassign existing device-mapper"
276                                 " devices", ompp->alias);
277                         dm_reassign(ompp->alias);
278                 }
279         }
280         return 0;
281 }
282
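/*
 * Bring the device-mapper path states in line with the checker
 * states: reinstate paths that are up but failed in the kernel, and
 * fail paths that are down but still active in the kernel.
 */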
283 void
284 sync_map_state(struct multipath *mpp)
285 {
286         struct pathgroup *pgp;
287         struct path *pp;
288         unsigned int i, j;
289
290         if (!mpp->pg)
291                 return;
292
293         vector_foreach_slot (mpp->pg, pgp, i){
294                 vector_foreach_slot (pgp->paths, pp, j){
295                         if (pp->state == PATH_UNCHECKED || 
296                             pp->state == PATH_WILD ||
297                             pp->state == PATH_DELAYED)
298                                 continue;
299                         if ((pp->dmstate == PSTATE_FAILED ||
300                              pp->dmstate == PSTATE_UNDEF) &&
301                             (pp->state == PATH_UP || pp->state == PATH_GHOST))
302                                 dm_reinstate_path(mpp->alias, pp->dev_t);
303                         else if ((pp->dmstate == PSTATE_ACTIVE ||
304                                   pp->dmstate == PSTATE_UNDEF) &&
305                                  (pp->state == PATH_DOWN ||
306                                   pp->state == PATH_SHAKY))
307                                 dm_fail_path(mpp->alias, pp->dev_t);
308                 }
309         }
310 }
311
312 static void
313 sync_maps_state(vector mpvec)
314 {
315         unsigned int i;
316         struct multipath *mpp;
317
318         vector_foreach_slot (mpvec, mpp, i)
319                 sync_map_state(mpp);
320 }
321
322 static int
323 flush_map(struct multipath * mpp, struct vectors * vecs, int nopaths)
324 {
325         int r;
326
327         if (nopaths)
328                 r = dm_flush_map_nopaths(mpp->alias, mpp->deferred_remove);
329         else
330                 r = dm_flush_map(mpp->alias);
331         /*
332          * clear references to this map before flushing so we can ignore
333          * the spurious uevent we may generate with the dm_flush_map call below
334          */
335         if (r) {
336                 /*
337                  * May not really be an error -- if the map was already flushed
338                  * from the device mapper by dmsetup(8) for instance.
339                  */
340                 if (r == 1)
341                         condlog(0, "%s: can't flush", mpp->alias);
342                 else {
343                         condlog(2, "%s: devmap deferred remove", mpp->alias);
344                         mpp->deferred_remove = DEFERRED_REMOVE_IN_PROGRESS;
345                 }
346                 return r;
347         }
348         else {
349                 dm_lib_release();
350                 condlog(2, "%s: map flushed", mpp->alias);
351         }
352
353         orphan_paths(vecs->pathvec, mpp);
354         remove_map_and_stop_waiter(mpp, vecs, 1);
355
356         return 0;
357 }
358
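/*
 * Re-adopt the paths of an existing map and reload its table,
 * retrying the reload up to three times before giving up.
 */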
359 int
360 update_map (struct multipath *mpp, struct vectors *vecs)
361 {
362         int retries = 3;
363         char params[PARAMS_SIZE] = {0};
364
365 retry:
366         condlog(4, "%s: updating new map", mpp->alias);
367         if (adopt_paths(vecs->pathvec, mpp)) {
368                 condlog(0, "%s: failed to adopt paths for new map update",
369                         mpp->alias);
370                 retries = -1;
371                 goto fail;
372         }
373         verify_paths(mpp, vecs);
374         mpp->flush_on_last_del = FLUSH_UNDEF;
375         mpp->action = ACT_RELOAD;
376
377         if (setup_map(mpp, params, PARAMS_SIZE)) {
378                 condlog(0, "%s: failed to setup new map in update", mpp->alias);
379                 retries = -1;
380                 goto fail;
381         }
382         if (domap(mpp, params) <= 0 && retries-- > 0) {
383                 condlog(0, "%s: update_map sleep", mpp->alias);
384                 sleep(1);
385                 goto retry;
386         }
387         dm_lib_release();
388
389 fail:
390         if (setup_multipath(vecs, mpp))
391                 return 1;
392
393         sync_map_state(mpp);
394
395         if (retries < 0)
396                 condlog(0, "%s: failed reload in new map update", mpp->alias);
397         return 0;
398 }
399
400 static int
401 uev_add_map (struct uevent * uev, struct vectors * vecs)
402 {
403         char *alias;
404         int major = -1, minor = -1, rc;
405
406         condlog(3, "%s: add map (uevent)", uev->kernel);
407         alias = uevent_get_dm_name(uev);
408         if (!alias) {
409                 condlog(3, "%s: No DM_NAME in uevent", uev->kernel);
410                 major = uevent_get_major(uev);
411                 minor = uevent_get_minor(uev);
412                 alias = dm_mapname(major, minor);
413                 if (!alias) {
414                         condlog(2, "%s: mapname not found for %d:%d",
415                                 uev->kernel, major, minor);
416                         return 1;
417                 }
418         }
419         pthread_cleanup_push(cleanup_lock, &vecs->lock);
420         lock(vecs->lock);
421         pthread_testcancel();
422         rc = ev_add_map(uev->kernel, alias, vecs);
423         lock_cleanup_pop(vecs->lock);
424         FREE(alias);
425         return rc;
426 }
427
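/*
 * Register a map with the daemon. If the map already exists in
 * device-mapper, it is added to the map vector as-is; otherwise the
 * paths matching its wwid are coalesced into a new map.
 */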
428 int
429 ev_add_map (char * dev, char * alias, struct vectors * vecs)
430 {
431         char * refwwid;
432         struct multipath * mpp;
433         int map_present;
434         int r = 1;
435
436         map_present = dm_map_present(alias);
437
438         if (map_present && !dm_is_mpath(alias)) {
439                 condlog(4, "%s: not a multipath map", alias);
440                 return 0;
441         }
442
443         mpp = find_mp_by_alias(vecs->mpvec, alias);
444
445         if (mpp) {
446                 if (mpp->wait_for_udev > 1) {
447                         if (update_map(mpp, vecs))
448                                 /* setup multipathd removed the map */
449                                 return 1;
450                 }
451                 if (mpp->wait_for_udev) {
452                         mpp->wait_for_udev = 0;
453                         if (conf->delayed_reconfig &&
454                             !need_to_delay_reconfig(vecs)) {
455                                 condlog(2, "reconfigure (delayed)");
456                                 set_config_state(DAEMON_CONFIGURE);
457                                 return 0;
458                         }
459                 }
460                 /*
461                  * Not really an error -- we generate our own uevent
462                  * if we create a multipath mapped device as a result
463                  * of uev_add_path
464                  */
465                 if (conf->reassign_maps) {
466                         condlog(3, "%s: Reassign existing device-mapper devices",
467                                 alias);
468                         dm_reassign(alias);
469                 }
470                 return 0;
471         }
472         condlog(2, "%s: adding map", alias);
473
474         /*
475          * now we can register the map
476          */
477         if (map_present) {
478                 if ((mpp = add_map_without_path(vecs, alias))) {
479                         sync_map_state(mpp);
480                         condlog(2, "%s: devmap %s registered", alias, dev);
481                         return 0;
482                 } else {
483                         condlog(2, "%s: uev_add_map failed", dev);
484                         return 1;
485                 }
486         }
487         r = get_refwwid(dev, DEV_DEVMAP, vecs->pathvec, &refwwid);
488
489         if (refwwid) {
490                 r = coalesce_paths(vecs, NULL, refwwid, 0);
491                 dm_lib_release();
492         }
493
494         if (!r)
495                 condlog(2, "%s: devmap %s added", alias, dev);
496         else if (r == 2)
497                 condlog(2, "%s: uev_add_map %s blacklisted", alias, dev);
498         else
499                 condlog(0, "%s: uev_add_map %s failed", alias, dev);
500
501         FREE(refwwid);
502         return r;
503 }
504
505 static int
506 uev_remove_map (struct uevent * uev, struct vectors * vecs)
507 {
508         char *alias;
509         int minor;
510         struct multipath *mpp;
511
512         condlog(2, "%s: remove map (uevent)", uev->kernel);
513         alias = uevent_get_dm_name(uev);
514         if (!alias) {
515                 condlog(3, "%s: No DM_NAME in uevent, ignoring", uev->kernel);
516                 return 0;
517         }
518         minor = uevent_get_minor(uev);
519
520         pthread_cleanup_push(cleanup_lock, &vecs->lock);
521         lock(vecs->lock);
522         pthread_testcancel();
523         mpp = find_mp_by_minor(vecs->mpvec, minor);
524
525         if (!mpp) {
526                 condlog(2, "%s: devmap not registered, can't remove",
527                         uev->kernel);
528                 goto out;
529         }
530         if (strcmp(mpp->alias, alias)) {
531                 condlog(2, "%s: minor number mismatch (map %d, event %d)",
532                         mpp->alias, mpp->dmi->minor, minor);
533                 goto out;
534         }
535
536         orphan_paths(vecs->pathvec, mpp);
537         remove_map_and_stop_waiter(mpp, vecs, 1);
538 out:
539         lock_cleanup_pop(vecs->lock);
540         FREE(alias);
541         return 0;
542 }
543
544 /* Called from CLI handler */
545 int
546 ev_remove_map (char * devname, char * alias, int minor, struct vectors * vecs)
547 {
548         struct multipath * mpp;
549
550         mpp = find_mp_by_minor(vecs->mpvec, minor);
551
552         if (!mpp) {
553                 condlog(2, "%s: devmap not registered, can't remove",
554                         devname);
555                 return 1;
556         }
557         if (strcmp(mpp->alias, alias)) {
558                 condlog(2, "%s: minor number mismatch (map %d, event %d)",
559                         mpp->alias, mpp->dmi->minor, minor);
560                 return 1;
561         }
562         return flush_map(mpp, vecs, 0);
563 }
564
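/*
 * Handle a block device "add" uevent: ignore blacklisted devnodes,
 * reinitialize a path that is already in the pathvec if needed,
 * otherwise gather path information and hand the new path over to
 * ev_add_path().
 */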
565 static int
566 uev_add_path (struct uevent *uev, struct vectors * vecs)
567 {
568         struct path *pp;
569         int ret = 0, i;
570
571         condlog(2, "%s: add path (uevent)", uev->kernel);
572         if (strstr(uev->kernel, "..") != NULL) {
573                 /*
574                  * Don't allow relative device names in the pathvec
575                  */
576                 condlog(0, "%s: path name is invalid", uev->kernel);
577                 return 1;
578         }
579
580         pthread_cleanup_push(cleanup_lock, &vecs->lock);
581         lock(vecs->lock);
582         pthread_testcancel();
583         if (filter_devnode(conf->blist_devnode, conf->elist_devnode,
584                            uev->kernel) > 0) {
585                 ret = 0;
586                 goto out_unlock;
587         }
588         pp = find_path_by_dev(vecs->pathvec, uev->kernel);
589         if (pp) {
590                 int r;
591
592                 condlog(0, "%s: spurious uevent, path already in pathvec",
593                         uev->kernel);
594                 if (!pp->mpp && !strlen(pp->wwid)) {
595                         condlog(3, "%s: reinitialize path", uev->kernel);
596                         udev_device_unref(pp->udev);
597                         pp->udev = udev_device_ref(uev->udev);
598                         r = pathinfo(pp, conf->hwtable,
599                                      DI_ALL | DI_BLACKLIST);
600                         if (r == PATHINFO_OK)
601                                 ret = ev_add_path(pp, vecs);
602                         else if (r == PATHINFO_SKIPPED) {
603                                 condlog(3, "%s: remove blacklisted path",
604                                         uev->kernel);
605                                 i = find_slot(vecs->pathvec, (void *)pp);
606                                 if (i != -1)
607                                         vector_del_slot(vecs->pathvec, i);
608                                 free_path(pp);
609                         } else {
610                                 condlog(0, "%s: failed to reinitialize path",
611                                         uev->kernel);
612                                 ret = 1;
613                         }
614                 }
615         }
616         lock_cleanup_pop(vecs->lock);
617         if (pp)
618                 return ret;
619
620         /*
621          * get path vital state
622          */
623         ret = alloc_path_with_pathinfo(conf->hwtable, uev->udev,
624                                        DI_ALL, &pp);
625         if (!pp) {
626                 if (ret == PATHINFO_SKIPPED)
627                         return 0;
628                 condlog(3, "%s: failed to get path info", uev->kernel);
629                 return 1;
630         }
631         pthread_cleanup_push(cleanup_lock, &vecs->lock);
632         lock(vecs->lock);
633         pthread_testcancel();
634         ret = store_path(vecs->pathvec, pp);
635         if (!ret) {
636                 pp->checkint = conf->checkint;
637                 ret = ev_add_path(pp, vecs);
638         } else {
639                 condlog(0, "%s: failed to store path info, "
640                         "dropping event",
641                         uev->kernel);
642                 free_path(pp);
643                 ret = 1;
644         }
645 out_unlock:
646         lock_cleanup_pop(vecs->lock);
647         return ret;
648 }
649
650 /*
651  * returns:
652  * 0: added
653  * 1: error
654  */
655 int
656 ev_add_path (struct path * pp, struct vectors * vecs)
657 {
658         struct multipath * mpp;
659         char params[PARAMS_SIZE] = {0};
660         int retries = 3;
661         int start_waiter = 0;
662         int ret;
663
664         /*
665          * need path UID to go any further
666          */
667         if (strlen(pp->wwid) == 0) {
668                 condlog(0, "%s: failed to get path uid", pp->dev);
669                 goto fail; /* leave path added to pathvec */
670         }
671         mpp = find_mp_by_wwid(vecs->mpvec, pp->wwid);
672         if (mpp && mpp->wait_for_udev) {
673                 mpp->wait_for_udev = 2;
674                 orphan_path(pp, "waiting for create to complete");
675                 return 0;
676         }
677
678         pp->mpp = mpp;
679 rescan:
680         if (mpp) {
681                 if (pp->size && mpp->size != pp->size) {
682                         condlog(0, "%s: failed to add new path %s, "
683                                 "device size mismatch",
684                                 mpp->alias, pp->dev);
685                         int i = find_slot(vecs->pathvec, (void *)pp);
686                         if (i != -1)
687                                 vector_del_slot(vecs->pathvec, i);
688                         free_path(pp);
689                         return 1;
690                 }
691
692                 condlog(4,"%s: adopting all paths for path %s",
693                         mpp->alias, pp->dev);
694                 if (adopt_paths(vecs->pathvec, mpp))
695                         goto fail; /* leave path added to pathvec */
696
697                 verify_paths(mpp, vecs);
698                 mpp->flush_on_last_del = FLUSH_UNDEF;
699                 mpp->action = ACT_RELOAD;
700         } else {
701                 if (!should_multipath(pp, vecs->pathvec)) {
702                         orphan_path(pp, "only one path");
703                         return 0;
704                 }
705                 condlog(4,"%s: creating new map", pp->dev);
706                 if ((mpp = add_map_with_path(vecs, pp, 1))) {
707                         mpp->action = ACT_CREATE;
708                         /*
709                          * We don't depend on ACT_CREATE, as domap will
710                          * set it to ACT_NOTHING when complete.
711                          */
712                         start_waiter = 1;
713                 }
714                 if (!start_waiter)
715                         goto fail; /* leave path added to pathvec */
716         }
717
718         /* persistent reservation check */
719         mpath_pr_event_handle(pp);
720
721         /*
722          * push the map to the device-mapper
723          */
724         if (setup_map(mpp, params, PARAMS_SIZE)) {
725                 condlog(0, "%s: failed to setup map for addition of new "
726                         "path %s", mpp->alias, pp->dev);
727                 goto fail_map;
728         }
729         /*
730          * reload the map for the multipath mapped device
731          */
732 retry:
733         ret = domap(mpp, params);
734         if (ret <= 0) {
735                 if (ret < 0 && retries-- > 0) {
736                         condlog(0, "%s: retry domap for addition of new "
737                                 "path %s", mpp->alias, pp->dev);
738                         sleep(1);
739                         goto retry;
740                 }
741                 condlog(0, "%s: failed in domap for addition of new "
742                         "path %s", mpp->alias, pp->dev);
743                 /*
744                  * deal with asynchronous uevents :((
745                  */
746                 if (mpp->action == ACT_RELOAD && retries-- > 0) {
747                         condlog(0, "%s: ev_add_path sleep", mpp->alias);
748                         sleep(1);
749                         update_mpp_paths(mpp, vecs->pathvec);
750                         goto rescan;
751                 }
752                 else if (mpp->action == ACT_RELOAD)
753                         condlog(0, "%s: giving up reload", mpp->alias);
754                 else
755                         goto fail_map;
756         }
757         dm_lib_release();
758
759         /*
760          * update our state from kernel regardless of create or reload
761          */
762         if (setup_multipath(vecs, mpp))
763                 goto fail; /* if setup_multipath fails, it removes the map */
764
765         sync_map_state(mpp);
766
767         if ((mpp->action == ACT_CREATE ||
768              (mpp->action == ACT_NOTHING && start_waiter && !mpp->waiter)) &&
769             start_waiter_thread(mpp, vecs))
770                 goto fail_map;
771
772         if (retries >= 0) {
773                 condlog(2, "%s [%s]: path added to devmap %s",
774                         pp->dev, pp->dev_t, mpp->alias);
775                 return 0;
776         } else
777                 goto fail;
778
779 fail_map:
780         remove_map(mpp, vecs, 1);
781 fail:
782         orphan_path(pp, "failed to add path");
783         return 1;
784 }
785
786 static int
787 uev_remove_path (struct uevent *uev, struct vectors * vecs)
788 {
789         struct path *pp = NULL;
790         int ret = 0;
791
792         condlog(2, "%s: remove path (uevent)", uev->kernel);
793         pthread_cleanup_push(cleanup_lock, &vecs->lock);
794         lock(vecs->lock);
795         pthread_testcancel();
796         if (filter_devnode(conf->blist_devnode, conf->elist_devnode,
797                            uev->kernel) == 0) {
798                 pp = find_path_by_dev(vecs->pathvec, uev->kernel);
799                 if (pp)
800                         ret = ev_remove_path(pp, vecs);
801                 else
802                         /* Not an error; path might have been purged earlier */
803                         condlog(0, "%s: path already removed", uev->kernel);
804         }
805         lock_cleanup_pop(vecs->lock);
806         return ret;
807 }
808
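/*
 * Remove a path from its map. If it was the last path, the whole map
 * is flushed; otherwise the map is reloaded without the path.
 * Returns 0 on success, 1 on failure.
 */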
809 int
810 ev_remove_path (struct path *pp, struct vectors * vecs)
811 {
812         struct multipath * mpp;
813         int i, retval = 0;
814         char params[PARAMS_SIZE] = {0};
815
816         /*
817          * avoid referring to the map of an orphaned path
818          */
819         if ((mpp = pp->mpp)) {
820                 /*
821                  * transform the mp->pg vector of vectors of paths
822                  * into a mp->params string to feed the device-mapper
823                  */
824                 if (update_mpp_paths(mpp, vecs->pathvec)) {
825                         condlog(0, "%s: failed to update paths",
826                                 mpp->alias);
827                         goto fail;
828                 }
829                 if ((i = find_slot(mpp->paths, (void *)pp)) != -1)
830                         vector_del_slot(mpp->paths, i);
831
832                 /*
833                  * remove the map IFF removing the last path
834                  */
835                 if (VECTOR_SIZE(mpp->paths) == 0) {
836                         char alias[WWID_SIZE];
837
838                         /*
839                          * flush_map will fail if the device is open
840                          */
841                         strncpy(alias, mpp->alias, WWID_SIZE);
842                         if (mpp->flush_on_last_del == FLUSH_ENABLED) {
843                                 condlog(2, "%s Last path deleted, disabling queueing", mpp->alias);
844                                 mpp->retry_tick = 0;
845                                 mpp->no_path_retry = NO_PATH_RETRY_FAIL;
846                                 mpp->flush_on_last_del = FLUSH_IN_PROGRESS;
847                                 dm_queue_if_no_path(mpp->alias, 0);
848                         }
849                         if (!flush_map(mpp, vecs, 1)) {
850                                 condlog(2, "%s: removed map after"
851                                         " removing all paths",
852                                         alias);
853                                 retval = 0;
854                                 goto out;
855                         }
856                         /*
857                          * Not an error, continue
858                          */
859                 }
860
861                 if (setup_map(mpp, params, PARAMS_SIZE)) {
862                         condlog(0, "%s: failed to setup map for"
863                                 " removal of path %s", mpp->alias, pp->dev);
864                         goto fail;
865                 }
866
867                 if (mpp->wait_for_udev) {
868                         mpp->wait_for_udev = 2;
869                         goto out;
870                 }
871
872                 /*
873                  * reload the map
874                  */
875                 mpp->action = ACT_RELOAD;
876                 if (domap(mpp, params) <= 0) {
877                         condlog(0, "%s: failed in domap for "
878                                 "removal of path %s",
879                                 mpp->alias, pp->dev);
880                         retval = 1;
881                 } else {
882                         /*
883                          * update our state from kernel
884                          */
885                         if (setup_multipath(vecs, mpp))
886                                 return 1;
887                         sync_map_state(mpp);
888
889                         condlog(2, "%s [%s]: path removed from map %s",
890                                 pp->dev, pp->dev_t, mpp->alias);
891                 }
892         }
893
894 out:
895         if ((i = find_slot(vecs->pathvec, (void *)pp)) != -1)
896                 vector_del_slot(vecs->pathvec, i);
897
898         free_path(pp);
899
900         return retval;
901
902 fail:
903         remove_map_and_stop_waiter(mpp, vecs, 1);
904         return 1;
905 }
906
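/*
 * Handle a "change" uevent for a path device: reload the owning map
 * when the read-only attribute changes, or re-add the path if it was
 * waiting for a triggered uevent.
 */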
907 static int
908 uev_update_path (struct uevent *uev, struct vectors * vecs)
909 {
910         int ro, retval = 0;
911
912         ro = uevent_get_disk_ro(uev);
913
914         if (ro >= 0) {
915                 struct path * pp = NULL;
916                 struct multipath *mpp = NULL;
917
918                 condlog(2, "%s: update path write_protect to '%d' (uevent)",
919                         uev->kernel, ro);
920                 pthread_cleanup_push(cleanup_lock, &vecs->lock);
921                 lock(vecs->lock);
922                 pthread_testcancel();
923                 /*
924                  * pthread_mutex_lock() and pthread_mutex_unlock()
925                  * need to be at the same indentation level, hence
926                  * this slightly convoluted codepath.
927                  */
928                 if (filter_devnode(conf->blist_devnode, conf->elist_devnode,
929                                    uev->kernel) > 0) {
930                         goto out_unlock;
931                 }
932                 pp = find_path_by_dev(vecs->pathvec, uev->kernel);
933                 if (pp) {
934                         if (pp->initialized == INIT_REQUESTED_UDEV) {
935                                 retval = 2;
936                         } else {
937                                 mpp = pp->mpp;
938                                 if (mpp && mpp->wait_for_udev) {
939                                         mpp->wait_for_udev = 2;
940                                         mpp = NULL;
941                                         retval = 0;
942                                 }
943                         }
944                         if (mpp) {
945                                 retval = reload_map(vecs, mpp, 0);
946
947                                 condlog(2, "%s: map %s reloaded (retval %d)",
948                                         uev->kernel, mpp->alias, retval);
949                         }
950                 }
951         out_unlock:
952                 lock_cleanup_pop(vecs->lock);
953                 if (!pp) {
954                         if (retval)
955                                 condlog(0, "%s: spurious uevent, path not found",
956                                         uev->kernel);
957                         return retval;
958                 }
959                 if (retval == 2)
960                         return uev_add_path(uev, vecs);
961         }
962
963         return retval;
964 }
965
966 static int
967 map_discovery (struct vectors * vecs)
968 {
969         struct multipath * mpp;
970         unsigned int i;
971
972         if (dm_get_maps(vecs->mpvec))
973                 return 1;
974
975         vector_foreach_slot (vecs->mpvec, mpp, i)
976                 if (setup_multipath(vecs, mpp))
977                         return 1;
978
979         return 0;
980 }
981
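/*
 * Callback for the unix socket listener: parse a CLI command and
 * build the reply. Returns 0 on success, 1 on error or timeout;
 * negative parse_cmd() results are passed through unchanged.
 */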
982 int
983 uxsock_trigger (char * str, char ** reply, int * len, void * trigger_data)
984 {
985         struct vectors * vecs;
986         int r;
987
988         *reply = NULL;
989         *len = 0;
990         vecs = (struct vectors *)trigger_data;
991
992         r = parse_cmd(str, reply, len, vecs, uxsock_timeout / 1000);
993
994         if (r > 0) {
995                 if (r == ETIMEDOUT)
996                         *reply = STRDUP("timeout\n");
997                 else
998                         *reply = STRDUP("fail\n");
999                 *len = strlen(*reply) + 1;
1000                 r = 1;
1001         }
1002         else if (!r && *len == 0) {
1003                 *reply = STRDUP("ok\n");
1004                 *len = strlen(*reply) + 1;
1005                 r = 0;
1006         }
1007         /* else if (r < 0) leave *reply alone */
1008
1009         return r;
1010 }
1011
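/*
 * Filter uevents by devpath: keep only whole block devices and
 * discard events for partitions. Returns 1 if the event should be
 * discarded.
 */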
1012 static int
1013 uev_discard(char * devpath)
1014 {
1015         char *tmp;
1016         char a[11], b[11];
1017
1018         /*
1019          * keep only block devices, discard partitions
1020          */
1021         tmp = strstr(devpath, "/block/");
1022         if (tmp == NULL){
1023                 condlog(4, "no /block/ in '%s'", devpath);
1024                 return 1;
1025         }
1026         if (sscanf(tmp, "/block/%10s", a) != 1 ||
1027             sscanf(tmp, "/block/%10[^/]/%10s", a, b) == 2) {
1028                 condlog(4, "discard event on %s", devpath);
1029                 return 1;
1030         }
1031         return 0;
1032 }
1033
1034 int
1035 uev_trigger (struct uevent * uev, void * trigger_data)
1036 {
1037         int r = 0;
1038         struct vectors * vecs;
1039
1040         vecs = (struct vectors *)trigger_data;
1041
1042         if (uev_discard(uev->devpath))
1043                 return 0;
1044
1045         pthread_cleanup_push(config_cleanup, NULL);
1046         pthread_mutex_lock(&config_lock);
1047         if (running_state != DAEMON_IDLE &&
1048             running_state != DAEMON_RUNNING)
1049                 pthread_cond_wait(&config_cond, &config_lock);
1050         pthread_cleanup_pop(1);
1051
1052         if (running_state == DAEMON_SHUTDOWN)
1053                 return 0;
1054
1055         /*
1056          * device map event
1057          * Add events are ignored here as the tables
1058          * are not fully initialised then.
1059          */
1060         if (!strncmp(uev->kernel, "dm-", 3)) {
1061                 if (!strncmp(uev->action, "change", 6)) {
1062                         r = uev_add_map(uev, vecs);
1063                         goto out;
1064                 }
1065                 if (!strncmp(uev->action, "remove", 6)) {
1066                         r = uev_remove_map(uev, vecs);
1067                         goto out;
1068                 }
1069                 goto out;
1070         }
1071
1072         /*
1073          * path add/remove event
1074          */
1075         if (!strncmp(uev->action, "add", 3)) {
1076                 r = uev_add_path(uev, vecs);
1077                 goto out;
1078         }
1079         if (!strncmp(uev->action, "remove", 6)) {
1080                 r = uev_remove_path(uev, vecs);
1081                 goto out;
1082         }
1083         if (!strncmp(uev->action, "change", 6)) {
1084                 r = uev_update_path(uev, vecs);
1085                 goto out;
1086         }
1087
1088 out:
1089         return r;
1090 }
1091
1092 static void *
1093 ueventloop (void * ap)
1094 {
1095         struct udev *udev = ap;
1096
1097         if (uevent_listen(udev))
1098                 condlog(0, "error starting uevent listener");
1099
1100         return NULL;
1101 }
1102
1103 static void *
1104 uevqloop (void * ap)
1105 {
1106         if (uevent_dispatch(&uev_trigger, ap))
1107                 condlog(0, "error starting uevent dispatcher");
1108
1109         return NULL;
1110 }
1111 static void *
1112 uxlsnrloop (void * ap)
1113 {
1114         if (cli_init()) {
1115                 condlog(1, "Failed to init uxsock listener");
1116                 return NULL;
1117         }
1118
1119         set_handler_callback(LIST+PATHS, cli_list_paths);
1120         set_handler_callback(LIST+PATHS+FMT, cli_list_paths_fmt);
1121         set_handler_callback(LIST+PATHS+RAW+FMT, cli_list_paths_raw);
1122         set_handler_callback(LIST+PATH, cli_list_path);
1123         set_handler_callback(LIST+MAPS, cli_list_maps);
1124         set_unlocked_handler_callback(LIST+STATUS, cli_list_status);
1125         set_unlocked_handler_callback(LIST+DAEMON, cli_list_daemon);
1126         set_handler_callback(LIST+MAPS+STATUS, cli_list_maps_status);
1127         set_handler_callback(LIST+MAPS+STATS, cli_list_maps_stats);
1128         set_handler_callback(LIST+MAPS+FMT, cli_list_maps_fmt);
1129         set_handler_callback(LIST+MAPS+RAW+FMT, cli_list_maps_raw);
1130         set_handler_callback(LIST+MAPS+TOPOLOGY, cli_list_maps_topology);
1131         set_handler_callback(LIST+TOPOLOGY, cli_list_maps_topology);
1132         set_handler_callback(LIST+MAPS+JSON, cli_list_maps_json);
1133         set_handler_callback(LIST+MAP+TOPOLOGY, cli_list_map_topology);
1134         set_handler_callback(LIST+MAP+FMT, cli_list_map_fmt);
1135         set_handler_callback(LIST+MAP+RAW+FMT, cli_list_map_fmt);
1136         set_handler_callback(LIST+MAP+JSON, cli_list_map_json);
1137         set_handler_callback(LIST+CONFIG, cli_list_config);
1138         set_handler_callback(LIST+BLACKLIST, cli_list_blacklist);
1139         set_handler_callback(LIST+DEVICES, cli_list_devices);
1140         set_handler_callback(LIST+WILDCARDS, cli_list_wildcards);
1141         set_handler_callback(ADD+PATH, cli_add_path);
1142         set_handler_callback(DEL+PATH, cli_del_path);
1143         set_handler_callback(ADD+MAP, cli_add_map);
1144         set_handler_callback(DEL+MAP, cli_del_map);
1145         set_handler_callback(SWITCH+MAP+GROUP, cli_switch_group);
1146         set_unlocked_handler_callback(RECONFIGURE, cli_reconfigure);
1147         set_handler_callback(SUSPEND+MAP, cli_suspend);
1148         set_handler_callback(RESUME+MAP, cli_resume);
1149         set_handler_callback(RESIZE+MAP, cli_resize);
1150         set_handler_callback(RELOAD+MAP, cli_reload);
1151         set_handler_callback(RESET+MAP, cli_reassign);
1152         set_handler_callback(REINSTATE+PATH, cli_reinstate);
1153         set_handler_callback(FAIL+PATH, cli_fail);
1154         set_handler_callback(DISABLEQ+MAP, cli_disable_queueing);
1155         set_handler_callback(RESTOREQ+MAP, cli_restore_queueing);
1156         set_handler_callback(DISABLEQ+MAPS, cli_disable_all_queueing);
1157         set_handler_callback(RESTOREQ+MAPS, cli_restore_all_queueing);
1158         set_unlocked_handler_callback(QUIT, cli_quit);
1159         set_unlocked_handler_callback(SHUTDOWN, cli_shutdown);
1160         set_handler_callback(GETPRSTATUS+MAP, cli_getprstatus);
1161         set_handler_callback(SETPRSTATUS+MAP, cli_setprstatus);
1162         set_handler_callback(UNSETPRSTATUS+MAP, cli_unsetprstatus);
1163         set_handler_callback(FORCEQ+DAEMON, cli_force_no_daemon_q);
1164         set_handler_callback(RESTOREQ+DAEMON, cli_restore_no_daemon_q);
1165
1166         umask(077);
1167         uxsock_listen(&uxsock_trigger, ap);
1168
1169         return NULL;
1170 }
1171
1172 void
1173 exit_daemon (void)
1174 {
1175         post_config_state(DAEMON_SHUTDOWN);
1176 }
1177
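/*
 * Mark a path as failed in the device-mapper table and, if it was
 * counted as active, update the map's queueing mode.
 */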
1178 static void
1179 fail_path (struct path * pp, int del_active)
1180 {
1181         if (!pp->mpp)
1182                 return;
1183
1184         condlog(2, "checker failed path %s in map %s",
1185                  pp->dev_t, pp->mpp->alias);
1186
1187         dm_fail_path(pp->mpp->alias, pp->dev_t);
1188         if (del_active)
1189                 update_queue_mode_del_path(pp->mpp);
1190 }
1191
1192 /*
1193  * caller must have locked the path list before calling that function
1194  */
1195 static int
1196 reinstate_path (struct path * pp, int add_active)
1197 {
1198         int ret = 0;
1199
1200         if (!pp->mpp)
1201                 return 0;
1202
1203         if (dm_reinstate_path(pp->mpp->alias, pp->dev_t)) {
1204                 condlog(0, "%s: reinstate failed", pp->dev_t);
1205                 ret = 1;
1206         } else {
1207                 condlog(2, "%s: reinstated", pp->dev_t);
1208                 if (add_active)
1209                         update_queue_mode_add_path(pp->mpp);
1210         }
1211         return ret;
1212 }
1213
1214 static void
1215 enable_group(struct path * pp)
1216 {
1217         struct pathgroup * pgp;
1218
1219         /*
1220          * if path is added through uev_add_path, pgindex can be unset.
1221          * next update_multipath_strings() will set it, upon map reload event.
1222          *
1223          * we can safely return here, because upon map reload, all
1224          * PG will be enabled.
1225          */
1226         if (!pp->mpp->pg || !pp->pgindex)
1227                 return;
1228
1229         pgp = VECTOR_SLOT(pp->mpp->pg, pp->pgindex - 1);
1230
1231         if (pgp->status == PGSTATE_DISABLED) {
1232                 condlog(2, "%s: enable group #%i", pp->mpp->alias, pp->pgindex);
1233                 dm_enablegroup(pp->mpp->alias, pp->pgindex);
1234         }
1235 }
1236
1237 static void
1238 mpvec_garbage_collector (struct vectors * vecs)
1239 {
1240         struct multipath * mpp;
1241         unsigned int i;
1242
1243         if (!vecs->mpvec)
1244                 return;
1245
1246         vector_foreach_slot (vecs->mpvec, mpp, i) {
1247                 if (mpp && mpp->alias && !dm_map_present(mpp->alias)) {
1248                         condlog(2, "%s: remove dead map", mpp->alias);
1249                         remove_map_and_stop_waiter(mpp, vecs, 1);
1250                         i--;
1251                 }
1252         }
1253 }
1254
1255 /* This is called after a path has started working again. If the multipath
1256  * device for this path uses the followover failback type, and this is the
1257  * best pathgroup, and this is the first path in the pathgroup to come back
1258  * up, then switch to this pathgroup */
1259 static int
1260 followover_should_failback(struct path * pp)
1261 {
1262         struct pathgroup * pgp;
1263         struct path *pp1;
1264         int i;
1265
1266         if (pp->mpp->pgfailback != -FAILBACK_FOLLOWOVER ||
1267             !pp->mpp->pg || !pp->pgindex ||
1268             pp->pgindex != pp->mpp->bestpg)
1269                 return 0;
1270
1271         pgp = VECTOR_SLOT(pp->mpp->pg, pp->pgindex - 1);
1272         vector_foreach_slot(pgp->paths, pp1, i) {
1273                 if (pp1 == pp)
1274                         continue;
1275                 if (pp1->chkrstate != PATH_DOWN && pp1->chkrstate != PATH_SHAKY)
1276                         return 0;
1277         }
1278         return 1;
1279 }
1280
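/*
 * Per-tick check for maps still waiting on a creation uevent. Once
 * the wait time expires, reloads are re-enabled and, if configured,
 * a delayed reconfigure is triggered.
 */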
1281 static void
1282 missing_uev_wait_tick(struct vectors *vecs)
1283 {
1284         struct multipath * mpp;
1285         unsigned int i;
1286         int timed_out = 0;
1287
1288         vector_foreach_slot (vecs->mpvec, mpp, i) {
1289                 if (mpp->wait_for_udev && --mpp->uev_wait_tick <= 0) {
1290                         timed_out = 1;
1291                         condlog(0, "%s: timeout waiting on creation uevent. enabling reloads", mpp->alias);
1292                         if (mpp->wait_for_udev > 1 && update_map(mpp, vecs)) {
1293                                 /* update_map removed map */
1294                                 i--;
1295                                 continue;
1296                         }
1297                         mpp->wait_for_udev = 0;
1298                 }
1299         }
1300
1301         if (timed_out && conf->delayed_reconfig &&
1302             !need_to_delay_reconfig(vecs)) {
1303                 condlog(2, "reconfigure (delayed)");
1304                 set_config_state(DAEMON_CONFIGURE);
1305         }
1306 }
1307
1308 static void
1309 defered_failback_tick (vector mpvec)
1310 {
1311         struct multipath * mpp;
1312         unsigned int i;
1313
1314         vector_foreach_slot (mpvec, mpp, i) {
1315                 /*
1316                  * deferred failback getting closer
1317                  */
1318                 if (mpp->pgfailback > 0 && mpp->failback_tick > 0) {
1319                         mpp->failback_tick--;
1320
1321                         if (!mpp->failback_tick && need_switch_pathgroup(mpp, 1))
1322                                 switch_pathgroup(mpp);
1323                 }
1324         }
1325 }
1326
1327 static void
1328 retry_count_tick(vector mpvec)
1329 {
1330         struct multipath *mpp;
1331         unsigned int i;
1332
1333         vector_foreach_slot (mpvec, mpp, i) {
1334                 if (mpp->retry_tick > 0) {
1335                         mpp->stat_total_queueing_time++;
1336                         condlog(4, "%s: Retrying.. No active path", mpp->alias);
1337                         if(--mpp->retry_tick == 0) {
1338                                 dm_queue_if_no_path(mpp->alias, 0);
1339                                 condlog(2, "%s: Disable queueing", mpp->alias);
1340                         }
1341                 }
1342         }
1343 }
1344
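/*
 * Refresh the priority of a path (or of all paths in its map when
 * refresh_all is set). Returns 1 if any priority changed.
 */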
1345 int update_prio(struct path *pp, int refresh_all)
1346 {
1347         int oldpriority;
1348         struct path *pp1;
1349         struct pathgroup * pgp;
1350         int i, j, changed = 0;
1351
1352         if (refresh_all) {
1353                 vector_foreach_slot (pp->mpp->pg, pgp, i) {
1354                         vector_foreach_slot (pgp->paths, pp1, j) {
1355                                 oldpriority = pp1->priority;
1356                                 pathinfo(pp1, conf->hwtable, DI_PRIO);
1357                                 if (pp1->priority != oldpriority)
1358                                         changed = 1;
1359                         }
1360                 }
1361                 return changed;
1362         }
1363         oldpriority = pp->priority;
1364         pathinfo(pp, conf->hwtable, DI_PRIO);
1365
1366         if (pp->priority == oldpriority)
1367                 return 0;
1368         return 1;
1369 }
1370
1371 int update_path_groups(struct multipath *mpp, struct vectors *vecs, int refresh)
1372 {
1373         if (reload_map(vecs, mpp, refresh))
1374                 return 1;
1375
1376         dm_lib_release();
1377         if (setup_multipath(vecs, mpp) != 0)
1378                 return 1;
1379         sync_map_state(mpp);
1380
1381         return 0;
1382 }
1383
1384 /*
1385  * Returns '1' if the path has been checked, '0' otherwise
1386  */
1387 int
1388 check_path (struct vectors * vecs, struct path * pp, int ticks)
1389 {
1390         int newstate;
1391         int new_path_up = 0;
1392         int chkr_new_path_up = 0;
1393         int add_active;
1394         int disable_reinstate = 0;
1395         int oldchkrstate = pp->chkrstate;
1396
1397         if ((pp->initialized == INIT_OK ||
1398              pp->initialized == INIT_REQUESTED_UDEV) && !pp->mpp)
1399                 return 0;
1400
1401         if (pp->tick)
1402                 pp->tick -= (pp->tick > ticks) ? ticks : pp->tick;
1403         if (pp->tick)
1404                 return 0; /* don't check this path yet */
1405
1406         if (!pp->mpp && pp->initialized == INIT_MISSING_UDEV &&
1407             pp->retriggers < conf->retrigger_tries) {
1408                 condlog(2, "%s: triggering change event to reinitialize",
1409                         pp->dev);
1410                 pp->initialized = INIT_REQUESTED_UDEV;
1411                 pp->retriggers++;
1412                 sysfs_attr_set_value(pp->udev, "uevent", "change",
1413                                      strlen("change"));
1414                 return 0;
1415         }
1416
1417         /*
1418          * provision the next check as soon as possible,
1419          * in case we exit abnormally from here
1420          */
1421         pp->tick = conf->checkint;
1422
1423         newstate = path_offline(pp);
1424         /*
1425          * Wait for uevent for removed paths;
1426          * some LLDDs like zfcp keep paths unavailable
1427          * without sending uevents.
1428          */
1429         if (newstate == PATH_REMOVED)
1430                 newstate = PATH_DOWN;
1431
1432         if (newstate == PATH_UP)
1433                 newstate = get_state(pp, 1);
1434         else
1435                 checker_clear_message(&pp->checker);
1436
1437         if (newstate == PATH_WILD || newstate == PATH_UNCHECKED) {
1438                 condlog(2, "%s: unusable path", pp->dev);
1439                 pathinfo(pp, conf->hwtable, 0);
1440                 return 1;
1441         }
1442         if (!pp->mpp) {
1443                 if (!strlen(pp->wwid) && pp->initialized != INIT_MISSING_UDEV &&
1444                     (newstate == PATH_UP || newstate == PATH_GHOST)) {
1445                         condlog(2, "%s: add missing path", pp->dev);
1446                         if (pathinfo(pp, conf->hwtable, DI_ALL) == 0) {
1447                                 ev_add_path(pp, vecs);
1448                                 pp->tick = 1;
1449                         }
1450                 }
1451                 return 0;
1452         }
1453         /*
1454          * Async IO in flight. Keep the previous path state
1455          * and reschedule as soon as possible
1456          */
1457         if (newstate == PATH_PENDING) {
1458                 pp->tick = 1;
1459                 return 0;
1460         }
1461         /*
1462          * Synchronize with kernel state
1463          */
1464         if (update_multipath_strings(pp->mpp, vecs->pathvec)) {
1465                 condlog(1, "%s: Could not synchronize with kernel state",
1466                         pp->dev);
1467                 pp->dmstate = PSTATE_UNDEF;
1468         }
1469         /* if update_multipath_strings orphaned the path, quit early */
1470         if (!pp->mpp)
1471                 return 0;
1472
1473         if ((newstate == PATH_UP || newstate == PATH_GHOST) &&
1474              pp->wait_checks > 0) {
1475                 if (pp->mpp && pp->mpp->nr_active > 0) {
1476                         pp->state = PATH_DELAYED;
1477                         pp->wait_checks--;
1478                         return 1;
1479                 } else
1480                         pp->wait_checks = 0;
1481         }
1482
1483         /*
1484          * don't reinstate failed path, if it's in stand-by
1485          * and if target supports only implicit tpgs mode.
1486          * this will prevent unnecessary i/o by dm on stand-by
1487          * paths if there are no other active paths in map.
1488          */
1489         disable_reinstate = (newstate == PATH_GHOST &&
1490                             pp->mpp->nr_active == 0 &&
1491                             pp->tpgs == TPGS_IMPLICIT) ? 1 : 0;
1492
1493         pp->chkrstate = newstate;
1494         if (newstate != pp->state) {
1495                 int oldstate = pp->state;
1496                 pp->state = newstate;
1497
1498                 if (strlen(checker_message(&pp->checker)))
1499                         LOG_MSG(1, checker_message(&pp->checker));
1500
1501                 /*
1502                  * upon state change, reset the checkint
1503                  * to the shortest delay
1504                  */
1505                 pp->checkint = conf->checkint;
1506
1507                 if (newstate == PATH_DOWN || newstate == PATH_SHAKY) {
1508                         /*
1509                          * proactively fail path in the DM
1510                          */
1511                         if (oldstate == PATH_UP ||
1512                             oldstate == PATH_GHOST) {
1513                                 fail_path(pp, 1);
1514                                 if (pp->mpp->delay_wait_checks > 0 &&
1515                                     pp->watch_checks > 0) {
1516                                         pp->wait_checks = pp->mpp->delay_wait_checks;
1517                                         pp->watch_checks = 0;
1518                                 }
1519                         } else
1520                                 fail_path(pp, 0);
1521
1522                         /*
1523                          * cancel scheduled failback
1524                          */
1525                         pp->mpp->failback_tick = 0;
1526
1527                         pp->mpp->stat_path_failures++;
1528                         return 1;
1529                 }
1530
1531                 if (newstate == PATH_UP || newstate == PATH_GHOST) {
1532                         if (pp->mpp && pp->mpp->prflag) {
1533                                 /*
1534                                  * Check Persistent Reservation.
1535                                  */
1536                                 condlog(2, "%s: checking persistent reservation "
1537                                         "registration", pp->dev);
1538                                 mpath_pr_event_handle(pp);
1539                         }
1540                 }
1541
1542                 /*
1543                  * reinstate this path
1544                  */
1545                 if (oldstate != PATH_UP &&
1546                     oldstate != PATH_GHOST) {
1547                         if (pp->mpp->delay_watch_checks > 0)
1548                                 pp->watch_checks = pp->mpp->delay_watch_checks;
1549                         add_active = 1;
1550                 } else {
1551                         if (pp->watch_checks > 0)
1552                                 pp->watch_checks--;
1553                         add_active = 0;
1554                 }
1555                 if (!disable_reinstate && reinstate_path(pp, add_active)) {
1556                         condlog(3, "%s: reload map", pp->dev);
1557                         ev_add_path(pp, vecs);
1558                         pp->tick = 1;
1559                         return 0;
1560                 }
1561                 new_path_up = 1;
1562
1563                 if (oldchkrstate != PATH_UP && oldchkrstate != PATH_GHOST)
1564                         chkr_new_path_up = 1;
1565
1566                 /*
1567                  * if at least one path is up in a group, and
1568                  * the group is disabled, re-enable it
1569                  */
1570                 if (newstate == PATH_UP)
1571                         enable_group(pp);
1572         }
1573         else if (newstate == PATH_UP || newstate == PATH_GHOST) {
1574                 if ((pp->dmstate == PSTATE_FAILED ||
1575                     pp->dmstate == PSTATE_UNDEF) &&
1576                     !disable_reinstate) {
1577                         /* Clear IO errors */
1578                         if (reinstate_path(pp, 0)) {
1579                                 condlog(3, "%s: reload map", pp->dev);
1580                                 ev_add_path(pp, vecs);
1581                                 pp->tick = 1;
1582                                 return 0;
1583                         }
1584                 } else {
1585                         LOG_MSG(4, checker_message(&pp->checker));
1586                         if (pp->checkint != conf->max_checkint) {
1587                                 /*
1588                                  * double the next check interval,
1589                                  * capping it at conf->max_checkint
1590                                  */
1591                                 if (pp->checkint < (conf->max_checkint / 2))
1592                                         pp->checkint = 2 * pp->checkint;
1593                                 else
1594                                         pp->checkint = conf->max_checkint;
1595
1596                                 condlog(4, "%s: delay next check %is",
1597                                         pp->dev_t, pp->checkint);
1598                         }
1599                         if (pp->watch_checks > 0)
1600                                 pp->watch_checks--;
1601                         pp->tick = pp->checkint;
1602                 }
1603         }
1604         else if (newstate == PATH_DOWN &&
1605                  strlen(checker_message(&pp->checker))) {
1606                 if (conf->log_checker_err == LOG_CHKR_ERR_ONCE)
1607                         LOG_MSG(3, checker_message(&pp->checker));
1608                 else
1609                         LOG_MSG(2, checker_message(&pp->checker));
1610         }
1611
1612         pp->state = newstate;
1613
1614
1615         if (pp->mpp->wait_for_udev)
1616                 return 1;
1617         /*
1618          * path prio refreshing
1619          */
1620         condlog(4, "path prio refresh");
1621
1622         if (update_prio(pp, new_path_up) &&
1623             (pp->mpp->pgpolicyfn == (pgpolicyfn *)group_by_prio) &&
1624              pp->mpp->pgfailback == -FAILBACK_IMMEDIATE)
1625                 update_path_groups(pp->mpp, vecs, !new_path_up);
1626         else if (need_switch_pathgroup(pp->mpp, 0)) {
1627                 if (pp->mpp->pgfailback > 0 &&
1628                     (new_path_up || pp->mpp->failback_tick <= 0))
1629                         pp->mpp->failback_tick =
1630                                 pp->mpp->pgfailback + 1;
1631                 else if (pp->mpp->pgfailback == -FAILBACK_IMMEDIATE ||
1632                          (chkr_new_path_up && followover_should_failback(pp)))
1633                         switch_pathgroup(pp->mpp);
1634         }
1635         return 1;
1636 }
1637
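/*
 * Path checker thread: loops roughly once per second, running
 * check_path() on every known path under the vecs lock, ticking the
 * deferred failback / retry / missing-uevent counters, and
 * garbage-collecting orphaned maps every MAPGCINT iterations.
 * With strict_timing set, it paces itself with an ITIMER_REAL alarm
 * instead of a plain sleep(1).
 */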
1638 static void *
1639 checkerloop (void *ap)
1640 {
1641         struct vectors *vecs;
1642         struct path *pp;
1643         int count = 0;
1644         unsigned int i;
1645         struct itimerval timer_tick_it;
1646         struct timeval last_time;
1647
1648         mlockall(MCL_CURRENT | MCL_FUTURE);
1649         vecs = (struct vectors *)ap;
1650         condlog(2, "path checkers start up");
1651
1652         /*
1653          * init the path check interval
1654          */
1655         vector_foreach_slot (vecs->pathvec, pp, i) {
1656                 pp->checkint = conf->checkint;
1657         }
1658
1659         /* Tweak start time for initial path check */
1660         if (gettimeofday(&last_time, NULL) != 0)
1661                 last_time.tv_sec = 0;
1662         else
1663                 last_time.tv_sec -= 1;
1664
1665         while (1) {
1666                 struct timeval diff_time, start_time, end_time;
1667                 int num_paths = 0, ticks = 0, signo, strict_timing, rc = 0;
1668                 sigset_t mask;
1669
1670                 if (gettimeofday(&start_time, NULL) != 0)
1671                         start_time.tv_sec = 0;
1672                 if (start_time.tv_sec && last_time.tv_sec) {
1673                         timersub(&start_time, &last_time, &diff_time);
1674                         condlog(4, "tick (%lu.%06lu secs)",
1675                                 diff_time.tv_sec, diff_time.tv_usec);
1676                         last_time.tv_sec = start_time.tv_sec;
1677                         last_time.tv_usec = start_time.tv_usec;
1678                         ticks = diff_time.tv_sec;
1679                 } else {
1680                         ticks = 1;
1681                         condlog(4, "tick (%d ticks)", ticks);
1682                 }
1683 #ifdef USE_SYSTEMD
1684                 if (use_watchdog)
1685                         sd_notify(0, "WATCHDOG=1");
1686 #endif
1687                 rc = set_config_state(DAEMON_RUNNING);
1688                 if (rc == ETIMEDOUT) {
1689                         condlog(4, "timeout waiting for DAEMON_IDLE");
1690                         continue;
1691                 }
1692                 strict_timing = conf->strict_timing;
1693                 if (vecs->pathvec) {
1694                         pthread_cleanup_push(cleanup_lock, &vecs->lock);
1695                         lock(vecs->lock);
1696                         pthread_testcancel();
1697                         vector_foreach_slot (vecs->pathvec, pp, i) {
1698                                 num_paths += check_path(vecs, pp, ticks);
1699                         }
1700                         lock_cleanup_pop(vecs->lock);
1701                 }
1702                 if (vecs->mpvec) {
1703                         pthread_cleanup_push(cleanup_lock, &vecs->lock);
1704                         lock(vecs->lock);
1705                         pthread_testcancel();
1706                         defered_failback_tick(vecs->mpvec);
1707                         retry_count_tick(vecs->mpvec);
1708                         missing_uev_wait_tick(vecs);
1709                         lock_cleanup_pop(vecs->lock);
1710                 }
1711                 if (count)
1712                         count--;
1713                 else {
1714                         pthread_cleanup_push(cleanup_lock, &vecs->lock);
1715                         lock(vecs->lock);
1716                         pthread_testcancel();
1717                         condlog(4, "map garbage collection");
1718                         mpvec_garbage_collector(vecs);
1719                         count = MAPGCINT;
1720                         lock_cleanup_pop(vecs->lock);
1721                 }
1722
1723                 diff_time.tv_usec = 0;
1724                 if (start_time.tv_sec &&
1725                     gettimeofday(&end_time, NULL) == 0) {
1726                         timersub(&end_time, &start_time, &diff_time);
1727                         if (num_paths) {
1728                                 condlog(3, "checked %d path%s in %lu.%06lu secs",
1729                                         num_paths, num_paths > 1 ? "s" : "",
1730                                         diff_time.tv_sec, diff_time.tv_usec);
1731                                 if (diff_time.tv_sec > conf->max_checkint)
1732                                         condlog(1, "path checkers took longer "
1733                                                 "than %lu seconds, consider "
1734                                                 "increasing max_polling_interval",
1735                                                 diff_time.tv_sec);
1736                         }
1737                 }
1738
1739                 post_config_state(DAEMON_IDLE);
1740                 if (!strict_timing)
1741                         sleep(1);
1742                 else {
1743                         timer_tick_it.it_interval.tv_sec = 0;
1744                         timer_tick_it.it_interval.tv_usec = 0;
1745                         if (diff_time.tv_usec) {
1746                                 timer_tick_it.it_value.tv_sec = 0;
1747                                 timer_tick_it.it_value.tv_usec =
1748                                         (unsigned long)1000000 - diff_time.tv_usec;
1749                         } else {
1750                                 timer_tick_it.it_value.tv_sec = 1;
1751                                 timer_tick_it.it_value.tv_usec = 0;
1752                         }
1753                         setitimer(ITIMER_REAL, &timer_tick_it, NULL);
1754
1755                         sigemptyset(&mask);
1756                         sigaddset(&mask, SIGALRM);
1757                         condlog(3, "waiting for %lu.%06lu secs",
1758                                 timer_tick_it.it_value.tv_sec,
1759                                 timer_tick_it.it_value.tv_usec);
1760                         if (sigwait(&mask, &signo) != 0) {
1761                                 condlog(3, "sigwait failed with error %d",
1762                                         errno);
1763                                 conf->strict_timing = 0;
1764                                 break;
1765                         }
1766                 }
1767         }
1768         return NULL;
1769 }
1770
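/*
 * Build the path and map vectors from scratch: discover paths, drop
 * the ones rejected by filter_path(), read existing maps from dm,
 * coalesce paths into maps (pushing changed maps into dm), record
 * WWIDs and persistent-reservation state, and optionally start a dm
 * event waiter thread per map.  Returns 0 on success, 1 on failure.
 */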
1771 int
1772 configure (struct vectors * vecs, int start_waiters)
1773 {
1774         struct multipath * mpp;
1775         struct path * pp;
1776         vector mpvec;
1777         int i, ret;
1778
1779         if (!vecs->pathvec && !(vecs->pathvec = vector_alloc()))
1780                 return 1;
1781
1782         if (!vecs->mpvec && !(vecs->mpvec = vector_alloc()))
1783                 return 1;
1784
1785         if (!(mpvec = vector_alloc()))
1786                 return 1;
1787
1788         /*
1789          * probe for current path (from sysfs) and map (from dm) sets
1790          */
1791         ret = path_discovery(vecs->pathvec, conf, DI_ALL);
1792         if (ret < 0)
1793                 return 1;
1794
1795         vector_foreach_slot (vecs->pathvec, pp, i) {
1796                 if (filter_path(conf, pp) > 0) {
1797                         vector_del_slot(vecs->pathvec, i);
1798                         free_path(pp);
1799                         i--;
1800                 }
1801                 else
1802                         pp->checkint = conf->checkint;
1803         }
1804         if (map_discovery(vecs))
1805                 return 1;
1806
1807         /*
1808          * create new set of maps & push changed ones into dm
1809          */
1810         if (coalesce_paths(vecs, mpvec, NULL, 1))
1811                 return 1;
1812
1813         /*
1814          * may need to remove some maps which are no longer relevant
1815          * e.g., due to blacklist changes in conf file
1816          */
1817         if (coalesce_maps(vecs, mpvec))
1818                 return 1;
1819
1820         dm_lib_release();
1821
1822         sync_maps_state(mpvec);
1823         vector_foreach_slot(mpvec, mpp, i) {
1824                 remember_wwid(mpp->wwid);
1825                 update_map_pr(mpp);
1826         }
1827
1828         /*
1829          * purge dm of old maps
1830          */
1831         remove_maps(vecs);
1832
1833         /*
1834          * save new set of maps formed by considering current path state
1835          */
1836         vector_free(vecs->mpvec);
1837         vecs->mpvec = mpvec;
1838
1839         /*
1840          * start dm event waiter threads for these new maps
1841          */
1842         vector_foreach_slot(vecs->mpvec, mpp, i) {
1843                 if (setup_multipath(vecs, mpp))
1844                         return 1;
1845                 if (start_waiters)
1846                         if (start_waiter_thread(mpp, vecs))
1847                                 return 1;
1848         }
1849         return 0;
1850 }
1851
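/*
 * Reconfiguration has to be postponed while any existing map is still
 * flagged wait_for_udev, i.e. waiting for a pending uevent.
 */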
1852 int
1853 need_to_delay_reconfig(struct vectors * vecs)
1854 {
1855         struct multipath *mpp;
1856         int i;
1857
1858         if (!VECTOR_SIZE(vecs->mpvec))
1859                 return 0;
1860
1861         vector_foreach_slot(vecs->mpvec, mpp, i) {
1862                 if (mpp->wait_for_udev)
1863                         return 1;
1864         }
1865         return 0;
1866 }
1867
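/*
 * Tear down all maps, waiters and paths, re-read DEFAULT_CONFIGFILE,
 * carry the runtime overrides (verbosity, bindings_read_only,
 * ignore_new_devs) over from the old config, and rebuild everything
 * via configure().  If loading the new config fails, the old one is
 * kept.
 */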
1868 int
1869 reconfigure (struct vectors * vecs)
1870 {
1871         struct config * old = conf;
1872         int retval = 1;
1873
1874         /*
1875          * free old map and path vectors ... they use old conf state
1876          */
1877         if (VECTOR_SIZE(vecs->mpvec))
1878                 remove_maps_and_stop_waiters(vecs);
1879
1880         if (VECTOR_SIZE(vecs->pathvec))
1881                 free_pathvec(vecs->pathvec, FREE_PATHS);
1882
1883         vecs->pathvec = NULL;
1884         conf = NULL;
1885
1886         /* Re-read any timezone changes */
1887         tzset();
1888
1889         if (!load_config(DEFAULT_CONFIGFILE, udev)) {
1890                 dm_drv_version(conf->version, TGT_MPATH);
1891                 conf->verbosity = old->verbosity;
1892                 conf->bindings_read_only = old->bindings_read_only;
1893                 conf->ignore_new_devs = old->ignore_new_devs;
1894                 conf->daemon = 1;
1895                 configure(vecs, 1);
1896                 free_config(old);
1897                 retval = 0;
1898         } else {
1899                 conf = old;
1900         }
1901         uxsock_timeout = conf->uxsock_timeout;
1902
1903         return retval;
1904 }
1905
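/*
 * Allocate the global vectors container and initialize the mutex and
 * depth counter that protect it.
 */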
1906 static struct vectors *
1907 init_vecs (void)
1908 {
1909         struct vectors * vecs;
1910
1911         vecs = (struct vectors *)MALLOC(sizeof(struct vectors));
1912
1913         if (!vecs)
1914                 return NULL;
1915
1916         vecs->lock.mutex =
1917                 (pthread_mutex_t *)MALLOC(sizeof(pthread_mutex_t));
1918
1919         if (!vecs->lock.mutex)
1920                 goto out;
1921
1922         pthread_mutex_init(vecs->lock.mutex, NULL);
1923         vecs->lock.depth = 0;
1924
1925         return vecs;
1926
1927 out:
1928         FREE(vecs);
1929         condlog(0, "failed to init paths");
1930         return NULL;
1931 }
1932
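/*
 * sigaction() wrapper: install func as the handler for signo and
 * return the previous handler, or SIG_ERR on failure.
 */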
1933 static void *
1934 signal_set(int signo, void (*func) (int))
1935 {
1936         int r;
1937         struct sigaction sig;
1938         struct sigaction osig;
1939
1940         sig.sa_handler = func;
1941         sigemptyset(&sig.sa_mask);
1942         sig.sa_flags = 0;
1943
1944         r = sigaction(signo, &sig, &osig);
1945
1946         if (r < 0)
1947                 return (SIG_ERR);
1948         else
1949                 return (osig.sa_handler);
1950 }
1951
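/*
 * Act on the flags set by the asynchronous signal handlers below:
 * SIGHUP requests a reconfiguration, SIGUSR1 a log reset.
 */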
1952 void
1953 handle_signals(void)
1954 {
1955         if (reconfig_sig) {
1956                 condlog(2, "reconfigure (signal)");
1957                 set_config_state(DAEMON_CONFIGURE);
1958         }
1959         if (log_reset_sig) {
1960                 condlog(2, "reset log (signal)");
1961                 pthread_mutex_lock(&logq_lock);
1962                 log_reset("multipathd");
1963                 pthread_mutex_unlock(&logq_lock);
1964         }
1965         reconfig_sig = 0;
1966         log_reset_sig = 0;
1967 }
1968
1969 static void
1970 sighup (int sig)
1971 {
1972         reconfig_sig = 1;
1973 }
1974
1975 static void
1976 sigend (int sig)
1977 {
1978         exit_daemon();
1979 }
1980
1981 static void
1982 sigusr1 (int sig)
1983 {
1984         log_reset_sig = 1;
1985 }
1986
1987 static void
1988 sigusr2 (int sig)
1989 {
1990         condlog(3, "SIGUSR2 received");
1991 }
1992
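/*
 * Block SIGHUP/SIGUSR1/SIGUSR2/SIGALRM so that threads created later
 * inherit the mask, install the handlers above, and ignore SIGPIPE.
 */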
1993 static void
1994 signal_init(void)
1995 {
1996         sigset_t set;
1997
1998         sigemptyset(&set);
1999         sigaddset(&set, SIGHUP);
2000         sigaddset(&set, SIGUSR1);
2001         sigaddset(&set, SIGUSR2);
2002         sigaddset(&set, SIGALRM);
2003         pthread_sigmask(SIG_BLOCK, &set, NULL);
2004
2005         signal_set(SIGHUP, sighup);
2006         signal_set(SIGUSR1, sigusr1);
2007         signal_set(SIGUSR2, sigusr2);
2008         signal_set(SIGINT, sigend);
2009         signal_set(SIGTERM, sigend);
2010         signal(SIGPIPE, SIG_IGN);
2011 }
2012
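/*
 * Request round-robin real-time scheduling at the maximum priority so
 * path checking is not starved on a loaded system.
 */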
2013 static void
2014 setscheduler (void)
2015 {
2016         int res;
2017         static struct sched_param sched_param = {
2018                 .sched_priority = 99
2019         };
2020
2021         res = sched_setscheduler (0, SCHED_RR, &sched_param);
2022
2023         if (res == -1)
2024                 condlog(LOG_WARNING, "Could not set SCHED_RR at priority 99");
2025         return;
2026 }
2027
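/*
 * Exempt the daemon from the OOM killer by writing the minimum score
 * to /proc/self/oom_score_adj (falling back to the legacy oom_adj
 * interface), unless systemd already set OOMScoreAdjust for us.
 */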
2028 static void
2029 set_oom_adj (void)
2030 {
2031 #ifdef OOM_SCORE_ADJ_MIN
2032         int retry = 1;
2033         char *file = "/proc/self/oom_score_adj";
2034         int score = OOM_SCORE_ADJ_MIN;
2035 #else
2036         int retry = 0;
2037         char *file = "/proc/self/oom_adj";
2038         int score = OOM_ADJUST_MIN;
2039 #endif
2040         FILE *fp;
2041         struct stat st;
2042         char *envp;
2043
2044         envp = getenv("OOMScoreAdjust");
2045         if (envp) {
2046                 condlog(3, "Using systemd provided OOMScoreAdjust");
2047                 return;
2048         }
2049         do {
2050                 if (stat(file, &st) == 0){
2051                         fp = fopen(file, "w");
2052                         if (!fp) {
2053                                 condlog(0, "couldn't fopen %s : %s", file,
2054                                         strerror(errno));
2055                                 return;
2056                         }
2057                         fprintf(fp, "%i", score);
2058                         fclose(fp);
2059                         return;
2060                 }
2061                 if (errno != ENOENT) {
2062                         condlog(0, "couldn't stat %s : %s", file,
2063                                 strerror(errno));
2064                         return;
2065                 }
2066 #ifdef OOM_ADJUST_MIN
2067                 file = "/proc/self/oom_adj";
2068                 score = OOM_ADJUST_MIN;
2069 #else
2070                 retry = 0;
2071 #endif
2072         } while (retry--);
2073         condlog(0, "couldn't adjust oom score");
2074 }
2075
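/*
 * The daemon proper: set up signals, logging, the pidfile, the
 * configuration, resource limits and the worker threads (uevent
 * listener, cli listener, checker loop, uevent dispatcher), then loop
 * servicing reconfigure requests until DAEMON_SHUTDOWN and tear
 * everything down.
 */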
2076 static int
2077 child (void * param)
2078 {
2079         pthread_t check_thr, uevent_thr, uxlsnr_thr, uevq_thr;
2080         pthread_attr_t log_attr, misc_attr, uevent_attr;
2081         struct vectors * vecs;
2082         struct multipath * mpp;
2083         int i;
2084 #ifdef USE_SYSTEMD
2085         unsigned long checkint;
2086 #endif
2087         int rc;
2088         int pid_fd = -1;
2089         char *envp;
2090
2091         mlockall(MCL_CURRENT | MCL_FUTURE);
2092         signal_init();
2093
2094         udev = udev_new();
2095
2096         setup_thread_attr(&misc_attr, 64 * 1024, 1);
2097         setup_thread_attr(&uevent_attr, DEFAULT_UEVENT_STACKSIZE * 1024, 1);
2098         setup_thread_attr(&waiter_attr, 32 * 1024, 1);
2099
2100         if (logsink == 1) {
2101                 setup_thread_attr(&log_attr, 64 * 1024, 0);
2102                 log_thread_start(&log_attr);
2103                 pthread_attr_destroy(&log_attr);
2104         }
2105         pid_fd = pidfile_create(DEFAULT_PIDFILE, daemon_pid);
2106         if (pid_fd < 0) {
2107                 condlog(1, "failed to create pidfile");
2108                 if (logsink == 1)
2109                         log_thread_stop();
2110                 exit(1);
2111         }
2112
2113         post_config_state(DAEMON_START);
2114
2115         condlog(2, "--------start up--------");
2116         condlog(2, "read " DEFAULT_CONFIGFILE);
2117
2118         if (load_config(DEFAULT_CONFIGFILE, udev))
2119                 goto failed;
2120
2121         uxsock_timeout = conf->uxsock_timeout;
2122
2123         dm_drv_version(conf->version, TGT_MPATH);
2124         if (init_checkers()) {
2125                 condlog(0, "failed to initialize checkers");
2126                 goto failed;
2127         }
2128         if (init_prio()) {
2129                 condlog(0, "failed to initialize prioritizers");
2130                 goto failed;
2131         }
2132
2133         setlogmask(LOG_UPTO(conf->verbosity + 3));
2134
2135         envp = getenv("LimitNOFILE");
2136
2137         if (envp) {
2138                 condlog(2,"Using systemd provided open fds limit of %s", envp);
2139         } else if (conf->max_fds) {
2140                 struct rlimit fd_limit;
2141
2142                 if (getrlimit(RLIMIT_NOFILE, &fd_limit) < 0) {
2143                         condlog(0, "can't get open fds limit: %s",
2144                                 strerror(errno));
2145                         fd_limit.rlim_cur = 0;
2146                         fd_limit.rlim_max = 0;
2147                 }
2148                 if (fd_limit.rlim_cur < conf->max_fds) {
2149                         fd_limit.rlim_cur = conf->max_fds;
2150                         if (fd_limit.rlim_max < conf->max_fds)
2151                                 fd_limit.rlim_max = conf->max_fds;
2152                         if (setrlimit(RLIMIT_NOFILE, &fd_limit) < 0) {
2153                                 condlog(0, "can't set open fds limit to "
2154                                         "%lu/%lu : %s",
2155                                         fd_limit.rlim_cur, fd_limit.rlim_max,
2156                                         strerror(errno));
2157                         } else {
2158                                 condlog(3, "set open fds limit to %lu/%lu",
2159                                         fd_limit.rlim_cur, fd_limit.rlim_max);
2160                         }
2161                 }
2162
2163         }
2164
2165         vecs = gvecs = init_vecs();
2166         if (!vecs)
2167                 goto failed;
2168
2169         setscheduler();
2170         set_oom_adj();
2171
2172         conf->daemon = 1;
2173         dm_udev_set_sync_support(0);
2174 #ifdef USE_SYSTEMD
2175         envp = getenv("WATCHDOG_USEC");
2176         if (envp && sscanf(envp, "%lu", &checkint) == 1) {
2177                 /* Value is in microseconds */
2178                 conf->max_checkint = checkint / 1000000;
2179                 /* Rescale checkint */
2180                 if (conf->checkint > conf->max_checkint)
2181                         conf->checkint = conf->max_checkint;
2182                 else
2183                         conf->checkint = conf->max_checkint / 4;
2184                 condlog(3, "enabling watchdog, interval %d max %d",
2185                         conf->checkint, conf->max_checkint);
2186                 use_watchdog = conf->checkint;
2187         }
2188 #endif
2189         /*
2190          * Signal start of configuration
2191          */
2192         post_config_state(DAEMON_CONFIGURE);
2193
2194         /*
2195          * Start uevent listener early to catch events
2196          */
2197         if ((rc = pthread_create(&uevent_thr, &uevent_attr, ueventloop, udev))) {
2198                 condlog(0, "failed to create uevent thread: %d", rc);
2199                 goto failed;
2200         }
2201         pthread_attr_destroy(&uevent_attr);
2202         if ((rc = pthread_create(&uxlsnr_thr, &misc_attr, uxlsnrloop, vecs))) {
2203                 condlog(0, "failed to create cli listener: %d", rc);
2204                 goto failed;
2205         }
2206
2207         /*
2208          * start threads
2209          */
2210         if ((rc = pthread_create(&check_thr, &misc_attr, checkerloop, vecs))) {
2211                 condlog(0,"failed to create checker loop thread: %d", rc);
2212                 goto failed;
2213         }
2214         if ((rc = pthread_create(&uevq_thr, &misc_attr, uevqloop, vecs))) {
2215                 condlog(0, "failed to create uevent dispatcher: %d", rc);
2216                 goto failed;
2217         }
2218         pthread_attr_destroy(&misc_attr);
2219
2220 #ifdef USE_SYSTEMD
2221         sd_notify(0, "READY=1");
2222 #endif
2223
2224         while (running_state != DAEMON_SHUTDOWN) {
2225                 pthread_cleanup_push(config_cleanup, NULL);
2226                 pthread_mutex_lock(&config_lock);
2227                 if (running_state != DAEMON_CONFIGURE &&
2228                     running_state != DAEMON_SHUTDOWN) {
2229                         pthread_cond_wait(&config_cond, &config_lock);
2230                 }
2231                 pthread_cleanup_pop(1);
2232                 if (running_state == DAEMON_CONFIGURE) {
2233                         pthread_cleanup_push(cleanup_lock, &vecs->lock);
2234                         lock(vecs->lock);
2235                         pthread_testcancel();
2236                         if (!need_to_delay_reconfig(vecs)) {
2237                                 reconfigure(vecs);
2238                         } else {
2239                                 conf->delayed_reconfig = 1;
2240                         }
2241                         lock_cleanup_pop(vecs->lock);
2242                         post_config_state(DAEMON_IDLE);
2243                 }
2244         }
2245
2246         lock(vecs->lock);
2247         if (conf->queue_without_daemon == QUE_NO_DAEMON_OFF)
2248                 vector_foreach_slot(vecs->mpvec, mpp, i)
2249                         dm_queue_if_no_path(mpp->alias, 0);
2250         remove_maps_and_stop_waiters(vecs);
2251         unlock(vecs->lock);
2252
2253         pthread_cancel(check_thr);
2254         pthread_cancel(uevent_thr);
2255         pthread_cancel(uxlsnr_thr);
2256         pthread_cancel(uevq_thr);
2257
2258         lock(vecs->lock);
2259         free_pathvec(vecs->pathvec, FREE_PATHS);
2260         vecs->pathvec = NULL;
2261         unlock(vecs->lock);
2262         /* Now all the waitevent threads will start rushing in. */
2263         while (vecs->lock.depth > 0) {
2264                 sleep (1); /* This is weak. */
2265                 condlog(3, "Have %d wait event checker threads to de-alloc,"
2266                         " waiting...", vecs->lock.depth);
2267         }
2268         pthread_mutex_destroy(vecs->lock.mutex);
2269         FREE(vecs->lock.mutex);
2270         vecs->lock.depth = 0;
2271         vecs->lock.mutex = NULL;
2272         FREE(vecs);
2273         vecs = NULL;
2274
2275         cleanup_checkers();
2276         cleanup_prio();
2277
2278         dm_lib_release();
2279         dm_lib_exit();
2280
2281         /* We're done here */
2282         condlog(3, "unlink pidfile");
2283         unlink(DEFAULT_PIDFILE);
2284
2285         condlog(2, "--------shut down-------");
2286
2287         if (logsink == 1)
2288                 log_thread_stop();
2289
2290         /*
2291          * Freeing config must be done after condlog() and dm_lib_exit(),
2292          * because logging functions like dlog() and dm_write_log()
2293          * reference the config.
2294          */
2295         free_config(conf);
2296         conf = NULL;
2297         udev_unref(udev);
2298         udev = NULL;
2299 #ifdef _DEBUG_
2300         dbg_free_final(NULL);
2301 #endif
2302
2303 #ifdef USE_SYSTEMD
2304         sd_notify(0, "ERRNO=0");
2305 #endif
2306         exit(0);
2307
2308 failed:
2309 #ifdef USE_SYSTEMD
2310         sd_notify(0, "ERRNO=1");
2311 #endif
2312         if (pid_fd >= 0)
2313                 close(pid_fd);
2314         exit(1);
2315 }
2316
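/*
 * Classic double fork: detach from the controlling terminal, let the
 * intermediate child exit so the daemon is reparented, and redirect
 * stdin/stdout/stderr to /dev/null.  Returns 0 in the daemon, the
 * child pid in the original parent, -1 on error.
 */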
2317 static int
2318 daemonize(void)
2319 {
2320         int pid;
2321         int dev_null_fd;
2322
2323         if ((pid = fork()) < 0) {
2324                 fprintf(stderr, "Failed first fork : %s\n", strerror(errno));
2325                 return -1;
2326         }
2327         else if (pid != 0)
2328                 return pid;
2329
2330         setsid();
2331
2332         if ((pid = fork()) < 0)
2333                 fprintf(stderr, "Failed second fork : %s\n", strerror(errno));
2334         else if (pid != 0)
2335                 _exit(0);
2336
2337         if (chdir("/") < 0)
2338                 fprintf(stderr, "cannot chdir to '/', continuing\n");
2339
2340         dev_null_fd = open("/dev/null", O_RDWR);
2341         if (dev_null_fd < 0) {
2342                 fprintf(stderr, "cannot open /dev/null for input & output : %s\n",
2343                         strerror(errno));
2344                 _exit(0);
2345         }
2346
2347         close(STDIN_FILENO);
2348         if (dup(dev_null_fd) < 0) {
2349                 fprintf(stderr, "cannot dup /dev/null to stdin : %s\n",
2350                         strerror(errno));
2351                 _exit(0);
2352         }
2353         close(STDOUT_FILENO);
2354         if (dup(dev_null_fd) < 0) {
2355                 fprintf(stderr, "cannot dup /dev/null to stdout : %s\n",
2356                         strerror(errno));
2357                 _exit(0);
2358         }
2359         close(STDERR_FILENO);
2360         if (dup(dev_null_fd) < 0) {
2361                 fprintf(stderr, "cannot dup /dev/null to stderr : %s\n",
2362                         strerror(errno));
2363                 _exit(0);
2364         }
2365         close(dev_null_fd);
2366         daemon_pid = getpid();
2367         return 0;
2368 }
2369
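/*
 * Parse the command line; -k and trailing arguments are handed to a
 * running daemon through uxclnt(), everything else configures and
 * starts the daemon itself (in the foreground with -d, otherwise via
 * daemonize()).
 */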
2370 int
2371 main (int argc, char *argv[])
2372 {
2373         extern char *optarg;
2374         extern int optind;
2375         int arg;
2376         int err;
2377         int foreground = 0;
2378
2379         logsink = 1;
2380         dm_init();
2381
2382         if (getuid() != 0) {
2383                 fprintf(stderr, "need to be root\n");
2384                 exit(1);
2385         }
2386
2387         /* make sure we don't lock any path */
2388         if (chdir("/") < 0)
2389                 fprintf(stderr, "can't chdir to root directory : %s\n",
2390                         strerror(errno));
2391         umask(umask(077) | 022);
2392
2393         conf = alloc_config();
2394
2395         if (!conf)
2396                 exit(1);
2397
2398         while ((arg = getopt(argc, argv, ":dsv:k::Bn")) != EOF ) {
2399         switch(arg) {
2400                 case 'd':
2401                         foreground = 1;
2402                         if (logsink > 0)
2403                                 logsink = 0;
2404                         //debug=1; /* ### comment me out ### */
2405                         break;
2406                 case 'v':
2407                         /* reject non-numeric verbosity */
2408                         if (!isdigit(optarg[0]))
2409                                 exit(1);
2410
2411                         conf->verbosity = atoi(optarg);
2412                         break;
2413                 case 's':
2414                         logsink = -1;
2415                         break;
2416                 case 'k':
2417                         if (load_config(DEFAULT_CONFIGFILE, udev_new()))
2418                                 exit(1);
2419                         uxclnt(optarg, uxsock_timeout + 100);
2420                         exit(0);
2421                 case 'B':
2422                         conf->bindings_read_only = 1;
2423                         break;
2424                 case 'n':
2425                         conf->ignore_new_devs = 1;
2426                         break;
2427                 default:
2428                         fprintf(stderr, "Invalid argument '-%c'\n",
2429                                 optopt);
2430                         exit(1);
2431                 }
2432         }
2433         if (optind < argc) {
2434                 char cmd[CMDSIZE];
2435                 char * s = cmd;
2436                 char * c = s;
2437
2438                 if (load_config(DEFAULT_CONFIGFILE, udev_new()))
2439                         exit(1);
2440                 memset(cmd, 0x0, CMDSIZE);
2441                 while (optind < argc) {
2442                         if (strchr(argv[optind], ' '))
2443                                 c += snprintf(c, s + CMDSIZE - c, "\"%s\" ", argv[optind]);
2444                         else
2445                                 c += snprintf(c, s + CMDSIZE - c, "%s ", argv[optind]);
2446                         optind++;
2447                 }
2448                 c += snprintf(c, s + CMDSIZE - c, "\n");
2449                 uxclnt(s, uxsock_timeout + 100);
2450                 exit(0);
2451         }
2452
2453         if (foreground) {
2454                 if (!isatty(fileno(stdout)))
2455                         setbuf(stdout, NULL);
2456                 err = 0;
2457                 daemon_pid = getpid();
2458         } else
2459                 err = daemonize();
2460
2461         if (err < 0)
2462                 /* error */
2463                 exit(1);
2464         else if (err > 0)
2465                 /* parent dies */
2466                 exit(0);
2467         else
2468                 /* child lives */
2469                 return (child(NULL));
2470 }
2471
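/*
 * Thread body: issue a PR IN READ KEYS on the path, look for the
 * map's reservation_key among the registered keys, and if it is
 * present re-register it on this path with a PR OUT REGISTER AND
 * IGNORE EXISTING KEY, then mark the map with prflag.
 */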
2472 void *  mpath_pr_event_handler_fn (void * pathp )
2473 {
2474         struct multipath * mpp;
2475         int i,j, ret, isFound;
2476         struct path * pp = (struct path *)pathp;
2477         unsigned char *keyp;
2478         uint64_t prkey;
2479         struct prout_param_descriptor *param;
2480         struct prin_resp *resp;
2481
2482         mpp = pp->mpp;
2483
2484         resp = mpath_alloc_prin_response(MPATH_PRIN_RKEY_SA);
2485         if (!resp){
2486                 condlog(0,"%s Alloc failed for prin response", pp->dev);
2487                 return NULL;
2488         }
2489
2490         ret = prin_do_scsi_ioctl(pp->dev, MPATH_PRIN_RKEY_SA, resp, 0);
2491         if (ret != MPATH_PR_SUCCESS )
2492         {
2493                 condlog(0,"%s : pr in read keys service action failed. Error=%d", pp->dev, ret);
2494                 goto out;
2495         }
2496
2497         condlog(3, " event pr=%d addlen=%d",resp->prin_descriptor.prin_readkeys.prgeneration,
2498                         resp->prin_descriptor.prin_readkeys.additional_length );
2499
2500         if (resp->prin_descriptor.prin_readkeys.additional_length == 0 )
2501         {
2502                 condlog(1, "%s: No key found. Device may not be registered.", pp->dev);
2503                 ret = MPATH_PR_SUCCESS;
2504                 goto out;
2505         }
2506         prkey = 0;
2507         keyp = (unsigned char *)mpp->reservation_key;
2508         for (j = 0; j < 8; ++j) {
2509                 if (j > 0)
2510                         prkey <<= 8;
2511                 prkey |= *keyp;
2512                 ++keyp;
2513         }
2514         condlog(2, "Multipath reservation_key: 0x%" PRIx64, prkey);
2515
2516         isFound = 0;
2517         for (i = 0; i < resp->prin_descriptor.prin_readkeys.additional_length/8; i++ )
2518         {
2519                 condlog(2, "PR IN READKEYS[%d]  reservation key:",i);
2520                 dumpHex((char *)&resp->prin_descriptor.prin_readkeys.key_list[i*8], 8 , -1);
2521                 if (!memcmp(mpp->reservation_key, &resp->prin_descriptor.prin_readkeys.key_list[i*8], 8))
2522                 {
2523                         condlog(2, "%s: pr key found in prin readkeys response", mpp->alias);
2524                         isFound = 1;
2525                         break;
2526                 }
2527         }
2528         if (!isFound)
2529         {
2530                 condlog(0, "%s: Either device not registered or ", pp->dev);
2531                 condlog(0, "host is not authorised for registration. Skip path");
2532                 ret = MPATH_PR_OTHER;
2533                 goto out;
2534         }
2535
2536         param = malloc(sizeof(struct prout_param_descriptor));
2537         memset(param, 0, sizeof(struct prout_param_descriptor));
2538
2539         for (j = 7; j >= 0; --j) {
2540                 param->sa_key[j] = (prkey & 0xff);
2541                 prkey >>= 8;
2542         }
2543         param->num_transportid = 0;
2544
2545         condlog(3, "device %s:%s", pp->dev, pp->mpp->wwid);
2546
2547         ret = prout_do_scsi_ioctl(pp->dev, MPATH_PROUT_REG_IGN_SA, 0, 0, param, 0);
2548         if (ret != MPATH_PR_SUCCESS )
2549         {
2550                 condlog(0,"%s: Reservation registration failed. Error: %d", pp->dev, ret);
2551         }
2552         mpp->prflag = 1;
2553
2554         free(param);
2555 out:
2556         free(resp);
2557         return NULL;
2558 }
2559
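/*
 * Run mpath_pr_event_handler_fn() for this path in a short-lived
 * thread and wait for it to finish.  Returns -1 if the map has no
 * reservation_key or the thread cannot be created, 0 otherwise.
 */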
2560 int mpath_pr_event_handle(struct path *pp)
2561 {
2562         pthread_t thread;
2563         int rc;
2564         pthread_attr_t attr;
2565         struct multipath * mpp;
2566
2567         mpp = pp->mpp;
2568
2569         if (!mpp->reservation_key)
2570                 return -1;
2571
2572         pthread_attr_init(&attr);
2573         pthread_attr_setdetachstate(&attr, PTHREAD_CREATE_JOINABLE);
2574
2575         rc = pthread_create(&thread, &attr, mpath_pr_event_handler_fn, pp);
2576         pthread_attr_destroy(&attr);
2577         if (rc) {
2578                 condlog(0, "%s: ERROR; return code from pthread_create() is %d", pp->dev, rc);
2579                 return -1;
2580         }
2581         rc = pthread_join(thread, NULL);
2582         return 0;
2583 }
2584