multipath: make 'struct config' a local variable
[multipath-tools/.git] / multipathd / main.c
1 /*
2  * Copyright (c) 2004, 2005 Christophe Varoqui
3  * Copyright (c) 2005 Kiyoshi Ueda, NEC
4  * Copyright (c) 2005 Benjamin Marzinski, Redhat
5  * Copyright (c) 2005 Edward Goggin, EMC
6  */
7 #include <unistd.h>
8 #include <sys/stat.h>
9 #include <libdevmapper.h>
10 #include <wait.h>
11 #include <sys/mman.h>
12 #include <sys/types.h>
13 #include <fcntl.h>
14 #include <errno.h>
15 #include <sys/time.h>
16 #include <sys/resource.h>
17 #include <limits.h>
18 #include <linux/oom.h>
19 #include <libudev.h>
20 #ifdef USE_SYSTEMD
21 #include <systemd/sd-daemon.h>
22 #endif
23 #include <semaphore.h>
24 #include <time.h>
25
26 /*
27  * libcheckers
28  */
29 #include <checkers.h>
30
31 #ifdef USE_SYSTEMD
32 static int use_watchdog;
33 #endif
34
35 int uxsock_timeout;
36
37 /*
38  * libmultipath
39  */
40 #include <parser.h>
41 #include <vector.h>
42 #include <memory.h>
43 #include <config.h>
44 #include <util.h>
45 #include <hwtable.h>
46 #include <defaults.h>
47 #include <structs.h>
48 #include <blacklist.h>
49 #include <structs_vec.h>
50 #include <dmparser.h>
51 #include <devmapper.h>
52 #include <sysfs.h>
53 #include <dict.h>
54 #include <discovery.h>
55 #include <debug.h>
56 #include <propsel.h>
57 #include <uevent.h>
58 #include <switchgroup.h>
59 #include <print.h>
60 #include <configure.h>
61 #include <prio.h>
62 #include <wwids.h>
63 #include <pgpolicies.h>
64 #include <uevent.h>
65 #include <log.h>
66
67 #include <mpath_cmd.h>
68 #include <mpath_persist.h>
69
70 #include "prioritizers/alua_rtpg.h"
71
72 #include "main.h"
73 #include "pidfile.h"
74 #include "uxlsnr.h"
75 #include "uxclnt.h"
76 #include "cli.h"
77 #include "cli_handlers.h"
78 #include "lock.h"
79 #include "waiter.h"
80 #include "wwids.h"
81
82 #define FILE_NAME_SIZE 256
83 #define CMDSIZE 160
84
/*
 * Log a checker message for a path at condlog level `a`.
 *
 * The macro does not take the path as an argument: a variable named `pp`
 * (with ->offline, ->dev and ->mpp->alias) must be in scope wherever it is
 * expanded.  An offline path is always reported as "path offline"; otherwise
 * the message `b` is logged only when it is a non-empty string.
 */
#define LOG_MSG(a, b) \
do { \
        if (!pp->offline) { \
                if ((b)[0] != '\0') \
                        condlog(a, "%s: %s - %s", pp->mpp->alias, pp->dev, b); \
        } else { \
                condlog(a, "%s: %s - path offline", pp->mpp->alias, pp->dev); \
        } \
} while(0)
92
/*
 * Pairs a device name with a multipath map.
 * NOTE(review): no user of this struct is visible in this part of the
 * file; ownership of devname is not established here -- confirm at the
 * use site.
 */
struct mpath_event_param {
        char *devname;
        struct multipath *mpp;
};
98
99 unsigned int mpath_mx_alloc_len;
100
101 int logsink;
102 int verbosity;
103 int bindings_read_only;
104 int ignore_new_devs;
105 enum daemon_status running_state = DAEMON_INIT;
106 pid_t daemon_pid;
107 pthread_mutex_t config_lock = PTHREAD_MUTEX_INITIALIZER;
108 pthread_cond_t config_cond = PTHREAD_COND_INITIALIZER;
109
110 /*
111  * global copy of vecs for use in sig handlers
112  */
113 struct vectors * gvecs;
114
115 struct udev * udev;
116
117 struct config *multipath_conf;
118
119 const char *
120 daemon_status(void)
121 {
122         switch (running_state) {
123         case DAEMON_INIT:
124                 return "init";
125         case DAEMON_START:
126                 return "startup";
127         case DAEMON_CONFIGURE:
128                 return "configure";
129         case DAEMON_IDLE:
130                 return "idle";
131         case DAEMON_RUNNING:
132                 return "running";
133         case DAEMON_SHUTDOWN:
134                 return "shutdown";
135         }
136         return NULL;
137 }
138
139 /*
140  * I love you too, systemd ...
141  */
142 const char *
143 sd_notify_status(void)
144 {
145         switch (running_state) {
146         case DAEMON_INIT:
147                 return "STATUS=init";
148         case DAEMON_START:
149                 return "STATUS=startup";
150         case DAEMON_CONFIGURE:
151                 return "STATUS=configure";
152         case DAEMON_IDLE:
153                 return "STATUS=idle";
154         case DAEMON_RUNNING:
155                 return "STATUS=running";
156         case DAEMON_SHUTDOWN:
157                 return "STATUS=shutdown";
158         }
159         return NULL;
160 }
161
162 static void config_cleanup(void *arg)
163 {
164         pthread_mutex_unlock(&config_lock);
165 }
166
167 void post_config_state(enum daemon_status state)
168 {
169         pthread_mutex_lock(&config_lock);
170         if (state != running_state) {
171                 running_state = state;
172                 pthread_cond_broadcast(&config_cond);
173 #ifdef USE_SYSTEMD
174                 sd_notify(0, sd_notify_status());
175 #endif
176         }
177         pthread_mutex_unlock(&config_lock);
178 }
179
180 int set_config_state(enum daemon_status state)
181 {
182         int rc = 0;
183
184         pthread_cleanup_push(config_cleanup, NULL);
185         pthread_mutex_lock(&config_lock);
186         if (running_state != state) {
187                 if (running_state != DAEMON_IDLE) {
188                         struct timespec ts;
189
190                         clock_gettime(CLOCK_REALTIME, &ts);
191                         ts.tv_sec += 1;
192                         rc = pthread_cond_timedwait(&config_cond,
193                                                     &config_lock, &ts);
194                 }
195                 if (!rc) {
196                         running_state = state;
197                         pthread_cond_broadcast(&config_cond);
198 #ifdef USE_SYSTEMD
199                         sd_notify(0, sd_notify_status());
200 #endif
201                 }
202         }
203         pthread_cleanup_pop(1);
204         return rc;
205 }
206
207 struct config *get_multipath_config(void)
208 {
209         return multipath_conf;
210 }
211
/*
 * Counterpart of get_multipath_config().  Currently does nothing; it
 * exists so that callers already bracket their use of the configuration.
 */
void put_multipath_config(struct config *conf)
{
        (void)conf;
}
216
217 static int
218 need_switch_pathgroup (struct multipath * mpp, int refresh)
219 {
220         struct pathgroup * pgp;
221         struct path * pp;
222         unsigned int i, j;
223         struct config *conf;
224
225         if (!mpp || mpp->pgfailback == -FAILBACK_MANUAL)
226                 return 0;
227
228         /*
229          * Refresh path priority values
230          */
231         if (refresh) {
232                 vector_foreach_slot (mpp->pg, pgp, i) {
233                         vector_foreach_slot (pgp->paths, pp, j) {
234                                 conf = get_multipath_config();
235                                 pathinfo(pp, conf, DI_PRIO);
236                                 put_multipath_config(conf);
237                         }
238                 }
239         }
240
241         if (!mpp->pg || VECTOR_SIZE(mpp->paths) == 0)
242                 return 0;
243
244         mpp->bestpg = select_path_group(mpp);
245
246         if (mpp->bestpg != mpp->nextpg)
247                 return 1;
248
249         return 0;
250 }
251
252 static void
253 switch_pathgroup (struct multipath * mpp)
254 {
255         mpp->stat_switchgroup++;
256         dm_switchgroup(mpp->alias, mpp->bestpg);
257         condlog(2, "%s: switch to path group #%i",
258                  mpp->alias, mpp->bestpg);
259 }
260
261 static int
262 coalesce_maps(struct vectors *vecs, vector nmpv)
263 {
264         struct multipath * ompp;
265         vector ompv = vecs->mpvec;
266         unsigned int i, reassign_maps;
267         struct config *conf;
268
269         conf = get_multipath_config();
270         reassign_maps = conf->reassign_maps;
271         put_multipath_config(conf);
272         vector_foreach_slot (ompv, ompp, i) {
273                 condlog(3, "%s: coalesce map", ompp->alias);
274                 if (!find_mp_by_wwid(nmpv, ompp->wwid)) {
275                         /*
276                          * remove all current maps not allowed by the
277                          * current configuration
278                          */
279                         if (dm_flush_map(ompp->alias)) {
280                                 condlog(0, "%s: unable to flush devmap",
281                                         ompp->alias);
282                                 /*
283                                  * may be just because the device is open
284                                  */
285                                 if (setup_multipath(vecs, ompp) != 0) {
286                                         i--;
287                                         continue;
288                                 }
289                                 if (!vector_alloc_slot(nmpv))
290                                         return 1;
291
292                                 vector_set_slot(nmpv, ompp);
293
294                                 vector_del_slot(ompv, i);
295                                 i--;
296                         }
297                         else {
298                                 dm_lib_release();
299                                 condlog(2, "%s devmap removed", ompp->alias);
300                         }
301                 } else if (reassign_maps) {
302                         condlog(3, "%s: Reassign existing device-mapper"
303                                 " devices", ompp->alias);
304                         dm_reassign(ompp->alias);
305                 }
306         }
307         return 0;
308 }
309
310 void
311 sync_map_state(struct multipath *mpp)
312 {
313         struct pathgroup *pgp;
314         struct path *pp;
315         unsigned int i, j;
316
317         if (!mpp->pg)
318                 return;
319
320         vector_foreach_slot (mpp->pg, pgp, i){
321                 vector_foreach_slot (pgp->paths, pp, j){
322                         if (pp->state == PATH_UNCHECKED ||
323                             pp->state == PATH_WILD ||
324                             pp->state == PATH_DELAYED)
325                                 continue;
326                         if ((pp->dmstate == PSTATE_FAILED ||
327                              pp->dmstate == PSTATE_UNDEF) &&
328                             (pp->state == PATH_UP || pp->state == PATH_GHOST))
329                                 dm_reinstate_path(mpp->alias, pp->dev_t);
330                         else if ((pp->dmstate == PSTATE_ACTIVE ||
331                                   pp->dmstate == PSTATE_UNDEF) &&
332                                  (pp->state == PATH_DOWN ||
333                                   pp->state == PATH_SHAKY))
334                                 dm_fail_path(mpp->alias, pp->dev_t);
335                 }
336         }
337 }
338
339 static void
340 sync_maps_state(vector mpvec)
341 {
342         unsigned int i;
343         struct multipath *mpp;
344
345         vector_foreach_slot (mpvec, mpp, i)
346                 sync_map_state(mpp);
347 }
348
349 static int
350 flush_map(struct multipath * mpp, struct vectors * vecs, int nopaths)
351 {
352         int r;
353
354         if (nopaths)
355                 r = dm_flush_map_nopaths(mpp->alias, mpp->deferred_remove);
356         else
357                 r = dm_flush_map(mpp->alias);
358         /*
359          * clear references to this map before flushing so we can ignore
360          * the spurious uevent we may generate with the dm_flush_map call below
361          */
362         if (r) {
363                 /*
364                  * May not really be an error -- if the map was already flushed
365                  * from the device mapper by dmsetup(8) for instance.
366                  */
367                 if (r == 1)
368                         condlog(0, "%s: can't flush", mpp->alias);
369                 else {
370                         condlog(2, "%s: devmap deferred remove", mpp->alias);
371                         mpp->deferred_remove = DEFERRED_REMOVE_IN_PROGRESS;
372                 }
373                 return r;
374         }
375         else {
376                 dm_lib_release();
377                 condlog(2, "%s: map flushed", mpp->alias);
378         }
379
380         orphan_paths(vecs->pathvec, mpp);
381         remove_map_and_stop_waiter(mpp, vecs, 1);
382
383         return 0;
384 }
385
386 int
387 update_map (struct multipath *mpp, struct vectors *vecs)
388 {
389         int retries = 3;
390         char params[PARAMS_SIZE] = {0};
391
392 retry:
393         condlog(4, "%s: updating new map", mpp->alias);
394         if (adopt_paths(vecs->pathvec, mpp)) {
395                 condlog(0, "%s: failed to adopt paths for new map update",
396                         mpp->alias);
397                 retries = -1;
398                 goto fail;
399         }
400         verify_paths(mpp, vecs);
401         mpp->flush_on_last_del = FLUSH_UNDEF;
402         mpp->action = ACT_RELOAD;
403
404         if (setup_map(mpp, params, PARAMS_SIZE)) {
405                 condlog(0, "%s: failed to setup new map in update", mpp->alias);
406                 retries = -1;
407                 goto fail;
408         }
409         if (domap(mpp, params, 1) <= 0 && retries-- > 0) {
410                 condlog(0, "%s: map_udate sleep", mpp->alias);
411                 sleep(1);
412                 goto retry;
413         }
414         dm_lib_release();
415
416 fail:
417         if (setup_multipath(vecs, mpp))
418                 return 1;
419
420         sync_map_state(mpp);
421
422         if (retries < 0)
423                 condlog(0, "%s: failed reload in new map update", mpp->alias);
424         return 0;
425 }
426
427 static int
428 uev_add_map (struct uevent * uev, struct vectors * vecs)
429 {
430         char *alias;
431         int major = -1, minor = -1, rc;
432
433         condlog(3, "%s: add map (uevent)", uev->kernel);
434         alias = uevent_get_dm_name(uev);
435         if (!alias) {
436                 condlog(3, "%s: No DM_NAME in uevent", uev->kernel);
437                 major = uevent_get_major(uev);
438                 minor = uevent_get_minor(uev);
439                 alias = dm_mapname(major, minor);
440                 if (!alias) {
441                         condlog(2, "%s: mapname not found for %d:%d",
442                                 uev->kernel, major, minor);
443                         return 1;
444                 }
445         }
446         pthread_cleanup_push(cleanup_lock, &vecs->lock);
447         lock(vecs->lock);
448         pthread_testcancel();
449         rc = ev_add_map(uev->kernel, alias, vecs);
450         lock_cleanup_pop(vecs->lock);
451         FREE(alias);
452         return rc;
453 }
454
455 int
456 ev_add_map (char * dev, char * alias, struct vectors * vecs)
457 {
458         char * refwwid;
459         struct multipath * mpp;
460         int map_present;
461         int r = 1, delayed_reconfig, reassign_maps;
462         struct config *conf;
463
464         map_present = dm_map_present(alias);
465
466         if (map_present && !dm_is_mpath(alias)) {
467                 condlog(4, "%s: not a multipath map", alias);
468                 return 0;
469         }
470
471         mpp = find_mp_by_alias(vecs->mpvec, alias);
472
473         if (mpp) {
474                 if (mpp->wait_for_udev > 1) {
475                         if (update_map(mpp, vecs))
476                                 /* setup multipathd removed the map */
477                                 return 1;
478                 }
479                 conf = get_multipath_config();
480                 delayed_reconfig = conf->delayed_reconfig;
481                 reassign_maps = conf->reassign_maps;
482                 put_multipath_config(conf);
483                 if (mpp->wait_for_udev) {
484                         mpp->wait_for_udev = 0;
485                         if (delayed_reconfig &&
486                             !need_to_delay_reconfig(vecs)) {
487                                 condlog(2, "reconfigure (delayed)");
488                                 set_config_state(DAEMON_CONFIGURE);
489                                 return 0;
490                         }
491                 }
492                 /*
493                  * Not really an error -- we generate our own uevent
494                  * if we create a multipath mapped device as a result
495                  * of uev_add_path
496                  */
497                 if (reassign_maps) {
498                         condlog(3, "%s: Reassign existing device-mapper devices",
499                                 alias);
500                         dm_reassign(alias);
501                 }
502                 return 0;
503         }
504         condlog(2, "%s: adding map", alias);
505
506         /*
507          * now we can register the map
508          */
509         if (map_present) {
510                 if ((mpp = add_map_without_path(vecs, alias))) {
511                         sync_map_state(mpp);
512                         condlog(2, "%s: devmap %s registered", alias, dev);
513                         return 0;
514                 } else {
515                         condlog(2, "%s: uev_add_map failed", dev);
516                         return 1;
517                 }
518         }
519         r = get_refwwid(CMD_NONE, dev, DEV_DEVMAP, vecs->pathvec, &refwwid);
520
521         if (refwwid) {
522                 r = coalesce_paths(vecs, NULL, refwwid, 0, CMD_NONE);
523                 dm_lib_release();
524         }
525
526         if (!r)
527                 condlog(2, "%s: devmap %s added", alias, dev);
528         else if (r == 2)
529                 condlog(2, "%s: uev_add_map %s blacklisted", alias, dev);
530         else
531                 condlog(0, "%s: uev_add_map %s failed", alias, dev);
532
533         FREE(refwwid);
534         return r;
535 }
536
537 static int
538 uev_remove_map (struct uevent * uev, struct vectors * vecs)
539 {
540         char *alias;
541         int minor;
542         struct multipath *mpp;
543
544         condlog(2, "%s: remove map (uevent)", uev->kernel);
545         alias = uevent_get_dm_name(uev);
546         if (!alias) {
547                 condlog(3, "%s: No DM_NAME in uevent, ignoring", uev->kernel);
548                 return 0;
549         }
550         minor = uevent_get_minor(uev);
551
552         pthread_cleanup_push(cleanup_lock, &vecs->lock);
553         lock(vecs->lock);
554         pthread_testcancel();
555         mpp = find_mp_by_minor(vecs->mpvec, minor);
556
557         if (!mpp) {
558                 condlog(2, "%s: devmap not registered, can't remove",
559                         uev->kernel);
560                 goto out;
561         }
562         if (strcmp(mpp->alias, alias)) {
563                 condlog(2, "%s: minor number mismatch (map %d, event %d)",
564                         mpp->alias, mpp->dmi->minor, minor);
565                 goto out;
566         }
567
568         orphan_paths(vecs->pathvec, mpp);
569         remove_map_and_stop_waiter(mpp, vecs, 1);
570 out:
571         lock_cleanup_pop(vecs->lock);
572         FREE(alias);
573         return 0;
574 }
575
576 /* Called from CLI handler */
577 int
578 ev_remove_map (char * devname, char * alias, int minor, struct vectors * vecs)
579 {
580         struct multipath * mpp;
581
582         mpp = find_mp_by_minor(vecs->mpvec, minor);
583
584         if (!mpp) {
585                 condlog(2, "%s: devmap not registered, can't remove",
586                         devname);
587                 return 1;
588         }
589         if (strcmp(mpp->alias, alias)) {
590                 condlog(2, "%s: minor number mismatch (map %d, event %d)",
591                         mpp->alias, mpp->dmi->minor, minor);
592                 return 1;
593         }
594         return flush_map(mpp, vecs, 0);
595 }
596
597 static int
598 uev_add_path (struct uevent *uev, struct vectors * vecs)
599 {
600         struct path *pp;
601         int ret = 0, i;
602         struct config *conf;
603
604         condlog(2, "%s: add path (uevent)", uev->kernel);
605         if (strstr(uev->kernel, "..") != NULL) {
606                 /*
607                  * Don't allow relative device names in the pathvec
608                  */
609                 condlog(0, "%s: path name is invalid", uev->kernel);
610                 return 1;
611         }
612
613         pthread_cleanup_push(cleanup_lock, &vecs->lock);
614         lock(vecs->lock);
615         pthread_testcancel();
616         pp = find_path_by_dev(vecs->pathvec, uev->kernel);
617         if (pp) {
618                 int r;
619
620                 condlog(0, "%s: spurious uevent, path already in pathvec",
621                         uev->kernel);
622                 if (!pp->mpp && !strlen(pp->wwid)) {
623                         condlog(3, "%s: reinitialize path", uev->kernel);
624                         udev_device_unref(pp->udev);
625                         pp->udev = udev_device_ref(uev->udev);
626                         conf = get_multipath_config();
627                         r = pathinfo(pp, conf,
628                                      DI_ALL | DI_BLACKLIST);
629                         put_multipath_config(conf);
630                         if (r == PATHINFO_OK)
631                                 ret = ev_add_path(pp, vecs);
632                         else if (r == PATHINFO_SKIPPED) {
633                                 condlog(3, "%s: remove blacklisted path",
634                                         uev->kernel);
635                                 i = find_slot(vecs->pathvec, (void *)pp);
636                                 if (i != -1)
637                                         vector_del_slot(vecs->pathvec, i);
638                                 free_path(pp);
639                         } else {
640                                 condlog(0, "%s: failed to reinitialize path",
641                                         uev->kernel);
642                                 ret = 1;
643                         }
644                 }
645         }
646         lock_cleanup_pop(vecs->lock);
647         if (pp)
648                 return ret;
649
650         /*
651          * get path vital state
652          */
653         conf = get_multipath_config();
654         ret = alloc_path_with_pathinfo(conf, uev->udev,
655                                        DI_ALL, &pp);
656         put_multipath_config(conf);
657         if (!pp) {
658                 if (ret == PATHINFO_SKIPPED)
659                         return 0;
660                 condlog(3, "%s: failed to get path info", uev->kernel);
661                 return 1;
662         }
663         pthread_cleanup_push(cleanup_lock, &vecs->lock);
664         lock(vecs->lock);
665         pthread_testcancel();
666         ret = store_path(vecs->pathvec, pp);
667         if (!ret) {
668                 conf = get_multipath_config();
669                 pp->checkint = conf->checkint;
670                 put_multipath_config(conf);
671                 ret = ev_add_path(pp, vecs);
672         } else {
673                 condlog(0, "%s: failed to store path info, "
674                         "dropping event",
675                         uev->kernel);
676                 free_path(pp);
677                 ret = 1;
678         }
679         lock_cleanup_pop(vecs->lock);
680         return ret;
681 }
682
/*
 * Attach path pp to the multipath map whose WWID matches pp->wwid.
 *
 * If such a map exists, all matching paths are (re)adopted and the map is
 * reloaded; if not, and should_multipath() agrees, a new map is created.
 * The table is built with setup_map(), pushed with domap(), and the
 * daemon's view is then refreshed from the kernel with setup_multipath().
 *
 * On a size mismatch with the existing map, pp is removed from the
 * pathvec and freed before returning 1, so the caller must not touch pp
 * afterwards in that case.  On the other failure paths pp stays in the
 * pathvec as an orphan.
 */
683 /*
684  * returns:
685  * 0: added
686  * 1: error
687  */
688 int
689 ev_add_path (struct path * pp, struct vectors * vecs)
690 {
691         struct multipath * mpp;
692         char params[PARAMS_SIZE] = {0};
        /*
         * One budget shared by the domap retry and the rescan loop below;
         * it drops below zero once exhausted, which the final check turns
         * into a failure return.
         */
693         int retries = 3;
        /* set when this call created the map and so must start its waiter */
694         int start_waiter = 0;
695         int ret;
696
697         /*
698          * need path UID to go any further
699          */
700         if (strlen(pp->wwid) == 0) {
701                 condlog(0, "%s: failed to get path uid", pp->dev);
702                 goto fail; /* leave path added to pathvec */
703         }
704         mpp = find_mp_by_wwid(vecs->mpvec, pp->wwid);
        /*
         * The map's creation has not been confirmed yet.  Marking it with 2
         * makes ev_add_map() call update_map() once the map event arrives;
         * until then the path is left orphaned.
         */
705         if (mpp && mpp->wait_for_udev) {
706                 mpp->wait_for_udev = 2;
707                 orphan_path(pp, "waiting for create to complete");
708                 return 0;
709         }
710
711         pp->mpp = mpp;
712 rescan:
713         if (mpp) {
714                 if (pp->size && mpp->size != pp->size) {
715                         condlog(0, "%s: failed to add new path %s, "
716                                 "device size mismatch",
717                                 mpp->alias, pp->dev);
                        /* pp is dropped entirely here, not just orphaned */
718                         int i = find_slot(vecs->pathvec, (void *)pp);
719                         if (i != -1)
720                                 vector_del_slot(vecs->pathvec, i);
721                         free_path(pp);
722                         return 1;
723                 }
724
725                 condlog(4,"%s: adopting all paths for path %s",
726                         mpp->alias, pp->dev);
727                 if (adopt_paths(vecs->pathvec, mpp))
728                         goto fail; /* leave path added to pathvec */
729
730                 verify_paths(mpp, vecs);
731                 mpp->flush_on_last_del = FLUSH_UNDEF;
732                 mpp->action = ACT_RELOAD;
733         } else {
734                 if (!should_multipath(pp, vecs->pathvec)) {
735                         orphan_path(pp, "only one path");
736                         return 0;
737                 }
738                 condlog(4,"%s: creating new map", pp->dev);
739                 if ((mpp = add_map_with_path(vecs, pp, 1))) {
740                         mpp->action = ACT_CREATE;
741                         /*
742                          * We don't depend on ACT_CREATE, as domap will
743                          * set it to ACT_NOTHING when complete.
744                          */
745                         start_waiter = 1;
746                 }
747                 if (!start_waiter)
748                         goto fail; /* leave path added to pathvec */
749         }
750
751         /* persistent reservation check*/
752         mpath_pr_event_handle(pp);
753
754         /*
755          * push the map to the device-mapper
756          */
757         if (setup_map(mpp, params, PARAMS_SIZE)) {
758                 condlog(0, "%s: failed to setup map for addition of new "
759                         "path %s", mpp->alias, pp->dev);
760                 goto fail_map;
761         }
762         /*
763          * reload the map for the multipath mapped device
764          */
765 retry:
766         ret = domap(mpp, params, 1);
767         if (ret <= 0) {
                /* a negative result is retried as-is, with the same table */
768                 if (ret < 0 && retries-- > 0) {
769                         condlog(0, "%s: retry domap for addition of new "
770                                 "path %s", mpp->alias, pp->dev);
771                         sleep(1);
772                         goto retry;
773                 }
774                 condlog(0, "%s: failed in domap for addition of new "
775                         "path %s", mpp->alias, pp->dev);
776                 /*
777                  * deal with asynchronous uevents :((
778                  */
                /*
                 * For a reload, refresh the map's path list and start over
                 * from the adoption step; when the budget is used up, carry
                 * on to the state refresh below (the negative counter makes
                 * the function fail at the end).  A failed create removes
                 * the map instead.
                 */
779                 if (mpp->action == ACT_RELOAD && retries-- > 0) {
780                         condlog(0, "%s: ev_add_path sleep", mpp->alias);
781                         sleep(1);
782                         update_mpp_paths(mpp, vecs->pathvec);
783                         goto rescan;
784                 }
785                 else if (mpp->action == ACT_RELOAD)
786                         condlog(0, "%s: giving up reload", mpp->alias);
787                 else
788                         goto fail_map;
789         }
790         dm_lib_release();
791
792         /*
793          * update our state from kernel regardless of create or reload
794          */
795         if (setup_multipath(vecs, mpp))
796                 goto fail; /* if setup_multipath fails, it removes the map */
797
798         sync_map_state(mpp);
799
        /*
         * Start the waiter for a map created by this call that does not
         * have one yet; failure to start it removes the map again.
         */
800         if ((mpp->action == ACT_CREATE ||
801              (mpp->action == ACT_NOTHING && start_waiter && !mpp->waiter)) &&
802             start_waiter_thread(mpp, vecs))
803                         goto fail_map;
804
805         if (retries >= 0) {
806                 condlog(2, "%s [%s]: path added to devmap %s",
807                         pp->dev, pp->dev_t, mpp->alias);
808                 return 0;
809         } else
810                 goto fail;
811
        /* fail_map: drop the map, then fall through to orphan the path */
812 fail_map:
813         remove_map(mpp, vecs, 1);
814 fail:
815         orphan_path(pp, "failed to add path");
816         return 1;
817 }
818
819 static int
820 uev_remove_path (struct uevent *uev, struct vectors * vecs)
821 {
822         struct path *pp;
823         int ret;
824
825         condlog(2, "%s: remove path (uevent)", uev->kernel);
826         pthread_cleanup_push(cleanup_lock, &vecs->lock);
827         lock(vecs->lock);
828         pthread_testcancel();
829         pp = find_path_by_dev(vecs->pathvec, uev->kernel);
830         if (pp)
831                 ret = ev_remove_path(pp, vecs);
832         lock_cleanup_pop(vecs->lock);
833         if (!pp) {
834                 /* Not an error; path might have been purged earlier */
835                 condlog(0, "%s: path already removed", uev->kernel);
836                 return 0;
837         }
838         return ret;
839 }
840
841 int
842 ev_remove_path (struct path *pp, struct vectors * vecs)
843 {
844         struct multipath * mpp;
845         int i, retval = 0;
846         char params[PARAMS_SIZE] = {0};
847
848         /*
849          * avoid referring to the map of an orphaned path
850          */
851         if ((mpp = pp->mpp)) {
852                 /*
853                  * transform the mp->pg vector of vectors of paths
854                  * into a mp->params string to feed the device-mapper
855                  */
856                 if (update_mpp_paths(mpp, vecs->pathvec)) {
857                         condlog(0, "%s: failed to update paths",
858                                 mpp->alias);
859                         goto fail;
860                 }
861                 if ((i = find_slot(mpp->paths, (void *)pp)) != -1)
862                         vector_del_slot(mpp->paths, i);
863
864                 /*
865                  * remove the map IFF removing the last path
866                  */
867                 if (VECTOR_SIZE(mpp->paths) == 0) {
868                         char alias[WWID_SIZE];
869
870                         /*
871                          * flush_map will fail if the device is open
872                          */
873                         strncpy(alias, mpp->alias, WWID_SIZE);
874                         if (mpp->flush_on_last_del == FLUSH_ENABLED) {
875                                 condlog(2, "%s Last path deleted, disabling queueing", mpp->alias);
876                                 mpp->retry_tick = 0;
877                                 mpp->no_path_retry = NO_PATH_RETRY_FAIL;
878                                 mpp->flush_on_last_del = FLUSH_IN_PROGRESS;
879                                 dm_queue_if_no_path(mpp->alias, 0);
880                         }
881                         if (!flush_map(mpp, vecs, 1)) {
882                                 condlog(2, "%s: removed map after"
883                                         " removing all paths",
884                                         alias);
885                                 retval = 0;
886                                 goto out;
887                         }
888                         /*
889                          * Not an error, continue
890                          */
891                 }
892
893                 if (setup_map(mpp, params, PARAMS_SIZE)) {
894                         condlog(0, "%s: failed to setup map for"
895                                 " removal of path %s", mpp->alias, pp->dev);
896                         goto fail;
897                 }
898
899                 if (mpp->wait_for_udev) {
900                         mpp->wait_for_udev = 2;
901                         goto out;
902                 }
903
904                 /*
905                  * reload the map
906                  */
907                 mpp->action = ACT_RELOAD;
908                 if (domap(mpp, params, 1) <= 0) {
909                         condlog(0, "%s: failed in domap for "
910                                 "removal of path %s",
911                                 mpp->alias, pp->dev);
912                         retval = 1;
913                 } else {
914                         /*
915                          * update our state from kernel
916                          */
917                         if (setup_multipath(vecs, mpp))
918                                 return 1;
919                         sync_map_state(mpp);
920
921                         condlog(2, "%s [%s]: path removed from map %s",
922                                 pp->dev, pp->dev_t, mpp->alias);
923                 }
924         }
925
926 out:
927         if ((i = find_slot(vecs->pathvec, (void *)pp)) != -1)
928                 vector_del_slot(vecs->pathvec, i);
929
930         free_path(pp);
931
932         return retval;
933
934 fail:
935         remove_map_and_stop_waiter(mpp, vecs, 1);
936         return 1;
937 }
938
939 static int
940 uev_update_path (struct uevent *uev, struct vectors * vecs)
941 {
942         int ro, retval = 0;
943
944         ro = uevent_get_disk_ro(uev);
945
946         if (ro >= 0) {
947                 struct path * pp;
948                 struct multipath *mpp = NULL;
949
950                 condlog(2, "%s: update path write_protect to '%d' (uevent)",
951                         uev->kernel, ro);
952                 pthread_cleanup_push(cleanup_lock, &vecs->lock);
953                 lock(vecs->lock);
954                 pthread_testcancel();
955                 /*
956                  * pthread_mutex_lock() and pthread_mutex_unlock()
957                  * need to be at the same indentation level, hence
958                  * this slightly convoluted codepath.
959                  */
960                 pp = find_path_by_dev(vecs->pathvec, uev->kernel);
961                 if (pp) {
962                         if (pp->initialized == INIT_REQUESTED_UDEV) {
963                                 retval = 2;
964                         } else {
965                                 mpp = pp->mpp;
966                                 if (mpp && mpp->wait_for_udev) {
967                                         mpp->wait_for_udev = 2;
968                                         mpp = NULL;
969                                         retval = 0;
970                                 }
971                         }
972                         if (mpp) {
973                                 retval = reload_map(vecs, mpp, 0, 1);
974
975                                 condlog(2, "%s: map %s reloaded (retval %d)",
976                                         uev->kernel, mpp->alias, retval);
977                         }
978                 }
979                 lock_cleanup_pop(vecs->lock);
980                 if (!pp) {
981                         condlog(0, "%s: spurious uevent, path not found",
982                                 uev->kernel);
983                         return 1;
984                 }
985                 if (retval == 2)
986                         return uev_add_path(uev, vecs);
987         }
988
989         return retval;
990 }
991
992 static int
993 map_discovery (struct vectors * vecs)
994 {
995         struct multipath * mpp;
996         unsigned int i;
997
998         if (dm_get_maps(vecs->mpvec))
999                 return 1;
1000
1001         vector_foreach_slot (vecs->mpvec, mpp, i)
1002                 if (setup_multipath(vecs, mpp))
1003                         return 1;
1004
1005         return 0;
1006 }
1007
1008 int
1009 uxsock_trigger (char * str, char ** reply, int * len, void * trigger_data)
1010 {
1011         struct vectors * vecs;
1012         int r;
1013
1014         *reply = NULL;
1015         *len = 0;
1016         vecs = (struct vectors *)trigger_data;
1017
1018         r = parse_cmd(str, reply, len, vecs, uxsock_timeout / 1000);
1019
1020         if (r > 0) {
1021                 if (r == ETIMEDOUT)
1022                         *reply = STRDUP("timeout\n");
1023                 else
1024                         *reply = STRDUP("fail\n");
1025                 *len = strlen(*reply) + 1;
1026                 r = 1;
1027         }
1028         else if (!r && *len == 0) {
1029                 *reply = STRDUP("ok\n");
1030                 *len = strlen(*reply) + 1;
1031                 r = 0;
1032         }
1033         /* else if (r < 0) leave *reply alone */
1034
1035         return r;
1036 }
1037
/*
 * Decide whether a uevent should be dropped based on its devpath.
 *
 * Only whole block devices are kept: the devpath must contain "/block/"
 * followed by a name, and must not continue with a second
 * "/<name>" component (i.e. a partition below the disk).
 *
 * Returns 1 to discard the event, 0 to keep it.
 */
static int
uev_discard(char * devpath)
{
	char disk[11], part[11];
	char *blk = strstr(devpath, "/block/");

	if (!blk) {
		condlog(4, "no /block/ in '%s'", devpath);
		return 1;
	}

	/* keep: a device name is present and nothing follows it */
	if (sscanf(blk, "/block/%10s", disk) == 1 &&
	    sscanf(blk, "/block/%10[^/]/%10s", disk, part) != 2)
		return 0;

	condlog(4, "discard event on %s", devpath);
	return 1;
}
1059
1060 int
1061 uev_trigger (struct uevent * uev, void * trigger_data)
1062 {
1063         int r = 0;
1064         struct vectors * vecs;
1065         struct config *conf;
1066
1067         vecs = (struct vectors *)trigger_data;
1068
1069         if (uev_discard(uev->devpath))
1070                 return 0;
1071
1072         pthread_cleanup_push(config_cleanup, NULL);
1073         pthread_mutex_lock(&config_lock);
1074         if (running_state != DAEMON_IDLE &&
1075             running_state != DAEMON_RUNNING)
1076                 pthread_cond_wait(&config_cond, &config_lock);
1077         pthread_cleanup_pop(1);
1078
1079         if (running_state == DAEMON_SHUTDOWN)
1080                 return 0;
1081
1082         /*
1083          * device map event
1084          * Add events are ignored here as the tables
1085          * are not fully initialised then.
1086          */
1087         if (!strncmp(uev->kernel, "dm-", 3)) {
1088                 if (!strncmp(uev->action, "change", 6)) {
1089                         r = uev_add_map(uev, vecs);
1090                         goto out;
1091                 }
1092                 if (!strncmp(uev->action, "remove", 6)) {
1093                         r = uev_remove_map(uev, vecs);
1094                         goto out;
1095                 }
1096                 goto out;
1097         }
1098
1099         /*
1100          * path add/remove event
1101          */
1102         conf = get_multipath_config();
1103         if (filter_devnode(conf->blist_devnode, conf->elist_devnode,
1104                            uev->kernel) > 0) {
1105                 put_multipath_config(conf);
1106                 goto out;
1107         }
1108         put_multipath_config(conf);
1109
1110         if (!strncmp(uev->action, "add", 3)) {
1111                 r = uev_add_path(uev, vecs);
1112                 goto out;
1113         }
1114         if (!strncmp(uev->action, "remove", 6)) {
1115                 r = uev_remove_path(uev, vecs);
1116                 goto out;
1117         }
1118         if (!strncmp(uev->action, "change", 6)) {
1119                 r = uev_update_path(uev, vecs);
1120                 goto out;
1121         }
1122
1123 out:
1124         return r;
1125 }
1126
1127 static void *
1128 ueventloop (void * ap)
1129 {
1130         struct udev *udev = ap;
1131
1132         if (uevent_listen(udev))
1133                 condlog(0, "error starting uevent listener");
1134
1135         return NULL;
1136 }
1137
1138 static void *
1139 uevqloop (void * ap)
1140 {
1141         if (uevent_dispatch(&uev_trigger, ap))
1142                 condlog(0, "error starting uevent dispatcher");
1143
1144         return NULL;
1145 }
1146 static void *
1147 uxlsnrloop (void * ap)
1148 {
1149         if (cli_init()) {
1150                 condlog(1, "Failed to init uxsock listener");
1151                 return NULL;
1152         }
1153
1154         set_handler_callback(LIST+PATHS, cli_list_paths);
1155         set_handler_callback(LIST+PATHS+FMT, cli_list_paths_fmt);
1156         set_handler_callback(LIST+PATHS+RAW+FMT, cli_list_paths_raw);
1157         set_handler_callback(LIST+PATH, cli_list_path);
1158         set_handler_callback(LIST+MAPS, cli_list_maps);
1159         set_unlocked_handler_callback(LIST+STATUS, cli_list_status);
1160         set_unlocked_handler_callback(LIST+DAEMON, cli_list_daemon);
1161         set_handler_callback(LIST+MAPS+STATUS, cli_list_maps_status);
1162         set_handler_callback(LIST+MAPS+STATS, cli_list_maps_stats);
1163         set_handler_callback(LIST+MAPS+FMT, cli_list_maps_fmt);
1164         set_handler_callback(LIST+MAPS+RAW+FMT, cli_list_maps_raw);
1165         set_handler_callback(LIST+MAPS+TOPOLOGY, cli_list_maps_topology);
1166         set_handler_callback(LIST+TOPOLOGY, cli_list_maps_topology);
1167         set_handler_callback(LIST+MAPS+JSON, cli_list_maps_json);
1168         set_handler_callback(LIST+MAP+TOPOLOGY, cli_list_map_topology);
1169         set_handler_callback(LIST+MAP+FMT, cli_list_map_fmt);
1170         set_handler_callback(LIST+MAP+RAW+FMT, cli_list_map_fmt);
1171         set_handler_callback(LIST+MAP+JSON, cli_list_map_json);
1172         set_handler_callback(LIST+CONFIG, cli_list_config);
1173         set_handler_callback(LIST+BLACKLIST, cli_list_blacklist);
1174         set_handler_callback(LIST+DEVICES, cli_list_devices);
1175         set_handler_callback(LIST+WILDCARDS, cli_list_wildcards);
1176         set_handler_callback(ADD+PATH, cli_add_path);
1177         set_handler_callback(DEL+PATH, cli_del_path);
1178         set_handler_callback(ADD+MAP, cli_add_map);
1179         set_handler_callback(DEL+MAP, cli_del_map);
1180         set_handler_callback(SWITCH+MAP+GROUP, cli_switch_group);
1181         set_unlocked_handler_callback(RECONFIGURE, cli_reconfigure);
1182         set_handler_callback(SUSPEND+MAP, cli_suspend);
1183         set_handler_callback(RESUME+MAP, cli_resume);
1184         set_handler_callback(RESIZE+MAP, cli_resize);
1185         set_handler_callback(RELOAD+MAP, cli_reload);
1186         set_handler_callback(RESET+MAP, cli_reassign);
1187         set_handler_callback(REINSTATE+PATH, cli_reinstate);
1188         set_handler_callback(FAIL+PATH, cli_fail);
1189         set_handler_callback(DISABLEQ+MAP, cli_disable_queueing);
1190         set_handler_callback(RESTOREQ+MAP, cli_restore_queueing);
1191         set_handler_callback(DISABLEQ+MAPS, cli_disable_all_queueing);
1192         set_handler_callback(RESTOREQ+MAPS, cli_restore_all_queueing);
1193         set_unlocked_handler_callback(QUIT, cli_quit);
1194         set_unlocked_handler_callback(SHUTDOWN, cli_shutdown);
1195         set_handler_callback(GETPRSTATUS+MAP, cli_getprstatus);
1196         set_handler_callback(SETPRSTATUS+MAP, cli_setprstatus);
1197         set_handler_callback(UNSETPRSTATUS+MAP, cli_unsetprstatus);
1198         set_handler_callback(FORCEQ+DAEMON, cli_force_no_daemon_q);
1199         set_handler_callback(RESTOREQ+DAEMON, cli_restore_no_daemon_q);
1200
1201         umask(077);
1202         uxsock_listen(&uxsock_trigger, ap);
1203
1204         return NULL;
1205 }
1206
1207 void
1208 exit_daemon (void)
1209 {
1210         post_config_state(DAEMON_SHUTDOWN);
1211 }
1212
1213 static void
1214 fail_path (struct path * pp, int del_active)
1215 {
1216         if (!pp->mpp)
1217                 return;
1218
1219         condlog(2, "checker failed path %s in map %s",
1220                  pp->dev_t, pp->mpp->alias);
1221
1222         dm_fail_path(pp->mpp->alias, pp->dev_t);
1223         if (del_active)
1224                 update_queue_mode_del_path(pp->mpp);
1225 }
1226
1227 /*
1228  * caller must have locked the path list before calling that function
1229  */
1230 static int
1231 reinstate_path (struct path * pp, int add_active)
1232 {
1233         int ret = 0;
1234
1235         if (!pp->mpp)
1236                 return 0;
1237
1238         if (dm_reinstate_path(pp->mpp->alias, pp->dev_t)) {
1239                 condlog(0, "%s: reinstate failed", pp->dev_t);
1240                 ret = 1;
1241         } else {
1242                 condlog(2, "%s: reinstated", pp->dev_t);
1243                 if (add_active)
1244                         update_queue_mode_add_path(pp->mpp);
1245         }
1246         return ret;
1247 }
1248
1249 static void
1250 enable_group(struct path * pp)
1251 {
1252         struct pathgroup * pgp;
1253
1254         /*
1255          * if path is added through uev_add_path, pgindex can be unset.
1256          * next update_strings() will set it, upon map reload event.
1257          *
1258          * we can safely return here, because upon map reload, all
1259          * PG will be enabled.
1260          */
1261         if (!pp->mpp->pg || !pp->pgindex)
1262                 return;
1263
1264         pgp = VECTOR_SLOT(pp->mpp->pg, pp->pgindex - 1);
1265
1266         if (pgp->status == PGSTATE_DISABLED) {
1267                 condlog(2, "%s: enable group #%i", pp->mpp->alias, pp->pgindex);
1268                 dm_enablegroup(pp->mpp->alias, pp->pgindex);
1269         }
1270 }
1271
1272 static void
1273 mpvec_garbage_collector (struct vectors * vecs)
1274 {
1275         struct multipath * mpp;
1276         unsigned int i;
1277
1278         if (!vecs->mpvec)
1279                 return;
1280
1281         vector_foreach_slot (vecs->mpvec, mpp, i) {
1282                 if (mpp && mpp->alias && !dm_map_present(mpp->alias)) {
1283                         condlog(2, "%s: remove dead map", mpp->alias);
1284                         remove_map_and_stop_waiter(mpp, vecs, 1);
1285                         i--;
1286                 }
1287         }
1288 }
1289
1290 /* This is called after a path has started working again. It the multipath
1291  * device for this path uses the followover failback type, and this is the
1292  * best pathgroup, and this is the first path in the pathgroup to come back
1293  * up, then switch to this pathgroup */
1294 static int
1295 followover_should_failback(struct path * pp)
1296 {
1297         struct pathgroup * pgp;
1298         struct path *pp1;
1299         int i;
1300
1301         if (pp->mpp->pgfailback != -FAILBACK_FOLLOWOVER ||
1302             !pp->mpp->pg || !pp->pgindex ||
1303             pp->pgindex != pp->mpp->bestpg)
1304                 return 0;
1305
1306         pgp = VECTOR_SLOT(pp->mpp->pg, pp->pgindex - 1);
1307         vector_foreach_slot(pgp->paths, pp1, i) {
1308                 if (pp1 == pp)
1309                         continue;
1310                 if (pp1->chkrstate != PATH_DOWN && pp1->chkrstate != PATH_SHAKY)
1311                         return 0;
1312         }
1313         return 1;
1314 }
1315
1316 static void
1317 missing_uev_wait_tick(struct vectors *vecs)
1318 {
1319         struct multipath * mpp;
1320         unsigned int i;
1321         int timed_out = 0, delayed_reconfig;
1322         struct config *conf;
1323
1324         vector_foreach_slot (vecs->mpvec, mpp, i) {
1325                 if (mpp->wait_for_udev && --mpp->uev_wait_tick <= 0) {
1326                         timed_out = 1;
1327                         condlog(0, "%s: timeout waiting on creation uevent. enabling reloads", mpp->alias);
1328                         if (mpp->wait_for_udev > 1 && update_map(mpp, vecs)) {
1329                                 /* update_map removed map */
1330                                 i--;
1331                                 continue;
1332                         }
1333                         mpp->wait_for_udev = 0;
1334                 }
1335         }
1336
1337         conf = get_multipath_config();
1338         delayed_reconfig = conf->delayed_reconfig;
1339         put_multipath_config(conf);
1340         if (timed_out && delayed_reconfig &&
1341             !need_to_delay_reconfig(vecs)) {
1342                 condlog(2, "reconfigure (delayed)");
1343                 set_config_state(DAEMON_CONFIGURE);
1344         }
1345 }
1346
1347 static void
1348 defered_failback_tick (vector mpvec)
1349 {
1350         struct multipath * mpp;
1351         unsigned int i;
1352
1353         vector_foreach_slot (mpvec, mpp, i) {
1354                 /*
1355                  * defered failback getting sooner
1356                  */
1357                 if (mpp->pgfailback > 0 && mpp->failback_tick > 0) {
1358                         mpp->failback_tick--;
1359
1360                         if (!mpp->failback_tick && need_switch_pathgroup(mpp, 1))
1361                                 switch_pathgroup(mpp);
1362                 }
1363         }
1364 }
1365
1366 static void
1367 retry_count_tick(vector mpvec)
1368 {
1369         struct multipath *mpp;
1370         unsigned int i;
1371
1372         vector_foreach_slot (mpvec, mpp, i) {
1373                 if (mpp->retry_tick > 0) {
1374                         mpp->stat_total_queueing_time++;
1375                         condlog(4, "%s: Retrying.. No active path", mpp->alias);
1376                         if(--mpp->retry_tick == 0) {
1377                                 dm_queue_if_no_path(mpp->alias, 0);
1378                                 condlog(2, "%s: Disable queueing", mpp->alias);
1379                         }
1380                 }
1381         }
1382 }
1383
1384 int update_prio(struct path *pp, int refresh_all)
1385 {
1386         int oldpriority;
1387         struct path *pp1;
1388         struct pathgroup * pgp;
1389         int i, j, changed = 0;
1390         struct config *conf;
1391
1392         if (refresh_all) {
1393                 vector_foreach_slot (pp->mpp->pg, pgp, i) {
1394                         vector_foreach_slot (pgp->paths, pp1, j) {
1395                                 oldpriority = pp1->priority;
1396                                 conf = get_multipath_config();
1397                                 pathinfo(pp1, conf, DI_PRIO);
1398                                 put_multipath_config(conf);
1399                                 if (pp1->priority != oldpriority)
1400                                         changed = 1;
1401                         }
1402                 }
1403                 return changed;
1404         }
1405         oldpriority = pp->priority;
1406         conf = get_multipath_config();
1407         pathinfo(pp, conf, DI_PRIO);
1408         put_multipath_config(conf);
1409
1410         if (pp->priority == oldpriority)
1411                 return 0;
1412         return 1;
1413 }
1414
/*
 * Reload 'mpp' and resynchronise the daemon's view of it.
 *
 * The sequence is reload_map(), dm_lib_release(), setup_multipath() and
 * finally sync_map_state(); it stops at the first failing step.
 *
 * Returns 0 on success, 1 if the reload or the setup failed.
 */
int update_path_groups(struct multipath *mpp, struct vectors *vecs, int refresh)
{
	int failed = 1;

	if (!reload_map(vecs, mpp, refresh, 1)) {
		dm_lib_release();
		if (setup_multipath(vecs, mpp) == 0) {
			sync_map_state(mpp);
			failed = 0;
		}
	}

	return failed;
}
1427
1428 /*
      * Run one checker pass for path 'pp' and act on the result.
      *
      * vecs  - daemon vectors, used when a path has to be (re)added or
      *         its map synchronised/reloaded
      * pp    - the path to examine
      * ticks - amount subtracted from pp->tick; the path is only
      *         examined once pp->tick has reached zero
      *
      * Depending on the new checker state the path is failed or
      * reinstated in device-mapper, its check interval is adjusted and,
      * at the end, priorities are refreshed and a path group switch or
      * failback is triggered or scheduled.
      *
1429  * Returns '1' if the path has been checked, '0' otherwise
1430  */
1431 int
1432 check_path (struct vectors * vecs, struct path * pp, int ticks)
1433 {
1434         int newstate;
1435         int new_path_up = 0;
1436         int chkr_new_path_up = 0;
1437         int add_active;
1438         int disable_reinstate = 0;
1439         int oldchkrstate = pp->chkrstate;
1440         int retrigger_tries, checkint;
1441         struct config *conf;
1442
             /*
              * paths in INIT_OK or INIT_REQUESTED_UDEV state that are not
              * part of a map are skipped
              */
1443         if ((pp->initialized == INIT_OK ||
1444              pp->initialized == INIT_REQUESTED_UDEV) && !pp->mpp)
1445                 return 0;
1446
             /* consume the elapsed ticks without going below zero */
1447         if (pp->tick)
1448                 pp->tick -= (pp->tick > ticks) ? ticks : pp->tick;
1449         if (pp->tick)
1450                 return 0; /* don't check this path yet */
1451
             /* copy the settings needed below, then drop the config again */
1452         conf = get_multipath_config();
1453         retrigger_tries = conf->retrigger_tries;
1454         checkint = conf->checkint;
1455         put_multipath_config(conf);
             /*
              * unmapped path in INIT_MISSING_UDEV state: write "change" to
              * its sysfs "uevent" attribute, at most retrigger_tries times
              */
1456         if (!pp->mpp && pp->initialized == INIT_MISSING_UDEV &&
1457             pp->retriggers < retrigger_tries) {
1458                 condlog(2, "%s: triggering change event to reinitialize",
1459                         pp->dev);
1460                 pp->initialized = INIT_REQUESTED_UDEV;
1461                 pp->retriggers++;
1462                 sysfs_attr_set_value(pp->udev, "uevent", "change",
1463                                      strlen("change"));
1464                 return 0;
1465         }
1466
1467         /*
1468          * provision a next check soonest,
1469          * in case we exit abnormally from here
1470          */
1471         pp->tick = checkint;
1472
1473         newstate = path_offline(pp);
1474         /*
1475          * Wait for uevent for removed paths;
1476          * some LLDDs like zfcp keep paths unavailable
1477          * without sending uevents.
1478          */
1479         if (newstate == PATH_REMOVED)
1480                 newstate = PATH_DOWN;
1481
             /*
              * get_state() is only consulted when path_offline() reports
              * PATH_UP; otherwise the checker message is cleared
              */
1482         if (newstate == PATH_UP) {
1483                 conf = get_multipath_config();
1484                 newstate = get_state(pp, conf, 1);
1485                 put_multipath_config(conf);
1486         } else
1487                 checker_clear_message(&pp->checker);
1488
1489         if (newstate == PATH_WILD || newstate == PATH_UNCHECKED) {
1490                 condlog(2, "%s: unusable path", pp->dev);
1491                 conf = get_multipath_config();
1492                 pathinfo(pp, conf, 0);
1493                 put_multipath_config(conf);
1494                 return 1;
1495         }
             /*
              * path without a map: if it has no wwid yet, is not in
              * INIT_MISSING_UDEV state and the checker reports UP or GHOST,
              * collect the full pathinfo and try to add it, then look at it
              * again on the next tick
              */
1496         if (!pp->mpp) {
1497                 if (!strlen(pp->wwid) && pp->initialized != INIT_MISSING_UDEV &&
1498                     (newstate == PATH_UP || newstate == PATH_GHOST)) {
1499                         condlog(2, "%s: add missing path", pp->dev);
1500                         conf = get_multipath_config();
1501                         if (pathinfo(pp, conf, DI_ALL) == 0) {
1502                                 ev_add_path(pp, vecs);
1503                                 pp->tick = 1;
1504                         }
1505                         put_multipath_config(conf);
1506                 }
1507                 return 0;
1508         }
1509         /*
1510          * Async IO in flight. Keep the previous path state
1511          * and reschedule as soon as possible
1512          */
1513         if (newstate == PATH_PENDING) {
1514                 pp->tick = 1;
1515                 return 0;
1516         }
1517         /*
1518          * Synchronize with kernel state
1519          */
1520         if (update_multipath_strings(pp->mpp, vecs->pathvec, 1)) {
1521                 condlog(1, "%s: Could not synchronize with kernel state",
1522                         pp->dev);
1523                 pp->dmstate = PSTATE_UNDEF;
1524         }
1525         /* if update_multipath_strings orphaned the path, quit early */
1526         if (!pp->mpp)
1527                 return 0;
1528
             /*
              * a usable path with wait_checks pending is reported as
              * PATH_DELAYED while the map still has active paths; with no
              * active path left the delay is cancelled
              */
1529         if ((newstate == PATH_UP || newstate == PATH_GHOST) &&
1530              pp->wait_checks > 0) {
1531                 if (pp->mpp && pp->mpp->nr_active > 0) {
1532                         pp->state = PATH_DELAYED;
1533                         pp->wait_checks--;
1534                         return 1;
1535                 } else
1536                         pp->wait_checks = 0;
1537         }
1538
1539         /*
1540          * don't reinstate failed path, if its in stand-by
1541          * and if target supports only implicit tpgs mode.
1542          * this will prevent unnecessary i/o by dm on stand-by
1543          * paths if there are no other active paths in map.
1544          */
1545         disable_reinstate = (newstate == PATH_GHOST &&
1546                             pp->mpp->nr_active == 0 &&
1547                             pp->tpgs == TPGS_IMPLICIT) ? 1 : 0;
1548
1549         pp->chkrstate = newstate;
             /* case 1: the state differs from the one recorded last time */
1550         if (newstate != pp->state) {
1551                 int oldstate = pp->state;
1552                 pp->state = newstate;
1553
1554                 if (strlen(checker_message(&pp->checker)))
1555                         LOG_MSG(1, checker_message(&pp->checker));
1556
1557                 /*
1558                  * upon state change, reset the checkint
1559                  * to the shortest delay
1560                  */
1561                 conf = get_multipath_config();
1562                 pp->checkint = conf->checkint;
1563                 put_multipath_config(conf);
1564
1565                 if (newstate == PATH_DOWN || newstate == PATH_SHAKY) {
1566                         /*
1567                          * proactively fail path in the DM
1568                          */
1569                         if (oldstate == PATH_UP ||
1570                             oldstate == PATH_GHOST) {
1571                                 fail_path(pp, 1);
                                     /*
                                      * the path failed while it was still
                                      * being watched: arm the wait_checks
                                      * delay for its next recovery
                                      */
1572                                 if (pp->mpp->delay_wait_checks > 0 &&
1573                                     pp->watch_checks > 0) {
1574                                         pp->wait_checks = pp->mpp->delay_wait_checks;
1575                                         pp->watch_checks = 0;
1576                                 }
1577                         }else
1578                                 fail_path(pp, 0);
1579
1580                         /*
1581                          * cancel scheduled failback
1582                          */
1583                         pp->mpp->failback_tick = 0;
1584
1585                         pp->mpp->stat_path_failures++;
1586                         return 1;
1587                 }
1588
1589                 if(newstate == PATH_UP || newstate == PATH_GHOST){
1590                         if ( pp->mpp && pp->mpp->prflag ){
1591                                 /*
1592                                  * Check Persistent Reservation.
1593                                  */
1594                         condlog(2, "%s: checking persistent reservation "
1595                                 "registration", pp->dev);
1596                         mpath_pr_event_handle(pp);
1597                         }
1598                 }
1599
1600                 /*
1601                  * reinstate this path
1602                  */
1603                 if (oldstate != PATH_UP &&
1604                     oldstate != PATH_GHOST) {
1605                         if (pp->mpp->delay_watch_checks > 0)
1606                                 pp->watch_checks = pp->mpp->delay_watch_checks;
1607                         add_active = 1;
1608                 } else {
1609                         if (pp->watch_checks > 0)
1610                                 pp->watch_checks--;
1611                         add_active = 0;
1612                 }
                     /*
                      * if the reinstate fails, go through ev_add_path()
                      * instead and look at the path again on the next tick
                      */
1613                 if (!disable_reinstate && reinstate_path(pp, add_active)) {
1614                         condlog(3, "%s: reload map", pp->dev);
1615                         ev_add_path(pp, vecs);
1616                         pp->tick = 1;
1617                         return 0;
1618                 }
1619                 new_path_up = 1;
1620
1621                 if (oldchkrstate != PATH_UP && oldchkrstate != PATH_GHOST)
1622                         chkr_new_path_up = 1;
1623
1624                 /*
1625                  * if at least one path is up in a group, and
1626                  * the group is disabled, re-enable it
1627                  */
1628                 if (newstate == PATH_UP)
1629                         enable_group(pp);
1630         }
             /* case 2: state unchanged and the path is usable */
1631         else if (newstate == PATH_UP || newstate == PATH_GHOST) {
1632                 if ((pp->dmstate == PSTATE_FAILED ||
1633                     pp->dmstate == PSTATE_UNDEF) &&
1634                     !disable_reinstate) {
1635                         /* Clear IO errors */
1636                         if (reinstate_path(pp, 0)) {
1637                                 condlog(3, "%s: reload map", pp->dev);
1638                                 ev_add_path(pp, vecs);
1639                                 pp->tick = 1;
1640                                 return 0;
1641                         }
1642                 } else {
1643                         unsigned int max_checkint;
1644                         LOG_MSG(4, checker_message(&pp->checker));
1645                         conf = get_multipath_config();
1646                         max_checkint = conf->max_checkint;
1647                         put_multipath_config(conf);
1648                         if (pp->checkint != max_checkint) {
1649                                 /*
1650                                  * double the next check delay.
1651                                  * max at conf->max_checkint
1652                                  */
1653                                 if (pp->checkint < (max_checkint / 2))
1654                                         pp->checkint = 2 * pp->checkint;
1655                                 else
1656                                         pp->checkint = max_checkint;
1657
1658                                 condlog(4, "%s: delay next check %is",
1659                                         pp->dev_t, pp->checkint);
1660                         }
1661                         if (pp->watch_checks > 0)
1662                                 pp->watch_checks--;
1663                         pp->tick = pp->checkint;
1664                 }
1665         }
             /*
              * case 3: state unchanged, path down and the checker left a
              * message; the log level depends on conf->log_checker_err
              */
1666         else if (newstate == PATH_DOWN &&
1667                  strlen(checker_message(&pp->checker))) {
1668                 int log_checker_err;
1669
1670                 conf = get_multipath_config();
1671                 log_checker_err = conf->log_checker_err;
1672                 put_multipath_config(conf);
1673                 if (log_checker_err == LOG_CHKR_ERR_ONCE)
1674                         LOG_MSG(3, checker_message(&pp->checker));
1675                 else
1676                         LOG_MSG(2, checker_message(&pp->checker));
1677         }
1678
1679         pp->state = newstate;
1680
1681
             /* no priority or path group work while the map waits for udev */
1682         if (pp->mpp->wait_for_udev)
1683                 return 1;
1684         /*
1685          * path prio refreshing
1686          */
1687         condlog(4, "path prio refresh");
1688
             /*
              * update_prio() refreshes all paths of the map when new_path_up
              * is set and only this path otherwise.  A changed priority with
              * group_by_prio and immediate failback regroups the map; in all
              * other cases a needed path group switch is scheduled through
              * failback_tick or performed right away.
              */
1689         if (update_prio(pp, new_path_up) &&
1690             (pp->mpp->pgpolicyfn == (pgpolicyfn *)group_by_prio) &&
1691              pp->mpp->pgfailback == -FAILBACK_IMMEDIATE)
1692                 update_path_groups(pp->mpp, vecs, !new_path_up);
1693         else if (need_switch_pathgroup(pp->mpp, 0)) {
1694                 if (pp->mpp->pgfailback > 0 &&
1695                     (new_path_up || pp->mpp->failback_tick <= 0))
1696                         pp->mpp->failback_tick =
1697                                 pp->mpp->pgfailback + 1;
1698                 else if (pp->mpp->pgfailback == -FAILBACK_IMMEDIATE ||
1699                          (chkr_new_path_up && followover_should_failback(pp)))
1700                         switch_pathgroup(pp->mpp);
1701         }
1702         return 1;
1703 }
1704
1705 static void *
1706 checkerloop (void *ap)
1707 {
1708         struct vectors *vecs;
1709         struct path *pp;
1710         int count = 0;
1711         unsigned int i;
1712         struct itimerval timer_tick_it;
1713         struct timeval last_time;
1714         struct config *conf;
1715
1716         mlockall(MCL_CURRENT | MCL_FUTURE);
1717         vecs = (struct vectors *)ap;
1718         condlog(2, "path checkers start up");
1719
1720         /*
1721          * init the path check interval
1722          */
1723         vector_foreach_slot (vecs->pathvec, pp, i) {
1724                 conf = get_multipath_config();
1725                 pp->checkint = conf->checkint;
1726                 put_multipath_config(conf);
1727         }
1728
1729         /* Tweak start time for initial path check */
1730         if (gettimeofday(&last_time, NULL) != 0)
1731                 last_time.tv_sec = 0;
1732         else
1733                 last_time.tv_sec -= 1;
1734
1735         while (1) {
1736                 struct timeval diff_time, start_time, end_time;
1737                 int num_paths = 0, ticks = 0, signo, strict_timing, rc = 0;
1738                 sigset_t mask;
1739
1740                 if (gettimeofday(&start_time, NULL) != 0)
1741                         start_time.tv_sec = 0;
1742                 if (start_time.tv_sec && last_time.tv_sec) {
1743                         timersub(&start_time, &last_time, &diff_time);
1744                         condlog(4, "tick (%lu.%06lu secs)",
1745                                 diff_time.tv_sec, diff_time.tv_usec);
1746                         last_time.tv_sec = start_time.tv_sec;
1747                         last_time.tv_usec = start_time.tv_usec;
1748                         ticks = diff_time.tv_sec;
1749                 } else {
1750                         ticks = 1;
1751                         condlog(4, "tick (%d ticks)", ticks);
1752                 }
1753 #ifdef USE_SYSTEMD
1754                 if (use_watchdog)
1755                         sd_notify(0, "WATCHDOG=1");
1756 #endif
1757                 rc = set_config_state(DAEMON_RUNNING);
1758                 if (rc == ETIMEDOUT) {
1759                         condlog(4, "timeout waiting for DAEMON_IDLE");
1760                         continue;
1761                 }
1762                 if (vecs->pathvec) {
1763                         pthread_cleanup_push(cleanup_lock, &vecs->lock);
1764                         lock(vecs->lock);
1765                         pthread_testcancel();
1766                         vector_foreach_slot (vecs->pathvec, pp, i) {
1767                                 num_paths += check_path(vecs, pp, ticks);
1768                         }
1769                         lock_cleanup_pop(vecs->lock);
1770                 }
1771                 if (vecs->mpvec) {
1772                         pthread_cleanup_push(cleanup_lock, &vecs->lock);
1773                         lock(vecs->lock);
1774                         pthread_testcancel();
1775                         defered_failback_tick(vecs->mpvec);
1776                         retry_count_tick(vecs->mpvec);
1777                         missing_uev_wait_tick(vecs);
1778                         lock_cleanup_pop(vecs->lock);
1779                 }
1780                 if (count)
1781                         count--;
1782                 else {
1783                         pthread_cleanup_push(cleanup_lock, &vecs->lock);
1784                         lock(vecs->lock);
1785                         pthread_testcancel();
1786                         condlog(4, "map garbage collection");
1787                         mpvec_garbage_collector(vecs);
1788                         count = MAPGCINT;
1789                         lock_cleanup_pop(vecs->lock);
1790                 }
1791
1792                 diff_time.tv_usec = 0;
1793                 if (start_time.tv_sec &&
1794                     gettimeofday(&end_time, NULL) == 0) {
1795                         timersub(&end_time, &start_time, &diff_time);
1796                         if (num_paths) {
1797                                 unsigned int max_checkint;
1798
1799                                 condlog(3, "checked %d path%s in %lu.%06lu secs",
1800                                         num_paths, num_paths > 1 ? "s" : "",
1801                                         diff_time.tv_sec, diff_time.tv_usec);
1802                                 conf = get_multipath_config();
1803                                 max_checkint = conf->max_checkint;
1804                                 put_multipath_config(conf);
1805                                 if (diff_time.tv_sec > max_checkint)
1806                                         condlog(1, "path checkers took longer "
1807                                                 "than %lu seconds, consider "
1808                                                 "increasing max_polling_interval",
1809                                                 diff_time.tv_sec);
1810                         }
1811                 }
1812
1813                 post_config_state(DAEMON_IDLE);
1814                 conf = get_multipath_config();
1815                 strict_timing = conf->strict_timing;
1816                 put_multipath_config(conf);
1817                 if (!strict_timing)
1818                         sleep(1);
1819                 else {
1820                         timer_tick_it.it_interval.tv_sec = 0;
1821                         timer_tick_it.it_interval.tv_usec = 0;
1822                         if (diff_time.tv_usec) {
1823                                 timer_tick_it.it_value.tv_sec = 0;
1824                                 timer_tick_it.it_value.tv_usec =
1825                                         (unsigned long)1000000 - diff_time.tv_usec;
1826                         } else {
1827                                 timer_tick_it.it_value.tv_sec = 1;
1828                                 timer_tick_it.it_value.tv_usec = 0;
1829                         }
1830                         setitimer(ITIMER_REAL, &timer_tick_it, NULL);
1831
1832                         sigemptyset(&mask);
1833                         sigaddset(&mask, SIGALRM);
1834                         condlog(3, "waiting for %lu.%06lu secs",
1835                                 timer_tick_it.it_value.tv_sec,
1836                                 timer_tick_it.it_value.tv_usec);
1837                         if (sigwait(&mask, &signo) != 0) {
1838                                 condlog(3, "sigwait failed with error %d",
1839                                         errno);
1840                                 conf = get_multipath_config();
1841                                 conf->strict_timing = 0;
1842                                 put_multipath_config(conf);
1843                                 break;
1844                         }
1845                 }
1846         }
1847         return NULL;
1848 }
1849
1850 int
1851 configure (struct vectors * vecs, int start_waiters)
1852 {
1853         struct multipath * mpp;
1854         struct path * pp;
1855         vector mpvec;
1856         int i, ret;
1857         struct config *conf;
1858
1859         if (!vecs->pathvec && !(vecs->pathvec = vector_alloc()))
1860                 return 1;
1861
1862         if (!vecs->mpvec && !(vecs->mpvec = vector_alloc()))
1863                 return 1;
1864
1865         if (!(mpvec = vector_alloc()))
1866                 return 1;
1867
1868         /*
1869          * probe for current path (from sysfs) and map (from dm) sets
1870          */
1871         ret = path_discovery(vecs->pathvec, DI_ALL);
1872         if (ret < 0)
1873                 return 1;
1874
1875         vector_foreach_slot (vecs->pathvec, pp, i){
1876                 conf = get_multipath_config();
1877                 if (filter_path(conf, pp) > 0){
1878                         vector_del_slot(vecs->pathvec, i);
1879                         free_path(pp);
1880                         i--;
1881                 }
1882                 else
1883                         pp->checkint = conf->checkint;
1884                 put_multipath_config(conf);
1885         }
1886         if (map_discovery(vecs))
1887                 return 1;
1888
1889         /*
1890          * create new set of maps & push changed ones into dm
1891          */
1892         if (coalesce_paths(vecs, mpvec, NULL, 1, CMD_NONE))
1893                 return 1;
1894
1895         /*
1896          * may need to remove some maps which are no longer relevant
1897          * e.g., due to blacklist changes in conf file
1898          */
1899         if (coalesce_maps(vecs, mpvec))
1900                 return 1;
1901
1902         dm_lib_release();
1903
1904         sync_maps_state(mpvec);
1905         vector_foreach_slot(mpvec, mpp, i){
1906                 remember_wwid(mpp->wwid);
1907                 update_map_pr(mpp);
1908         }
1909
1910         /*
1911          * purge dm of old maps
1912          */
1913         remove_maps(vecs);
1914
1915         /*
1916          * save new set of maps formed by considering current path state
1917          */
1918         vector_free(vecs->mpvec);
1919         vecs->mpvec = mpvec;
1920
1921         /*
1922          * start dm event waiter threads for these new maps
1923          */
1924         vector_foreach_slot(vecs->mpvec, mpp, i) {
1925                 if (setup_multipath(vecs, mpp))
1926                         return 1;
1927                 if (start_waiters)
1928                         if (start_waiter_thread(mpp, vecs))
1929                                 return 1;
1930         }
1931         return 0;
1932 }
1933
1934 int
1935 need_to_delay_reconfig(struct vectors * vecs)
1936 {
1937         struct multipath *mpp;
1938         int i;
1939
1940         if (!VECTOR_SIZE(vecs->mpvec))
1941                 return 0;
1942
1943         vector_foreach_slot(vecs->mpvec, mpp, i) {
1944                 if (mpp->wait_for_udev)
1945                         return 1;
1946         }
1947         return 0;
1948 }
1949
1950 int
1951 reconfigure (struct vectors * vecs)
1952 {
1953         struct config * old, *conf;
1954
1955         conf = load_config(DEFAULT_CONFIGFILE);
1956         if (!conf)
1957                 return 1;
1958
1959         /*
1960          * free old map and path vectors ... they use old conf state
1961          */
1962         if (VECTOR_SIZE(vecs->mpvec))
1963                 remove_maps_and_stop_waiters(vecs);
1964
1965         if (VECTOR_SIZE(vecs->pathvec))
1966                 free_pathvec(vecs->pathvec, FREE_PATHS);
1967
1968         vecs->pathvec = NULL;
1969
1970         /* Re-read any timezone changes */
1971         tzset();
1972
1973         dm_drv_version(conf->version, TGT_MPATH);
1974         if (verbosity)
1975                 conf->verbosity = verbosity;
1976         if (bindings_read_only)
1977                 conf->bindings_read_only = bindings_read_only;
1978         if (ignore_new_devs)
1979                 conf->ignore_new_devs = ignore_new_devs;
1980         uxsock_timeout = conf->uxsock_timeout;
1981
1982         old = multipath_conf;
1983         multipath_conf = conf;
1984
1985         configure(vecs, 1);
1986
1987         free_config(old);
1988
1989         return 0;
1990 }
1991
1992 static struct vectors *
1993 init_vecs (void)
1994 {
1995         struct vectors * vecs;
1996
1997         vecs = (struct vectors *)MALLOC(sizeof(struct vectors));
1998
1999         if (!vecs)
2000                 return NULL;
2001
2002         vecs->lock.mutex =
2003                 (pthread_mutex_t *)MALLOC(sizeof(pthread_mutex_t));
2004
2005         if (!vecs->lock.mutex)
2006                 goto out;
2007
2008         pthread_mutex_init(vecs->lock.mutex, NULL);
2009         vecs->lock.depth = 0;
2010
2011         return vecs;
2012
2013 out:
2014         FREE(vecs);
2015         condlog(0, "failed to init paths");
2016         return NULL;
2017 }
2018
2019 static void *
2020 signal_set(int signo, void (*func) (int))
2021 {
2022         int r;
2023         struct sigaction sig;
2024         struct sigaction osig;
2025
2026         sig.sa_handler = func;
2027         sigemptyset(&sig.sa_mask);
2028         sig.sa_flags = 0;
2029
2030         r = sigaction(signo, &sig, &osig);
2031
2032         if (r < 0)
2033                 return (SIG_ERR);
2034         else
2035                 return (osig.sa_handler);
2036 }
2037
2038 void
2039 handle_signals(void)
2040 {
2041         if (reconfig_sig) {
2042                 condlog(2, "reconfigure (signal)");
2043                 set_config_state(DAEMON_CONFIGURE);
2044         }
2045         if (log_reset_sig) {
2046                 condlog(2, "reset log (signal)");
2047                 pthread_mutex_lock(&logq_lock);
2048                 log_reset("multipathd");
2049                 pthread_mutex_unlock(&logq_lock);
2050         }
2051         reconfig_sig = 0;
2052         log_reset_sig = 0;
2053 }
2054
2055 static void
2056 sighup (int sig)
2057 {
2058         reconfig_sig = 1;
2059 }
2060
/* Handler installed for SIGINT and SIGTERM: calls exit_daemon(). */
static void
sigend (int sig)
{
        (void)sig;      /* signal number not needed */
        exit_daemon();
}
2066
2067 static void
2068 sigusr1 (int sig)
2069 {
2070         log_reset_sig = 1;
2071 }
2072
/* SIGUSR2 handler: does nothing beyond logging that the signal arrived. */
static void
sigusr2 (int sig)
{
        (void)sig;      /* signal number not needed */
        condlog(3, "SIGUSR2 received");
}
2078
2079 static void
2080 signal_init(void)
2081 {
2082         sigset_t set;
2083
2084         sigemptyset(&set);
2085         sigaddset(&set, SIGHUP);
2086         sigaddset(&set, SIGUSR1);
2087         sigaddset(&set, SIGUSR2);
2088         sigaddset(&set, SIGALRM);
2089         pthread_sigmask(SIG_BLOCK, &set, NULL);
2090
2091         signal_set(SIGHUP, sighup);
2092         signal_set(SIGUSR1, sigusr1);
2093         signal_set(SIGUSR2, sigusr2);
2094         signal_set(SIGINT, sigend);
2095         signal_set(SIGTERM, sigend);
2096         signal(SIGPIPE, SIG_IGN);
2097 }
2098
/*
 * Switch the calling process to the SCHED_RR policy at priority 99.
 * A failure is logged and otherwise ignored.
 */
static void
setscheduler (void)
{
        static struct sched_param rr_param = {
                .sched_priority = 99
        };

        /* NOTE(review): LOG_WARNING is used as the condlog level here */
        if (sched_setscheduler (0, SCHED_RR, &rr_param) == -1)
                condlog(LOG_WARNING, "Could not set SCHED_RR at priority 99");
}
2113
2114 static void
2115 set_oom_adj (void)
2116 {
2117 #ifdef OOM_SCORE_ADJ_MIN
2118         int retry = 1;
2119         char *file = "/proc/self/oom_score_adj";
2120         int score = OOM_SCORE_ADJ_MIN;
2121 #else
2122         int retry = 0;
2123         char *file = "/proc/self/oom_adj";
2124         int score = OOM_ADJUST_MIN;
2125 #endif
2126         FILE *fp;
2127         struct stat st;
2128         char *envp;
2129
2130         envp = getenv("OOMScoreAdjust");
2131         if (envp) {
2132                 condlog(3, "Using systemd provided OOMScoreAdjust");
2133                 return;
2134         }
2135         do {
2136                 if (stat(file, &st) == 0){
2137                         fp = fopen(file, "w");
2138                         if (!fp) {
2139                                 condlog(0, "couldn't fopen %s : %s", file,
2140                                         strerror(errno));
2141                                 return;
2142                         }
2143                         fprintf(fp, "%i", score);
2144                         fclose(fp);
2145                         return;
2146                 }
2147                 if (errno != ENOENT) {
2148                         condlog(0, "couldn't stat %s : %s", file,
2149                                 strerror(errno));
2150                         return;
2151                 }
2152 #ifdef OOM_ADJUST_MIN
2153                 file = "/proc/self/oom_adj";
2154                 score = OOM_ADJUST_MIN;
2155 #else
2156                 retry = 0;
2157 #endif
2158         } while (retry--);
2159         condlog(0, "couldn't adjust oom score");
2160 }
2161
2162 static int
2163 child (void * param)
2164 {
2165         pthread_t check_thr, uevent_thr, uxlsnr_thr, uevq_thr;
2166         pthread_attr_t log_attr, misc_attr, uevent_attr;
2167         struct vectors * vecs;
2168         struct multipath * mpp;
2169         int i;
2170 #ifdef USE_SYSTEMD
2171         unsigned long checkint;
2172 #endif
2173         int rc;
2174         int pid_fd = -1;
2175         struct config *conf;
2176         char *envp;
2177
2178         mlockall(MCL_CURRENT | MCL_FUTURE);
2179         signal_init();
2180
2181         setup_thread_attr(&misc_attr, 64 * 1024, 1);
2182         setup_thread_attr(&uevent_attr, DEFAULT_UEVENT_STACKSIZE * 1024, 1);
2183         setup_thread_attr(&waiter_attr, 32 * 1024, 1);
2184
2185         if (logsink == 1) {
2186                 setup_thread_attr(&log_attr, 64 * 1024, 0);
2187                 log_thread_start(&log_attr);
2188                 pthread_attr_destroy(&log_attr);
2189         }
2190         pid_fd = pidfile_create(DEFAULT_PIDFILE, daemon_pid);
2191         if (pid_fd < 0) {
2192                 condlog(1, "failed to create pidfile");
2193                 if (logsink == 1)
2194                         log_thread_stop();
2195                 exit(1);
2196         }
2197
2198         post_config_state(DAEMON_START);
2199
2200         condlog(2, "--------start up--------");
2201         condlog(2, "read " DEFAULT_CONFIGFILE);
2202
2203         conf = load_config(DEFAULT_CONFIGFILE);
2204         if (!conf)
2205                 goto failed;
2206
2207         if (verbosity)
2208                 conf->verbosity = verbosity;
2209         if (bindings_read_only)
2210                 conf->bindings_read_only = bindings_read_only;
2211         if (ignore_new_devs)
2212                 conf->ignore_new_devs = ignore_new_devs;
2213         uxsock_timeout = conf->uxsock_timeout;
2214         multipath_conf = conf;
2215         dm_init(conf->verbosity);
2216         dm_drv_version(conf->version, TGT_MPATH);
2217         if (init_checkers(conf->multipath_dir)) {
2218                 condlog(0, "failed to initialize checkers");
2219                 goto failed;
2220         }
2221         if (init_prio(conf->multipath_dir)) {
2222                 condlog(0, "failed to initialize prioritizers");
2223                 goto failed;
2224         }
2225
2226         setlogmask(LOG_UPTO(conf->verbosity + 3));
2227
2228         envp = getenv("LimitNOFILE");
2229
2230         if (envp) {
2231                 condlog(2,"Using systemd provided open fds limit of %s", envp);
2232         } else if (conf->max_fds) {
2233                 struct rlimit fd_limit;
2234
2235                 if (getrlimit(RLIMIT_NOFILE, &fd_limit) < 0) {
2236                         condlog(0, "can't get open fds limit: %s",
2237                                 strerror(errno));
2238                         fd_limit.rlim_cur = 0;
2239                         fd_limit.rlim_max = 0;
2240                 }
2241                 if (fd_limit.rlim_cur < conf->max_fds) {
2242                         fd_limit.rlim_cur = conf->max_fds;
2243                         if (fd_limit.rlim_max < conf->max_fds)
2244                                 fd_limit.rlim_max = conf->max_fds;
2245                         if (setrlimit(RLIMIT_NOFILE, &fd_limit) < 0) {
2246                                 condlog(0, "can't set open fds limit to "
2247                                         "%lu/%lu : %s",
2248                                         fd_limit.rlim_cur, fd_limit.rlim_max,
2249                                         strerror(errno));
2250                         } else {
2251                                 condlog(3, "set open fds limit to %lu/%lu",
2252                                         fd_limit.rlim_cur, fd_limit.rlim_max);
2253                         }
2254                 }
2255
2256         }
2257
2258         vecs = gvecs = init_vecs();
2259         if (!vecs)
2260                 goto failed;
2261
2262         setscheduler();
2263         set_oom_adj();
2264
2265         dm_udev_set_sync_support(0);
2266 #ifdef USE_SYSTEMD
2267         envp = getenv("WATCHDOG_USEC");
2268         if (envp && sscanf(envp, "%lu", &checkint) == 1) {
2269                 /* Value is in microseconds */
2270                 conf->max_checkint = checkint / 1000000;
2271                 /* Rescale checkint */
2272                 if (conf->checkint > conf->max_checkint)
2273                         conf->checkint = conf->max_checkint;
2274                 else
2275                         conf->checkint = conf->max_checkint / 4;
2276                 condlog(3, "enabling watchdog, interval %d max %d",
2277                         conf->checkint, conf->max_checkint);
2278                 use_watchdog = conf->checkint;
2279         }
2280 #endif
2281         /*
2282          * Signal start of configuration
2283          */
2284         post_config_state(DAEMON_CONFIGURE);
2285
2286         /*
2287          * Start uevent listener early to catch events
2288          */
2289         if ((rc = pthread_create(&uevent_thr, &uevent_attr, ueventloop, udev))) {
2290                 condlog(0, "failed to create uevent thread: %d", rc);
2291                 goto failed;
2292         }
2293         pthread_attr_destroy(&uevent_attr);
2294         if ((rc = pthread_create(&uxlsnr_thr, &misc_attr, uxlsnrloop, vecs))) {
2295                 condlog(0, "failed to create cli listener: %d", rc);
2296                 goto failed;
2297         }
2298
2299         /*
2300          * start threads
2301          */
2302         if ((rc = pthread_create(&check_thr, &misc_attr, checkerloop, vecs))) {
2303                 condlog(0,"failed to create checker loop thread: %d", rc);
2304                 goto failed;
2305         }
2306         if ((rc = pthread_create(&uevq_thr, &misc_attr, uevqloop, vecs))) {
2307                 condlog(0, "failed to create uevent dispatcher: %d", rc);
2308                 goto failed;
2309         }
2310         pthread_attr_destroy(&misc_attr);
2311
2312 #ifdef USE_SYSTEMD
2313         sd_notify(0, "READY=1");
2314 #endif
2315
2316         while (running_state != DAEMON_SHUTDOWN) {
2317                 pthread_cleanup_push(config_cleanup, NULL);
2318                 pthread_mutex_lock(&config_lock);
2319                 if (running_state != DAEMON_CONFIGURE &&
2320                     running_state != DAEMON_SHUTDOWN) {
2321                         pthread_cond_wait(&config_cond, &config_lock);
2322                 }
2323                 pthread_cleanup_pop(1);
2324                 if (running_state == DAEMON_CONFIGURE) {
2325                         pthread_cleanup_push(cleanup_lock, &vecs->lock);
2326                         lock(vecs->lock);
2327                         pthread_testcancel();
2328                         if (!need_to_delay_reconfig(vecs)) {
2329                                 reconfigure(vecs);
2330                         } else {
2331                                 conf->delayed_reconfig = 1;
2332                         }
2333                         lock_cleanup_pop(vecs->lock);
2334                         post_config_state(DAEMON_IDLE);
2335                 }
2336         }
2337
2338         lock(vecs->lock);
2339         if (conf->queue_without_daemon == QUE_NO_DAEMON_OFF)
2340                 vector_foreach_slot(vecs->mpvec, mpp, i)
2341                         dm_queue_if_no_path(mpp->alias, 0);
2342         remove_maps_and_stop_waiters(vecs);
2343         unlock(vecs->lock);
2344
2345         pthread_cancel(check_thr);
2346         pthread_cancel(uevent_thr);
2347         pthread_cancel(uxlsnr_thr);
2348         pthread_cancel(uevq_thr);
2349
2350         lock(vecs->lock);
2351         free_pathvec(vecs->pathvec, FREE_PATHS);
2352         vecs->pathvec = NULL;
2353         unlock(vecs->lock);
2354         /* Now all the waitevent threads will start rushing in. */
2355         while (vecs->lock.depth > 0) {
2356                 sleep (1); /* This is weak. */
2357                 condlog(3, "Have %d wait event checkers threads to de-alloc,"
2358                         " waiting...", vecs->lock.depth);
2359         }
2360         pthread_mutex_destroy(vecs->lock.mutex);
2361         FREE(vecs->lock.mutex);
2362         vecs->lock.depth = 0;
2363         vecs->lock.mutex = NULL;
2364         FREE(vecs);
2365         vecs = NULL;
2366
2367         cleanup_checkers();
2368         cleanup_prio();
2369
2370         dm_lib_release();
2371         dm_lib_exit();
2372
2373         /* We're done here */
2374         condlog(3, "unlink pidfile");
2375         unlink(DEFAULT_PIDFILE);
2376
2377         condlog(2, "--------shut down-------");
2378
2379         if (logsink == 1)
2380                 log_thread_stop();
2381
2382         /*
2383          * Freeing config must be done after condlog() and dm_lib_exit(),
2384          * because logging functions like dlog() and dm_write_log()
2385          * reference the config.
2386          */
2387         free_config(conf);
2388         conf = NULL;
2389         udev_unref(udev);
2390         udev = NULL;
2391 #ifdef _DEBUG_
2392         dbg_free_final(NULL);
2393 #endif
2394
2395 #ifdef USE_SYSTEMD
2396         sd_notify(0, "ERRNO=0");
2397 #endif
2398         exit(0);
2399
2400 failed:
2401 #ifdef USE_SYSTEMD
2402         sd_notify(0, "ERRNO=1");
2403 #endif
2404         if (pid_fd >= 0)
2405                 close(pid_fd);
2406         exit(1);
2407 }
2408
2409 static int
2410 daemonize(void)
2411 {
2412         int pid;
2413         int dev_null_fd;
2414
2415         if( (pid = fork()) < 0){
2416                 fprintf(stderr, "Failed first fork : %s\n", strerror(errno));
2417                 return -1;
2418         }
2419         else if (pid != 0)
2420                 return pid;
2421
2422         setsid();
2423
2424         if ( (pid = fork()) < 0)
2425                 fprintf(stderr, "Failed second fork : %s\n", strerror(errno));
2426         else if (pid != 0)
2427                 _exit(0);
2428
2429         if (chdir("/") < 0)
2430                 fprintf(stderr, "cannot chdir to '/', continuing\n");
2431
2432         dev_null_fd = open("/dev/null", O_RDWR);
2433         if (dev_null_fd < 0){
2434                 fprintf(stderr, "cannot open /dev/null for input & output : %s\n",
2435                         strerror(errno));
2436                 _exit(0);
2437         }
2438
2439         close(STDIN_FILENO);
2440         if (dup(dev_null_fd) < 0) {
2441                 fprintf(stderr, "cannot dup /dev/null to stdin : %s\n",
2442                         strerror(errno));
2443                 _exit(0);
2444         }
2445         close(STDOUT_FILENO);
2446         if (dup(dev_null_fd) < 0) {
2447                 fprintf(stderr, "cannot dup /dev/null to stdout : %s\n",
2448                         strerror(errno));
2449                 _exit(0);
2450         }
2451         close(STDERR_FILENO);
2452         if (dup(dev_null_fd) < 0) {
2453                 fprintf(stderr, "cannot dup /dev/null to stderr : %s\n",
2454                         strerror(errno));
2455                 _exit(0);
2456         }
2457         close(dev_null_fd);
2458         daemon_pid = getpid();
2459         return 0;
2460 }
2461
2462 int
2463 main (int argc, char *argv[])
2464 {
2465         extern char *optarg;
2466         extern int optind;
2467         int arg;
2468         int err;
2469         int foreground = 0;
2470         struct config *conf;
2471
2472         logsink = 1;
2473
2474         if (getuid() != 0) {
2475                 fprintf(stderr, "need to be root\n");
2476                 exit(1);
2477         }
2478
2479         /* make sure we don't lock any path */
2480         if (chdir("/") < 0)
2481                 fprintf(stderr, "can't chdir to root directory : %s\n",
2482                         strerror(errno));
2483         umask(umask(077) | 022);
2484
2485         udev = udev_new();
2486
2487         while ((arg = getopt(argc, argv, ":dsv:k::Bn")) != EOF ) {
2488         switch(arg) {
2489                 case 'd':
2490                         foreground = 1;
2491                         if (logsink > 0)
2492                                 logsink = 0;
2493                         //debug=1; /* ### comment me out ### */
2494                         break;
2495                 case 'v':
2496                         if (sizeof(optarg) > sizeof(char *) ||
2497                             !isdigit(optarg[0]))
2498                                 exit(1);
2499
2500                         verbosity = atoi(optarg);
2501                         break;
2502                 case 's':
2503                         logsink = -1;
2504                         break;
2505                 case 'k':
2506                         conf = load_config(DEFAULT_CONFIGFILE);
2507                         if (!conf)
2508                                 exit(1);
2509                         if (verbosity)
2510                                 conf->verbosity = verbosity;
2511                         uxclnt(optarg, uxsock_timeout + 100);
2512                         exit(0);
2513                 case 'B':
2514                         bindings_read_only = 1;
2515                         break;
2516                 case 'n':
2517                         ignore_new_devs = 1;
2518                         break;
2519                 default:
2520                         fprintf(stderr, "Invalid argument '-%c'\n",
2521                                 optopt);
2522                         exit(1);
2523                 }
2524         }
2525         if (optind < argc) {
2526                 char cmd[CMDSIZE];
2527                 char * s = cmd;
2528                 char * c = s;
2529
2530                 conf = load_config(DEFAULT_CONFIGFILE);
2531                 if (!conf)
2532                         exit(1);
2533                 if (verbosity)
2534                         conf->verbosity = verbosity;
2535                 memset(cmd, 0x0, CMDSIZE);
2536                 while (optind < argc) {
2537                         if (strchr(argv[optind], ' '))
2538                                 c += snprintf(c, s + CMDSIZE - c, "\"%s\" ", argv[optind]);
2539                         else
2540                                 c += snprintf(c, s + CMDSIZE - c, "%s ", argv[optind]);
2541                         optind++;
2542                 }
2543                 c += snprintf(c, s + CMDSIZE - c, "\n");
2544                 uxclnt(s, uxsock_timeout + 100);
2545                 exit(0);
2546         }
2547
2548         if (foreground) {
2549                 if (!isatty(fileno(stdout)))
2550                         setbuf(stdout, NULL);
2551                 err = 0;
2552                 daemon_pid = getpid();
2553         } else
2554                 err = daemonize();
2555
2556         if (err < 0)
2557                 /* error */
2558                 exit(1);
2559         else if (err > 0)
2560                 /* parent dies */
2561                 exit(0);
2562         else
2563                 /* child lives */
2564                 return (child(NULL));
2565 }
2566
2567 void *  mpath_pr_event_handler_fn (void * pathp )
2568 {
2569         struct multipath * mpp;
2570         int i,j, ret, isFound;
2571         struct path * pp = (struct path *)pathp;
2572         unsigned char *keyp;
2573         uint64_t prkey;
2574         struct prout_param_descriptor *param;
2575         struct prin_resp *resp;
2576
2577         mpp = pp->mpp;
2578
2579         resp = mpath_alloc_prin_response(MPATH_PRIN_RKEY_SA);
2580         if (!resp){
2581                 condlog(0,"%s Alloc failed for prin response", pp->dev);
2582                 return NULL;
2583         }
2584
2585         ret = prin_do_scsi_ioctl(pp->dev, MPATH_PRIN_RKEY_SA, resp, 0);
2586         if (ret != MPATH_PR_SUCCESS )
2587         {
2588                 condlog(0,"%s : pr in read keys service action failed. Error=%d", pp->dev, ret);
2589                 goto out;
2590         }
2591
2592         condlog(3, " event pr=%d addlen=%d",resp->prin_descriptor.prin_readkeys.prgeneration,
2593                         resp->prin_descriptor.prin_readkeys.additional_length );
2594
2595         if (resp->prin_descriptor.prin_readkeys.additional_length == 0 )
2596         {
2597                 condlog(1, "%s: No key found. Device may not be registered.", pp->dev);
2598                 ret = MPATH_PR_SUCCESS;
2599                 goto out;
2600         }
2601         prkey = 0;
2602         keyp = (unsigned char *)mpp->reservation_key;
2603         for (j = 0; j < 8; ++j) {
2604                 if (j > 0)
2605                         prkey <<= 8;
2606                 prkey |= *keyp;
2607                 ++keyp;
2608         }
2609         condlog(2, "Multipath  reservation_key: 0x%" PRIx64 " ", prkey);
2610
2611         isFound =0;
2612         for (i = 0; i < resp->prin_descriptor.prin_readkeys.additional_length/8; i++ )
2613         {
2614                 condlog(2, "PR IN READKEYS[%d]  reservation key:",i);
2615                 dumpHex((char *)&resp->prin_descriptor.prin_readkeys.key_list[i*8], 8 , -1);
2616                 if (!memcmp(mpp->reservation_key, &resp->prin_descriptor.prin_readkeys.key_list[i*8], 8))
2617                 {
2618                         condlog(2, "%s: pr key found in prin readkeys response", mpp->alias);
2619                         isFound =1;
2620                         break;
2621                 }
2622         }
2623         if (!isFound)
2624         {
2625                 condlog(0, "%s: Either device not registered or ", pp->dev);
2626                 condlog(0, "host is not authorised for registration. Skip path");
2627                 ret = MPATH_PR_OTHER;
2628                 goto out;
2629         }
2630
2631         param= malloc(sizeof(struct prout_param_descriptor));
2632         memset(param, 0 , sizeof(struct prout_param_descriptor));
2633
2634         for (j = 7; j >= 0; --j) {
2635                 param->sa_key[j] = (prkey & 0xff);
2636                 prkey >>= 8;
2637         }
2638         param->num_transportid = 0;
2639
2640         condlog(3, "device %s:%s", pp->dev, pp->mpp->wwid);
2641
2642         ret = prout_do_scsi_ioctl(pp->dev, MPATH_PROUT_REG_IGN_SA, 0, 0, param, 0);
2643         if (ret != MPATH_PR_SUCCESS )
2644         {
2645                 condlog(0,"%s: Reservation registration failed. Error: %d", pp->dev, ret);
2646         }
2647         mpp->prflag = 1;
2648
2649         free(param);
2650 out:
2651         free(resp);
2652         return NULL;
2653 }
2654
2655 int mpath_pr_event_handle(struct path *pp)
2656 {
2657         pthread_t thread;
2658         int rc;
2659         pthread_attr_t attr;
2660         struct multipath * mpp;
2661
2662         mpp = pp->mpp;
2663
2664         if (!mpp->reservation_key)
2665                 return -1;
2666
2667         pthread_attr_init(&attr);
2668         pthread_attr_setdetachstate(&attr, PTHREAD_CREATE_JOINABLE);
2669
2670         rc = pthread_create(&thread, NULL , mpath_pr_event_handler_fn, pp);
2671         if (rc) {
2672                 condlog(0, "%s: ERROR; return code from pthread_create() is %d", pp->dev, rc);
2673                 return -1;
2674         }
2675         pthread_attr_destroy(&attr);
2676         rc = pthread_join(thread, NULL);
2677         return 0;
2678 }
2679