c4d6f9f62a939559d95311770918eb9dbe91f69b
[multipath-tools/.git] / multipathd / main.c
1 /*
2  * Copyright (c) 2004, 2005 Christophe Varoqui
3  * Copyright (c) 2005 Kiyoshi Ueda, NEC
4  * Copyright (c) 2005 Benjamin Marzinski, Redhat
5  * Copyright (c) 2005 Edward Goggin, EMC
6  */
7 #include <unistd.h>
8 #include <sys/stat.h>
9 #include <libdevmapper.h>
10 #include <wait.h>
11 #include <sys/mman.h>
12 #include <sys/types.h>
13 #include <fcntl.h>
14 #include <errno.h>
15 #include <sys/time.h>
16 #include <sys/resource.h>
17 #include <limits.h>
18 #include <linux/oom.h>
19 #include <libudev.h>
20 #ifdef USE_SYSTEMD
21 #include <systemd/sd-daemon.h>
22 #endif
23 #include <semaphore.h>
24 #include <time.h>
25
26 /*
27  * libcheckers
28  */
29 #include <checkers.h>
30
31 #ifdef USE_SYSTEMD
32 static int use_watchdog;
33 #endif
34
35 int uxsock_timeout;
36
37 /*
38  * libmultipath
39  */
40 #include <parser.h>
41 #include <vector.h>
42 #include <memory.h>
43 #include <config.h>
44 #include <util.h>
45 #include <hwtable.h>
46 #include <defaults.h>
47 #include <structs.h>
48 #include <blacklist.h>
49 #include <structs_vec.h>
50 #include <dmparser.h>
51 #include <devmapper.h>
52 #include <sysfs.h>
53 #include <dict.h>
54 #include <discovery.h>
55 #include <debug.h>
56 #include <propsel.h>
57 #include <uevent.h>
58 #include <switchgroup.h>
59 #include <print.h>
60 #include <configure.h>
61 #include <prio.h>
62 #include <wwids.h>
63 #include <pgpolicies.h>
64 #include <uevent.h>
65 #include <log.h>
66
67 #include <mpath_cmd.h>
68 #include <mpath_persist.h>
69
70 #include "prioritizers/alua_rtpg.h"
71
72 #include "main.h"
73 #include "pidfile.h"
74 #include "uxlsnr.h"
75 #include "uxclnt.h"
76 #include "cli.h"
77 #include "cli_handlers.h"
78 #include "lock.h"
79 #include "waiter.h"
80 #include "wwids.h"
81
82 #define FILE_NAME_SIZE 256
83 #define CMDSIZE 160
84
/*
 * Log message b at verbosity a for the path currently being handled.
 *
 * The macro is not hygienic: it expects a variable named `pp` to be in
 * scope at the expansion site and dereferences pp->mpp, so it may only be
 * used for a path that belongs to a map. Offline paths get a fixed
 * "path offline" message; otherwise b is logged only if it is non-empty.
 */
#define LOG_MSG(a, b) \
do { \
        if (pp->offline) \
                condlog(a, "%s: %s - path offline", pp->mpp->alias, pp->dev); \
        else if (strlen(b)) \
                condlog(a, "%s: %s - %s", pp->mpp->alias, pp->dev, b); \
} while(0)
92
/*
 * Pairs a device name with a multipath map.
 * NOTE(review): no user of this struct is visible in this part of the
 * file -- confirm it is still needed.
 */
struct mpath_event_param
{
        char * devname;         /* device name string */
        struct multipath *mpp;  /* associated multipath map */
};
98
unsigned int mpath_mx_alloc_len;

int logsink;
int verbosity;
int bindings_read_only;
int ignore_new_devs;
/* current daemon state; reported by daemon_status()/sd_notify_status() */
enum daemon_status running_state = DAEMON_INIT;
pid_t daemon_pid;
/* held by post_config_state()/set_config_state() around running_state */
pthread_mutex_t config_lock = PTHREAD_MUTEX_INITIALIZER;
/* broadcast (with config_lock held) whenever running_state changes */
pthread_cond_t config_cond = PTHREAD_COND_INITIALIZER;

/*
 * global copy of vecs for use in sig handlers
 */
struct vectors * gvecs;

struct udev * udev;

/* configuration handed out by get_multipath_config() */
struct config *multipath_conf;
118
119 const char *
120 daemon_status(void)
121 {
122         switch (running_state) {
123         case DAEMON_INIT:
124                 return "init";
125         case DAEMON_START:
126                 return "startup";
127         case DAEMON_CONFIGURE:
128                 return "configure";
129         case DAEMON_IDLE:
130                 return "idle";
131         case DAEMON_RUNNING:
132                 return "running";
133         case DAEMON_SHUTDOWN:
134                 return "shutdown";
135         }
136         return NULL;
137 }
138
139 /*
140  * I love you too, systemd ...
141  */
142 const char *
143 sd_notify_status(void)
144 {
145         switch (running_state) {
146         case DAEMON_INIT:
147                 return "STATUS=init";
148         case DAEMON_START:
149                 return "STATUS=startup";
150         case DAEMON_CONFIGURE:
151                 return "STATUS=configure";
152         case DAEMON_IDLE:
153                 return "STATUS=idle";
154         case DAEMON_RUNNING:
155                 return "STATUS=running";
156         case DAEMON_SHUTDOWN:
157                 return "STATUS=shutdown";
158         }
159         return NULL;
160 }
161
162 static void config_cleanup(void *arg)
163 {
164         pthread_mutex_unlock(&config_lock);
165 }
166
167 void post_config_state(enum daemon_status state)
168 {
169         pthread_mutex_lock(&config_lock);
170         if (state != running_state) {
171                 running_state = state;
172                 pthread_cond_broadcast(&config_cond);
173 #ifdef USE_SYSTEMD
174                 sd_notify(0, sd_notify_status());
175 #endif
176         }
177         pthread_mutex_unlock(&config_lock);
178 }
179
180 int set_config_state(enum daemon_status state)
181 {
182         int rc = 0;
183
184         pthread_cleanup_push(config_cleanup, NULL);
185         pthread_mutex_lock(&config_lock);
186         if (running_state != state) {
187                 if (running_state != DAEMON_IDLE) {
188                         struct timespec ts;
189
190                         clock_gettime(CLOCK_REALTIME, &ts);
191                         ts.tv_sec += 1;
192                         rc = pthread_cond_timedwait(&config_cond,
193                                                     &config_lock, &ts);
194                 }
195                 if (!rc) {
196                         running_state = state;
197                         pthread_cond_broadcast(&config_cond);
198 #ifdef USE_SYSTEMD
199                         sd_notify(0, sd_notify_status());
200 #endif
201                 }
202         }
203         pthread_cleanup_pop(1);
204         return rc;
205 }
206
207 struct config *get_multipath_config(void)
208 {
209         return multipath_conf;
210 }
211
/*
 * Counterpart of get_multipath_config(); currently does nothing.
 */
void put_multipath_config(struct config *conf)
{
        /* Noop for now */
        (void)conf;
}
216
217 static int
218 need_switch_pathgroup (struct multipath * mpp, int refresh)
219 {
220         struct pathgroup * pgp;
221         struct path * pp;
222         unsigned int i, j;
223
224         if (!mpp || mpp->pgfailback == -FAILBACK_MANUAL)
225                 return 0;
226
227         /*
228          * Refresh path priority values
229          */
230         if (refresh)
231                 vector_foreach_slot (mpp->pg, pgp, i)
232                         vector_foreach_slot (pgp->paths, pp, j)
233                                 pathinfo(pp, conf, DI_PRIO);
234
235         if (!mpp->pg || VECTOR_SIZE(mpp->paths) == 0)
236                 return 0;
237
238         mpp->bestpg = select_path_group(mpp);
239
240         if (mpp->bestpg != mpp->nextpg)
241                 return 1;
242
243         return 0;
244 }
245
246 static void
247 switch_pathgroup (struct multipath * mpp)
248 {
249         mpp->stat_switchgroup++;
250         dm_switchgroup(mpp->alias, mpp->bestpg);
251         condlog(2, "%s: switch to path group #%i",
252                  mpp->alias, mpp->bestpg);
253 }
254
/*
 * Reconcile the currently registered maps (vecs->mpvec) with the newly
 * computed map vector nmpv.
 *
 * Every old map whose wwid is not found in nmpv is flushed from the
 * device-mapper. If the flush fails and the map can still be set up, it
 * is moved from the old vector into nmpv. Old maps that are found in
 * nmpv are optionally reassigned when conf->reassign_maps is set.
 *
 * Returns 0 on success, 1 if nmpv could not be grown.
 */
static int
coalesce_maps(struct vectors *vecs, vector nmpv)
{
        struct multipath * ompp;
        vector ompv = vecs->mpvec;
        unsigned int i;

        vector_foreach_slot (ompv, ompp, i) {
                condlog(3, "%s: coalesce map", ompp->alias);
                if (!find_mp_by_wwid(nmpv, ompp->wwid)) {
                        /*
                         * remove all current maps not allowed by the
                         * current configuration
                         */
                        if (dm_flush_map(ompp->alias)) {
                                condlog(0, "%s: unable to flush devmap",
                                        ompp->alias);
                                /*
                                 * may be just because the device is open
                                 */
                                if (setup_multipath(vecs, ompp) != 0) {
                                        /*
                                         * presumably setup_multipath already
                                         * dropped the map from the vector on
                                         * failure (see the note in
                                         * ev_add_path), so revisit slot i
                                         */
                                        i--;
                                        continue;
                                }
                                if (!vector_alloc_slot(nmpv))
                                        return 1;

                                /* keep the unflushable map: move it to nmpv */
                                vector_set_slot(nmpv, ompp);

                                vector_del_slot(ompv, i);
                                /*
                                 * slot i was just deleted, so step back and
                                 * let the loop increment land on it again
                                 * (i is unsigned: 0 wraps and is undone by
                                 * the increment)
                                 */
                                i--;
                        }
                        else {
                                dm_lib_release();
                                condlog(2, "%s devmap removed", ompp->alias);
                        }
                } else if (conf->reassign_maps) {
                        condlog(3, "%s: Reassign existing device-mapper"
                                " devices", ompp->alias);
                        dm_reassign(ompp->alias);
                }
        }
        return 0;
}
299
300 void
301 sync_map_state(struct multipath *mpp)
302 {
303         struct pathgroup *pgp;
304         struct path *pp;
305         unsigned int i, j;
306
307         if (!mpp->pg)
308                 return;
309
310         vector_foreach_slot (mpp->pg, pgp, i){
311                 vector_foreach_slot (pgp->paths, pp, j){
312                         if (pp->state == PATH_UNCHECKED ||
313                             pp->state == PATH_WILD ||
314                             pp->state == PATH_DELAYED)
315                                 continue;
316                         if ((pp->dmstate == PSTATE_FAILED ||
317                              pp->dmstate == PSTATE_UNDEF) &&
318                             (pp->state == PATH_UP || pp->state == PATH_GHOST))
319                                 dm_reinstate_path(mpp->alias, pp->dev_t);
320                         else if ((pp->dmstate == PSTATE_ACTIVE ||
321                                   pp->dmstate == PSTATE_UNDEF) &&
322                                  (pp->state == PATH_DOWN ||
323                                   pp->state == PATH_SHAKY))
324                                 dm_fail_path(mpp->alias, pp->dev_t);
325                 }
326         }
327 }
328
329 static void
330 sync_maps_state(vector mpvec)
331 {
332         unsigned int i;
333         struct multipath *mpp;
334
335         vector_foreach_slot (mpvec, mpp, i)
336                 sync_map_state(mpp);
337 }
338
339 static int
340 flush_map(struct multipath * mpp, struct vectors * vecs, int nopaths)
341 {
342         int r;
343
344         if (nopaths)
345                 r = dm_flush_map_nopaths(mpp->alias, mpp->deferred_remove);
346         else
347                 r = dm_flush_map(mpp->alias);
348         /*
349          * clear references to this map before flushing so we can ignore
350          * the spurious uevent we may generate with the dm_flush_map call below
351          */
352         if (r) {
353                 /*
354                  * May not really be an error -- if the map was already flushed
355                  * from the device mapper by dmsetup(8) for instance.
356                  */
357                 if (r == 1)
358                         condlog(0, "%s: can't flush", mpp->alias);
359                 else {
360                         condlog(2, "%s: devmap deferred remove", mpp->alias);
361                         mpp->deferred_remove = DEFERRED_REMOVE_IN_PROGRESS;
362                 }
363                 return r;
364         }
365         else {
366                 dm_lib_release();
367                 condlog(2, "%s: map flushed", mpp->alias);
368         }
369
370         orphan_paths(vecs->pathvec, mpp);
371         remove_map_and_stop_waiter(mpp, vecs, 1);
372
373         return 0;
374 }
375
376 int
377 update_map (struct multipath *mpp, struct vectors *vecs)
378 {
379         int retries = 3;
380         char params[PARAMS_SIZE] = {0};
381
382 retry:
383         condlog(4, "%s: updating new map", mpp->alias);
384         if (adopt_paths(vecs->pathvec, mpp)) {
385                 condlog(0, "%s: failed to adopt paths for new map update",
386                         mpp->alias);
387                 retries = -1;
388                 goto fail;
389         }
390         verify_paths(mpp, vecs);
391         mpp->flush_on_last_del = FLUSH_UNDEF;
392         mpp->action = ACT_RELOAD;
393
394         if (setup_map(mpp, params, PARAMS_SIZE)) {
395                 condlog(0, "%s: failed to setup new map in update", mpp->alias);
396                 retries = -1;
397                 goto fail;
398         }
399         if (domap(mpp, params, 1) <= 0 && retries-- > 0) {
400                 condlog(0, "%s: map_udate sleep", mpp->alias);
401                 sleep(1);
402                 goto retry;
403         }
404         dm_lib_release();
405
406 fail:
407         if (setup_multipath(vecs, mpp))
408                 return 1;
409
410         sync_map_state(mpp);
411
412         if (retries < 0)
413                 condlog(0, "%s: failed reload in new map update", mpp->alias);
414         return 0;
415 }
416
417 static int
418 uev_add_map (struct uevent * uev, struct vectors * vecs)
419 {
420         char *alias;
421         int major = -1, minor = -1, rc;
422
423         condlog(3, "%s: add map (uevent)", uev->kernel);
424         alias = uevent_get_dm_name(uev);
425         if (!alias) {
426                 condlog(3, "%s: No DM_NAME in uevent", uev->kernel);
427                 major = uevent_get_major(uev);
428                 minor = uevent_get_minor(uev);
429                 alias = dm_mapname(major, minor);
430                 if (!alias) {
431                         condlog(2, "%s: mapname not found for %d:%d",
432                                 uev->kernel, major, minor);
433                         return 1;
434                 }
435         }
436         pthread_cleanup_push(cleanup_lock, &vecs->lock);
437         lock(vecs->lock);
438         pthread_testcancel();
439         rc = ev_add_map(uev->kernel, alias, vecs);
440         lock_cleanup_pop(vecs->lock);
441         FREE(alias);
442         return rc;
443 }
444
445 int
446 ev_add_map (char * dev, char * alias, struct vectors * vecs)
447 {
448         char * refwwid;
449         struct multipath * mpp;
450         int map_present;
451         int r = 1;
452
453         map_present = dm_map_present(alias);
454
455         if (map_present && !dm_is_mpath(alias)) {
456                 condlog(4, "%s: not a multipath map", alias);
457                 return 0;
458         }
459
460         mpp = find_mp_by_alias(vecs->mpvec, alias);
461
462         if (mpp) {
463                 if (mpp->wait_for_udev > 1) {
464                         if (update_map(mpp, vecs))
465                                 /* setup multipathd removed the map */
466                                 return 1;
467                 }
468                 if (mpp->wait_for_udev) {
469                         mpp->wait_for_udev = 0;
470                         if (conf->delayed_reconfig &&
471                             !need_to_delay_reconfig(vecs)) {
472                                 condlog(2, "reconfigure (delayed)");
473                                 set_config_state(DAEMON_CONFIGURE);
474                                 return 0;
475                         }
476                 }
477                 /*
478                  * Not really an error -- we generate our own uevent
479                  * if we create a multipath mapped device as a result
480                  * of uev_add_path
481                  */
482                 if (conf->reassign_maps) {
483                         condlog(3, "%s: Reassign existing device-mapper devices",
484                                 alias);
485                         dm_reassign(alias);
486                 }
487                 return 0;
488         }
489         condlog(2, "%s: adding map", alias);
490
491         /*
492          * now we can register the map
493          */
494         if (map_present) {
495                 if ((mpp = add_map_without_path(vecs, alias))) {
496                         sync_map_state(mpp);
497                         condlog(2, "%s: devmap %s registered", alias, dev);
498                         return 0;
499                 } else {
500                         condlog(2, "%s: uev_add_map failed", dev);
501                         return 1;
502                 }
503         }
504         r = get_refwwid(CMD_NONE, dev, DEV_DEVMAP, vecs->pathvec, &refwwid);
505
506         if (refwwid) {
507                 r = coalesce_paths(vecs, NULL, refwwid, 0, CMD_NONE);
508                 dm_lib_release();
509         }
510
511         if (!r)
512                 condlog(2, "%s: devmap %s added", alias, dev);
513         else if (r == 2)
514                 condlog(2, "%s: uev_add_map %s blacklisted", alias, dev);
515         else
516                 condlog(0, "%s: uev_add_map %s failed", alias, dev);
517
518         FREE(refwwid);
519         return r;
520 }
521
522 static int
523 uev_remove_map (struct uevent * uev, struct vectors * vecs)
524 {
525         char *alias;
526         int minor;
527         struct multipath *mpp;
528
529         condlog(2, "%s: remove map (uevent)", uev->kernel);
530         alias = uevent_get_dm_name(uev);
531         if (!alias) {
532                 condlog(3, "%s: No DM_NAME in uevent, ignoring", uev->kernel);
533                 return 0;
534         }
535         minor = uevent_get_minor(uev);
536
537         pthread_cleanup_push(cleanup_lock, &vecs->lock);
538         lock(vecs->lock);
539         pthread_testcancel();
540         mpp = find_mp_by_minor(vecs->mpvec, minor);
541
542         if (!mpp) {
543                 condlog(2, "%s: devmap not registered, can't remove",
544                         uev->kernel);
545                 goto out;
546         }
547         if (strcmp(mpp->alias, alias)) {
548                 condlog(2, "%s: minor number mismatch (map %d, event %d)",
549                         mpp->alias, mpp->dmi->minor, minor);
550                 goto out;
551         }
552
553         orphan_paths(vecs->pathvec, mpp);
554         remove_map_and_stop_waiter(mpp, vecs, 1);
555 out:
556         lock_cleanup_pop(vecs->lock);
557         FREE(alias);
558         return 0;
559 }
560
561 /* Called from CLI handler */
562 int
563 ev_remove_map (char * devname, char * alias, int minor, struct vectors * vecs)
564 {
565         struct multipath * mpp;
566
567         mpp = find_mp_by_minor(vecs->mpvec, minor);
568
569         if (!mpp) {
570                 condlog(2, "%s: devmap not registered, can't remove",
571                         devname);
572                 return 1;
573         }
574         if (strcmp(mpp->alias, alias)) {
575                 condlog(2, "%s: minor number mismatch (map %d, event %d)",
576                         mpp->alias, mpp->dmi->minor, minor);
577                 return 1;
578         }
579         return flush_map(mpp, vecs, 0);
580 }
581
/*
 * Handle an "add path" uevent.
 *
 * If the path is already in the pathvec, the event is treated as
 * spurious; a path that has no map and no wwid yet is re-initialized from
 * the new udev device and, depending on pathinfo(), added to a map,
 * dropped as blacklisted, or reported as failed.
 *
 * Otherwise a new path is allocated from the udev device (outside the
 * vecs lock), stored in the pathvec and passed to ev_add_path().
 *
 * Returns 0 on success or when the device is skipped, 1 on error.
 */
static int
uev_add_path (struct uevent *uev, struct vectors * vecs)
{
        struct path *pp;
        int ret = 0, i;

        condlog(2, "%s: add path (uevent)", uev->kernel);
        if (strstr(uev->kernel, "..") != NULL) {
                /*
                 * Don't allow relative device names in the pathvec
                 */
                condlog(0, "%s: path name is invalid", uev->kernel);
                return 1;
        }

        /* first critical section: deal with an already-known path */
        pthread_cleanup_push(cleanup_lock, &vecs->lock);
        lock(vecs->lock);
        pthread_testcancel();
        pp = find_path_by_dev(vecs->pathvec, uev->kernel);
        if (pp) {
                int r;

                condlog(0, "%s: spurious uevent, path already in pathvec",
                        uev->kernel);
                if (!pp->mpp && !strlen(pp->wwid)) {
                        condlog(3, "%s: reinitialize path", uev->kernel);
                        /* swap in the udev device from this event */
                        udev_device_unref(pp->udev);
                        pp->udev = udev_device_ref(uev->udev);
                        r = pathinfo(pp, conf,
                                     DI_ALL | DI_BLACKLIST);
                        if (r == PATHINFO_OK)
                                ret = ev_add_path(pp, vecs);
                        else if (r == PATHINFO_SKIPPED) {
                                condlog(3, "%s: remove blacklisted path",
                                        uev->kernel);
                                i = find_slot(vecs->pathvec, (void *)pp);
                                if (i != -1)
                                        vector_del_slot(vecs->pathvec, i);
                                free_path(pp);
                        } else {
                                condlog(0, "%s: failed to reinitialize path",
                                        uev->kernel);
                                ret = 1;
                        }
                }
        }
        lock_cleanup_pop(vecs->lock);
        /*
         * pp may have been freed above; from here on it is only compared
         * against NULL ("was the path already known?"), never dereferenced.
         */
        if (pp)
                return ret;

        /*
         * get path vital state
         */
        ret = alloc_path_with_pathinfo(conf, uev->udev,
                                       DI_ALL, &pp);
        if (!pp) {
                /* a skipped device is not an error */
                if (ret == PATHINFO_SKIPPED)
                        return 0;
                condlog(3, "%s: failed to get path info", uev->kernel);
                return 1;
        }
        /* second critical section: store the new path and add it to a map */
        pthread_cleanup_push(cleanup_lock, &vecs->lock);
        lock(vecs->lock);
        pthread_testcancel();
        ret = store_path(vecs->pathvec, pp);
        if (!ret) {
                pp->checkint = conf->checkint;
                ret = ev_add_path(pp, vecs);
        } else {
                condlog(0, "%s: failed to store path info, "
                        "dropping event",
                        uev->kernel);
                free_path(pp);
                ret = 1;
        }
        lock_cleanup_pop(vecs->lock);
        return ret;
}
660
/*
 * Add path pp to the multipath map matching its wwid, creating the map
 * if none exists yet, and push the result to the device-mapper.
 *
 * returns:
 * 0: added (or deliberately left orphaned: map still waiting for udev,
 *    or should_multipath() declined the path)
 * 1: error
 *
 * On most errors the path stays in vecs->pathvec as an orphan. On a
 * device size mismatch the path is removed from the pathvec and freed,
 * so the caller must not use pp after a return of 1.
 */
int
ev_add_path (struct path * pp, struct vectors * vecs)
{
        struct multipath * mpp;
        char params[PARAMS_SIZE] = {0};
        int retries = 3;
        int start_waiter = 0;
        int ret;

        /*
         * need path UID to go any further
         */
        if (strlen(pp->wwid) == 0) {
                condlog(0, "%s: failed to get path uid", pp->dev);
                goto fail; /* leave path added to pathvec */
        }
        mpp = find_mp_by_wwid(vecs->mpvec, pp->wwid);
        if (mpp && mpp->wait_for_udev) {
                /* 2 makes ev_add_map() run update_map() once udev is done */
                mpp->wait_for_udev = 2;
                orphan_path(pp, "waiting for create to complete");
                return 0;
        }

        pp->mpp = mpp;
rescan:
        if (mpp) {
                if (pp->size && mpp->size != pp->size) {
                        condlog(0, "%s: failed to add new path %s, "
                                "device size mismatch",
                                mpp->alias, pp->dev);
                        int i = find_slot(vecs->pathvec, (void *)pp);
                        if (i != -1)
                                vector_del_slot(vecs->pathvec, i);
                        free_path(pp);
                        return 1;
                }

                condlog(4,"%s: adopting all paths for path %s",
                        mpp->alias, pp->dev);
                if (adopt_paths(vecs->pathvec, mpp))
                        goto fail; /* leave path added to pathvec */

                verify_paths(mpp, vecs);
                mpp->flush_on_last_del = FLUSH_UNDEF;
                mpp->action = ACT_RELOAD;
        } else {
                if (!should_multipath(pp, vecs->pathvec)) {
                        orphan_path(pp, "only one path");
                        return 0;
                }
                condlog(4,"%s: creating new map", pp->dev);
                if ((mpp = add_map_with_path(vecs, pp, 1))) {
                        mpp->action = ACT_CREATE;
                        /*
                         * We don't depend on ACT_CREATE, as domap will
                         * set it to ACT_NOTHING when complete.
                         */
                        start_waiter = 1;
                }
                if (!start_waiter)
                        goto fail; /* leave path added to pathvec */
        }

        /* persistent reservation check*/
        mpath_pr_event_handle(pp);

        /*
         * push the map to the device-mapper
         */
        if (setup_map(mpp, params, PARAMS_SIZE)) {
                condlog(0, "%s: failed to setup map for addition of new "
                        "path %s", mpp->alias, pp->dev);
                goto fail_map;
        }
        /*
         * reload the map for the multipath mapped device
         */
retry:
        ret = domap(mpp, params, 1);
        if (ret <= 0) {
                /* negative result: retry the same table after a pause */
                if (ret < 0 && retries-- > 0) {
                        condlog(0, "%s: retry domap for addition of new "
                                "path %s", mpp->alias, pp->dev);
                        sleep(1);
                        goto retry;
                }
                condlog(0, "%s: failed in domap for addition of new "
                        "path %s", mpp->alias, pp->dev);
                /*
                 * deal with asynchronous uevents :((
                 *
                 * Note this shares the `retries` budget with the loop
                 * above; once it goes negative the function reports
                 * failure at the end even though it falls through here.
                 */
                if (mpp->action == ACT_RELOAD && retries-- > 0) {
                        condlog(0, "%s: ev_add_path sleep", mpp->alias);
                        sleep(1);
                        update_mpp_paths(mpp, vecs->pathvec);
                        goto rescan;
                }
                else if (mpp->action == ACT_RELOAD)
                        condlog(0, "%s: giving up reload", mpp->alias);
                else
                        goto fail_map;
        }
        dm_lib_release();

        /*
         * update our state from kernel regardless of create or reload
         */
        if (setup_multipath(vecs, mpp))
                goto fail; /* if setup_multipath fails, it removes the map */

        sync_map_state(mpp);

        /* a newly created map needs an event waiter thread */
        if ((mpp->action == ACT_CREATE ||
             (mpp->action == ACT_NOTHING && start_waiter && !mpp->waiter)) &&
            start_waiter_thread(mpp, vecs))
                        goto fail_map;

        /* retries < 0 means the reload was given up on above */
        if (retries >= 0) {
                condlog(2, "%s [%s]: path added to devmap %s",
                        pp->dev, pp->dev_t, mpp->alias);
                return 0;
        } else
                goto fail;

fail_map:
        remove_map(mpp, vecs, 1);
fail:
        orphan_path(pp, "failed to add path");
        return 1;
}
796
797 static int
798 uev_remove_path (struct uevent *uev, struct vectors * vecs)
799 {
800         struct path *pp;
801         int ret;
802
803         condlog(2, "%s: remove path (uevent)", uev->kernel);
804         pthread_cleanup_push(cleanup_lock, &vecs->lock);
805         lock(vecs->lock);
806         pthread_testcancel();
807         pp = find_path_by_dev(vecs->pathvec, uev->kernel);
808         if (pp)
809                 ret = ev_remove_path(pp, vecs);
810         lock_cleanup_pop(vecs->lock);
811         if (!pp) {
812                 /* Not an error; path might have been purged earlier */
813                 condlog(0, "%s: path already removed", uev->kernel);
814                 return 0;
815         }
816         return ret;
817 }
818
819 int
820 ev_remove_path (struct path *pp, struct vectors * vecs)
821 {
822         struct multipath * mpp;
823         int i, retval = 0;
824         char params[PARAMS_SIZE] = {0};
825
826         /*
827          * avoid referring to the map of an orphaned path
828          */
829         if ((mpp = pp->mpp)) {
830                 /*
831                  * transform the mp->pg vector of vectors of paths
832                  * into a mp->params string to feed the device-mapper
833                  */
834                 if (update_mpp_paths(mpp, vecs->pathvec)) {
835                         condlog(0, "%s: failed to update paths",
836                                 mpp->alias);
837                         goto fail;
838                 }
839                 if ((i = find_slot(mpp->paths, (void *)pp)) != -1)
840                         vector_del_slot(mpp->paths, i);
841
842                 /*
843                  * remove the map IFF removing the last path
844                  */
845                 if (VECTOR_SIZE(mpp->paths) == 0) {
846                         char alias[WWID_SIZE];
847
848                         /*
849                          * flush_map will fail if the device is open
850                          */
851                         strncpy(alias, mpp->alias, WWID_SIZE);
852                         if (mpp->flush_on_last_del == FLUSH_ENABLED) {
853                                 condlog(2, "%s Last path deleted, disabling queueing", mpp->alias);
854                                 mpp->retry_tick = 0;
855                                 mpp->no_path_retry = NO_PATH_RETRY_FAIL;
856                                 mpp->flush_on_last_del = FLUSH_IN_PROGRESS;
857                                 dm_queue_if_no_path(mpp->alias, 0);
858                         }
859                         if (!flush_map(mpp, vecs, 1)) {
860                                 condlog(2, "%s: removed map after"
861                                         " removing all paths",
862                                         alias);
863                                 retval = 0;
864                                 goto out;
865                         }
866                         /*
867                          * Not an error, continue
868                          */
869                 }
870
871                 if (setup_map(mpp, params, PARAMS_SIZE)) {
872                         condlog(0, "%s: failed to setup map for"
873                                 " removal of path %s", mpp->alias, pp->dev);
874                         goto fail;
875                 }
876
877                 if (mpp->wait_for_udev) {
878                         mpp->wait_for_udev = 2;
879                         goto out;
880                 }
881
882                 /*
883                  * reload the map
884                  */
885                 mpp->action = ACT_RELOAD;
886                 if (domap(mpp, params, 1) <= 0) {
887                         condlog(0, "%s: failed in domap for "
888                                 "removal of path %s",
889                                 mpp->alias, pp->dev);
890                         retval = 1;
891                 } else {
892                         /*
893                          * update our state from kernel
894                          */
895                         if (setup_multipath(vecs, mpp))
896                                 return 1;
897                         sync_map_state(mpp);
898
899                         condlog(2, "%s [%s]: path removed from map %s",
900                                 pp->dev, pp->dev_t, mpp->alias);
901                 }
902         }
903
904 out:
905         if ((i = find_slot(vecs->pathvec, (void *)pp)) != -1)
906                 vector_del_slot(vecs->pathvec, i);
907
908         free_path(pp);
909
910         return retval;
911
912 fail:
913         remove_map_and_stop_waiter(mpp, vecs, 1);
914         return 1;
915 }
916
917 static int
918 uev_update_path (struct uevent *uev, struct vectors * vecs)
919 {
920         int ro, retval = 0;
921
922         ro = uevent_get_disk_ro(uev);
923
924         if (ro >= 0) {
925                 struct path * pp;
926                 struct multipath *mpp = NULL;
927
928                 condlog(2, "%s: update path write_protect to '%d' (uevent)",
929                         uev->kernel, ro);
930                 pthread_cleanup_push(cleanup_lock, &vecs->lock);
931                 lock(vecs->lock);
932                 pthread_testcancel();
933                 /*
934                  * pthread_mutex_lock() and pthread_mutex_unlock()
935                  * need to be at the same indentation level, hence
936                  * this slightly convoluted codepath.
937                  */
938                 pp = find_path_by_dev(vecs->pathvec, uev->kernel);
939                 if (pp) {
940                         if (pp->initialized == INIT_REQUESTED_UDEV) {
941                                 retval = 2;
942                         } else {
943                                 mpp = pp->mpp;
944                                 if (mpp && mpp->wait_for_udev) {
945                                         mpp->wait_for_udev = 2;
946                                         mpp = NULL;
947                                         retval = 0;
948                                 }
949                         }
950                         if (mpp) {
951                                 retval = reload_map(vecs, mpp, 0, 1);
952
953                                 condlog(2, "%s: map %s reloaded (retval %d)",
954                                         uev->kernel, mpp->alias, retval);
955                         }
956                 }
957                 lock_cleanup_pop(vecs->lock);
958                 if (!pp) {
959                         condlog(0, "%s: spurious uevent, path not found",
960                                 uev->kernel);
961                         return 1;
962                 }
963                 if (retval == 2)
964                         return uev_add_path(uev, vecs);
965         }
966
967         return retval;
968 }
969
970 static int
971 map_discovery (struct vectors * vecs)
972 {
973         struct multipath * mpp;
974         unsigned int i;
975
976         if (dm_get_maps(vecs->mpvec))
977                 return 1;
978
979         vector_foreach_slot (vecs->mpvec, mpp, i)
980                 if (setup_multipath(vecs, mpp))
981                         return 1;
982
983         return 0;
984 }
985
986 int
987 uxsock_trigger (char * str, char ** reply, int * len, void * trigger_data)
988 {
989         struct vectors * vecs;
990         int r;
991
992         *reply = NULL;
993         *len = 0;
994         vecs = (struct vectors *)trigger_data;
995
996         r = parse_cmd(str, reply, len, vecs, uxsock_timeout / 1000);
997
998         if (r > 0) {
999                 if (r == ETIMEDOUT)
1000                         *reply = STRDUP("timeout\n");
1001                 else
1002                         *reply = STRDUP("fail\n");
1003                 *len = strlen(*reply) + 1;
1004                 r = 1;
1005         }
1006         else if (!r && *len == 0) {
1007                 *reply = STRDUP("ok\n");
1008                 *len = strlen(*reply) + 1;
1009                 r = 0;
1010         }
1011         /* else if (r < 0) leave *reply alone */
1012
1013         return r;
1014 }
1015
/*
 * Decide whether a uevent devpath should be ignored.
 *
 * Only whole block devices are kept: the devpath must contain
 * "/block/<name>" and must not continue with a further "/<name>"
 * component (a partition).
 *
 * Returns 1 if the event is to be discarded, 0 if it is to be kept.
 */
static int
uev_discard(char * devpath)
{
	char disk[11], part[11];
	char *blk;
	int is_partition;

	blk = strstr(devpath, "/block/");
	if (!blk) {
		condlog(4, "no /block/ in '%s'", devpath);
		return 1;
	}

	if (sscanf(blk, "/block/%10s", disk) == 1) {
		is_partition =
			(sscanf(blk, "/block/%10[^/]/%10s", disk, part) == 2);
		if (!is_partition)
			return 0;
	}

	condlog(4, "discard event on %s", devpath);
	return 1;
}
1037
1038 int
1039 uev_trigger (struct uevent * uev, void * trigger_data)
1040 {
1041         int r = 0;
1042         struct vectors * vecs;
1043
1044         vecs = (struct vectors *)trigger_data;
1045
1046         if (uev_discard(uev->devpath))
1047                 return 0;
1048
1049         pthread_cleanup_push(config_cleanup, NULL);
1050         pthread_mutex_lock(&config_lock);
1051         if (running_state != DAEMON_IDLE &&
1052             running_state != DAEMON_RUNNING)
1053                 pthread_cond_wait(&config_cond, &config_lock);
1054         pthread_cleanup_pop(1);
1055
1056         if (running_state == DAEMON_SHUTDOWN)
1057                 return 0;
1058
1059         /*
1060          * device map event
1061          * Add events are ignored here as the tables
1062          * are not fully initialised then.
1063          */
1064         if (!strncmp(uev->kernel, "dm-", 3)) {
1065                 if (!strncmp(uev->action, "change", 6)) {
1066                         r = uev_add_map(uev, vecs);
1067                         goto out;
1068                 }
1069                 if (!strncmp(uev->action, "remove", 6)) {
1070                         r = uev_remove_map(uev, vecs);
1071                         goto out;
1072                 }
1073                 goto out;
1074         }
1075
1076         /*
1077          * path add/remove event
1078          */
1079         if (filter_devnode(conf->blist_devnode, conf->elist_devnode,
1080                            uev->kernel) > 0)
1081                 goto out;
1082
1083         if (!strncmp(uev->action, "add", 3)) {
1084                 r = uev_add_path(uev, vecs);
1085                 goto out;
1086         }
1087         if (!strncmp(uev->action, "remove", 6)) {
1088                 r = uev_remove_path(uev, vecs);
1089                 goto out;
1090         }
1091         if (!strncmp(uev->action, "change", 6)) {
1092                 r = uev_update_path(uev, vecs);
1093                 goto out;
1094         }
1095
1096 out:
1097         return r;
1098 }
1099
1100 static void *
1101 ueventloop (void * ap)
1102 {
1103         struct udev *udev = ap;
1104
1105         if (uevent_listen(udev))
1106                 condlog(0, "error starting uevent listener");
1107
1108         return NULL;
1109 }
1110
1111 static void *
1112 uevqloop (void * ap)
1113 {
1114         if (uevent_dispatch(&uev_trigger, ap))
1115                 condlog(0, "error starting uevent dispatcher");
1116
1117         return NULL;
1118 }
/*
 * Thread entry point for the unix-socket command listener.
 *
 * Initialises the CLI with cli_init(), registers one handler per
 * command key combination, sets the process umask to 077 and then
 * calls uxsock_listen() with uxsock_trigger as the callback and ap as
 * its data.
 *
 * Always returns NULL; if cli_init() fails nothing is registered and
 * the listener is not started.
 */
1119 static void *
1120 uxlsnrloop (void * ap)
1121 {
1122         if (cli_init()) {
1123                 condlog(1, "Failed to init uxsock listener");
1124                 return NULL;
1125         }
1126
             /*
              * Most handlers are registered with set_handler_callback();
              * LIST+STATUS, LIST+DAEMON, RECONFIGURE, QUIT and SHUTDOWN
              * use set_unlocked_handler_callback() instead.
              * NOTE(review): presumably "unlocked" handlers run without
              * the vecs lock held - confirm against the cli code.
              */
1127         set_handler_callback(LIST+PATHS, cli_list_paths);
1128         set_handler_callback(LIST+PATHS+FMT, cli_list_paths_fmt);
1129         set_handler_callback(LIST+PATHS+RAW+FMT, cli_list_paths_raw);
1130         set_handler_callback(LIST+PATH, cli_list_path);
1131         set_handler_callback(LIST+MAPS, cli_list_maps);
1132         set_unlocked_handler_callback(LIST+STATUS, cli_list_status);
1133         set_unlocked_handler_callback(LIST+DAEMON, cli_list_daemon);
1134         set_handler_callback(LIST+MAPS+STATUS, cli_list_maps_status);
1135         set_handler_callback(LIST+MAPS+STATS, cli_list_maps_stats);
1136         set_handler_callback(LIST+MAPS+FMT, cli_list_maps_fmt);
1137         set_handler_callback(LIST+MAPS+RAW+FMT, cli_list_maps_raw);
1138         set_handler_callback(LIST+MAPS+TOPOLOGY, cli_list_maps_topology);
             /* LIST+TOPOLOGY shares its handler with LIST+MAPS+TOPOLOGY */
1139         set_handler_callback(LIST+TOPOLOGY, cli_list_maps_topology);
1140         set_handler_callback(LIST+MAPS+JSON, cli_list_maps_json);
1141         set_handler_callback(LIST+MAP+TOPOLOGY, cli_list_map_topology);
1142         set_handler_callback(LIST+MAP+FMT, cli_list_map_fmt);
             /*
              * NOTE(review): the raw single-map variant is bound to
              * cli_list_map_fmt, whereas LIST+MAPS+RAW+FMT has its own
              * cli_list_maps_raw handler - confirm this is intended.
              */
1143         set_handler_callback(LIST+MAP+RAW+FMT, cli_list_map_fmt);
1144         set_handler_callback(LIST+MAP+JSON, cli_list_map_json);
1145         set_handler_callback(LIST+CONFIG, cli_list_config);
1146         set_handler_callback(LIST+BLACKLIST, cli_list_blacklist);
1147         set_handler_callback(LIST+DEVICES, cli_list_devices);
1148         set_handler_callback(LIST+WILDCARDS, cli_list_wildcards);
1149         set_handler_callback(ADD+PATH, cli_add_path);
1150         set_handler_callback(DEL+PATH, cli_del_path);
1151         set_handler_callback(ADD+MAP, cli_add_map);
1152         set_handler_callback(DEL+MAP, cli_del_map);
1153         set_handler_callback(SWITCH+MAP+GROUP, cli_switch_group);
1154         set_unlocked_handler_callback(RECONFIGURE, cli_reconfigure);
1155         set_handler_callback(SUSPEND+MAP, cli_suspend);
1156         set_handler_callback(RESUME+MAP, cli_resume);
1157         set_handler_callback(RESIZE+MAP, cli_resize);
1158         set_handler_callback(RELOAD+MAP, cli_reload);
1159         set_handler_callback(RESET+MAP, cli_reassign);
1160         set_handler_callback(REINSTATE+PATH, cli_reinstate);
1161         set_handler_callback(FAIL+PATH, cli_fail);
1162         set_handler_callback(DISABLEQ+MAP, cli_disable_queueing);
1163         set_handler_callback(RESTOREQ+MAP, cli_restore_queueing);
1164         set_handler_callback(DISABLEQ+MAPS, cli_disable_all_queueing);
1165         set_handler_callback(RESTOREQ+MAPS, cli_restore_all_queueing);
1166         set_unlocked_handler_callback(QUIT, cli_quit);
1167         set_unlocked_handler_callback(SHUTDOWN, cli_shutdown);
1168         set_handler_callback(GETPRSTATUS+MAP, cli_getprstatus);
1169         set_handler_callback(SETPRSTATUS+MAP, cli_setprstatus);
1170         set_handler_callback(UNSETPRSTATUS+MAP, cli_unsetprstatus);
1171         set_handler_callback(FORCEQ+DAEMON, cli_force_no_daemon_q);
1172         set_handler_callback(RESTOREQ+DAEMON, cli_restore_no_daemon_q);
1173
             /* umask 077 masks out all group and other permission bits */
1174         umask(077);
1175         uxsock_listen(&uxsock_trigger, ap);
1176
1177         return NULL;
1178 }
1179
1180 void
1181 exit_daemon (void)
1182 {
1183         post_config_state(DAEMON_SHUTDOWN);
1184 }
1185
1186 static void
1187 fail_path (struct path * pp, int del_active)
1188 {
1189         if (!pp->mpp)
1190                 return;
1191
1192         condlog(2, "checker failed path %s in map %s",
1193                  pp->dev_t, pp->mpp->alias);
1194
1195         dm_fail_path(pp->mpp->alias, pp->dev_t);
1196         if (del_active)
1197                 update_queue_mode_del_path(pp->mpp);
1198 }
1199
1200 /*
1201  * caller must have locked the path list before calling that function
1202  */
1203 static int
1204 reinstate_path (struct path * pp, int add_active)
1205 {
1206         int ret = 0;
1207
1208         if (!pp->mpp)
1209                 return 0;
1210
1211         if (dm_reinstate_path(pp->mpp->alias, pp->dev_t)) {
1212                 condlog(0, "%s: reinstate failed", pp->dev_t);
1213                 ret = 1;
1214         } else {
1215                 condlog(2, "%s: reinstated", pp->dev_t);
1216                 if (add_active)
1217                         update_queue_mode_add_path(pp->mpp);
1218         }
1219         return ret;
1220 }
1221
1222 static void
1223 enable_group(struct path * pp)
1224 {
1225         struct pathgroup * pgp;
1226
1227         /*
1228          * if path is added through uev_add_path, pgindex can be unset.
1229          * next update_strings() will set it, upon map reload event.
1230          *
1231          * we can safely return here, because upon map reload, all
1232          * PG will be enabled.
1233          */
1234         if (!pp->mpp->pg || !pp->pgindex)
1235                 return;
1236
1237         pgp = VECTOR_SLOT(pp->mpp->pg, pp->pgindex - 1);
1238
1239         if (pgp->status == PGSTATE_DISABLED) {
1240                 condlog(2, "%s: enable group #%i", pp->mpp->alias, pp->pgindex);
1241                 dm_enablegroup(pp->mpp->alias, pp->pgindex);
1242         }
1243 }
1244
1245 static void
1246 mpvec_garbage_collector (struct vectors * vecs)
1247 {
1248         struct multipath * mpp;
1249         unsigned int i;
1250
1251         if (!vecs->mpvec)
1252                 return;
1253
1254         vector_foreach_slot (vecs->mpvec, mpp, i) {
1255                 if (mpp && mpp->alias && !dm_map_present(mpp->alias)) {
1256                         condlog(2, "%s: remove dead map", mpp->alias);
1257                         remove_map_and_stop_waiter(mpp, vecs, 1);
1258                         i--;
1259                 }
1260         }
1261 }
1262
1263 /* This is called after a path has started working again. It the multipath
1264  * device for this path uses the followover failback type, and this is the
1265  * best pathgroup, and this is the first path in the pathgroup to come back
1266  * up, then switch to this pathgroup */
1267 static int
1268 followover_should_failback(struct path * pp)
1269 {
1270         struct pathgroup * pgp;
1271         struct path *pp1;
1272         int i;
1273
1274         if (pp->mpp->pgfailback != -FAILBACK_FOLLOWOVER ||
1275             !pp->mpp->pg || !pp->pgindex ||
1276             pp->pgindex != pp->mpp->bestpg)
1277                 return 0;
1278
1279         pgp = VECTOR_SLOT(pp->mpp->pg, pp->pgindex - 1);
1280         vector_foreach_slot(pgp->paths, pp1, i) {
1281                 if (pp1 == pp)
1282                         continue;
1283                 if (pp1->chkrstate != PATH_DOWN && pp1->chkrstate != PATH_SHAKY)
1284                         return 0;
1285         }
1286         return 1;
1287 }
1288
1289 static void
1290 missing_uev_wait_tick(struct vectors *vecs)
1291 {
1292         struct multipath * mpp;
1293         unsigned int i;
1294         int timed_out = 0;
1295
1296         vector_foreach_slot (vecs->mpvec, mpp, i) {
1297                 if (mpp->wait_for_udev && --mpp->uev_wait_tick <= 0) {
1298                         timed_out = 1;
1299                         condlog(0, "%s: timeout waiting on creation uevent. enabling reloads", mpp->alias);
1300                         if (mpp->wait_for_udev > 1 && update_map(mpp, vecs)) {
1301                                 /* update_map removed map */
1302                                 i--;
1303                                 continue;
1304                         }
1305                         mpp->wait_for_udev = 0;
1306                 }
1307         }
1308
1309         if (timed_out && conf->delayed_reconfig &&
1310             !need_to_delay_reconfig(vecs)) {
1311                 condlog(2, "reconfigure (delayed)");
1312                 set_config_state(DAEMON_CONFIGURE);
1313         }
1314 }
1315
1316 static void
1317 defered_failback_tick (vector mpvec)
1318 {
1319         struct multipath * mpp;
1320         unsigned int i;
1321
1322         vector_foreach_slot (mpvec, mpp, i) {
1323                 /*
1324                  * defered failback getting sooner
1325                  */
1326                 if (mpp->pgfailback > 0 && mpp->failback_tick > 0) {
1327                         mpp->failback_tick--;
1328
1329                         if (!mpp->failback_tick && need_switch_pathgroup(mpp, 1))
1330                                 switch_pathgroup(mpp);
1331                 }
1332         }
1333 }
1334
1335 static void
1336 retry_count_tick(vector mpvec)
1337 {
1338         struct multipath *mpp;
1339         unsigned int i;
1340
1341         vector_foreach_slot (mpvec, mpp, i) {
1342                 if (mpp->retry_tick > 0) {
1343                         mpp->stat_total_queueing_time++;
1344                         condlog(4, "%s: Retrying.. No active path", mpp->alias);
1345                         if(--mpp->retry_tick == 0) {
1346                                 dm_queue_if_no_path(mpp->alias, 0);
1347                                 condlog(2, "%s: Disable queueing", mpp->alias);
1348                         }
1349                 }
1350         }
1351 }
1352
1353 int update_prio(struct path *pp, int refresh_all)
1354 {
1355         int oldpriority;
1356         struct path *pp1;
1357         struct pathgroup * pgp;
1358         int i, j, changed = 0;
1359
1360         if (refresh_all) {
1361                 vector_foreach_slot (pp->mpp->pg, pgp, i) {
1362                         vector_foreach_slot (pgp->paths, pp1, j) {
1363                                 oldpriority = pp1->priority;
1364                                 pathinfo(pp1, conf, DI_PRIO);
1365                                 if (pp1->priority != oldpriority)
1366                                         changed = 1;
1367                         }
1368                 }
1369                 return changed;
1370         }
1371         oldpriority = pp->priority;
1372         pathinfo(pp, conf, DI_PRIO);
1373
1374         if (pp->priority == oldpriority)
1375                 return 0;
1376         return 1;
1377 }
1378
/*
 * Reload a map and resynchronise the daemon's view of it.
 *
 * Calls reload_map() with the given refresh flag, then
 * dm_lib_release(), setup_multipath() and sync_map_state().
 *
 * Returns 0 on success, 1 if reload_map() or setup_multipath() fails.
 */
int update_path_groups(struct multipath *mpp, struct vectors *vecs, int refresh)
{
	int failed;

	failed = reload_map(vecs, mpp, refresh, 1);
	if (failed)
		return 1;

	dm_lib_release();

	failed = (setup_multipath(vecs, mpp) != 0);
	if (failed)
		return 1;

	sync_map_state(mpp);
	return 0;
}
1391
1392 /*
1393  * Returns '1' if the path has been checked, '0' otherwise
1394  */
/*
 * Run one checker pass on a single path and act on the result.
 *
 * vecs:  daemon vectors; passed on to ev_add_path() and
 *        update_path_groups(), and vecs->pathvec is used when
 *        synchronising with the kernel state
 * pp:    the path to check
 * ticks: amount subtracted from pp->tick; the path is only checked
 *        once pp->tick has reached zero
 *
 * Depending on the old and new state the path is failed or
 * reinstated in its map, its check interval is adjusted, and a path
 * group switch or reload may be triggered.
 */
1395 int
1396 check_path (struct vectors * vecs, struct path * pp, int ticks)
1397 {
1398         int newstate;
1399         int new_path_up = 0;
1400         int chkr_new_path_up = 0;
1401         int add_active;
1402         int disable_reinstate = 0;
             /* checker state before this pass, compared further down */
1403         int oldchkrstate = pp->chkrstate;
1404
             /* initialized (or re-requested) paths without a map are skipped */
1405         if ((pp->initialized == INIT_OK ||
1406              pp->initialized == INIT_REQUESTED_UDEV) && !pp->mpp)
1407                 return 0;
1408
             /* count pp->tick down by ticks, never going below zero */
1409         if (pp->tick)
1410                 pp->tick -= (pp->tick > ticks) ? ticks : pp->tick;
1411         if (pp->tick)
1412                 return 0; /* don't check this path yet */
1413
             /*
              * Path without a map in INIT_MISSING_UDEV state: write
              * "change" to its sysfs "uevent" attribute, at most
              * conf->retrigger_tries times.
              */
1414         if (!pp->mpp && pp->initialized == INIT_MISSING_UDEV &&
1415             pp->retriggers < conf->retrigger_tries) {
1416                 condlog(2, "%s: triggering change event to reinitialize",
1417                         pp->dev);
1418                 pp->initialized = INIT_REQUESTED_UDEV;
1419                 pp->retriggers++;
1420                 sysfs_attr_set_value(pp->udev, "uevent", "change",
1421                                      strlen("change"));
1422                 return 0;
1423         }
1424
1425         /*
1426          * provision a next check soonest,
1427          * in case we exit abnormaly from here
1428          */
1429         pp->tick = conf->checkint;
1430
1431         newstate = path_offline(pp);
1432         /*
1433          * Wait for uevent for removed paths;
1434          * some LLDDs like zfcp keep paths unavailable
1435          * without sending uevents.
1436          */
1437         if (newstate == PATH_REMOVED)
1438                 newstate = PATH_DOWN;
1439
             /* the checker itself only runs if path_offline() said PATH_UP */
1440         if (newstate == PATH_UP)
1441                 newstate = get_state(pp, conf, 1);
1442         else
1443                 checker_clear_message(&pp->checker);
1444
1445         if (newstate == PATH_WILD || newstate == PATH_UNCHECKED) {
1446                 condlog(2, "%s: unusable path", pp->dev);
1447                 pathinfo(pp, conf, 0);
1448                 return 1;
1449         }
             /*
              * Path not in any map: if it has no wwid yet, is not in
              * INIT_MISSING_UDEV state and is now up or ghost, gather
              * full path info and try to add it. Returns 0 either way.
              */
1450         if (!pp->mpp) {
1451                 if (!strlen(pp->wwid) && pp->initialized != INIT_MISSING_UDEV &&
1452                     (newstate == PATH_UP || newstate == PATH_GHOST)) {
1453                         condlog(2, "%s: add missing path", pp->dev);
1454                         if (pathinfo(pp, conf, DI_ALL) == 0) {
1455                                 ev_add_path(pp, vecs);
1456                                 pp->tick = 1;
1457                         }
1458                 }
1459                 return 0;
1460         }
1461         /*
1462          * Async IO in flight. Keep the previous path state
1463          * and reschedule as soon as possible
1464          */
1465         if (newstate == PATH_PENDING) {
1466                 pp->tick = 1;
1467                 return 0;
1468         }
1469         /*
1470          * Synchronize with kernel state
1471          */
1472         if (update_multipath_strings(pp->mpp, vecs->pathvec, 1)) {
1473                 condlog(1, "%s: Could not synchronize with kernel state",
1474                         pp->dev);
1475                 pp->dmstate = PSTATE_UNDEF;
1476         }
1477         /* if update_multipath_strings orphaned the path, quit early */
1478         if (!pp->mpp)
1479                 return 0;
1480
             /*
              * A usable path with wait_checks pending is held in
              * PATH_DELAYED while the map still has active paths; with
              * no active path left the pending wait is dropped.
              */
1481         if ((newstate == PATH_UP || newstate == PATH_GHOST) &&
1482              pp->wait_checks > 0) {
1483                 if (pp->mpp && pp->mpp->nr_active > 0) {
1484                         pp->state = PATH_DELAYED;
1485                         pp->wait_checks--;
1486                         return 1;
1487                 } else
1488                         pp->wait_checks = 0;
1489         }
1490
1491         /*
1492          * don't reinstate failed path, if its in stand-by
1493          * and if target supports only implicit tpgs mode.
1494          * this will prevent unnecessary i/o by dm on stand-by
1495          * paths if there are no other active paths in map.
1496          */
1497         disable_reinstate = (newstate == PATH_GHOST &&
1498                             pp->mpp->nr_active == 0 &&
1499                             pp->tpgs == TPGS_IMPLICIT) ? 1 : 0;
1500
1501         pp->chkrstate = newstate;
             /* first branch: the path state changed since the last check */
1502         if (newstate != pp->state) {
1503                 int oldstate = pp->state;
1504                 pp->state = newstate;
1505
1506                 if (strlen(checker_message(&pp->checker)))
1507                         LOG_MSG(1, checker_message(&pp->checker));
1508
1509                 /*
1510                  * upon state change, reset the checkint
1511                  * to the shortest delay
1512                  */
1513                 pp->checkint = conf->checkint;
1514
1515                 if (newstate == PATH_DOWN || newstate == PATH_SHAKY) {
1516                         /*
1517                          * proactively fail path in the DM
1518                          */
1519                         if (oldstate == PATH_UP ||
1520                             oldstate == PATH_GHOST) {
1521                                 fail_path(pp, 1);
                                     /*
                                      * the path failed while it was being
                                      * watched: arm the wait counter from
                                      * the map's delay_wait_checks
                                      */
1522                                 if (pp->mpp->delay_wait_checks > 0 &&
1523                                     pp->watch_checks > 0) {
1524                                         pp->wait_checks = pp->mpp->delay_wait_checks;
1525                                         pp->watch_checks = 0;
1526                                 }
1527                         }else
1528                                 fail_path(pp, 0);
1529
1530                         /*
1531                          * cancel scheduled failback
1532                          */
1533                         pp->mpp->failback_tick = 0;
1534
1535                         pp->mpp->stat_path_failures++;
1536                         return 1;
1537                 }
1538
1539                 if(newstate == PATH_UP || newstate == PATH_GHOST){
1540                         if ( pp->mpp && pp->mpp->prflag ){
1541                                 /*
1542                                  * Check Persistent Reservation.
1543                                  */
1544                         condlog(2, "%s: checking persistent reservation "
1545                                 "registration", pp->dev);
1546                         mpath_pr_event_handle(pp);
1547                         }
1548                 }
1549
1550                 /*
1551                  * reinstate this path
1552                  */
1553                 if (oldstate != PATH_UP &&
1554                     oldstate != PATH_GHOST) {
1555                         if (pp->mpp->delay_watch_checks > 0)
1556                                 pp->watch_checks = pp->mpp->delay_watch_checks;
1557                         add_active = 1;
1558                 } else {
1559                         if (pp->watch_checks > 0)
1560                                 pp->watch_checks--;
1561                         add_active = 0;
1562                 }
                     /*
                      * reinstate_path() returns non-zero on failure; in
                      * that case the path is passed to ev_add_path() and
                      * pp->tick is set to 1
                      */
1563                 if (!disable_reinstate && reinstate_path(pp, add_active)) {
1564                         condlog(3, "%s: reload map", pp->dev);
1565                         ev_add_path(pp, vecs);
1566                         pp->tick = 1;
1567                         return 0;
1568                 }
1569                 new_path_up = 1;
1570
                     /* the checker state itself went from not-usable to usable */
1571                 if (oldchkrstate != PATH_UP && oldchkrstate != PATH_GHOST)
1572                         chkr_new_path_up = 1;
1573
1574                 /*
1575                  * if at least one path is up in a group, and
1576                  * the group is disabled, re-enable it
1577                  */
1578                 if (newstate == PATH_UP)
1579                         enable_group(pp);
1580         }
             /* second branch: state unchanged and the path is usable */
1581         else if (newstate == PATH_UP || newstate == PATH_GHOST) {
1582                 if ((pp->dmstate == PSTATE_FAILED ||
1583                     pp->dmstate == PSTATE_UNDEF) &&
1584                     !disable_reinstate) {
1585                         /* Clear IO errors */
1586                         if (reinstate_path(pp, 0)) {
1587                                 condlog(3, "%s: reload map", pp->dev);
1588                                 ev_add_path(pp, vecs);
1589                                 pp->tick = 1;
1590                                 return 0;
1591                         }
1592                 } else {
1593                         LOG_MSG(4, checker_message(&pp->checker));
1594                         if (pp->checkint != conf->max_checkint) {
1595                                 /*
1596                                  * double the next check delay.
1597                                  * max at conf->max_checkint
1598                                  */
1599                                 if (pp->checkint < (conf->max_checkint / 2))
1600                                         pp->checkint = 2 * pp->checkint;
1601                                 else
1602                                         pp->checkint = conf->max_checkint;
1603
1604                                 condlog(4, "%s: delay next check %is",
1605                                         pp->dev_t, pp->checkint);
1606                         }
1607                         if (pp->watch_checks > 0)
1608                                 pp->watch_checks--;
1609                         pp->tick = pp->checkint;
1610                 }
1611         }
             /*
              * third branch: state unchanged, path down, and the checker
              * left a message; the log level depends on log_checker_err
              */
1612         else if (newstate == PATH_DOWN &&
1613                  strlen(checker_message(&pp->checker))) {
1614                 if (conf->log_checker_err == LOG_CHKR_ERR_ONCE)
1615                         LOG_MSG(3, checker_message(&pp->checker));
1616                 else
1617                         LOG_MSG(2, checker_message(&pp->checker));
1618         }
1619
1620         pp->state = newstate;
1621
1622
             /* no priority refresh or group switch while the map waits for udev */
1623         if (pp->mpp->wait_for_udev)
1624                 return 1;
1625         /*
1626          * path prio refreshing
1627          */
1628         condlog(4, "path prio refresh");
1629
             /*
              * update_prio() refreshes every path of the map when this
              * path just came up, otherwise only pp. A priority change
              * with group_by_prio and immediate failback regroups the
              * map; otherwise a needed group switch is either scheduled
              * through failback_tick or performed right away.
              */
1630         if (update_prio(pp, new_path_up) &&
1631             (pp->mpp->pgpolicyfn == (pgpolicyfn *)group_by_prio) &&
1632              pp->mpp->pgfailback == -FAILBACK_IMMEDIATE)
1633                 update_path_groups(pp->mpp, vecs, !new_path_up);
1634         else if (need_switch_pathgroup(pp->mpp, 0)) {
1635                 if (pp->mpp->pgfailback > 0 &&
1636                     (new_path_up || pp->mpp->failback_tick <= 0))
1637                         pp->mpp->failback_tick =
1638                                 pp->mpp->pgfailback + 1;
1639                 else if (pp->mpp->pgfailback == -FAILBACK_IMMEDIATE ||
1640                          (chkr_new_path_up && followover_should_failback(pp)))
1641                         switch_pathgroup(pp->mpp);
1642         }
1643         return 1;
1644 }
1645
1646 static void *
1647 checkerloop (void *ap)
1648 {
1649         struct vectors *vecs;
1650         struct path *pp;
1651         int count = 0;
1652         unsigned int i;
1653         struct itimerval timer_tick_it;
1654         struct timeval last_time;
1655
1656         mlockall(MCL_CURRENT | MCL_FUTURE);
1657         vecs = (struct vectors *)ap;
1658         condlog(2, "path checkers start up");
1659
1660         /*
1661          * init the path check interval
1662          */
1663         vector_foreach_slot (vecs->pathvec, pp, i) {
1664                 pp->checkint = conf->checkint;
1665         }
1666
1667         /* Tweak start time for initial path check */
1668         if (gettimeofday(&last_time, NULL) != 0)
1669                 last_time.tv_sec = 0;
1670         else
1671                 last_time.tv_sec -= 1;
1672
1673         while (1) {
1674                 struct timeval diff_time, start_time, end_time;
1675                 int num_paths = 0, ticks = 0, signo, strict_timing, rc = 0;
1676                 sigset_t mask;
1677
1678                 if (gettimeofday(&start_time, NULL) != 0)
1679                         start_time.tv_sec = 0;
1680                 if (start_time.tv_sec && last_time.tv_sec) {
1681                         timersub(&start_time, &last_time, &diff_time);
1682                         condlog(4, "tick (%lu.%06lu secs)",
1683                                 diff_time.tv_sec, diff_time.tv_usec);
1684                         last_time.tv_sec = start_time.tv_sec;
1685                         last_time.tv_usec = start_time.tv_usec;
1686                         ticks = diff_time.tv_sec;
1687                 } else {
1688                         ticks = 1;
1689                         condlog(4, "tick (%d ticks)", ticks);
1690                 }
1691 #ifdef USE_SYSTEMD
1692                 if (use_watchdog)
1693                         sd_notify(0, "WATCHDOG=1");
1694 #endif
1695                 rc = set_config_state(DAEMON_RUNNING);
1696                 if (rc == ETIMEDOUT) {
1697                         condlog(4, "timeout waiting for DAEMON_IDLE");
1698                         continue;
1699                 }
1700                 strict_timing = conf->strict_timing;
1701                 if (vecs->pathvec) {
1702                         pthread_cleanup_push(cleanup_lock, &vecs->lock);
1703                         lock(vecs->lock);
1704                         pthread_testcancel();
1705                         vector_foreach_slot (vecs->pathvec, pp, i) {
1706                                 num_paths += check_path(vecs, pp, ticks);
1707                         }
1708                         lock_cleanup_pop(vecs->lock);
1709                 }
1710                 if (vecs->mpvec) {
1711                         pthread_cleanup_push(cleanup_lock, &vecs->lock);
1712                         lock(vecs->lock);
1713                         pthread_testcancel();
1714                         defered_failback_tick(vecs->mpvec);
1715                         retry_count_tick(vecs->mpvec);
1716                         missing_uev_wait_tick(vecs);
1717                         lock_cleanup_pop(vecs->lock);
1718                 }
1719                 if (count)
1720                         count--;
1721                 else {
1722                         pthread_cleanup_push(cleanup_lock, &vecs->lock);
1723                         lock(vecs->lock);
1724                         pthread_testcancel();
1725                         condlog(4, "map garbage collection");
1726                         mpvec_garbage_collector(vecs);
1727                         count = MAPGCINT;
1728                         lock_cleanup_pop(vecs->lock);
1729                 }
1730
1731                 diff_time.tv_usec = 0;
1732                 if (start_time.tv_sec &&
1733                     gettimeofday(&end_time, NULL) == 0) {
1734                         timersub(&end_time, &start_time, &diff_time);
1735                         if (num_paths) {
1736                                 condlog(3, "checked %d path%s in %lu.%06lu secs",
1737                                         num_paths, num_paths > 1 ? "s" : "",
1738                                         diff_time.tv_sec, diff_time.tv_usec);
1739                                 if (diff_time.tv_sec > conf->max_checkint)
1740                                         condlog(1, "path checkers took longer "
1741                                                 "than %lu seconds, consider "
1742                                                 "increasing max_polling_interval",
1743                                                 diff_time.tv_sec);
1744                         }
1745                 }
1746
1747                 post_config_state(DAEMON_IDLE);
1748                 if (!strict_timing)
1749                         sleep(1);
1750                 else {
1751                         timer_tick_it.it_interval.tv_sec = 0;
1752                         timer_tick_it.it_interval.tv_usec = 0;
1753                         if (diff_time.tv_usec) {
1754                                 timer_tick_it.it_value.tv_sec = 0;
1755                                 timer_tick_it.it_value.tv_usec =
1756                                         (unsigned long)1000000 - diff_time.tv_usec;
1757                         } else {
1758                                 timer_tick_it.it_value.tv_sec = 1;
1759                                 timer_tick_it.it_value.tv_usec = 0;
1760                         }
1761                         setitimer(ITIMER_REAL, &timer_tick_it, NULL);
1762
1763                         sigemptyset(&mask);
1764                         sigaddset(&mask, SIGALRM);
1765                         condlog(3, "waiting for %lu.%06lu secs",
1766                                 timer_tick_it.it_value.tv_sec,
1767                                 timer_tick_it.it_value.tv_usec);
1768                         if (sigwait(&mask, &signo) != 0) {
1769                                 condlog(3, "sigwait failed with error %d",
1770                                         errno);
1771                                 conf->strict_timing = 0;
1772                                 break;
1773                         }
1774                 }
1775         }
1776         return NULL;
1777 }
1778
1779 int
1780 configure (struct vectors * vecs, int start_waiters)
1781 {
1782         struct multipath * mpp;
1783         struct path * pp;
1784         vector mpvec;
1785         int i, ret;
1786
1787         if (!vecs->pathvec && !(vecs->pathvec = vector_alloc()))
1788                 return 1;
1789
1790         if (!vecs->mpvec && !(vecs->mpvec = vector_alloc()))
1791                 return 1;
1792
1793         if (!(mpvec = vector_alloc()))
1794                 return 1;
1795
1796         /*
1797          * probe for current path (from sysfs) and map (from dm) sets
1798          */
1799         ret = path_discovery(vecs->pathvec, DI_ALL);
1800         if (ret < 0)
1801                 return 1;
1802
1803         vector_foreach_slot (vecs->pathvec, pp, i){
1804                 if (filter_path(conf, pp) > 0){
1805                         vector_del_slot(vecs->pathvec, i);
1806                         free_path(pp);
1807                         i--;
1808                 }
1809                 else
1810                         pp->checkint = conf->checkint;
1811         }
1812         if (map_discovery(vecs))
1813                 return 1;
1814
1815         /*
1816          * create new set of maps & push changed ones into dm
1817          */
1818         if (coalesce_paths(vecs, mpvec, NULL, 1, CMD_NONE))
1819                 return 1;
1820
1821         /*
1822          * may need to remove some maps which are no longer relevant
1823          * e.g., due to blacklist changes in conf file
1824          */
1825         if (coalesce_maps(vecs, mpvec))
1826                 return 1;
1827
1828         dm_lib_release();
1829
1830         sync_maps_state(mpvec);
1831         vector_foreach_slot(mpvec, mpp, i){
1832                 remember_wwid(mpp->wwid);
1833                 update_map_pr(mpp);
1834         }
1835
1836         /*
1837          * purge dm of old maps
1838          */
1839         remove_maps(vecs);
1840
1841         /*
1842          * save new set of maps formed by considering current path state
1843          */
1844         vector_free(vecs->mpvec);
1845         vecs->mpvec = mpvec;
1846
1847         /*
1848          * start dm event waiter threads for these new maps
1849          */
1850         vector_foreach_slot(vecs->mpvec, mpp, i) {
1851                 if (setup_multipath(vecs, mpp))
1852                         return 1;
1853                 if (start_waiters)
1854                         if (start_waiter_thread(mpp, vecs))
1855                                 return 1;
1856         }
1857         return 0;
1858 }
1859
1860 int
1861 need_to_delay_reconfig(struct vectors * vecs)
1862 {
1863         struct multipath *mpp;
1864         int i;
1865
1866         if (!VECTOR_SIZE(vecs->mpvec))
1867                 return 0;
1868
1869         vector_foreach_slot(vecs->mpvec, mpp, i) {
1870                 if (mpp->wait_for_udev)
1871                         return 1;
1872         }
1873         return 0;
1874 }
1875
1876 int
1877 reconfigure (struct vectors * vecs)
1878 {
1879         struct config * old = conf;
1880         int retval = 1;
1881
1882         /*
1883          * free old map and path vectors ... they use old conf state
1884          */
1885         if (VECTOR_SIZE(vecs->mpvec))
1886                 remove_maps_and_stop_waiters(vecs);
1887
1888         if (VECTOR_SIZE(vecs->pathvec))
1889                 free_pathvec(vecs->pathvec, FREE_PATHS);
1890
1891         vecs->pathvec = NULL;
1892         conf = NULL;
1893
1894         /* Re-read any timezone changes */
1895         tzset();
1896
1897         if (!load_config(DEFAULT_CONFIGFILE)) {
1898                 dm_drv_version(conf->version, TGT_MPATH);
1899                 if (verbosity)
1900                         conf->verbosity = verbosity;
1901                 if (bindings_read_only)
1902                         conf->bindings_read_only = bindings_read_only;
1903                 if (ignore_new_devs)
1904                         conf->ignore_new_devs = ignore_new_devs;
1905                 configure(vecs, 1);
1906                 multipath_conf = conf;
1907                 free_config(old);
1908                 retval = 0;
1909         } else {
1910                 conf = old;
1911         }
1912         uxsock_timeout = conf->uxsock_timeout;
1913
1914         return retval;
1915 }
1916
1917 static struct vectors *
1918 init_vecs (void)
1919 {
1920         struct vectors * vecs;
1921
1922         vecs = (struct vectors *)MALLOC(sizeof(struct vectors));
1923
1924         if (!vecs)
1925                 return NULL;
1926
1927         vecs->lock.mutex =
1928                 (pthread_mutex_t *)MALLOC(sizeof(pthread_mutex_t));
1929
1930         if (!vecs->lock.mutex)
1931                 goto out;
1932
1933         pthread_mutex_init(vecs->lock.mutex, NULL);
1934         vecs->lock.depth = 0;
1935
1936         return vecs;
1937
1938 out:
1939         FREE(vecs);
1940         condlog(0, "failed to init paths");
1941         return NULL;
1942 }
1943
/*
 * Install func as the handler for signo through sigaction(), with an
 * empty handler mask and no flags.
 *
 * Returns the previously installed handler, or SIG_ERR when sigaction()
 * fails.
 */
static void *
signal_set(int signo, void (*func) (int))
{
	struct sigaction new_action;
	struct sigaction old_action;

	new_action.sa_handler = func;
	new_action.sa_flags = 0;
	sigemptyset(&new_action.sa_mask);

	if (sigaction(signo, &new_action, &old_action) < 0)
		return (SIG_ERR);

	return (old_action.sa_handler);
}
1962
1963 void
1964 handle_signals(void)
1965 {
1966         if (reconfig_sig) {
1967                 condlog(2, "reconfigure (signal)");
1968                 set_config_state(DAEMON_CONFIGURE);
1969         }
1970         if (log_reset_sig) {
1971                 condlog(2, "reset log (signal)");
1972                 pthread_mutex_lock(&logq_lock);
1973                 log_reset("multipathd");
1974                 pthread_mutex_unlock(&logq_lock);
1975         }
1976         reconfig_sig = 0;
1977         log_reset_sig = 0;
1978 }
1979
1980 static void
1981 sighup (int sig)
1982 {
1983         reconfig_sig = 1;
1984 }
1985
/*
 * Termination handler (installed for SIGINT and SIGTERM by signal_init()):
 * hands over to exit_daemon().
 */
static void
sigend (int sig)
{
	(void)sig;
	exit_daemon();
}
1991
1992 static void
1993 sigusr1 (int sig)
1994 {
1995         log_reset_sig = 1;
1996 }
1997
/*
 * SIGUSR2 handler: does nothing but log that the signal arrived.
 *
 * NOTE(review): condlog() is called from signal context here; whether it
 * is async-signal-safe is not visible from this file - confirm.
 */
static void
sigusr2 (int sig)
{
	(void)sig;
	condlog(3, "SIGUSR2 received");
}
2003
2004 static void
2005 signal_init(void)
2006 {
2007         sigset_t set;
2008
2009         sigemptyset(&set);
2010         sigaddset(&set, SIGHUP);
2011         sigaddset(&set, SIGUSR1);
2012         sigaddset(&set, SIGUSR2);
2013         sigaddset(&set, SIGALRM);
2014         pthread_sigmask(SIG_BLOCK, &set, NULL);
2015
2016         signal_set(SIGHUP, sighup);
2017         signal_set(SIGUSR1, sigusr1);
2018         signal_set(SIGUSR2, sigusr2);
2019         signal_set(SIGINT, sigend);
2020         signal_set(SIGTERM, sigend);
2021         signal(SIGPIPE, SIG_IGN);
2022 }
2023
/*
 * Ask for the SCHED_RR scheduling policy at priority 99 for the calling
 * process. A failure is only logged; the daemon carries on regardless.
 */
static void
setscheduler (void)
{
	static struct sched_param rt_param = {
		.sched_priority = 99
	};

	if (sched_setscheduler (0, SCHED_RR, &rt_param) != -1)
		return;

	condlog(LOG_WARNING, "Could not set SCHED_RR at priority 99");
}
2038
2039 static void
2040 set_oom_adj (void)
2041 {
2042 #ifdef OOM_SCORE_ADJ_MIN
2043         int retry = 1;
2044         char *file = "/proc/self/oom_score_adj";
2045         int score = OOM_SCORE_ADJ_MIN;
2046 #else
2047         int retry = 0;
2048         char *file = "/proc/self/oom_adj";
2049         int score = OOM_ADJUST_MIN;
2050 #endif
2051         FILE *fp;
2052         struct stat st;
2053         char *envp;
2054
2055         envp = getenv("OOMScoreAdjust");
2056         if (envp) {
2057                 condlog(3, "Using systemd provided OOMScoreAdjust");
2058                 return;
2059         }
2060         do {
2061                 if (stat(file, &st) == 0){
2062                         fp = fopen(file, "w");
2063                         if (!fp) {
2064                                 condlog(0, "couldn't fopen %s : %s", file,
2065                                         strerror(errno));
2066                                 return;
2067                         }
2068                         fprintf(fp, "%i", score);
2069                         fclose(fp);
2070                         return;
2071                 }
2072                 if (errno != ENOENT) {
2073                         condlog(0, "couldn't stat %s : %s", file,
2074                                 strerror(errno));
2075                         return;
2076                 }
2077 #ifdef OOM_ADJUST_MIN
2078                 file = "/proc/self/oom_adj";
2079                 score = OOM_ADJUST_MIN;
2080 #else
2081                 retry = 0;
2082 #endif
2083         } while (retry--);
2084         condlog(0, "couldn't adjust oom score");
2085 }
2086
2087 static int
2088 child (void * param)
2089 {
2090         pthread_t check_thr, uevent_thr, uxlsnr_thr, uevq_thr;
2091         pthread_attr_t log_attr, misc_attr, uevent_attr;
2092         struct vectors * vecs;
2093         struct multipath * mpp;
2094         int i;
2095 #ifdef USE_SYSTEMD
2096         unsigned long checkint;
2097 #endif
2098         int rc;
2099         int pid_fd = -1;
2100         char *envp;
2101
2102         mlockall(MCL_CURRENT | MCL_FUTURE);
2103         signal_init();
2104
2105         setup_thread_attr(&misc_attr, 64 * 1024, 1);
2106         setup_thread_attr(&uevent_attr, DEFAULT_UEVENT_STACKSIZE * 1024, 1);
2107         setup_thread_attr(&waiter_attr, 32 * 1024, 1);
2108
2109         if (logsink == 1) {
2110                 setup_thread_attr(&log_attr, 64 * 1024, 0);
2111                 log_thread_start(&log_attr);
2112                 pthread_attr_destroy(&log_attr);
2113         }
2114         pid_fd = pidfile_create(DEFAULT_PIDFILE, daemon_pid);
2115         if (pid_fd < 0) {
2116                 condlog(1, "failed to create pidfile");
2117                 if (logsink == 1)
2118                         log_thread_stop();
2119                 exit(1);
2120         }
2121
2122         post_config_state(DAEMON_START);
2123
2124         condlog(2, "--------start up--------");
2125         condlog(2, "read " DEFAULT_CONFIGFILE);
2126
2127         if (load_config(DEFAULT_CONFIGFILE))
2128                 goto failed;
2129
2130         if (verbosity)
2131                 conf->verbosity = verbosity;
2132         if (bindings_read_only)
2133                 conf->bindings_read_only = bindings_read_only;
2134         if (ignore_new_devs)
2135                 conf->ignore_new_devs = ignore_new_devs;
2136         uxsock_timeout = conf->uxsock_timeout;
2137         multipath_conf = conf;
2138         dm_init(conf->verbosity);
2139         dm_drv_version(conf->version, TGT_MPATH);
2140         if (init_checkers(conf->multipath_dir)) {
2141                 condlog(0, "failed to initialize checkers");
2142                 goto failed;
2143         }
2144         if (init_prio(conf->multipath_dir)) {
2145                 condlog(0, "failed to initialize prioritizers");
2146                 goto failed;
2147         }
2148
2149         setlogmask(LOG_UPTO(conf->verbosity + 3));
2150
2151         envp = getenv("LimitNOFILE");
2152
2153         if (envp) {
2154                 condlog(2,"Using systemd provided open fds limit of %s", envp);
2155         } else if (conf->max_fds) {
2156                 struct rlimit fd_limit;
2157
2158                 if (getrlimit(RLIMIT_NOFILE, &fd_limit) < 0) {
2159                         condlog(0, "can't get open fds limit: %s",
2160                                 strerror(errno));
2161                         fd_limit.rlim_cur = 0;
2162                         fd_limit.rlim_max = 0;
2163                 }
2164                 if (fd_limit.rlim_cur < conf->max_fds) {
2165                         fd_limit.rlim_cur = conf->max_fds;
2166                         if (fd_limit.rlim_max < conf->max_fds)
2167                                 fd_limit.rlim_max = conf->max_fds;
2168                         if (setrlimit(RLIMIT_NOFILE, &fd_limit) < 0) {
2169                                 condlog(0, "can't set open fds limit to "
2170                                         "%lu/%lu : %s",
2171                                         fd_limit.rlim_cur, fd_limit.rlim_max,
2172                                         strerror(errno));
2173                         } else {
2174                                 condlog(3, "set open fds limit to %lu/%lu",
2175                                         fd_limit.rlim_cur, fd_limit.rlim_max);
2176                         }
2177                 }
2178
2179         }
2180
2181         vecs = gvecs = init_vecs();
2182         if (!vecs)
2183                 goto failed;
2184
2185         setscheduler();
2186         set_oom_adj();
2187
2188         dm_udev_set_sync_support(0);
2189 #ifdef USE_SYSTEMD
2190         envp = getenv("WATCHDOG_USEC");
2191         if (envp && sscanf(envp, "%lu", &checkint) == 1) {
2192                 /* Value is in microseconds */
2193                 conf->max_checkint = checkint / 1000000;
2194                 /* Rescale checkint */
2195                 if (conf->checkint > conf->max_checkint)
2196                         conf->checkint = conf->max_checkint;
2197                 else
2198                         conf->checkint = conf->max_checkint / 4;
2199                 condlog(3, "enabling watchdog, interval %d max %d",
2200                         conf->checkint, conf->max_checkint);
2201                 use_watchdog = conf->checkint;
2202         }
2203 #endif
2204         /*
2205          * Signal start of configuration
2206          */
2207         post_config_state(DAEMON_CONFIGURE);
2208
2209         /*
2210          * Start uevent listener early to catch events
2211          */
2212         if ((rc = pthread_create(&uevent_thr, &uevent_attr, ueventloop, udev))) {
2213                 condlog(0, "failed to create uevent thread: %d", rc);
2214                 goto failed;
2215         }
2216         pthread_attr_destroy(&uevent_attr);
2217         if ((rc = pthread_create(&uxlsnr_thr, &misc_attr, uxlsnrloop, vecs))) {
2218                 condlog(0, "failed to create cli listener: %d", rc);
2219                 goto failed;
2220         }
2221
2222         /*
2223          * start threads
2224          */
2225         if ((rc = pthread_create(&check_thr, &misc_attr, checkerloop, vecs))) {
2226                 condlog(0,"failed to create checker loop thread: %d", rc);
2227                 goto failed;
2228         }
2229         if ((rc = pthread_create(&uevq_thr, &misc_attr, uevqloop, vecs))) {
2230                 condlog(0, "failed to create uevent dispatcher: %d", rc);
2231                 goto failed;
2232         }
2233         pthread_attr_destroy(&misc_attr);
2234
2235 #ifdef USE_SYSTEMD
2236         sd_notify(0, "READY=1");
2237 #endif
2238
2239         while (running_state != DAEMON_SHUTDOWN) {
2240                 pthread_cleanup_push(config_cleanup, NULL);
2241                 pthread_mutex_lock(&config_lock);
2242                 if (running_state != DAEMON_CONFIGURE &&
2243                     running_state != DAEMON_SHUTDOWN) {
2244                         pthread_cond_wait(&config_cond, &config_lock);
2245                 }
2246                 pthread_cleanup_pop(1);
2247                 if (running_state == DAEMON_CONFIGURE) {
2248                         pthread_cleanup_push(cleanup_lock, &vecs->lock);
2249                         lock(vecs->lock);
2250                         pthread_testcancel();
2251                         if (!need_to_delay_reconfig(vecs)) {
2252                                 reconfigure(vecs);
2253                         } else {
2254                                 conf->delayed_reconfig = 1;
2255                         }
2256                         lock_cleanup_pop(vecs->lock);
2257                         post_config_state(DAEMON_IDLE);
2258                 }
2259         }
2260
2261         lock(vecs->lock);
2262         if (conf->queue_without_daemon == QUE_NO_DAEMON_OFF)
2263                 vector_foreach_slot(vecs->mpvec, mpp, i)
2264                         dm_queue_if_no_path(mpp->alias, 0);
2265         remove_maps_and_stop_waiters(vecs);
2266         unlock(vecs->lock);
2267
2268         pthread_cancel(check_thr);
2269         pthread_cancel(uevent_thr);
2270         pthread_cancel(uxlsnr_thr);
2271         pthread_cancel(uevq_thr);
2272
2273         lock(vecs->lock);
2274         free_pathvec(vecs->pathvec, FREE_PATHS);
2275         vecs->pathvec = NULL;
2276         unlock(vecs->lock);
2277         /* Now all the waitevent threads will start rushing in. */
2278         while (vecs->lock.depth > 0) {
2279                 sleep (1); /* This is weak. */
2280                 condlog(3, "Have %d wait event checkers threads to de-alloc,"
2281                         " waiting...", vecs->lock.depth);
2282         }
2283         pthread_mutex_destroy(vecs->lock.mutex);
2284         FREE(vecs->lock.mutex);
2285         vecs->lock.depth = 0;
2286         vecs->lock.mutex = NULL;
2287         FREE(vecs);
2288         vecs = NULL;
2289
2290         cleanup_checkers();
2291         cleanup_prio();
2292
2293         dm_lib_release();
2294         dm_lib_exit();
2295
2296         /* We're done here */
2297         condlog(3, "unlink pidfile");
2298         unlink(DEFAULT_PIDFILE);
2299
2300         condlog(2, "--------shut down-------");
2301
2302         if (logsink == 1)
2303                 log_thread_stop();
2304
2305         /*
2306          * Freeing config must be done after condlog() and dm_lib_exit(),
2307          * because logging functions like dlog() and dm_write_log()
2308          * reference the config.
2309          */
2310         free_config(conf);
2311         conf = NULL;
2312         udev_unref(udev);
2313         udev = NULL;
2314 #ifdef _DEBUG_
2315         dbg_free_final(NULL);
2316 #endif
2317
2318 #ifdef USE_SYSTEMD
2319         sd_notify(0, "ERRNO=0");
2320 #endif
2321         exit(0);
2322
2323 failed:
2324 #ifdef USE_SYSTEMD
2325         sd_notify(0, "ERRNO=1");
2326 #endif
2327         if (pid_fd >= 0)
2328                 close(pid_fd);
2329         exit(1);
2330 }
2331
2332 static int
2333 daemonize(void)
2334 {
2335         int pid;
2336         int dev_null_fd;
2337
2338         if( (pid = fork()) < 0){
2339                 fprintf(stderr, "Failed first fork : %s\n", strerror(errno));
2340                 return -1;
2341         }
2342         else if (pid != 0)
2343                 return pid;
2344
2345         setsid();
2346
2347         if ( (pid = fork()) < 0)
2348                 fprintf(stderr, "Failed second fork : %s\n", strerror(errno));
2349         else if (pid != 0)
2350                 _exit(0);
2351
2352         if (chdir("/") < 0)
2353                 fprintf(stderr, "cannot chdir to '/', continuing\n");
2354
2355         dev_null_fd = open("/dev/null", O_RDWR);
2356         if (dev_null_fd < 0){
2357                 fprintf(stderr, "cannot open /dev/null for input & output : %s\n",
2358                         strerror(errno));
2359                 _exit(0);
2360         }
2361
2362         close(STDIN_FILENO);
2363         if (dup(dev_null_fd) < 0) {
2364                 fprintf(stderr, "cannot dup /dev/null to stdin : %s\n",
2365                         strerror(errno));
2366                 _exit(0);
2367         }
2368         close(STDOUT_FILENO);
2369         if (dup(dev_null_fd) < 0) {
2370                 fprintf(stderr, "cannot dup /dev/null to stdout : %s\n",
2371                         strerror(errno));
2372                 _exit(0);
2373         }
2374         close(STDERR_FILENO);
2375         if (dup(dev_null_fd) < 0) {
2376                 fprintf(stderr, "cannot dup /dev/null to stderr : %s\n",
2377                         strerror(errno));
2378                 _exit(0);
2379         }
2380         close(dev_null_fd);
2381         daemon_pid = getpid();
2382         return 0;
2383 }
2384
2385 int
2386 main (int argc, char *argv[])
2387 {
2388         extern char *optarg;
2389         extern int optind;
2390         int arg;
2391         int err;
2392         int foreground = 0;
2393
2394         logsink = 1;
2395
2396         if (getuid() != 0) {
2397                 fprintf(stderr, "need to be root\n");
2398                 exit(1);
2399         }
2400
2401         /* make sure we don't lock any path */
2402         if (chdir("/") < 0)
2403                 fprintf(stderr, "can't chdir to root directory : %s\n",
2404                         strerror(errno));
2405         umask(umask(077) | 022);
2406
2407         udev = udev_new();
2408
2409         while ((arg = getopt(argc, argv, ":dsv:k::Bn")) != EOF ) {
2410         switch(arg) {
2411                 case 'd':
2412                         foreground = 1;
2413                         if (logsink > 0)
2414                                 logsink = 0;
2415                         //debug=1; /* ### comment me out ### */
2416                         break;
2417                 case 'v':
2418                         if (sizeof(optarg) > sizeof(char *) ||
2419                             !isdigit(optarg[0]))
2420                                 exit(1);
2421
2422                         verbosity = atoi(optarg);
2423                         break;
2424                 case 's':
2425                         logsink = -1;
2426                         break;
2427                 case 'k':
2428                         if (load_config(DEFAULT_CONFIGFILE))
2429                                 exit(1);
2430                         if (verbosity)
2431                                 conf->verbosity = verbosity;
2432                         uxclnt(optarg, uxsock_timeout + 100);
2433                         exit(0);
2434                 case 'B':
2435                         bindings_read_only = 1;
2436                         break;
2437                 case 'n':
2438                         ignore_new_devs = 1;
2439                         break;
2440                 default:
2441                         fprintf(stderr, "Invalid argument '-%c'\n",
2442                                 optopt);
2443                         exit(1);
2444                 }
2445         }
2446         if (optind < argc) {
2447                 char cmd[CMDSIZE];
2448                 char * s = cmd;
2449                 char * c = s;
2450
2451                 if (load_config(DEFAULT_CONFIGFILE))
2452                         exit(1);
2453                 if (verbosity)
2454                         conf->verbosity = verbosity;
2455                 memset(cmd, 0x0, CMDSIZE);
2456                 while (optind < argc) {
2457                         if (strchr(argv[optind], ' '))
2458                                 c += snprintf(c, s + CMDSIZE - c, "\"%s\" ", argv[optind]);
2459                         else
2460                                 c += snprintf(c, s + CMDSIZE - c, "%s ", argv[optind]);
2461                         optind++;
2462                 }
2463                 c += snprintf(c, s + CMDSIZE - c, "\n");
2464                 uxclnt(s, uxsock_timeout + 100);
2465                 exit(0);
2466         }
2467
2468         if (foreground) {
2469                 if (!isatty(fileno(stdout)))
2470                         setbuf(stdout, NULL);
2471                 err = 0;
2472                 daemon_pid = getpid();
2473         } else
2474                 err = daemonize();
2475
2476         if (err < 0)
2477                 /* error */
2478                 exit(1);
2479         else if (err > 0)
2480                 /* parent dies */
2481                 exit(0);
2482         else
2483                 /* child lives */
2484                 return (child(NULL));
2485 }
2486
2487 void *  mpath_pr_event_handler_fn (void * pathp )
2488 {
2489         struct multipath * mpp;
2490         int i,j, ret, isFound;
2491         struct path * pp = (struct path *)pathp;
2492         unsigned char *keyp;
2493         uint64_t prkey;
2494         struct prout_param_descriptor *param;
2495         struct prin_resp *resp;
2496
2497         mpp = pp->mpp;
2498
2499         resp = mpath_alloc_prin_response(MPATH_PRIN_RKEY_SA);
2500         if (!resp){
2501                 condlog(0,"%s Alloc failed for prin response", pp->dev);
2502                 return NULL;
2503         }
2504
2505         ret = prin_do_scsi_ioctl(pp->dev, MPATH_PRIN_RKEY_SA, resp, 0);
2506         if (ret != MPATH_PR_SUCCESS )
2507         {
2508                 condlog(0,"%s : pr in read keys service action failed. Error=%d", pp->dev, ret);
2509                 goto out;
2510         }
2511
2512         condlog(3, " event pr=%d addlen=%d",resp->prin_descriptor.prin_readkeys.prgeneration,
2513                         resp->prin_descriptor.prin_readkeys.additional_length );
2514
2515         if (resp->prin_descriptor.prin_readkeys.additional_length == 0 )
2516         {
2517                 condlog(1, "%s: No key found. Device may not be registered.", pp->dev);
2518                 ret = MPATH_PR_SUCCESS;
2519                 goto out;
2520         }
2521         prkey = 0;
2522         keyp = (unsigned char *)mpp->reservation_key;
2523         for (j = 0; j < 8; ++j) {
2524                 if (j > 0)
2525                         prkey <<= 8;
2526                 prkey |= *keyp;
2527                 ++keyp;
2528         }
2529         condlog(2, "Multipath  reservation_key: 0x%" PRIx64 " ", prkey);
2530
2531         isFound =0;
2532         for (i = 0; i < resp->prin_descriptor.prin_readkeys.additional_length/8; i++ )
2533         {
2534                 condlog(2, "PR IN READKEYS[%d]  reservation key:",i);
2535                 dumpHex((char *)&resp->prin_descriptor.prin_readkeys.key_list[i*8], 8 , -1);
2536                 if (!memcmp(mpp->reservation_key, &resp->prin_descriptor.prin_readkeys.key_list[i*8], 8))
2537                 {
2538                         condlog(2, "%s: pr key found in prin readkeys response", mpp->alias);
2539                         isFound =1;
2540                         break;
2541                 }
2542         }
2543         if (!isFound)
2544         {
2545                 condlog(0, "%s: Either device not registered or ", pp->dev);
2546                 condlog(0, "host is not authorised for registration. Skip path");
2547                 ret = MPATH_PR_OTHER;
2548                 goto out;
2549         }
2550
2551         param= malloc(sizeof(struct prout_param_descriptor));
2552         memset(param, 0 , sizeof(struct prout_param_descriptor));
2553
2554         for (j = 7; j >= 0; --j) {
2555                 param->sa_key[j] = (prkey & 0xff);
2556                 prkey >>= 8;
2557         }
2558         param->num_transportid = 0;
2559
2560         condlog(3, "device %s:%s", pp->dev, pp->mpp->wwid);
2561
2562         ret = prout_do_scsi_ioctl(pp->dev, MPATH_PROUT_REG_IGN_SA, 0, 0, param, 0);
2563         if (ret != MPATH_PR_SUCCESS )
2564         {
2565                 condlog(0,"%s: Reservation registration failed. Error: %d", pp->dev, ret);
2566         }
2567         mpp->prflag = 1;
2568
2569         free(param);
2570 out:
2571         free(resp);
2572         return NULL;
2573 }
2574
2575 int mpath_pr_event_handle(struct path *pp)
2576 {
2577         pthread_t thread;
2578         int rc;
2579         pthread_attr_t attr;
2580         struct multipath * mpp;
2581
2582         mpp = pp->mpp;
2583
2584         if (!mpp->reservation_key)
2585                 return -1;
2586
2587         pthread_attr_init(&attr);
2588         pthread_attr_setdetachstate(&attr, PTHREAD_CREATE_JOINABLE);
2589
2590         rc = pthread_create(&thread, NULL , mpath_pr_event_handler_fn, pp);
2591         if (rc) {
2592                 condlog(0, "%s: ERROR; return code from pthread_create() is %d", pp->dev, rc);
2593                 return -1;
2594         }
2595         pthread_attr_destroy(&attr);
2596         rc = pthread_join(thread, NULL);
2597         return 0;
2598 }
2599