aac8a1948c7a90287238d51a37d4026b36b8f506
[multipath-tools/.git] / multipathd / main.c
1 /*
2  * Copyright (c) 2004, 2005 Christophe Varoqui
3  * Copyright (c) 2005 Kiyoshi Ueda, NEC
4  * Copyright (c) 2005 Benjamin Marzinski, Redhat
5  * Copyright (c) 2005 Edward Goggin, EMC
6  */
7 #include <unistd.h>
8 #include <sys/stat.h>
9 #include <libdevmapper.h>
10 #include <wait.h>
11 #include <sys/mman.h>
12 #include <sys/types.h>
13 #include <fcntl.h>
14 #include <errno.h>
15 #include <sys/time.h>
16 #include <sys/resource.h>
17 #include <limits.h>
18 #include <linux/oom.h>
19 #include <libudev.h>
20 #ifdef USE_SYSTEMD
21 #include <systemd/sd-daemon.h>
22 #endif
23 #include <semaphore.h>
24 #include <mpath_persist.h>
25 #include <time.h>
26
27 /*
28  * libcheckers
29  */
30 #include <checkers.h>
31
32 /*
33  * libmultipath
34  */
35 #include <parser.h>
36 #include <vector.h>
37 #include <memory.h>
38 #include <config.h>
39 #include <util.h>
40 #include <hwtable.h>
41 #include <defaults.h>
42 #include <structs.h>
43 #include <blacklist.h>
44 #include <structs_vec.h>
45 #include <dmparser.h>
46 #include <devmapper.h>
47 #include <sysfs.h>
48 #include <dict.h>
49 #include <discovery.h>
50 #include <debug.h>
51 #include <propsel.h>
52 #include <uevent.h>
53 #include <switchgroup.h>
54 #include <print.h>
55 #include <configure.h>
56 #include <prio.h>
57 #include <pgpolicies.h>
58 #include <uevent.h>
59 #include <log.h>
60
61 #include "main.h"
62 #include "pidfile.h"
63 #include "uxlsnr.h"
64 #include "uxclnt.h"
65 #include "cli.h"
66 #include "cli_handlers.h"
67 #include "lock.h"
68 #include "waiter.h"
69 #include "wwids.h"
70
71 #define FILE_NAME_SIZE 256
72 #define CMDSIZE 160
73
/*
 * Log a message about a path at verbosity `a`.
 *
 * The macro is not hygienic: it reads a variable named `pp` from the
 * expansion site and dereferences pp->mpp, so both must be valid there.
 * An offline path always gets the fixed "path offline" text; otherwise
 * `b` is logged, unless it is the empty string.
 */
#define LOG_MSG(a, b) \
do { \
        if (pp->offline) { \
                condlog(a, "%s: %s - path offline", \
                        pp->mpp->alias, pp->dev); \
        } else if (strlen(b)) { \
                condlog(a, "%s: %s - %s", \
                        pp->mpp->alias, pp->dev, b); \
        } \
} while(0)
81
/*
 * Pairs a device name with a multipath map.
 * NOTE(review): no user of this struct is visible in this part of the
 * file; the ownership of both pointers needs to be confirmed at the
 * call sites.
 */
struct mpath_event_param {
        char *devname;          /* device name string */
        struct multipath *mpp;  /* map the event refers to */
};
87
88 unsigned int mpath_mx_alloc_len;
89
90 int logsink;
91 enum daemon_status running_state;
92 pid_t daemon_pid;
93
94 static sem_t exit_sem;
95 /*
96  * global copy of vecs for use in sig handlers
97  */
98 struct vectors * gvecs;
99
100 struct udev * udev;
101
102 static int
103 need_switch_pathgroup (struct multipath * mpp, int refresh)
104 {
105         struct pathgroup * pgp;
106         struct path * pp;
107         unsigned int i, j;
108
109         if (!mpp || mpp->pgfailback == -FAILBACK_MANUAL)
110                 return 0;
111
112         /*
113          * Refresh path priority values
114          */
115         if (refresh)
116                 vector_foreach_slot (mpp->pg, pgp, i)
117                         vector_foreach_slot (pgp->paths, pp, j)
118                                 pathinfo(pp, conf->hwtable, DI_PRIO);
119
120         mpp->bestpg = select_path_group(mpp);
121
122         if (mpp->bestpg != mpp->nextpg)
123                 return 1;
124
125         return 0;
126 }
127
128 static void
129 switch_pathgroup (struct multipath * mpp)
130 {
131         mpp->stat_switchgroup++;
132         dm_switchgroup(mpp->alias, mpp->bestpg);
133         condlog(2, "%s: switch to path group #%i",
134                  mpp->alias, mpp->bestpg);
135 }
136
137 static int
138 coalesce_maps(struct vectors *vecs, vector nmpv)
139 {
140         struct multipath * ompp;
141         vector ompv = vecs->mpvec;
142         unsigned int i;
143
144         vector_foreach_slot (ompv, ompp, i) {
145                 condlog(3, "%s: coalesce map", ompp->alias);
146                 if (!find_mp_by_wwid(nmpv, ompp->wwid)) {
147                         /*
148                          * remove all current maps not allowed by the
149                          * current configuration
150                          */
151                         if (dm_flush_map(ompp->alias)) {
152                                 condlog(0, "%s: unable to flush devmap",
153                                         ompp->alias);
154                                 /*
155                                  * may be just because the device is open
156                                  */
157                                 if (setup_multipath(vecs, ompp) != 0) {
158                                         i--;
159                                         continue;
160                                 }
161                                 if (!vector_alloc_slot(nmpv))
162                                         return 1;
163
164                                 vector_set_slot(nmpv, ompp);
165
166                                 vector_del_slot(ompv, i);
167                                 i--;
168                         }
169                         else {
170                                 dm_lib_release();
171                                 condlog(2, "%s devmap removed", ompp->alias);
172                         }
173                 } else if (conf->reassign_maps) {
174                         condlog(3, "%s: Reassign existing device-mapper"
175                                 " devices", ompp->alias);
176                         dm_reassign(ompp->alias);
177                 }
178         }
179         return 0;
180 }
181
182 void
183 sync_map_state(struct multipath *mpp)
184 {
185         struct pathgroup *pgp;
186         struct path *pp;
187         unsigned int i, j;
188
189         if (!mpp->pg)
190                 return;
191
192         vector_foreach_slot (mpp->pg, pgp, i){
193                 vector_foreach_slot (pgp->paths, pp, j){
194                         if (pp->state == PATH_UNCHECKED || 
195                             pp->state == PATH_WILD ||
196                             pp->state == PATH_DELAYED)
197                                 continue;
198                         if ((pp->dmstate == PSTATE_FAILED ||
199                              pp->dmstate == PSTATE_UNDEF) &&
200                             (pp->state == PATH_UP || pp->state == PATH_GHOST))
201                                 dm_reinstate_path(mpp->alias, pp->dev_t);
202                         else if ((pp->dmstate == PSTATE_ACTIVE ||
203                                   pp->dmstate == PSTATE_UNDEF) &&
204                                  (pp->state == PATH_DOWN ||
205                                   pp->state == PATH_SHAKY))
206                                 dm_fail_path(mpp->alias, pp->dev_t);
207                 }
208         }
209 }
210
211 static void
212 sync_maps_state(vector mpvec)
213 {
214         unsigned int i;
215         struct multipath *mpp;
216
217         vector_foreach_slot (mpvec, mpp, i)
218                 sync_map_state(mpp);
219 }
220
221 static int
222 flush_map(struct multipath * mpp, struct vectors * vecs, int nopaths)
223 {
224         int r;
225
226         if (nopaths)
227                 r = dm_flush_map_nopaths(mpp->alias, mpp->deferred_remove);
228         else
229                 r = dm_flush_map(mpp->alias);
230         /*
231          * clear references to this map before flushing so we can ignore
232          * the spurious uevent we may generate with the dm_flush_map call below
233          */
234         if (r) {
235                 /*
236                  * May not really be an error -- if the map was already flushed
237                  * from the device mapper by dmsetup(8) for instance.
238                  */
239                 if (r == 1)
240                         condlog(0, "%s: can't flush", mpp->alias);
241                 else {
242                         condlog(2, "%s: devmap deferred remove", mpp->alias);
243                         mpp->deferred_remove = DEFERRED_REMOVE_IN_PROGRESS;
244                 }
245                 return r;
246         }
247         else {
248                 dm_lib_release();
249                 condlog(2, "%s: map flushed", mpp->alias);
250         }
251
252         orphan_paths(vecs->pathvec, mpp);
253         remove_map_and_stop_waiter(mpp, vecs, 1);
254
255         return 0;
256 }
257
258 static int
259 uev_add_map (struct uevent * uev, struct vectors * vecs)
260 {
261         char *alias;
262         int major = -1, minor = -1, rc;
263
264         condlog(3, "%s: add map (uevent)", uev->kernel);
265         alias = uevent_get_dm_name(uev);
266         if (!alias) {
267                 condlog(3, "%s: No DM_NAME in uevent", uev->kernel);
268                 major = uevent_get_major(uev);
269                 minor = uevent_get_minor(uev);
270                 alias = dm_mapname(major, minor);
271                 if (!alias) {
272                         condlog(2, "%s: mapname not found for %d:%d",
273                                 uev->kernel, major, minor);
274                         return 1;
275                 }
276         }
277         rc = ev_add_map(uev->kernel, alias, vecs);
278         FREE(alias);
279         return rc;
280 }
281
282 int
283 ev_add_map (char * dev, char * alias, struct vectors * vecs)
284 {
285         char * refwwid;
286         struct multipath * mpp;
287         int map_present;
288         int r = 1;
289
290         map_present = dm_map_present(alias);
291
292         if (map_present && dm_type(alias, TGT_MPATH) <= 0) {
293                 condlog(4, "%s: not a multipath map", alias);
294                 return 0;
295         }
296
297         mpp = find_mp_by_alias(vecs->mpvec, alias);
298
299         if (mpp) {
300                 /*
301                  * Not really an error -- we generate our own uevent
302                  * if we create a multipath mapped device as a result
303                  * of uev_add_path
304                  */
305                 if (conf->reassign_maps) {
306                         condlog(3, "%s: Reassign existing device-mapper devices",
307                                 alias);
308                         dm_reassign(alias);
309                 }
310                 return 0;
311         }
312         condlog(2, "%s: adding map", alias);
313
314         /*
315          * now we can register the map
316          */
317         if (map_present && (mpp = add_map_without_path(vecs, alias))) {
318                 sync_map_state(mpp);
319                 condlog(2, "%s: devmap %s registered", alias, dev);
320                 return 0;
321         }
322         r = get_refwwid(dev, DEV_DEVMAP, vecs->pathvec, &refwwid);
323
324         if (refwwid) {
325                 r = coalesce_paths(vecs, NULL, refwwid, 0);
326                 dm_lib_release();
327         }
328
329         if (!r)
330                 condlog(2, "%s: devmap %s added", alias, dev);
331         else if (r == 2)
332                 condlog(2, "%s: uev_add_map %s blacklisted", alias, dev);
333         else
334                 condlog(0, "%s: uev_add_map %s failed", alias, dev);
335
336         FREE(refwwid);
337         return r;
338 }
339
340 static int
341 uev_remove_map (struct uevent * uev, struct vectors * vecs)
342 {
343         char *alias;
344         int minor;
345         struct multipath *mpp;
346
347         condlog(2, "%s: remove map (uevent)", uev->kernel);
348         alias = uevent_get_dm_name(uev);
349         if (!alias) {
350                 condlog(3, "%s: No DM_NAME in uevent, ignoring", uev->kernel);
351                 return 0;
352         }
353         minor = uevent_get_minor(uev);
354         mpp = find_mp_by_minor(vecs->mpvec, minor);
355
356         if (!mpp) {
357                 condlog(2, "%s: devmap not registered, can't remove",
358                         uev->kernel);
359                 goto out;
360         }
361         if (strcmp(mpp->alias, alias)) {
362                 condlog(2, "%s: minor number mismatch (map %d, event %d)",
363                         mpp->alias, mpp->dmi->minor, minor);
364                 goto out;
365         }
366
367         orphan_paths(vecs->pathvec, mpp);
368         remove_map_and_stop_waiter(mpp, vecs, 1);
369 out:
370         FREE(alias);
371         return 0;
372 }
373
374 int
375 ev_remove_map (char * devname, char * alias, int minor, struct vectors * vecs)
376 {
377         struct multipath * mpp;
378
379         mpp = find_mp_by_minor(vecs->mpvec, minor);
380
381         if (!mpp) {
382                 condlog(2, "%s: devmap not registered, can't remove",
383                         devname);
384                 return 0;
385         }
386         if (strcmp(mpp->alias, alias)) {
387                 condlog(2, "%s: minor number mismatch (map %d, event %d)",
388                         mpp->alias, mpp->dmi->minor, minor);
389                 return 0;
390         }
391         return flush_map(mpp, vecs, 0);
392 }
393
394 static int
395 uev_add_path (struct uevent *uev, struct vectors * vecs)
396 {
397         struct path *pp;
398         int ret, i;
399
400         condlog(2, "%s: add path (uevent)", uev->kernel);
401         if (strstr(uev->kernel, "..") != NULL) {
402                 /*
403                  * Don't allow relative device names in the pathvec
404                  */
405                 condlog(0, "%s: path name is invalid", uev->kernel);
406                 return 1;
407         }
408
409         pp = find_path_by_dev(vecs->pathvec, uev->kernel);
410         if (pp) {
411                 condlog(0, "%s: spurious uevent, path already in pathvec",
412                         uev->kernel);
413                 if (pp->mpp)
414                         return 0;
415                 if (!strlen(pp->wwid)) {
416                         udev_device_unref(pp->udev);
417                         pp->udev = udev_device_ref(uev->udev);
418                         ret = pathinfo(pp, conf->hwtable,
419                                        DI_ALL | DI_BLACKLIST);
420                         if (ret == 2) {
421                                 i = find_slot(vecs->pathvec, (void *)pp);
422                                 if (i != -1)
423                                         vector_del_slot(vecs->pathvec, i);
424                                 free_path(pp);
425                                 return 0;
426                         } else if (ret == 1) {
427                                 condlog(0, "%s: failed to reinitialize path",
428                                         uev->kernel);
429                                 return 1;
430                         }
431                 }
432         } else {
433                 /*
434                  * get path vital state
435                  */
436                 ret = store_pathinfo(vecs->pathvec, conf->hwtable,
437                                      uev->udev, DI_ALL, &pp);
438                 if (!pp) {
439                         if (ret == 2)
440                                 return 0;
441                         condlog(0, "%s: failed to store path info",
442                                 uev->kernel);
443                         return 1;
444                 }
445                 pp->checkint = conf->checkint;
446         }
447
448         return ev_add_path(pp, vecs);
449 }
450
451 /*
452  * returns:
453  * 0: added
454  * 1: error
455  */
456 int
457 ev_add_path (struct path * pp, struct vectors * vecs)
458 {
459         struct multipath * mpp;
460         char empty_buff[WWID_SIZE] = {0};
461         char params[PARAMS_SIZE] = {0};
462         int retries = 3;
463         int start_waiter = 0;
464
465         /*
466          * need path UID to go any further
467          */
468         if (memcmp(empty_buff, pp->wwid, WWID_SIZE) == 0) {
469                 condlog(0, "%s: failed to get path uid", pp->dev);
470                 goto fail; /* leave path added to pathvec */
471         }
472         mpp = pp->mpp = find_mp_by_wwid(vecs->mpvec, pp->wwid);
473 rescan:
474         if (mpp) {
475                 if ((!pp->size) || (mpp->size != pp->size)) {
476                         if (!pp->size)
477                                 condlog(0, "%s: failed to add new path %s, "
478                                         "device size is 0",
479                                         mpp->alias, pp->dev);
480                         else
481                                 condlog(0, "%s: failed to add new path %s, "
482                                         "device size mismatch",
483                                         mpp->alias, pp->dev);
484                         int i = find_slot(vecs->pathvec, (void *)pp);
485                         if (i != -1)
486                                 vector_del_slot(vecs->pathvec, i);
487                         free_path(pp);
488                         return 1;
489                 }
490
491                 condlog(4,"%s: adopting all paths for path %s",
492                         mpp->alias, pp->dev);
493                 if (adopt_paths(vecs->pathvec, mpp, 1))
494                         goto fail; /* leave path added to pathvec */
495
496                 verify_paths(mpp, vecs, NULL);
497                 mpp->flush_on_last_del = FLUSH_UNDEF;
498                 mpp->action = ACT_RELOAD;
499         }
500         else {
501                 if (!pp->size) {
502                         condlog(0, "%s: failed to create new map,"
503                                 " device size is 0 ", pp->dev);
504                         int i = find_slot(vecs->pathvec, (void *)pp);
505                         if (i != -1)
506                                 vector_del_slot(vecs->pathvec, i);
507                         free_path(pp);
508                         return 1;
509                 }
510
511                 condlog(4,"%s: creating new map", pp->dev);
512                 if ((mpp = add_map_with_path(vecs, pp, 1))) {
513                         mpp->action = ACT_CREATE;
514                         /*
515                          * We don't depend on ACT_CREATE, as domap will
516                          * set it to ACT_NOTHING when complete.
517                          */
518                         start_waiter = 1;
519                 }
520                 else
521                         goto fail; /* leave path added to pathvec */
522         }
523
524         /* persistent reseravtion check*/
525         mpath_pr_event_handle(pp);      
526
527         /*
528          * push the map to the device-mapper
529          */
530         if (setup_map(mpp, params, PARAMS_SIZE)) {
531                 condlog(0, "%s: failed to setup map for addition of new "
532                         "path %s", mpp->alias, pp->dev);
533                 goto fail_map;
534         }
535         /*
536          * reload the map for the multipath mapped device
537          */
538         if (domap(mpp, params) <= 0) {
539                 condlog(0, "%s: failed in domap for addition of new "
540                         "path %s", mpp->alias, pp->dev);
541                 /*
542                  * deal with asynchronous uevents :((
543                  */
544                 if (mpp->action == ACT_RELOAD && retries-- > 0) {
545                         condlog(0, "%s: uev_add_path sleep", mpp->alias);
546                         sleep(1);
547                         update_mpp_paths(mpp, vecs->pathvec);
548                         goto rescan;
549                 }
550                 else if (mpp->action == ACT_RELOAD)
551                         condlog(0, "%s: giving up reload", mpp->alias);
552                 else
553                         goto fail_map;
554         }
555         dm_lib_release();
556
557         /*
558          * update our state from kernel regardless of create or reload
559          */
560         if (setup_multipath(vecs, mpp))
561                 goto fail; /* if setup_multipath fails, it removes the map */
562
563         sync_map_state(mpp);
564
565         if ((mpp->action == ACT_CREATE ||
566              (mpp->action == ACT_NOTHING && start_waiter && !mpp->waiter)) &&
567             start_waiter_thread(mpp, vecs))
568                         goto fail_map;
569
570         if (retries >= 0) {
571                 condlog(2, "%s [%s]: path added to devmap %s",
572                         pp->dev, pp->dev_t, mpp->alias);
573                 return 0;
574         }
575         else
576                 goto fail;
577
578 fail_map:
579         remove_map(mpp, vecs, 1);
580 fail:
581         orphan_path(pp, "failed to add path");
582         return 1;
583 }
584
585 static int
586 uev_remove_path (struct uevent *uev, struct vectors * vecs)
587 {
588         struct path *pp;
589
590         condlog(2, "%s: remove path (uevent)", uev->kernel);
591         pp = find_path_by_dev(vecs->pathvec, uev->kernel);
592
593         if (!pp) {
594                 /* Not an error; path might have been purged earlier */
595                 condlog(0, "%s: path already removed", uev->kernel);
596                 return 0;
597         }
598
599         return ev_remove_path(pp, vecs);
600 }
601
602 int
603 ev_remove_path (struct path *pp, struct vectors * vecs)
604 {
605         struct multipath * mpp;
606         int i, retval = 0;
607         char params[PARAMS_SIZE] = {0};
608
609         /*
610          * avoid referring to the map of an orphaned path
611          */
612         if ((mpp = pp->mpp)) {
613                 /*
614                  * transform the mp->pg vector of vectors of paths
615                  * into a mp->params string to feed the device-mapper
616                  */
617                 if (update_mpp_paths(mpp, vecs->pathvec)) {
618                         condlog(0, "%s: failed to update paths",
619                                 mpp->alias);
620                         goto fail;
621                 }
622                 if ((i = find_slot(mpp->paths, (void *)pp)) != -1)
623                         vector_del_slot(mpp->paths, i);
624
625                 /*
626                  * remove the map IFF removing the last path
627                  */
628                 if (VECTOR_SIZE(mpp->paths) == 0) {
629                         char alias[WWID_SIZE];
630
631                         /*
632                          * flush_map will fail if the device is open
633                          */
634                         strncpy(alias, mpp->alias, WWID_SIZE);
635                         if (mpp->flush_on_last_del == FLUSH_ENABLED) {
636                                 condlog(2, "%s Last path deleted, disabling queueing", mpp->alias);
637                                 mpp->retry_tick = 0;
638                                 mpp->no_path_retry = NO_PATH_RETRY_FAIL;
639                                 mpp->flush_on_last_del = FLUSH_IN_PROGRESS;
640                                 dm_queue_if_no_path(mpp->alias, 0);
641                         }
642                         if (!flush_map(mpp, vecs, 1)) {
643                                 condlog(2, "%s: removed map after"
644                                         " removing all paths",
645                                         alias);
646                                 retval = 0;
647                                 goto out;
648                         }
649                         /*
650                          * Not an error, continue
651                          */
652                 }
653
654                 if (setup_map(mpp, params, PARAMS_SIZE)) {
655                         condlog(0, "%s: failed to setup map for"
656                                 " removal of path %s", mpp->alias, pp->dev);
657                         goto fail;
658                 }
659                 /*
660                  * reload the map
661                  */
662                 mpp->action = ACT_RELOAD;
663                 if (domap(mpp, params) <= 0) {
664                         condlog(0, "%s: failed in domap for "
665                                 "removal of path %s",
666                                 mpp->alias, pp->dev);
667                         retval = 1;
668                 } else {
669                         /*
670                          * update our state from kernel
671                          */
672                         if (setup_multipath(vecs, mpp))
673                                 return 1;
674                         sync_map_state(mpp);
675
676                         condlog(2, "%s [%s]: path removed from map %s",
677                                 pp->dev, pp->dev_t, mpp->alias);
678                 }
679         }
680
681 out:
682         if ((i = find_slot(vecs->pathvec, (void *)pp)) != -1)
683                 vector_del_slot(vecs->pathvec, i);
684
685         free_path(pp);
686
687         return retval;
688
689 fail:
690         remove_map_and_stop_waiter(mpp, vecs, 1);
691         return 1;
692 }
693
694 static int
695 uev_update_path (struct uevent *uev, struct vectors * vecs)
696 {
697         int ro, retval = 0;
698
699         ro = uevent_get_disk_ro(uev);
700
701         if (ro >= 0) {
702                 struct path * pp;
703
704                 condlog(2, "%s: update path write_protect to '%d' (uevent)",
705                         uev->kernel, ro);
706                 pp = find_path_by_dev(vecs->pathvec, uev->kernel);
707                 if (!pp) {
708                         condlog(0, "%s: spurious uevent, path not found",
709                                 uev->kernel);
710                         return 1;
711                 }
712                 if (pp->mpp) {
713                         retval = reload_map(vecs, pp->mpp, 0);
714
715                         condlog(2, "%s: map %s reloaded (retval %d)",
716                                 uev->kernel, pp->mpp->alias, retval);
717                 }
718
719         }
720
721         return retval;
722 }
723
724 static int
725 map_discovery (struct vectors * vecs)
726 {
727         struct multipath * mpp;
728         unsigned int i;
729
730         if (dm_get_maps(vecs->mpvec))
731                 return 1;
732
733         vector_foreach_slot (vecs->mpvec, mpp, i)
734                 if (setup_multipath(vecs, mpp))
735                         return 1;
736
737         return 0;
738 }
739
740 int
741 uxsock_trigger (char * str, char ** reply, int * len, void * trigger_data)
742 {
743         struct vectors * vecs;
744         int r;
745
746         *reply = NULL;
747         *len = 0;
748         vecs = (struct vectors *)trigger_data;
749
750         pthread_cleanup_push(cleanup_lock, &vecs->lock);
751         lock(vecs->lock);
752         pthread_testcancel();
753
754         r = parse_cmd(str, reply, len, vecs);
755
756         if (r > 0) {
757                 *reply = STRDUP("fail\n");
758                 *len = strlen(*reply) + 1;
759                 r = 1;
760         }
761         else if (!r && *len == 0) {
762                 *reply = STRDUP("ok\n");
763                 *len = strlen(*reply) + 1;
764                 r = 0;
765         }
766         /* else if (r < 0) leave *reply alone */
767
768         lock_cleanup_pop(vecs->lock);
769         return r;
770 }
771
/*
 * Tell whether a uevent should be ignored, judging by its devpath.
 *
 * Returns 0 (keep) only when devpath contains "/block/" followed by a
 * single name component; returns 1 (discard) when "/block/" is missing,
 * no name follows it, or a second component follows the name.
 */
static int
uev_discard(char * devpath)
{
        char dev[11], part[11];
        char *block;

        /*
         * keep only block devices, discard partitions
         */
        block = strstr(devpath, "/block/");
        if (!block) {
                condlog(4, "no /block/ in '%s'", devpath);
                return 1;
        }

        /* the second scan only runs when the first one found a name */
        if (sscanf(block, "/block/%10s", dev) == 1 &&
            sscanf(block, "/block/%10[^/]/%10s", dev, part) != 2)
                return 0;

        condlog(4, "discard event on %s", devpath);
        return 1;
}
793
794 int
795 uev_trigger (struct uevent * uev, void * trigger_data)
796 {
797         int r = 0;
798         struct vectors * vecs;
799
800         vecs = (struct vectors *)trigger_data;
801
802         if (uev_discard(uev->devpath))
803                 return 0;
804
805         pthread_cleanup_push(cleanup_lock, &vecs->lock);
806         lock(vecs->lock);
807         pthread_testcancel();
808
809         /*
810          * device map event
811          * Add events are ignored here as the tables
812          * are not fully initialised then.
813          */
814         if (!strncmp(uev->kernel, "dm-", 3)) {
815                 if (!strncmp(uev->action, "change", 6)) {
816                         r = uev_add_map(uev, vecs);
817                         goto out;
818                 }
819                 if (!strncmp(uev->action, "remove", 6)) {
820                         r = uev_remove_map(uev, vecs);
821                         goto out;
822                 }
823                 goto out;
824         }
825
826         /*
827          * path add/remove event
828          */
829         if (filter_devnode(conf->blist_devnode, conf->elist_devnode,
830                            uev->kernel) > 0)
831                 goto out;
832
833         if (!strncmp(uev->action, "add", 3)) {
834                 r = uev_add_path(uev, vecs);
835                 goto out;
836         }
837         if (!strncmp(uev->action, "remove", 6)) {
838                 r = uev_remove_path(uev, vecs);
839                 goto out;
840         }
841         if (!strncmp(uev->action, "change", 6)) {
842                 r = uev_update_path(uev, vecs);
843                 goto out;
844         }
845
846 out:
847         lock_cleanup_pop(vecs->lock);
848         return r;
849 }
850
/*
 * Thread body: run uevent_listen() on the udev context passed in `ap`
 * and log when it reports an error.  Always returns NULL.
 */
static void *
ueventloop (void * ap)
{
        if (uevent_listen((struct udev *)ap) != 0)
                condlog(0, "error starting uevent listener");

        return NULL;
}
861
862 static void *
863 uevqloop (void * ap)
864 {
865         if (uevent_dispatch(&uev_trigger, ap))
866                 condlog(0, "error starting uevent dispatcher");
867
868         return NULL;
869 }
870 static void *
871 uxlsnrloop (void * ap)
872 {
873         if (cli_init())
874                 return NULL;
875
876         set_handler_callback(LIST+PATHS, cli_list_paths);
877         set_handler_callback(LIST+PATHS+FMT, cli_list_paths_fmt);
878         set_handler_callback(LIST+MAPS, cli_list_maps);
879         set_handler_callback(LIST+STATUS, cli_list_status);
880         set_handler_callback(LIST+DAEMON, cli_list_daemon);
881         set_handler_callback(LIST+MAPS+STATUS, cli_list_maps_status);
882         set_handler_callback(LIST+MAPS+STATS, cli_list_maps_stats);
883         set_handler_callback(LIST+MAPS+FMT, cli_list_maps_fmt);
884         set_handler_callback(LIST+MAPS+TOPOLOGY, cli_list_maps_topology);
885         set_handler_callback(LIST+TOPOLOGY, cli_list_maps_topology);
886         set_handler_callback(LIST+MAP+TOPOLOGY, cli_list_map_topology);
887         set_handler_callback(LIST+CONFIG, cli_list_config);
888         set_handler_callback(LIST+BLACKLIST, cli_list_blacklist);
889         set_handler_callback(LIST+DEVICES, cli_list_devices);
890         set_handler_callback(LIST+WILDCARDS, cli_list_wildcards);
891         set_handler_callback(ADD+PATH, cli_add_path);
892         set_handler_callback(DEL+PATH, cli_del_path);
893         set_handler_callback(ADD+MAP, cli_add_map);
894         set_handler_callback(DEL+MAP, cli_del_map);
895         set_handler_callback(SWITCH+MAP+GROUP, cli_switch_group);
896         set_handler_callback(RECONFIGURE, cli_reconfigure);
897         set_handler_callback(SUSPEND+MAP, cli_suspend);
898         set_handler_callback(RESUME+MAP, cli_resume);
899         set_handler_callback(RESIZE+MAP, cli_resize);
900         set_handler_callback(RELOAD+MAP, cli_reload);
901         set_handler_callback(RESET+MAP, cli_reassign);
902         set_handler_callback(REINSTATE+PATH, cli_reinstate);
903         set_handler_callback(FAIL+PATH, cli_fail);
904         set_handler_callback(DISABLEQ+MAP, cli_disable_queueing);
905         set_handler_callback(RESTOREQ+MAP, cli_restore_queueing);
906         set_handler_callback(DISABLEQ+MAPS, cli_disable_all_queueing);
907         set_handler_callback(RESTOREQ+MAPS, cli_restore_all_queueing);
908         set_handler_callback(QUIT, cli_quit);
909         set_handler_callback(SHUTDOWN, cli_shutdown);
910         set_handler_callback(GETPRSTATUS+MAP, cli_getprstatus);
911         set_handler_callback(SETPRSTATUS+MAP, cli_setprstatus);
912         set_handler_callback(UNSETPRSTATUS+MAP, cli_unsetprstatus);
913         set_handler_callback(FORCEQ+DAEMON, cli_force_no_daemon_q);
914         set_handler_callback(RESTOREQ+DAEMON, cli_restore_no_daemon_q);
915
916         umask(077);
917         uxsock_listen(&uxsock_trigger, ap);
918
919         return NULL;
920 }
921
922 void
923 exit_daemon (void)
924 {
925         sem_post(&exit_sem);
926 }
927
928 const char *
929 daemon_status(void)
930 {
931         switch (running_state) {
932         case DAEMON_INIT:
933                 return "init";
934         case DAEMON_START:
935                 return "startup";
936         case DAEMON_CONFIGURE:
937                 return "configure";
938         case DAEMON_RUNNING:
939                 return "running";
940         case DAEMON_SHUTDOWN:
941                 return "shutdown";
942         }
943         return NULL;
944 }
945
946 static void
947 fail_path (struct path * pp, int del_active)
948 {
949         if (!pp->mpp)
950                 return;
951
952         condlog(2, "checker failed path %s in map %s",
953                  pp->dev_t, pp->mpp->alias);
954
955         dm_fail_path(pp->mpp->alias, pp->dev_t);
956         if (del_active)
957                 update_queue_mode_del_path(pp->mpp);
958 }
959
960 /*
961  * caller must have locked the path list before calling that function
962  */
963 static void
964 reinstate_path (struct path * pp, int add_active)
965 {
966         if (!pp->mpp)
967                 return;
968
969         if (dm_reinstate_path(pp->mpp->alias, pp->dev_t))
970                 condlog(0, "%s: reinstate failed", pp->dev_t);
971         else {
972                 condlog(2, "%s: reinstated", pp->dev_t);
973                 if (add_active)
974                         update_queue_mode_add_path(pp->mpp);
975         }
976 }
977
978 static void
979 enable_group(struct path * pp)
980 {
981         struct pathgroup * pgp;
982
983         /*
984          * if path is added through uev_add_path, pgindex can be unset.
985          * next update_strings() will set it, upon map reload event.
986          *
987          * we can safely return here, because upon map reload, all
988          * PG will be enabled.
989          */
990         if (!pp->mpp->pg || !pp->pgindex)
991                 return;
992
993         pgp = VECTOR_SLOT(pp->mpp->pg, pp->pgindex - 1);
994
995         if (pgp->status == PGSTATE_DISABLED) {
996                 condlog(2, "%s: enable group #%i", pp->mpp->alias, pp->pgindex);
997                 dm_enablegroup(pp->mpp->alias, pp->pgindex);
998         }
999 }
1000
1001 static void
1002 mpvec_garbage_collector (struct vectors * vecs)
1003 {
1004         struct multipath * mpp;
1005         unsigned int i;
1006
1007         if (!vecs->mpvec)
1008                 return;
1009
1010         vector_foreach_slot (vecs->mpvec, mpp, i) {
1011                 if (mpp && mpp->alias && !dm_map_present(mpp->alias)) {
1012                         condlog(2, "%s: remove dead map", mpp->alias);
1013                         remove_map_and_stop_waiter(mpp, vecs, 1);
1014                         i--;
1015                 }
1016         }
1017 }
1018
1019 /* This is called after a path has started working again. It the multipath
1020  * device for this path uses the followover failback type, and this is the
1021  * best pathgroup, and this is the first path in the pathgroup to come back
1022  * up, then switch to this pathgroup */
1023 static int
1024 followover_should_failback(struct path * pp)
1025 {
1026         struct pathgroup * pgp;
1027         struct path *pp1;
1028         int i;
1029
1030         if (pp->mpp->pgfailback != -FAILBACK_FOLLOWOVER ||
1031             !pp->mpp->pg || !pp->pgindex ||
1032             pp->pgindex != pp->mpp->bestpg)
1033                 return 0;
1034
1035         pgp = VECTOR_SLOT(pp->mpp->pg, pp->pgindex - 1);
1036         vector_foreach_slot(pgp->paths, pp1, i) {
1037                 if (pp1 == pp)
1038                         continue;
1039                 if (pp1->chkrstate != PATH_DOWN && pp1->chkrstate != PATH_SHAKY)
1040                         return 0;
1041         }
1042         return 1;
1043 }
1044
1045 static void
1046 defered_failback_tick (vector mpvec)
1047 {
1048         struct multipath * mpp;
1049         unsigned int i;
1050
1051         vector_foreach_slot (mpvec, mpp, i) {
1052                 /*
1053                  * defered failback getting sooner
1054                  */
1055                 if (mpp->pgfailback > 0 && mpp->failback_tick > 0) {
1056                         mpp->failback_tick--;
1057
1058                         if (!mpp->failback_tick && need_switch_pathgroup(mpp, 1))
1059                                 switch_pathgroup(mpp);
1060                 }
1061         }
1062 }
1063
1064 static void
1065 retry_count_tick(vector mpvec)
1066 {
1067         struct multipath *mpp;
1068         unsigned int i;
1069
1070         vector_foreach_slot (mpvec, mpp, i) {
1071                 if (mpp->retry_tick) {
1072                         mpp->stat_total_queueing_time++;
1073                         condlog(4, "%s: Retrying.. No active path", mpp->alias);
1074                         if(--mpp->retry_tick == 0) {
1075                                 dm_queue_if_no_path(mpp->alias, 0);
1076                                 condlog(2, "%s: Disable queueing", mpp->alias);
1077                         }
1078                 }
1079         }
1080 }
1081
1082 int update_prio(struct path *pp, int refresh_all)
1083 {
1084         int oldpriority;
1085         struct path *pp1;
1086         struct pathgroup * pgp;
1087         int i, j, changed = 0;
1088
1089         if (refresh_all) {
1090                 vector_foreach_slot (pp->mpp->pg, pgp, i) {
1091                         vector_foreach_slot (pgp->paths, pp1, j) {
1092                                 oldpriority = pp1->priority;
1093                                 pathinfo(pp1, conf->hwtable, DI_PRIO);
1094                                 if (pp1->priority != oldpriority)
1095                                         changed = 1;
1096                         }
1097                 }
1098                 return changed;
1099         }
1100         oldpriority = pp->priority;
1101         pathinfo(pp, conf->hwtable, DI_PRIO);
1102
1103         if (pp->priority == oldpriority)
1104                 return 0;
1105         return 1;
1106 }
1107
/*
 * Reload a map, set it up again and then sync its state.
 *
 * Returns 0 on success, 1 if either the reload or the setup failed.
 */
int update_path_groups(struct multipath *mpp, struct vectors *vecs, int refresh)
{
	int failed;

	failed = reload_map(vecs, mpp, refresh);
	if (failed)
		return 1;

	dm_lib_release();

	failed = (setup_multipath(vecs, mpp) != 0);
	if (failed)
		return 1;

	sync_map_state(mpp);
	return 0;
}
1120
1121 /*
1122  * Returns '1' if the path has been checked, '0' otherwise
1123  */
1124 int
1125 check_path (struct vectors * vecs, struct path * pp)
1126 {
1127         int newstate;
1128         int new_path_up = 0;
1129         int chkr_new_path_up = 0;
1130         int oldchkrstate = pp->chkrstate;
1131
1132         if (pp->tick && --pp->tick)
1133                 return 0; /* don't check this path yet */
1134
1135         /*
1136          * provision a next check soonest,
1137          * in case we exit abnormaly from here
1138          */
1139         pp->tick = conf->checkint;
1140
1141         newstate = path_offline(pp);
1142         if (newstate == PATH_REMOVED) {
1143                 condlog(2, "%s: remove path (checker)", pp->dev);
1144                 ev_remove_path(pp, vecs);
1145                 return 0;
1146         }
1147         if (newstate == PATH_UP)
1148                 newstate = get_state(pp, 1);
1149         else
1150                 checker_clear_message(&pp->checker);
1151
1152         if (newstate == PATH_WILD || newstate == PATH_UNCHECKED) {
1153                 condlog(2, "%s: unusable path", pp->dev);
1154                 pathinfo(pp, conf->hwtable, 0);
1155                 return 1;
1156         }
1157         if (!pp->mpp) {
1158                 if (!strlen(pp->wwid) &&
1159                     (newstate == PATH_UP || newstate == PATH_GHOST)) {
1160                         condlog(2, "%s: add missing path", pp->dev);
1161                         if (pathinfo(pp, conf->hwtable, DI_ALL) == 0) {
1162                                 ev_add_path(pp, vecs);
1163                                 pp->tick = 1;
1164                         }
1165                 }
1166                 return 0;
1167         }
1168         /*
1169          * Async IO in flight. Keep the previous path state
1170          * and reschedule as soon as possible
1171          */
1172         if (newstate == PATH_PENDING) {
1173                 pp->tick = 1;
1174                 return 0;
1175         }
1176         /*
1177          * Synchronize with kernel state
1178          */
1179         if (update_multipath_strings(pp->mpp, vecs->pathvec)) {
1180                 condlog(1, "%s: Could not synchronize with kernel state",
1181                         pp->dev);
1182                 pp->dmstate = PSTATE_UNDEF;
1183         }
1184         /* if update_multipath_strings orphaned the path, quit early */
1185         if (!pp->mpp)
1186                 return 0;
1187
1188         if ((newstate == PATH_UP || newstate == PATH_GHOST) &&
1189              pp->wait_checks > 0) {
1190                 if (pp->mpp && pp->mpp->nr_active > 0) {
1191                         pp->state = PATH_DELAYED;
1192                         pp->wait_checks--;
1193                         return 1;
1194                 } else
1195                         pp->wait_checks = 0;
1196         }
1197
1198         pp->chkrstate = newstate;
1199         if (newstate != pp->state) {
1200                 int oldstate = pp->state;
1201                 pp->state = newstate;
1202
1203                 if (strlen(checker_message(&pp->checker)))
1204                         LOG_MSG(1, checker_message(&pp->checker));
1205
1206                 /*
1207                  * upon state change, reset the checkint
1208                  * to the shortest delay
1209                  */
1210                 pp->checkint = conf->checkint;
1211
1212                 if (newstate == PATH_DOWN || newstate == PATH_SHAKY) {
1213                         /*
1214                          * proactively fail path in the DM
1215                          */
1216                         if (oldstate == PATH_UP ||
1217                             oldstate == PATH_GHOST) {
1218                                 fail_path(pp, 1);
1219                                 if (pp->mpp->delay_wait_checks > 0 &&
1220                                     pp->watch_checks > 0) {
1221                                         pp->wait_checks = pp->mpp->delay_wait_checks;
1222                                         pp->watch_checks = 0;
1223                                 }
1224                         }else
1225                                 fail_path(pp, 0);
1226
1227                         /*
1228                          * cancel scheduled failback
1229                          */
1230                         pp->mpp->failback_tick = 0;
1231
1232                         pp->mpp->stat_path_failures++;
1233                         return 1;
1234                 }
1235
1236                 if(newstate == PATH_UP || newstate == PATH_GHOST){
1237                         if ( pp->mpp && pp->mpp->prflag ){
1238                                 /*
1239                                  * Check Persistent Reservation.
1240                                  */
1241                         condlog(2, "%s: checking persistent reservation "
1242                                 "registration", pp->dev);
1243                         mpath_pr_event_handle(pp);
1244                         }
1245                 }
1246
1247                 /*
1248                  * reinstate this path
1249                  */
1250                 if (oldstate != PATH_UP &&
1251                     oldstate != PATH_GHOST) {
1252                         if (pp->mpp->delay_watch_checks > 0)
1253                                 pp->watch_checks = pp->mpp->delay_watch_checks;
1254                         reinstate_path(pp, 1);
1255                 } else {
1256                         if (pp->watch_checks > 0)
1257                                 pp->watch_checks--;
1258                         reinstate_path(pp, 0);
1259                 }
1260                 new_path_up = 1;
1261
1262                 if (oldchkrstate != PATH_UP && oldchkrstate != PATH_GHOST)
1263                         chkr_new_path_up = 1;
1264
1265                 /*
1266                  * if at least one path is up in a group, and
1267                  * the group is disabled, re-enable it
1268                  */
1269                 if (newstate == PATH_UP)
1270                         enable_group(pp);
1271         }
1272         else if (newstate == PATH_UP || newstate == PATH_GHOST) {
1273                 if (pp->dmstate == PSTATE_FAILED ||
1274                     pp->dmstate == PSTATE_UNDEF) {
1275                         /* Clear IO errors */
1276                         reinstate_path(pp, 0);
1277                 } else {
1278                         LOG_MSG(4, checker_message(&pp->checker));
1279                         if (pp->checkint != conf->max_checkint) {
1280                                 /*
1281                                  * double the next check delay.
1282                                  * max at conf->max_checkint
1283                                  */
1284                                 if (pp->checkint < (conf->max_checkint / 2))
1285                                         pp->checkint = 2 * pp->checkint;
1286                                 else
1287                                         pp->checkint = conf->max_checkint;
1288
1289                                 condlog(4, "%s: delay next check %is",
1290                                         pp->dev_t, pp->checkint);
1291                         }
1292                         if (pp->watch_checks > 0)
1293                                 pp->watch_checks--;
1294                         pp->tick = pp->checkint;
1295                 }
1296         }
1297         else if (newstate == PATH_DOWN &&
1298                  strlen(checker_message(&pp->checker))) {
1299                 if (conf->log_checker_err == LOG_CHKR_ERR_ONCE)
1300                         LOG_MSG(3, checker_message(&pp->checker));
1301                 else
1302                         LOG_MSG(2, checker_message(&pp->checker));
1303         }
1304
1305         pp->state = newstate;
1306
1307         /*
1308          * path prio refreshing
1309          */
1310         condlog(4, "path prio refresh");
1311
1312         if (update_prio(pp, new_path_up) &&
1313             (pp->mpp->pgpolicyfn == (pgpolicyfn *)group_by_prio) &&
1314              pp->mpp->pgfailback == -FAILBACK_IMMEDIATE)
1315                 update_path_groups(pp->mpp, vecs, !new_path_up);
1316         else if (need_switch_pathgroup(pp->mpp, 0)) {
1317                 if (pp->mpp->pgfailback > 0 &&
1318                     (new_path_up || pp->mpp->failback_tick <= 0))
1319                         pp->mpp->failback_tick =
1320                                 pp->mpp->pgfailback + 1;
1321                 else if (pp->mpp->pgfailback == -FAILBACK_IMMEDIATE ||
1322                          (chkr_new_path_up && followover_should_failback(pp)))
1323                         switch_pathgroup(pp->mpp);
1324         }
1325         return 1;
1326 }
1327
1328 static void *
1329 checkerloop (void *ap)
1330 {
1331         struct vectors *vecs;
1332         struct path *pp;
1333         int count = 0;
1334         unsigned int i;
1335
1336         mlockall(MCL_CURRENT | MCL_FUTURE);
1337         vecs = (struct vectors *)ap;
1338         condlog(2, "path checkers start up");
1339
1340         /*
1341          * init the path check interval
1342          */
1343         vector_foreach_slot (vecs->pathvec, pp, i) {
1344                 pp->checkint = conf->checkint;
1345         }
1346
1347         while (1) {
1348                 struct timeval diff_time, start_time, end_time;
1349                 int num_paths = 0;
1350
1351                 if (gettimeofday(&start_time, NULL) != 0)
1352                         start_time.tv_sec = 0;
1353                 pthread_cleanup_push(cleanup_lock, &vecs->lock);
1354                 lock(vecs->lock);
1355                 pthread_testcancel();
1356                 condlog(4, "tick");
1357 #ifdef USE_SYSTEMD
1358                 if (conf->watchdog)
1359                         sd_notify(0, "WATCHDOG=1");
1360 #endif
1361                 if (vecs->pathvec) {
1362                         vector_foreach_slot (vecs->pathvec, pp, i) {
1363                                 num_paths += check_path(vecs, pp);
1364                         }
1365                 }
1366                 if (vecs->mpvec) {
1367                         defered_failback_tick(vecs->mpvec);
1368                         retry_count_tick(vecs->mpvec);
1369                 }
1370                 if (count)
1371                         count--;
1372                 else {
1373                         condlog(4, "map garbage collection");
1374                         mpvec_garbage_collector(vecs);
1375                         count = MAPGCINT;
1376                 }
1377
1378                 lock_cleanup_pop(vecs->lock);
1379                 if (start_time.tv_sec &&
1380                     gettimeofday(&end_time, NULL) == 0 &&
1381                     num_paths) {
1382                         timersub(&end_time, &start_time, &diff_time);
1383                         condlog(3, "checked %d path%s in %lu.%06lu secs",
1384                                 num_paths, num_paths > 1 ? "s" : "",
1385                                 diff_time.tv_sec, diff_time.tv_usec);
1386                 }
1387                 sleep(1);
1388         }
1389         return NULL;
1390 }
1391
1392 int
1393 configure (struct vectors * vecs, int start_waiters)
1394 {
1395         struct multipath * mpp;
1396         struct path * pp;
1397         vector mpvec;
1398         int i;
1399
1400         if (!vecs->pathvec && !(vecs->pathvec = vector_alloc()))
1401                 return 1;
1402
1403         if (!vecs->mpvec && !(vecs->mpvec = vector_alloc()))
1404                 return 1;
1405
1406         if (!(mpvec = vector_alloc()))
1407                 return 1;
1408
1409         /*
1410          * probe for current path (from sysfs) and map (from dm) sets
1411          */
1412         path_discovery(vecs->pathvec, conf, DI_ALL);
1413
1414         vector_foreach_slot (vecs->pathvec, pp, i){
1415                 if (filter_path(conf, pp) > 0){
1416                         vector_del_slot(vecs->pathvec, i);
1417                         free_path(pp);
1418                         i--;
1419                 }
1420                 else
1421                         pp->checkint = conf->checkint;
1422         }
1423         if (map_discovery(vecs))
1424                 return 1;
1425
1426         /*
1427          * create new set of maps & push changed ones into dm
1428          */
1429         if (coalesce_paths(vecs, mpvec, NULL, 1))
1430                 return 1;
1431
1432         /*
1433          * may need to remove some maps which are no longer relevant
1434          * e.g., due to blacklist changes in conf file
1435          */
1436         if (coalesce_maps(vecs, mpvec))
1437                 return 1;
1438
1439         dm_lib_release();
1440
1441         sync_maps_state(mpvec);
1442         vector_foreach_slot(mpvec, mpp, i){
1443                 remember_wwid(mpp->wwid);
1444                 update_map_pr(mpp);
1445         }
1446
1447         /*
1448          * purge dm of old maps
1449          */
1450         remove_maps(vecs);
1451
1452         /*
1453          * save new set of maps formed by considering current path state
1454          */
1455         vector_free(vecs->mpvec);
1456         vecs->mpvec = mpvec;
1457
1458         /*
1459          * start dm event waiter threads for these new maps
1460          */
1461         vector_foreach_slot(vecs->mpvec, mpp, i) {
1462                 if (setup_multipath(vecs, mpp))
1463                         return 1;
1464                 if (start_waiters)
1465                         if (start_waiter_thread(mpp, vecs))
1466                                 return 1;
1467         }
1468         return 0;
1469 }
1470
1471 int
1472 reconfigure (struct vectors * vecs)
1473 {
1474         struct config * old = conf;
1475         int retval = 1;
1476
1477         /*
1478          * free old map and path vectors ... they use old conf state
1479          */
1480         if (VECTOR_SIZE(vecs->mpvec))
1481                 remove_maps_and_stop_waiters(vecs);
1482
1483         if (VECTOR_SIZE(vecs->pathvec))
1484                 free_pathvec(vecs->pathvec, FREE_PATHS);
1485
1486         vecs->pathvec = NULL;
1487         conf = NULL;
1488
1489         /* Re-read any timezone changes */
1490         tzset();
1491
1492         if (!load_config(DEFAULT_CONFIGFILE, udev)) {
1493                 dm_drv_version(conf->version, TGT_MPATH);
1494                 conf->verbosity = old->verbosity;
1495                 conf->daemon = 1;
1496                 configure(vecs, 1);
1497                 free_config(old);
1498                 retval = 0;
1499         }
1500
1501         return retval;
1502 }
1503
1504 static struct vectors *
1505 init_vecs (void)
1506 {
1507         struct vectors * vecs;
1508
1509         vecs = (struct vectors *)MALLOC(sizeof(struct vectors));
1510
1511         if (!vecs)
1512                 return NULL;
1513
1514         vecs->lock.mutex =
1515                 (pthread_mutex_t *)MALLOC(sizeof(pthread_mutex_t));
1516
1517         if (!vecs->lock.mutex)
1518                 goto out;
1519
1520         pthread_mutex_init(vecs->lock.mutex, NULL);
1521         vecs->lock.depth = 0;
1522
1523         return vecs;
1524
1525 out:
1526         FREE(vecs);
1527         condlog(0, "failed to init paths");
1528         return NULL;
1529 }
1530
1531 static void *
1532 signal_set(int signo, void (*func) (int))
1533 {
1534         int r;
1535         struct sigaction sig;
1536         struct sigaction osig;
1537
1538         sig.sa_handler = func;
1539         sigemptyset(&sig.sa_mask);
1540         sig.sa_flags = 0;
1541
1542         r = sigaction(signo, &sig, &osig);
1543
1544         if (r < 0)
1545                 return (SIG_ERR);
1546         else
1547                 return (osig.sa_handler);
1548 }
1549
1550 void
1551 handle_signals(void)
1552 {
1553         if (reconfig_sig && running_state == DAEMON_RUNNING) {
1554                 condlog(2, "reconfigure (signal)");
1555                 pthread_cleanup_push(cleanup_lock,
1556                                 &gvecs->lock);
1557                 lock(gvecs->lock);
1558                 pthread_testcancel();
1559                 reconfigure(gvecs);
1560                 lock_cleanup_pop(gvecs->lock);
1561         }
1562         if (log_reset_sig) {
1563                 condlog(2, "reset log (signal)");
1564                 pthread_mutex_lock(&logq_lock);
1565                 log_reset("multipathd");
1566                 pthread_mutex_unlock(&logq_lock);
1567         }
1568         reconfig_sig = 0;
1569         log_reset_sig = 0;
1570 }
1571
1572 static void
1573 sighup (int sig)
1574 {
1575         reconfig_sig = 1;
1576 }
1577
/*
 * SIGINT/SIGTERM handler: request daemon shutdown.
 */
static void
sigend (int sig)
{
	(void) sig;
	exit_daemon();
}
1583
1584 static void
1585 sigusr1 (int sig)
1586 {
1587         log_reset_sig = 1;
1588 }
1589
/*
 * SIGUSR2 handler: only logs that the signal arrived.
 */
static void
sigusr2 (int sig)
{
	(void) sig;
	condlog(3, "SIGUSR2 received");
}
1595
1596 static void
1597 signal_init(void)
1598 {
1599         sigset_t set;
1600
1601         sigemptyset(&set);
1602         sigaddset(&set, SIGHUP);
1603         sigaddset(&set, SIGUSR1);
1604         sigaddset(&set, SIGUSR2);
1605         pthread_sigmask(SIG_BLOCK, &set, NULL);
1606
1607         signal_set(SIGHUP, sighup);
1608         signal_set(SIGUSR1, sigusr1);
1609         signal_set(SIGUSR2, sigusr2);
1610         signal_set(SIGINT, sigend);
1611         signal_set(SIGTERM, sigend);
1612         signal(SIGPIPE, SIG_IGN);
1613 }
1614
/*
 * Switch the calling process to the SCHED_RR scheduling policy at
 * priority 99. A failure is logged and otherwise ignored.
 */
static void
setscheduler (void)
{
	static struct sched_param rr_param = {
		.sched_priority = 99
	};

	if (sched_setscheduler(0, SCHED_RR, &rr_param) == -1)
		condlog(LOG_WARNING, "Could not set SCHED_RR at priority 99");
}
1629
1630 static void
1631 set_oom_adj (void)
1632 {
1633 #ifdef OOM_SCORE_ADJ_MIN
1634         int retry = 1;
1635         char *file = "/proc/self/oom_score_adj";
1636         int score = OOM_SCORE_ADJ_MIN;
1637 #else
1638         int retry = 0;
1639         char *file = "/proc/self/oom_adj";
1640         int score = OOM_ADJUST_MIN;
1641 #endif
1642         FILE *fp;
1643         struct stat st;
1644         char *envp;
1645
1646         envp = getenv("OOMScoreAdjust");
1647         if (envp) {
1648                 condlog(3, "Using systemd provided OOMScoreAdjust");
1649                 return;
1650         }
1651         do {
1652                 if (stat(file, &st) == 0){
1653                         fp = fopen(file, "w");
1654                         if (!fp) {
1655                                 condlog(0, "couldn't fopen %s : %s", file,
1656                                         strerror(errno));
1657                                 return;
1658                         }
1659                         fprintf(fp, "%i", score);
1660                         fclose(fp);
1661                         return;
1662                 }
1663                 if (errno != ENOENT) {
1664                         condlog(0, "couldn't stat %s : %s", file,
1665                                 strerror(errno));
1666                         return;
1667                 }
1668 #ifdef OOM_ADJUST_MIN
1669                 file = "/proc/self/oom_adj";
1670                 score = OOM_ADJUST_MIN;
1671 #else
1672                 retry = 0;
1673 #endif
1674         } while (retry--);
1675         condlog(0, "couldn't adjust oom score");
1676 }
1677
1678 static int
1679 child (void * param)
1680 {
1681         pthread_t check_thr, uevent_thr, uxlsnr_thr, uevq_thr;
1682         pthread_attr_t log_attr, misc_attr, uevent_attr;
1683         struct vectors * vecs;
1684         struct multipath * mpp;
1685         int i;
1686 #ifdef USE_SYSTEMD
1687         unsigned long checkint;
1688 #endif
1689         int rc, pid_rc;
1690         char *envp;
1691
1692         mlockall(MCL_CURRENT | MCL_FUTURE);
1693         sem_init(&exit_sem, 0, 0);
1694         signal_init();
1695
1696         udev = udev_new();
1697
1698         setup_thread_attr(&misc_attr, 64 * 1024, 1);
1699         setup_thread_attr(&uevent_attr, 128 * 1024, 1);
1700         setup_thread_attr(&waiter_attr, 32 * 1024, 1);
1701
1702         if (logsink == 1) {
1703                 setup_thread_attr(&log_attr, 64 * 1024, 0);
1704                 log_thread_start(&log_attr);
1705                 pthread_attr_destroy(&log_attr);
1706         }
1707
1708         running_state = DAEMON_START;
1709
1710 #ifdef USE_SYSTEMD
1711         sd_notify(0, "STATUS=startup");
1712 #endif
1713         condlog(2, "--------start up--------");
1714         condlog(2, "read " DEFAULT_CONFIGFILE);
1715
1716         if (load_config(DEFAULT_CONFIGFILE, udev))
1717                 goto failed;
1718
1719         dm_drv_version(conf->version, TGT_MPATH);
1720         if (init_checkers()) {
1721                 condlog(0, "failed to initialize checkers");
1722                 goto failed;
1723         }
1724         if (init_prio()) {
1725                 condlog(0, "failed to initialize prioritizers");
1726                 goto failed;
1727         }
1728
1729         setlogmask(LOG_UPTO(conf->verbosity + 3));
1730
1731         envp = getenv("LimitNOFILE");
1732
1733         if (envp) {
1734                 condlog(2,"Using systemd provided open fds limit of %s", envp);
1735         } else if (conf->max_fds) {
1736                 struct rlimit fd_limit;
1737
1738                 if (getrlimit(RLIMIT_NOFILE, &fd_limit) < 0) {
1739                         condlog(0, "can't get open fds limit: %s",
1740                                 strerror(errno));
1741                         fd_limit.rlim_cur = 0;
1742                         fd_limit.rlim_max = 0;
1743                 }
1744                 if (fd_limit.rlim_cur < conf->max_fds) {
1745                         fd_limit.rlim_cur = conf->max_fds;
1746                         if (fd_limit.rlim_max < conf->max_fds)
1747                                 fd_limit.rlim_max = conf->max_fds;
1748                         if (setrlimit(RLIMIT_NOFILE, &fd_limit) < 0) {
1749                                 condlog(0, "can't set open fds limit to "
1750                                         "%lu/%lu : %s",
1751                                         fd_limit.rlim_cur, fd_limit.rlim_max,
1752                                         strerror(errno));
1753                         } else {
1754                                 condlog(3, "set open fds limit to %lu/%lu",
1755                                         fd_limit.rlim_cur, fd_limit.rlim_max);
1756                         }
1757                 }
1758
1759         }
1760
1761         vecs = gvecs = init_vecs();
1762         if (!vecs)
1763                 goto failed;
1764
1765         setscheduler();
1766         set_oom_adj();
1767
1768         conf->daemon = 1;
1769         udev_set_sync_support(0);
1770 #ifdef USE_SYSTEMD
1771         envp = getenv("WATCHDOG_USEC");
1772         if (envp && sscanf(envp, "%lu", &checkint) == 1) {
1773                 /* Value is in microseconds */
1774                 conf->max_checkint = checkint / 1000000;
1775                 /* Rescale checkint */
1776                 if (conf->checkint > conf->max_checkint)
1777                         conf->checkint = conf->max_checkint;
1778                 else
1779                         conf->checkint = conf->max_checkint / 4;
1780                 condlog(3, "enabling watchdog, interval %d max %d",
1781                         conf->checkint, conf->max_checkint);
1782                 conf->watchdog = conf->checkint;
1783         }
1784 #endif
1785         /*
1786          * Start uevent listener early to catch events
1787          */
1788         if ((rc = pthread_create(&uevent_thr, &uevent_attr, ueventloop, udev))) {
1789                 condlog(0, "failed to create uevent thread: %d", rc);
1790                 goto failed;
1791         }
1792         pthread_attr_destroy(&uevent_attr);
1793         if ((rc = pthread_create(&uxlsnr_thr, &misc_attr, uxlsnrloop, vecs))) {
1794                 condlog(0, "failed to create cli listener: %d", rc);
1795                 goto failed;
1796         }
1797         /*
1798          * fetch and configure both paths and multipaths
1799          */
1800 #ifdef USE_SYSTEMD
1801         sd_notify(0, "STATUS=configure");
1802 #endif
1803         running_state = DAEMON_CONFIGURE;
1804
1805         lock(vecs->lock);
1806         if (configure(vecs, 1)) {
1807                 unlock(vecs->lock);
1808                 condlog(0, "failure during configuration");
1809                 goto failed;
1810         }
1811         unlock(vecs->lock);
1812
1813         /*
1814          * start threads
1815          */
1816         if ((rc = pthread_create(&check_thr, &misc_attr, checkerloop, vecs))) {
1817                 condlog(0,"failed to create checker loop thread: %d", rc);
1818                 goto failed;
1819         }
1820         if ((rc = pthread_create(&uevq_thr, &misc_attr, uevqloop, vecs))) {
1821                 condlog(0, "failed to create uevent dispatcher: %d", rc);
1822                 goto failed;
1823         }
1824         pthread_attr_destroy(&misc_attr);
1825
1826         /* Startup complete, create logfile */
1827         pid_rc = pidfile_create(DEFAULT_PIDFILE, daemon_pid);
1828         /* Ignore errors, we can live without */
1829
1830         running_state = DAEMON_RUNNING;
1831 #ifdef USE_SYSTEMD
1832         sd_notify(0, "READY=1\nSTATUS=running");
1833 #endif
1834
1835         /*
1836          * exit path
1837          */
1838         while(sem_wait(&exit_sem) != 0); /* Do nothing */
1839
1840 #ifdef USE_SYSTEMD
1841         sd_notify(0, "STATUS=shutdown");
1842 #endif
1843         running_state = DAEMON_SHUTDOWN;
1844         lock(vecs->lock);
1845         if (conf->queue_without_daemon == QUE_NO_DAEMON_OFF)
1846                 vector_foreach_slot(vecs->mpvec, mpp, i)
1847                         dm_queue_if_no_path(mpp->alias, 0);
1848         remove_maps_and_stop_waiters(vecs);
1849         unlock(vecs->lock);
1850
1851         pthread_cancel(check_thr);
1852         pthread_cancel(uevent_thr);
1853         pthread_cancel(uxlsnr_thr);
1854         pthread_cancel(uevq_thr);
1855
1856         lock(vecs->lock);
1857         free_pathvec(vecs->pathvec, FREE_PATHS);
1858         vecs->pathvec = NULL;
1859         unlock(vecs->lock);
1860         /* Now all the waitevent threads will start rushing in. */
1861         while (vecs->lock.depth > 0) {
1862                 sleep (1); /* This is weak. */
1863                 condlog(3, "Have %d wait event checkers threads to de-alloc,"
1864                         " waiting...", vecs->lock.depth);
1865         }
1866         pthread_mutex_destroy(vecs->lock.mutex);
1867         FREE(vecs->lock.mutex);
1868         vecs->lock.depth = 0;
1869         vecs->lock.mutex = NULL;
1870         FREE(vecs);
1871         vecs = NULL;
1872
1873         cleanup_checkers();
1874         cleanup_prio();
1875
1876         dm_lib_release();
1877         dm_lib_exit();
1878
1879         /* We're done here */
1880         if (!pid_rc) {
1881                 condlog(3, "unlink pidfile");
1882                 unlink(DEFAULT_PIDFILE);
1883         }
1884
1885         condlog(2, "--------shut down-------");
1886
1887         if (logsink == 1)
1888                 log_thread_stop();
1889
1890         /*
1891          * Freeing config must be done after condlog() and dm_lib_exit(),
1892          * because logging functions like dlog() and dm_write_log()
1893          * reference the config.
1894          */
1895         free_config(conf);
1896         conf = NULL;
1897         udev_unref(udev);
1898         udev = NULL;
1899 #ifdef _DEBUG_
1900         dbg_free_final(NULL);
1901 #endif
1902
1903 #ifdef USE_SYSTEMD
1904         sd_notify(0, "ERRNO=0");
1905 #endif
1906         exit(0);
1907
1908 failed:
1909 #ifdef USE_SYSTEMD
1910         sd_notify(0, "ERRNO=1");
1911 #endif
1912         exit(1);
1913 }
1914
1915 static int
1916 daemonize(void)
1917 {
1918         int pid;
1919         int dev_null_fd;
1920
1921         if( (pid = fork()) < 0){
1922                 fprintf(stderr, "Failed first fork : %s\n", strerror(errno));
1923                 return -1;
1924         }
1925         else if (pid != 0)
1926                 return pid;
1927
1928         setsid();
1929
1930         if ( (pid = fork()) < 0)
1931                 fprintf(stderr, "Failed second fork : %s\n", strerror(errno));
1932         else if (pid != 0)
1933                 _exit(0);
1934
1935         if (chdir("/") < 0)
1936                 fprintf(stderr, "cannot chdir to '/', continuing\n");
1937
1938         dev_null_fd = open("/dev/null", O_RDWR);
1939         if (dev_null_fd < 0){
1940                 fprintf(stderr, "cannot open /dev/null for input & output : %s\n",
1941                         strerror(errno));
1942                 _exit(0);
1943         }
1944
1945         close(STDIN_FILENO);
1946         if (dup(dev_null_fd) < 0) {
1947                 fprintf(stderr, "cannot dup /dev/null to stdin : %s\n",
1948                         strerror(errno));
1949                 _exit(0);
1950         }
1951         close(STDOUT_FILENO);
1952         if (dup(dev_null_fd) < 0) {
1953                 fprintf(stderr, "cannot dup /dev/null to stdout : %s\n",
1954                         strerror(errno));
1955                 _exit(0);
1956         }
1957         close(STDERR_FILENO);
1958         if (dup(dev_null_fd) < 0) {
1959                 fprintf(stderr, "cannot dup /dev/null to stderr : %s\n",
1960                         strerror(errno));
1961                 _exit(0);
1962         }
1963         close(dev_null_fd);
1964         daemon_pid = getpid();
1965         return 0;
1966 }
1967
1968 int
1969 main (int argc, char *argv[])
1970 {
1971         extern char *optarg;
1972         extern int optind;
1973         int arg;
1974         int err;
1975
1976         logsink = 1;
1977         running_state = DAEMON_INIT;
1978         dm_init();
1979
1980         if (getuid() != 0) {
1981                 fprintf(stderr, "need to be root\n");
1982                 exit(1);
1983         }
1984
1985         /* make sure we don't lock any path */
1986         if (chdir("/") < 0)
1987                 fprintf(stderr, "can't chdir to root directory : %s\n",
1988                         strerror(errno));
1989         umask(umask(077) | 022);
1990
1991         conf = alloc_config();
1992
1993         if (!conf)
1994                 exit(1);
1995
1996         while ((arg = getopt(argc, argv, ":dsv:k::B")) != EOF ) {
1997         switch(arg) {
1998                 case 'd':
1999                         logsink = 0;
2000                         //debug=1; /* ### comment me out ### */
2001                         break;
2002                 case 'v':
2003                         if (sizeof(optarg) > sizeof(char *) ||
2004                             !isdigit(optarg[0]))
2005                                 exit(1);
2006
2007                         conf->verbosity = atoi(optarg);
2008                         break;
2009                 case 's':
2010                         logsink = -1;
2011                         break;
2012                 case 'k':
2013                         uxclnt(optarg);
2014                         exit(0);
2015                 case 'B':
2016                         conf->bindings_read_only = 1;
2017                         break;
2018                 default:
2019                         ;
2020                 }
2021         }
2022         if (optind < argc) {
2023                 char cmd[CMDSIZE];
2024                 char * s = cmd;
2025                 char * c = s;
2026
2027                 while (optind < argc) {
2028                         if (strchr(argv[optind], ' '))
2029                                 c += snprintf(c, s + CMDSIZE - c, "\"%s\" ", argv[optind]);
2030                         else
2031                                 c += snprintf(c, s + CMDSIZE - c, "%s ", argv[optind]);
2032                         optind++;
2033                 }
2034                 c += snprintf(c, s + CMDSIZE - c, "\n");
2035                 uxclnt(s);
2036                 exit(0);
2037         }
2038
2039         if (logsink < 1) {
2040                 if (!isatty(fileno(stdout)))
2041                         setbuf(stdout, NULL);
2042                 err = 0;
2043         } else
2044                 err = daemonize();
2045
2046         if (err < 0)
2047                 /* error */
2048                 exit(1);
2049         else if (err > 0)
2050                 /* parent dies */
2051                 exit(0);
2052         else
2053                 /* child lives */
2054                 return (child(NULL));
2055 }
2056
2057 void *  mpath_pr_event_handler_fn (void * pathp )
2058 {
2059         struct multipath * mpp;
2060         int i,j, ret, isFound;
2061         struct path * pp = (struct path *)pathp;
2062         unsigned char *keyp;
2063         uint64_t prkey;
2064         struct prout_param_descriptor *param;
2065         struct prin_resp *resp;
2066
2067         mpp = pp->mpp;
2068
2069         resp = mpath_alloc_prin_response(MPATH_PRIN_RKEY_SA);
2070         if (!resp){
2071                 condlog(0,"%s Alloc failed for prin response", pp->dev);
2072                 return NULL;
2073         }
2074
2075         ret = prin_do_scsi_ioctl(pp->dev, MPATH_PRIN_RKEY_SA, resp, 0);
2076         if (ret != MPATH_PR_SUCCESS )
2077         {
2078                 condlog(0,"%s : pr in read keys service action failed. Error=%d", pp->dev, ret);
2079                 goto out;
2080         }
2081
2082         condlog(3, " event pr=%d addlen=%d",resp->prin_descriptor.prin_readkeys.prgeneration,
2083                         resp->prin_descriptor.prin_readkeys.additional_length );
2084
2085         if (resp->prin_descriptor.prin_readkeys.additional_length == 0 )
2086         {
2087                 condlog(1, "%s: No key found. Device may not be registered.", pp->dev);
2088                 ret = MPATH_PR_SUCCESS;
2089                 goto out;
2090         }
2091         prkey = 0;
2092         keyp = (unsigned char *)mpp->reservation_key;
2093         for (j = 0; j < 8; ++j) {
2094                 if (j > 0)
2095                         prkey <<= 8;
2096                 prkey |= *keyp;
2097                 ++keyp;
2098         }
2099         condlog(2, "Multipath  reservation_key: 0x%" PRIx64 " ", prkey);
2100
2101         isFound =0;
2102         for (i = 0; i < resp->prin_descriptor.prin_readkeys.additional_length/8; i++ )
2103         {
2104                 condlog(2, "PR IN READKEYS[%d]  reservation key:",i);
2105                 dumpHex((char *)&resp->prin_descriptor.prin_readkeys.key_list[i*8], 8 , -1);
2106                 if (!memcmp(mpp->reservation_key, &resp->prin_descriptor.prin_readkeys.key_list[i*8], 8))
2107                 {
2108                         condlog(2, "%s: pr key found in prin readkeys response", mpp->alias);
2109                         isFound =1;
2110                         break;
2111                 }
2112         }
2113         if (!isFound)
2114         {
2115                 condlog(0, "%s: Either device not registered or ", pp->dev);
2116                 condlog(0, "host is not authorised for registration. Skip path");
2117                 ret = MPATH_PR_OTHER;
2118                 goto out;
2119         }
2120
2121         param= malloc(sizeof(struct prout_param_descriptor));
2122         memset(param, 0 , sizeof(struct prout_param_descriptor));
2123
2124         for (j = 7; j >= 0; --j) {
2125                 param->sa_key[j] = (prkey & 0xff);
2126                 prkey >>= 8;
2127         }
2128         param->num_transportid = 0;
2129
2130         condlog(3, "device %s:%s", pp->dev, pp->mpp->wwid);
2131
2132         ret = prout_do_scsi_ioctl(pp->dev, MPATH_PROUT_REG_IGN_SA, 0, 0, param, 0);
2133         if (ret != MPATH_PR_SUCCESS )
2134         {
2135                 condlog(0,"%s: Reservation registration failed. Error: %d", pp->dev, ret);
2136         }
2137         mpp->prflag = 1;
2138
2139         free(param);
2140 out:
2141         free(resp);
2142         return NULL;
2143 }
2144
2145 int mpath_pr_event_handle(struct path *pp)
2146 {
2147         pthread_t thread;
2148         int rc;
2149         pthread_attr_t attr;
2150         struct multipath * mpp;
2151
2152         mpp = pp->mpp;
2153
2154         if (!mpp->reservation_key)
2155                 return -1;
2156
2157         pthread_attr_init(&attr);
2158         pthread_attr_setdetachstate(&attr, PTHREAD_CREATE_JOINABLE);
2159
2160         rc = pthread_create(&thread, NULL , mpath_pr_event_handler_fn, pp);
2161         if (rc) {
2162                 condlog(0, "%s: ERROR; return code from pthread_create() is %d", pp->dev, rc);
2163                 return -1;
2164         }
2165         pthread_attr_destroy(&attr);
2166         rc = pthread_join(thread, NULL);
2167         return 0;
2168 }
2169