[multipathd] unix socket daemon control interface
[multipath-tools/.git] / multipathd / main.c
1 #include <string.h>
2 #include <pthread.h>
3 #include <stdio.h>
4 #include <unistd.h>
5 #include <linux/unistd.h>
6 #include <stdlib.h>
7 #include <sys/types.h>
8 #include <sys/stat.h>
9 #include <fcntl.h>
10 #include <libdevmapper.h>
11 #include <signal.h>
12 #include <wait.h>
13 #include <sched.h>
14 #include <errno.h>
15 #include <sys/mount.h>
16 #include <sys/mman.h>
17
18 /*
19  * libsysfs
20  */
21 #include <sysfs/libsysfs.h>
22 #include <sysfs/dlist.h>
23
24 /*
25  * libcheckers
26  */
27 #include <checkers.h>
28 #include <path_state.h>
29
30 /*
31  * libmultipath
32  */
33 #include <parser.h>
34 #include <vector.h>
35 #include <memory.h>
36 #include <config.h>
37 #include <callout.h>
38 #include <util.h>
39 #include <blacklist.h>
40 #include <hwtable.h>
41 #include <defaults.h>
42 #include <structs.h>
43 #include <dmparser.h>
44 #include <devmapper.h>
45 #include <dict.h>
46 #include <discovery.h>
47 #include <debug.h>
48 #include <propsel.h>
49 #include <uevent.h>
50 #include <switchgroup.h>
51
52 #include "main.h"
53 #include "copy.h"
54 #include "clone_platform.h"
55 #include "pidfile.h"
56 #include "uxlsnr.h"
57 #include "uxclnt.h"
58
59 #define FILE_NAME_SIZE 256
60 #define CMDSIZE 160
61 #define MAX_REPLY_LEN 1000
62
63 #define CALLOUT_DIR "/var/cache/multipathd"
64
/*
 * If the message buffer a is non-empty, emit it through condlog(1, ...)
 * prefixed with b, then zero MAX_CHECKER_MSG_SIZE bytes of a.
 * Wrapped in do/while(0) so the expansion is a single statement and
 * stays correct inside an unbraced if/else.
 */
#define LOG_MSG(a,b) \
	do { \
		if (strlen(a)) { \
			condlog(1, "%s: %s", b, a); \
			memset(a, 0, MAX_CHECKER_MSG_SIZE); \
		} \
	} while (0)
70
/*
 * Mutex helpers. With LCKDBG defined every lock/unlock is traced on
 * stderr with its call site. The tracing variants are wrapped in
 * do/while(0) so that both statements stay together when the macro is
 * used as the body of an unbraced if/else.
 */
#ifdef LCKDBG
#define lock(a) \
	do { \
		fprintf(stderr, "%s:%s(%i) lock %p\n", __FILE__, __FUNCTION__, __LINE__, a); \
		pthread_mutex_lock(a); \
	} while (0)
#define unlock(a) \
	do { \
		fprintf(stderr, "%s:%s(%i) unlock %p\n", __FILE__, __FUNCTION__, __LINE__, a); \
		pthread_mutex_unlock(a); \
	} while (0)
#else
#define lock(a) pthread_mutex_lock(a)
#define unlock(a) pthread_mutex_unlock(a)
#endif
82
83 /*
84  * structs
85  */
86 struct paths {
87         pthread_mutex_t *lock;
88         vector pathvec;
89         vector mpvec;
90 };
91
92 struct event_thread {
93         pthread_t *thread;
94         int event_nr;
95         char mapname[WWID_SIZE];
96         struct paths *allpaths;
97 };
98
99 static void *
100 alloc_waiter (void)
101 {
102
103         struct event_thread * wp;
104
105         wp = MALLOC(sizeof(struct event_thread));
106
107         if (!wp)
108                 return NULL;
109
110         wp->thread = MALLOC(sizeof(pthread_t));
111
112         if (!wp->thread)
113                 goto out;
114                 
115         return wp;
116
117 out:
118         free(wp);
119         condlog(0, "failed to alloc waiter");
120         return NULL;
121 }
122
123 static void
124 set_paths_owner (struct paths * allpaths, struct multipath * mpp)
125 {
126         int i;
127         struct path * pp;
128
129         vector_foreach_slot (allpaths->pathvec, pp, i) {
130                 if (!strncmp(mpp->wwid, pp->wwid, WWID_SIZE)) {
131                         condlog(4, "%s ownership set", pp->dev_t);
132                         pp->mpp = mpp;
133                 }
134         }
135 }
136
137 static int
138 update_multipath_table (struct multipath *mpp, vector pathvec)
139 {
140         if (dm_get_map(mpp->alias, &mpp->size, mpp->params))
141                 return 1;
142
143         if(disassemble_map(pathvec, mpp->params, mpp))
144                 return 1;
145
146         return 0;
147 }
148
149 static int
150 update_multipath_status (struct multipath *mpp)
151 {
152         if(dm_get_status(mpp->alias, mpp->status))
153                 return 1;
154
155         if (disassemble_status(mpp->status, mpp))
156                 return 1;
157
158         return 0;
159 }
160
161 static int
162 update_multipath_strings (struct multipath *mpp, vector pathvec)
163 {
164         if (update_multipath_table(mpp, pathvec))
165                 return 1;
166
167         if (update_multipath_status(mpp))
168                 return 1;
169
170         return 0;
171 }
172
173 static int
174 setup_multipath (struct paths * allpaths, struct multipath * mpp)
175 {
176         char * wwid;
177
178         wwid = get_mpe_wwid(mpp->alias);
179
180         if (wwid) {
181                 strncpy(mpp->wwid, wwid, WWID_SIZE);
182                 wwid = NULL;
183         } else
184                 strncpy(mpp->wwid, mpp->alias, WWID_SIZE);
185
186         condlog(4, "discovered map %s", mpp->alias);
187
188         if (update_multipath_strings(mpp, allpaths->pathvec))
189                 goto out;
190
191         set_paths_owner(allpaths, mpp);
192         mpp->mpe = find_mpe(mpp->wwid);
193         select_pgfailback(mpp);
194
195         return 0;
196 out:
197         free_multipath(mpp, KEEP_PATHS);
198         condlog(0, "failed to setup multipath");
199         return 1;
200 }
201
202 static void
203 switch_pathgroup (struct multipath * mpp)
204 {
205         struct pathgroup * pgp;
206         struct path * pp;
207         int i, j;
208         
209         if (!mpp || mpp->pgfailback == FAILBACK_MANUAL)
210                 return;
211         /*
212          * Refresh path priority values
213          */
214         vector_foreach_slot (mpp->pg, pgp, i)
215                 vector_foreach_slot (pgp->paths, pp, j)
216                         pathinfo(pp, conf->hwtable, DI_PRIO);
217
218         select_path_group(mpp); /* sets mpp->nextpg */
219         pgp = VECTOR_SLOT(mpp->pg, mpp->nextpg - 1);
220         
221         if (pgp && pgp->status != PGSTATE_ACTIVE) {
222                 dm_switchgroup(mpp->alias, mpp->nextpg);
223                 condlog(2, "%s: switch to path group #%i",
224                          mpp->alias, mpp->nextpg);
225         }
226 }
227
228 static int
229 update_multipath (struct paths *allpaths, char *mapname)
230 {
231         struct multipath *mpp;
232         struct pathgroup  *pgp;
233         struct path *pp;
234         int i, j;
235         int r = 1;
236
237         lock(allpaths->lock);
238         mpp = find_mp(allpaths->mpvec, mapname);
239
240         if (!mpp)
241                 goto out;
242
243         free_pgvec(mpp->pg, KEEP_PATHS);
244         mpp->pg = NULL;
245
246         setup_multipath(allpaths, mpp);
247
248         /*
249          * compare checkers states with DM states
250          */
251         vector_foreach_slot (mpp->pg, pgp, i) {
252                 vector_foreach_slot (pgp->paths, pp, j) {
253                         if (pp->dmstate != PSTATE_FAILED)
254                                 continue;
255
256                         if (pp->state != PATH_DOWN) {
257                                 condlog(2, "%s: mark as failed", pp->dev_t);
258                                 pp->state = PATH_DOWN;
259
260                                 /*
261                                  * if opportune,
262                                  * schedule the next check earlier
263                                  */
264                                 if (pp->tick > conf->checkint)
265                                         pp->tick = conf->checkint;
266                         }
267                 }
268         }
269         r = 0;
270 out:
271         unlock(allpaths->lock);
272
273         if (r)
274                 condlog(0, "failed to update multipath");
275
276         return r;
277 }
278
279 /*
280  * returns the reschedule delay
281  * negative means *stop*
282  */
283 static int
284 waiteventloop (struct event_thread * waiter)
285 {
286         struct dm_task *dmt;
287         int event_nr;
288         int r = 1; /* upon problem reschedule 1s later */
289
290         if (!waiter->event_nr)
291                 waiter->event_nr = dm_geteventnr(waiter->mapname);
292
293         if (!(dmt = dm_task_create(DM_DEVICE_WAITEVENT)))
294                 goto out;
295
296         if (!dm_task_set_name(dmt, waiter->mapname))
297                 goto out;
298
299         if (waiter->event_nr && !dm_task_set_event_nr(dmt, waiter->event_nr))
300                 goto out;
301
302         dm_task_no_open_count(dmt);
303
304         dm_task_run(dmt);
305
306         waiter->event_nr++;
307
308         /*
309          * upon event ...
310          */
311         while (1) {
312                 condlog(2, "devmap event (%i) on %s",
313                                 waiter->event_nr, waiter->mapname);
314
315                 /*
316                  * event might be :
317                  *
318                  * 1) a table reload, which means our mpp structure is
319                  *    obsolete : refresh it through update_multipath()
320                  * 2) a path failed by DM : mark as such through
321                  *    update_multipath()
322                  * 3) map has gone away : stop the thread.
323                  * 4) a path reinstate : nothing to do
324                  * 5) a switch group : nothing to do
325                  */
326                 if (update_multipath(waiter->allpaths, waiter->mapname)) {
327                         r = -1; /* stop the thread */
328                         goto out;
329                 }
330                 event_nr = dm_geteventnr(waiter->mapname);
331
332                 if (waiter->event_nr == event_nr)
333                         break;
334
335                 waiter->event_nr = event_nr;
336         }
337
338 out:
339         dm_task_destroy(dmt);
340         return r;
341 }
342
343 static void *
344 waitevent (void * et)
345 {
346         int r;
347         struct event_thread *waiter;
348
349         mlockall(MCL_CURRENT | MCL_FUTURE);
350
351         waiter = (struct event_thread *)et;
352         pthread_setcanceltype(PTHREAD_CANCEL_ASYNCHRONOUS, NULL);
353
354         while (1) {
355                 r = waiteventloop(waiter);
356
357                 if (r < 0)
358                         break;
359
360                 sleep(r);
361         }
362
363         pthread_exit(waiter->thread);
364
365         return NULL;
366 }
367
368 static void
369 free_waiter (struct event_thread * wp)
370 {
371         free(wp->thread);
372         free(wp);
373 }
374
375 static int
376 stop_waiter_thread (struct multipath * mpp, struct paths * allpaths)
377 {
378         struct event_thread * wp;
379
380         if (!mpp)
381                 return 0;
382
383         wp = (struct event_thread *)mpp->waiter;
384
385         if (!wp)
386                 return 1;
387
388         condlog(2, "reap event checker : %s", wp->mapname);
389         pthread_cancel(*wp->thread);
390         free_waiter(wp);
391
392         return 0;
393 }
394
395 static int
396 start_waiter_thread (struct multipath * mpp, struct paths * allpaths)
397 {
398         pthread_attr_t attr;
399         struct event_thread * wp;
400
401         if (!mpp)
402                 return 0;
403
404         if (pthread_attr_init(&attr))
405                 return 1;
406
407         pthread_attr_setstacksize(&attr, 32 * 1024);
408         wp = alloc_waiter();
409
410         if (!wp)
411                 return 1;
412
413         mpp->waiter = (void *)wp;
414         strncpy(wp->mapname, mpp->alias, WWID_SIZE);
415         wp->allpaths = allpaths;
416
417         if (pthread_create(wp->thread, &attr, waitevent, wp)) {
418                 condlog(0, "%s: cannot create event checker", wp->mapname);
419                 goto out;
420         }
421         condlog(2, "%s: event checker started", wp->mapname);
422
423         return 0;
424 out:
425         free_waiter(wp);
426         mpp->waiter = NULL;
427         condlog(0, "failed to start waiter thread");
428         return 1;
429 }
430
431 static void
432 remove_map (struct multipath * mpp, struct paths * allpaths)
433 {
434         int i;
435
436         stop_waiter_thread(mpp, allpaths);
437         i = find_slot(allpaths->mpvec, (void *)mpp);
438         vector_del_slot(allpaths->mpvec, i);
439         free_multipath(mpp, KEEP_PATHS);
440 }
441
442 static int
443 uev_add_map (char * devname, struct paths * allpaths)
444 {
445         int major, minor;
446         char dev_t[BLK_DEV_SIZE];
447         char * buff;
448         struct multipath * mpp;
449
450         if (sysfs_get_dev(sysfs_path, devname, dev_t, BLK_DEV_SIZE))
451                 return 1;
452
453         if (sscanf(dev_t, "%d:%d", &major, &minor) != 2)
454                 return 1;
455
456         buff = dm_mapname(major, minor, "multipath");
457                 
458         if (!buff)
459                 return 1;
460         
461         mpp = find_mp(allpaths->mpvec, buff);
462
463         if (mpp) {
464                 /*
465                  * devmap already in mpvec
466                  * but remove DM uevent are somewhet unreliable
467                  * so for now consider safer to remove and re-add the map
468                  */
469                 condlog(2, "%s: remove dead config", mpp->alias);
470                 remove_map(mpp, allpaths);
471                 mpp = NULL;
472         }
473         if (!mpp) {
474                 mpp = alloc_multipath();
475
476                 if (!mpp)
477                         return 1;
478
479                 mpp->minor = minor;
480                 mpp->alias = MALLOC(strlen(buff) + 1);
481
482                 if (!mpp->alias)
483                         goto out;
484
485                 strncat(mpp->alias, buff, strlen(buff));
486
487                 dm_get_map(mpp->alias, &mpp->size, mpp->params);
488                 dm_get_status(mpp->alias, mpp->status);
489
490                 if (setup_multipath(allpaths, mpp))
491                         return 1; /* mpp freed in setup_multipath */
492
493                 if (!vector_alloc_slot(allpaths->mpvec))
494                         goto out;
495
496                 vector_set_slot(allpaths->mpvec, mpp);
497                 set_paths_owner(allpaths, mpp);
498
499                 if (start_waiter_thread(mpp, allpaths))
500                         goto out;
501         }
502         return 0;
503 out:
504         free_multipath(mpp, KEEP_PATHS);
505         return 1;
506 }
507
508 static int
509 uev_remove_map (char * devname, struct paths * allpaths)
510 {
511         int minor;
512         struct multipath * mpp;
513
514         mpp->minor = atoi(devname + 3);
515         mpp = find_mp_by_minor(allpaths->mpvec, minor);
516
517         if (mpp)
518                 remove_map(mpp, allpaths);
519
520         return 0;
521 }
522
523 static int
524 uev_add_path (char * devname, struct paths * allpaths)
525 {
526         struct path * pp;
527
528         pp = find_path_by_dev(allpaths->pathvec, devname);
529
530         if (pp) {
531                 condlog(3, "%s: already in pathvec");
532                 return 0;
533         }
534         condlog(2, "add %s path checker", devname);
535         pp = store_pathinfo(allpaths->pathvec, conf->hwtable,
536                        devname, DI_SYSFS | DI_WWID);
537
538         if (!pp)
539                 return 1;
540
541         pp->mpp = find_mp_by_wwid(allpaths->mpvec, pp->wwid);
542         condlog(4, "%s: ownership set to %s", pp->dev_t, pp->mpp->alias);
543
544         return 0;
545 }
546
547 static int
548 uev_remove_path (char * devname, struct paths * allpaths)
549 {
550         int i;
551         struct path * pp;
552
553         pp = find_path_by_dev(allpaths->pathvec, devname);
554
555         if (!pp) {
556                 condlog(3, "%s: not in pathvec");
557                 return 0;
558         }
559         condlog(2, "remove %s path checker", devname);
560         i = find_slot(allpaths->pathvec, (void *)pp);
561         vector_del_slot(allpaths->pathvec, i);
562         free_path(pp);
563
564         return 0;
565 }
566
567 static char *
568 show_paths (struct paths * allpaths)
569 {
570         int i, j, k;
571         struct path * pp;
572         char * c;
573         char * reply;
574
575         reply = MALLOC(MAX_REPLY_LEN);
576
577         if (!reply)
578                 return NULL;
579
580         c = reply;
581         c += sprintf(c, "\n");
582
583         vector_foreach_slot(allpaths->pathvec, pp, i) {
584                 c += sprintf(c, "%10s: ", pp->dev);
585                 c += sprintf(c, "state %i, ", pp->state);
586
587                 j = pp->tick;
588                 k = pp->checkint - pp->tick;
589                 c += sprintf(c, "%3i/%3i ", j, pp->checkint);
590
591                 while (j-- > 0)
592                         c += sprintf(c, "X");
593
594
595                 while (k-- > 0)
596                         c += sprintf(c, ".");
597
598                 c += sprintf(c, "\n");
599         }
600
601         return reply;
602 }
603
604 static char *
605 show_maps (struct paths * allpaths)
606 {
607         int i, j, k;
608         struct multipath * mpp;
609         char * c;
610         char * reply;
611
612         reply = MALLOC(MAX_REPLY_LEN);
613
614         if (!reply)
615                 return NULL;
616
617         c = reply;
618         c += sprintf(c, "\n");
619
620         vector_foreach_slot(allpaths->mpvec, mpp, i) {
621                 c += sprintf(c, "%20s: ", mpp->alias);
622
623                 j = mpp->failback_tick;
624                 k = mpp->pgfailback - mpp->failback_tick;
625                 c += sprintf(c, "%3i/%3i ", j, mpp->pgfailback);
626
627                 while (j-- > 0)
628                         c += sprintf(c, "X");
629
630
631                 while (k-- > 0)
632                         c += sprintf(c, ".");
633
634                 c += sprintf(c, "\n");
635         }
636
637         return reply;
638 }
639
640 char *
641 uxsock_trigger (char * str, void * trigger_data)
642 {
643         struct paths * allpaths;
644         char * reply = NULL;
645
646         allpaths = (struct paths *)trigger_data;
647
648         lock(allpaths->lock);
649
650         if (*str == 'l' && *(str + 1) == 'p')
651                 reply = show_paths(allpaths);
652
653         else if (*str == 'l' && *(str + 1) == 'm')
654                 reply = show_maps(allpaths);
655
656         else if (*str == 'r' && *(str + 1) == 'p')
657                 uev_remove_path(str + 3, allpaths);
658
659         else if (*str == 'a' && *(str + 1) == 'p')
660                 uev_add_path(str + 3, allpaths);
661
662         else if (*str == 'r' && *(str + 1) == 'm')
663                 uev_remove_map(str + 3, allpaths);
664
665         else if (*str == 'a' && *(str + 1) == 'm')
666                 uev_add_map(str + 3, allpaths);
667
668         if (!reply)
669                 asprintf(&reply, "ok\n");
670
671         unlock(allpaths->lock);
672
673         return reply;
674 }
675
676 int 
677 uev_trigger (struct uevent * uev, void * trigger_data)
678 {
679         int r = 0;
680         char devname[32];
681         struct paths * allpaths;
682
683         allpaths = (struct paths *)trigger_data;
684         lock(allpaths->lock);
685
686         if (strncmp(uev->devpath, "/block", 6))
687                 goto out;
688
689         basename(uev->devpath, devname);
690
691         /*
692          * device map add/remove event
693          */
694         if (!strncmp(devname, "dm-", 3)) {
695                 condlog(2, "%s %s devmap", uev->action, devname);
696
697                 if (!strncmp(uev->action, "add", 3)) {
698                         r = uev_add_map(devname, allpaths);
699                         goto out;
700                 }
701                 if (!strncmp(uev->action, "remove", 6)) {
702                         r = uev_remove_map(devname, allpaths);
703                         goto out;
704                 }
705                 goto out;
706         }
707         
708         /*
709          * path add/remove event
710          */
711         if (blacklist(conf->blist, devname))
712                 goto out;
713
714         if (!strncmp(uev->action, "add", 3)) {
715                 r = uev_add_path(devname, allpaths);
716                 goto out;
717         }
718         if (!strncmp(uev->action, "remove", 6)) {
719                 r = uev_remove_path(devname, allpaths);
720                 goto out;
721         }
722
723 out:
724         FREE(uev);
725         unlock(allpaths->lock);
726         return r;
727 }
728
729 static void *
730 ueventloop (void * ap)
731 {
732         uevent_listen(&uev_trigger, ap);
733
734         return NULL;
735 }
736
737 static void *
738 uxlsnrloop (void * ap)
739 {
740         uxsock_listen(&uxsock_trigger, ap);
741
742         return NULL;
743 }
744
745 static void
746 strvec_free (vector vec)
747 {
748         int i;
749         char * str;
750
751         vector_foreach_slot (vec, str, i)
752                 if (str)
753                         FREE(str);
754
755         vector_free(vec);
756 }
757
758 static int
759 exit_daemon (int status)
760 {
761         if (status != 0)
762                 fprintf(stderr, "bad exit status. see daemon.log\n");
763
764         condlog(3, "umount ramfs");
765         umount(CALLOUT_DIR);
766
767         condlog(3, "unlink pidfile");
768         unlink(DEFAULT_PIDFILE);
769
770         condlog(2, "--------shut down-------");
771         
772         if (logsink)
773                 log_thread_stop();
774
775         exit(status);
776 }
777
778 /*
779  * caller must have locked the path list before calling that function
780  */
781 static int
782 get_dm_mpvec (struct paths * allpaths)
783 {
784         int i;
785         struct multipath * mpp;
786
787         if (dm_get_maps(allpaths->mpvec, "multipath"))
788                 return 1;
789
790         vector_foreach_slot (allpaths->mpvec, mpp, i) {
791                 setup_multipath(allpaths, mpp);
792                 mpp->minor = dm_get_minor(mpp->alias);
793                 start_waiter_thread(mpp, allpaths);
794         }
795
796         return 0;
797 }
798
799 static void
800 fail_path (struct path * pp)
801 {
802         if (!pp->mpp)
803                 return;
804
805         condlog(2, "checker failed path %s in map %s",
806                  pp->dev_t, pp->mpp->alias);
807
808         dm_fail_path(pp->mpp->alias, pp->dev_t);
809 }
810
811 /*
812  * caller must have locked the path list before calling that function
813  */
814 static void
815 reinstate_path (struct path * pp)
816 {
817         if (pp->mpp) {
818                 if (dm_reinstate(pp->mpp->alias, pp->dev_t))
819                         condlog(0, "%s: reinstate failed", pp->dev_t);
820                 else
821                         condlog(2, "%s: reinstated", pp->dev_t);
822         }
823 }
824
825 static void *
826 checkerloop (void *ap)
827 {
828         struct paths *allpaths;
829         struct path *pp;
830         int i;
831         int newstate;
832         char checker_msg[MAX_CHECKER_MSG_SIZE];
833
834         mlockall(MCL_CURRENT | MCL_FUTURE);
835
836         memset(checker_msg, 0, MAX_CHECKER_MSG_SIZE);
837         allpaths = (struct paths *)ap;
838
839         condlog(2, "path checkers start up");
840
841         while (1) {
842                 lock(allpaths->lock);
843                 condlog(4, "tick");
844
845                 vector_foreach_slot (allpaths->pathvec, pp, i) {
846                         if (pp->tick) {
847                                 /*
848                                  * don't check this path yet
849                                  */
850                                 pp->tick--;
851                                 continue;
852                         }
853
854                         /*
855                          * provision a next check soonest,
856                          * in case we exit abnormaly from here
857                          */
858                         pp->tick = conf->checkint;
859                         
860                         if (!pp->checkfn) {
861                                 pathinfo(pp, conf->hwtable, DI_SYSFS);
862                                 select_checkfn(pp);
863                         }
864
865                         if (!pp->checkfn) {
866                                 condlog(0, "%s: checkfn is void", pp->dev);
867                                 continue;
868                         }
869                         newstate = pp->checkfn(pp->fd, checker_msg,
870                                                &pp->checker_context);
871                         
872                         if (newstate != pp->state) {
873                                 pp->state = newstate;
874                                 LOG_MSG(checker_msg, pp->dev_t);
875
876                                 /*
877                                  * upon state change, reset the checkint
878                                  * to the shortest delay
879                                  */
880                                 pp->checkint = conf->checkint;
881
882                                 if (newstate == PATH_DOWN ||
883                                     newstate == PATH_SHAKY) {
884                                         /*
885                                          * proactively fail path in the DM
886                                          */
887                                         fail_path(pp);
888
889                                         /*
890                                          * cancel scheduled failback
891                                          */
892                                         pp->mpp->failback_tick = 0;
893                                         continue;
894                                 }
895
896                                 /*
897                                  * reinstate this path
898                                  */
899                                 reinstate_path(pp);
900
901                                 /*
902                                  * need to switch group ?
903                                  */
904                                 update_multipath_strings(pp->mpp,
905                                                          allpaths->pathvec);
906
907                                 /*
908                                  * schedule defered failback
909                                  */
910                                 if (pp->mpp->pgfailback > 0)
911                                         pp->mpp->failback_tick =
912                                                 pp->mpp->pgfailback;
913
914                                 if (pp->mpp->pgfailback == FAILBACK_IMMEDIATE)
915                                         switch_pathgroup(pp->mpp);
916                         }
917                         else if (newstate == PATH_UP || newstate == PATH_GHOST) {
918                                 /*
919                                  * PATH_UP for last two checks
920                                  * defered failback getting sooner
921                                  */
922                                 if (pp->mpp->pgfailback > 0) {
923                                         if (pp->mpp->failback_tick > 0) {
924                                                 pp->mpp->failback_tick--;
925
926                                                 if (!pp->mpp->failback_tick)
927                                                         switch_pathgroup(pp->mpp);
928                                         }
929                                 }
930                                 
931                                 /*
932                                  * and double the next check delay.
933                                  * max at conf->max_checkint
934                                  */
935                                 if (pp->checkint < (conf->max_checkint / 2))
936                                         pp->checkint = 2 * pp->checkint;
937                                 else
938                                         pp->checkint = conf->max_checkint;
939
940                                 pp->tick = pp->checkint;
941                                 condlog(4, "%s: delay next check %is",
942                                                 pp->dev_t, pp->tick);
943                         }
944                         pp->state = newstate;
945                 }
946                 unlock(allpaths->lock);
947                 sleep(1);
948         }
949         return NULL;
950 }
951
952 static struct paths *
953 init_paths (void)
954 {
955         struct paths *allpaths;
956
957         allpaths = MALLOC(sizeof(struct paths));
958
959         if (!allpaths)
960                 return NULL;
961
962         allpaths->lock = 
963                 (pthread_mutex_t *)MALLOC(sizeof(pthread_mutex_t));
964
965         if (!allpaths->lock)
966                 goto out;
967
968         allpaths->pathvec = vector_alloc();
969
970         if (!allpaths->pathvec)
971                 goto out1;
972                 
973         allpaths->mpvec = vector_alloc();
974
975         if (!allpaths->mpvec)
976                 goto out2;
977         
978         pthread_mutex_init(allpaths->lock, NULL);
979
980         return allpaths;
981
982 out2:
983         vector_free(allpaths->pathvec);
984 out1:
985         FREE(allpaths->lock);
986 out:
987         FREE(allpaths);
988         condlog(0, "failed to init paths");
989         return NULL;
990 }
991
/*
 * this logic is all about keeping callouts working in case of
 * system disk outage (think system over SAN)
 * this needs the clone syscall, so don't bother if not present
 * (Debian Woody)
 *
 * prepare_namespace() mounts a ramfs on CALLOUT_DIR, sized after the
 * binaries listed in conf->binvec, copies those binaries into it and
 * bind-mounts it over /sbin, /bin and /tmp.
 *
 * Returns 0 on success, -1 on failure. A failed copy into the ramfs
 * terminates the daemon through exit_daemon(1).
 */
#ifdef CLONE_NEWNS
static int
prepare_namespace(void)
{
	mode_t mode = S_IRWXU;
	struct stat dirstat;
	char ramfs_args[64];
	int i;
	int fd;
	char * bin;
	size_t size = 10;
	struct stat statbuf;

	/*
	 * create a temp mount point for ramfs
	 */
	if (stat(CALLOUT_DIR, &dirstat) < 0) {
		if (mkdir(CALLOUT_DIR, mode) < 0) {
			condlog(0, "cannot create " CALLOUT_DIR);
			return -1;
		}
		condlog(4, "created " CALLOUT_DIR);
	}

	/*
	 * compute the optimal ramdisk size
	 */
	vector_foreach_slot (conf->binvec, bin,i) {
		if ((fd = open(bin, O_RDONLY)) < 0) {
			condlog(0, "cannot open %s", bin);
			return -1;
		}
		if (fstat(fd, &statbuf) < 0) {
			condlog(0, "cannot stat %s", bin);
			/* do not leak the descriptor on the error path */
			close(fd);
			return -1;
		}
		size += statbuf.st_size;
		close(fd);
	}
	condlog(3, "ramfs maxsize is %u", (unsigned int) size);

	/*
	 * mount the ramfs
	 */
	if (safe_sprintf(ramfs_args, "maxsize=%u", (unsigned int) size)) {
		fprintf(stderr, "ramfs_args too small\n");
		return -1;
	}
	if (mount(NULL, CALLOUT_DIR, "ramfs", MS_SYNCHRONOUS, ramfs_args) < 0) {
		condlog(0, "cannot mount ramfs on " CALLOUT_DIR);
		return -1;
	}
	condlog(4, "mount ramfs on " CALLOUT_DIR);

	/*
	 * populate the ramfs with callout binaries
	 */
	vector_foreach_slot (conf->binvec, bin,i) {
		if (copytodir(bin, CALLOUT_DIR) < 0) {
			condlog(0, "cannot copy %s in ramfs", bin);
			exit_daemon(1);
		}
		condlog(4, "cp %s in ramfs", bin);
	}
	strvec_free(conf->binvec);

	/*
	 * bind the ramfs to :
	 * /sbin : default home of multipath ...
	 * /bin  : default home of scsi_id ...
	 * /tmp  : home of scsi_id temp files
	 */
	if (mount(CALLOUT_DIR, "/sbin", NULL, MS_BIND, NULL) < 0) {
		condlog(0, "cannot bind ramfs on /sbin");
		return -1;
	}
	condlog(4, "bind ramfs on /sbin");
	if (mount(CALLOUT_DIR, "/bin", NULL, MS_BIND, NULL) < 0) {
		condlog(0, "cannot bind ramfs on /bin");
		return -1;
	}
	condlog(4, "bind ramfs on /bin");
	if (mount(CALLOUT_DIR, "/tmp", NULL, MS_BIND, NULL) < 0) {
		condlog(0, "cannot bind ramfs on /tmp");
		return -1;
	}
	condlog(4, "bind ramfs on /tmp");

	return 0;
}
#endif
1091
/*
 * Install func as the handler for signal signo through sigaction(),
 * using an empty sa_mask and sa_flags set to 0.
 *
 * Returns the handler that was in place before the call, or SIG_ERR
 * when sigaction() fails.
 */
static void *
signal_set(int signo, void (*func) (int))
{
        struct sigaction previous;
        struct sigaction wanted;

        wanted.sa_flags = 0;
        sigemptyset(&wanted.sa_mask);
        wanted.sa_handler = func;

        if (sigaction(signo, &wanted, &previous) < 0)
                return (SIG_ERR);

        return (previous.sa_handler);
}
1110
/*
 * SIGHUP handler: log that the signal was received and, in _DEBUG_
 * builds, call dbg_free_final().
 */
static void
sighup (int sig)
{
        (void) sig;     /* the signal number itself is not used */

        condlog(2, "SIGHUP received");
#ifdef _DEBUG_
        dbg_free_final(NULL);
#endif
}
1120
/*
 * Termination signal handler: hand over to exit_daemon() with a
 * status of 0.
 */
static void
sigend (int sig)
{
        (void) sig;     /* the signal number itself is not used */

        exit_daemon(0);
}
1126
1127 static void
1128 signal_init(void)
1129 {
1130         signal_set(SIGHUP, sighup);
1131         signal_set(SIGINT, sigend);
1132         signal_set(SIGTERM, sigend);
1133         signal_set(SIGKILL, sigend);
1134 }
1135
1136 static void
1137 setscheduler (void)
1138 {
1139         int res;
1140         static struct sched_param sched_param = {
1141                 sched_priority: 99
1142         };
1143
1144         res = sched_setscheduler (0, SCHED_RR, &sched_param);
1145
1146         if (res == -1)
1147                 condlog(LOG_WARNING, "Could not set SCHED_RR at priority 99");
1148         return;
1149 }
1150
/*
 * Write val, formatted as a decimal integer, to /proc/self/oom_adj.
 *
 * Nothing is done when the file cannot be opened for writing.
 */
static void
set_oom_adj (int val)
{
        FILE *oom_file = fopen("/proc/self/oom_adj", "w");

        if (oom_file) {
                fprintf(oom_file, "%i", val);
                fclose(oom_file);
        }
}
1164         
1165 static int
1166 child (void * param)
1167 {
1168         pthread_t check_thr, uevent_thr, uxlsnr_thr;
1169         pthread_attr_t attr;
1170         struct paths * allpaths;
1171
1172         mlockall(MCL_CURRENT | MCL_FUTURE);
1173
1174         if (logsink)
1175                 log_thread_start();
1176
1177         condlog(2, "--------start up--------");
1178         condlog(2, "read " DEFAULT_CONFIGFILE);
1179
1180         if (load_config(DEFAULT_CONFIGFILE))
1181                 exit(1);
1182
1183         setlogmask(LOG_UPTO(conf->verbosity + 3));
1184
1185         /*
1186          * fill the voids left in the config file
1187          */
1188         if (!conf->binvec) {
1189                 conf->binvec = vector_alloc();
1190                 push_callout("/sbin/scsi_id");
1191         }
1192         if (!conf->multipath) {
1193                 conf->multipath = MULTIPATH;
1194                 push_callout(conf->multipath);
1195         }
1196         if (!conf->checkint) {
1197                 conf->checkint = CHECKINT;
1198                 conf->max_checkint = MAX_CHECKINT;
1199         }
1200
1201         if (pidfile_create(DEFAULT_PIDFILE, getpid())) {
1202                 if (logsink)
1203                         log_thread_stop();
1204
1205                 exit(1);
1206         }
1207         signal_init();
1208         setscheduler();
1209         set_oom_adj(-17);
1210         allpaths = init_paths();
1211
1212         if (!allpaths)
1213                 exit(1);
1214
1215         if (sysfs_get_mnt_path(sysfs_path, FILE_NAME_SIZE)) {
1216                 condlog(0, "can not find sysfs mount point");
1217                 exit(1);
1218         }
1219
1220 #ifdef CLONE_NEWNS
1221         if (prepare_namespace() < 0) {
1222                 condlog(0, "cannot prepare namespace");
1223                 exit_daemon(1);
1224         }
1225 #endif
1226
1227         /*
1228          * fetch paths and multipaths lists
1229          * no paths and/or no multipaths are valid scenarii
1230          * vectors maintenance will be driven by events
1231          */
1232         path_discovery(allpaths->pathvec, conf, DI_SYSFS | DI_WWID);
1233         get_dm_mpvec(allpaths);
1234
1235         /*
1236          * start threads
1237          */
1238         pthread_attr_init(&attr);
1239         pthread_attr_setstacksize(&attr, 64 * 1024);
1240         
1241         pthread_create(&check_thr, &attr, checkerloop, allpaths);
1242         pthread_create(&uevent_thr, &attr, ueventloop, allpaths);
1243         pthread_create(&uxlsnr_thr, &attr, uxlsnrloop, allpaths);
1244         pthread_join(check_thr, NULL);
1245         pthread_join(uevent_thr, NULL);
1246         pthread_join(uxlsnr_thr, NULL);
1247
1248         return 0;
1249 }
1250
1251 int
1252 main (int argc, char *argv[])
1253 {
1254         extern char *optarg;
1255         extern int optind;
1256         int arg;
1257         int err;
1258         void * child_stack;
1259         
1260         logsink = 1;
1261
1262         if (getuid() != 0) {
1263                 fprintf(stderr, "need to be root\n");
1264                 exit(1);
1265         }
1266
1267         /* make sure we don't lock any path */
1268         chdir("/");
1269         umask(umask(077) | 022);
1270
1271         child_stack = (void *)malloc(CHILD_STACK_SIZE);
1272
1273         if (!child_stack)
1274                 exit(1);
1275
1276         conf = alloc_config();
1277
1278         if (!conf)
1279                 exit(1);
1280
1281         while ((arg = getopt(argc, argv, ":dv:k::")) != EOF ) {
1282         switch(arg) {
1283                 case 'd':
1284                         logsink = 0;
1285                         break;
1286                 case 'v':
1287                         if (sizeof(optarg) > sizeof(char *) ||
1288                             !isdigit(optarg[0]))
1289                                 exit(1);
1290
1291                         conf->verbosity = atoi(optarg);
1292                         break;
1293                 case 'k':
1294                         uxclnt(optarg);
1295                         exit(0);
1296                 default:
1297                         ;
1298                 }
1299         }
1300
1301 #ifdef CLONE_NEWNS      /* recent systems have clone() */
1302
1303 #    if defined(__hppa__) || defined(__powerpc64__)
1304         err = clone(child, child_stack, CLONE_NEWNS, NULL);
1305 #    elif defined(__ia64__)
1306         err = clone2(child, child_stack,
1307                      CHILD_STACK_SIZE, CLONE_NEWNS, NULL,
1308                      NULL, NULL, NULL);
1309 #    else
1310         err = clone(child, child_stack + CHILD_STACK_SIZE, CLONE_NEWNS, NULL);
1311 #    endif
1312         if (err < 0)
1313                 exit (1);
1314
1315         exit(0);
1316 #else                   /* older system fallback to fork() */
1317         err = fork();
1318         
1319         if (err < 0)
1320                 exit (1);
1321
1322         return (child(child_stack));
1323 #endif
1324
1325 }