5e76ff263797ce6e0430f58dcb227999781ba41c
[multipath-tools/.git] / multipathd / main.c
1 #include <string.h>
2 #include <pthread.h>
3 #include <stdio.h>
4 #include <unistd.h>
5 #include <linux/unistd.h>
6 #include <stdlib.h>
7 #include <sys/types.h>
8 #include <sys/stat.h>
9 #include <fcntl.h>
10 #include <libdevmapper.h>
11 #include <signal.h>
12 #include <wait.h>
13 #include <sched.h>
14 #include <errno.h>
15 #include <sys/mount.h>
16 #include <sys/mman.h>
17
18 /*
19  * libsysfs
20  */
21 #include <sysfs/libsysfs.h>
22 #include <sysfs/dlist.h>
23
24 /*
25  * libcheckers
26  */
27 #include <checkers.h>
28 #include <path_state.h>
29
30 /*
31  * libmultipath
32  */
33 #include <parser.h>
34 #include <vector.h>
35 #include <memory.h>
36 #include <config.h>
37 #include <callout.h>
38 #include <util.h>
39 #include <blacklist.h>
40 #include <hwtable.h>
41 #include <defaults.h>
42 #include <structs.h>
43 #include <dmparser.h>
44 #include <devmapper.h>
45 #include <dict.h>
46 #include <discovery.h>
47 #include <debug.h>
48 #include <propsel.h>
49 #include <uevent.h>
50 #include <switchgroup.h>
51
52 #include "main.h"
53 #include "copy.h"
54 #include "clone_platform.h"
55 #include "pidfile.h"
56
57 #define FILE_NAME_SIZE 256
58 #define CMDSIZE 160
59
60 #define CALLOUT_DIR "/var/cache/multipathd"
61
/*
 * Log a pending checker message, then clear the message buffer.
 *
 * a: checker message buffer; MAX_CHECKER_MSG_SIZE bytes are zeroed
 * b: string printed in front of the message
 *
 * Nothing happens when the buffer holds an empty string. The body is
 * wrapped in do { } while (0) so the macro expands to one statement and
 * stays correct inside an unbraced if/else.
 */
#define LOG_MSG(a,b) \
        do { \
                if (strlen(a)) { \
                        condlog(1, "%s: %s", (b), (a)); \
                        memset((a), 0, MAX_CHECKER_MSG_SIZE); \
                } \
        } while (0)
67
/*
 * Mutex helpers. With LCKDBG defined, every lock/unlock is traced on
 * stderr before the pthread call is made. Both variants expand to a
 * single expression whose value is the pthread return code, so the
 * macros behave the same in unbraced if/else bodies whether or not
 * tracing is compiled in.
 */
#ifdef LCKDBG
#define lock(a) \
        (fprintf(stderr, "%s:%s(%i) lock %p\n", __FILE__, __FUNCTION__, __LINE__, (void *)(a)), \
         pthread_mutex_lock(a))
#define unlock(a) \
        (fprintf(stderr, "%s:%s(%i) unlock %p\n", __FILE__, __FUNCTION__, __LINE__, (void *)(a)), \
         pthread_mutex_unlock(a))
#else
#define lock(a) pthread_mutex_lock(a)
#define unlock(a) pthread_mutex_unlock(a)
#endif
79
80 /*
81  * structs
82  */
/*
 * State handed to the checker, uevent and event waiter threads of this
 * file.
 */
struct paths {
        pthread_mutex_t *lock;  /* taken with lock()/unlock() around accesses to the vectors */
        vector pathvec;         /* iterated as struct path entries */
        vector mpvec;           /* iterated as struct multipath entries */
};
88
/*
 * Per-map context of a device-mapper event waiter thread.
 */
struct event_thread {
        pthread_t *thread;              /* allocated by alloc_waiter(), filled in by pthread_create() */
        int event_nr;                   /* event counter tracked by waiteventloop(); 0 means "fetch it" */
        char mapname[WWID_SIZE];        /* copy of the map alias being watched */
        struct paths *allpaths;         /* shared path/map vectors and their lock */
};
95
96 static void *
97 alloc_waiter (void)
98 {
99
100         struct event_thread * wp;
101
102         wp = MALLOC(sizeof(struct event_thread));
103
104         if (!wp)
105                 return NULL;
106
107         wp->thread = MALLOC(sizeof(pthread_t));
108
109         if (!wp->thread)
110                 goto out;
111                 
112         return wp;
113
114 out:
115         free(wp);
116         condlog(0, "failed to alloc waiter");
117         return NULL;
118 }
119
120 static void
121 set_paths_owner (struct paths * allpaths, struct multipath * mpp)
122 {
123         int i;
124         struct path * pp;
125
126         vector_foreach_slot (allpaths->pathvec, pp, i) {
127                 if (!strncmp(mpp->wwid, pp->wwid, WWID_SIZE)) {
128                         condlog(4, "%s ownership set", pp->dev_t);
129                         pp->mpp = mpp;
130                 }
131         }
132 }
133
134 static int
135 update_multipath_table (struct multipath *mpp, vector pathvec)
136 {
137         if (dm_get_map(mpp->alias, &mpp->size, mpp->params))
138                 return 1;
139
140         if(disassemble_map(pathvec, mpp->params, mpp))
141                 return 1;
142
143         return 0;
144 }
145
146 static int
147 update_multipath_status (struct multipath *mpp)
148 {
149         if(dm_get_status(mpp->alias, mpp->status))
150                 return 1;
151
152         if (disassemble_status(mpp->status, mpp))
153                 return 1;
154
155         return 0;
156 }
157
158 static int
159 update_multipath_strings (struct multipath *mpp, vector pathvec)
160 {
161         if (update_multipath_table(mpp, pathvec))
162                 return 1;
163
164         if (update_multipath_status(mpp))
165                 return 1;
166
167         return 0;
168 }
169
170 static int
171 setup_multipath (struct paths * allpaths, struct multipath * mpp)
172 {
173         char * wwid;
174
175         wwid = get_mpe_wwid(mpp->alias);
176
177         if (wwid) {
178                 strncpy(mpp->wwid, wwid, WWID_SIZE);
179                 wwid = NULL;
180         } else
181                 strncpy(mpp->wwid, mpp->alias, WWID_SIZE);
182
183         condlog(4, "discovered map %s", mpp->alias);
184
185         if (update_multipath_strings(mpp, allpaths->pathvec))
186                 goto out;
187
188         set_paths_owner(allpaths, mpp);
189         mpp->mpe = find_mpe(mpp->wwid);
190         select_pgfailback(mpp);
191
192         return 0;
193 out:
194         free_multipath(mpp, KEEP_PATHS);
195         condlog(0, "failed to setup multipath");
196         return 1;
197 }
198
199 static void
200 switch_pathgroup (struct multipath * mpp)
201 {
202         struct pathgroup * pgp;
203         struct path * pp;
204         int i, j;
205         
206         if (!mpp || mpp->pgfailback == FAILBACK_MANUAL)
207                 return;
208         /*
209          * Refresh path priority values
210          */
211         vector_foreach_slot (mpp->pg, pgp, i)
212                 vector_foreach_slot (pgp->paths, pp, j)
213                         pathinfo(pp, conf->hwtable, DI_PRIO);
214
215         select_path_group(mpp); /* sets mpp->nextpg */
216         pgp = VECTOR_SLOT(mpp->pg, mpp->nextpg - 1);
217         
218         if (pgp && pgp->status != PGSTATE_ACTIVE) {
219                 dm_switchgroup(mpp->alias, mpp->nextpg);
220                 condlog(2, "%s: switch to path group #%i",
221                          mpp->alias, mpp->nextpg);
222         }
223 }
224
225 static int
226 update_multipath (struct paths *allpaths, char *mapname)
227 {
228         struct multipath *mpp;
229         struct pathgroup  *pgp;
230         struct path *pp;
231         int i, j;
232         int r = 1;
233
234         lock(allpaths->lock);
235         mpp = find_mp(allpaths->mpvec, mapname);
236
237         if (!mpp)
238                 goto out;
239
240         free_pgvec(mpp->pg, KEEP_PATHS);
241         mpp->pg = NULL;
242
243         setup_multipath(allpaths, mpp);
244
245         /*
246          * compare checkers states with DM states
247          */
248         vector_foreach_slot (mpp->pg, pgp, i) {
249                 vector_foreach_slot (pgp->paths, pp, j) {
250                         if (pp->dmstate != PSTATE_FAILED)
251                                 continue;
252
253                         if (pp->state != PATH_DOWN) {
254                                 condlog(2, "%s: mark as failed", pp->dev_t);
255                                 pp->state = PATH_DOWN;
256
257                                 /*
258                                  * if opportune,
259                                  * schedule the next check earlier
260                                  */
261                                 if (pp->tick > conf->checkint)
262                                         pp->tick = conf->checkint;
263                         }
264                 }
265         }
266         r = 0;
267 out:
268         unlock(allpaths->lock);
269
270         if (r)
271                 condlog(0, "failed to update multipath");
272
273         return r;
274 }
275
276 /*
277  * returns the reschedule delay
278  * negative means *stop*
279  */
280 static int
281 waiteventloop (struct event_thread * waiter)
282 {
283         struct dm_task *dmt;
284         int event_nr;
285         int r = 1; /* upon problem reschedule 1s later */
286
287         if (!waiter->event_nr)
288                 waiter->event_nr = dm_geteventnr(waiter->mapname);
289
290         if (!(dmt = dm_task_create(DM_DEVICE_WAITEVENT)))
291                 goto out;
292
293         if (!dm_task_set_name(dmt, waiter->mapname))
294                 goto out;
295
296         if (waiter->event_nr && !dm_task_set_event_nr(dmt, waiter->event_nr))
297                 goto out;
298
299         dm_task_no_open_count(dmt);
300
301         dm_task_run(dmt);
302
303         waiter->event_nr++;
304
305         /*
306          * upon event ...
307          */
308         while (1) {
309                 condlog(2, "devmap event (%i) on %s",
310                                 waiter->event_nr, waiter->mapname);
311
312                 /*
313                  * event might be :
314                  *
315                  * 1) a table reload, which means our mpp structure is
316                  *    obsolete : refresh it through update_multipath()
317                  * 2) a path failed by DM : mark as such through
318                  *    update_multipath()
319                  * 3) map has gone away : stop the thread.
320                  * 4) a path reinstate : nothing to do
321                  * 5) a switch group : nothing to do
322                  */
323                 if (update_multipath(waiter->allpaths, waiter->mapname)) {
324                         r = -1; /* stop the thread */
325                         goto out;
326                 }
327                 event_nr = dm_geteventnr(waiter->mapname);
328
329                 if (waiter->event_nr == event_nr)
330                         break;
331
332                 waiter->event_nr = event_nr;
333         }
334
335 out:
336         dm_task_destroy(dmt);
337         return r;
338 }
339
340 static void *
341 waitevent (void * et)
342 {
343         int r;
344         struct event_thread *waiter;
345
346         mlockall(MCL_CURRENT | MCL_FUTURE);
347
348         waiter = (struct event_thread *)et;
349         pthread_setcanceltype(PTHREAD_CANCEL_ASYNCHRONOUS, NULL);
350
351         while (1) {
352                 r = waiteventloop(waiter);
353
354                 if (r < 0)
355                         break;
356
357                 sleep(r);
358         }
359
360         pthread_exit(waiter->thread);
361
362         return NULL;
363 }
364
365 static void
366 free_waiter (struct event_thread * wp)
367 {
368         free(wp->thread);
369         free(wp);
370 }
371
372 static int
373 stop_waiter_thread (struct multipath * mpp, struct paths * allpaths)
374 {
375         struct event_thread * wp;
376
377         if (!mpp)
378                 return 0;
379
380         wp = (struct event_thread *)mpp->waiter;
381
382         if (!wp)
383                 return 1;
384
385         condlog(2, "reap event checker : %s", wp->mapname);
386         pthread_cancel(*wp->thread);
387         free_waiter(wp);
388
389         return 0;
390 }
391
392 static int
393 start_waiter_thread (struct multipath * mpp, struct paths * allpaths)
394 {
395         pthread_attr_t attr;
396         struct event_thread * wp;
397
398         if (!mpp)
399                 return 0;
400
401         if (pthread_attr_init(&attr))
402                 return 1;
403
404         pthread_attr_setstacksize(&attr, 32 * 1024);
405         wp = alloc_waiter();
406
407         if (!wp)
408                 return 1;
409
410         mpp->waiter = (void *)wp;
411         strncpy(wp->mapname, mpp->alias, WWID_SIZE);
412         wp->allpaths = allpaths;
413
414         if (pthread_create(wp->thread, &attr, waitevent, wp)) {
415                 condlog(0, "%s: cannot create event checker", wp->mapname);
416                 goto out;
417         }
418         condlog(2, "%s: event checker started", wp->mapname);
419
420         return 0;
421 out:
422         free_waiter(wp);
423         mpp->waiter = NULL;
424         condlog(0, "failed to start waiter thread");
425         return 1;
426 }
427
428 static void
429 remove_map (struct multipath * mpp, struct paths * allpaths)
430 {
431         int i;
432
433         stop_waiter_thread(mpp, allpaths);
434         i = find_slot(allpaths->mpvec, (void *)mpp);
435         vector_del_slot(allpaths->mpvec, i);
436         free_multipath(mpp, KEEP_PATHS);
437 }
438
439 static int
440 uev_add_map (char * devname, struct paths * allpaths)
441 {
442         int major, minor;
443         char dev_t[BLK_DEV_SIZE];
444         char * buff;
445         struct multipath * mpp;
446
447         if (sysfs_get_dev(sysfs_path, devname, dev_t, BLK_DEV_SIZE))
448                 return 1;
449
450         if (sscanf(dev_t, "%d:%d", &major, &minor) != 2)
451                 return 1;
452
453         buff = dm_mapname(major, minor, "multipath");
454                 
455         if (!buff)
456                 return 1;
457         
458         mpp = find_mp(allpaths->mpvec, buff);
459
460         if (mpp) {
461                 /*
462                  * devmap already in mpvec
463                  * but remove DM uevent are somewhet unreliable
464                  * so for now consider safer to remove and re-add the map
465                  */
466                 condlog(2, "%s: remove dead config", mpp->alias);
467                 remove_map(mpp, allpaths);
468                 mpp = NULL;
469         }
470         if (!mpp) {
471                 mpp = alloc_multipath();
472
473                 if (!mpp)
474                         return 1;
475
476                 mpp->minor = minor;
477                 mpp->alias = MALLOC(strlen(buff) + 1);
478
479                 if (!mpp->alias)
480                         goto out;
481
482                 strncat(mpp->alias, buff, strlen(buff));
483
484                 dm_get_map(mpp->alias, &mpp->size, mpp->params);
485                 dm_get_status(mpp->alias, mpp->status);
486
487                 if (setup_multipath(allpaths, mpp))
488                         return 1; /* mpp freed in setup_multipath */
489
490                 if (!vector_alloc_slot(allpaths->mpvec))
491                         goto out;
492
493                 vector_set_slot(allpaths->mpvec, mpp);
494                 set_paths_owner(allpaths, mpp);
495
496                 if (start_waiter_thread(mpp, allpaths))
497                         goto out;
498         }
499         return 0;
500 out:
501         free_multipath(mpp, KEEP_PATHS);
502         return 1;
503 }
504
505 static int
506 uev_remove_map (char * devname, struct paths * allpaths)
507 {
508         int minor;
509         struct multipath * mpp;
510
511         mpp->minor = atoi(devname + 3);
512         mpp = find_mp_by_minor(allpaths->mpvec, minor);
513
514         if (mpp)
515                 remove_map(mpp, allpaths);
516
517         return 0;
518 }
519
520 static int
521 uev_add_path (char * devname, struct paths * allpaths)
522 {
523         struct path * pp;
524
525         pp = find_path_by_dev(allpaths->pathvec, devname);
526
527         if (pp) {
528                 condlog(3, "%s: already in pathvec");
529                 return 0;
530         }
531         condlog(2, "add %s path checker", devname);
532         pp = store_pathinfo(allpaths->pathvec, conf->hwtable,
533                        devname, DI_SYSFS | DI_WWID);
534
535         if (!pp)
536                 return 1;
537
538         pp->mpp = find_mp_by_wwid(allpaths->mpvec, pp->wwid);
539         condlog(4, "%s: ownership set to %s", pp->dev_t, pp->mpp->alias);
540
541         return 0;
542 }
543
544 static int
545 uev_remove_path (char * devname, struct paths * allpaths)
546 {
547         int i;
548         struct path * pp;
549
550         pp = find_path_by_dev(allpaths->pathvec, devname);
551
552         if (!pp) {
553                 condlog(3, "%s: not in pathvec");
554                 return 0;
555         }
556         condlog(2, "remove %s path checker", devname);
557         i = find_slot(allpaths->pathvec, (void *)pp);
558         vector_del_slot(allpaths->pathvec, i);
559         free_path(pp);
560
561         return 0;
562 }
563
564 int 
565 uev_trigger (struct uevent * uev, void * trigger_data)
566 {
567         int r = 0;
568         char devname[32];
569         struct paths * allpaths;
570
571         allpaths = (struct paths *)trigger_data;
572         lock(allpaths->lock);
573
574         if (strncmp(uev->devpath, "/block", 6))
575                 goto out;
576
577         basename(uev->devpath, devname);
578
579         /*
580          * device map add/remove event
581          */
582         if (!strncmp(devname, "dm-", 3)) {
583                 condlog(2, "%s %s devmap", uev->action, devname);
584
585                 if (!strncmp(uev->action, "add", 3)) {
586                         r = uev_add_map(devname, allpaths);
587                         goto out;
588                 }
589                 if (!strncmp(uev->action, "remove", 6)) {
590                         r = uev_remove_map(devname, allpaths);
591                         goto out;
592                 }
593                 goto out;
594         }
595         
596         /*
597          * path add/remove event
598          */
599         if (blacklist(conf->blist, devname))
600                 goto out;
601
602         if (!strncmp(uev->action, "add", 3)) {
603                 r = uev_add_path(devname, allpaths);
604                 goto out;
605         }
606         if (!strncmp(uev->action, "remove", 6)) {
607                 r = uev_remove_path(devname, allpaths);
608                 goto out;
609         }
610
611 out:
612         FREE(uev);
613         unlock(allpaths->lock);
614         return r;
615 }
616
617 static void *
618 ueventloop (void * ap)
619 {
620         uevent_listen(&uev_trigger, ap);
621
622         return NULL;
623 }
624
625 static void
626 strvec_free (vector vec)
627 {
628         int i;
629         char * str;
630
631         vector_foreach_slot (vec, str, i)
632                 if (str)
633                         FREE(str);
634
635         vector_free(vec);
636 }
637
638 static int
639 exit_daemon (int status)
640 {
641         if (status != 0)
642                 fprintf(stderr, "bad exit status. see daemon.log\n");
643
644         condlog(3, "umount ramfs");
645         umount(CALLOUT_DIR);
646
647         condlog(3, "unlink pidfile");
648         unlink(DEFAULT_PIDFILE);
649
650         condlog(2, "--------shut down-------");
651         
652         if (logsink)
653                 log_thread_stop();
654
655         exit(status);
656 }
657
658 /*
659  * caller must have locked the path list before calling that function
660  */
661 static int
662 get_dm_mpvec (struct paths * allpaths)
663 {
664         int i;
665         struct multipath * mpp;
666
667         if (dm_get_maps(allpaths->mpvec, "multipath"))
668                 return 1;
669
670         vector_foreach_slot (allpaths->mpvec, mpp, i) {
671                 setup_multipath(allpaths, mpp);
672                 mpp->minor = dm_get_minor(mpp->alias);
673                 start_waiter_thread(mpp, allpaths);
674         }
675
676         return 0;
677 }
678
679 static void
680 fail_path (struct path * pp)
681 {
682         if (!pp->mpp)
683                 return;
684
685         condlog(2, "checker failed path %s in map %s",
686                  pp->dev_t, pp->mpp->alias);
687
688         dm_fail_path(pp->mpp->alias, pp->dev_t);
689 }
690
691 /*
692  * caller must have locked the path list before calling that function
693  */
694 static void
695 reinstate_path (struct path * pp)
696 {
697         if (pp->mpp) {
698                 if (dm_reinstate(pp->mpp->alias, pp->dev_t))
699                         condlog(0, "%s: reinstate failed", pp->dev_t);
700                 else
701                         condlog(2, "%s: reinstated", pp->dev_t);
702         }
703 }
704
705 static void *
706 checkerloop (void *ap)
707 {
708         struct paths *allpaths;
709         struct path *pp;
710         int i;
711         int newstate;
712         char checker_msg[MAX_CHECKER_MSG_SIZE];
713
714         mlockall(MCL_CURRENT | MCL_FUTURE);
715
716         memset(checker_msg, 0, MAX_CHECKER_MSG_SIZE);
717         allpaths = (struct paths *)ap;
718
719         condlog(2, "path checkers start up");
720
721         while (1) {
722                 lock(allpaths->lock);
723                 condlog(4, "tick");
724
725                 vector_foreach_slot (allpaths->pathvec, pp, i) {
726                         if (pp->tick) {
727                                 /*
728                                  * don't check this path yet
729                                  */
730                                 pp->tick--;
731                                 continue;
732                         }
733
734                         /*
735                          * provision a next check soonest,
736                          * in case we exit abnormaly from here
737                          */
738                         pp->tick = conf->checkint;
739                         
740                         if (!pp->checkfn) {
741                                 pathinfo(pp, conf->hwtable, DI_SYSFS);
742                                 select_checkfn(pp);
743                         }
744
745                         if (!pp->checkfn) {
746                                 condlog(0, "%s: checkfn is void", pp->dev);
747                                 continue;
748                         }
749                         newstate = pp->checkfn(pp->fd, checker_msg,
750                                                &pp->checker_context);
751                         
752                         if (newstate != pp->state) {
753                                 pp->state = newstate;
754                                 LOG_MSG(checker_msg, pp->dev_t);
755
756                                 /*
757                                  * upon state change, reset the checkint
758                                  * to the shortest delay
759                                  */
760                                 pp->checkint = conf->checkint;
761
762                                 if (newstate == PATH_DOWN ||
763                                     newstate == PATH_SHAKY) {
764                                         /*
765                                          * proactively fail path in the DM
766                                          */
767                                         fail_path(pp);
768
769                                         /*
770                                          * cancel scheduled failback
771                                          */
772                                         pp->mpp->failback_tick = 0;
773                                         continue;
774                                 }
775
776                                 /*
777                                  * reinstate this path
778                                  */
779                                 reinstate_path(pp);
780
781                                 /*
782                                  * need to switch group ?
783                                  */
784                                 update_multipath_strings(pp->mpp,
785                                                          allpaths->pathvec);
786
787                                 /*
788                                  * schedule defered failback
789                                  */
790                                 if (pp->mpp->pgfailback > 0)
791                                         pp->mpp->failback_tick =
792                                                 pp->mpp->pgfailback;
793
794                                 if (pp->mpp->pgfailback == FAILBACK_IMMEDIATE)
795                                         switch_pathgroup(pp->mpp);
796                         }
797                         else if (newstate == PATH_UP || newstate == PATH_GHOST) {
798                                 /*
799                                  * PATH_UP for last two checks
800                                  * defered failback getting sooner
801                                  */
802                                 if (pp->mpp->pgfailback > 0) {
803                                         if (pp->mpp->failback_tick > 0) {
804                                                 pp->mpp->failback_tick--;
805
806                                                 if (!pp->mpp->failback_tick)
807                                                         switch_pathgroup(pp->mpp);
808                                         }
809                                 }
810                                 
811                                 /*
812                                  * and double the next check delay.
813                                  * max at conf->max_checkint
814                                  */
815                                 if (pp->checkint < (conf->max_checkint / 2))
816                                         pp->checkint = 2 * pp->checkint;
817                                 else
818                                         pp->checkint = conf->max_checkint;
819
820                                 pp->tick = pp->checkint;
821                                 condlog(4, "%s: delay next check %is",
822                                                 pp->dev_t, pp->tick);
823                         }
824                         pp->state = newstate;
825                 }
826                 unlock(allpaths->lock);
827                 sleep(1);
828         }
829         return NULL;
830 }
831
832 static struct paths *
833 init_paths (void)
834 {
835         struct paths *allpaths;
836
837         allpaths = MALLOC(sizeof(struct paths));
838
839         if (!allpaths)
840                 return NULL;
841
842         allpaths->lock = 
843                 (pthread_mutex_t *)MALLOC(sizeof(pthread_mutex_t));
844
845         if (!allpaths->lock)
846                 goto out;
847
848         allpaths->pathvec = vector_alloc();
849
850         if (!allpaths->pathvec)
851                 goto out1;
852                 
853         allpaths->mpvec = vector_alloc();
854
855         if (!allpaths->mpvec)
856                 goto out2;
857         
858         pthread_mutex_init(allpaths->lock, NULL);
859
860         return allpaths;
861
862 out2:
863         vector_free(allpaths->pathvec);
864 out1:
865         FREE(allpaths->lock);
866 out:
867         FREE(allpaths);
868         condlog(0, "failed to init paths");
869         return NULL;
870 }
871
872 /*
873  * this logic is all about keeping callouts working in case of
874  * system disk outage (think system over SAN)
875  * this needs the clone syscall, so don't bother if not present
876  * (Debian Woody)
877  */
878 #ifdef CLONE_NEWNS
879 static int
880 prepare_namespace(void)
881 {
882         mode_t mode = S_IRWXU;
883         struct stat *buf;
884         char ramfs_args[64];
885         int i;
886         int fd;
887         char * bin;
888         size_t size = 10;
889         struct stat statbuf;
890         
891         buf = MALLOC(sizeof(struct stat));
892
893         /*
894          * create a temp mount point for ramfs
895          */
896         if (stat(CALLOUT_DIR, buf) < 0) {
897                 if (mkdir(CALLOUT_DIR, mode) < 0) {
898                         condlog(0, "cannot create " CALLOUT_DIR);
899                         return -1;
900                 }
901                 condlog(4, "created " CALLOUT_DIR);
902         }
903
904         /*
905          * compute the optimal ramdisk size
906          */
907         vector_foreach_slot (conf->binvec, bin,i) {
908                 if ((fd = open(bin, O_RDONLY)) < 0) {
909                         condlog(0, "cannot open %s", bin);
910                         return -1;
911                 }
912                 if (fstat(fd, &statbuf) < 0) {
913                         condlog(0, "cannot stat %s", bin);
914                         return -1;
915                 }
916                 size += statbuf.st_size;
917                 close(fd);
918         }
919         condlog(3, "ramfs maxsize is %u", (unsigned int) size);
920         
921         /*
922          * mount the ramfs
923          */
924         if (safe_sprintf(ramfs_args, "maxsize=%u", (unsigned int) size)) {
925                 fprintf(stderr, "ramfs_args too small\n");
926                 return -1;
927         }
928         if (mount(NULL, CALLOUT_DIR, "ramfs", MS_SYNCHRONOUS, ramfs_args) < 0) {
929                 condlog(0, "cannot mount ramfs on " CALLOUT_DIR);
930                 return -1;
931         }
932         condlog(4, "mount ramfs on " CALLOUT_DIR);
933
934         /*
935          * populate the ramfs with callout binaries
936          */
937         vector_foreach_slot (conf->binvec, bin,i) {
938                 if (copytodir(bin, CALLOUT_DIR) < 0) {
939                         condlog(0, "cannot copy %s in ramfs", bin);
940                         exit_daemon(1);
941                 }
942                 condlog(4, "cp %s in ramfs", bin);
943         }
944         strvec_free(conf->binvec);
945
946         /*
947          * bind the ramfs to :
948          * /sbin : default home of multipath ...
949          * /bin  : default home of scsi_id ...
950          * /tmp  : home of scsi_id temp files
951          */
952         if (mount(CALLOUT_DIR, "/sbin", NULL, MS_BIND, NULL) < 0) {
953                 condlog(0, "cannot bind ramfs on /sbin");
954                 return -1;
955         }
956         condlog(4, "bind ramfs on /sbin");
957         if (mount(CALLOUT_DIR, "/bin", NULL, MS_BIND, NULL) < 0) {
958                 condlog(0, "cannot bind ramfs on /bin");
959                 return -1;
960         }
961         condlog(4, "bind ramfs on /bin");
962         if (mount(CALLOUT_DIR, "/tmp", NULL, MS_BIND, NULL) < 0) {
963                 condlog(0, "cannot bind ramfs on /tmp");
964                 return -1;
965         }
966         condlog(4, "bind ramfs on /tmp");
967
968         return 0;
969 }
970 #endif
971
/*
 * Install func as the handler of signo, with an empty signal mask and
 * no flags.
 *
 * Returns the previous handler, or SIG_ERR when sigaction() fails.
 */
static void *
signal_set(int signo, void (*func) (int))
{
        struct sigaction previous;
        struct sigaction wanted;

        wanted.sa_handler = func;
        wanted.sa_flags = 0;
        sigemptyset(&wanted.sa_mask);

        if (sigaction(signo, &wanted, &previous) < 0)
                return (SIG_ERR);

        return (previous.sa_handler);
}
990
/*
 * SIGHUP handler: logs the signal and, in _DEBUG_ builds, calls
 * dbg_free_final().
 */
static void
sighup (int sig)
{
        (void)sig; /* the signal number is not needed */

        condlog(2, "SIGHUP received");

#ifdef _DEBUG_
        dbg_free_final(NULL);
#endif
}
1000
/*
 * Termination signal handler: shuts the daemon down with exit status 0.
 */
static void
sigend (int sig)
{
        (void)sig; /* the signal number is not needed */

        exit_daemon(0);
}
1006
1007 static void
1008 signal_init(void)
1009 {
1010         signal_set(SIGHUP, sighup);
1011         signal_set(SIGINT, sigend);
1012         signal_set(SIGTERM, sigend);
1013         signal_set(SIGKILL, sigend);
1014 }
1015
1016 static void
1017 setscheduler (void)
1018 {
1019         int res;
1020         static struct sched_param sched_param = {
1021                 sched_priority: 99
1022         };
1023
1024         res = sched_setscheduler (0, SCHED_RR, &sched_param);
1025
1026         if (res == -1)
1027                 condlog(LOG_WARNING, "Could not set SCHED_RR at priority 99");
1028         return;
1029 }
1030
/*
 * Write val to /proc/self/oom_adj. Nothing is done, and nothing is
 * reported, when the file cannot be opened for writing.
 */
static void
set_oom_adj (int val)
{
        FILE *oom = fopen("/proc/self/oom_adj", "w");

        if (oom == NULL)
                return;

        fprintf(oom, "%i", val);
        fclose(oom);
}
1044         
1045 static int
1046 child (void * param)
1047 {
1048         pthread_t check_thr, uevent_thr;
1049         pthread_attr_t attr;
1050         struct paths * allpaths;
1051
1052         mlockall(MCL_CURRENT | MCL_FUTURE);
1053
1054         if (logsink)
1055                 log_thread_start();
1056
1057         condlog(2, "--------start up--------");
1058         condlog(2, "read " DEFAULT_CONFIGFILE);
1059
1060         if (load_config(DEFAULT_CONFIGFILE))
1061                 exit(1);
1062
1063         setlogmask(LOG_UPTO(conf->verbosity + 3));
1064
1065         /*
1066          * fill the voids left in the config file
1067          */
1068         if (!conf->binvec) {
1069                 conf->binvec = vector_alloc();
1070                 push_callout("/sbin/scsi_id");
1071         }
1072         if (!conf->multipath) {
1073                 conf->multipath = MULTIPATH;
1074                 push_callout(conf->multipath);
1075         }
1076         if (!conf->checkint) {
1077                 conf->checkint = CHECKINT;
1078                 conf->max_checkint = MAX_CHECKINT;
1079         }
1080
1081         if (pidfile_create(DEFAULT_PIDFILE, getpid())) {
1082                 if (logsink)
1083                         log_thread_stop();
1084
1085                 exit(1);
1086         }
1087         signal_init();
1088         setscheduler();
1089         set_oom_adj(-17);
1090         allpaths = init_paths();
1091
1092         if (!allpaths)
1093                 exit(1);
1094
1095         if (sysfs_get_mnt_path(sysfs_path, FILE_NAME_SIZE)) {
1096                 condlog(0, "can not find sysfs mount point");
1097                 exit(1);
1098         }
1099
1100 #ifdef CLONE_NEWNS
1101         if (prepare_namespace() < 0) {
1102                 condlog(0, "cannot prepare namespace");
1103                 exit_daemon(1);
1104         }
1105 #endif
1106
1107         /*
1108          * fetch paths and multipaths lists
1109          * no paths and/or no multipaths are valid scenarii
1110          * vectors maintenance will be driven by events
1111          */
1112         path_discovery(allpaths->pathvec, conf, DI_SYSFS | DI_WWID);
1113         get_dm_mpvec(allpaths);
1114
1115         /*
1116          * start threads
1117          */
1118         pthread_attr_init(&attr);
1119         pthread_attr_setstacksize(&attr, 64 * 1024);
1120         
1121         pthread_create(&check_thr, &attr, checkerloop, allpaths);
1122         pthread_create(&uevent_thr, &attr, ueventloop, allpaths);
1123         pthread_join(check_thr, NULL);
1124         pthread_join(uevent_thr, NULL);
1125
1126         return 0;
1127 }
1128
1129 int
1130 main (int argc, char *argv[])
1131 {
1132         extern char *optarg;
1133         extern int optind;
1134         int arg;
1135         int err;
1136         void * child_stack;
1137         
1138         logsink = 1;
1139
1140         if (getuid() != 0) {
1141                 fprintf(stderr, "need to be root\n");
1142                 exit(1);
1143         }
1144
1145         /* make sure we don't lock any path */
1146         chdir("/");
1147         umask(umask(077) | 022);
1148
1149         child_stack = (void *)malloc(CHILD_STACK_SIZE);
1150
1151         if (!child_stack)
1152                 exit(1);
1153
1154         conf = alloc_config();
1155
1156         if (!conf)
1157                 exit(1);
1158
1159         while ((arg = getopt(argc, argv, ":dv:")) != EOF ) {
1160         switch(arg) {
1161                 case 'd':
1162                         logsink = 0;
1163                         break;
1164                 case 'v':
1165                         if (sizeof(optarg) > sizeof(char *) ||
1166                             !isdigit(optarg[0]))
1167                                 exit(1);
1168
1169                         conf->verbosity = atoi(optarg);
1170                         break;
1171                 default:
1172                         ;
1173                 }
1174         }
1175
1176 #ifdef CLONE_NEWNS      /* recent systems have clone() */
1177
1178 #    if defined(__hppa__) || defined(__powerpc64__)
1179         err = clone(child, child_stack, CLONE_NEWNS, NULL);
1180 #    elif defined(__ia64__)
1181         err = clone2(child, child_stack,
1182                      CHILD_STACK_SIZE, CLONE_NEWNS, NULL,
1183                      NULL, NULL, NULL);
1184 #    else
1185         err = clone(child, child_stack + CHILD_STACK_SIZE, CLONE_NEWNS, NULL);
1186 #    endif
1187         if (err < 0)
1188                 exit (1);
1189
1190         exit(0);
1191 #else                   /* older system fallback to fork() */
1192         err = fork();
1193         
1194         if (err < 0)
1195                 exit (1);
1196
1197         return (child(child_stack));
1198 #endif
1199
1200 }