41259c38ed23cb83efc30076c809ea65a1839f49
[multipath-tools/.git] / libmultipath / checkers / rbd.c
1 /*
2  * Copyright (c) 2016 Red Hat
3  * Copyright (c) 2004 Christophe Varoqui
4  *
5  * Code based off of tur.c and ceph's krbd.cc
6  */
7 #define _GNU_SOURCE
8 #include <stdio.h>
9 #include <stdlib.h>
10 #include <string.h>
11 #include <unistd.h>
12 #include <fcntl.h>
13 #include <errno.h>
14 #include <pthread.h>
15 #include <libudev.h>
16 #include <ifaddrs.h>
17 #include <sys/types.h>
18 #include <sys/stat.h>
19 #include <sys/ioctl.h>
20 #include <sys/time.h>
21 #include <sys/wait.h>
22
23 #include "rados/librados.h"
24
25 #include "structs.h"
26 #include "checkers.h"
27
28 #include "../libmultipath/debug.h"
29 #include "../libmultipath/uevent.h"
30
struct rbd_checker_context;

/*
 * Signature of the routines (check or repair) that can be run either
 * directly or from the checker thread. They return a PATH_* state and
 * write a human readable description into msg.
 */
typedef int (thread_fn)(struct rbd_checker_context *ct, char *msg);

/*
 * Format a checker message into msg, which must be at least
 * CHECKER_MSG_LEN bytes long.
 *
 * The expansion deliberately carries no trailing semicolon: callers write
 * "RBD_MSG(...);" and that must remain a single statement so the macro can
 * be used in an unbraced if/else.
 */
#define RBD_MSG(msg, fmt, args...) snprintf(msg, CHECKER_MSG_LEN, fmt, ##args)
35
/*
 * Per-path state of the rbd checker. Allocated by libcheck_init() and
 * released by cleanup_context() once the last holder drops its reference.
 */
struct rbd_checker_context {
        int rbd_bus_id;         /* N parsed from the "rbdN" block device name */
        char *client_addr;      /* copy of the "client_addr" sysfs attribute */
        char *config_info;      /* copy of the "config_info" sysfs attribute */
        char *snap;             /* "current_snap" attribute, NULL when it is "-" */
        char *pool;             /* copy of the "pool" sysfs attribute */
        char *image;            /* copy of the "name" sysfs attribute */
        char *username;         /* value after "name=" in config_info, or NULL */
        int remapped;           /* set by rbd_repair() once rbd_remap() succeeded */
        int blacklisted;        /* set when client_addr is found in the blacklist */

        rados_t cluster;        /* librados handle used for mon commands */

        int state;              /* last PATH_* result; written under lock */
        int running;            /* async thread started and result not collected yet */
        time_t time;            /* not referenced by the code in this file */
        thread_fn *fn;          /* routine the checker thread executes */
        pthread_t thread;       /* checker thread; zeroed by cleanup_func() */
        pthread_mutex_t lock;   /* protects state, used with the active condvar */
        pthread_cond_t active;  /* signalled by rbd_thread() when fn has finished */
        pthread_spinlock_t hldr_lock;   /* protects holders and thread */
        int holders;            /* reference count; context is freed at zero */
        char message[CHECKER_MSG_LEN];  /* message produced by the thread's fn */
};
60
61 int libcheck_init(struct checker * c)
62 {
63         struct rbd_checker_context *ct;
64         struct udev_device *block_dev;
65         struct udev_device *bus_dev;
66         struct udev *udev;
67         struct stat sb;
68         const char *block_name, *addr, *config_info;
69         const char *image, *pool, *snap, *username;
70         char sysfs_path[PATH_SIZE];
71         int ret;
72
73         ct = malloc(sizeof(struct rbd_checker_context));
74         if (!ct)
75                 return 1;
76         memset(ct, 0, sizeof(struct rbd_checker_context));
77         ct->holders = 1;
78         pthread_cond_init(&ct->active, NULL);
79         pthread_mutex_init(&ct->lock, NULL);
80         pthread_spin_init(&ct->hldr_lock, PTHREAD_PROCESS_PRIVATE);
81         c->context = ct;
82
83         /*
84          * The rbd block layer sysfs device is not linked to the rbd bus
85          * device that we interact with, so figure that out now.
86          */
87         if (fstat(c->fd, &sb) != 0)
88                 goto free_ct;
89
90         udev = udev_new();
91         if (!udev)
92                 goto free_ct;
93
94         block_dev = udev_device_new_from_devnum(udev, 'b', sb.st_rdev);
95         if (!block_dev)
96                 goto free_udev;
97
98         block_name  = udev_device_get_sysname(block_dev);
99         ret = sscanf(block_name, "rbd%d", &ct->rbd_bus_id);
100
101         udev_device_unref(block_dev);
102         if (ret != 1)
103                 goto free_udev;
104
105         snprintf(sysfs_path, sizeof(sysfs_path), "/sys/bus/rbd/devices/%d",
106                  ct->rbd_bus_id);
107         bus_dev = udev_device_new_from_syspath(udev, sysfs_path);
108         if (!bus_dev)
109                 goto free_udev;
110
111         addr = udev_device_get_sysattr_value(bus_dev, "client_addr");
112         if (!addr) {
113                 condlog(0, "Could not find client_addr in rbd sysfs. Try "
114                         "updating kernel");
115                 goto free_dev;
116         }
117
118         ct->client_addr = strdup(addr);
119         if (!ct->client_addr)
120                 goto free_dev;
121
122         config_info = udev_device_get_sysattr_value(bus_dev, "config_info");
123         if (!config_info)
124                 goto free_addr;
125
126         ct->config_info = strdup(config_info);
127         if (!ct->config_info)
128                 goto free_addr;
129
130         username = strstr(config_info, "name=");
131         if (username) {
132                 char *end;
133                 int len;
134
135                 username += 5;
136                 end = strchr(username, ',');
137                 if (!end)
138                         goto free_info;
139                 len = end - username;
140
141                 ct->username = malloc(len + 1);
142                 if (!ct->username)
143                         goto free_info;
144                 strncpy(ct->username, username, len);
145                 ct->username[len] = '\0';
146         }
147
148         image = udev_device_get_sysattr_value(bus_dev, "name");
149         if (!image)
150                 goto free_username;
151
152         ct->image = strdup(image);
153         if (!ct->image)
154                 goto free_info;
155
156         pool = udev_device_get_sysattr_value(bus_dev, "pool");
157         if (!pool)
158                 goto free_image;
159
160         ct->pool = strdup(pool);
161         if (!ct->pool)
162                 goto free_image;
163
164         snap = udev_device_get_sysattr_value(bus_dev, "current_snap");
165         if (!snap)
166                 goto free_pool;
167
168         if (strcmp("-", snap)) {
169                 ct->snap = strdup(snap);
170                 if (!ct->snap)
171                         goto free_pool;
172         }
173
174         if (rados_create(&ct->cluster, NULL) < 0) {
175                 condlog(0, "Could not create rados cluster");
176                 goto free_snap;
177         }
178
179         if (rados_conf_read_file(ct->cluster, NULL) < 0) {
180                 condlog(0, "Could not read rados conf");
181                 goto shutdown_rados;
182         }
183
184         ret = rados_connect(ct->cluster);
185         if (ret < 0) {
186                 condlog(0, "Could not connect to rados cluster");
187                 goto shutdown_rados;
188         }
189
190         udev_device_unref(bus_dev);
191         udev_unref(udev);
192
193         condlog(3, "rbd%d checker init %s %s/%s@%s %s", ct->rbd_bus_id,
194                 ct->client_addr, ct->pool, ct->image, ct->snap ? ct->snap : "-",
195                 ct->username ? ct->username : "none");
196         return 0;
197
198 shutdown_rados:
199         rados_shutdown(ct->cluster);
200 free_snap:
201         if (ct->snap)
202                 free(ct->snap);
203 free_pool:
204         free(ct->pool);
205 free_image:
206         free(ct->image);
207 free_username:
208         if (ct->username)
209                 free(ct->username);
210 free_info:
211         free(ct->config_info);
212 free_addr:
213         free(ct->client_addr);
214 free_dev:
215         udev_device_unref(bus_dev);
216 free_udev:
217         udev_unref(udev);
218 free_ct:
219         free(ct);
220         return 1;
221 }
222
223 static void cleanup_context(struct rbd_checker_context *ct)
224 {
225         pthread_mutex_destroy(&ct->lock);
226         pthread_cond_destroy(&ct->active);
227         pthread_spin_destroy(&ct->hldr_lock);
228
229         rados_shutdown(ct->cluster);
230
231         if (ct->username)
232                 free(ct->username);
233         if (ct->snap)
234                 free(ct->snap);
235         free(ct->pool);
236         free(ct->image);
237         free(ct->config_info);
238         free(ct->client_addr);
239         free(ct);
240 }
241
242 void libcheck_free(struct checker * c)
243 {
244         if (c->context) {
245                 struct rbd_checker_context *ct = c->context;
246                 int holders;
247                 pthread_t thread;
248
249                 pthread_spin_lock(&ct->hldr_lock);
250                 ct->holders--;
251                 holders = ct->holders;
252                 thread = ct->thread;
253                 pthread_spin_unlock(&ct->hldr_lock);
254                 if (holders)
255                         pthread_cancel(thread);
256                 else
257                         cleanup_context(ct);
258                 c->context = NULL;
259         }
260 }
261
262 static int rbd_is_blacklisted(struct rbd_checker_context *ct, char *msg)
263 {
264         char *addr_tok, *start, *save;
265         char *cmd[2];
266         char *blklist, *stat;
267         size_t blklist_len, stat_len;
268         int ret;
269         char *end;
270
271         cmd[0] = "{\"prefix\": \"osd blacklist ls\"}";
272         cmd[1] = NULL;
273
274         ret = rados_mon_command(ct->cluster, (const char **)cmd, 1, "", 0,
275                                 &blklist, &blklist_len, &stat, &stat_len);
276         if (ret < 0) {
277                 RBD_MSG(msg, "rbd checker failed: mon command failed %d",
278                         ret);
279                 return ret;
280         }
281
282         if (!blklist || !blklist_len)
283                 goto free_bufs;
284
285         /*
286          * parse list of addrs with the format
287          * ipv4:port/nonce date time\n
288          * or
289          * [ipv6]:port/nonce date time\n
290          */
291         ret = 0;
292         for (start = blklist; ; start = NULL) {
293                 addr_tok = strtok_r(start, "\n", &save);
294                 if (!addr_tok || !strlen(addr_tok))
295                         break;
296
297                 end = strchr(addr_tok, ' ');
298                 if (!end) {
299                         RBD_MSG(msg, "rbd%d checker failed: invalid blacklist %s",
300                                  ct->rbd_bus_id, addr_tok);
301                         break;
302                 }
303                 *end = '\0';
304
305                 if (!strcmp(addr_tok, ct->client_addr)) {
306                         ct->blacklisted = 1;
307                         RBD_MSG(msg, "rbd%d checker: %s is blacklisted",
308                                 ct->rbd_bus_id, ct->client_addr);
309                         ret = 1;
310                         break;
311                 }
312         }
313
314 free_bufs:
315         rados_buffer_free(blklist);
316         rados_buffer_free(stat);
317         return ret;
318 }
319
320 static int rbd_check(struct rbd_checker_context *ct, char *msg)
321 {
322         if (ct->blacklisted || rbd_is_blacklisted(ct, msg) == 1)
323                 return PATH_DOWN;
324
325         RBD_MSG(msg, "rbd checker reports path is up");
326         /*
327          * Path may have issues, but the ceph cluster is at least
328          * accepting IO, so we can attempt to do IO.
329          *
330          * TODO: in future versions, we can run other tests to
331          * verify OSDs and networks.
332          */
333         return PATH_UP;
334 }
335
/*
 * Write exactly count bytes from buf to fd, restarting after EINTR and
 * continuing after short writes.
 *
 * Returns 0 once everything has been written, or -errno of the first
 * write() failure other than EINTR.
 */
static int safe_write(int fd, const void *buf, size_t count)
{
        const char *cursor = buf;
        size_t remaining = count;

        while (remaining > 0) {
                ssize_t written = write(fd, cursor, remaining);

                if (written >= 0) {
                        cursor += written;
                        remaining -= written;
                } else if (errno != EINTR) {
                        return -errno;
                }
        }
        return 0;
}
350
351 static int sysfs_write_rbd_bus(const char *which, const char *buf,
352                                size_t buf_len)
353 {
354         char sysfs_path[PATH_SIZE];
355         int fd;
356         int r;
357
358         /* we require newer kernels so single_major should always be there */
359         snprintf(sysfs_path, sizeof(sysfs_path),
360                  "/sys/bus/rbd/%s_single_major", which);
361         fd = open(sysfs_path, O_WRONLY);
362         if (fd < 0)
363                 return -errno;
364
365         r = safe_write(fd, buf, buf_len);
366         close(fd);
367         return r;
368 }
369
370 static int rbd_remap(struct rbd_checker_context *ct)
371 {
372         char *argv[11];
373         pid_t pid;
374         int ret = 0, i = 0;
375         int status;
376
377         pid = fork();
378         switch (pid) {
379         case 0:
380                 argv[i++] = "rbd";
381                 argv[i++] = "map";
382                 argv[i++] = "-o noshare";
383                 if (ct->username) {
384                         argv[i++] = "--id";
385                         argv[i++] = ct->username;
386                 }
387                 argv[i++] = "--pool";
388                 argv[i++] = ct->pool;
389                 if (ct->snap) {
390                         argv[i++] = "--snap";
391                         argv[i++] = ct->snap;
392                 }
393                 argv[i++] = ct->image;
394                 argv[i] = NULL;
395
396                 ret = execvp(argv[0], argv);
397                 condlog(0, "Error executing rbd: %s", strerror(errno));
398                 exit(-1);
399         case -1:
400                 condlog(0, "fork failed: %s", strerror(errno));
401                 return -1;
402         default:
403                 ret = -1;
404                 wait(&status);
405                 if (WIFEXITED(status)) {
406                         status = WEXITSTATUS(status);
407                         if (status == 0)
408                                 ret = 0;
409                         else
410                                 condlog(0, "rbd failed with %d", status);
411                 }
412         }
413
414         return ret;
415 }
416
/*
 * Write a removal request to the rbd bus "remove" control file.
 * Returns whatever sysfs_write_rbd_bus() returns.
 */
static int sysfs_write_rbd_remove(const char *buf, int buf_len)
{
        int rc = sysfs_write_rbd_bus("remove", buf, buf_len);

        return rc;
}
421
422 static int rbd_rm_blacklist(struct rbd_checker_context *ct)
423 {
424         char *cmd[2];
425         char *stat, *cmd_str;
426         size_t stat_len;
427         int ret;
428
429         ret = asprintf(&cmd_str, "{\"prefix\": \"osd blacklist\", \"blacklistop\": \"rm\", \"addr\": \"%s\"}",
430                        ct->client_addr);
431         if (ret == -1)
432                 return -ENOMEM;
433
434         cmd[0] = cmd_str;
435         cmd[1] = NULL;
436
437         ret = rados_mon_command(ct->cluster, (const char **)cmd, 1, "", 0,
438                                 NULL, 0, &stat, &stat_len);
439         if (ret < 0) {
440                 condlog(1, "rbd%d repair failed to remove blacklist for %s %d",
441                         ct->rbd_bus_id, ct->client_addr, ret);
442                 goto free_cmd;
443         }
444
445         condlog(1, "rbd%d repair rm blacklist for %s",
446                ct->rbd_bus_id, ct->client_addr);
447         free(stat);
448 free_cmd:
449         free(cmd_str);
450         return ret;
451 }
452
453 static int rbd_repair(struct rbd_checker_context *ct, char *msg)
454 {
455         char del[17];
456         int ret;
457
458         if (!ct->blacklisted)
459                 return PATH_UP;
460
461         if (!ct->remapped) {
462                 ret = rbd_remap(ct);
463                 if (ret) {
464                         RBD_MSG(msg, "rbd%d repair failed to remap. Err %d",
465                                 ct->rbd_bus_id, ret);
466                         return PATH_DOWN;
467                 }
468         }
469         ct->remapped = 1;
470
471         snprintf(del, sizeof(del), "%d force", ct->rbd_bus_id);
472         ret = sysfs_write_rbd_remove(del, strlen(del) + 1);
473         if (ret) {
474                 RBD_MSG(msg, "rbd%d repair failed to clean up. Err %d",
475                         ct->rbd_bus_id, ret);
476                 return PATH_DOWN;
477         }
478
479         ret = rbd_rm_blacklist(ct);
480         if (ret) {
481                 RBD_MSG(msg, "rbd%d repair could not remove blacklist entry. Err %d",
482                         ct->rbd_bus_id, ret);
483                 return PATH_DOWN;
484         }
485
486         ct->remapped = 0;
487         ct->blacklisted = 0;
488
489         RBD_MSG(msg, "rbd%d has been repaired", ct->rbd_bus_id);
490         return PATH_UP;
491 }
492
493 #define rbd_thread_cleanup_push(ct) pthread_cleanup_push(cleanup_func, ct)
494 #define rbd_thread_cleanup_pop(ct) pthread_cleanup_pop(1)
495
496 static void cleanup_func(void *data)
497 {
498         int holders;
499         struct rbd_checker_context *ct = data;
500         pthread_spin_lock(&ct->hldr_lock);
501         ct->holders--;
502         holders = ct->holders;
503         ct->thread = 0;
504         pthread_spin_unlock(&ct->hldr_lock);
505         if (!holders)
506                 cleanup_context(ct);
507 }
508
509 static void *rbd_thread(void *ctx)
510 {
511         struct rbd_checker_context *ct = ctx;
512         int state;
513
514         condlog(3, "rbd%d thread starting up", ct->rbd_bus_id);
515
516         ct->message[0] = '\0';
517         /* This thread can be canceled, so setup clean up */
518         rbd_thread_cleanup_push(ct)
519
520         /* checker start up */
521         pthread_mutex_lock(&ct->lock);
522         ct->state = PATH_PENDING;
523         pthread_mutex_unlock(&ct->lock);
524
525         state = ct->fn(ct, ct->message);
526
527         /* checker done */
528         pthread_mutex_lock(&ct->lock);
529         ct->state = state;
530         pthread_cond_signal(&ct->active);
531         pthread_mutex_unlock(&ct->lock);
532
533         condlog(3, "rbd%d thead finished, state %s", ct->rbd_bus_id,
534                 checker_state_name(state));
535         rbd_thread_cleanup_pop(ct);
536         return ((void *)0);
537 }
538
/*
 * Fill tsp with the absolute time one millisecond from now, suitable as
 * the deadline of pthread_cond_timedwait().
 */
static void rbd_timeout(struct timespec *tsp)
{
        struct timeval now;

        gettimeofday(&now, NULL);
        tsp->tv_sec = now.tv_sec;
        tsp->tv_nsec = now.tv_usec * 1000;
        tsp->tv_nsec += 1000000; /* 1 millisecond */

        /*
         * Adding the millisecond can carry into the next second; tv_nsec
         * must stay below one billion or the timespec is invalid.
         */
        if (tsp->tv_nsec >= 1000000000L) {
                tsp->tv_sec++;
                tsp->tv_nsec -= 1000000000L;
        }
}
548
549 static int rbd_exec_fn(struct checker *c, thread_fn *fn)
550 {
551         struct rbd_checker_context *ct = c->context;
552         struct timespec tsp;
553         pthread_attr_t attr;
554         int rbd_status, r;
555
556         if (c->sync)
557                 return rbd_check(ct, c->message);
558         /*
559          * Async mode
560          */
561         r = pthread_mutex_lock(&ct->lock);
562         if (r != 0) {
563                 condlog(2, "rbd%d mutex lock failed with %d", ct->rbd_bus_id,
564                         r);
565                 MSG(c, "rbd%d thread failed to initialize", ct->rbd_bus_id);
566                 return PATH_WILD;
567         }
568
569         if (ct->running) {
570                 /* Check if checker is still running */
571                 if (ct->thread) {
572                         condlog(3, "rbd%d thread not finished", ct->rbd_bus_id);
573                         rbd_status = PATH_PENDING;
574                 } else {
575                         /* checker done */
576                         ct->running = 0;
577                         rbd_status = ct->state;
578                         strncpy(c->message, ct->message, CHECKER_MSG_LEN);
579                         c->message[CHECKER_MSG_LEN - 1] = '\0';
580                 }
581                 pthread_mutex_unlock(&ct->lock);
582         } else {
583                 /* Start new checker */
584                 ct->state = PATH_UNCHECKED;
585                 ct->fn = fn;
586                 pthread_spin_lock(&ct->hldr_lock);
587                 ct->holders++;
588                 pthread_spin_unlock(&ct->hldr_lock);
589                 setup_thread_attr(&attr, 32 * 1024, 1);
590                 r = pthread_create(&ct->thread, &attr, rbd_thread, ct);
591                 if (r) {
592                         pthread_mutex_unlock(&ct->lock);
593                         ct->thread = 0;
594                         ct->holders--;
595                         condlog(3, "rbd%d failed to start rbd thread, using sync mode",
596                                 ct->rbd_bus_id);
597                         return fn(ct, c->message);
598                 }
599                 pthread_attr_destroy(&attr);
600                 rbd_timeout(&tsp);
601                 r = pthread_cond_timedwait(&ct->active, &ct->lock, &tsp);
602                 rbd_status = ct->state;
603                 strncpy(c->message, ct->message,CHECKER_MSG_LEN);
604                 c->message[CHECKER_MSG_LEN -1] = '\0';
605                 pthread_mutex_unlock(&ct->lock);
606
607                 if (ct->thread &&
608                     (rbd_status == PATH_PENDING || rbd_status == PATH_UNCHECKED)) {
609                         condlog(3, "rbd%d thread still running",
610                                 ct->rbd_bus_id);
611                         ct->running = 1;
612                         rbd_status = PATH_PENDING;
613                 }
614         }
615
616         return rbd_status;
617 }
618
619 void libcheck_repair(struct checker * c)
620 {
621         struct rbd_checker_context *ct = c->context;
622
623         if (!ct || !ct->blacklisted)
624                 return;
625         rbd_exec_fn(c, rbd_repair);
626 }
627
628 int libcheck_check(struct checker * c)
629 {
630         struct rbd_checker_context *ct = c->context;
631
632         if (!ct)
633                 return PATH_UNCHECKED;
634
635         if (ct->blacklisted)
636                 return PATH_DOWN;
637
638         return rbd_exec_fn(c, rbd_check);
639 }