multipathd: register threads that use rcu calls
[multipath-tools/.git] / libmultipath / checkers / tur.c
1 /*
2  * Some code borrowed from sg-utils.
3  *
4  * Copyright (c) 2004 Christophe Varoqui
5  */
6 #include <stdio.h>
7 #include <stdlib.h>
8 #include <string.h>
9 #include <sys/types.h>
10 #include <sys/stat.h>
11 #include <unistd.h>
12 #include <fcntl.h>
13 #include <sys/ioctl.h>
14 #include <sys/sysmacros.h>
15 #include <errno.h>
16 #include <sys/time.h>
17 #include <pthread.h>
18 #include <urcu.h>
19 #include <urcu/uatomic.h>
20
21 #include "checkers.h"
22
23 #include "../libmultipath/debug.h"
24 #include "../libmultipath/sg_include.h"
25 #include "../libmultipath/util.h"
26 #include "../libmultipath/time-util.h"
27 #include "../libmultipath/util.h"
28
/* Length in bytes of the TEST UNIT READY command block */
#define TUR_CMD_LEN		6
#define HEAVY_CHECK_COUNT	10

/* Human-readable checker messages, one per reported outcome */
#define MSG_TUR_UP	"tur checker reports path is up"
#define MSG_TUR_DOWN	"tur checker reports path is down"
#define MSG_TUR_GHOST	"tur checker reports path is in standby state"
#define MSG_TUR_RUNNING	"tur checker still running"
#define MSG_TUR_TIMEOUT	"tur checker timed out"
#define MSG_TUR_FAILED	"tur checker failed to initialize"
38
39 struct tur_checker_context {
40         dev_t devt;
41         int state;
42         int running;
43         int fd;
44         unsigned int timeout;
45         time_t time;
46         pthread_t thread;
47         pthread_mutex_t lock;
48         pthread_cond_t active;
49         int holders;
50         char message[CHECKER_MSG_LEN];
51 };
52
53 static const char *tur_devt(char *devt_buf, int size,
54                             struct tur_checker_context *ct)
55 {
56         dev_t devt;
57
58         pthread_mutex_lock(&ct->lock);
59         devt = ct->devt;
60         pthread_mutex_unlock(&ct->lock);
61
62         snprintf(devt_buf, size, "%d:%d", major(devt), minor(devt));
63         return devt_buf;
64 }
65
66 int libcheck_init (struct checker * c)
67 {
68         struct tur_checker_context *ct;
69         pthread_mutexattr_t attr;
70
71         ct = malloc(sizeof(struct tur_checker_context));
72         if (!ct)
73                 return 1;
74         memset(ct, 0, sizeof(struct tur_checker_context));
75
76         ct->state = PATH_UNCHECKED;
77         ct->fd = -1;
78         uatomic_set(&ct->holders, 1);
79         pthread_cond_init_mono(&ct->active);
80         pthread_mutexattr_init(&attr);
81         pthread_mutexattr_settype(&attr, PTHREAD_MUTEX_RECURSIVE);
82         pthread_mutex_init(&ct->lock, &attr);
83         pthread_mutexattr_destroy(&attr);
84         c->context = ct;
85
86         return 0;
87 }
88
89 static void cleanup_context(struct tur_checker_context *ct)
90 {
91         pthread_mutex_destroy(&ct->lock);
92         pthread_cond_destroy(&ct->active);
93         free(ct);
94 }
95
96 void libcheck_free (struct checker * c)
97 {
98         if (c->context) {
99                 struct tur_checker_context *ct = c->context;
100                 int holders;
101                 int running;
102
103                 running = uatomic_xchg(&ct->running, 0);
104                 if (running)
105                         pthread_cancel(ct->thread);
106                 ct->thread = 0;
107                 holders = uatomic_sub_return(&ct->holders, 1);
108                 if (!holders)
109                         cleanup_context(ct);
110                 c->context = NULL;
111         }
112         return;
113 }
114
/* The TUR checker has no repair action; this entry point does nothing. */
void libcheck_repair (struct checker * c)
{
	(void)c;
}
119
/*
 * Format a message into a temporary CHECKER_MSG_LEN-sized buffer and
 * pass it to the message callback. The expansion uses the identifiers
 * copy_message and cb_arg, which must be in scope where the macro is
 * used.
 */
#define TUR_MSG(fmt, ...)						\
	do {								\
		char tur_msg_buf[CHECKER_MSG_LEN];			\
									\
		snprintf(tur_msg_buf, sizeof(tur_msg_buf),		\
			 fmt, ##__VA_ARGS__);				\
		copy_message(cb_arg, tur_msg_buf);			\
	} while (0)
127
128 static int
129 tur_check(int fd, unsigned int timeout,
130           void (*copy_message)(void *, const char *), void *cb_arg)
131 {
132         struct sg_io_hdr io_hdr;
133         unsigned char turCmdBlk[TUR_CMD_LEN] = { 0x00, 0, 0, 0, 0, 0 };
134         unsigned char sense_buffer[32];
135         int retry_tur = 5;
136
137 retry:
138         memset(&io_hdr, 0, sizeof (struct sg_io_hdr));
139         memset(&sense_buffer, 0, 32);
140         io_hdr.interface_id = 'S';
141         io_hdr.cmd_len = sizeof (turCmdBlk);
142         io_hdr.mx_sb_len = sizeof (sense_buffer);
143         io_hdr.dxfer_direction = SG_DXFER_NONE;
144         io_hdr.cmdp = turCmdBlk;
145         io_hdr.sbp = sense_buffer;
146         io_hdr.timeout = timeout * 1000;
147         io_hdr.pack_id = 0;
148         if (ioctl(fd, SG_IO, &io_hdr) < 0) {
149                 TUR_MSG(MSG_TUR_DOWN);
150                 return PATH_DOWN;
151         }
152         if ((io_hdr.status & 0x7e) == 0x18) {
153                 /*
154                  * SCSI-3 arrays might return
155                  * reservation conflict on TUR
156                  */
157                 TUR_MSG(MSG_TUR_UP);
158                 return PATH_UP;
159         }
160         if (io_hdr.info & SG_INFO_OK_MASK) {
161                 int key = 0, asc, ascq;
162
163                 switch (io_hdr.host_status) {
164                 case DID_OK:
165                 case DID_NO_CONNECT:
166                 case DID_BAD_TARGET:
167                 case DID_ABORT:
168                 case DID_TRANSPORT_FAILFAST:
169                         break;
170                 default:
171                         /* Driver error, retry */
172                         if (--retry_tur)
173                                 goto retry;
174                         break;
175                 }
176                 if (io_hdr.sb_len_wr > 3) {
177                         if (io_hdr.sbp[0] == 0x72 || io_hdr.sbp[0] == 0x73) {
178                                 key = io_hdr.sbp[1] & 0x0f;
179                                 asc = io_hdr.sbp[2];
180                                 ascq = io_hdr.sbp[3];
181                         } else if (io_hdr.sb_len_wr > 13 &&
182                                    ((io_hdr.sbp[0] & 0x7f) == 0x70 ||
183                                     (io_hdr.sbp[0] & 0x7f) == 0x71)) {
184                                 key = io_hdr.sbp[2] & 0x0f;
185                                 asc = io_hdr.sbp[12];
186                                 ascq = io_hdr.sbp[13];
187                         }
188                 }
189                 if (key == 0x6) {
190                         /* Unit Attention, retry */
191                         if (--retry_tur)
192                                 goto retry;
193                 }
194                 else if (key == 0x2) {
195                         /* Not Ready */
196                         /* Note: Other ALUA states are either UP or DOWN */
197                         if( asc == 0x04 && ascq == 0x0b){
198                                 /*
199                                  * LOGICAL UNIT NOT ACCESSIBLE,
200                                  * TARGET PORT IN STANDBY STATE
201                                  */
202                                 TUR_MSG(MSG_TUR_GHOST);
203                                 return PATH_GHOST;
204                         }
205                 }
206                 TUR_MSG(MSG_TUR_DOWN);
207                 return PATH_DOWN;
208         }
209         TUR_MSG(MSG_TUR_UP);
210         return PATH_UP;
211 }
212
213 #define tur_thread_cleanup_push(ct) pthread_cleanup_push(cleanup_func, ct)
214 #define tur_thread_cleanup_pop(ct) pthread_cleanup_pop(1)
215
216 static void cleanup_func(void *data)
217 {
218         int holders;
219         struct tur_checker_context *ct = data;
220
221         holders = uatomic_sub_return(&ct->holders, 1);
222         if (!holders)
223                 cleanup_context(ct);
224         rcu_unregister_thread();
225 }
226
227 static void copy_msg_to_tcc(void *ct_p, const char *msg)
228 {
229         struct tur_checker_context *ct = ct_p;
230
231         pthread_mutex_lock(&ct->lock);
232         strlcpy(ct->message, msg, sizeof(ct->message));
233         pthread_mutex_unlock(&ct->lock);
234 }
235
236 static void *tur_thread(void *ctx)
237 {
238         struct tur_checker_context *ct = ctx;
239         int state, running;
240         char devt[32];
241
242         /* This thread can be canceled, so setup clean up */
243         tur_thread_cleanup_push(ct);
244         rcu_register_thread();
245
246         condlog(3, "%s: tur checker starting up",
247                 tur_devt(devt, sizeof(devt), ct));
248
249         /* TUR checker start up */
250         pthread_mutex_lock(&ct->lock);
251         ct->state = PATH_PENDING;
252         ct->message[0] = '\0';
253         pthread_mutex_unlock(&ct->lock);
254
255         state = tur_check(ct->fd, ct->timeout, copy_msg_to_tcc, ct->message);
256         pthread_testcancel();
257
258         /* TUR checker done */
259         pthread_mutex_lock(&ct->lock);
260         ct->state = state;
261         pthread_cond_signal(&ct->active);
262         pthread_mutex_unlock(&ct->lock);
263
264         condlog(3, "%s: tur checker finished, state %s",
265                 tur_devt(devt, sizeof(devt), ct), checker_state_name(state));
266
267         running = uatomic_xchg(&ct->running, 0);
268         if (!running)
269                 pause();
270
271         tur_thread_cleanup_pop(ct);
272
273         return ((void *)0);
274 }
275
276
277 static void tur_timeout(struct timespec *tsp)
278 {
279         clock_gettime(CLOCK_MONOTONIC, tsp);
280         tsp->tv_nsec += 1000 * 1000; /* 1 millisecond */
281         normalize_timespec(tsp);
282 }
283
284 static void tur_set_async_timeout(struct checker *c)
285 {
286         struct tur_checker_context *ct = c->context;
287         struct timespec now;
288
289         clock_gettime(CLOCK_MONOTONIC, &now);
290         ct->time = now.tv_sec + c->timeout;
291 }
292
293 static int tur_check_async_timeout(struct checker *c)
294 {
295         struct tur_checker_context *ct = c->context;
296         struct timespec now;
297
298         clock_gettime(CLOCK_MONOTONIC, &now);
299         return (now.tv_sec > ct->time);
300 }
301
302 static void copy_msg_to_checker(void *c_p, const char *msg)
303 {
304         struct checker *c = c_p;
305
306         strlcpy(c->message, msg, sizeof(c->message));
307 }
308
309 int libcheck_check(struct checker * c)
310 {
311         struct tur_checker_context *ct = c->context;
312         struct timespec tsp;
313         struct stat sb;
314         pthread_attr_t attr;
315         int tur_status, r;
316         char devt[32];
317
318         if (!ct)
319                 return PATH_UNCHECKED;
320
321         if (fstat(c->fd, &sb) == 0) {
322                 pthread_mutex_lock(&ct->lock);
323                 ct->devt = sb.st_rdev;
324                 pthread_mutex_unlock(&ct->lock);
325         }
326
327         if (c->sync)
328                 return tur_check(c->fd, c->timeout, copy_msg_to_checker, c);
329
330         /*
331          * Async mode
332          */
333         r = pthread_mutex_lock(&ct->lock);
334         if (r != 0) {
335                 condlog(2, "%s: tur mutex lock failed with %d",
336                         tur_devt(devt, sizeof(devt), ct), r);
337                 MSG(c, MSG_TUR_FAILED);
338                 return PATH_WILD;
339         }
340
341         if (ct->thread) {
342                 if (tur_check_async_timeout(c)) {
343                         int running = uatomic_xchg(&ct->running, 0);
344                         if (running)
345                                 pthread_cancel(ct->thread);
346                         condlog(3, "%s: tur checker timeout",
347                                 tur_devt(devt, sizeof(devt), ct));
348                         ct->thread = 0;
349                         MSG(c, MSG_TUR_TIMEOUT);
350                         tur_status = PATH_TIMEOUT;
351                 } else if (uatomic_read(&ct->running) != 0) {
352                         condlog(3, "%s: tur checker not finished",
353                                         tur_devt(devt, sizeof(devt), ct));
354                         tur_status = PATH_PENDING;
355                 } else {
356                         /* TUR checker done */
357                         ct->thread = 0;
358                         tur_status = ct->state;
359                         strlcpy(c->message, ct->message, sizeof(c->message));
360                 }
361                 pthread_mutex_unlock(&ct->lock);
362         } else {
363                 if (uatomic_read(&ct->running) != 0) {
364                         /* pthread cancel failed. continue in sync mode */
365                         pthread_mutex_unlock(&ct->lock);
366                         condlog(3, "%s: tur thread not responding",
367                                 tur_devt(devt, sizeof(devt), ct));
368                         return PATH_TIMEOUT;
369                 }
370                 /* Start new TUR checker */
371                 ct->state = PATH_UNCHECKED;
372                 ct->fd = c->fd;
373                 ct->timeout = c->timeout;
374                 uatomic_add(&ct->holders, 1);
375                 uatomic_set(&ct->running, 1);
376                 tur_set_async_timeout(c);
377                 setup_thread_attr(&attr, 32 * 1024, 1);
378                 r = pthread_create(&ct->thread, &attr, tur_thread, ct);
379                 pthread_attr_destroy(&attr);
380                 if (r) {
381                         uatomic_sub(&ct->holders, 1);
382                         uatomic_set(&ct->running, 0);
383                         ct->thread = 0;
384                         pthread_mutex_unlock(&ct->lock);
385                         condlog(3, "%s: failed to start tur thread, using"
386                                 " sync mode", tur_devt(devt, sizeof(devt), ct));
387                         return tur_check(c->fd, c->timeout,
388                                          copy_msg_to_checker, c);
389                 }
390                 tur_timeout(&tsp);
391                 r = pthread_cond_timedwait(&ct->active, &ct->lock, &tsp);
392                 tur_status = ct->state;
393                 strlcpy(c->message, ct->message, sizeof(c->message));
394                 pthread_mutex_unlock(&ct->lock);
395                 if (uatomic_read(&ct->running) != 0 &&
396                     (tur_status == PATH_PENDING || tur_status == PATH_UNCHECKED)) {
397                         condlog(3, "%s: tur checker still running",
398                                 tur_devt(devt, sizeof(devt), ct));
399                         tur_status = PATH_PENDING;
400                 } else {
401                         int running = uatomic_xchg(&ct->running, 0);
402                         if (running)
403                                 pthread_cancel(ct->thread);
404                         ct->thread = 0;
405                 }
406         }
407
408         return tur_status;
409 }