libmultipath: fix tur checker double locking
[multipath-tools/.git] / libmultipath / checkers / tur.c
1 /*
2  * Some code borrowed from sg-utils.
3  *
4  * Copyright (c) 2004 Christophe Varoqui
5  */
6 #include <stdio.h>
7 #include <stdlib.h>
8 #include <string.h>
9 #include <sys/types.h>
10 #include <sys/stat.h>
11 #include <unistd.h>
12 #include <fcntl.h>
13 #include <sys/ioctl.h>
14 #include <sys/sysmacros.h>
15 #include <errno.h>
16 #include <sys/time.h>
17 #include <pthread.h>
18 #include <urcu.h>
19 #include <urcu/uatomic.h>
20
21 #include "checkers.h"
22
23 #include "../libmultipath/debug.h"
24 #include "../libmultipath/sg_include.h"
25 #include "../libmultipath/util.h"
26 #include "../libmultipath/time-util.h"
27 #include "../libmultipath/util.h"
28
/* Length in bytes of the TEST UNIT READY command block built in tur_check(). */
#define TUR_CMD_LEN		6
#define HEAVY_CHECK_COUNT	10

/* Checker messages reported alongside the path state. */
#define MSG_TUR_UP		"tur checker reports path is up"
#define MSG_TUR_DOWN		"tur checker reports path is down"
#define MSG_TUR_GHOST		"tur checker reports path is in standby state"
#define MSG_TUR_RUNNING		"tur checker still running"
#define MSG_TUR_TIMEOUT		"tur checker timed out"
#define MSG_TUR_FAILED		"tur checker failed to initialize"
38
39 struct tur_checker_context {
40         dev_t devt;
41         int state;
42         int running; /* uatomic access only */
43         int fd;
44         unsigned int timeout;
45         time_t time;
46         pthread_t thread;
47         pthread_mutex_t lock;
48         pthread_cond_t active;
49         int holders; /* uatomic access only */
50         char message[CHECKER_MSG_LEN];
51 };
52
53 int libcheck_init (struct checker * c)
54 {
55         struct tur_checker_context *ct;
56         pthread_mutexattr_t attr;
57         struct stat sb;
58
59         ct = malloc(sizeof(struct tur_checker_context));
60         if (!ct)
61                 return 1;
62         memset(ct, 0, sizeof(struct tur_checker_context));
63
64         ct->state = PATH_UNCHECKED;
65         ct->fd = -1;
66         uatomic_set(&ct->holders, 1);
67         pthread_cond_init_mono(&ct->active);
68         pthread_mutexattr_init(&attr);
69         pthread_mutexattr_settype(&attr, PTHREAD_MUTEX_RECURSIVE);
70         pthread_mutex_init(&ct->lock, &attr);
71         pthread_mutexattr_destroy(&attr);
72         if (fstat(c->fd, &sb) == 0)
73                 ct->devt = sb.st_rdev;
74         c->context = ct;
75
76         return 0;
77 }
78
79 static void cleanup_context(struct tur_checker_context *ct)
80 {
81         pthread_mutex_destroy(&ct->lock);
82         pthread_cond_destroy(&ct->active);
83         free(ct);
84 }
85
86 void libcheck_free (struct checker * c)
87 {
88         if (c->context) {
89                 struct tur_checker_context *ct = c->context;
90                 int holders;
91                 int running;
92
93                 running = uatomic_xchg(&ct->running, 0);
94                 if (running)
95                         pthread_cancel(ct->thread);
96                 ct->thread = 0;
97                 holders = uatomic_sub_return(&ct->holders, 1);
98                 if (!holders)
99                         cleanup_context(ct);
100                 c->context = NULL;
101         }
102         return;
103 }
104
105 #define TUR_MSG(fmt, args...)                                   \
106         do {                                                    \
107                 char msg[CHECKER_MSG_LEN];                      \
108                                                                 \
109                 snprintf(msg, sizeof(msg), fmt, ##args);        \
110                 copy_message(cb_arg, msg);                      \
111         } while (0)
112
113 static int
114 tur_check(int fd, unsigned int timeout,
115           void (*copy_message)(void *, const char *), void *cb_arg)
116 {
117         struct sg_io_hdr io_hdr;
118         unsigned char turCmdBlk[TUR_CMD_LEN] = { 0x00, 0, 0, 0, 0, 0 };
119         unsigned char sense_buffer[32];
120         int retry_tur = 5;
121
122 retry:
123         memset(&io_hdr, 0, sizeof (struct sg_io_hdr));
124         memset(&sense_buffer, 0, 32);
125         io_hdr.interface_id = 'S';
126         io_hdr.cmd_len = sizeof (turCmdBlk);
127         io_hdr.mx_sb_len = sizeof (sense_buffer);
128         io_hdr.dxfer_direction = SG_DXFER_NONE;
129         io_hdr.cmdp = turCmdBlk;
130         io_hdr.sbp = sense_buffer;
131         io_hdr.timeout = timeout * 1000;
132         io_hdr.pack_id = 0;
133         if (ioctl(fd, SG_IO, &io_hdr) < 0) {
134                 TUR_MSG(MSG_TUR_DOWN);
135                 return PATH_DOWN;
136         }
137         if ((io_hdr.status & 0x7e) == 0x18) {
138                 /*
139                  * SCSI-3 arrays might return
140                  * reservation conflict on TUR
141                  */
142                 TUR_MSG(MSG_TUR_UP);
143                 return PATH_UP;
144         }
145         if (io_hdr.info & SG_INFO_OK_MASK) {
146                 int key = 0, asc, ascq;
147
148                 switch (io_hdr.host_status) {
149                 case DID_OK:
150                 case DID_NO_CONNECT:
151                 case DID_BAD_TARGET:
152                 case DID_ABORT:
153                 case DID_TRANSPORT_FAILFAST:
154                         break;
155                 default:
156                         /* Driver error, retry */
157                         if (--retry_tur)
158                                 goto retry;
159                         break;
160                 }
161                 if (io_hdr.sb_len_wr > 3) {
162                         if (io_hdr.sbp[0] == 0x72 || io_hdr.sbp[0] == 0x73) {
163                                 key = io_hdr.sbp[1] & 0x0f;
164                                 asc = io_hdr.sbp[2];
165                                 ascq = io_hdr.sbp[3];
166                         } else if (io_hdr.sb_len_wr > 13 &&
167                                    ((io_hdr.sbp[0] & 0x7f) == 0x70 ||
168                                     (io_hdr.sbp[0] & 0x7f) == 0x71)) {
169                                 key = io_hdr.sbp[2] & 0x0f;
170                                 asc = io_hdr.sbp[12];
171                                 ascq = io_hdr.sbp[13];
172                         }
173                 }
174                 if (key == 0x6) {
175                         /* Unit Attention, retry */
176                         if (--retry_tur)
177                                 goto retry;
178                 }
179                 else if (key == 0x2) {
180                         /* Not Ready */
181                         /* Note: Other ALUA states are either UP or DOWN */
182                         if( asc == 0x04 && ascq == 0x0b){
183                                 /*
184                                  * LOGICAL UNIT NOT ACCESSIBLE,
185                                  * TARGET PORT IN STANDBY STATE
186                                  */
187                                 TUR_MSG(MSG_TUR_GHOST);
188                                 return PATH_GHOST;
189                         }
190                 }
191                 TUR_MSG(MSG_TUR_DOWN);
192                 return PATH_DOWN;
193         }
194         TUR_MSG(MSG_TUR_UP);
195         return PATH_UP;
196 }
197
198 #define tur_thread_cleanup_push(ct) pthread_cleanup_push(cleanup_func, ct)
199 #define tur_thread_cleanup_pop(ct) pthread_cleanup_pop(1)
200
201 static void cleanup_func(void *data)
202 {
203         int holders;
204         struct tur_checker_context *ct = data;
205
206         holders = uatomic_sub_return(&ct->holders, 1);
207         if (!holders)
208                 cleanup_context(ct);
209         rcu_unregister_thread();
210 }
211
212 static void copy_msg_to_tcc(void *ct_p, const char *msg)
213 {
214         struct tur_checker_context *ct = ct_p;
215
216         pthread_mutex_lock(&ct->lock);
217         strlcpy(ct->message, msg, sizeof(ct->message));
218         pthread_mutex_unlock(&ct->lock);
219 }
220
221 static void *tur_thread(void *ctx)
222 {
223         struct tur_checker_context *ct = ctx;
224         int state, running;
225
226         /* This thread can be canceled, so setup clean up */
227         tur_thread_cleanup_push(ct);
228         rcu_register_thread();
229
230         condlog(3, "%d:%d : tur checker starting up", major(ct->devt),
231                 minor(ct->devt));
232
233         /* TUR checker start up */
234         pthread_mutex_lock(&ct->lock);
235         ct->state = PATH_PENDING;
236         ct->message[0] = '\0';
237         pthread_mutex_unlock(&ct->lock);
238
239         state = tur_check(ct->fd, ct->timeout, copy_msg_to_tcc, ct->message);
240         pthread_testcancel();
241
242         /* TUR checker done */
243         pthread_mutex_lock(&ct->lock);
244         ct->state = state;
245         pthread_cond_signal(&ct->active);
246         pthread_mutex_unlock(&ct->lock);
247
248         condlog(3, "%d:%d : tur checker finished, state %s", major(ct->devt),
249                 minor(ct->devt), checker_state_name(state));
250
251         running = uatomic_xchg(&ct->running, 0);
252         if (!running)
253                 pause();
254
255         tur_thread_cleanup_pop(ct);
256
257         return ((void *)0);
258 }
259
260
261 static void tur_timeout(struct timespec *tsp)
262 {
263         clock_gettime(CLOCK_MONOTONIC, tsp);
264         tsp->tv_nsec += 1000 * 1000; /* 1 millisecond */
265         normalize_timespec(tsp);
266 }
267
268 static void tur_set_async_timeout(struct checker *c)
269 {
270         struct tur_checker_context *ct = c->context;
271         struct timespec now;
272
273         clock_gettime(CLOCK_MONOTONIC, &now);
274         ct->time = now.tv_sec + c->timeout;
275 }
276
277 static int tur_check_async_timeout(struct checker *c)
278 {
279         struct tur_checker_context *ct = c->context;
280         struct timespec now;
281
282         clock_gettime(CLOCK_MONOTONIC, &now);
283         return (now.tv_sec > ct->time);
284 }
285
286 static void copy_msg_to_checker(void *c_p, const char *msg)
287 {
288         struct checker *c = c_p;
289
290         strlcpy(c->message, msg, sizeof(c->message));
291 }
292
293 int libcheck_check(struct checker * c)
294 {
295         struct tur_checker_context *ct = c->context;
296         struct timespec tsp;
297         pthread_attr_t attr;
298         int tur_status, r;
299
300         if (!ct)
301                 return PATH_UNCHECKED;
302
303         if (c->sync)
304                 return tur_check(c->fd, c->timeout, copy_msg_to_checker, c);
305
306         /*
307          * Async mode
308          */
309         r = pthread_mutex_lock(&ct->lock);
310         if (r != 0) {
311                 condlog(2, "%s: tur mutex lock failed with %d", ct->devt, r);
312                 MSG(c, MSG_TUR_FAILED);
313                 return PATH_WILD;
314         }
315
316         if (ct->thread) {
317                 if (tur_check_async_timeout(c)) {
318                         int running = uatomic_xchg(&ct->running, 0);
319                         if (running)
320                                 pthread_cancel(ct->thread);
321                         condlog(3, "%d:%d : tur checker timeout",
322                                 major(ct->devt), minor(ct->devt));
323                         ct->thread = 0;
324                         MSG(c, MSG_TUR_TIMEOUT);
325                         tur_status = PATH_TIMEOUT;
326                 } else if (uatomic_read(&ct->running) != 0) {
327                         condlog(3, "%d:%d : tur checker not finished",
328                                 major(ct->devt), minor(ct->devt));
329                         tur_status = PATH_PENDING;
330                 } else {
331                         /* TUR checker done */
332                         ct->thread = 0;
333                         tur_status = ct->state;
334                         strlcpy(c->message, ct->message, sizeof(c->message));
335                 }
336                 pthread_mutex_unlock(&ct->lock);
337         } else {
338                 if (uatomic_read(&ct->holders) > 1) {
339                         /* The thread has been cancelled but hasn't
340                          * quilt. Fail back to synchronous mode */
341                         pthread_mutex_unlock(&ct->lock);
342                         condlog(3, "%d:%d : tur checker failing back to sync",
343                                 major(ct->devt), minor(ct->devt));
344                         return tur_check(c->fd, c->timeout, copy_msg_to_checker, c);
345                 }
346                 /* Start new TUR checker */
347                 ct->state = PATH_UNCHECKED;
348                 ct->fd = c->fd;
349                 ct->timeout = c->timeout;
350                 uatomic_add(&ct->holders, 1);
351                 uatomic_set(&ct->running, 1);
352                 tur_set_async_timeout(c);
353                 setup_thread_attr(&attr, 32 * 1024, 1);
354                 r = pthread_create(&ct->thread, &attr, tur_thread, ct);
355                 pthread_attr_destroy(&attr);
356                 if (r) {
357                         uatomic_sub(&ct->holders, 1);
358                         uatomic_set(&ct->running, 0);
359                         ct->thread = 0;
360                         pthread_mutex_unlock(&ct->lock);
361                         condlog(3, "%d:%d : failed to start tur thread, using"
362                                 " sync mode", major(ct->devt), minor(ct->devt));
363                         return tur_check(c->fd, c->timeout,
364                                          copy_msg_to_checker, c);
365                 }
366                 tur_timeout(&tsp);
367                 r = pthread_cond_timedwait(&ct->active, &ct->lock, &tsp);
368                 tur_status = ct->state;
369                 strlcpy(c->message, ct->message, sizeof(c->message));
370                 pthread_mutex_unlock(&ct->lock);
371                 if (uatomic_read(&ct->running) != 0 &&
372                     (tur_status == PATH_PENDING || tur_status == PATH_UNCHECKED)) {
373                         condlog(3, "%d:%d : tur checker still running",
374                                 major(ct->devt), minor(ct->devt));
375                         tur_status = PATH_PENDING;
376                 } else {
377                         int running = uatomic_xchg(&ct->running, 0);
378                         if (running)
379                                 pthread_cancel(ct->thread);
380                         ct->thread = 0;
381                 }
382         }
383
384         return tur_status;
385 }