libmultipath: fix tur checker locking
[multipath-tools/.git] / libmultipath / checkers / tur.c
1 /*
2  * Some code borrowed from sg-utils.
3  *
4  * Copyright (c) 2004 Christophe Varoqui
5  */
6 #include <stdio.h>
7 #include <stdlib.h>
8 #include <string.h>
9 #include <sys/types.h>
10 #include <sys/stat.h>
11 #include <unistd.h>
12 #include <fcntl.h>
13 #include <sys/ioctl.h>
14 #include <sys/sysmacros.h>
15 #include <errno.h>
16 #include <sys/time.h>
17 #include <pthread.h>
18 #include <urcu/uatomic.h>
19
20 #include "checkers.h"
21
22 #include "../libmultipath/debug.h"
23 #include "../libmultipath/sg_include.h"
24 #include "../libmultipath/util.h"
25 #include "../libmultipath/time-util.h"
26 #include "../libmultipath/util.h"
27
/* Size in bytes of the command block sent by tur_check(). */
#define TUR_CMD_LEN 6
/* NOTE(review): not referenced by the code visible in this file. */
#define HEAVY_CHECK_COUNT       10

/* Checker messages reported alongside the path state. */
#define MSG_TUR_UP      "tur checker reports path is up"
#define MSG_TUR_DOWN    "tur checker reports path is down"
#define MSG_TUR_GHOST   "tur checker reports path is in standby state"
/* NOTE(review): MSG_TUR_RUNNING is not referenced by the visible code. */
#define MSG_TUR_RUNNING "tur checker still running"
#define MSG_TUR_TIMEOUT "tur checker timed out"
#define MSG_TUR_FAILED  "tur checker failed to initialize"
37
38 struct tur_checker_context {
39         dev_t devt;
40         int state;
41         int running;
42         int fd;
43         unsigned int timeout;
44         time_t time;
45         pthread_t thread;
46         pthread_mutex_t lock;
47         pthread_cond_t active;
48         int holders;
49         char message[CHECKER_MSG_LEN];
50 };
51
52 static const char *tur_devt(char *devt_buf, int size,
53                             struct tur_checker_context *ct)
54 {
55         dev_t devt;
56
57         pthread_mutex_lock(&ct->lock);
58         devt = ct->devt;
59         pthread_mutex_unlock(&ct->lock);
60
61         snprintf(devt_buf, size, "%d:%d", major(devt), minor(devt));
62         return devt_buf;
63 }
64
65 int libcheck_init (struct checker * c)
66 {
67         struct tur_checker_context *ct;
68         pthread_mutexattr_t attr;
69
70         ct = malloc(sizeof(struct tur_checker_context));
71         if (!ct)
72                 return 1;
73         memset(ct, 0, sizeof(struct tur_checker_context));
74
75         ct->state = PATH_UNCHECKED;
76         ct->fd = -1;
77         uatomic_set(&ct->holders, 1);
78         pthread_cond_init_mono(&ct->active);
79         pthread_mutexattr_init(&attr);
80         pthread_mutexattr_settype(&attr, PTHREAD_MUTEX_RECURSIVE);
81         pthread_mutex_init(&ct->lock, &attr);
82         pthread_mutexattr_destroy(&attr);
83         c->context = ct;
84
85         return 0;
86 }
87
88 static void cleanup_context(struct tur_checker_context *ct)
89 {
90         pthread_mutex_destroy(&ct->lock);
91         pthread_cond_destroy(&ct->active);
92         free(ct);
93 }
94
95 void libcheck_free (struct checker * c)
96 {
97         if (c->context) {
98                 struct tur_checker_context *ct = c->context;
99                 int holders;
100                 int running;
101
102                 running = uatomic_xchg(&ct->running, 0);
103                 if (running)
104                         pthread_cancel(ct->thread);
105                 ct->thread = 0;
106                 holders = uatomic_sub_return(&ct->holders, 1);
107                 if (!holders)
108                         cleanup_context(ct);
109                 c->context = NULL;
110         }
111         return;
112 }
113
/* The TUR checker has no repair action; this hook does nothing. */
void libcheck_repair (struct checker * c)
{
        (void)c;
}
118
/*
 * Format a checker message and hand it to the copy_message callback.
 * Expands in a scope that must provide `copy_message` and `cb_arg`
 * (as tur_check() does); the text is truncated to CHECKER_MSG_LEN.
 */
#define TUR_MSG(fmt, args...)                                           \
        do {                                                            \
                char tur_msg_buf[CHECKER_MSG_LEN];                      \
                                                                        \
                snprintf(tur_msg_buf, sizeof(tur_msg_buf), fmt, ##args);\
                copy_message(cb_arg, tur_msg_buf);                      \
        } while (0)
126
127 static int
128 tur_check(int fd, unsigned int timeout,
129           void (*copy_message)(void *, const char *), void *cb_arg)
130 {
131         struct sg_io_hdr io_hdr;
132         unsigned char turCmdBlk[TUR_CMD_LEN] = { 0x00, 0, 0, 0, 0, 0 };
133         unsigned char sense_buffer[32];
134         int retry_tur = 5;
135
136 retry:
137         memset(&io_hdr, 0, sizeof (struct sg_io_hdr));
138         memset(&sense_buffer, 0, 32);
139         io_hdr.interface_id = 'S';
140         io_hdr.cmd_len = sizeof (turCmdBlk);
141         io_hdr.mx_sb_len = sizeof (sense_buffer);
142         io_hdr.dxfer_direction = SG_DXFER_NONE;
143         io_hdr.cmdp = turCmdBlk;
144         io_hdr.sbp = sense_buffer;
145         io_hdr.timeout = timeout * 1000;
146         io_hdr.pack_id = 0;
147         if (ioctl(fd, SG_IO, &io_hdr) < 0) {
148                 TUR_MSG(MSG_TUR_DOWN);
149                 return PATH_DOWN;
150         }
151         if ((io_hdr.status & 0x7e) == 0x18) {
152                 /*
153                  * SCSI-3 arrays might return
154                  * reservation conflict on TUR
155                  */
156                 TUR_MSG(MSG_TUR_UP);
157                 return PATH_UP;
158         }
159         if (io_hdr.info & SG_INFO_OK_MASK) {
160                 int key = 0, asc, ascq;
161
162                 switch (io_hdr.host_status) {
163                 case DID_OK:
164                 case DID_NO_CONNECT:
165                 case DID_BAD_TARGET:
166                 case DID_ABORT:
167                 case DID_TRANSPORT_FAILFAST:
168                         break;
169                 default:
170                         /* Driver error, retry */
171                         if (--retry_tur)
172                                 goto retry;
173                         break;
174                 }
175                 if (io_hdr.sb_len_wr > 3) {
176                         if (io_hdr.sbp[0] == 0x72 || io_hdr.sbp[0] == 0x73) {
177                                 key = io_hdr.sbp[1] & 0x0f;
178                                 asc = io_hdr.sbp[2];
179                                 ascq = io_hdr.sbp[3];
180                         } else if (io_hdr.sb_len_wr > 13 &&
181                                    ((io_hdr.sbp[0] & 0x7f) == 0x70 ||
182                                     (io_hdr.sbp[0] & 0x7f) == 0x71)) {
183                                 key = io_hdr.sbp[2] & 0x0f;
184                                 asc = io_hdr.sbp[12];
185                                 ascq = io_hdr.sbp[13];
186                         }
187                 }
188                 if (key == 0x6) {
189                         /* Unit Attention, retry */
190                         if (--retry_tur)
191                                 goto retry;
192                 }
193                 else if (key == 0x2) {
194                         /* Not Ready */
195                         /* Note: Other ALUA states are either UP or DOWN */
196                         if( asc == 0x04 && ascq == 0x0b){
197                                 /*
198                                  * LOGICAL UNIT NOT ACCESSIBLE,
199                                  * TARGET PORT IN STANDBY STATE
200                                  */
201                                 TUR_MSG(MSG_TUR_GHOST);
202                                 return PATH_GHOST;
203                         }
204                 }
205                 TUR_MSG(MSG_TUR_DOWN);
206                 return PATH_DOWN;
207         }
208         TUR_MSG(MSG_TUR_UP);
209         return PATH_UP;
210 }
211
212 #define tur_thread_cleanup_push(ct) pthread_cleanup_push(cleanup_func, ct)
213 #define tur_thread_cleanup_pop(ct) pthread_cleanup_pop(1)
214
215 static void cleanup_func(void *data)
216 {
217         int holders;
218         struct tur_checker_context *ct = data;
219
220         holders = uatomic_sub_return(&ct->holders, 1);
221         if (!holders)
222                 cleanup_context(ct);
223 }
224
225 static void copy_msg_to_tcc(void *ct_p, const char *msg)
226 {
227         struct tur_checker_context *ct = ct_p;
228
229         pthread_mutex_lock(&ct->lock);
230         strlcpy(ct->message, msg, sizeof(ct->message));
231         pthread_mutex_unlock(&ct->lock);
232 }
233
234 static void *tur_thread(void *ctx)
235 {
236         struct tur_checker_context *ct = ctx;
237         int state, running;
238         char devt[32];
239
240         condlog(3, "%s: tur checker starting up",
241                 tur_devt(devt, sizeof(devt), ct));
242
243         /* This thread can be canceled, so setup clean up */
244         tur_thread_cleanup_push(ct);
245
246         /* TUR checker start up */
247         pthread_mutex_lock(&ct->lock);
248         ct->state = PATH_PENDING;
249         ct->message[0] = '\0';
250         pthread_mutex_unlock(&ct->lock);
251
252         state = tur_check(ct->fd, ct->timeout, copy_msg_to_tcc, ct->message);
253         pthread_testcancel();
254
255         /* TUR checker done */
256         pthread_mutex_lock(&ct->lock);
257         ct->state = state;
258         pthread_cond_signal(&ct->active);
259         pthread_mutex_unlock(&ct->lock);
260
261         condlog(3, "%s: tur checker finished, state %s",
262                 tur_devt(devt, sizeof(devt), ct), checker_state_name(state));
263
264         running = uatomic_xchg(&ct->running, 0);
265         if (!running)
266                 pause();
267
268         tur_thread_cleanup_pop(ct);
269
270         return ((void *)0);
271 }
272
273
274 static void tur_timeout(struct timespec *tsp)
275 {
276         clock_gettime(CLOCK_MONOTONIC, tsp);
277         tsp->tv_nsec += 1000 * 1000; /* 1 millisecond */
278         normalize_timespec(tsp);
279 }
280
281 static void tur_set_async_timeout(struct checker *c)
282 {
283         struct tur_checker_context *ct = c->context;
284         struct timespec now;
285
286         clock_gettime(CLOCK_MONOTONIC, &now);
287         ct->time = now.tv_sec + c->timeout;
288 }
289
290 static int tur_check_async_timeout(struct checker *c)
291 {
292         struct tur_checker_context *ct = c->context;
293         struct timespec now;
294
295         clock_gettime(CLOCK_MONOTONIC, &now);
296         return (now.tv_sec > ct->time);
297 }
298
299 static void copy_msg_to_checker(void *c_p, const char *msg)
300 {
301         struct checker *c = c_p;
302
303         strlcpy(c->message, msg, sizeof(c->message));
304 }
305
306 int libcheck_check(struct checker * c)
307 {
308         struct tur_checker_context *ct = c->context;
309         struct timespec tsp;
310         struct stat sb;
311         pthread_attr_t attr;
312         int tur_status, r;
313         char devt[32];
314
315         if (!ct)
316                 return PATH_UNCHECKED;
317
318         if (fstat(c->fd, &sb) == 0) {
319                 pthread_mutex_lock(&ct->lock);
320                 ct->devt = sb.st_rdev;
321                 pthread_mutex_unlock(&ct->lock);
322         }
323
324         if (c->sync)
325                 return tur_check(c->fd, c->timeout, copy_msg_to_checker, c);
326
327         /*
328          * Async mode
329          */
330         r = pthread_mutex_lock(&ct->lock);
331         if (r != 0) {
332                 condlog(2, "%s: tur mutex lock failed with %d",
333                         tur_devt(devt, sizeof(devt), ct), r);
334                 MSG(c, MSG_TUR_FAILED);
335                 return PATH_WILD;
336         }
337
338         if (ct->thread) {
339                 if (tur_check_async_timeout(c)) {
340                         int running = uatomic_xchg(&ct->running, 0);
341                         if (running)
342                                 pthread_cancel(ct->thread);
343                         condlog(3, "%s: tur checker timeout",
344                                 tur_devt(devt, sizeof(devt), ct));
345                         ct->thread = 0;
346                         MSG(c, MSG_TUR_TIMEOUT);
347                         tur_status = PATH_TIMEOUT;
348                 } else if (uatomic_read(&ct->running) != 0) {
349                         condlog(3, "%s: tur checker not finished",
350                                         tur_devt(devt, sizeof(devt), ct));
351                         tur_status = PATH_PENDING;
352                 } else {
353                         /* TUR checker done */
354                         ct->thread = 0;
355                         tur_status = ct->state;
356                         strlcpy(c->message, ct->message, sizeof(c->message));
357                 }
358                 pthread_mutex_unlock(&ct->lock);
359         } else {
360                 if (uatomic_read(&ct->running) != 0) {
361                         /* pthread cancel failed. continue in sync mode */
362                         pthread_mutex_unlock(&ct->lock);
363                         condlog(3, "%s: tur thread not responding",
364                                 tur_devt(devt, sizeof(devt), ct));
365                         return PATH_TIMEOUT;
366                 }
367                 /* Start new TUR checker */
368                 ct->state = PATH_UNCHECKED;
369                 ct->fd = c->fd;
370                 ct->timeout = c->timeout;
371                 uatomic_add(&ct->holders, 1);
372                 uatomic_set(&ct->running, 1);
373                 tur_set_async_timeout(c);
374                 setup_thread_attr(&attr, 32 * 1024, 1);
375                 r = pthread_create(&ct->thread, &attr, tur_thread, ct);
376                 pthread_attr_destroy(&attr);
377                 if (r) {
378                         uatomic_sub(&ct->holders, 1);
379                         uatomic_set(&ct->running, 0);
380                         ct->thread = 0;
381                         pthread_mutex_unlock(&ct->lock);
382                         condlog(3, "%s: failed to start tur thread, using"
383                                 " sync mode", tur_devt(devt, sizeof(devt), ct));
384                         return tur_check(c->fd, c->timeout,
385                                          copy_msg_to_checker, c);
386                 }
387                 tur_timeout(&tsp);
388                 r = pthread_cond_timedwait(&ct->active, &ct->lock, &tsp);
389                 tur_status = ct->state;
390                 strlcpy(c->message, ct->message, sizeof(c->message));
391                 pthread_mutex_unlock(&ct->lock);
392                 if (uatomic_read(&ct->running) != 0 &&
393                     (tur_status == PATH_PENDING || tur_status == PATH_UNCHECKED)) {
394                         condlog(3, "%s: tur checker still running",
395                                 tur_devt(devt, sizeof(devt), ct));
396                         tur_status = PATH_PENDING;
397                 } else {
398                         int running = uatomic_xchg(&ct->running, 0);
399                         if (running)
400                                 pthread_cancel(ct->thread);
401                         ct->thread = 0;
402                 }
403         }
404
405         return tur_status;
406 }