3c5e236aabcfa1251b3f4190419c8e237b4c5c0b
[multipath-tools/.git] / libmultipath / checkers / tur.c
1 /*
2  * Some code borrowed from sg-utils.
3  *
4  * Copyright (c) 2004 Christophe Varoqui
5  */
6 #include <stdio.h>
7 #include <stdlib.h>
8 #include <string.h>
9 #include <sys/types.h>
10 #include <sys/stat.h>
11 #include <unistd.h>
12 #include <fcntl.h>
13 #include <sys/ioctl.h>
14 #include <sys/sysmacros.h>
15 #include <errno.h>
16 #include <sys/time.h>
17 #include <pthread.h>
18 #include <urcu.h>
19 #include <urcu/uatomic.h>
20
21 #include "checkers.h"
22
23 #include "../libmultipath/debug.h"
24 #include "../libmultipath/sg_include.h"
25 #include "../libmultipath/util.h"
26 #include "../libmultipath/time-util.h"
27 #include "../libmultipath/util.h"
28
/* Size in bytes of the TEST UNIT READY command block sent by tur_check(). */
#define TUR_CMD_LEN             6
/* NOTE(review): not referenced by the code visible in this file. */
#define HEAVY_CHECK_COUNT       10

/* Messages handed back to the caller together with the path state. */
#define MSG_TUR_UP              "tur checker reports path is up"
#define MSG_TUR_DOWN            "tur checker reports path is down"
#define MSG_TUR_GHOST           "tur checker reports path is in standby state"
#define MSG_TUR_RUNNING         "tur checker still running"
#define MSG_TUR_TIMEOUT         "tur checker timed out"
#define MSG_TUR_FAILED          "tur checker failed to initialize"
38
39 struct tur_checker_context {
40         dev_t devt;
41         int state;
42         int running;
43         int fd;
44         unsigned int timeout;
45         time_t time;
46         pthread_t thread;
47         pthread_mutex_t lock;
48         pthread_cond_t active;
49         int holders;
50         char message[CHECKER_MSG_LEN];
51 };
52
53 static const char *tur_devt(char *devt_buf, int size,
54                             struct tur_checker_context *ct)
55 {
56         dev_t devt;
57
58         pthread_mutex_lock(&ct->lock);
59         devt = ct->devt;
60         pthread_mutex_unlock(&ct->lock);
61
62         snprintf(devt_buf, size, "%d:%d", major(devt), minor(devt));
63         return devt_buf;
64 }
65
66 int libcheck_init (struct checker * c)
67 {
68         struct tur_checker_context *ct;
69         pthread_mutexattr_t attr;
70
71         ct = malloc(sizeof(struct tur_checker_context));
72         if (!ct)
73                 return 1;
74         memset(ct, 0, sizeof(struct tur_checker_context));
75
76         ct->state = PATH_UNCHECKED;
77         ct->fd = -1;
78         uatomic_set(&ct->holders, 1);
79         pthread_cond_init_mono(&ct->active);
80         pthread_mutexattr_init(&attr);
81         pthread_mutexattr_settype(&attr, PTHREAD_MUTEX_RECURSIVE);
82         pthread_mutex_init(&ct->lock, &attr);
83         pthread_mutexattr_destroy(&attr);
84         c->context = ct;
85
86         return 0;
87 }
88
89 static void cleanup_context(struct tur_checker_context *ct)
90 {
91         pthread_mutex_destroy(&ct->lock);
92         pthread_cond_destroy(&ct->active);
93         free(ct);
94 }
95
96 void libcheck_free (struct checker * c)
97 {
98         if (c->context) {
99                 struct tur_checker_context *ct = c->context;
100                 int holders;
101                 int running;
102
103                 running = uatomic_xchg(&ct->running, 0);
104                 if (running)
105                         pthread_cancel(ct->thread);
106                 ct->thread = 0;
107                 holders = uatomic_sub_return(&ct->holders, 1);
108                 if (!holders)
109                         cleanup_context(ct);
110                 c->context = NULL;
111         }
112         return;
113 }
114
/*
 * Format a message into a temporary buffer of CHECKER_MSG_LEN bytes and
 * pass it to the message callback. The expansion uses the identifiers
 * `copy_message` and `cb_arg`, which must be in scope where it is used.
 */
#define TUR_MSG(fmt, ...)                                               \
        do {                                                            \
                char msg[CHECKER_MSG_LEN];                              \
                                                                        \
                snprintf(msg, sizeof(msg), fmt, ##__VA_ARGS__);         \
                copy_message(cb_arg, msg);                              \
        } while (0)
122
123 static int
124 tur_check(int fd, unsigned int timeout,
125           void (*copy_message)(void *, const char *), void *cb_arg)
126 {
127         struct sg_io_hdr io_hdr;
128         unsigned char turCmdBlk[TUR_CMD_LEN] = { 0x00, 0, 0, 0, 0, 0 };
129         unsigned char sense_buffer[32];
130         int retry_tur = 5;
131
132 retry:
133         memset(&io_hdr, 0, sizeof (struct sg_io_hdr));
134         memset(&sense_buffer, 0, 32);
135         io_hdr.interface_id = 'S';
136         io_hdr.cmd_len = sizeof (turCmdBlk);
137         io_hdr.mx_sb_len = sizeof (sense_buffer);
138         io_hdr.dxfer_direction = SG_DXFER_NONE;
139         io_hdr.cmdp = turCmdBlk;
140         io_hdr.sbp = sense_buffer;
141         io_hdr.timeout = timeout * 1000;
142         io_hdr.pack_id = 0;
143         if (ioctl(fd, SG_IO, &io_hdr) < 0) {
144                 TUR_MSG(MSG_TUR_DOWN);
145                 return PATH_DOWN;
146         }
147         if ((io_hdr.status & 0x7e) == 0x18) {
148                 /*
149                  * SCSI-3 arrays might return
150                  * reservation conflict on TUR
151                  */
152                 TUR_MSG(MSG_TUR_UP);
153                 return PATH_UP;
154         }
155         if (io_hdr.info & SG_INFO_OK_MASK) {
156                 int key = 0, asc, ascq;
157
158                 switch (io_hdr.host_status) {
159                 case DID_OK:
160                 case DID_NO_CONNECT:
161                 case DID_BAD_TARGET:
162                 case DID_ABORT:
163                 case DID_TRANSPORT_FAILFAST:
164                         break;
165                 default:
166                         /* Driver error, retry */
167                         if (--retry_tur)
168                                 goto retry;
169                         break;
170                 }
171                 if (io_hdr.sb_len_wr > 3) {
172                         if (io_hdr.sbp[0] == 0x72 || io_hdr.sbp[0] == 0x73) {
173                                 key = io_hdr.sbp[1] & 0x0f;
174                                 asc = io_hdr.sbp[2];
175                                 ascq = io_hdr.sbp[3];
176                         } else if (io_hdr.sb_len_wr > 13 &&
177                                    ((io_hdr.sbp[0] & 0x7f) == 0x70 ||
178                                     (io_hdr.sbp[0] & 0x7f) == 0x71)) {
179                                 key = io_hdr.sbp[2] & 0x0f;
180                                 asc = io_hdr.sbp[12];
181                                 ascq = io_hdr.sbp[13];
182                         }
183                 }
184                 if (key == 0x6) {
185                         /* Unit Attention, retry */
186                         if (--retry_tur)
187                                 goto retry;
188                 }
189                 else if (key == 0x2) {
190                         /* Not Ready */
191                         /* Note: Other ALUA states are either UP or DOWN */
192                         if( asc == 0x04 && ascq == 0x0b){
193                                 /*
194                                  * LOGICAL UNIT NOT ACCESSIBLE,
195                                  * TARGET PORT IN STANDBY STATE
196                                  */
197                                 TUR_MSG(MSG_TUR_GHOST);
198                                 return PATH_GHOST;
199                         }
200                 }
201                 TUR_MSG(MSG_TUR_DOWN);
202                 return PATH_DOWN;
203         }
204         TUR_MSG(MSG_TUR_UP);
205         return PATH_UP;
206 }
207
/*
 * Wrappers around pthread_cleanup_push()/pthread_cleanup_pop() that install
 * cleanup_func() with the context as its argument. The pop variant always
 * runs the handler (argument 1); its `ct` parameter is not used.
 */
#define tur_thread_cleanup_push(ct)     pthread_cleanup_push(cleanup_func, ct)
#define tur_thread_cleanup_pop(ct)      pthread_cleanup_pop(1)
210
211 static void cleanup_func(void *data)
212 {
213         int holders;
214         struct tur_checker_context *ct = data;
215
216         holders = uatomic_sub_return(&ct->holders, 1);
217         if (!holders)
218                 cleanup_context(ct);
219         rcu_unregister_thread();
220 }
221
222 static void copy_msg_to_tcc(void *ct_p, const char *msg)
223 {
224         struct tur_checker_context *ct = ct_p;
225
226         pthread_mutex_lock(&ct->lock);
227         strlcpy(ct->message, msg, sizeof(ct->message));
228         pthread_mutex_unlock(&ct->lock);
229 }
230
231 static void *tur_thread(void *ctx)
232 {
233         struct tur_checker_context *ct = ctx;
234         int state, running;
235         char devt[32];
236
237         /* This thread can be canceled, so setup clean up */
238         tur_thread_cleanup_push(ct);
239         rcu_register_thread();
240
241         condlog(3, "%s: tur checker starting up",
242                 tur_devt(devt, sizeof(devt), ct));
243
244         /* TUR checker start up */
245         pthread_mutex_lock(&ct->lock);
246         ct->state = PATH_PENDING;
247         ct->message[0] = '\0';
248         pthread_mutex_unlock(&ct->lock);
249
250         state = tur_check(ct->fd, ct->timeout, copy_msg_to_tcc, ct->message);
251         pthread_testcancel();
252
253         /* TUR checker done */
254         pthread_mutex_lock(&ct->lock);
255         ct->state = state;
256         pthread_cond_signal(&ct->active);
257         pthread_mutex_unlock(&ct->lock);
258
259         condlog(3, "%s: tur checker finished, state %s",
260                 tur_devt(devt, sizeof(devt), ct), checker_state_name(state));
261
262         running = uatomic_xchg(&ct->running, 0);
263         if (!running)
264                 pause();
265
266         tur_thread_cleanup_pop(ct);
267
268         return ((void *)0);
269 }
270
271
272 static void tur_timeout(struct timespec *tsp)
273 {
274         clock_gettime(CLOCK_MONOTONIC, tsp);
275         tsp->tv_nsec += 1000 * 1000; /* 1 millisecond */
276         normalize_timespec(tsp);
277 }
278
279 static void tur_set_async_timeout(struct checker *c)
280 {
281         struct tur_checker_context *ct = c->context;
282         struct timespec now;
283
284         clock_gettime(CLOCK_MONOTONIC, &now);
285         ct->time = now.tv_sec + c->timeout;
286 }
287
288 static int tur_check_async_timeout(struct checker *c)
289 {
290         struct tur_checker_context *ct = c->context;
291         struct timespec now;
292
293         clock_gettime(CLOCK_MONOTONIC, &now);
294         return (now.tv_sec > ct->time);
295 }
296
297 static void copy_msg_to_checker(void *c_p, const char *msg)
298 {
299         struct checker *c = c_p;
300
301         strlcpy(c->message, msg, sizeof(c->message));
302 }
303
304 int libcheck_check(struct checker * c)
305 {
306         struct tur_checker_context *ct = c->context;
307         struct timespec tsp;
308         struct stat sb;
309         pthread_attr_t attr;
310         int tur_status, r;
311         char devt[32];
312
313         if (!ct)
314                 return PATH_UNCHECKED;
315
316         if (fstat(c->fd, &sb) == 0) {
317                 pthread_mutex_lock(&ct->lock);
318                 ct->devt = sb.st_rdev;
319                 pthread_mutex_unlock(&ct->lock);
320         }
321
322         if (c->sync)
323                 return tur_check(c->fd, c->timeout, copy_msg_to_checker, c);
324
325         /*
326          * Async mode
327          */
328         r = pthread_mutex_lock(&ct->lock);
329         if (r != 0) {
330                 condlog(2, "%s: tur mutex lock failed with %d",
331                         tur_devt(devt, sizeof(devt), ct), r);
332                 MSG(c, MSG_TUR_FAILED);
333                 return PATH_WILD;
334         }
335
336         if (ct->thread) {
337                 if (tur_check_async_timeout(c)) {
338                         int running = uatomic_xchg(&ct->running, 0);
339                         if (running)
340                                 pthread_cancel(ct->thread);
341                         condlog(3, "%s: tur checker timeout",
342                                 tur_devt(devt, sizeof(devt), ct));
343                         ct->thread = 0;
344                         MSG(c, MSG_TUR_TIMEOUT);
345                         tur_status = PATH_TIMEOUT;
346                 } else if (uatomic_read(&ct->running) != 0) {
347                         condlog(3, "%s: tur checker not finished",
348                                         tur_devt(devt, sizeof(devt), ct));
349                         tur_status = PATH_PENDING;
350                 } else {
351                         /* TUR checker done */
352                         ct->thread = 0;
353                         tur_status = ct->state;
354                         strlcpy(c->message, ct->message, sizeof(c->message));
355                 }
356                 pthread_mutex_unlock(&ct->lock);
357         } else {
358                 if (uatomic_read(&ct->holders) > 1) {
359                         /* The thread has been cancelled but hasn't
360                          * quilt. Fail back to synchronous mode */
361                         pthread_mutex_unlock(&ct->lock);
362                         condlog(3, "%s: tur checker failing back to sync",
363                                 tur_devt(devt, sizeof(devt), ct));
364                         return tur_check(c->fd, c->timeout, copy_msg_to_checker, c);
365                 }
366                 /* Start new TUR checker */
367                 ct->state = PATH_UNCHECKED;
368                 ct->fd = c->fd;
369                 ct->timeout = c->timeout;
370                 uatomic_add(&ct->holders, 1);
371                 uatomic_set(&ct->running, 1);
372                 tur_set_async_timeout(c);
373                 setup_thread_attr(&attr, 32 * 1024, 1);
374                 r = pthread_create(&ct->thread, &attr, tur_thread, ct);
375                 pthread_attr_destroy(&attr);
376                 if (r) {
377                         uatomic_sub(&ct->holders, 1);
378                         uatomic_set(&ct->running, 0);
379                         ct->thread = 0;
380                         pthread_mutex_unlock(&ct->lock);
381                         condlog(3, "%s: failed to start tur thread, using"
382                                 " sync mode", tur_devt(devt, sizeof(devt), ct));
383                         return tur_check(c->fd, c->timeout,
384                                          copy_msg_to_checker, c);
385                 }
386                 tur_timeout(&tsp);
387                 r = pthread_cond_timedwait(&ct->active, &ct->lock, &tsp);
388                 tur_status = ct->state;
389                 strlcpy(c->message, ct->message, sizeof(c->message));
390                 pthread_mutex_unlock(&ct->lock);
391                 if (uatomic_read(&ct->running) != 0 &&
392                     (tur_status == PATH_PENDING || tur_status == PATH_UNCHECKED)) {
393                         condlog(3, "%s: tur checker still running",
394                                 tur_devt(devt, sizeof(devt), ct));
395                         tur_status = PATH_PENDING;
396                 } else {
397                         int running = uatomic_xchg(&ct->running, 0);
398                         if (running)
399                                 pthread_cancel(ct->thread);
400                         ct->thread = 0;
401                 }
402         }
403
404         return tur_status;
405 }