libmultipath: fix tur checker timeout issue
[multipath-tools/.git] / libmultipath / checkers / tur.c
1 /*
2  * Some code borrowed from sg-utils.
3  *
4  * Copyright (c) 2004 Christophe Varoqui
5  */
6 #include <stdio.h>
7 #include <stdlib.h>
8 #include <string.h>
9 #include <sys/types.h>
10 #include <sys/stat.h>
11 #include <unistd.h>
12 #include <fcntl.h>
13 #include <sys/ioctl.h>
14 #include <sys/sysmacros.h>
15 #include <errno.h>
16 #include <sys/time.h>
17 #include <pthread.h>
18 #include <urcu.h>
19 #include <urcu/uatomic.h>
20
21 #include "checkers.h"
22
23 #include "../libmultipath/debug.h"
24 #include "../libmultipath/sg_include.h"
25 #include "../libmultipath/util.h"
26 #include "../libmultipath/time-util.h"
27 #include "../libmultipath/util.h"
28
/* Size in bytes of the command block sent by tur_check(). */
#define TUR_CMD_LEN 6
/* NOTE(review): not referenced in the visible part of this file. */
#define HEAVY_CHECK_COUNT       10

/*
 * Human-readable checker messages. Each one is copied into a buffer of
 * CHECKER_MSG_LEN bytes by the code below.
 */
#define MSG_TUR_UP      "tur checker reports path is up"
#define MSG_TUR_DOWN    "tur checker reports path is down"
#define MSG_TUR_GHOST   "tur checker reports path is in standby state"
#define MSG_TUR_RUNNING "tur checker still running"
#define MSG_TUR_TIMEOUT "tur checker timed out"
#define MSG_TUR_FAILED  "tur checker failed to initialize"
38
39 struct tur_checker_context {
40         dev_t devt;
41         int state;
42         int running; /* uatomic access only */
43         int fd;
44         unsigned int timeout;
45         time_t time;
46         pthread_t thread;
47         pthread_mutex_t lock;
48         pthread_cond_t active;
49         int holders; /* uatomic access only */
50         char message[CHECKER_MSG_LEN];
51 };
52
53 int libcheck_init (struct checker * c)
54 {
55         struct tur_checker_context *ct;
56         struct stat sb;
57
58         ct = malloc(sizeof(struct tur_checker_context));
59         if (!ct)
60                 return 1;
61         memset(ct, 0, sizeof(struct tur_checker_context));
62
63         ct->state = PATH_UNCHECKED;
64         ct->fd = -1;
65         uatomic_set(&ct->holders, 1);
66         pthread_cond_init_mono(&ct->active);
67         pthread_mutex_init(&ct->lock, NULL);
68         if (fstat(c->fd, &sb) == 0)
69                 ct->devt = sb.st_rdev;
70         c->context = ct;
71
72         return 0;
73 }
74
75 static void cleanup_context(struct tur_checker_context *ct)
76 {
77         pthread_mutex_destroy(&ct->lock);
78         pthread_cond_destroy(&ct->active);
79         free(ct);
80 }
81
82 void libcheck_free (struct checker * c)
83 {
84         if (c->context) {
85                 struct tur_checker_context *ct = c->context;
86                 int holders;
87                 int running;
88
89                 running = uatomic_xchg(&ct->running, 0);
90                 if (running)
91                         pthread_cancel(ct->thread);
92                 ct->thread = 0;
93                 holders = uatomic_sub_return(&ct->holders, 1);
94                 if (!holders)
95                         cleanup_context(ct);
96                 c->context = NULL;
97         }
98         return;
99 }
100
101 static int
102 tur_check(int fd, unsigned int timeout, char *msg)
103 {
104         struct sg_io_hdr io_hdr;
105         unsigned char turCmdBlk[TUR_CMD_LEN] = { 0x00, 0, 0, 0, 0, 0 };
106         unsigned char sense_buffer[32];
107         int retry_tur = 5;
108
109 retry:
110         memset(&io_hdr, 0, sizeof (struct sg_io_hdr));
111         memset(&sense_buffer, 0, 32);
112         io_hdr.interface_id = 'S';
113         io_hdr.cmd_len = sizeof (turCmdBlk);
114         io_hdr.mx_sb_len = sizeof (sense_buffer);
115         io_hdr.dxfer_direction = SG_DXFER_NONE;
116         io_hdr.cmdp = turCmdBlk;
117         io_hdr.sbp = sense_buffer;
118         io_hdr.timeout = timeout * 1000;
119         io_hdr.pack_id = 0;
120         if (ioctl(fd, SG_IO, &io_hdr) < 0) {
121                 snprintf(msg, CHECKER_MSG_LEN, MSG_TUR_DOWN);
122                 return PATH_DOWN;
123         }
124         if ((io_hdr.status & 0x7e) == 0x18) {
125                 /*
126                  * SCSI-3 arrays might return
127                  * reservation conflict on TUR
128                  */
129                 snprintf(msg, CHECKER_MSG_LEN, MSG_TUR_UP);
130                 return PATH_UP;
131         }
132         if (io_hdr.info & SG_INFO_OK_MASK) {
133                 int key = 0, asc, ascq;
134
135                 switch (io_hdr.host_status) {
136                 case DID_OK:
137                 case DID_NO_CONNECT:
138                 case DID_BAD_TARGET:
139                 case DID_ABORT:
140                 case DID_TRANSPORT_FAILFAST:
141                         break;
142                 default:
143                         /* Driver error, retry */
144                         if (--retry_tur)
145                                 goto retry;
146                         break;
147                 }
148                 if (io_hdr.sb_len_wr > 3) {
149                         if (io_hdr.sbp[0] == 0x72 || io_hdr.sbp[0] == 0x73) {
150                                 key = io_hdr.sbp[1] & 0x0f;
151                                 asc = io_hdr.sbp[2];
152                                 ascq = io_hdr.sbp[3];
153                         } else if (io_hdr.sb_len_wr > 13 &&
154                                    ((io_hdr.sbp[0] & 0x7f) == 0x70 ||
155                                     (io_hdr.sbp[0] & 0x7f) == 0x71)) {
156                                 key = io_hdr.sbp[2] & 0x0f;
157                                 asc = io_hdr.sbp[12];
158                                 ascq = io_hdr.sbp[13];
159                         }
160                 }
161                 if (key == 0x6) {
162                         /* Unit Attention, retry */
163                         if (--retry_tur)
164                                 goto retry;
165                 }
166                 else if (key == 0x2) {
167                         /* Not Ready */
168                         /* Note: Other ALUA states are either UP or DOWN */
169                         if( asc == 0x04 && ascq == 0x0b){
170                                 /*
171                                  * LOGICAL UNIT NOT ACCESSIBLE,
172                                  * TARGET PORT IN STANDBY STATE
173                                  */
174                                 snprintf(msg, CHECKER_MSG_LEN, MSG_TUR_GHOST);
175                                 return PATH_GHOST;
176                         }
177                 }
178                 snprintf(msg, CHECKER_MSG_LEN, MSG_TUR_DOWN);
179                 return PATH_DOWN;
180         }
181         snprintf(msg, CHECKER_MSG_LEN, MSG_TUR_UP);
182         return PATH_UP;
183 }
184
185 #define tur_thread_cleanup_push(ct) pthread_cleanup_push(cleanup_func, ct)
186 #define tur_thread_cleanup_pop(ct) pthread_cleanup_pop(1)
187
188 static void cleanup_func(void *data)
189 {
190         int holders;
191         struct tur_checker_context *ct = data;
192
193         holders = uatomic_sub_return(&ct->holders, 1);
194         if (!holders)
195                 cleanup_context(ct);
196         rcu_unregister_thread();
197 }
198
199 static void *tur_thread(void *ctx)
200 {
201         struct tur_checker_context *ct = ctx;
202         int state, running;
203         char msg[CHECKER_MSG_LEN];
204
205         /* This thread can be canceled, so setup clean up */
206         tur_thread_cleanup_push(ct);
207         rcu_register_thread();
208
209         condlog(3, "%d:%d : tur checker starting up", major(ct->devt),
210                 minor(ct->devt));
211
212         state = tur_check(ct->fd, ct->timeout, msg);
213         pthread_testcancel();
214
215         /* TUR checker done */
216         pthread_mutex_lock(&ct->lock);
217         ct->state = state;
218         strlcpy(ct->message, msg, sizeof(ct->message));
219         pthread_cond_signal(&ct->active);
220         pthread_mutex_unlock(&ct->lock);
221
222         condlog(3, "%d:%d : tur checker finished, state %s", major(ct->devt),
223                 minor(ct->devt), checker_state_name(state));
224
225         running = uatomic_xchg(&ct->running, 0);
226         if (!running)
227                 pause();
228
229         tur_thread_cleanup_pop(ct);
230
231         return ((void *)0);
232 }
233
234
235 static void tur_timeout(struct timespec *tsp)
236 {
237         clock_gettime(CLOCK_MONOTONIC, tsp);
238         tsp->tv_nsec += 1000 * 1000; /* 1 millisecond */
239         normalize_timespec(tsp);
240 }
241
242 static void tur_set_async_timeout(struct checker *c)
243 {
244         struct tur_checker_context *ct = c->context;
245         struct timespec now;
246
247         clock_gettime(CLOCK_MONOTONIC, &now);
248         ct->time = now.tv_sec + c->timeout;
249 }
250
251 static int tur_check_async_timeout(struct checker *c)
252 {
253         struct tur_checker_context *ct = c->context;
254         struct timespec now;
255
256         clock_gettime(CLOCK_MONOTONIC, &now);
257         return (now.tv_sec > ct->time);
258 }
259
260 int libcheck_check(struct checker * c)
261 {
262         struct tur_checker_context *ct = c->context;
263         struct timespec tsp;
264         pthread_attr_t attr;
265         int tur_status, r;
266
267         if (!ct)
268                 return PATH_UNCHECKED;
269
270         if (c->sync)
271                 return tur_check(c->fd, c->timeout, c->message);
272
273         /*
274          * Async mode
275          */
276         if (ct->thread) {
277                 if (tur_check_async_timeout(c)) {
278                         int running = uatomic_xchg(&ct->running, 0);
279                         if (running) {
280                                 pthread_cancel(ct->thread);
281                                 condlog(3, "%d:%d : tur checker timeout",
282                                         major(ct->devt), minor(ct->devt));
283                                 MSG(c, MSG_TUR_TIMEOUT);
284                                 tur_status = PATH_TIMEOUT;
285                         } else {
286                                 pthread_mutex_lock(&ct->lock);
287                                 tur_status = ct->state;
288                                 strlcpy(c->message, ct->message,
289                                         sizeof(c->message));
290                                 pthread_mutex_unlock(&ct->lock);
291                         }
292                         ct->thread = 0;
293                 } else if (uatomic_read(&ct->running) != 0) {
294                         condlog(3, "%d:%d : tur checker not finished",
295                                 major(ct->devt), minor(ct->devt));
296                         tur_status = PATH_PENDING;
297                 } else {
298                         /* TUR checker done */
299                         ct->thread = 0;
300                         pthread_mutex_lock(&ct->lock);
301                         tur_status = ct->state;
302                         strlcpy(c->message, ct->message, sizeof(c->message));
303                         pthread_mutex_unlock(&ct->lock);
304                 }
305         } else {
306                 if (uatomic_read(&ct->holders) > 1) {
307                         /* The thread has been cancelled but hasn't
308                          * quilt. Fail back to synchronous mode */
309                         condlog(3, "%d:%d : tur checker failing back to sync",
310                                 major(ct->devt), minor(ct->devt));
311                         return tur_check(c->fd, c->timeout, c->message);
312                 }
313                 /* Start new TUR checker */
314                 pthread_mutex_lock(&ct->lock);
315                 tur_status = ct->state = PATH_PENDING;
316                 ct->message[0] = '\0';
317                 pthread_mutex_unlock(&ct->lock);
318                 ct->fd = c->fd;
319                 ct->timeout = c->timeout;
320                 uatomic_add(&ct->holders, 1);
321                 uatomic_set(&ct->running, 1);
322                 tur_set_async_timeout(c);
323                 setup_thread_attr(&attr, 32 * 1024, 1);
324                 r = pthread_create(&ct->thread, &attr, tur_thread, ct);
325                 pthread_attr_destroy(&attr);
326                 if (r) {
327                         uatomic_sub(&ct->holders, 1);
328                         uatomic_set(&ct->running, 0);
329                         ct->thread = 0;
330                         condlog(3, "%d:%d : failed to start tur thread, using"
331                                 " sync mode", major(ct->devt), minor(ct->devt));
332                         return tur_check(c->fd, c->timeout, c->message);
333                 }
334                 tur_timeout(&tsp);
335                 pthread_mutex_lock(&ct->lock);
336                 if (ct->state == PATH_PENDING)
337                         r = pthread_cond_timedwait(&ct->active, &ct->lock, 
338                                                    &tsp);
339                 if (!r) {
340                         tur_status = ct->state;
341                         strlcpy(c->message, ct->message, sizeof(c->message));
342                 }
343                 pthread_mutex_unlock(&ct->lock);
344                 if (tur_status == PATH_PENDING) {
345                         condlog(3, "%d:%d : tur checker still running",
346                                 major(ct->devt), minor(ct->devt));
347                 } else {
348                         int running = uatomic_xchg(&ct->running, 0);
349                         if (running)
350                                 pthread_cancel(ct->thread);
351                         ct->thread = 0;
352                 }
353         }
354
355         return tur_status;
356 }