multipath-tools (coverity): assert availability of CLOCK_MONOTONIC
[multipath-tools/.git] / libmultipath / checkers / tur.c
1 /*
2  * Some code borrowed from sg-utils.
3  *
4  * Copyright (c) 2004 Christophe Varoqui
5  */
6 #include <stdio.h>
7 #include <stdlib.h>
8 #include <string.h>
9 #include <sys/types.h>
10 #include <sys/stat.h>
11 #include <unistd.h>
12 #include <fcntl.h>
13 #include <sys/ioctl.h>
14 #include <sys/sysmacros.h>
15 #include <errno.h>
16 #include <sys/time.h>
17 #include <pthread.h>
18 #include <urcu.h>
19 #include <urcu/uatomic.h>
20
21 #include "checkers.h"
22
23 #include "../libmultipath/debug.h"
24 #include "../libmultipath/sg_include.h"
25 #include "../libmultipath/util.h"
26 #include "../libmultipath/time-util.h"
27 #include "../libmultipath/util.h"
28
29 #define TUR_CMD_LEN 6
30 #define HEAVY_CHECK_COUNT       10
31
/*
 * Checker-specific message IDs. They start at CHECKER_FIRST_MSGID and
 * index libcheck_msgtable below (via the _IDX() offset macro).
 */
enum {
	MSG_TUR_RUNNING = CHECKER_FIRST_MSGID,
	MSG_TUR_TIMEOUT,
	MSG_TUR_FAILED,
};
37
/* Map a checker-local MSG_* ID to its zero-based table index */
#define _IDX(x) (MSG_ ## x - CHECKER_FIRST_MSGID)
/* Log-message suffixes for the IDs above; NULL-terminated */
const char *libcheck_msgtable[] = {
	[_IDX(TUR_RUNNING)] = " still running",
	[_IDX(TUR_TIMEOUT)] = " timed out",
	[_IDX(TUR_FAILED)] = " failed to initialize",
	NULL,
};
45
/*
 * Per-path state shared between the main checker and the async TUR thread.
 * Reference-counted through "holders"; whichever side drops the last
 * reference frees the context (see cleanup_context()).
 */
struct tur_checker_context {
	dev_t devt;		/* device number, used in log messages */
	int state;		/* last PATH_* result, protected by lock */
	int running; /* uatomic access only */
	int fd;			/* fd the TUR is issued on */
	unsigned int timeout;	/* SG_IO timeout, in seconds */
	time_t time;		/* monotonic-clock deadline for async check */
	pthread_t thread;	/* async worker, 0 if none */
	pthread_mutex_t lock;	/* protects state and msgid */
	pthread_cond_t active;	/* signaled when the worker stores a result */
	int holders; /* uatomic access only */
	int msgid;		/* checker message ID, protected by lock */
};
59
60 int libcheck_init (struct checker * c)
61 {
62         struct tur_checker_context *ct;
63         struct stat sb;
64
65         ct = malloc(sizeof(struct tur_checker_context));
66         if (!ct)
67                 return 1;
68         memset(ct, 0, sizeof(struct tur_checker_context));
69
70         ct->state = PATH_UNCHECKED;
71         ct->fd = -1;
72         uatomic_set(&ct->holders, 1);
73         pthread_cond_init_mono(&ct->active);
74         pthread_mutex_init(&ct->lock, NULL);
75         if (fstat(c->fd, &sb) == 0)
76                 ct->devt = sb.st_rdev;
77         c->context = ct;
78
79         return 0;
80 }
81
/*
 * Destroy the context's synchronization objects and free it.
 * Must only be called once the holder count has dropped to zero.
 */
static void cleanup_context(struct tur_checker_context *ct)
{
	pthread_mutex_destroy(&ct->lock);
	pthread_cond_destroy(&ct->active);
	free(ct);
}
88
89 void libcheck_free (struct checker * c)
90 {
91         if (c->context) {
92                 struct tur_checker_context *ct = c->context;
93                 int holders;
94                 int running;
95
96                 running = uatomic_xchg(&ct->running, 0);
97                 if (running)
98                         pthread_cancel(ct->thread);
99                 ct->thread = 0;
100                 holders = uatomic_sub_return(&ct->holders, 1);
101                 if (!holders)
102                         cleanup_context(ct);
103                 c->context = NULL;
104         }
105         return;
106 }
107
108 static int
109 tur_check(int fd, unsigned int timeout, short *msgid)
110 {
111         struct sg_io_hdr io_hdr;
112         unsigned char turCmdBlk[TUR_CMD_LEN] = { 0x00, 0, 0, 0, 0, 0 };
113         unsigned char sense_buffer[32];
114         int retry_tur = 5;
115
116 retry:
117         memset(&io_hdr, 0, sizeof (struct sg_io_hdr));
118         memset(&sense_buffer, 0, 32);
119         io_hdr.interface_id = 'S';
120         io_hdr.cmd_len = sizeof (turCmdBlk);
121         io_hdr.mx_sb_len = sizeof (sense_buffer);
122         io_hdr.dxfer_direction = SG_DXFER_NONE;
123         io_hdr.cmdp = turCmdBlk;
124         io_hdr.sbp = sense_buffer;
125         io_hdr.timeout = timeout * 1000;
126         io_hdr.pack_id = 0;
127         if (ioctl(fd, SG_IO, &io_hdr) < 0) {
128                 if (errno == ENOTTY) {
129                         *msgid = CHECKER_MSGID_UNSUPPORTED;
130                         return PATH_WILD;
131                 }
132                 *msgid = CHECKER_MSGID_DOWN;
133                 return PATH_DOWN;
134         }
135         if ((io_hdr.status & 0x7e) == 0x18) {
136                 /*
137                  * SCSI-3 arrays might return
138                  * reservation conflict on TUR
139                  */
140                 *msgid = CHECKER_MSGID_UP;
141                 return PATH_UP;
142         }
143         if (io_hdr.info & SG_INFO_OK_MASK) {
144                 int key = 0, asc, ascq;
145
146                 switch (io_hdr.host_status) {
147                 case DID_OK:
148                 case DID_NO_CONNECT:
149                 case DID_BAD_TARGET:
150                 case DID_ABORT:
151                 case DID_TRANSPORT_FAILFAST:
152                         break;
153                 default:
154                         /* Driver error, retry */
155                         if (--retry_tur)
156                                 goto retry;
157                         break;
158                 }
159                 if (io_hdr.sb_len_wr > 3) {
160                         if (io_hdr.sbp[0] == 0x72 || io_hdr.sbp[0] == 0x73) {
161                                 key = io_hdr.sbp[1] & 0x0f;
162                                 asc = io_hdr.sbp[2];
163                                 ascq = io_hdr.sbp[3];
164                         } else if (io_hdr.sb_len_wr > 13 &&
165                                    ((io_hdr.sbp[0] & 0x7f) == 0x70 ||
166                                     (io_hdr.sbp[0] & 0x7f) == 0x71)) {
167                                 key = io_hdr.sbp[2] & 0x0f;
168                                 asc = io_hdr.sbp[12];
169                                 ascq = io_hdr.sbp[13];
170                         }
171                 }
172                 if (key == 0x6) {
173                         /* Unit Attention, retry */
174                         if (--retry_tur)
175                                 goto retry;
176                 }
177                 else if (key == 0x2) {
178                         /* Not Ready */
179                         /* Note: Other ALUA states are either UP or DOWN */
180                         if( asc == 0x04 && ascq == 0x0b){
181                                 /*
182                                  * LOGICAL UNIT NOT ACCESSIBLE,
183                                  * TARGET PORT IN STANDBY STATE
184                                  */
185                                 *msgid = CHECKER_MSGID_GHOST;
186                                 return PATH_GHOST;
187                         }
188                 }
189                 *msgid = CHECKER_MSGID_DOWN;
190                 return PATH_DOWN;
191         }
192         *msgid = CHECKER_MSGID_UP;
193         return PATH_UP;
194 }
195
196 #define tur_thread_cleanup_push(ct) pthread_cleanup_push(cleanup_func, ct)
197 #define tur_thread_cleanup_pop(ct) pthread_cleanup_pop(1)
198
199 static void cleanup_func(void *data)
200 {
201         int holders;
202         struct tur_checker_context *ct = data;
203
204         holders = uatomic_sub_return(&ct->holders, 1);
205         if (!holders)
206                 cleanup_context(ct);
207         rcu_unregister_thread();
208 }
209
210 /*
211  * Test code for "zombie tur thread" handling.
212  * Compile e.g. with CFLAGS=-DTUR_TEST_MAJOR=8
 * Additional parameters can be configured with the macros below.
214  *
 * Every nth started TUR thread will hang in non-cancellable state
216  * for given number of seconds, for device given by major/minor.
217  */
218 #ifdef TUR_TEST_MAJOR
219
220 #ifndef TUR_TEST_MINOR
221 #define TUR_TEST_MINOR 0
222 #endif
223 #ifndef TUR_SLEEP_INTERVAL
224 #define TUR_SLEEP_INTERVAL 3
225 #endif
226 #ifndef TUR_SLEEP_SECS
227 #define TUR_SLEEP_SECS 60
228 #endif
229
/*
 * Test hook (compiled only with TUR_TEST_MAJOR): make every
 * TUR_SLEEP_INTERVAL-th worker thread on the configured device sleep
 * TUR_SLEEP_SECS seconds with cancellation disabled, simulating a
 * zombie tur thread that ignores pthread_cancel().
 */
static void tur_deep_sleep(const struct tur_checker_context *ct)
{
	static int sleep_cnt;
	const struct timespec ts = { .tv_sec = TUR_SLEEP_SECS, .tv_nsec = 0 };
	int oldstate;

	if (ct->devt != makedev(TUR_TEST_MAJOR, TUR_TEST_MINOR) ||
	    ++sleep_cnt % TUR_SLEEP_INTERVAL != 0)
		return;

	condlog(1, "tur thread going to sleep for %ld seconds", ts.tv_sec);
	/* Disable cancellation so the sleep cannot be cut short */
	if (pthread_setcancelstate(PTHREAD_CANCEL_DISABLE, &oldstate) != 0)
		condlog(0, "pthread_setcancelstate: %m");
	if (nanosleep(&ts, NULL) != 0)
		condlog(0, "nanosleep: %m");
	condlog(1, "tur zombie thread woke up");
	if (pthread_setcancelstate(oldstate, NULL) != 0)
		condlog(0, "pthread_setcancelstate (2): %m");
	/* Act on any cancellation request that arrived while asleep */
	pthread_testcancel();
}
250 #else
251 #define tur_deep_sleep(x) do {} while (0)
252 #endif /* TUR_TEST_MAJOR */
253
/*
 * Async worker: run one TUR check, publish the result into the shared
 * context under the lock, and signal the waiting checker. The thread is
 * cancellable; cleanup_func (pushed below) drops its context reference
 * whether it exits normally or is cancelled.
 */
static void *tur_thread(void *ctx)
{
	struct tur_checker_context *ct = ctx;
	int state, running;
	short msgid;

	/* This thread can be canceled, so setup clean up */
	tur_thread_cleanup_push(ct);
	rcu_register_thread();

	condlog(4, "%d:%d : tur checker starting up", major(ct->devt),
		minor(ct->devt));

	tur_deep_sleep(ct);
	state = tur_check(ct->fd, ct->timeout, &msgid);
	pthread_testcancel();

	/* TUR checker done */
	pthread_mutex_lock(&ct->lock);
	ct->state = state;
	ct->msgid = msgid;
	pthread_cond_signal(&ct->active);
	pthread_mutex_unlock(&ct->lock);

	condlog(4, "%d:%d : tur checker finished, state %s", major(ct->devt),
		minor(ct->devt), checker_state_name(state));

	/*
	 * If "running" was already cleared, the main thread gave up on us
	 * (timeout) and will cancel this thread; wait for the cancellation
	 * in pause() rather than racing it with our own exit.
	 */
	running = uatomic_xchg(&ct->running, 0);
	if (!running)
		pause();

	tur_thread_cleanup_pop(ct);

	return ((void *)0);
}
289
290
/*
 * Fill *tsp with an absolute monotonic-clock deadline one millisecond
 * from now, suitable for pthread_cond_timedwait().
 */
static void tur_timeout(struct timespec *tsp)
{
	get_monotonic_time(tsp);
	tsp->tv_nsec += 1000000;	/* 1 millisecond */
	normalize_timespec(tsp);
}
297
298 static void tur_set_async_timeout(struct checker *c)
299 {
300         struct tur_checker_context *ct = c->context;
301         struct timespec now;
302
303         get_monotonic_time(&now);
304         ct->time = now.tv_sec + c->timeout;
305 }
306
307 static int tur_check_async_timeout(struct checker *c)
308 {
309         struct tur_checker_context *ct = c->context;
310         struct timespec now;
311
312         get_monotonic_time(&now);
313         return (now.tv_sec > ct->time);
314 }
315
/*
 * Run the TUR check for path "c". In sync mode the TUR is issued
 * directly. In async mode this manages the worker thread: reap a
 * finished thread's result, cancel one that overran the timeout, or
 * start a new thread and wait briefly (1ms) for it, returning
 * PATH_PENDING when it has not finished yet.
 */
int libcheck_check(struct checker * c)
{
	struct tur_checker_context *ct = c->context;
	struct timespec tsp;
	pthread_attr_t attr;
	int tur_status, r;

	if (!ct)
		return PATH_UNCHECKED;

	if (checker_is_sync(c))
		return tur_check(c->fd, c->timeout, &c->msgid);

	/*
	 * Async mode
	 */
	if (ct->thread) {
		if (tur_check_async_timeout(c)) {
			int running = uatomic_xchg(&ct->running, 0);
			if (running) {
				/* Worker overran the deadline: cancel it */
				pthread_cancel(ct->thread);
				condlog(3, "%d:%d : tur checker timeout",
					major(ct->devt), minor(ct->devt));
				c->msgid = MSG_TUR_TIMEOUT;
				tur_status = PATH_TIMEOUT;
			} else {
				/* Worker finished just in time: take result */
				pthread_mutex_lock(&ct->lock);
				tur_status = ct->state;
				c->msgid = ct->msgid;
				pthread_mutex_unlock(&ct->lock);
			}
			ct->thread = 0;
		} else if (uatomic_read(&ct->running) != 0) {
			/* Worker still within its deadline: keep waiting */
			condlog(3, "%d:%d : tur checker not finished",
				major(ct->devt), minor(ct->devt));
			tur_status = PATH_PENDING;
		} else {
			/* TUR checker done */
			ct->thread = 0;
			pthread_mutex_lock(&ct->lock);
			tur_status = ct->state;
			c->msgid = ct->msgid;
			pthread_mutex_unlock(&ct->lock);
		}
	} else {
		if (uatomic_read(&ct->holders) > 1) {
			/*
			 * The thread has been cancelled but hasn't quit.
			 * We have to prevent it from interfering with the new
			 * thread. We create a new context and leave the old
			 * one with the stale thread, hoping it will clean up
			 * eventually.
			 */
			condlog(3, "%d:%d : tur thread not responding",
				major(ct->devt), minor(ct->devt));

			/*
			 * libcheck_init will replace c->context.
			 * It fails only in OOM situations. In this case, return
			 * PATH_UNCHECKED to avoid prematurely failing the path.
			 */
			if (libcheck_init(c) != 0)
				return PATH_UNCHECKED;

			if (!uatomic_sub_return(&ct->holders, 1))
				/* It did terminate, eventually */
				cleanup_context(ct);

			ct = c->context;
		}
		/* Start new TUR checker */
		pthread_mutex_lock(&ct->lock);
		tur_status = ct->state = PATH_PENDING;
		ct->msgid = CHECKER_MSGID_NONE;
		pthread_mutex_unlock(&ct->lock);
		ct->fd = c->fd;
		ct->timeout = c->timeout;
		/* Extra holder reference owned by the new worker thread */
		uatomic_add(&ct->holders, 1);
		uatomic_set(&ct->running, 1);
		tur_set_async_timeout(c);
		setup_thread_attr(&attr, 32 * 1024, 1);
		r = pthread_create(&ct->thread, &attr, tur_thread, ct);
		pthread_attr_destroy(&attr);
		if (r) {
			/* Thread creation failed: roll back and check inline */
			uatomic_sub(&ct->holders, 1);
			uatomic_set(&ct->running, 0);
			ct->thread = 0;
			condlog(3, "%d:%d : failed to start tur thread, using"
				" sync mode", major(ct->devt), minor(ct->devt));
			return tur_check(c->fd, c->timeout, &c->msgid);
		}
		/* Give the worker up to 1ms to deliver a result */
		tur_timeout(&tsp);
		pthread_mutex_lock(&ct->lock);
		if (ct->state == PATH_PENDING)
			r = pthread_cond_timedwait(&ct->active, &ct->lock,
						   &tsp);
		/*
		 * r == 0 (from pthread_create above or a successful wait)
		 * means the stored state/msgid are valid to read.
		 */
		if (!r) {
			tur_status = ct->state;
			c->msgid = ct->msgid;
		}
		pthread_mutex_unlock(&ct->lock);
		if (tur_status == PATH_PENDING) {
			condlog(4, "%d:%d : tur checker still running",
				major(ct->devt), minor(ct->devt));
		} else {
			/* Result obtained: stop the worker if still running */
			int running = uatomic_xchg(&ct->running, 0);
			if (running)
				pthread_cancel(ct->thread);
			ct->thread = 0;
		}
	}

	return tur_status;
}