b4a5cb2fa60f34f3afa5d4e1e6095d4cf0de4baa
[multipath-tools/.git] / libmultipath / checkers / tur.c
1 /*
2  * Some code borrowed from sg-utils.
3  *
4  * Copyright (c) 2004 Christophe Varoqui
5  */
6 #include <stdio.h>
7 #include <stdlib.h>
8 #include <string.h>
9 #include <sys/types.h>
10 #include <sys/stat.h>
11 #include <unistd.h>
12 #include <fcntl.h>
13 #include <sys/ioctl.h>
14 #include <sys/sysmacros.h>
15 #include <errno.h>
16 #include <sys/time.h>
17 #include <pthread.h>
18
19 #include "checkers.h"
20
21 #include "../libmultipath/debug.h"
22 #include "../libmultipath/sg_include.h"
23 #include "../libmultipath/util.h"
24 #include "../libmultipath/time-util.h"
25 #include "../libmultipath/util.h"
26
/* Size in bytes of the command block that tur_check() hands to SG_IO. */
#define TUR_CMD_LEN 6
/* Not referenced in the visible part of this file. */
#define HEAVY_CHECK_COUNT       10

/*
 * Human readable result strings. They are delivered either through the
 * copy_message callback of tur_check() or directly via MSG().
 */
#define MSG_TUR_UP      "tur checker reports path is up"
#define MSG_TUR_DOWN    "tur checker reports path is down"
#define MSG_TUR_GHOST   "tur checker reports path is in standby state"
#define MSG_TUR_RUNNING "tur checker still running"
#define MSG_TUR_TIMEOUT "tur checker timed out"
#define MSG_TUR_FAILED  "tur checker failed to initialize"
36
37 struct tur_checker_context {
38         dev_t devt;
39         int state;
40         int running;
41         int fd;
42         unsigned int timeout;
43         time_t time;
44         pthread_t thread;
45         pthread_mutex_t lock;
46         pthread_cond_t active;
47         pthread_spinlock_t hldr_lock;
48         int holders;
49         char message[CHECKER_MSG_LEN];
50 };
51
52 static const char *tur_devt(char *devt_buf, int size,
53                             struct tur_checker_context *ct)
54 {
55         dev_t devt;
56
57         pthread_mutex_lock(&ct->lock);
58         devt = ct->devt;
59         pthread_mutex_unlock(&ct->lock);
60
61         snprintf(devt_buf, size, "%d:%d", major(devt), minor(devt));
62         return devt_buf;
63 }
64
65 int libcheck_init (struct checker * c)
66 {
67         struct tur_checker_context *ct;
68         pthread_mutexattr_t attr;
69
70         ct = malloc(sizeof(struct tur_checker_context));
71         if (!ct)
72                 return 1;
73         memset(ct, 0, sizeof(struct tur_checker_context));
74
75         ct->state = PATH_UNCHECKED;
76         ct->fd = -1;
77         ct->holders = 1;
78         pthread_cond_init_mono(&ct->active);
79         pthread_mutexattr_init(&attr);
80         pthread_mutexattr_settype(&attr, PTHREAD_MUTEX_RECURSIVE);
81         pthread_mutex_init(&ct->lock, &attr);
82         pthread_mutexattr_destroy(&attr);
83         pthread_spin_init(&ct->hldr_lock, PTHREAD_PROCESS_PRIVATE);
84         c->context = ct;
85
86         return 0;
87 }
88
89 static void cleanup_context(struct tur_checker_context *ct)
90 {
91         pthread_mutex_destroy(&ct->lock);
92         pthread_cond_destroy(&ct->active);
93         pthread_spin_destroy(&ct->hldr_lock);
94         free(ct);
95 }
96
97 void libcheck_free (struct checker * c)
98 {
99         if (c->context) {
100                 struct tur_checker_context *ct = c->context;
101                 int holders;
102                 pthread_t thread;
103
104                 pthread_spin_lock(&ct->hldr_lock);
105                 ct->holders--;
106                 holders = ct->holders;
107                 thread = ct->thread;
108                 pthread_spin_unlock(&ct->hldr_lock);
109                 if (holders)
110                         pthread_cancel(thread);
111                 else
112                         cleanup_context(ct);
113                 c->context = NULL;
114         }
115         return;
116 }
117
/* The TUR checker has nothing to repair; this is an intentional no-op. */
void libcheck_repair (struct checker * c)
{
	(void)c;
}
122
/*
 * Format a message into a temporary buffer and pass it to the copy_message
 * callback. Relies on `copy_message` and `cb_arg` being in scope at the
 * point of use.
 */
#define TUR_MSG(fmt, ...)						\
	do {								\
		char tur_msg_[CHECKER_MSG_LEN];				\
									\
		snprintf(tur_msg_, sizeof(tur_msg_), fmt, ##__VA_ARGS__); \
		copy_message(cb_arg, tur_msg_);				\
	} while (0)
130
131 static int
132 tur_check(int fd, unsigned int timeout,
133           void (*copy_message)(void *, const char *), void *cb_arg)
134 {
135         struct sg_io_hdr io_hdr;
136         unsigned char turCmdBlk[TUR_CMD_LEN] = { 0x00, 0, 0, 0, 0, 0 };
137         unsigned char sense_buffer[32];
138         int retry_tur = 5;
139
140 retry:
141         memset(&io_hdr, 0, sizeof (struct sg_io_hdr));
142         memset(&sense_buffer, 0, 32);
143         io_hdr.interface_id = 'S';
144         io_hdr.cmd_len = sizeof (turCmdBlk);
145         io_hdr.mx_sb_len = sizeof (sense_buffer);
146         io_hdr.dxfer_direction = SG_DXFER_NONE;
147         io_hdr.cmdp = turCmdBlk;
148         io_hdr.sbp = sense_buffer;
149         io_hdr.timeout = timeout * 1000;
150         io_hdr.pack_id = 0;
151         if (ioctl(fd, SG_IO, &io_hdr) < 0) {
152                 TUR_MSG(MSG_TUR_DOWN);
153                 return PATH_DOWN;
154         }
155         if ((io_hdr.status & 0x7e) == 0x18) {
156                 /*
157                  * SCSI-3 arrays might return
158                  * reservation conflict on TUR
159                  */
160                 TUR_MSG(MSG_TUR_UP);
161                 return PATH_UP;
162         }
163         if (io_hdr.info & SG_INFO_OK_MASK) {
164                 int key = 0, asc, ascq;
165
166                 switch (io_hdr.host_status) {
167                 case DID_OK:
168                 case DID_NO_CONNECT:
169                 case DID_BAD_TARGET:
170                 case DID_ABORT:
171                 case DID_TRANSPORT_FAILFAST:
172                         break;
173                 default:
174                         /* Driver error, retry */
175                         if (--retry_tur)
176                                 goto retry;
177                         break;
178                 }
179                 if (io_hdr.sb_len_wr > 3) {
180                         if (io_hdr.sbp[0] == 0x72 || io_hdr.sbp[0] == 0x73) {
181                                 key = io_hdr.sbp[1] & 0x0f;
182                                 asc = io_hdr.sbp[2];
183                                 ascq = io_hdr.sbp[3];
184                         } else if (io_hdr.sb_len_wr > 13 &&
185                                    ((io_hdr.sbp[0] & 0x7f) == 0x70 ||
186                                     (io_hdr.sbp[0] & 0x7f) == 0x71)) {
187                                 key = io_hdr.sbp[2] & 0x0f;
188                                 asc = io_hdr.sbp[12];
189                                 ascq = io_hdr.sbp[13];
190                         }
191                 }
192                 if (key == 0x6) {
193                         /* Unit Attention, retry */
194                         if (--retry_tur)
195                                 goto retry;
196                 }
197                 else if (key == 0x2) {
198                         /* Not Ready */
199                         /* Note: Other ALUA states are either UP or DOWN */
200                         if( asc == 0x04 && ascq == 0x0b){
201                                 /*
202                                  * LOGICAL UNIT NOT ACCESSIBLE,
203                                  * TARGET PORT IN STANDBY STATE
204                                  */
205                                 TUR_MSG(MSG_TUR_GHOST);
206                                 return PATH_GHOST;
207                         }
208                 }
209                 TUR_MSG(MSG_TUR_DOWN);
210                 return PATH_DOWN;
211         }
212         TUR_MSG(MSG_TUR_UP);
213         return PATH_UP;
214 }
215
/*
 * Wrappers around pthread_cleanup_push()/pthread_cleanup_pop() that register
 * cleanup_func() for the checker thread. The pop variant always runs the
 * handler (argument 1); its ct parameter is unused. Both must be used as a
 * pair within the same function.
 */
#define tur_thread_cleanup_push(ct) pthread_cleanup_push(cleanup_func, (ct))
#define tur_thread_cleanup_pop(ct) pthread_cleanup_pop(1)
218
219 static void cleanup_func(void *data)
220 {
221         int holders;
222         struct tur_checker_context *ct = data;
223         pthread_spin_lock(&ct->hldr_lock);
224         ct->holders--;
225         holders = ct->holders;
226         ct->thread = 0;
227         pthread_spin_unlock(&ct->hldr_lock);
228         if (!holders)
229                 cleanup_context(ct);
230 }
231
232 static int tur_running(struct tur_checker_context *ct)
233 {
234         pthread_t thread;
235
236         pthread_spin_lock(&ct->hldr_lock);
237         thread = ct->thread;
238         pthread_spin_unlock(&ct->hldr_lock);
239
240         return thread != 0;
241 }
242
243 static void copy_msg_to_tcc(void *ct_p, const char *msg)
244 {
245         struct tur_checker_context *ct = ct_p;
246
247         pthread_mutex_lock(&ct->lock);
248         strlcpy(ct->message, msg, sizeof(ct->message));
249         pthread_mutex_unlock(&ct->lock);
250 }
251
252 static void *tur_thread(void *ctx)
253 {
254         struct tur_checker_context *ct = ctx;
255         int state;
256         char devt[32];
257
258         condlog(3, "%s: tur checker starting up",
259                 tur_devt(devt, sizeof(devt), ct));
260
261         /* This thread can be canceled, so setup clean up */
262         tur_thread_cleanup_push(ct);
263
264         /* TUR checker start up */
265         pthread_mutex_lock(&ct->lock);
266         ct->state = PATH_PENDING;
267         ct->message[0] = '\0';
268         pthread_mutex_unlock(&ct->lock);
269
270         state = tur_check(ct->fd, ct->timeout, copy_msg_to_tcc, ct->message);
271         pthread_testcancel();
272
273         /* TUR checker done */
274         pthread_mutex_lock(&ct->lock);
275         ct->state = state;
276         pthread_cond_signal(&ct->active);
277         pthread_mutex_unlock(&ct->lock);
278
279         condlog(3, "%s: tur checker finished, state %s",
280                 tur_devt(devt, sizeof(devt), ct), checker_state_name(state));
281         tur_thread_cleanup_pop(ct);
282
283         return ((void *)0);
284 }
285
286
287 static void tur_timeout(struct timespec *tsp)
288 {
289         clock_gettime(CLOCK_MONOTONIC, tsp);
290         tsp->tv_nsec += 1000 * 1000; /* 1 millisecond */
291         normalize_timespec(tsp);
292 }
293
294 static void tur_set_async_timeout(struct checker *c)
295 {
296         struct tur_checker_context *ct = c->context;
297         struct timespec now;
298
299         clock_gettime(CLOCK_MONOTONIC, &now);
300         ct->time = now.tv_sec + c->timeout;
301 }
302
303 static int tur_check_async_timeout(struct checker *c)
304 {
305         struct tur_checker_context *ct = c->context;
306         struct timespec now;
307
308         clock_gettime(CLOCK_MONOTONIC, &now);
309         return (now.tv_sec > ct->time);
310 }
311
312 static void copy_msg_to_checker(void *c_p, const char *msg)
313 {
314         struct checker *c = c_p;
315
316         strlcpy(c->message, msg, sizeof(c->message));
317 }
318
319 int libcheck_check(struct checker * c)
320 {
321         struct tur_checker_context *ct = c->context;
322         struct timespec tsp;
323         struct stat sb;
324         pthread_attr_t attr;
325         int tur_status, r;
326         char devt[32];
327
328
329         if (!ct)
330                 return PATH_UNCHECKED;
331
332         if (fstat(c->fd, &sb) == 0) {
333                 pthread_mutex_lock(&ct->lock);
334                 ct->devt = sb.st_rdev;
335                 pthread_mutex_unlock(&ct->lock);
336         }
337
338         if (c->sync)
339                 return tur_check(c->fd, c->timeout, copy_msg_to_checker, c);
340
341         /*
342          * Async mode
343          */
344         r = pthread_mutex_lock(&ct->lock);
345         if (r != 0) {
346                 condlog(2, "%s: tur mutex lock failed with %d",
347                         tur_devt(devt, sizeof(devt), ct), r);
348                 MSG(c, MSG_TUR_FAILED);
349                 return PATH_WILD;
350         }
351
352         if (ct->running) {
353                 /*
354                  * Check if TUR checker is still running. Hold hldr_lock
355                  * around the pthread_cancel() call to avoid that
356                  * pthread_cancel() gets called after the (detached) TUR
357                  * thread has exited.
358                  */
359                 pthread_spin_lock(&ct->hldr_lock);
360                 if (ct->thread) {
361                         if (tur_check_async_timeout(c)) {
362                                 condlog(3, "%s: tur checker timeout",
363                                         tur_devt(devt, sizeof(devt), ct));
364                                 pthread_cancel(ct->thread);
365                                 ct->running = 0;
366                                 MSG(c, MSG_TUR_TIMEOUT);
367                                 tur_status = PATH_TIMEOUT;
368                         } else {
369                                 condlog(3, "%s: tur checker not finished",
370                                         tur_devt(devt, sizeof(devt), ct));
371                                 ct->running++;
372                                 tur_status = PATH_PENDING;
373                         }
374                 } else {
375                         /* TUR checker done */
376                         ct->running = 0;
377                         tur_status = ct->state;
378                         strlcpy(c->message, ct->message, sizeof(c->message));
379                 }
380                 pthread_spin_unlock(&ct->hldr_lock);
381                 pthread_mutex_unlock(&ct->lock);
382         } else {
383                 if (tur_running(ct)) {
384                         /* pthread cancel failed. continue in sync mode */
385                         pthread_mutex_unlock(&ct->lock);
386                         condlog(3, "%s: tur thread not responding",
387                                 tur_devt(devt, sizeof(devt), ct));
388                         return PATH_TIMEOUT;
389                 }
390                 /* Start new TUR checker */
391                 ct->state = PATH_UNCHECKED;
392                 ct->fd = c->fd;
393                 ct->timeout = c->timeout;
394                 pthread_spin_lock(&ct->hldr_lock);
395                 ct->holders++;
396                 pthread_spin_unlock(&ct->hldr_lock);
397                 tur_set_async_timeout(c);
398                 setup_thread_attr(&attr, 32 * 1024, 1);
399                 r = pthread_create(&ct->thread, &attr, tur_thread, ct);
400                 pthread_attr_destroy(&attr);
401                 if (r) {
402                         pthread_spin_lock(&ct->hldr_lock);
403                         ct->holders--;
404                         pthread_spin_unlock(&ct->hldr_lock);
405                         pthread_mutex_unlock(&ct->lock);
406                         ct->thread = 0;
407                         condlog(3, "%s: failed to start tur thread, using"
408                                 " sync mode", tur_devt(devt, sizeof(devt), ct));
409                         return tur_check(c->fd, c->timeout,
410                                          copy_msg_to_checker, c);
411                 }
412                 tur_timeout(&tsp);
413                 r = pthread_cond_timedwait(&ct->active, &ct->lock, &tsp);
414                 tur_status = ct->state;
415                 strlcpy(c->message, ct->message, sizeof(c->message));
416                 pthread_mutex_unlock(&ct->lock);
417                 if (tur_running(ct) &&
418                     (tur_status == PATH_PENDING || tur_status == PATH_UNCHECKED)) {
419                         condlog(3, "%s: tur checker still running",
420                                 tur_devt(devt, sizeof(devt), ct));
421                         ct->running = 1;
422                         tur_status = PATH_PENDING;
423                 }
424         }
425
426         return tur_status;
427 }