multipath: do not call tur in sync mode if pthread_cancel fails
[multipath-tools/.git] / libmultipath / checkers / tur.c
1 /*
2  * Some code borrowed from sg-utils.
3  *
4  * Copyright (c) 2004 Christophe Varoqui
5  */
6 #include <stdio.h>
7 #include <stdlib.h>
8 #include <string.h>
9 #include <sys/types.h>
10 #include <sys/stat.h>
11 #include <unistd.h>
12 #include <fcntl.h>
13 #include <sys/ioctl.h>
14 #include <errno.h>
15 #include <sys/time.h>
16 #include <pthread.h>
17
18 #include "checkers.h"
19
20 #include "../libmultipath/debug.h"
21 #include "../libmultipath/sg_include.h"
22 #include "../libmultipath/uevent.h"
23
/* Size in bytes of the command block sent by tur_check(). */
#define TUR_CMD_LEN		6
/* NOTE(review): not referenced anywhere in the visible part of this file. */
#define HEAVY_CHECK_COUNT	10

/*
 * Human readable checker messages.  Each one is copied into a
 * CHECKER_MSG_LEN sized buffer together with the matching path state.
 */
#define MSG_TUR_UP	"tur checker reports path is up"
#define MSG_TUR_DOWN	"tur checker reports path is down"
#define MSG_TUR_GHOST	"tur checker reports path is in standby state"
#define MSG_TUR_RUNNING	"tur checker still running"
#define MSG_TUR_TIMEOUT	"tur checker timed out"
#define MSG_TUR_FAILED	"tur checker failed to initialize"
33
34 struct tur_checker_context {
35         dev_t devt;
36         int state;
37         int running;
38         int fd;
39         unsigned int timeout;
40         time_t time;
41         pthread_t thread;
42         pthread_mutex_t lock;
43         pthread_cond_t active;
44         pthread_spinlock_t hldr_lock;
45         int holders;
46         char message[CHECKER_MSG_LEN];
47 };
48
49 #define TUR_DEVT(c) major((c)->devt), minor((c)->devt)
50
51 int libcheck_init (struct checker * c)
52 {
53         struct tur_checker_context *ct;
54
55         ct = malloc(sizeof(struct tur_checker_context));
56         if (!ct)
57                 return 1;
58         memset(ct, 0, sizeof(struct tur_checker_context));
59
60         ct->state = PATH_UNCHECKED;
61         ct->fd = -1;
62         ct->holders = 1;
63         pthread_cond_init(&ct->active, NULL);
64         pthread_mutex_init(&ct->lock, NULL);
65         pthread_spin_init(&ct->hldr_lock, PTHREAD_PROCESS_PRIVATE);
66         c->context = ct;
67
68         return 0;
69 }
70
71 void cleanup_context(struct tur_checker_context *ct)
72 {
73         pthread_mutex_destroy(&ct->lock);
74         pthread_cond_destroy(&ct->active);
75         pthread_spin_destroy(&ct->hldr_lock);
76         free(ct);
77 }
78
79 void libcheck_free (struct checker * c)
80 {
81         if (c->context) {
82                 struct tur_checker_context *ct = c->context;
83                 int holders;
84                 pthread_t thread;
85
86                 pthread_spin_lock(&ct->hldr_lock);
87                 ct->holders--;
88                 holders = ct->holders;
89                 thread = ct->thread;
90                 pthread_spin_unlock(&ct->hldr_lock);
91                 if (holders)
92                         pthread_cancel(thread);
93                 else
94                         cleanup_context(ct);
95                 c->context = NULL;
96         }
97         return;
98 }
99
/*
 * Format a checker message into @msg, which must provide room for
 * CHECKER_MSG_LEN bytes.  The macro expands to a single expression (no
 * trailing semicolon), so it is safe in unbraced if/else bodies.
 */
#define TUR_MSG(msg, fmt, args...) snprintf(msg, CHECKER_MSG_LEN, fmt, ##args)
101
102 int
103 tur_check(int fd, unsigned int timeout, char *msg)
104 {
105         struct sg_io_hdr io_hdr;
106         unsigned char turCmdBlk[TUR_CMD_LEN] = { 0x00, 0, 0, 0, 0, 0 };
107         unsigned char sense_buffer[32];
108         int retry_tur = 5;
109
110  retry:
111         memset(&io_hdr, 0, sizeof (struct sg_io_hdr));
112         memset(&sense_buffer, 0, 32);
113         io_hdr.interface_id = 'S';
114         io_hdr.cmd_len = sizeof (turCmdBlk);
115         io_hdr.mx_sb_len = sizeof (sense_buffer);
116         io_hdr.dxfer_direction = SG_DXFER_NONE;
117         io_hdr.cmdp = turCmdBlk;
118         io_hdr.sbp = sense_buffer;
119         io_hdr.timeout = timeout * 1000;
120         io_hdr.pack_id = 0;
121         if (ioctl(fd, SG_IO, &io_hdr) < 0) {
122                 TUR_MSG(msg, MSG_TUR_DOWN);
123                 return PATH_DOWN;
124         }
125         if ((io_hdr.status & 0x7e) == 0x18) {
126                 /*
127                  * SCSI-3 arrays might return
128                  * reservation conflict on TUR
129                  */
130                 TUR_MSG(msg, MSG_TUR_UP);
131                 return PATH_UP;
132         }
133         if (io_hdr.info & SG_INFO_OK_MASK) {
134                 int key = 0, asc, ascq;
135
136                 switch (io_hdr.host_status) {
137                 case DID_OK:
138                 case DID_NO_CONNECT:
139                 case DID_BAD_TARGET:
140                 case DID_ABORT:
141                 case DID_TRANSPORT_FAILFAST:
142                         break;
143                 default:
144                         /* Driver error, retry */
145                         if (--retry_tur)
146                                 goto retry;
147                         break;
148                 }
149                 if (io_hdr.sb_len_wr > 3) {
150                         if (io_hdr.sbp[0] == 0x72 || io_hdr.sbp[0] == 0x73) {
151                                 key = io_hdr.sbp[1] & 0x0f;
152                                 asc = io_hdr.sbp[2];
153                                 ascq = io_hdr.sbp[3];
154                         } else if (io_hdr.sb_len_wr > 13 &&
155                                    ((io_hdr.sbp[0] & 0x7f) == 0x70 ||
156                                     (io_hdr.sbp[0] & 0x7f) == 0x71)) {
157                                 key = io_hdr.sbp[2] & 0x0f;
158                                 asc = io_hdr.sbp[12];
159                                 ascq = io_hdr.sbp[13];
160                         }
161                 }
162                 if (key == 0x6) {
163                         /* Unit Attention, retry */
164                         if (--retry_tur)
165                                 goto retry;
166                 }
167                 else if (key == 0x2) {
168                         /* Not Ready */
169                         /* Note: Other ALUA states are either UP or DOWN */
170                         if( asc == 0x04 && ascq == 0x0b){
171                                 /*
172                                  * LOGICAL UNIT NOT ACCESSIBLE,
173                                  * TARGET PORT IN STANDBY STATE
174                                  */
175                                 TUR_MSG(msg, MSG_TUR_GHOST);
176                                 return PATH_GHOST;
177                         }
178                 }
179                 TUR_MSG(msg, MSG_TUR_DOWN);
180                 return PATH_DOWN;
181         }
182         TUR_MSG(msg, MSG_TUR_UP);
183         return PATH_UP;
184 }
185
186 #define tur_thread_cleanup_push(ct) pthread_cleanup_push(cleanup_func, ct)
187 #define tur_thread_cleanup_pop(ct) pthread_cleanup_pop(1)
188
189 void cleanup_func(void *data)
190 {
191         int holders;
192         struct tur_checker_context *ct = data;
193         pthread_spin_lock(&ct->hldr_lock);
194         ct->holders--;
195         holders = ct->holders;
196         ct->thread = 0;
197         pthread_spin_unlock(&ct->hldr_lock);
198         if (!holders)
199                 cleanup_context(ct);
200 }
201
202 void *tur_thread(void *ctx)
203 {
204         struct tur_checker_context *ct = ctx;
205         int state;
206
207         condlog(3, "%d:%d: tur checker starting up", TUR_DEVT(ct));
208
209         ct->message[0] = '\0';
210         /* This thread can be canceled, so setup clean up */
211         tur_thread_cleanup_push(ct)
212
213         /* TUR checker start up */
214         pthread_mutex_lock(&ct->lock);
215         ct->state = PATH_PENDING;
216         pthread_mutex_unlock(&ct->lock);
217
218         state = tur_check(ct->fd, ct->timeout, ct->message);
219
220         /* TUR checker done */
221         pthread_mutex_lock(&ct->lock);
222         ct->state = state;
223         pthread_mutex_unlock(&ct->lock);
224         pthread_cond_signal(&ct->active);
225
226         condlog(3, "%d:%d: tur checker finished, state %s",
227                 TUR_DEVT(ct), checker_state_name(state));
228         tur_thread_cleanup_pop(ct);
229         return ((void *)0);
230 }
231
232
/*
 * Fill @tsp with an absolute wall-clock time one millisecond from now,
 * suitable as the deadline argument of pthread_cond_timedwait().
 *
 * The result is normalised so that tv_nsec stays below one second; an
 * out-of-range tv_nsec would make the timed wait fail with EINVAL.
 */
void tur_timeout(struct timespec *tsp)
{
	struct timeval now;

	gettimeofday(&now, NULL);
	tsp->tv_sec = now.tv_sec;
	tsp->tv_nsec = now.tv_usec * 1000L;
	tsp->tv_nsec += 1000000L; /* 1 millisecond */
	if (tsp->tv_nsec >= 1000000000L) {
		/* carry the overflow into the seconds field */
		tsp->tv_sec += 1;
		tsp->tv_nsec -= 1000000000L;
	}
}
242
243 void tur_set_async_timeout(struct checker *c)
244 {
245         struct tur_checker_context *ct = c->context;
246         struct timeval now;
247
248         gettimeofday(&now, NULL);
249         ct->time = now.tv_sec + c->timeout;
250 }
251
252 int tur_check_async_timeout(struct checker *c)
253 {
254         struct tur_checker_context *ct = c->context;
255         struct timeval now;
256
257         gettimeofday(&now, NULL);
258         return (now.tv_sec > ct->time);
259 }
260
261 extern int
262 libcheck_check (struct checker * c)
263 {
264         struct tur_checker_context *ct = c->context;
265         struct timespec tsp;
266         struct stat sb;
267         pthread_attr_t attr;
268         int tur_status, r;
269
270
271         if (!ct)
272                 return PATH_UNCHECKED;
273
274         if (fstat(c->fd, &sb) == 0)
275                 ct->devt = sb.st_rdev;
276
277         if (c->sync)
278                 return tur_check(c->fd, c->timeout, c->message);
279
280         /*
281          * Async mode
282          */
283         r = pthread_mutex_lock(&ct->lock);
284         if (r != 0) {
285                 condlog(2, "%d:%d: tur mutex lock failed with %d",
286                         TUR_DEVT(ct), r);
287                 MSG(c, MSG_TUR_FAILED);
288                 return PATH_WILD;
289         }
290
291         if (ct->running) {
292                 /* Check if TUR checker is still running */
293                 if (ct->thread) {
294                         if (tur_check_async_timeout(c)) {
295                                 condlog(3, "%d:%d: tur checker timeout",
296                                         TUR_DEVT(ct));
297                                 pthread_cancel(ct->thread);
298                                 ct->running = 0;
299                                 MSG(c, MSG_TUR_TIMEOUT);
300                                 tur_status = PATH_TIMEOUT;
301                         } else {
302                                 condlog(3, "%d:%d: tur checker not finished",
303                                         TUR_DEVT(ct));
304                                 ct->running++;
305                                 tur_status = PATH_PENDING;
306                         }
307                 } else {
308                         /* TUR checker done */
309                         ct->running = 0;
310                         tur_status = ct->state;
311                         strncpy(c->message, ct->message, CHECKER_MSG_LEN);
312                         c->message[CHECKER_MSG_LEN - 1] = '\0';
313                 }
314                 pthread_mutex_unlock(&ct->lock);
315         } else {
316                 if (ct->thread) {
317                         /* pthread cancel failed. continue in sync mode */
318                         pthread_mutex_unlock(&ct->lock);
319                         condlog(3, "%d:%d: tur thread not responding",
320                                 TUR_DEVT(ct));
321                         return PATH_TIMEOUT;
322                 }
323                 /* Start new TUR checker */
324                 ct->state = PATH_UNCHECKED;
325                 ct->fd = c->fd;
326                 ct->timeout = c->timeout;
327                 pthread_spin_lock(&ct->hldr_lock);
328                 ct->holders++;
329                 pthread_spin_unlock(&ct->hldr_lock);
330                 tur_set_async_timeout(c);
331                 setup_thread_attr(&attr, 32 * 1024, 1);
332                 r = pthread_create(&ct->thread, &attr, tur_thread, ct);
333                 if (r) {
334                         pthread_mutex_unlock(&ct->lock);
335                         ct->thread = 0;
336                         ct->holders--;
337                         condlog(3, "%d:%d: failed to start tur thread, using"
338                                 " sync mode", TUR_DEVT(ct));
339                         return tur_check(c->fd, c->timeout, c->message);
340                 }
341                 pthread_attr_destroy(&attr);
342                 tur_timeout(&tsp);
343                 r = pthread_cond_timedwait(&ct->active, &ct->lock, &tsp);
344                 tur_status = ct->state;
345                 strncpy(c->message, ct->message,CHECKER_MSG_LEN);
346                 c->message[CHECKER_MSG_LEN -1] = '\0';
347                 pthread_mutex_unlock(&ct->lock);
348                 if (ct->thread &&
349                     (tur_status == PATH_PENDING || tur_status == PATH_UNCHECKED)) {
350                         condlog(3, "%d:%d: tur checker still running",
351                                 TUR_DEVT(ct));
352                         ct->running = 1;
353                         tur_status = PATH_PENDING;
354                 }
355         }
356
357         return tur_status;
358 }