libmultipath: fix tur memory misuse
[multipath-tools/.git] / libmultipath / checkers / tur.c
1 /*
2  * Some code borrowed from sg-utils.
3  *
4  * Copyright (c) 2004 Christophe Varoqui
5  */
6 #include <stdio.h>
7 #include <stdlib.h>
8 #include <string.h>
9 #include <sys/types.h>
10 #include <sys/stat.h>
11 #include <unistd.h>
12 #include <fcntl.h>
13 #include <sys/ioctl.h>
14 #include <sys/sysmacros.h>
15 #include <errno.h>
16 #include <sys/time.h>
17 #include <pthread.h>
18 #include <urcu.h>
19 #include <urcu/uatomic.h>
20
21 #include "checkers.h"
22
23 #include "../libmultipath/debug.h"
24 #include "../libmultipath/sg_include.h"
25 #include "../libmultipath/util.h"
26 #include "../libmultipath/time-util.h"
27 #include "../libmultipath/util.h"
28
29 #define TUR_CMD_LEN 6
30 #define HEAVY_CHECK_COUNT       10
31
32 #define MSG_TUR_UP      "tur checker reports path is up"
33 #define MSG_TUR_DOWN    "tur checker reports path is down"
34 #define MSG_TUR_GHOST   "tur checker reports path is in standby state"
35 #define MSG_TUR_RUNNING "tur checker still running"
36 #define MSG_TUR_TIMEOUT "tur checker timed out"
37 #define MSG_TUR_FAILED  "tur checker failed to initialize"
38
39 struct tur_checker_context {
40         dev_t devt;
41         int state;
42         int running; /* uatomic access only */
43         int fd;
44         unsigned int timeout;
45         time_t time;
46         pthread_t thread;
47         pthread_mutex_t lock;
48         pthread_cond_t active;
49         int holders; /* uatomic access only */
50         char message[CHECKER_MSG_LEN];
51 };
52
53 int libcheck_init (struct checker * c)
54 {
55         struct tur_checker_context *ct;
56         pthread_mutexattr_t attr;
57         struct stat sb;
58
59         ct = malloc(sizeof(struct tur_checker_context));
60         if (!ct)
61                 return 1;
62         memset(ct, 0, sizeof(struct tur_checker_context));
63
64         ct->state = PATH_UNCHECKED;
65         ct->fd = -1;
66         uatomic_set(&ct->holders, 1);
67         pthread_cond_init_mono(&ct->active);
68         pthread_mutexattr_init(&attr);
69         pthread_mutexattr_settype(&attr, PTHREAD_MUTEX_RECURSIVE);
70         pthread_mutex_init(&ct->lock, &attr);
71         pthread_mutexattr_destroy(&attr);
72         if (fstat(c->fd, &sb) == 0)
73                 ct->devt = sb.st_rdev;
74         c->context = ct;
75
76         return 0;
77 }
78
79 static void cleanup_context(struct tur_checker_context *ct)
80 {
81         pthread_mutex_destroy(&ct->lock);
82         pthread_cond_destroy(&ct->active);
83         free(ct);
84 }
85
86 void libcheck_free (struct checker * c)
87 {
88         if (c->context) {
89                 struct tur_checker_context *ct = c->context;
90                 int holders;
91                 int running;
92
93                 running = uatomic_xchg(&ct->running, 0);
94                 if (running)
95                         pthread_cancel(ct->thread);
96                 ct->thread = 0;
97                 holders = uatomic_sub_return(&ct->holders, 1);
98                 if (!holders)
99                         cleanup_context(ct);
100                 c->context = NULL;
101         }
102         return;
103 }
104
105 static int
106 tur_check(int fd, unsigned int timeout, char *msg)
107 {
108         struct sg_io_hdr io_hdr;
109         unsigned char turCmdBlk[TUR_CMD_LEN] = { 0x00, 0, 0, 0, 0, 0 };
110         unsigned char sense_buffer[32];
111         int retry_tur = 5;
112
113 retry:
114         memset(&io_hdr, 0, sizeof (struct sg_io_hdr));
115         memset(&sense_buffer, 0, 32);
116         io_hdr.interface_id = 'S';
117         io_hdr.cmd_len = sizeof (turCmdBlk);
118         io_hdr.mx_sb_len = sizeof (sense_buffer);
119         io_hdr.dxfer_direction = SG_DXFER_NONE;
120         io_hdr.cmdp = turCmdBlk;
121         io_hdr.sbp = sense_buffer;
122         io_hdr.timeout = timeout * 1000;
123         io_hdr.pack_id = 0;
124         if (ioctl(fd, SG_IO, &io_hdr) < 0) {
125                 snprintf(msg, CHECKER_MSG_LEN, MSG_TUR_DOWN);
126                 return PATH_DOWN;
127         }
128         if ((io_hdr.status & 0x7e) == 0x18) {
129                 /*
130                  * SCSI-3 arrays might return
131                  * reservation conflict on TUR
132                  */
133                 snprintf(msg, CHECKER_MSG_LEN, MSG_TUR_UP);
134                 return PATH_UP;
135         }
136         if (io_hdr.info & SG_INFO_OK_MASK) {
137                 int key = 0, asc, ascq;
138
139                 switch (io_hdr.host_status) {
140                 case DID_OK:
141                 case DID_NO_CONNECT:
142                 case DID_BAD_TARGET:
143                 case DID_ABORT:
144                 case DID_TRANSPORT_FAILFAST:
145                         break;
146                 default:
147                         /* Driver error, retry */
148                         if (--retry_tur)
149                                 goto retry;
150                         break;
151                 }
152                 if (io_hdr.sb_len_wr > 3) {
153                         if (io_hdr.sbp[0] == 0x72 || io_hdr.sbp[0] == 0x73) {
154                                 key = io_hdr.sbp[1] & 0x0f;
155                                 asc = io_hdr.sbp[2];
156                                 ascq = io_hdr.sbp[3];
157                         } else if (io_hdr.sb_len_wr > 13 &&
158                                    ((io_hdr.sbp[0] & 0x7f) == 0x70 ||
159                                     (io_hdr.sbp[0] & 0x7f) == 0x71)) {
160                                 key = io_hdr.sbp[2] & 0x0f;
161                                 asc = io_hdr.sbp[12];
162                                 ascq = io_hdr.sbp[13];
163                         }
164                 }
165                 if (key == 0x6) {
166                         /* Unit Attention, retry */
167                         if (--retry_tur)
168                                 goto retry;
169                 }
170                 else if (key == 0x2) {
171                         /* Not Ready */
172                         /* Note: Other ALUA states are either UP or DOWN */
173                         if( asc == 0x04 && ascq == 0x0b){
174                                 /*
175                                  * LOGICAL UNIT NOT ACCESSIBLE,
176                                  * TARGET PORT IN STANDBY STATE
177                                  */
178                                 snprintf(msg, CHECKER_MSG_LEN, MSG_TUR_GHOST);
179                                 return PATH_GHOST;
180                         }
181                 }
182                 snprintf(msg, CHECKER_MSG_LEN, MSG_TUR_DOWN);
183                 return PATH_DOWN;
184         }
185         snprintf(msg, CHECKER_MSG_LEN, MSG_TUR_UP);
186         return PATH_UP;
187 }
188
189 #define tur_thread_cleanup_push(ct) pthread_cleanup_push(cleanup_func, ct)
190 #define tur_thread_cleanup_pop(ct) pthread_cleanup_pop(1)
191
192 static void cleanup_func(void *data)
193 {
194         int holders;
195         struct tur_checker_context *ct = data;
196
197         holders = uatomic_sub_return(&ct->holders, 1);
198         if (!holders)
199                 cleanup_context(ct);
200         rcu_unregister_thread();
201 }
202
203 static void *tur_thread(void *ctx)
204 {
205         struct tur_checker_context *ct = ctx;
206         int state, running;
207         char msg[CHECKER_MSG_LEN];
208
209         /* This thread can be canceled, so setup clean up */
210         tur_thread_cleanup_push(ct);
211         rcu_register_thread();
212
213         condlog(3, "%d:%d : tur checker starting up", major(ct->devt),
214                 minor(ct->devt));
215
216         /* TUR checker start up */
217         pthread_mutex_lock(&ct->lock);
218         ct->state = PATH_PENDING;
219         ct->message[0] = '\0';
220         pthread_mutex_unlock(&ct->lock);
221
222         state = tur_check(ct->fd, ct->timeout, msg);
223         pthread_testcancel();
224
225         /* TUR checker done */
226         pthread_mutex_lock(&ct->lock);
227         ct->state = state;
228         strlcpy(ct->message, msg, sizeof(ct->message));
229         pthread_cond_signal(&ct->active);
230         pthread_mutex_unlock(&ct->lock);
231
232         condlog(3, "%d:%d : tur checker finished, state %s", major(ct->devt),
233                 minor(ct->devt), checker_state_name(state));
234
235         running = uatomic_xchg(&ct->running, 0);
236         if (!running)
237                 pause();
238
239         tur_thread_cleanup_pop(ct);
240
241         return ((void *)0);
242 }
243
244
245 static void tur_timeout(struct timespec *tsp)
246 {
247         clock_gettime(CLOCK_MONOTONIC, tsp);
248         tsp->tv_nsec += 1000 * 1000; /* 1 millisecond */
249         normalize_timespec(tsp);
250 }
251
252 static void tur_set_async_timeout(struct checker *c)
253 {
254         struct tur_checker_context *ct = c->context;
255         struct timespec now;
256
257         clock_gettime(CLOCK_MONOTONIC, &now);
258         ct->time = now.tv_sec + c->timeout;
259 }
260
261 static int tur_check_async_timeout(struct checker *c)
262 {
263         struct tur_checker_context *ct = c->context;
264         struct timespec now;
265
266         clock_gettime(CLOCK_MONOTONIC, &now);
267         return (now.tv_sec > ct->time);
268 }
269
270 int libcheck_check(struct checker * c)
271 {
272         struct tur_checker_context *ct = c->context;
273         struct timespec tsp;
274         pthread_attr_t attr;
275         int tur_status, r;
276
277         if (!ct)
278                 return PATH_UNCHECKED;
279
280         if (c->sync)
281                 return tur_check(c->fd, c->timeout, c->message);
282
283         /*
284          * Async mode
285          */
286         r = pthread_mutex_lock(&ct->lock);
287         if (r != 0) {
288                 condlog(2, "%s: tur mutex lock failed with %d", ct->devt, r);
289                 MSG(c, MSG_TUR_FAILED);
290                 return PATH_WILD;
291         }
292
293         if (ct->thread) {
294                 if (tur_check_async_timeout(c)) {
295                         int running = uatomic_xchg(&ct->running, 0);
296                         if (running)
297                                 pthread_cancel(ct->thread);
298                         condlog(3, "%d:%d : tur checker timeout",
299                                 major(ct->devt), minor(ct->devt));
300                         ct->thread = 0;
301                         MSG(c, MSG_TUR_TIMEOUT);
302                         tur_status = PATH_TIMEOUT;
303                 } else if (uatomic_read(&ct->running) != 0) {
304                         condlog(3, "%d:%d : tur checker not finished",
305                                 major(ct->devt), minor(ct->devt));
306                         tur_status = PATH_PENDING;
307                 } else {
308                         /* TUR checker done */
309                         ct->thread = 0;
310                         tur_status = ct->state;
311                         strlcpy(c->message, ct->message, sizeof(c->message));
312                 }
313                 pthread_mutex_unlock(&ct->lock);
314         } else {
315                 if (uatomic_read(&ct->holders) > 1) {
316                         /* The thread has been cancelled but hasn't
317                          * quilt. Fail back to synchronous mode */
318                         pthread_mutex_unlock(&ct->lock);
319                         condlog(3, "%d:%d : tur checker failing back to sync",
320                                 major(ct->devt), minor(ct->devt));
321                         return tur_check(c->fd, c->timeout, c->message);
322                 }
323                 /* Start new TUR checker */
324                 ct->state = PATH_UNCHECKED;
325                 ct->fd = c->fd;
326                 ct->timeout = c->timeout;
327                 uatomic_add(&ct->holders, 1);
328                 uatomic_set(&ct->running, 1);
329                 tur_set_async_timeout(c);
330                 setup_thread_attr(&attr, 32 * 1024, 1);
331                 r = pthread_create(&ct->thread, &attr, tur_thread, ct);
332                 pthread_attr_destroy(&attr);
333                 if (r) {
334                         uatomic_sub(&ct->holders, 1);
335                         uatomic_set(&ct->running, 0);
336                         ct->thread = 0;
337                         pthread_mutex_unlock(&ct->lock);
338                         condlog(3, "%d:%d : failed to start tur thread, using"
339                                 " sync mode", major(ct->devt), minor(ct->devt));
340                         return tur_check(c->fd, c->timeout, c->message);
341                 }
342                 tur_timeout(&tsp);
343                 r = pthread_cond_timedwait(&ct->active, &ct->lock, &tsp);
344                 tur_status = ct->state;
345                 strlcpy(c->message, ct->message, sizeof(c->message));
346                 pthread_mutex_unlock(&ct->lock);
347                 if (uatomic_read(&ct->running) != 0 &&
348                     (tur_status == PATH_PENDING || tur_status == PATH_UNCHECKED)) {
349                         condlog(3, "%d:%d : tur checker still running",
350                                 major(ct->devt), minor(ct->devt));
351                         tur_status = PATH_PENDING;
352                 } else {
353                         int running = uatomic_xchg(&ct->running, 0);
354                         if (running)
355                                 pthread_cancel(ct->thread);
356                         ct->thread = 0;
357                 }
358         }
359
360         return tur_status;
361 }