multipathd, libmultipathd: Make delays independent of clock jumps
[multipath-tools/.git] / libmultipath / checkers / tur.c
1 /*
2  * Some code borrowed from sg-utils.
3  *
4  * Copyright (c) 2004 Christophe Varoqui
5  */
6 #include <stdio.h>
7 #include <stdlib.h>
8 #include <string.h>
9 #include <sys/types.h>
10 #include <sys/stat.h>
11 #include <unistd.h>
12 #include <fcntl.h>
13 #include <sys/ioctl.h>
14 #include <errno.h>
15 #include <sys/time.h>
16 #include <pthread.h>
17
18 #include "checkers.h"
19
20 #include "../libmultipath/debug.h"
21 #include "../libmultipath/sg_include.h"
22 #include "../libmultipath/uevent.h"
23 #include "../libmultipath/time-util.h"
24
/* Size in bytes of the TEST UNIT READY command block built in tur_check(). */
#define TUR_CMD_LEN 6
/* Not referenced in the visible part of this file. */
#define HEAVY_CHECK_COUNT       10

/* Human-readable status strings reported through the checker message buffer. */
#define MSG_TUR_UP      "tur checker reports path is up"
#define MSG_TUR_DOWN    "tur checker reports path is down"
#define MSG_TUR_GHOST   "tur checker reports path is in standby state"
#define MSG_TUR_RUNNING "tur checker still running"
#define MSG_TUR_TIMEOUT "tur checker timed out"
#define MSG_TUR_FAILED  "tur checker failed to initialize"
34
35 struct tur_checker_context {
36         dev_t devt;
37         int state;
38         int running;
39         int fd;
40         unsigned int timeout;
41         time_t time;
42         pthread_t thread;
43         pthread_mutex_t lock;
44         pthread_cond_t active;
45         pthread_spinlock_t hldr_lock;
46         int holders;
47         char message[CHECKER_MSG_LEN];
48 };
49
/*
 * Expands to TWO comma-separated arguments (major, minor) of the context's
 * device number, matching the "%d:%d" prefix used in the log messages.
 */
#define TUR_DEVT(c) major((c)->devt), minor((c)->devt)
51
52 int libcheck_init (struct checker * c)
53 {
54         struct tur_checker_context *ct;
55
56         ct = malloc(sizeof(struct tur_checker_context));
57         if (!ct)
58                 return 1;
59         memset(ct, 0, sizeof(struct tur_checker_context));
60
61         ct->state = PATH_UNCHECKED;
62         ct->fd = -1;
63         ct->holders = 1;
64         pthread_cond_init_mono(&ct->active);
65         pthread_mutex_init(&ct->lock, NULL);
66         pthread_spin_init(&ct->hldr_lock, PTHREAD_PROCESS_PRIVATE);
67         c->context = ct;
68
69         return 0;
70 }
71
72 static void cleanup_context(struct tur_checker_context *ct)
73 {
74         pthread_mutex_destroy(&ct->lock);
75         pthread_cond_destroy(&ct->active);
76         pthread_spin_destroy(&ct->hldr_lock);
77         free(ct);
78 }
79
80 void libcheck_free (struct checker * c)
81 {
82         if (c->context) {
83                 struct tur_checker_context *ct = c->context;
84                 int holders;
85                 pthread_t thread;
86
87                 pthread_spin_lock(&ct->hldr_lock);
88                 ct->holders--;
89                 holders = ct->holders;
90                 thread = ct->thread;
91                 pthread_spin_unlock(&ct->hldr_lock);
92                 if (holders)
93                         pthread_cancel(thread);
94                 else
95                         cleanup_context(ct);
96                 c->context = NULL;
97         }
98         return;
99 }
100
/* Repair hook of the checker interface; the TUR checker does nothing here. */
void libcheck_repair (struct checker * c)
{
        (void)c;
}
105
/*
 * Format a checker message into msg, which must have room for
 * CHECKER_MSG_LEN bytes. Evaluates to the snprintf() return value.
 *
 * The expansion deliberately carries no trailing semicolon: callers write
 * "TUR_MSG(...);" themselves, which keeps the macro usable as the unbraced
 * body of an if/else and as an expression.
 */
#define TUR_MSG(msg, fmt, ...) \
        snprintf((msg), CHECKER_MSG_LEN, fmt, ##__VA_ARGS__)
107
108 static int
109 tur_check(int fd, unsigned int timeout, char *msg)
110 {
111         struct sg_io_hdr io_hdr;
112         unsigned char turCmdBlk[TUR_CMD_LEN] = { 0x00, 0, 0, 0, 0, 0 };
113         unsigned char sense_buffer[32];
114         int retry_tur = 5;
115
116 retry:
117         memset(&io_hdr, 0, sizeof (struct sg_io_hdr));
118         memset(&sense_buffer, 0, 32);
119         io_hdr.interface_id = 'S';
120         io_hdr.cmd_len = sizeof (turCmdBlk);
121         io_hdr.mx_sb_len = sizeof (sense_buffer);
122         io_hdr.dxfer_direction = SG_DXFER_NONE;
123         io_hdr.cmdp = turCmdBlk;
124         io_hdr.sbp = sense_buffer;
125         io_hdr.timeout = timeout * 1000;
126         io_hdr.pack_id = 0;
127         if (ioctl(fd, SG_IO, &io_hdr) < 0) {
128                 TUR_MSG(msg, MSG_TUR_DOWN);
129                 return PATH_DOWN;
130         }
131         if ((io_hdr.status & 0x7e) == 0x18) {
132                 /*
133                  * SCSI-3 arrays might return
134                  * reservation conflict on TUR
135                  */
136                 TUR_MSG(msg, MSG_TUR_UP);
137                 return PATH_UP;
138         }
139         if (io_hdr.info & SG_INFO_OK_MASK) {
140                 int key = 0, asc, ascq;
141
142                 switch (io_hdr.host_status) {
143                 case DID_OK:
144                 case DID_NO_CONNECT:
145                 case DID_BAD_TARGET:
146                 case DID_ABORT:
147                 case DID_TRANSPORT_FAILFAST:
148                         break;
149                 default:
150                         /* Driver error, retry */
151                         if (--retry_tur)
152                                 goto retry;
153                         break;
154                 }
155                 if (io_hdr.sb_len_wr > 3) {
156                         if (io_hdr.sbp[0] == 0x72 || io_hdr.sbp[0] == 0x73) {
157                                 key = io_hdr.sbp[1] & 0x0f;
158                                 asc = io_hdr.sbp[2];
159                                 ascq = io_hdr.sbp[3];
160                         } else if (io_hdr.sb_len_wr > 13 &&
161                                    ((io_hdr.sbp[0] & 0x7f) == 0x70 ||
162                                     (io_hdr.sbp[0] & 0x7f) == 0x71)) {
163                                 key = io_hdr.sbp[2] & 0x0f;
164                                 asc = io_hdr.sbp[12];
165                                 ascq = io_hdr.sbp[13];
166                         }
167                 }
168                 if (key == 0x6) {
169                         /* Unit Attention, retry */
170                         if (--retry_tur)
171                                 goto retry;
172                 }
173                 else if (key == 0x2) {
174                         /* Not Ready */
175                         /* Note: Other ALUA states are either UP or DOWN */
176                         if( asc == 0x04 && ascq == 0x0b){
177                                 /*
178                                  * LOGICAL UNIT NOT ACCESSIBLE,
179                                  * TARGET PORT IN STANDBY STATE
180                                  */
181                                 TUR_MSG(msg, MSG_TUR_GHOST);
182                                 return PATH_GHOST;
183                         }
184                 }
185                 TUR_MSG(msg, MSG_TUR_DOWN);
186                 return PATH_DOWN;
187         }
188         TUR_MSG(msg, MSG_TUR_UP);
189         return PATH_UP;
190 }
191
/*
 * Thin wrappers around pthread_cleanup_push()/pthread_cleanup_pop() that
 * register cleanup_func() for the worker thread. The pop wrapper ignores its
 * argument and always runs the handler (execute == 1). As with the
 * underlying pthread macros, push and pop must appear as a pair in the same
 * lexical scope.
 */
#define tur_thread_cleanup_push(ct) pthread_cleanup_push(cleanup_func, ct)
#define tur_thread_cleanup_pop(ct) pthread_cleanup_pop(1)
194
195 static void cleanup_func(void *data)
196 {
197         int holders;
198         struct tur_checker_context *ct = data;
199         pthread_spin_lock(&ct->hldr_lock);
200         ct->holders--;
201         holders = ct->holders;
202         ct->thread = 0;
203         pthread_spin_unlock(&ct->hldr_lock);
204         if (!holders)
205                 cleanup_context(ct);
206 }
207
208 static void *tur_thread(void *ctx)
209 {
210         struct tur_checker_context *ct = ctx;
211         int state;
212
213         condlog(3, "%d:%d: tur checker starting up", TUR_DEVT(ct));
214
215         ct->message[0] = '\0';
216         /* This thread can be canceled, so setup clean up */
217         tur_thread_cleanup_push(ct)
218
219         /* TUR checker start up */
220         pthread_mutex_lock(&ct->lock);
221         ct->state = PATH_PENDING;
222         pthread_mutex_unlock(&ct->lock);
223
224         state = tur_check(ct->fd, ct->timeout, ct->message);
225
226         /* TUR checker done */
227         pthread_mutex_lock(&ct->lock);
228         ct->state = state;
229         pthread_cond_signal(&ct->active);
230         pthread_mutex_unlock(&ct->lock);
231
232         condlog(3, "%d:%d: tur checker finished, state %s",
233                 TUR_DEVT(ct), checker_state_name(state));
234         tur_thread_cleanup_pop(ct);
235         return ((void *)0);
236 }
237
238
239 static void tur_timeout(struct timespec *tsp)
240 {
241         clock_gettime(CLOCK_MONOTONIC, tsp);
242         tsp->tv_nsec += 1000 * 1000; /* 1 millisecond */
243         normalize_timespec(tsp);
244 }
245
246 static void tur_set_async_timeout(struct checker *c)
247 {
248         struct tur_checker_context *ct = c->context;
249         struct timespec now;
250
251         clock_gettime(CLOCK_MONOTONIC, &now);
252         ct->time = now.tv_sec + c->timeout;
253 }
254
255 static int tur_check_async_timeout(struct checker *c)
256 {
257         struct tur_checker_context *ct = c->context;
258         struct timespec now;
259
260         clock_gettime(CLOCK_MONOTONIC, &now);
261         return (now.tv_sec > ct->time);
262 }
263
264 extern int
265 libcheck_check (struct checker * c)
266 {
267         struct tur_checker_context *ct = c->context;
268         struct timespec tsp;
269         struct stat sb;
270         pthread_attr_t attr;
271         int tur_status, r;
272
273
274         if (!ct)
275                 return PATH_UNCHECKED;
276
277         if (fstat(c->fd, &sb) == 0)
278                 ct->devt = sb.st_rdev;
279
280         if (c->sync)
281                 return tur_check(c->fd, c->timeout, c->message);
282
283         /*
284          * Async mode
285          */
286         r = pthread_mutex_lock(&ct->lock);
287         if (r != 0) {
288                 condlog(2, "%d:%d: tur mutex lock failed with %d",
289                         TUR_DEVT(ct), r);
290                 MSG(c, MSG_TUR_FAILED);
291                 return PATH_WILD;
292         }
293
294         if (ct->running) {
295                 /* Check if TUR checker is still running */
296                 if (ct->thread) {
297                         if (tur_check_async_timeout(c)) {
298                                 condlog(3, "%d:%d: tur checker timeout",
299                                         TUR_DEVT(ct));
300                                 pthread_cancel(ct->thread);
301                                 ct->running = 0;
302                                 MSG(c, MSG_TUR_TIMEOUT);
303                                 tur_status = PATH_TIMEOUT;
304                         } else {
305                                 condlog(3, "%d:%d: tur checker not finished",
306                                         TUR_DEVT(ct));
307                                 ct->running++;
308                                 tur_status = PATH_PENDING;
309                         }
310                 } else {
311                         /* TUR checker done */
312                         ct->running = 0;
313                         tur_status = ct->state;
314                         strncpy(c->message, ct->message, CHECKER_MSG_LEN);
315                         c->message[CHECKER_MSG_LEN - 1] = '\0';
316                 }
317                 pthread_mutex_unlock(&ct->lock);
318         } else {
319                 if (ct->thread) {
320                         /* pthread cancel failed. continue in sync mode */
321                         pthread_mutex_unlock(&ct->lock);
322                         condlog(3, "%d:%d: tur thread not responding",
323                                 TUR_DEVT(ct));
324                         return PATH_TIMEOUT;
325                 }
326                 /* Start new TUR checker */
327                 ct->state = PATH_UNCHECKED;
328                 ct->fd = c->fd;
329                 ct->timeout = c->timeout;
330                 pthread_spin_lock(&ct->hldr_lock);
331                 ct->holders++;
332                 pthread_spin_unlock(&ct->hldr_lock);
333                 tur_set_async_timeout(c);
334                 setup_thread_attr(&attr, 32 * 1024, 1);
335                 r = pthread_create(&ct->thread, &attr, tur_thread, ct);
336                 if (r) {
337                         pthread_spin_lock(&ct->hldr_lock);
338                         ct->holders--;
339                         pthread_spin_unlock(&ct->hldr_lock);
340                         pthread_mutex_unlock(&ct->lock);
341                         ct->thread = 0;
342                         condlog(3, "%d:%d: failed to start tur thread, using"
343                                 " sync mode", TUR_DEVT(ct));
344                         return tur_check(c->fd, c->timeout, c->message);
345                 }
346                 pthread_attr_destroy(&attr);
347                 tur_timeout(&tsp);
348                 r = pthread_cond_timedwait(&ct->active, &ct->lock, &tsp);
349                 tur_status = ct->state;
350                 strncpy(c->message, ct->message,CHECKER_MSG_LEN);
351                 c->message[CHECKER_MSG_LEN - 1] = '\0';
352                 pthread_mutex_unlock(&ct->lock);
353                 if (ct->thread &&
354                     (tur_status == PATH_PENDING || tur_status == PATH_UNCHECKED)) {
355                         condlog(3, "%d:%d: tur checker still running",
356                                 TUR_DEVT(ct));
357                         ct->running = 1;
358                         tur_status = PATH_PENDING;
359                 }
360         }
361
362         return tur_status;
363 }