From c172ec5c8dc8c09dd5958f4ae542fa321bb15a92 Mon Sep 17 00:00:00 2001
From: Ilya Dryomov
Date: Fri, 31 Jan 2014 17:49:22 +0200
Subject: [PATCH 1/4] libceph: fix error handling in ceph_osdc_init()

msgpool_op_reply message pool isn't destroyed if workqueue
construction fails.  Fix it.

Signed-off-by: Ilya Dryomov
Reviewed-by: Sage Weil
---
 net/ceph/osd_client.c | 5 ++++-
 1 file changed, 4 insertions(+), 1 deletion(-)

diff --git a/net/ceph/osd_client.c b/net/ceph/osd_client.c
index 010ff3bd58ad..166d4c7ba065 100644
--- a/net/ceph/osd_client.c
+++ b/net/ceph/osd_client.c
@@ -2504,9 +2504,12 @@ int ceph_osdc_init(struct ceph_osd_client *osdc, struct ceph_client *client)
 	err = -ENOMEM;
 	osdc->notify_wq = create_singlethread_workqueue("ceph-watch-notify");
 	if (!osdc->notify_wq)
-		goto out_msgpool;
+		goto out_msgpool_reply;
+
 	return 0;
 
+out_msgpool_reply:
+	ceph_msgpool_destroy(&osdc->msgpool_op_reply);
 out_msgpool:
 	ceph_msgpool_destroy(&osdc->msgpool_op);
 out_mempool:
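The fix above restores the standard kernel unwind idiom: one label per
constructed resource, released in reverse order of construction, so a
failure at step N jumps to the label that tears down steps N-1..1.  A
minimal sketch of that idiom follows; kzalloc()/kfree() and the
workqueue calls are real kernel APIs, but every other name here is
made up for illustration, not libceph code:

	#include <linux/slab.h>
	#include <linux/workqueue.h>

	struct example {			/* hypothetical container */
		void *pool_op;
		void *pool_op_reply;
		struct workqueue_struct *wq;
	};

	static int example_init(struct example *e)
	{
		int err = -ENOMEM;

		e->pool_op = kzalloc(64, GFP_KERNEL);
		if (!e->pool_op)
			goto out;

		e->pool_op_reply = kzalloc(64, GFP_KERNEL);
		if (!e->pool_op_reply)
			goto out_pool_op;

		e->wq = create_singlethread_workqueue("example-wq");
		if (!e->wq)
			goto out_pool_op_reply; /* the unwind step the patch adds */

		return 0;

	out_pool_op_reply:
		kfree(e->pool_op_reply);	/* analogous to msgpool_op_reply */
	out_pool_op:
		kfree(e->pool_op);
	out:
		return err;
	}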
From 0bbfdfe8d25fcc1d5c2edb6b060fb0c5cf66aff9 Mon Sep 17 00:00:00 2001
From: Ilya Dryomov
Date: Fri, 31 Jan 2014 19:33:39 +0200
Subject: [PATCH 2/4] libceph: factor out logic from ceph_osdc_start_request()

Factor out logic from ceph_osdc_start_request() into a new helper,
__ceph_osdc_start_request().  ceph_osdc_start_request() now amounts
to taking locks and calling __ceph_osdc_start_request().

Signed-off-by: Ilya Dryomov
Reviewed-by: Sage Weil
---
 net/ceph/osd_client.c | 62 +++++++++++++++++++++++++++----------------
 1 file changed, 39 insertions(+), 23 deletions(-)

diff --git a/net/ceph/osd_client.c b/net/ceph/osd_client.c
index 166d4c7ba065..2aa82b6bb305 100644
--- a/net/ceph/osd_client.c
+++ b/net/ceph/osd_client.c
@@ -1426,6 +1426,40 @@ static void __send_queued(struct ceph_osd_client *osdc)
 		__send_request(osdc, req);
 }
 
+/*
+ * Caller should hold map_sem for read and request_mutex.
+ */
+static int __ceph_osdc_start_request(struct ceph_osd_client *osdc,
+				     struct ceph_osd_request *req,
+				     bool nofail)
+{
+	int rc;
+
+	__register_request(osdc, req);
+	req->r_sent = 0;
+	req->r_got_reply = 0;
+	rc = __map_request(osdc, req, 0);
+	if (rc < 0) {
+		if (nofail) {
+			dout("osdc_start_request failed map, "
+				" will retry %lld\n", req->r_tid);
+			rc = 0;
+		} else {
+			__unregister_request(osdc, req);
+		}
+		return rc;
+	}
+
+	if (req->r_osd == NULL) {
+		dout("send_request %p no up osds in pg\n", req);
+		ceph_monc_request_next_osdmap(&osdc->client->monc);
+	} else {
+		__send_queued(osdc);
+	}
+
+	return 0;
+}
+
 /*
  * Timeout callback, called every N seconds when 1 or more osd
  * requests has been active for more than N seconds.  When this
@@ -2351,34 +2385,16 @@ int ceph_osdc_start_request(struct ceph_osd_client *osdc,
 			    struct ceph_osd_request *req,
 			    bool nofail)
 {
-	int rc = 0;
+	int rc;
 
 	down_read(&osdc->map_sem);
 	mutex_lock(&osdc->request_mutex);
-	__register_request(osdc, req);
-	req->r_sent = 0;
-	req->r_got_reply = 0;
-	rc = __map_request(osdc, req, 0);
-	if (rc < 0) {
-		if (nofail) {
-			dout("osdc_start_request failed map, "
-				" will retry %lld\n", req->r_tid);
-			rc = 0;
-		} else {
-			__unregister_request(osdc, req);
-		}
-		goto out_unlock;
-	}
-	if (req->r_osd == NULL) {
-		dout("send_request %p no up osds in pg\n", req);
-		ceph_monc_request_next_osdmap(&osdc->client->monc);
-	} else {
-		__send_queued(osdc);
-	}
-	rc = 0;
-out_unlock:
+
+	rc = __ceph_osdc_start_request(osdc, req, nofail);
+
 	mutex_unlock(&osdc->request_mutex);
 	up_read(&osdc->map_sem);
+
 	return rc;
 }
 EXPORT_SYMBOL(ceph_osdc_start_request);
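A pattern worth noting before patch 3: patch 2 pairs a locking entry
point with a double-underscore helper that assumes the caller already
holds the locks, which is what lets handle_reply() (already under both
locks after patch 3) restart a redirected request without dropping
them.  A rough sketch of that convention; the types and function names
below are invented for illustration, not the libceph API:

	#include <linux/mutex.h>
	#include <linux/rwsem.h>

	struct example_client {			/* hypothetical */
		struct rw_semaphore map_sem;
		struct mutex request_mutex;
		int started;
	};

	/* Caller must hold map_sem for read and request_mutex. */
	static int __example_start(struct example_client *c)
	{
		c->started++;	/* stand-in for the register/map/send work */
		return 0;
	}

	static int example_start(struct example_client *c)
	{
		int rc;

		down_read(&c->map_sem);
		mutex_lock(&c->request_mutex);
		rc = __example_start(c);
		mutex_unlock(&c->request_mutex);
		up_read(&c->map_sem);

		return rc;
	}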
From ff513ace9b772e75e337f8e058cc7f12816843fe Mon Sep 17 00:00:00 2001
From: Ilya Dryomov
Date: Mon, 3 Feb 2014 13:56:33 +0200
Subject: [PATCH 3/4] libceph: take map_sem for read in handle_reply()

Handling redirect replies requires both map_sem and request_mutex.
Taking map_sem unconditionally near the top of handle_reply() avoids
possible race conditions that arise from releasing request_mutex to be
able to acquire map_sem in the redirect reply case.  (Lock ordering
is: map_sem, request_mutex, crush_mutex.)

Signed-off-by: Ilya Dryomov
Reviewed-by: Sage Weil
---
 net/ceph/osd_client.c | 17 +++++++++++------
 1 file changed, 11 insertions(+), 6 deletions(-)

diff --git a/net/ceph/osd_client.c b/net/ceph/osd_client.c
index 2aa82b6bb305..0676f2b199d6 100644
--- a/net/ceph/osd_client.c
+++ b/net/ceph/osd_client.c
@@ -1687,6 +1687,7 @@ static void handle_reply(struct ceph_osd_client *osdc, struct ceph_msg *msg,
 	osdmap_epoch = ceph_decode_32(&p);
 
 	/* lookup */
+	down_read(&osdc->map_sem);
 	mutex_lock(&osdc->request_mutex);
 	req = __lookup_request(osdc, tid);
 	if (req == NULL) {
@@ -1743,7 +1744,6 @@ static void handle_reply(struct ceph_osd_client *osdc, struct ceph_msg *msg,
 		dout("redirect pool %lld\n", redir.oloc.pool);
 
 		__unregister_request(osdc, req);
-		mutex_unlock(&osdc->request_mutex);
 
 		req->r_target_oloc = redir.oloc; /* struct */
 
@@ -1755,10 +1755,10 @@ static void handle_reply(struct ceph_osd_client *osdc, struct ceph_msg *msg,
 		 * successfully.  In the future we might want to follow
 		 * original request's nofail setting here.
 		 */
-		err = ceph_osdc_start_request(osdc, req, true);
+		err = __ceph_osdc_start_request(osdc, req, true);
 		BUG_ON(err);
 
-		goto done;
+		goto out_unlock;
 	}
 
 	already_completed = req->r_got_reply;
@@ -1776,8 +1776,7 @@ static void handle_reply(struct ceph_osd_client *osdc, struct ceph_msg *msg,
 		req->r_got_reply = 1;
 	} else if ((flags & CEPH_OSD_FLAG_ONDISK) == 0) {
 		dout("handle_reply tid %llu dup ack\n", tid);
-		mutex_unlock(&osdc->request_mutex);
-		goto done;
+		goto out_unlock;
 	}
 
 	dout("handle_reply tid %llu flags %d\n", tid, flags);
@@ -1792,6 +1791,7 @@ static void handle_reply(struct ceph_osd_client *osdc, struct ceph_msg *msg,
 		__unregister_request(osdc, req);
 
 	mutex_unlock(&osdc->request_mutex);
+	up_read(&osdc->map_sem);
 
 	if (!already_completed) {
 		if (req->r_unsafe_callback &&
@@ -1809,10 +1809,14 @@ static void handle_reply(struct ceph_osd_client *osdc, struct ceph_msg *msg,
 		complete_request(req);
 	}
 
-done:
+out:
 	dout("req=%p req->r_linger=%d\n", req, req->r_linger);
 	ceph_osdc_put_request(req);
 	return;
+out_unlock:
+	mutex_unlock(&osdc->request_mutex);
+	up_read(&osdc->map_sem);
+	goto out;
 
 bad_put:
 	req->r_result = -EIO;
@@ -1825,6 +1829,7 @@ bad_put:
 	ceph_osdc_put_request(req);
 bad_mutex:
 	mutex_unlock(&osdc->request_mutex);
+	up_read(&osdc->map_sem);
 bad:
 	pr_err("corrupt osd_op_reply got %d %d\n",
 	       (int)msg->front.iov_len, le32_to_cpu(msg->hdr.front_len));

From 0ec1d15ec6ed513ab2cc86c67d94761d71228a32 Mon Sep 17 00:00:00 2001
From: Ilya Dryomov
Date: Wed, 5 Feb 2014 15:19:55 +0200
Subject: [PATCH 4/4] libceph: do not dereference a NULL bio pointer

Commit f38a5181d9f3 ("ceph: Convert to immutable biovecs") introduced
a NULL pointer dereference, which broke rbd in -rc1.  Fix it.

Cc: Kent Overstreet
Signed-off-by: Ilya Dryomov
Reviewed-by: Sage Weil
---
 net/ceph/messenger.c | 8 ++++++--
 1 file changed, 6 insertions(+), 2 deletions(-)

diff --git a/net/ceph/messenger.c b/net/ceph/messenger.c
index 0e478a0f4204..30efc5c18622 100644
--- a/net/ceph/messenger.c
+++ b/net/ceph/messenger.c
@@ -840,9 +840,13 @@ static bool ceph_msg_data_bio_advance(struct ceph_msg_data_cursor *cursor,
 
 	if (!cursor->bvec_iter.bi_size) {
 		bio = bio->bi_next;
-		cursor->bvec_iter = bio->bi_iter;
+		cursor->bio = bio;
+		if (bio)
+			cursor->bvec_iter = bio->bi_iter;
+		else
+			memset(&cursor->bvec_iter, 0,
+			       sizeof(cursor->bvec_iter));
 	}
-	cursor->bio = bio;
 
 	if (!cursor->last_piece) {
 		BUG_ON(!cursor->resid);
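The messenger fix reduces to a general rule for walking a
NULL-terminated chain: commit the advanced pointer first, then
dereference it only after checking it, and leave the cursor in a
well-defined end state otherwise.  Sketched below with invented types
(struct chain_node and struct chain_cursor are illustrative, not the
ceph messenger structures):

	#include <string.h>

	struct chain_node {		/* stand-in for struct bio */
		struct chain_node *next;
		int iter;		/* stand-in for bi_iter */
	};

	struct chain_cursor {
		struct chain_node *node;
		int iter;
	};

	static void cursor_advance(struct chain_cursor *c)
	{
		struct chain_node *next = c->node->next;

		c->node = next;		/* record the new position first */
		if (next)		/* dereference only if it exists */
			c->iter = next->iter;
		else			/* end of chain: clear the state */
			memset(&c->iter, 0, sizeof(c->iter));
	}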