svcrdma: Change maximum server payload back to RPCSVC_MAXPAYLOAD

This change “svcrdma: Change maximum server payload back to RPCSVC_MAXPAYLOAD” in Linux kernel is authored by Chuck Lever <chuck.lever [at] oracle.com> on Fri Aug 7 16:55:46 2015 -0400.

svcrdma: Change maximum server payload back to RPCSVC_MAXPAYLOAD

Both commit 0380a3f375 ("svcrdma: Add a separate "max data segs"
macro for svcrdma") and commit 7e5be28827bf ("svcrdma: advertise
the correct max payload") are incorrect. This commit reverts both
changes, restoring the server's maximum payload size to 1MB.

Commit 7e5be28827bf based the server's maximum payload on the
_client's_ RPCRDMA_MAX_DATA_SEGS value. That was wrong.

Commit 0380a3f375 tried to fix this so that the client maximum
payload size could be raised without affecting the server, but
managed to confuse matters more on the server side.

More importantly, limiting the advertised maximum payload size was
meant to be a workaround, not the actual fix. We need to revisit

  https://bugzilla.linux-nfs.org/show_bug.cgi?id=270

A Linux client on a platform with 64KB pages can overrun and crash
an x86_64 NFS/RDMA server when the r/wsize is 1MB. An x86/64 Linux
client seems to work fine using 1MB reads and writes when the Linux
server's maximum payload size is restored to 1MB.

BugLink: https://bugzilla.linux-nfs.org/show_bug.cgi?id=270
Fixes: 0380a3f375 ("svcrdma: Add a separate "max data segs" macro")
Signed-off-by: Chuck Lever <chuck.lever@oracle.com>
Signed-off-by: J. Bruce Fields <bfields@redhat.com>

This Linux change may have been applied to various maintained Linux releases and you can find Linux releases including commit cc9a903.

There are 12 lines of Linux source code added/deleted in this change. Code changes to Linux kernel are as follows.

 include/linux/sunrpc/svc_rdma.h          | 9 ++-------
 net/sunrpc/xprtrdma/svc_rdma_transport.c | 2 +-
 net/sunrpc/xprtrdma/xprt_rdma.h          | 1 -
 3 files changed, 3 insertions(+), 9 deletions(-)

diff --git a/include/linux/sunrpc/svc_rdma.h b/include/linux/sunrpc/svc_rdma.h
index 13af61b..d5ee6d8 100644
--- a/include/linux/sunrpc/svc_rdma.h
+++ b/include/linux/sunrpc/svc_rdma.h
@@ -172,13 +172,6 @@ struct svcxprt_rdma {
 #define RDMAXPRT_SQ_PENDING	2
 #define RDMAXPRT_CONN_PENDING	3
 
-#define RPCRDMA_MAX_SVC_SEGS	(64)	/* server max scatter/gather */
-#if RPCSVC_MAXPAYLOAD < (RPCRDMA_MAX_SVC_SEGS << PAGE_SHIFT)
-#define RPCRDMA_MAXPAYLOAD	RPCSVC_MAXPAYLOAD
-#else
-#define RPCRDMA_MAXPAYLOAD	(RPCRDMA_MAX_SVC_SEGS << PAGE_SHIFT)
-#endif
-
 #define RPCRDMA_LISTEN_BACKLOG  10
 /* The default ORD value is based on two outstanding full-size writes with a
  * page size of 4k, or 32k * 2 ops / 4k = 16 outstanding RDMA_READ.  */
@@ -187,6 +180,8 @@ struct svcxprt_rdma {
 #define RPCRDMA_MAX_REQUESTS    32
 #define RPCRDMA_MAX_REQ_SIZE    4096
 
+#define RPCSVC_MAXPAYLOAD_RDMA	RPCSVC_MAXPAYLOAD
+
 /* svc_rdma_marshal.c */
 extern int svc_rdma_xdr_decode_req(struct rpcrdma_msg **, struct svc_rqst *);
 extern int svc_rdma_xdr_encode_error(struct svcxprt_rdma *,
diff --git a/net/sunrpc/xprtrdma/svc_rdma_transport.c b/net/sunrpc/xprtrdma/svc_rdma_transport.c
index 4054a9d..21e4036 100644
--- a/net/sunrpc/xprtrdma/svc_rdma_transport.c
+++ b/net/sunrpc/xprtrdma/svc_rdma_transport.c
@@ -91,7 +91,7 @@ struct svc_xprt_class svc_rdma_class = {
 	.xcl_name = "rdma",
 	.xcl_owner = THIS_MODULE,
 	.xcl_ops = &svc_rdma_ops,
-	.xcl_max_payload = RPCRDMA_MAXPAYLOAD,
+	.xcl_max_payload = RPCSVC_MAXPAYLOAD_RDMA,
 	.xcl_ident = XPRT_TRANSPORT_RDMA,
 };
 
diff --git a/net/sunrpc/xprtrdma/xprt_rdma.h b/net/sunrpc/xprtrdma/xprt_rdma.h
index f49dd8b..e718d09 100644
--- a/net/sunrpc/xprtrdma/xprt_rdma.h
+++ b/net/sunrpc/xprtrdma/xprt_rdma.h
@@ -51,7 +51,6 @@
 #include <linux/sunrpc/clnt.h> 		/* rpc_xprt */
 #include <linux/sunrpc/rpc_rdma.h> 	/* RPC/RDMA protocol */
 #include <linux/sunrpc/xprtrdma.h> 	/* xprt parameters */
-#include <linux/sunrpc/svc.h>		/* RPCSVC_MAXPAYLOAD */
 
 #define RDMA_RESOLVE_TIMEOUT	(5000)	/* 5 seconds */
 #define RDMA_CONNECT_RETRY_MAX	(2)	/* retries if no listener backlog */

The commit for this change in Linux stable tree is cc9a903 (patch).

Leave a Reply

Your email address will not be published. Required fields are marked *