scsi: core: avoid preallocating big SGL for data [Linux 5.3]

The Linux kernel change "scsi: core: avoid preallocating big SGL for data" is included in the Linux 5.3 release. It was authored by Ming Lei on Sun Apr 28 15:39:32 2019 +0800. The commit for this change in the Linux stable tree is 3dccdf5 (patch).

scsi_mq_setup_tags() preallocates a big buffer for the IO SGL. The size is
based on scsi_mq_sgl_size() which is determined based on
shost->sg_tablesize and SG_CHUNK_SIZE.

Modern DMA engines are often capable of dealing with very big segments so
the resulting scsi_mq_sgl_size() is often too big. SG_CHUNK_SIZE results in
a static 4KB SGL allocation per command.

If an HBA has lots of deep queues, preallocation for the sg list can
consume substantial amounts of memory. For lpfc, nr_hw_queues can be 70
and each queue's depth 3781. This means the resulting preallocation for
the data SGL is 70*3781*2KB ≈ 517MB.

Switch to runtime allocation for SGL for lists longer than 2 entries. This
is the approach used by NVMe PCI so it should be reasonable for SCSI as
well. Runtime SGL allocation has always been the case for the legacy I/O
path so this is nothing new.

[mkp: attempted to clarify commit desc]

This change adds or deletes 15 lines of Linux source code. The code changes to the Linux kernel are as follows.

 drivers/scsi/scsi_lib.c | 15 +++++++++------
 1 file changed, 9 insertions(+), 6 deletions(-)

diff --git a/drivers/scsi/scsi_lib.c b/drivers/scsi/scsi_lib.c
index c115458..45b43e9 100644
--- a/drivers/scsi/scsi_lib.c
+++ b/drivers/scsi/scsi_lib.c
@@ -45,6 +45,8 @@

+#define  SCSI_INLINE_SG_CNT  2
 static struct kmem_cache *scsi_sdb_cache;
 static struct kmem_cache *scsi_sense_cache;
 static struct kmem_cache *scsi_sense_isadma_cache;
@@ -547,7 +549,8 @@ static void scsi_uninit_cmd(struct scsi_cmnd *cmd)
 static void scsi_mq_free_sgtables(struct scsi_cmnd *cmd)
    if (cmd->sdb.table.nents)
-       sg_free_table_chained(&cmd->sdb.table, SG_CHUNK_SIZE);
+       sg_free_table_chained(&cmd->sdb.table,
+               SCSI_INLINE_SG_CNT);
    if (scsi_prot_sg_count(cmd))
@@ -984,7 +987,7 @@ static blk_status_t scsi_init_sgtable(struct request *req,
    if (unlikely(sg_alloc_table_chained(&sdb->table,
            blk_rq_nr_phys_segments(req), sdb->table.sgl,
-           SG_CHUNK_SIZE)))
+           SCSI_INLINE_SG_CNT)))
        return BLK_STS_RESOURCE;

@@ -1550,9 +1553,9 @@ static int scsi_dispatch_cmd(struct scsi_cmnd *cmd)

 /* Size in bytes of the sg-list stored in the scsi-mq command-private data. */
-static unsigned int scsi_mq_sgl_size(struct Scsi_Host *shost)
+static unsigned int scsi_mq_inline_sgl_size(struct Scsi_Host *shost)
-   return min_t(unsigned int, shost->sg_tablesize, SG_CHUNK_SIZE) *
+   return min_t(unsigned int, shost->sg_tablesize, SCSI_INLINE_SG_CNT) *
        sizeof(struct scatterlist);

@@ -1734,7 +1737,7 @@ static int scsi_mq_init_request(struct blk_mq_tag_set *set, struct request *rq,
    if (scsi_host_get_prot(shost)) {
        sg = (void *)cmd + sizeof(struct scsi_cmnd) +
-       cmd->prot_sdb = (void *)sg + scsi_mq_sgl_size(shost);
+       cmd->prot_sdb = (void *)sg + scsi_mq_inline_sgl_size(shost);

    return 0;
@@ -1828,7 +1831,7 @@ int scsi_mq_setup_tags(struct Scsi_Host *shost)
    unsigned int cmd_size, sgl_size;

-   sgl_size = scsi_mq_sgl_size(shost);
+   sgl_size = scsi_mq_inline_sgl_size(shost);
    cmd_size = sizeof(struct scsi_cmnd) + shost->hostt->cmd_size + sgl_size;
    if (scsi_host_get_prot(shost))
        cmd_size += sizeof(struct scsi_data_buffer) +

