scsi: lpfc: Ensure DA_ID handling completion before deleting an NPIV instance

Deleting an NPIV instance requires all fabric ndlps to be released before
an NPIV's resources can be torn down.  Failure to release fabric ndlps
beforehand opens kref imbalance race conditions.  Fix by forcing the DA_ID
to complete synchronously with usage of wait_queue.

Signed-off-by: Justin Tee <justin.tee@broadcom.com>
Link: https://lore.kernel.org/r/20240912232447.45607-6-justintee8345@gmail.com
Signed-off-by: Martin K. Petersen <martin.petersen@oracle.com>
This commit is contained in:
Justin Tee 2024-09-12 16:24:44 -07:00 committed by Martin K. Petersen
parent d1a2ef63fc
commit 0a3c84f716
3 changed files with 55 additions and 7 deletions

View file

@ -1647,6 +1647,18 @@ lpfc_cmpl_ct(struct lpfc_hba *phba, struct lpfc_iocbq *cmdiocb,
} }
out: out:
/* If the caller wanted a synchronous DA_ID completion, signal the
* wait obj and clear flag to reset the vport.
*/
if (ndlp->save_flags & NLP_WAIT_FOR_DA_ID) {
if (ndlp->da_id_waitq)
wake_up(ndlp->da_id_waitq);
}
spin_lock_irq(&ndlp->lock);
ndlp->save_flags &= ~NLP_WAIT_FOR_DA_ID;
spin_unlock_irq(&ndlp->lock);
lpfc_ct_free_iocb(phba, cmdiocb); lpfc_ct_free_iocb(phba, cmdiocb);
lpfc_nlp_put(ndlp); lpfc_nlp_put(ndlp);
return; return;

View file

@ -90,6 +90,8 @@ enum lpfc_nlp_save_flags {
NLP_IN_RECOV_POST_DEV_LOSS = 0x1, NLP_IN_RECOV_POST_DEV_LOSS = 0x1,
/* wait for outstanding LOGO to cmpl */ /* wait for outstanding LOGO to cmpl */
NLP_WAIT_FOR_LOGO = 0x2, NLP_WAIT_FOR_LOGO = 0x2,
/* wait for outstanding DA_ID to finish */
NLP_WAIT_FOR_DA_ID = 0x4
}; };
struct lpfc_nodelist { struct lpfc_nodelist {
@ -159,7 +161,12 @@ struct lpfc_nodelist {
uint32_t nvme_fb_size; /* NVME target's supported byte cnt */ uint32_t nvme_fb_size; /* NVME target's supported byte cnt */
#define NVME_FB_BIT_SHIFT 9 /* PRLI Rsp first burst in 512B units. */ #define NVME_FB_BIT_SHIFT 9 /* PRLI Rsp first burst in 512B units. */
uint32_t nlp_defer_did; uint32_t nlp_defer_did;
/* These wait objects are NPIV specific. These IOs must complete
* synchronously.
*/
wait_queue_head_t *logo_waitq; wait_queue_head_t *logo_waitq;
wait_queue_head_t *da_id_waitq;
}; };
struct lpfc_node_rrq { struct lpfc_node_rrq {

View file

@ -626,6 +626,7 @@ lpfc_vport_delete(struct fc_vport *fc_vport)
struct Scsi_Host *shost = lpfc_shost_from_vport(vport); struct Scsi_Host *shost = lpfc_shost_from_vport(vport);
struct lpfc_hba *phba = vport->phba; struct lpfc_hba *phba = vport->phba;
int rc; int rc;
DECLARE_WAIT_QUEUE_HEAD_ONSTACK(waitq);
if (vport->port_type == LPFC_PHYSICAL_PORT) { if (vport->port_type == LPFC_PHYSICAL_PORT) {
lpfc_printf_vlog(vport, KERN_ERR, LOG_TRACE_EVENT, lpfc_printf_vlog(vport, KERN_ERR, LOG_TRACE_EVENT,
@ -679,21 +680,49 @@ lpfc_vport_delete(struct fc_vport *fc_vport)
if (!ndlp) if (!ndlp)
goto skip_logo; goto skip_logo;
/* Send the DA_ID and Fabric LOGO to cleanup the NPIV fabric entries. */
if (ndlp && ndlp->nlp_state == NLP_STE_UNMAPPED_NODE && if (ndlp && ndlp->nlp_state == NLP_STE_UNMAPPED_NODE &&
phba->link_state >= LPFC_LINK_UP && phba->link_state >= LPFC_LINK_UP &&
phba->fc_topology != LPFC_TOPOLOGY_LOOP) { phba->fc_topology != LPFC_TOPOLOGY_LOOP) {
if (vport->cfg_enable_da_id) { if (vport->cfg_enable_da_id) {
/* Send DA_ID and wait for a completion. */ /* Send DA_ID and wait for a completion. This is best
* effort. If the DA_ID fails, likely the fabric will
* "leak" NportIDs but at least the driver issued the
* command.
*/
ndlp = lpfc_findnode_did(vport, NameServer_DID);
if (!ndlp)
goto issue_logo;
spin_lock_irq(&ndlp->lock);
ndlp->da_id_waitq = &waitq;
ndlp->save_flags |= NLP_WAIT_FOR_DA_ID;
spin_unlock_irq(&ndlp->lock);
rc = lpfc_ns_cmd(vport, SLI_CTNS_DA_ID, 0, 0); rc = lpfc_ns_cmd(vport, SLI_CTNS_DA_ID, 0, 0);
if (rc) { if (!rc) {
lpfc_printf_log(vport->phba, KERN_WARNING, wait_event_timeout(waitq,
LOG_VPORT, !(ndlp->save_flags & NLP_WAIT_FOR_DA_ID),
"1829 CT command failed to " msecs_to_jiffies(phba->fc_ratov * 2000));
"delete objects on fabric, "
"rc %d\n", rc);
} }
lpfc_printf_vlog(vport, KERN_INFO, LOG_VPORT | LOG_ELS,
"1829 DA_ID issue status %d. "
"SFlag x%x NState x%x, NFlag x%x "
"Rpi x%x\n",
rc, ndlp->save_flags, ndlp->nlp_state,
ndlp->nlp_flag, ndlp->nlp_rpi);
/* Remove the waitq and save_flags. It no
* longer matters if the wake happened.
*/
spin_lock_irq(&ndlp->lock);
ndlp->da_id_waitq = NULL;
ndlp->save_flags &= ~NLP_WAIT_FOR_DA_ID;
spin_unlock_irq(&ndlp->lock);
} }
issue_logo:
/* /*
* If the vpi is not registered, then a valid FDISC doesn't * If the vpi is not registered, then a valid FDISC doesn't
* exist and there is no need for a ELS LOGO. Just cleanup * exist and there is no need for a ELS LOGO. Just cleanup